author    gebetix <gebetix@yandex-team.ru>  2022-02-10 16:48:54 +0300
committer Daniil Cherednik <dcherednik@yandex-team.ru>  2022-02-10 16:48:54 +0300
commit    e14861bee6e628671fd6da83422252ca41bd2e6d (patch)
tree      5d5cb817648f650d76cf1076100726fd9b8448e8 /contrib
parent    41f5af039e2324748594194babdd070299be3a3b (diff)
download  ydb-e14861bee6e628671fd6da83422252ca41bd2e6d.tar.gz
Restoring authorship annotation for <gebetix@yandex-team.ru>. Commit 2 of 2.
Diffstat (limited to 'contrib')
-rw-r--r--  contrib/python/boto3/ya.make | 2
-rw-r--r--  contrib/python/s3transfer/py2/s3transfer/__init__.py | 1474
-rw-r--r--  contrib/python/s3transfer/py2/s3transfer/bandwidth.py | 826
-rw-r--r--  contrib/python/s3transfer/py2/s3transfer/compat.py | 182
-rw-r--r--  contrib/python/s3transfer/py2/s3transfer/copies.py | 640
-rw-r--r--  contrib/python/s3transfer/py2/s3transfer/delete.py | 144
-rw-r--r--  contrib/python/s3transfer/py2/s3transfer/download.py | 1400
-rw-r--r--  contrib/python/s3transfer/py2/s3transfer/exceptions.py | 72
-rw-r--r--  contrib/python/s3transfer/py2/s3transfer/futures.py | 1096
-rw-r--r--  contrib/python/s3transfer/py2/s3transfer/manager.py | 1302
-rw-r--r--  contrib/python/s3transfer/py2/s3transfer/subscribers.py | 190
-rw-r--r--  contrib/python/s3transfer/py2/s3transfer/tasks.py | 728
-rw-r--r--  contrib/python/s3transfer/py2/s3transfer/upload.py | 1432
-rw-r--r--  contrib/python/s3transfer/py2/s3transfer/utils.py | 1408
-rw-r--r--  contrib/python/s3transfer/py2/ya.make | 48
-rw-r--r--  contrib/python/s3transfer/py3/s3transfer/__init__.py | 1192
-rw-r--r--  contrib/python/s3transfer/py3/s3transfer/bandwidth.py | 756
-rw-r--r--  contrib/python/s3transfer/py3/s3transfer/compat.py | 144
-rw-r--r--  contrib/python/s3transfer/py3/s3transfer/copies.py | 538
-rw-r--r--  contrib/python/s3transfer/py3/s3transfer/delete.py | 136
-rw-r--r--  contrib/python/s3transfer/py3/s3transfer/download.py | 1210
-rw-r--r--  contrib/python/s3transfer/py3/s3transfer/exceptions.py | 72
-rw-r--r--  contrib/python/s3transfer/py3/s3transfer/futures.py | 1048
-rw-r--r--  contrib/python/s3transfer/py3/s3transfer/manager.py | 1112
-rw-r--r--  contrib/python/s3transfer/py3/s3transfer/subscribers.py | 164
-rw-r--r--  contrib/python/s3transfer/py3/s3transfer/tasks.py | 674
-rw-r--r--  contrib/python/s3transfer/py3/s3transfer/upload.py | 1286
-rw-r--r--  contrib/python/s3transfer/py3/s3transfer/utils.py | 1304
-rw-r--r--  contrib/python/s3transfer/py3/ya.make | 48
-rw-r--r--  contrib/python/s3transfer/ya.make | 12
-rw-r--r--  contrib/python/ya.make | 6
31 files changed, 10323 insertions, 10323 deletions
diff --git a/contrib/python/boto3/ya.make b/contrib/python/boto3/ya.make
index 711e1f16ac..bd12ae12b7 100644
--- a/contrib/python/boto3/ya.make
+++ b/contrib/python/boto3/ya.make
@@ -9,7 +9,7 @@ LICENSE(Apache-2.0)
PEERDIR(
contrib/python/botocore
contrib/python/jmespath
- contrib/python/s3transfer
+ contrib/python/s3transfer
)
NO_LINT()
diff --git a/contrib/python/s3transfer/py2/s3transfer/__init__.py b/contrib/python/s3transfer/py2/s3transfer/__init__.py
index d109318a0a..63735d66b1 100644
--- a/contrib/python/s3transfer/py2/s3transfer/__init__.py
+++ b/contrib/python/s3transfer/py2/s3transfer/__init__.py
@@ -1,741 +1,741 @@
-# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License"). You
-# may not use this file except in compliance with the License. A copy of
-# the License is located at
-#
-# http://aws.amazon.com/apache2.0/
-#
-# or in the "license" file accompanying this file. This file is
-# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
-# ANY KIND, either express or implied. See the License for the specific
-# language governing permissions and limitations under the License.
-"""Abstractions over S3's upload/download operations.
-
-This module provides high level abstractions for efficient
-uploads/downloads. It handles several things for the user:
-
-* Automatically switching to multipart transfers when
- a file is over a specific size threshold
-* Uploading/downloading a file in parallel
-* Throttling based on max bandwidth
-* Progress callbacks to monitor transfers
-* Retries. While botocore handles retries for streaming uploads,
- it is not possible for it to handle retries for streaming
- downloads. This module handles retries for both cases so
- you don't need to implement any retry logic yourself.
-
-This module has a reasonable set of defaults. It also allows you
-to configure many aspects of the transfer process including:
-
-* Multipart threshold size
-* Max parallel downloads
-* Max bandwidth
-* Socket timeouts
-* Retry amounts
-
-There is no support for s3->s3 multipart copies at this
-time.
-
-
-.. _ref_s3transfer_usage:
-
-Usage
-=====
-
-The simplest way to use this module is:
-
-.. code-block:: python
-
- client = boto3.client('s3', 'us-west-2')
- transfer = S3Transfer(client)
- # Upload /tmp/myfile to s3://bucket/key
- transfer.upload_file('/tmp/myfile', 'bucket', 'key')
-
- # Download s3://bucket/key to /tmp/myfile
- transfer.download_file('bucket', 'key', '/tmp/myfile')
-
-The ``upload_file`` and ``download_file`` methods also accept
-``**kwargs``, which will be forwarded through to the corresponding
-client operation. Here are a few examples using ``upload_file``::
-
- # Making the object public
- transfer.upload_file('/tmp/myfile', 'bucket', 'key',
- extra_args={'ACL': 'public-read'})
-
- # Setting metadata
- transfer.upload_file('/tmp/myfile', 'bucket', 'key',
- extra_args={'Metadata': {'a': 'b', 'c': 'd'}})
-
- # Setting content type
- transfer.upload_file('/tmp/myfile.json', 'bucket', 'key',
- extra_args={'ContentType': "application/json"})
-
-
-The ``S3Transfer`` class also supports progress callbacks so you can
-provide transfer progress to users. Both the ``upload_file`` and
-``download_file`` methods take an optional ``callback`` parameter.
-Here's an example of how to print a simple progress percentage
-to the user:
-
-.. code-block:: python
-
- class ProgressPercentage(object):
- def __init__(self, filename):
- self._filename = filename
- self._size = float(os.path.getsize(filename))
- self._seen_so_far = 0
- self._lock = threading.Lock()
-
- def __call__(self, bytes_amount):
- # To simplify we'll assume this is hooked up
- # to a single filename.
- with self._lock:
- self._seen_so_far += bytes_amount
- percentage = (self._seen_so_far / self._size) * 100
- sys.stdout.write(
- "\r%s %s / %s (%.2f%%)" % (self._filename, self._seen_so_far,
- self._size, percentage))
- sys.stdout.flush()
-
-
- transfer = S3Transfer(boto3.client('s3', 'us-west-2'))
- # Upload /tmp/myfile to s3://bucket/key and print upload progress.
- transfer.upload_file('/tmp/myfile', 'bucket', 'key',
- callback=ProgressPercentage('/tmp/myfile'))
-
-
-
-You can also provide a TransferConfig object to the S3Transfer
-object that gives you more fine grained control over the
-transfer. For example:
-
-.. code-block:: python
-
- client = boto3.client('s3', 'us-west-2')
- config = TransferConfig(
- multipart_threshold=8 * 1024 * 1024,
- max_concurrency=10,
- num_download_attempts=10,
- )
- transfer = S3Transfer(client, config)
- transfer.upload_file('/tmp/foo', 'bucket', 'key')
-
-
-"""
-import os
-import math
-import functools
-import logging
-import socket
-import threading
-import random
-import string
-import concurrent.futures
-
-from botocore.compat import six
+# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You
+# may not use this file except in compliance with the License. A copy of
+# the License is located at
+#
+# http://aws.amazon.com/apache2.0/
+#
+# or in the "license" file accompanying this file. This file is
+# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific
+# language governing permissions and limitations under the License.
+"""Abstractions over S3's upload/download operations.
+
+This module provides high level abstractions for efficient
+uploads/downloads. It handles several things for the user:
+
+* Automatically switching to multipart transfers when
+ a file is over a specific size threshold
+* Uploading/downloading a file in parallel
+* Throttling based on max bandwidth
+* Progress callbacks to monitor transfers
+* Retries. While botocore handles retries for streaming uploads,
+ it is not possible for it to handle retries for streaming
+ downloads. This module handles retries for both cases so
+ you don't need to implement any retry logic yourself.
+
+This module has a reasonable set of defaults. It also allows you
+to configure many aspects of the transfer process including:
+
+* Multipart threshold size
+* Max parallel downloads
+* Max bandwidth
+* Socket timeouts
+* Retry amounts
+
+There is no support for s3->s3 multipart copies at this
+time.
+
+
+.. _ref_s3transfer_usage:
+
+Usage
+=====
+
+The simplest way to use this module is:
+
+.. code-block:: python
+
+ client = boto3.client('s3', 'us-west-2')
+ transfer = S3Transfer(client)
+ # Upload /tmp/myfile to s3://bucket/key
+ transfer.upload_file('/tmp/myfile', 'bucket', 'key')
+
+ # Download s3://bucket/key to /tmp/myfile
+ transfer.download_file('bucket', 'key', '/tmp/myfile')
+
+The ``upload_file`` and ``download_file`` methods also accept
+``**kwargs``, which will be forwarded through to the corresponding
+client operation. Here are a few examples using ``upload_file``::
+
+ # Making the object public
+ transfer.upload_file('/tmp/myfile', 'bucket', 'key',
+ extra_args={'ACL': 'public-read'})
+
+ # Setting metadata
+ transfer.upload_file('/tmp/myfile', 'bucket', 'key',
+ extra_args={'Metadata': {'a': 'b', 'c': 'd'}})
+
+ # Setting content type
+ transfer.upload_file('/tmp/myfile.json', 'bucket', 'key',
+ extra_args={'ContentType': "application/json"})
+
+
+The ``S3Transfer`` class also supports progress callbacks so you can
+provide transfer progress to users. Both the ``upload_file`` and
+``download_file`` methods take an optional ``callback`` parameter.
+Here's an example of how to print a simple progress percentage
+to the user:
+
+.. code-block:: python
+
+ class ProgressPercentage(object):
+ def __init__(self, filename):
+ self._filename = filename
+ self._size = float(os.path.getsize(filename))
+ self._seen_so_far = 0
+ self._lock = threading.Lock()
+
+ def __call__(self, bytes_amount):
+ # To simplify we'll assume this is hooked up
+ # to a single filename.
+ with self._lock:
+ self._seen_so_far += bytes_amount
+ percentage = (self._seen_so_far / self._size) * 100
+ sys.stdout.write(
+ "\r%s %s / %s (%.2f%%)" % (self._filename, self._seen_so_far,
+ self._size, percentage))
+ sys.stdout.flush()
+
+
+ transfer = S3Transfer(boto3.client('s3', 'us-west-2'))
+ # Upload /tmp/myfile to s3://bucket/key and print upload progress.
+ transfer.upload_file('/tmp/myfile', 'bucket', 'key',
+ callback=ProgressPercentage('/tmp/myfile'))
+
+
+
+You can also provide a TransferConfig object to the S3Transfer
+object that gives you more fine grained control over the
+transfer. For example:
+
+.. code-block:: python
+
+ client = boto3.client('s3', 'us-west-2')
+ config = TransferConfig(
+ multipart_threshold=8 * 1024 * 1024,
+ max_concurrency=10,
+ num_download_attempts=10,
+ )
+ transfer = S3Transfer(client, config)
+ transfer.upload_file('/tmp/foo', 'bucket', 'key')
+
+
+"""
+import os
+import math
+import functools
+import logging
+import socket
+import threading
+import random
+import string
+import concurrent.futures
+
+from botocore.compat import six
from botocore.vendored.requests.packages.urllib3.exceptions import \
- ReadTimeoutError
-from botocore.exceptions import IncompleteReadError
-
-import s3transfer.compat
-from s3transfer.exceptions import RetriesExceededError, S3UploadFailedError
-
-
-__author__ = 'Amazon Web Services'
+ ReadTimeoutError
+from botocore.exceptions import IncompleteReadError
+
+import s3transfer.compat
+from s3transfer.exceptions import RetriesExceededError, S3UploadFailedError
+
+
+__author__ = 'Amazon Web Services'
__version__ = '0.4.2'
-
-
-class NullHandler(logging.Handler):
- def emit(self, record):
- pass
-
-
-logger = logging.getLogger(__name__)
-logger.addHandler(NullHandler())
-
-queue = six.moves.queue
-
-MB = 1024 * 1024
-SHUTDOWN_SENTINEL = object()
-
-
-def random_file_extension(num_digits=8):
- return ''.join(random.choice(string.hexdigits) for _ in range(num_digits))
-
-
-def disable_upload_callbacks(request, operation_name, **kwargs):
- if operation_name in ['PutObject', 'UploadPart'] and \
- hasattr(request.body, 'disable_callback'):
- request.body.disable_callback()
-
-
-def enable_upload_callbacks(request, operation_name, **kwargs):
- if operation_name in ['PutObject', 'UploadPart'] and \
- hasattr(request.body, 'enable_callback'):
- request.body.enable_callback()
-
-
-class QueueShutdownError(Exception):
- pass
-
-
-class ReadFileChunk(object):
- def __init__(self, fileobj, start_byte, chunk_size, full_file_size,
- callback=None, enable_callback=True):
- """
-
- Given a file object shown below:
-
- |___________________________________________________|
- 0 | | full_file_size
- |----chunk_size---|
- start_byte
-
- :type fileobj: file
- :param fileobj: File like object
-
- :type start_byte: int
- :param start_byte: The first byte from which to start reading.
-
- :type chunk_size: int
- :param chunk_size: The max chunk size to read. Trying to read
- past the end of the chunk size will behave like you've
- reached the end of the file.
-
- :type full_file_size: int
- :param full_file_size: The entire content length associated
- with ``fileobj``.
-
- :type callback: function(amount_read)
- :param callback: Called whenever data is read from this object.
-
- """
- self._fileobj = fileobj
- self._start_byte = start_byte
- self._size = self._calculate_file_size(
- self._fileobj, requested_size=chunk_size,
- start_byte=start_byte, actual_file_size=full_file_size)
- self._fileobj.seek(self._start_byte)
- self._amount_read = 0
- self._callback = callback
- self._callback_enabled = enable_callback
-
- @classmethod
- def from_filename(cls, filename, start_byte, chunk_size, callback=None,
- enable_callback=True):
- """Convenience factory function to create from a filename.
-
- :type start_byte: int
- :param start_byte: The first byte from which to start reading.
-
- :type chunk_size: int
- :param chunk_size: The max chunk size to read. Trying to read
- past the end of the chunk size will behave like you've
- reached the end of the file.
-
- :type full_file_size: int
- :param full_file_size: The entire content length associated
- with ``fileobj``.
-
- :type callback: function(amount_read)
- :param callback: Called whenever data is read from this object.
-
- :type enable_callback: bool
- :param enable_callback: Indicate whether to invoke callback
- during read() calls.
-
- :rtype: ``ReadFileChunk``
- :return: A new instance of ``ReadFileChunk``
-
- """
- f = open(filename, 'rb')
- file_size = os.fstat(f.fileno()).st_size
- return cls(f, start_byte, chunk_size, file_size, callback,
- enable_callback)
-
- def _calculate_file_size(self, fileobj, requested_size, start_byte,
- actual_file_size):
- max_chunk_size = actual_file_size - start_byte
- return min(max_chunk_size, requested_size)
-
- def read(self, amount=None):
- if amount is None:
- amount_to_read = self._size - self._amount_read
- else:
- amount_to_read = min(self._size - self._amount_read, amount)
- data = self._fileobj.read(amount_to_read)
- self._amount_read += len(data)
- if self._callback is not None and self._callback_enabled:
- self._callback(len(data))
- return data
-
- def enable_callback(self):
- self._callback_enabled = True
-
- def disable_callback(self):
- self._callback_enabled = False
-
- def seek(self, where):
- self._fileobj.seek(self._start_byte + where)
- if self._callback is not None and self._callback_enabled:
- # To also rewind the callback() for an accurate progress report
- self._callback(where - self._amount_read)
- self._amount_read = where
-
- def close(self):
- self._fileobj.close()
-
- def tell(self):
- return self._amount_read
-
- def __len__(self):
- # __len__ is defined because requests will try to determine the length
- # of the stream to set a content length. In the normal case
- # of the file it will just stat the file, but we need to change that
- # behavior. By providing a __len__, requests will use that instead
- # of stat'ing the file.
- return self._size
-
- def __enter__(self):
- return self
-
- def __exit__(self, *args, **kwargs):
- self.close()
-
- def __iter__(self):
- # This is a workaround for http://bugs.python.org/issue17575
- # Basically httplib will try to iterate over the contents, even
- # if it's a file-like object. This wasn't noticed because we've
- # already exhausted the stream so iterating over the file immediately
- # stops, which is what we're simulating here.
- return iter([])
-
-
-class StreamReaderProgress(object):
- """Wrapper for a read only stream that adds progress callbacks."""
- def __init__(self, stream, callback=None):
- self._stream = stream
- self._callback = callback
-
- def read(self, *args, **kwargs):
- value = self._stream.read(*args, **kwargs)
- if self._callback is not None:
- self._callback(len(value))
- return value
-
-
-class OSUtils(object):
- def get_file_size(self, filename):
- return os.path.getsize(filename)
-
- def open_file_chunk_reader(self, filename, start_byte, size, callback):
- return ReadFileChunk.from_filename(filename, start_byte,
- size, callback,
- enable_callback=False)
-
- def open(self, filename, mode):
- return open(filename, mode)
-
- def remove_file(self, filename):
- """Remove a file, noop if file does not exist."""
- # Unlike os.remove, if the file does not exist,
- # then this method does nothing.
- try:
- os.remove(filename)
- except OSError:
- pass
-
- def rename_file(self, current_filename, new_filename):
- s3transfer.compat.rename_file(current_filename, new_filename)
-
-
-class MultipartUploader(object):
- # These are the extra_args that need to be forwarded onto
- # subsequent upload_parts.
- UPLOAD_PART_ARGS = [
- 'SSECustomerKey',
- 'SSECustomerAlgorithm',
- 'SSECustomerKeyMD5',
- 'RequestPayer',
- ]
-
- def __init__(self, client, config, osutil,
- executor_cls=concurrent.futures.ThreadPoolExecutor):
- self._client = client
- self._config = config
- self._os = osutil
- self._executor_cls = executor_cls
-
- def _extra_upload_part_args(self, extra_args):
- # Only the args in UPLOAD_PART_ARGS actually need to be passed
- # onto the upload_part calls.
- upload_parts_args = {}
- for key, value in extra_args.items():
- if key in self.UPLOAD_PART_ARGS:
- upload_parts_args[key] = value
- return upload_parts_args
-
- def upload_file(self, filename, bucket, key, callback, extra_args):
- response = self._client.create_multipart_upload(Bucket=bucket,
- Key=key, **extra_args)
- upload_id = response['UploadId']
- try:
- parts = self._upload_parts(upload_id, filename, bucket, key,
- callback, extra_args)
- except Exception as e:
- logger.debug("Exception raised while uploading parts, "
- "aborting multipart upload.", exc_info=True)
- self._client.abort_multipart_upload(
- Bucket=bucket, Key=key, UploadId=upload_id)
- raise S3UploadFailedError(
- "Failed to upload %s to %s: %s" % (
- filename, '/'.join([bucket, key]), e))
- self._client.complete_multipart_upload(
- Bucket=bucket, Key=key, UploadId=upload_id,
- MultipartUpload={'Parts': parts})
-
- def _upload_parts(self, upload_id, filename, bucket, key, callback,
- extra_args):
- upload_parts_extra_args = self._extra_upload_part_args(extra_args)
- parts = []
- part_size = self._config.multipart_chunksize
- num_parts = int(
- math.ceil(self._os.get_file_size(filename) / float(part_size)))
- max_workers = self._config.max_concurrency
- with self._executor_cls(max_workers=max_workers) as executor:
- upload_partial = functools.partial(
- self._upload_one_part, filename, bucket, key, upload_id,
- part_size, upload_parts_extra_args, callback)
- for part in executor.map(upload_partial, range(1, num_parts + 1)):
- parts.append(part)
- return parts
-
- def _upload_one_part(self, filename, bucket, key,
- upload_id, part_size, extra_args,
- callback, part_number):
- open_chunk_reader = self._os.open_file_chunk_reader
- with open_chunk_reader(filename, part_size * (part_number - 1),
- part_size, callback) as body:
- response = self._client.upload_part(
- Bucket=bucket, Key=key,
- UploadId=upload_id, PartNumber=part_number, Body=body,
- **extra_args)
- etag = response['ETag']
- return {'ETag': etag, 'PartNumber': part_number}
-
-
-class ShutdownQueue(queue.Queue):
- """A queue implementation that can be shutdown.
-
- Shutting down a queue means that this class adds a
- trigger_shutdown method that will trigger all subsequent
- calls to put() to fail with a ``QueueShutdownError``.
-
- It purposefully deviates from queue.Queue, and is *not* meant
- to be a drop in replacement for ``queue.Queue``.
-
- """
- def _init(self, maxsize):
- self._shutdown = False
- self._shutdown_lock = threading.Lock()
- # queue.Queue is an old style class so we don't use super().
- return queue.Queue._init(self, maxsize)
-
- def trigger_shutdown(self):
- with self._shutdown_lock:
- self._shutdown = True
- logger.debug("The IO queue is now shutdown.")
-
- def put(self, item):
- # Note: this is not sufficient, it's still possible to deadlock!
- # Need to hook into the condition vars used by this class.
- with self._shutdown_lock:
- if self._shutdown:
- raise QueueShutdownError("Cannot put item to queue when "
- "queue has been shutdown.")
- return queue.Queue.put(self, item)
-
-
-class MultipartDownloader(object):
- def __init__(self, client, config, osutil,
- executor_cls=concurrent.futures.ThreadPoolExecutor):
- self._client = client
- self._config = config
- self._os = osutil
- self._executor_cls = executor_cls
- self._ioqueue = ShutdownQueue(self._config.max_io_queue)
-
- def download_file(self, bucket, key, filename, object_size,
- extra_args, callback=None):
- with self._executor_cls(max_workers=2) as controller:
- # 1 thread for the future that manages the downloading of parts
- # 1 thread for the future that manages IO writes.
- download_parts_handler = functools.partial(
- self._download_file_as_future,
- bucket, key, filename, object_size, callback)
- parts_future = controller.submit(download_parts_handler)
-
- io_writes_handler = functools.partial(
- self._perform_io_writes, filename)
- io_future = controller.submit(io_writes_handler)
- results = concurrent.futures.wait(
- [parts_future, io_future],
- return_when=concurrent.futures.FIRST_EXCEPTION)
- self._process_future_results(results)
-
- def _process_future_results(self, futures):
- finished, unfinished = futures
- for future in finished:
- future.result()
-
- def _download_file_as_future(self, bucket, key, filename, object_size,
- callback):
- part_size = self._config.multipart_chunksize
- num_parts = int(math.ceil(object_size / float(part_size)))
- max_workers = self._config.max_concurrency
- download_partial = functools.partial(
- self._download_range, bucket, key, filename,
- part_size, num_parts, callback)
- try:
- with self._executor_cls(max_workers=max_workers) as executor:
- list(executor.map(download_partial, range(num_parts)))
- finally:
- self._ioqueue.put(SHUTDOWN_SENTINEL)
-
- def _calculate_range_param(self, part_size, part_index, num_parts):
- start_range = part_index * part_size
- if part_index == num_parts - 1:
- end_range = ''
- else:
- end_range = start_range + part_size - 1
- range_param = 'bytes=%s-%s' % (start_range, end_range)
- return range_param
-
- def _download_range(self, bucket, key, filename,
- part_size, num_parts, callback, part_index):
- try:
- range_param = self._calculate_range_param(
- part_size, part_index, num_parts)
-
- max_attempts = self._config.num_download_attempts
- last_exception = None
- for i in range(max_attempts):
- try:
- logger.debug("Making get_object call.")
- response = self._client.get_object(
- Bucket=bucket, Key=key, Range=range_param)
- streaming_body = StreamReaderProgress(
- response['Body'], callback)
- buffer_size = 1024 * 16
- current_index = part_size * part_index
- for chunk in iter(lambda: streaming_body.read(buffer_size),
- b''):
- self._ioqueue.put((current_index, chunk))
- current_index += len(chunk)
- return
- except (socket.timeout, socket.error,
- ReadTimeoutError, IncompleteReadError) as e:
- logger.debug("Retrying exception caught (%s), "
- "retrying request, (attempt %s / %s)", e, i,
- max_attempts, exc_info=True)
- last_exception = e
- continue
- raise RetriesExceededError(last_exception)
- finally:
- logger.debug("EXITING _download_range for part: %s", part_index)
-
- def _perform_io_writes(self, filename):
- with self._os.open(filename, 'wb') as f:
- while True:
- task = self._ioqueue.get()
- if task is SHUTDOWN_SENTINEL:
- logger.debug("Shutdown sentinel received in IO handler, "
- "shutting down IO handler.")
- return
- else:
- try:
- offset, data = task
- f.seek(offset)
- f.write(data)
- except Exception as e:
- logger.debug("Caught exception in IO thread: %s",
- e, exc_info=True)
- self._ioqueue.trigger_shutdown()
- raise
-
-
-class TransferConfig(object):
- def __init__(self,
- multipart_threshold=8 * MB,
- max_concurrency=10,
- multipart_chunksize=8 * MB,
- num_download_attempts=5,
- max_io_queue=100):
- self.multipart_threshold = multipart_threshold
- self.max_concurrency = max_concurrency
- self.multipart_chunksize = multipart_chunksize
- self.num_download_attempts = num_download_attempts
- self.max_io_queue = max_io_queue
-
-
-class S3Transfer(object):
-
- ALLOWED_DOWNLOAD_ARGS = [
- 'VersionId',
- 'SSECustomerAlgorithm',
- 'SSECustomerKey',
- 'SSECustomerKeyMD5',
- 'RequestPayer',
- ]
-
- ALLOWED_UPLOAD_ARGS = [
- 'ACL',
- 'CacheControl',
- 'ContentDisposition',
- 'ContentEncoding',
- 'ContentLanguage',
- 'ContentType',
- 'Expires',
- 'GrantFullControl',
- 'GrantRead',
- 'GrantReadACP',
- 'GrantWriteACL',
- 'Metadata',
- 'RequestPayer',
- 'ServerSideEncryption',
- 'StorageClass',
- 'SSECustomerAlgorithm',
- 'SSECustomerKey',
- 'SSECustomerKeyMD5',
- 'SSEKMSKeyId',
+
+
+class NullHandler(logging.Handler):
+ def emit(self, record):
+ pass
+
+
+logger = logging.getLogger(__name__)
+logger.addHandler(NullHandler())
+
+queue = six.moves.queue
+
+MB = 1024 * 1024
+SHUTDOWN_SENTINEL = object()
+
+
+def random_file_extension(num_digits=8):
+ return ''.join(random.choice(string.hexdigits) for _ in range(num_digits))
+
+
+def disable_upload_callbacks(request, operation_name, **kwargs):
+ if operation_name in ['PutObject', 'UploadPart'] and \
+ hasattr(request.body, 'disable_callback'):
+ request.body.disable_callback()
+
+
+def enable_upload_callbacks(request, operation_name, **kwargs):
+ if operation_name in ['PutObject', 'UploadPart'] and \
+ hasattr(request.body, 'enable_callback'):
+ request.body.enable_callback()
+
+
+class QueueShutdownError(Exception):
+ pass
+
+
+class ReadFileChunk(object):
+ def __init__(self, fileobj, start_byte, chunk_size, full_file_size,
+ callback=None, enable_callback=True):
+ """
+
+ Given a file object shown below:
+
+ |___________________________________________________|
+ 0 | | full_file_size
+ |----chunk_size---|
+ start_byte
+
+ :type fileobj: file
+ :param fileobj: File like object
+
+ :type start_byte: int
+ :param start_byte: The first byte from which to start reading.
+
+ :type chunk_size: int
+ :param chunk_size: The max chunk size to read. Trying to read
+ past the end of the chunk size will behave like you've
+ reached the end of the file.
+
+ :type full_file_size: int
+ :param full_file_size: The entire content length associated
+ with ``fileobj``.
+
+ :type callback: function(amount_read)
+ :param callback: Called whenever data is read from this object.
+
+ """
+ self._fileobj = fileobj
+ self._start_byte = start_byte
+ self._size = self._calculate_file_size(
+ self._fileobj, requested_size=chunk_size,
+ start_byte=start_byte, actual_file_size=full_file_size)
+ self._fileobj.seek(self._start_byte)
+ self._amount_read = 0
+ self._callback = callback
+ self._callback_enabled = enable_callback
+
+ @classmethod
+ def from_filename(cls, filename, start_byte, chunk_size, callback=None,
+ enable_callback=True):
+ """Convenience factory function to create from a filename.
+
+ :type start_byte: int
+ :param start_byte: The first byte from which to start reading.
+
+ :type chunk_size: int
+ :param chunk_size: The max chunk size to read. Trying to read
+ past the end of the chunk size will behave like you've
+ reached the end of the file.
+
+ :type full_file_size: int
+ :param full_file_size: The entire content length associated
+ with ``fileobj``.
+
+ :type callback: function(amount_read)
+ :param callback: Called whenever data is read from this object.
+
+ :type enable_callback: bool
+ :param enable_callback: Indicate whether to invoke callback
+ during read() calls.
+
+ :rtype: ``ReadFileChunk``
+ :return: A new instance of ``ReadFileChunk``
+
+ """
+ f = open(filename, 'rb')
+ file_size = os.fstat(f.fileno()).st_size
+ return cls(f, start_byte, chunk_size, file_size, callback,
+ enable_callback)
+
+ def _calculate_file_size(self, fileobj, requested_size, start_byte,
+ actual_file_size):
+ max_chunk_size = actual_file_size - start_byte
+ return min(max_chunk_size, requested_size)
+
+ def read(self, amount=None):
+ if amount is None:
+ amount_to_read = self._size - self._amount_read
+ else:
+ amount_to_read = min(self._size - self._amount_read, amount)
+ data = self._fileobj.read(amount_to_read)
+ self._amount_read += len(data)
+ if self._callback is not None and self._callback_enabled:
+ self._callback(len(data))
+ return data
+
+ def enable_callback(self):
+ self._callback_enabled = True
+
+ def disable_callback(self):
+ self._callback_enabled = False
+
+ def seek(self, where):
+ self._fileobj.seek(self._start_byte + where)
+ if self._callback is not None and self._callback_enabled:
+ # To also rewind the callback() for an accurate progress report
+ self._callback(where - self._amount_read)
+ self._amount_read = where
+
+ def close(self):
+ self._fileobj.close()
+
+ def tell(self):
+ return self._amount_read
+
+ def __len__(self):
+ # __len__ is defined because requests will try to determine the length
+ # of the stream to set a content length. In the normal case
+ # of the file it will just stat the file, but we need to change that
+ # behavior. By providing a __len__, requests will use that instead
+ # of stat'ing the file.
+ return self._size
+
+ def __enter__(self):
+ return self
+
+ def __exit__(self, *args, **kwargs):
+ self.close()
+
+ def __iter__(self):
+ # This is a workaround for http://bugs.python.org/issue17575
+ # Basically httplib will try to iterate over the contents, even
+ # if it's a file-like object. This wasn't noticed because we've
+ # already exhausted the stream so iterating over the file immediately
+ # stops, which is what we're simulating here.
+ return iter([])
+
+
+class StreamReaderProgress(object):
+ """Wrapper for a read only stream that adds progress callbacks."""
+ def __init__(self, stream, callback=None):
+ self._stream = stream
+ self._callback = callback
+
+ def read(self, *args, **kwargs):
+ value = self._stream.read(*args, **kwargs)
+ if self._callback is not None:
+ self._callback(len(value))
+ return value
+
+
+class OSUtils(object):
+ def get_file_size(self, filename):
+ return os.path.getsize(filename)
+
+ def open_file_chunk_reader(self, filename, start_byte, size, callback):
+ return ReadFileChunk.from_filename(filename, start_byte,
+ size, callback,
+ enable_callback=False)
+
+ def open(self, filename, mode):
+ return open(filename, mode)
+
+ def remove_file(self, filename):
+ """Remove a file, noop if file does not exist."""
+ # Unlike os.remove, if the file does not exist,
+ # then this method does nothing.
+ try:
+ os.remove(filename)
+ except OSError:
+ pass
+
+ def rename_file(self, current_filename, new_filename):
+ s3transfer.compat.rename_file(current_filename, new_filename)
+
+
+class MultipartUploader(object):
+ # These are the extra_args that need to be forwarded onto
+ # subsequent upload_parts.
+ UPLOAD_PART_ARGS = [
+ 'SSECustomerKey',
+ 'SSECustomerAlgorithm',
+ 'SSECustomerKeyMD5',
+ 'RequestPayer',
+ ]
+
+ def __init__(self, client, config, osutil,
+ executor_cls=concurrent.futures.ThreadPoolExecutor):
+ self._client = client
+ self._config = config
+ self._os = osutil
+ self._executor_cls = executor_cls
+
+ def _extra_upload_part_args(self, extra_args):
+ # Only the args in UPLOAD_PART_ARGS actually need to be passed
+ # onto the upload_part calls.
+ upload_parts_args = {}
+ for key, value in extra_args.items():
+ if key in self.UPLOAD_PART_ARGS:
+ upload_parts_args[key] = value
+ return upload_parts_args
+
+ def upload_file(self, filename, bucket, key, callback, extra_args):
+ response = self._client.create_multipart_upload(Bucket=bucket,
+ Key=key, **extra_args)
+ upload_id = response['UploadId']
+ try:
+ parts = self._upload_parts(upload_id, filename, bucket, key,
+ callback, extra_args)
+ except Exception as e:
+ logger.debug("Exception raised while uploading parts, "
+ "aborting multipart upload.", exc_info=True)
+ self._client.abort_multipart_upload(
+ Bucket=bucket, Key=key, UploadId=upload_id)
+ raise S3UploadFailedError(
+ "Failed to upload %s to %s: %s" % (
+ filename, '/'.join([bucket, key]), e))
+ self._client.complete_multipart_upload(
+ Bucket=bucket, Key=key, UploadId=upload_id,
+ MultipartUpload={'Parts': parts})
+
+ def _upload_parts(self, upload_id, filename, bucket, key, callback,
+ extra_args):
+ upload_parts_extra_args = self._extra_upload_part_args(extra_args)
+ parts = []
+ part_size = self._config.multipart_chunksize
+ num_parts = int(
+ math.ceil(self._os.get_file_size(filename) / float(part_size)))
+ max_workers = self._config.max_concurrency
+ with self._executor_cls(max_workers=max_workers) as executor:
+ upload_partial = functools.partial(
+ self._upload_one_part, filename, bucket, key, upload_id,
+ part_size, upload_parts_extra_args, callback)
+ for part in executor.map(upload_partial, range(1, num_parts + 1)):
+ parts.append(part)
+ return parts
+
+ def _upload_one_part(self, filename, bucket, key,
+ upload_id, part_size, extra_args,
+ callback, part_number):
+ open_chunk_reader = self._os.open_file_chunk_reader
+ with open_chunk_reader(filename, part_size * (part_number - 1),
+ part_size, callback) as body:
+ response = self._client.upload_part(
+ Bucket=bucket, Key=key,
+ UploadId=upload_id, PartNumber=part_number, Body=body,
+ **extra_args)
+ etag = response['ETag']
+ return {'ETag': etag, 'PartNumber': part_number}
+
+
+class ShutdownQueue(queue.Queue):
+ """A queue implementation that can be shutdown.
+
+ Shutting down a queue means that this class adds a
+ trigger_shutdown method that will trigger all subsequent
+ calls to put() to fail with a ``QueueShutdownError``.
+
+ It purposefully deviates from queue.Queue, and is *not* meant
+ to be a drop in replacement for ``queue.Queue``.
+
+ """
+ def _init(self, maxsize):
+ self._shutdown = False
+ self._shutdown_lock = threading.Lock()
+ # queue.Queue is an old style class so we don't use super().
+ return queue.Queue._init(self, maxsize)
+
+ def trigger_shutdown(self):
+ with self._shutdown_lock:
+ self._shutdown = True
+ logger.debug("The IO queue is now shutdown.")
+
+ def put(self, item):
+ # Note: this is not sufficient, it's still possible to deadlock!
+ # Need to hook into the condition vars used by this class.
+ with self._shutdown_lock:
+ if self._shutdown:
+ raise QueueShutdownError("Cannot put item to queue when "
+ "queue has been shutdown.")
+ return queue.Queue.put(self, item)
+
+
+class MultipartDownloader(object):
+ def __init__(self, client, config, osutil,
+ executor_cls=concurrent.futures.ThreadPoolExecutor):
+ self._client = client
+ self._config = config
+ self._os = osutil
+ self._executor_cls = executor_cls
+ self._ioqueue = ShutdownQueue(self._config.max_io_queue)
+
+ def download_file(self, bucket, key, filename, object_size,
+ extra_args, callback=None):
+ with self._executor_cls(max_workers=2) as controller:
+ # 1 thread for the future that manages the downloading of parts
+ # 1 thread for the future that manages IO writes.
+ download_parts_handler = functools.partial(
+ self._download_file_as_future,
+ bucket, key, filename, object_size, callback)
+ parts_future = controller.submit(download_parts_handler)
+
+ io_writes_handler = functools.partial(
+ self._perform_io_writes, filename)
+ io_future = controller.submit(io_writes_handler)
+ results = concurrent.futures.wait(
+ [parts_future, io_future],
+ return_when=concurrent.futures.FIRST_EXCEPTION)
+ self._process_future_results(results)
+
+ def _process_future_results(self, futures):
+ finished, unfinished = futures
+ for future in finished:
+ future.result()
+
+ def _download_file_as_future(self, bucket, key, filename, object_size,
+ callback):
+ part_size = self._config.multipart_chunksize
+ num_parts = int(math.ceil(object_size / float(part_size)))
+ max_workers = self._config.max_concurrency
+ download_partial = functools.partial(
+ self._download_range, bucket, key, filename,
+ part_size, num_parts, callback)
+ try:
+ with self._executor_cls(max_workers=max_workers) as executor:
+ list(executor.map(download_partial, range(num_parts)))
+ finally:
+ self._ioqueue.put(SHUTDOWN_SENTINEL)
+
+ def _calculate_range_param(self, part_size, part_index, num_parts):
+ start_range = part_index * part_size
+ if part_index == num_parts - 1:
+ end_range = ''
+ else:
+ end_range = start_range + part_size - 1
+ range_param = 'bytes=%s-%s' % (start_range, end_range)
+ return range_param
+
+ def _download_range(self, bucket, key, filename,
+ part_size, num_parts, callback, part_index):
+ try:
+ range_param = self._calculate_range_param(
+ part_size, part_index, num_parts)
+
+ max_attempts = self._config.num_download_attempts
+ last_exception = None
+ for i in range(max_attempts):
+ try:
+ logger.debug("Making get_object call.")
+ response = self._client.get_object(
+ Bucket=bucket, Key=key, Range=range_param)
+ streaming_body = StreamReaderProgress(
+ response['Body'], callback)
+ buffer_size = 1024 * 16
+ current_index = part_size * part_index
+ for chunk in iter(lambda: streaming_body.read(buffer_size),
+ b''):
+ self._ioqueue.put((current_index, chunk))
+ current_index += len(chunk)
+ return
+ except (socket.timeout, socket.error,
+ ReadTimeoutError, IncompleteReadError) as e:
+ logger.debug("Retrying exception caught (%s), "
+ "retrying request, (attempt %s / %s)", e, i,
+ max_attempts, exc_info=True)
+ last_exception = e
+ continue
+ raise RetriesExceededError(last_exception)
+ finally:
+ logger.debug("EXITING _download_range for part: %s", part_index)
+
+ def _perform_io_writes(self, filename):
+ with self._os.open(filename, 'wb') as f:
+ while True:
+ task = self._ioqueue.get()
+ if task is SHUTDOWN_SENTINEL:
+ logger.debug("Shutdown sentinel received in IO handler, "
+ "shutting down IO handler.")
+ return
+ else:
+ try:
+ offset, data = task
+ f.seek(offset)
+ f.write(data)
+ except Exception as e:
+ logger.debug("Caught exception in IO thread: %s",
+ e, exc_info=True)
+ self._ioqueue.trigger_shutdown()
+ raise
+
+
+class TransferConfig(object):
+ def __init__(self,
+ multipart_threshold=8 * MB,
+ max_concurrency=10,
+ multipart_chunksize=8 * MB,
+ num_download_attempts=5,
+ max_io_queue=100):
+ self.multipart_threshold = multipart_threshold
+ self.max_concurrency = max_concurrency
+ self.multipart_chunksize = multipart_chunksize
+ self.num_download_attempts = num_download_attempts
+ self.max_io_queue = max_io_queue
+
+
+class S3Transfer(object):
+
+ ALLOWED_DOWNLOAD_ARGS = [
+ 'VersionId',
+ 'SSECustomerAlgorithm',
+ 'SSECustomerKey',
+ 'SSECustomerKeyMD5',
+ 'RequestPayer',
+ ]
+
+ ALLOWED_UPLOAD_ARGS = [
+ 'ACL',
+ 'CacheControl',
+ 'ContentDisposition',
+ 'ContentEncoding',
+ 'ContentLanguage',
+ 'ContentType',
+ 'Expires',
+ 'GrantFullControl',
+ 'GrantRead',
+ 'GrantReadACP',
+ 'GrantWriteACL',
+ 'Metadata',
+ 'RequestPayer',
+ 'ServerSideEncryption',
+ 'StorageClass',
+ 'SSECustomerAlgorithm',
+ 'SSECustomerKey',
+ 'SSECustomerKeyMD5',
+ 'SSEKMSKeyId',
'SSEKMSEncryptionContext',
'Tagging',
- ]
-
- def __init__(self, client, config=None, osutil=None):
- self._client = client
- if config is None:
- config = TransferConfig()
- self._config = config
- if osutil is None:
- osutil = OSUtils()
- self._osutil = osutil
-
- def upload_file(self, filename, bucket, key,
- callback=None, extra_args=None):
- """Upload a file to an S3 object.
-
- Variants have also been injected into S3 client, Bucket and Object.
- You don't have to use S3Transfer.upload_file() directly.
- """
- if extra_args is None:
- extra_args = {}
- self._validate_all_known_args(extra_args, self.ALLOWED_UPLOAD_ARGS)
- events = self._client.meta.events
- events.register_first('request-created.s3',
- disable_upload_callbacks,
- unique_id='s3upload-callback-disable')
- events.register_last('request-created.s3',
- enable_upload_callbacks,
- unique_id='s3upload-callback-enable')
- if self._osutil.get_file_size(filename) >= \
- self._config.multipart_threshold:
- self._multipart_upload(filename, bucket, key, callback, extra_args)
- else:
- self._put_object(filename, bucket, key, callback, extra_args)
-
- def _put_object(self, filename, bucket, key, callback, extra_args):
- # We're using open_file_chunk_reader so we can take advantage of the
- # progress callback functionality.
- open_chunk_reader = self._osutil.open_file_chunk_reader
- with open_chunk_reader(filename, 0,
- self._osutil.get_file_size(filename),
- callback=callback) as body:
- self._client.put_object(Bucket=bucket, Key=key, Body=body,
- **extra_args)
-
- def download_file(self, bucket, key, filename, extra_args=None,
- callback=None):
- """Download an S3 object to a file.
-
- Variants have also been injected into S3 client, Bucket and Object.
- You don't have to use S3Transfer.download_file() directly.
- """
- # This method will issue a ``head_object`` request to determine
- # the size of the S3 object. This is used to determine if the
- # object is downloaded in parallel.
- if extra_args is None:
- extra_args = {}
- self._validate_all_known_args(extra_args, self.ALLOWED_DOWNLOAD_ARGS)
- object_size = self._object_size(bucket, key, extra_args)
- temp_filename = filename + os.extsep + random_file_extension()
- try:
- self._download_file(bucket, key, temp_filename, object_size,
- extra_args, callback)
- except Exception:
- logger.debug("Exception caught in download_file, removing partial "
- "file: %s", temp_filename, exc_info=True)
- self._osutil.remove_file(temp_filename)
- raise
- else:
- self._osutil.rename_file(temp_filename, filename)
-
- def _download_file(self, bucket, key, filename, object_size,
- extra_args, callback):
- if object_size >= self._config.multipart_threshold:
- self._ranged_download(bucket, key, filename, object_size,
- extra_args, callback)
- else:
- self._get_object(bucket, key, filename, extra_args, callback)
-
- def _validate_all_known_args(self, actual, allowed):
- for kwarg in actual:
- if kwarg not in allowed:
- raise ValueError(
- "Invalid extra_args key '%s', "
- "must be one of: %s" % (
- kwarg, ', '.join(allowed)))
-
- def _ranged_download(self, bucket, key, filename, object_size,
- extra_args, callback):
- downloader = MultipartDownloader(self._client, self._config,
- self._osutil)
- downloader.download_file(bucket, key, filename, object_size,
- extra_args, callback)
-
- def _get_object(self, bucket, key, filename, extra_args, callback):
- # precondition: num_download_attempts > 0
- max_attempts = self._config.num_download_attempts
- last_exception = None
- for i in range(max_attempts):
- try:
- return self._do_get_object(bucket, key, filename,
- extra_args, callback)
- except (socket.timeout, socket.error,
- ReadTimeoutError, IncompleteReadError) as e:
- # TODO: we need a way to reset the callback if the
- # download failed.
- logger.debug("Retrying exception caught (%s), "
- "retrying request, (attempt %s / %s)", e, i,
- max_attempts, exc_info=True)
- last_exception = e
- continue
- raise RetriesExceededError(last_exception)
-
- def _do_get_object(self, bucket, key, filename, extra_args, callback):
- response = self._client.get_object(Bucket=bucket, Key=key,
- **extra_args)
- streaming_body = StreamReaderProgress(
- response['Body'], callback)
- with self._osutil.open(filename, 'wb') as f:
- for chunk in iter(lambda: streaming_body.read(8192), b''):
- f.write(chunk)
-
- def _object_size(self, bucket, key, extra_args):
- return self._client.head_object(
- Bucket=bucket, Key=key, **extra_args)['ContentLength']
-
- def _multipart_upload(self, filename, bucket, key, callback, extra_args):
- uploader = MultipartUploader(self._client, self._config, self._osutil)
- uploader.upload_file(filename, bucket, key, callback, extra_args)
+ ]
+
+ def __init__(self, client, config=None, osutil=None):
+ self._client = client
+ if config is None:
+ config = TransferConfig()
+ self._config = config
+ if osutil is None:
+ osutil = OSUtils()
+ self._osutil = osutil
+
+ def upload_file(self, filename, bucket, key,
+ callback=None, extra_args=None):
+ """Upload a file to an S3 object.
+
+ Variants have also been injected into S3 client, Bucket and Object.
+ You don't have to use S3Transfer.upload_file() directly.
+ """
+ if extra_args is None:
+ extra_args = {}
+ self._validate_all_known_args(extra_args, self.ALLOWED_UPLOAD_ARGS)
+ events = self._client.meta.events
+ events.register_first('request-created.s3',
+ disable_upload_callbacks,
+ unique_id='s3upload-callback-disable')
+ events.register_last('request-created.s3',
+ enable_upload_callbacks,
+ unique_id='s3upload-callback-enable')
+ if self._osutil.get_file_size(filename) >= \
+ self._config.multipart_threshold:
+ self._multipart_upload(filename, bucket, key, callback, extra_args)
+ else:
+ self._put_object(filename, bucket, key, callback, extra_args)
+
+ def _put_object(self, filename, bucket, key, callback, extra_args):
+ # We're using open_file_chunk_reader so we can take advantage of the
+ # progress callback functionality.
+ open_chunk_reader = self._osutil.open_file_chunk_reader
+ with open_chunk_reader(filename, 0,
+ self._osutil.get_file_size(filename),
+ callback=callback) as body:
+ self._client.put_object(Bucket=bucket, Key=key, Body=body,
+ **extra_args)
+
+ def download_file(self, bucket, key, filename, extra_args=None,
+ callback=None):
+ """Download an S3 object to a file.
+
+ Variants have also been injected into S3 client, Bucket and Object.
+ You don't have to use S3Transfer.download_file() directly.
+ """
+ # This method will issue a ``head_object`` request to determine
+ # the size of the S3 object. This is used to determine if the
+ # object is downloaded in parallel.
+ if extra_args is None:
+ extra_args = {}
+ self._validate_all_known_args(extra_args, self.ALLOWED_DOWNLOAD_ARGS)
+ object_size = self._object_size(bucket, key, extra_args)
+ temp_filename = filename + os.extsep + random_file_extension()
+ try:
+ self._download_file(bucket, key, temp_filename, object_size,
+ extra_args, callback)
+ except Exception:
+ logger.debug("Exception caught in download_file, removing partial "
+ "file: %s", temp_filename, exc_info=True)
+ self._osutil.remove_file(temp_filename)
+ raise
+ else:
+ self._osutil.rename_file(temp_filename, filename)
+
+ def _download_file(self, bucket, key, filename, object_size,
+ extra_args, callback):
+ if object_size >= self._config.multipart_threshold:
+ self._ranged_download(bucket, key, filename, object_size,
+ extra_args, callback)
+ else:
+ self._get_object(bucket, key, filename, extra_args, callback)
+
+ def _validate_all_known_args(self, actual, allowed):
+ for kwarg in actual:
+ if kwarg not in allowed:
+ raise ValueError(
+ "Invalid extra_args key '%s', "
+ "must be one of: %s" % (
+ kwarg, ', '.join(allowed)))
+
+ def _ranged_download(self, bucket, key, filename, object_size,
+ extra_args, callback):
+ downloader = MultipartDownloader(self._client, self._config,
+ self._osutil)
+ downloader.download_file(bucket, key, filename, object_size,
+ extra_args, callback)
+
+ def _get_object(self, bucket, key, filename, extra_args, callback):
+ # precondition: num_download_attempts > 0
+ max_attempts = self._config.num_download_attempts
+ last_exception = None
+ for i in range(max_attempts):
+ try:
+ return self._do_get_object(bucket, key, filename,
+ extra_args, callback)
+ except (socket.timeout, socket.error,
+ ReadTimeoutError, IncompleteReadError) as e:
+ # TODO: we need a way to reset the callback if the
+ # download failed.
+ logger.debug("Retrying exception caught (%s), "
+ "retrying request, (attempt %s / %s)", e, i,
+ max_attempts, exc_info=True)
+ last_exception = e
+ continue
+ raise RetriesExceededError(last_exception)
+
+ def _do_get_object(self, bucket, key, filename, extra_args, callback):
+ response = self._client.get_object(Bucket=bucket, Key=key,
+ **extra_args)
+ streaming_body = StreamReaderProgress(
+ response['Body'], callback)
+ with self._osutil.open(filename, 'wb') as f:
+ for chunk in iter(lambda: streaming_body.read(8192), b''):
+ f.write(chunk)
+
+ def _object_size(self, bucket, key, extra_args):
+ return self._client.head_object(
+ Bucket=bucket, Key=key, **extra_args)['ContentLength']
+
+ def _multipart_upload(self, filename, bucket, key, callback, extra_args):
+ uploader = MultipartUploader(self._client, self._config, self._osutil)
+ uploader.upload_file(filename, bucket, key, callback, extra_args)
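A minimal standalone sketch (not part of the diff) of the ranged-download math used by ``MultipartDownloader`` above: ``_download_file_as_future`` derives the part count from ``multipart_chunksize``, and ``_calculate_range_param`` turns each part index into an HTTP Range header, leaving the final range open-ended so it covers any remainder. The helper name and example sizes below are hypothetical:

    import math

    def range_headers(object_size, part_size):
        # Mirrors _calculate_range_param: the last part uses an open-ended
        # range ('bytes=N-') so it picks up whatever is left of the object.
        num_parts = int(math.ceil(object_size / float(part_size)))
        for part_index in range(num_parts):
            start = part_index * part_size
            end = '' if part_index == num_parts - 1 else start + part_size - 1
            yield 'bytes=%s-%s' % (start, end)

    MB = 1024 * 1024
    # A 20 MB object with the default 8 MB multipart_chunksize splits into:
    # bytes=0-8388607, bytes=8388608-16777215, bytes=16777216-
    for header in range_headers(20 * MB, 8 * MB):
        print(header)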
diff --git a/contrib/python/s3transfer/py2/s3transfer/bandwidth.py b/contrib/python/s3transfer/py2/s3transfer/bandwidth.py
index 2f964c6b4d..07f5096369 100644
--- a/contrib/python/s3transfer/py2/s3transfer/bandwidth.py
+++ b/contrib/python/s3transfer/py2/s3transfer/bandwidth.py
@@ -1,416 +1,416 @@
-# Copyright 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License"). You
-# may not use this file except in compliance with the License. A copy of
-# the License is located at
-#
-# http://aws.amazon.com/apache2.0/
-#
-# or in the "license" file accompanying this file. This file is
-# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
-# ANY KIND, either express or implied. See the License for the specific
-# language governing permissions and limitations under the License.
-import time
-import threading
-
-
-class RequestExceededException(Exception):
- def __init__(self, requested_amt, retry_time):
- """Error when requested amount exceeds what is allowed
-
- The request that raised this error should be retried after waiting
- the time specified by ``retry_time``.
-
- :type requested_amt: int
- :param requested_amt: The originally requested byte amount
-
- :type retry_time: float
- :param retry_time: The length in time to wait to retry for the
- requested amount
- """
- self.requested_amt = requested_amt
- self.retry_time = retry_time
- msg = (
- 'Request amount %s exceeded the amount available. Retry in %s' % (
- requested_amt, retry_time)
- )
- super(RequestExceededException, self).__init__(msg)
-
-
-class RequestToken(object):
- """A token to pass as an identifier when consuming from the LeakyBucket"""
- pass
-
-
-class TimeUtils(object):
- def time(self):
- """Get the current time back
-
- :rtype: float
- :returns: The current time in seconds
- """
- return time.time()
-
- def sleep(self, value):
- """Sleep for a designated time
-
- :type value: float
- :param value: The time to sleep for in seconds
- """
- return time.sleep(value)
-
-
-class BandwidthLimiter(object):
- def __init__(self, leaky_bucket, time_utils=None):
- """Limits bandwidth for shared S3 transfers
-
- :type leaky_bucket: LeakyBucket
- :param leaky_bucket: The leaky bucket to use to limit bandwidth
-
- :type time_utils: TimeUtils
- :param time_utils: Time utility to use for interacting with time.
- """
- self._leaky_bucket = leaky_bucket
- self._time_utils = time_utils
- if time_utils is None:
- self._time_utils = TimeUtils()
-
- def get_bandwith_limited_stream(self, fileobj, transfer_coordinator,
- enabled=True):
- """Wraps a fileobj in a bandwidth limited stream wrapper
-
- :type fileobj: file-like obj
- :param fileobj: The file-like obj to wrap
-
- :type transfer_coordinator: s3transfer.futures.TransferCoordinator
- param transfer_coordinator: The coordinator for the general transfer
- that the wrapped stream is a part of
-
- :type enabled: boolean
- :param enabled: Whether bandwidth limiting should be enabled to start
- """
- stream = BandwidthLimitedStream(
- fileobj, self._leaky_bucket, transfer_coordinator,
- self._time_utils)
- if not enabled:
- stream.disable_bandwidth_limiting()
- return stream
-
-
-class BandwidthLimitedStream(object):
- def __init__(self, fileobj, leaky_bucket, transfer_coordinator,
- time_utils=None, bytes_threshold=256 * 1024):
- """Limits bandwidth for reads on a wrapped stream
-
- :type fileobj: file-like object
- :param fileobj: The file like object to wrap
-
- :type leaky_bucket: LeakyBucket
- :param leaky_bucket: The leaky bucket to use to throttle reads on
- the stream
-
- :type transfer_coordinator: s3transfer.futures.TransferCoordinator
- param transfer_coordinator: The coordinator for the general transfer
- that the wrapped stream is a part of
-
- :type time_utils: TimeUtils
- :param time_utils: The time utility to use for interacting with time
- """
- self._fileobj = fileobj
- self._leaky_bucket = leaky_bucket
- self._transfer_coordinator = transfer_coordinator
- self._time_utils = time_utils
- if time_utils is None:
- self._time_utils = TimeUtils()
- self._bandwidth_limiting_enabled = True
- self._request_token = RequestToken()
- self._bytes_seen = 0
- self._bytes_threshold = bytes_threshold
-
- def enable_bandwidth_limiting(self):
- """Enable bandwidth limiting on reads to the stream"""
- self._bandwidth_limiting_enabled = True
-
- def disable_bandwidth_limiting(self):
- """Disable bandwidth limiting on reads to the stream"""
- self._bandwidth_limiting_enabled = False
-
- def read(self, amount):
- """Read a specified amount
-
- Reads will only be throttled if bandwidth limiting is enabled.
- """
- if not self._bandwidth_limiting_enabled:
- return self._fileobj.read(amount)
-
- # We do not want to be calling consume on every read as the read
- # amounts can be small causing the lock of the leaky bucket to
- # introduce noticeable overhead. So instead we keep track of
- # how many bytes we have seen and only call consume once we pass a
- # certain threshold.
- self._bytes_seen += amount
- if self._bytes_seen < self._bytes_threshold:
- return self._fileobj.read(amount)
-
- self._consume_through_leaky_bucket()
- return self._fileobj.read(amount)
-
- def _consume_through_leaky_bucket(self):
- # NOTE: If the read amounts on the stream are high, it will result
- # in large bursty behavior as there is not an interface for partial
- # reads. However given the read's on this abstraction are at most 256KB
- # (via downloads), it reduces the burstiness to be small KB bursts at
- # worst.
- while not self._transfer_coordinator.exception:
- try:
- self._leaky_bucket.consume(
- self._bytes_seen, self._request_token)
- self._bytes_seen = 0
- return
- except RequestExceededException as e:
- self._time_utils.sleep(e.retry_time)
- else:
- raise self._transfer_coordinator.exception
-
- def signal_transferring(self):
- """Signal that data being read is being transferred to S3"""
- self.enable_bandwidth_limiting()
-
- def signal_not_transferring(self):
- """Signal that data being read is not being transferred to S3"""
- self.disable_bandwidth_limiting()
-
+# Copyright 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You
+# may not use this file except in compliance with the License. A copy of
+# the License is located at
+#
+# http://aws.amazon.com/apache2.0/
+#
+# or in the "license" file accompanying this file. This file is
+# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific
+# language governing permissions and limitations under the License.
+import time
+import threading
+
+
+class RequestExceededException(Exception):
+ def __init__(self, requested_amt, retry_time):
+ """Error when requested amount exceeds what is allowed
+
+ The request that raised this error should be retried after waiting
+ the time specified by ``retry_time``.
+
+ :type requested_amt: int
+ :param requested_amt: The originally requested byte amount
+
+ :type retry_time: float
+        :param retry_time: The amount of time to wait before retrying for
+            the requested amount
+ """
+ self.requested_amt = requested_amt
+ self.retry_time = retry_time
+ msg = (
+ 'Request amount %s exceeded the amount available. Retry in %s' % (
+ requested_amt, retry_time)
+ )
+ super(RequestExceededException, self).__init__(msg)
+
+
+class RequestToken(object):
+ """A token to pass as an identifier when consuming from the LeakyBucket"""
+ pass
+
+
+class TimeUtils(object):
+ def time(self):
+ """Get the current time back
+
+ :rtype: float
+ :returns: The current time in seconds
+ """
+ return time.time()
+
+ def sleep(self, value):
+ """Sleep for a designated time
+
+ :type value: float
+ :param value: The time to sleep for in seconds
+ """
+ return time.sleep(value)
+
+
+class BandwidthLimiter(object):
+ def __init__(self, leaky_bucket, time_utils=None):
+ """Limits bandwidth for shared S3 transfers
+
+ :type leaky_bucket: LeakyBucket
+        :param leaky_bucket: The leaky bucket to use to limit bandwidth
+
+ :type time_utils: TimeUtils
+ :param time_utils: Time utility to use for interacting with time.
+ """
+ self._leaky_bucket = leaky_bucket
+ self._time_utils = time_utils
+ if time_utils is None:
+ self._time_utils = TimeUtils()
+
+ def get_bandwith_limited_stream(self, fileobj, transfer_coordinator,
+ enabled=True):
+ """Wraps a fileobj in a bandwidth limited stream wrapper
+
+ :type fileobj: file-like obj
+ :param fileobj: The file-like obj to wrap
+
+ :type transfer_coordinator: s3transfer.futures.TransferCoordinator
+        :param transfer_coordinator: The coordinator for the general transfer
+ that the wrapped stream is a part of
+
+ :type enabled: boolean
+ :param enabled: Whether bandwidth limiting should be enabled to start
+ """
+ stream = BandwidthLimitedStream(
+ fileobj, self._leaky_bucket, transfer_coordinator,
+ self._time_utils)
+ if not enabled:
+ stream.disable_bandwidth_limiting()
+ return stream
+
+
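As an illustration (not part of the commit itself), a minimal wiring sketch for the limiter above; the file name is a placeholder and the coordinator is constructed standalone purely for this example::

    from s3transfer.bandwidth import BandwidthLimiter, LeakyBucket
    from s3transfer.futures import TransferCoordinator

    # Cap all transfers sharing this bucket at roughly 2 MB/s.
    limiter = BandwidthLimiter(LeakyBucket(max_rate=2 * 1024 * 1024))
    with open('example.bin', 'rb') as fileobj:  # placeholder file
        # Note: the method name really is spelled 'bandwith' in this module.
        stream = limiter.get_bandwith_limited_stream(
            fileobj, TransferCoordinator())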
+class BandwidthLimitedStream(object):
+ def __init__(self, fileobj, leaky_bucket, transfer_coordinator,
+ time_utils=None, bytes_threshold=256 * 1024):
+ """Limits bandwidth for reads on a wrapped stream
+
+ :type fileobj: file-like object
+ :param fileobj: The file like object to wrap
+
+ :type leaky_bucket: LeakyBucket
+ :param leaky_bucket: The leaky bucket to use to throttle reads on
+ the stream
+
+ :type transfer_coordinator: s3transfer.futures.TransferCoordinator
+        :param transfer_coordinator: The coordinator for the general transfer
+ that the wrapped stream is a part of
+
+ :type time_utils: TimeUtils
+ :param time_utils: The time utility to use for interacting with time
+ """
+ self._fileobj = fileobj
+ self._leaky_bucket = leaky_bucket
+ self._transfer_coordinator = transfer_coordinator
+ self._time_utils = time_utils
+ if time_utils is None:
+ self._time_utils = TimeUtils()
+ self._bandwidth_limiting_enabled = True
+ self._request_token = RequestToken()
+ self._bytes_seen = 0
+ self._bytes_threshold = bytes_threshold
+
+ def enable_bandwidth_limiting(self):
+ """Enable bandwidth limiting on reads to the stream"""
+ self._bandwidth_limiting_enabled = True
+
+ def disable_bandwidth_limiting(self):
+ """Disable bandwidth limiting on reads to the stream"""
+ self._bandwidth_limiting_enabled = False
+
+ def read(self, amount):
+ """Read a specified amount
+
+ Reads will only be throttled if bandwidth limiting is enabled.
+ """
+ if not self._bandwidth_limiting_enabled:
+ return self._fileobj.read(amount)
+
+ # We do not want to be calling consume on every read as the read
+ # amounts can be small causing the lock of the leaky bucket to
+ # introduce noticeable overhead. So instead we keep track of
+ # how many bytes we have seen and only call consume once we pass a
+ # certain threshold.
+ self._bytes_seen += amount
+ if self._bytes_seen < self._bytes_threshold:
+ return self._fileobj.read(amount)
+
+ self._consume_through_leaky_bucket()
+ return self._fileobj.read(amount)
+
+ def _consume_through_leaky_bucket(self):
+        # NOTE: If the read amounts on the stream are high, it will result
+        # in large bursty behavior as there is not an interface for partial
+        # reads. However, given the reads on this abstraction are at most
+        # 256KB (via downloads), it reduces the burstiness to small KB
+        # bursts at worst.
+ while not self._transfer_coordinator.exception:
+ try:
+ self._leaky_bucket.consume(
+ self._bytes_seen, self._request_token)
+ self._bytes_seen = 0
+ return
+ except RequestExceededException as e:
+ self._time_utils.sleep(e.retry_time)
+ else:
+ raise self._transfer_coordinator.exception
+
+ def signal_transferring(self):
+ """Signal that data being read is being transferred to S3"""
+ self.enable_bandwidth_limiting()
+
+ def signal_not_transferring(self):
+ """Signal that data being read is not being transferred to S3"""
+ self.disable_bandwidth_limiting()
+
def seek(self, where, whence=0):
self._fileobj.seek(where, whence)
-
- def tell(self):
- return self._fileobj.tell()
-
- def close(self):
- if self._bandwidth_limiting_enabled and self._bytes_seen:
- # This handles the case where the file is small enough to never
-            # trigger the threshold and thus is never subjected to the
-            # leaky bucket on read(). This specifically happens for small
-            # uploads. So to account for those bytes, send them through
-            # the leaky bucket when the file gets closed.
- self._consume_through_leaky_bucket()
- self._fileobj.close()
-
- def __enter__(self):
- return self
-
- def __exit__(self, *args, **kwargs):
- self.close()
-
-
-class LeakyBucket(object):
- def __init__(self, max_rate, time_utils=None, rate_tracker=None,
- consumption_scheduler=None):
- """A leaky bucket abstraction to limit bandwidth consumption
-
-        :type max_rate: int
-        :param max_rate: The maximum rate to allow. This rate is in terms of
-            bytes per second.
-
- :type time_utils: TimeUtils
- :param time_utils: The time utility to use for interacting with time
-
- :type rate_tracker: BandwidthRateTracker
- :param rate_tracker: Tracks bandwidth consumption
-
- :type consumption_scheduler: ConsumptionScheduler
- :param consumption_scheduler: Schedules consumption retries when
- necessary
- """
- self._max_rate = float(max_rate)
- self._time_utils = time_utils
- if time_utils is None:
- self._time_utils = TimeUtils()
- self._lock = threading.Lock()
- self._rate_tracker = rate_tracker
- if rate_tracker is None:
- self._rate_tracker = BandwidthRateTracker()
- self._consumption_scheduler = consumption_scheduler
- if consumption_scheduler is None:
- self._consumption_scheduler = ConsumptionScheduler()
-
- def consume(self, amt, request_token):
- """Consume an a requested amount
-
- :type amt: int
- :param amt: The amount of bytes to request to consume
-
- :type request_token: RequestToken
-        :param request_token: The token associated with the consumption
-            request that is used to identify the request. If a
-            RequestExceededException is raised, the same token should be
-            used in the subsequent retry consume() request.
-
- :raises RequestExceededException: If the consumption amount would
- exceed the maximum allocated bandwidth
-
- :rtype: int
- :returns: The amount consumed
- """
- with self._lock:
- time_now = self._time_utils.time()
- if self._consumption_scheduler.is_scheduled(request_token):
- return self._release_requested_amt_for_scheduled_request(
- amt, request_token, time_now)
- elif self._projected_to_exceed_max_rate(amt, time_now):
- self._raise_request_exceeded_exception(
- amt, request_token, time_now)
- else:
- return self._release_requested_amt(amt, time_now)
-
- def _projected_to_exceed_max_rate(self, amt, time_now):
- projected_rate = self._rate_tracker.get_projected_rate(amt, time_now)
- return projected_rate > self._max_rate
-
- def _release_requested_amt_for_scheduled_request(self, amt, request_token,
- time_now):
- self._consumption_scheduler.process_scheduled_consumption(
- request_token)
- return self._release_requested_amt(amt, time_now)
-
- def _raise_request_exceeded_exception(self, amt, request_token, time_now):
- allocated_time = amt/float(self._max_rate)
- retry_time = self._consumption_scheduler.schedule_consumption(
- amt, request_token, allocated_time)
- raise RequestExceededException(
- requested_amt=amt, retry_time=retry_time)
-
- def _release_requested_amt(self, amt, time_now):
- self._rate_tracker.record_consumption_rate(amt, time_now)
- return amt
-
-
-class ConsumptionScheduler(object):
- def __init__(self):
- """Schedules when to consume a desired amount"""
- self._tokens_to_scheduled_consumption = {}
- self._total_wait = 0
-
- def is_scheduled(self, token):
- """Indicates if a consumption request has been scheduled
-
- :type token: RequestToken
- :param token: The token associated to the consumption
- request that is used to identify the request.
- """
- return token in self._tokens_to_scheduled_consumption
-
- def schedule_consumption(self, amt, token, time_to_consume):
- """Schedules a wait time to be able to consume an amount
-
- :type amt: int
- :param amt: The amount of bytes scheduled to be consumed
-
- :type token: RequestToken
- :param token: The token associated to the consumption
- request that is used to identify the request.
-
- :type time_to_consume: float
-        :param time_to_consume: The desired time it should take for that
-            specific request amount to be consumed, regardless of previously
-            scheduled consumption requests
-
- :rtype: float
- :returns: The amount of time to wait for the specific request before
- actually consuming the specified amount.
- """
- self._total_wait += time_to_consume
- self._tokens_to_scheduled_consumption[token] = {
- 'wait_duration': self._total_wait,
- 'time_to_consume': time_to_consume,
- }
- return self._total_wait
-
- def process_scheduled_consumption(self, token):
- """Processes a scheduled consumption request that has completed
-
- :type token: RequestToken
- :param token: The token associated to the consumption
- request that is used to identify the request.
- """
- scheduled_retry = self._tokens_to_scheduled_consumption.pop(token)
- self._total_wait = max(
- self._total_wait - scheduled_retry['time_to_consume'], 0)
-
-
-class BandwidthRateTracker(object):
- def __init__(self, alpha=0.8):
- """Tracks the rate of bandwidth consumption
-
-        :type alpha: float
-        :param alpha: The constant to use in calculating the exponential
-            moving average of the bandwidth rate. Specifically it is used
-            in the following calculation:
-
-            current_rate = alpha * new_rate + (1 - alpha) * current_rate
-
-            The value of this constant should be between 0 and 1.
- """
- self._alpha = alpha
- self._last_time = None
- self._current_rate = None
-
- @property
- def current_rate(self):
- """The current transfer rate
-
- :rtype: float
- :returns: The current tracked transfer rate
- """
- if self._last_time is None:
- return 0.0
- return self._current_rate
-
- def get_projected_rate(self, amt, time_at_consumption):
- """Get the projected rate using a provided amount and time
-
- :type amt: int
- :param amt: The proposed amount to consume
-
- :type time_at_consumption: float
- :param time_at_consumption: The proposed time to consume at
-
- :rtype: float
- :returns: The consumption rate if that amt and time were consumed
- """
- if self._last_time is None:
- return 0.0
- return self._calculate_exponential_moving_average_rate(
- amt, time_at_consumption)
-
- def record_consumption_rate(self, amt, time_at_consumption):
- """Record the consumption rate based off amount and time point
-
- :type amt: int
- :param amt: The amount that got consumed
-
- :type time_at_consumption: float
- :param time_at_consumption: The time at which the amount was consumed
- """
- if self._last_time is None:
- self._last_time = time_at_consumption
- self._current_rate = 0.0
- return
- self._current_rate = self._calculate_exponential_moving_average_rate(
- amt, time_at_consumption)
- self._last_time = time_at_consumption
-
- def _calculate_rate(self, amt, time_at_consumption):
- time_delta = time_at_consumption - self._last_time
- if time_delta <= 0:
+
+ def tell(self):
+ return self._fileobj.tell()
+
+ def close(self):
+ if self._bandwidth_limiting_enabled and self._bytes_seen:
+ # This handles the case where the file is small enough to never
+            # trigger the threshold and thus is never subjected to the
+            # leaky bucket on read(). This specifically happens for small
+            # uploads. So to account for those bytes, send them through
+            # the leaky bucket when the file gets closed.
+ self._consume_through_leaky_bucket()
+ self._fileobj.close()
+
+ def __enter__(self):
+ return self
+
+ def __exit__(self, *args, **kwargs):
+ self.close()
+
+
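As an illustration (not part of the commit itself), a small sketch of the byte-threshold behavior of read() above; the stub bucket and coordinator below are invented stand-ins for the real LeakyBucket and TransferCoordinator::

    import io
    from s3transfer.bandwidth import BandwidthLimitedStream

    class StubBucket(object):
        def __init__(self):
            self.consumed = []

        def consume(self, amt, token):
            # Record what the stream hands to the bucket.
            self.consumed.append(amt)
            return amt

    class StubCoordinator(object):
        exception = None

    bucket = StubBucket()
    stream = BandwidthLimitedStream(
        io.BytesIO(b'x' * (1024 * 1024)), bucket, StubCoordinator())
    stream.read(100 * 1024)   # 100KB seen: below the 256KB threshold
    stream.read(200 * 1024)   # 300KB seen: consume() is now called once
    assert bucket.consumed == [300 * 1024]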
+class LeakyBucket(object):
+ def __init__(self, max_rate, time_utils=None, rate_tracker=None,
+ consumption_scheduler=None):
+ """A leaky bucket abstraction to limit bandwidth consumption
+
+        :type max_rate: int
+        :param max_rate: The maximum rate to allow. This rate is in terms of
+            bytes per second.
+
+ :type time_utils: TimeUtils
+ :param time_utils: The time utility to use for interacting with time
+
+ :type rate_tracker: BandwidthRateTracker
+ :param rate_tracker: Tracks bandwidth consumption
+
+ :type consumption_scheduler: ConsumptionScheduler
+ :param consumption_scheduler: Schedules consumption retries when
+ necessary
+ """
+ self._max_rate = float(max_rate)
+ self._time_utils = time_utils
+ if time_utils is None:
+ self._time_utils = TimeUtils()
+ self._lock = threading.Lock()
+ self._rate_tracker = rate_tracker
+ if rate_tracker is None:
+ self._rate_tracker = BandwidthRateTracker()
+ self._consumption_scheduler = consumption_scheduler
+ if consumption_scheduler is None:
+ self._consumption_scheduler = ConsumptionScheduler()
+
+ def consume(self, amt, request_token):
+ """Consume an a requested amount
+
+ :type amt: int
+ :param amt: The amount of bytes to request to consume
+
+ :type request_token: RequestToken
+        :param request_token: The token associated with the consumption
+            request that is used to identify the request. If a
+            RequestExceededException is raised, the same token should be
+            used in the subsequent retry consume() request.
+
+ :raises RequestExceededException: If the consumption amount would
+ exceed the maximum allocated bandwidth
+
+ :rtype: int
+ :returns: The amount consumed
+ """
+ with self._lock:
+ time_now = self._time_utils.time()
+ if self._consumption_scheduler.is_scheduled(request_token):
+ return self._release_requested_amt_for_scheduled_request(
+ amt, request_token, time_now)
+ elif self._projected_to_exceed_max_rate(amt, time_now):
+ self._raise_request_exceeded_exception(
+ amt, request_token, time_now)
+ else:
+ return self._release_requested_amt(amt, time_now)
+
+ def _projected_to_exceed_max_rate(self, amt, time_now):
+ projected_rate = self._rate_tracker.get_projected_rate(amt, time_now)
+ return projected_rate > self._max_rate
+
+ def _release_requested_amt_for_scheduled_request(self, amt, request_token,
+ time_now):
+ self._consumption_scheduler.process_scheduled_consumption(
+ request_token)
+ return self._release_requested_amt(amt, time_now)
+
+ def _raise_request_exceeded_exception(self, amt, request_token, time_now):
+ allocated_time = amt/float(self._max_rate)
+ retry_time = self._consumption_scheduler.schedule_consumption(
+ amt, request_token, allocated_time)
+ raise RequestExceededException(
+ requested_amt=amt, retry_time=retry_time)
+
+ def _release_requested_amt(self, amt, time_now):
+ self._rate_tracker.record_consumption_rate(amt, time_now)
+ return amt
+
+
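As an illustration (not part of the commit itself), a sketch of the consume/retry pattern callers are expected to run against the LeakyBucket above; the rate and amount are arbitrary example values::

    import time
    from s3transfer.bandwidth import (
        LeakyBucket, RequestToken, RequestExceededException)

    bucket = LeakyBucket(max_rate=1024 * 1024)   # allow roughly 1 MB/s
    token = RequestToken()                       # reuse the same token on retries
    amt = 512 * 1024
    while True:
        try:
            bucket.consume(amt, token)
            break
        except RequestExceededException as e:
            # The bucket scheduled this request; wait and retry with the
            # same token so the scheduler recognizes it.
            time.sleep(e.retry_time)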
+class ConsumptionScheduler(object):
+ def __init__(self):
+ """Schedules when to consume a desired amount"""
+ self._tokens_to_scheduled_consumption = {}
+ self._total_wait = 0
+
+ def is_scheduled(self, token):
+ """Indicates if a consumption request has been scheduled
+
+ :type token: RequestToken
+ :param token: The token associated to the consumption
+ request that is used to identify the request.
+ """
+ return token in self._tokens_to_scheduled_consumption
+
+ def schedule_consumption(self, amt, token, time_to_consume):
+ """Schedules a wait time to be able to consume an amount
+
+ :type amt: int
+ :param amt: The amount of bytes scheduled to be consumed
+
+ :type token: RequestToken
+ :param token: The token associated to the consumption
+ request that is used to identify the request.
+
+ :type time_to_consume: float
+        :param time_to_consume: The desired time it should take for that
+            specific request amount to be consumed, regardless of previously
+            scheduled consumption requests
+
+ :rtype: float
+ :returns: The amount of time to wait for the specific request before
+ actually consuming the specified amount.
+ """
+ self._total_wait += time_to_consume
+ self._tokens_to_scheduled_consumption[token] = {
+ 'wait_duration': self._total_wait,
+ 'time_to_consume': time_to_consume,
+ }
+ return self._total_wait
+
+ def process_scheduled_consumption(self, token):
+ """Processes a scheduled consumption request that has completed
+
+ :type token: RequestToken
+ :param token: The token associated to the consumption
+ request that is used to identify the request.
+ """
+ scheduled_retry = self._tokens_to_scheduled_consumption.pop(token)
+ self._total_wait = max(
+ self._total_wait - scheduled_retry['time_to_consume'], 0)
+
+
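As a worked illustration (not part of the commit itself) of the scheduler above, scheduled waits accumulate across outstanding tokens and are released as each scheduled consumption completes; the amounts and durations are example values::

    from s3transfer.bandwidth import ConsumptionScheduler, RequestToken

    scheduler = ConsumptionScheduler()
    first, second = RequestToken(), RequestToken()
    # Each call returns the total wait for that request.
    assert scheduler.schedule_consumption(1024, first, 0.5) == 0.5
    assert scheduler.schedule_consumption(2048, second, 0.75) == 1.25
    # Completing the first request releases its share of the wait.
    scheduler.process_scheduled_consumption(first)
    assert scheduler.schedule_consumption(4096, RequestToken(), 0.25) == 1.0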
+class BandwidthRateTracker(object):
+ def __init__(self, alpha=0.8):
+ """Tracks the rate of bandwidth consumption
+
+        :type alpha: float
+        :param alpha: The constant to use in calculating the exponential
+            moving average of the bandwidth rate. Specifically it is used
+            in the following calculation:
+
+            current_rate = alpha * new_rate + (1 - alpha) * current_rate
+
+            The value of this constant should be between 0 and 1.
+ """
+ self._alpha = alpha
+ self._last_time = None
+ self._current_rate = None
+
+ @property
+ def current_rate(self):
+ """The current transfer rate
+
+ :rtype: float
+ :returns: The current tracked transfer rate
+ """
+ if self._last_time is None:
+ return 0.0
+ return self._current_rate
+
+ def get_projected_rate(self, amt, time_at_consumption):
+ """Get the projected rate using a provided amount and time
+
+ :type amt: int
+ :param amt: The proposed amount to consume
+
+ :type time_at_consumption: float
+ :param time_at_consumption: The proposed time to consume at
+
+ :rtype: float
+ :returns: The consumption rate if that amt and time were consumed
+ """
+ if self._last_time is None:
+ return 0.0
+ return self._calculate_exponential_moving_average_rate(
+ amt, time_at_consumption)
+
+ def record_consumption_rate(self, amt, time_at_consumption):
+ """Record the consumption rate based off amount and time point
+
+ :type amt: int
+ :param amt: The amount that got consumed
+
+ :type time_at_consumption: float
+ :param time_at_consumption: The time at which the amount was consumed
+ """
+ if self._last_time is None:
+ self._last_time = time_at_consumption
+ self._current_rate = 0.0
+ return
+ self._current_rate = self._calculate_exponential_moving_average_rate(
+ amt, time_at_consumption)
+ self._last_time = time_at_consumption
+
+ def _calculate_rate(self, amt, time_at_consumption):
+ time_delta = time_at_consumption - self._last_time
+ if time_delta <= 0:
# While it is really unlikely to see this in an actual transfer,
-            # we do not want to return a negative rate or try to divide
-            # the amount by zero. So instead return an infinite rate, as
-            # the time delta is infinitesimally small.
- return float('inf')
- return amt / (time_delta)
-
- def _calculate_exponential_moving_average_rate(self, amt,
- time_at_consumption):
- new_rate = self._calculate_rate(amt, time_at_consumption)
- return self._alpha * new_rate + (1 - self._alpha) * self._current_rate
+            # we do not want to return a negative rate or try to divide
+            # the amount by zero. So instead return an infinite rate, as
+            # the time delta is infinitesimally small.
+ return float('inf')
+ return amt / (time_delta)
+
+ def _calculate_exponential_moving_average_rate(self, amt,
+ time_at_consumption):
+ new_rate = self._calculate_rate(amt, time_at_consumption)
+ return self._alpha * new_rate + (1 - self._alpha) * self._current_rate
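As a worked illustration (not part of the commit itself) of the exponential moving average above with alpha=0.8; the byte counts and timestamps are arbitrary example values::

    from s3transfer.bandwidth import BandwidthRateTracker

    tracker = BandwidthRateTracker(alpha=0.8)
    # The first record only seeds the tracker: current_rate starts at 0.0.
    tracker.record_consumption_rate(0, time_at_consumption=0.0)
    # new_rate = 1024 / 1.0; EMA = 0.8 * 1024 + 0.2 * 0.0
    tracker.record_consumption_rate(1024, time_at_consumption=1.0)
    assert abs(tracker.current_rate - 819.2) < 1e-9
    # Projection only; nothing is recorded: 0.8 * 2048 + 0.2 * 819.2
    projected = tracker.get_projected_rate(2048, time_at_consumption=2.0)
    assert abs(projected - 1802.24) < 1e-9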
diff --git a/contrib/python/s3transfer/py2/s3transfer/compat.py b/contrib/python/s3transfer/py2/s3transfer/compat.py
index 76543b9812..4909f1aeaa 100644
--- a/contrib/python/s3transfer/py2/s3transfer/compat.py
+++ b/contrib/python/s3transfer/py2/s3transfer/compat.py
@@ -1,94 +1,94 @@
-# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License"). You
-# may not use this file except in compliance with the License. A copy of
-# the License is located at
-#
-# http://aws.amazon.com/apache2.0/
-#
-# or in the "license" file accompanying this file. This file is
-# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
-# ANY KIND, either express or implied. See the License for the specific
-# language governing permissions and limitations under the License.
-import inspect
-import sys
-import os
-import errno
-import socket
-
-from botocore.compat import six
-
-
-if sys.platform.startswith('win'):
- def rename_file(current_filename, new_filename):
- try:
- os.remove(new_filename)
- except OSError as e:
- if not e.errno == errno.ENOENT:
-                # We only want to ignore trying to remove
- # a file that does not exist. If it fails
- # for any other reason we should be propagating
- # that exception.
- raise
- os.rename(current_filename, new_filename)
-else:
- rename_file = os.rename
-
-if six.PY3:
- def accepts_kwargs(func):
-        # In python3.4.1, there are backwards-incompatible
-        # changes when using getargspec with functools.partials.
- return inspect.getfullargspec(func)[2]
-
- # In python3, socket.error is OSError, which is too general
- # for what we want (i.e FileNotFoundError is a subclass of OSError).
- # In py3 all the socket related errors are in a newly created
- # ConnectionError
- SOCKET_ERROR = ConnectionError
- MAXINT = None
-else:
- def accepts_kwargs(func):
- return inspect.getargspec(func)[2]
-
- SOCKET_ERROR = socket.error
- MAXINT = sys.maxint
-
-
-def seekable(fileobj):
- """Backwards compat function to determine if a fileobj is seekable
-
- :param fileobj: The file-like object to determine if seekable
-
- :returns: True, if seekable. False, otherwise.
- """
- # If the fileobj has a seekable attr, try calling the seekable()
- # method on it.
- if hasattr(fileobj, 'seekable'):
- return fileobj.seekable()
-    # If there is no seekable attr, check whether the object supports
-    # seek() and tell(). If it does, try to seek to the current position.
- elif hasattr(fileobj, 'seek') and hasattr(fileobj, 'tell'):
- try:
- fileobj.seek(0, 1)
- return True
- except (OSError, IOError):
- # If an io related error was thrown then it is not seekable.
- return False
- # Else, the fileobj is not seekable
- return False
-
-
-def readable(fileobj):
- """Determines whether or not a file-like object is readable.
-
- :param fileobj: The file-like object to determine if readable
-
- :returns: True, if readable. False otherwise.
- """
- if hasattr(fileobj, 'readable'):
- return fileobj.readable()
-
- return hasattr(fileobj, 'read')
+# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You
+# may not use this file except in compliance with the License. A copy of
+# the License is located at
+#
+# http://aws.amazon.com/apache2.0/
+#
+# or in the "license" file accompanying this file. This file is
+# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific
+# language governing permissions and limitations under the License.
+import inspect
+import sys
+import os
+import errno
+import socket
+
+from botocore.compat import six
+
+
+if sys.platform.startswith('win'):
+ def rename_file(current_filename, new_filename):
+ try:
+ os.remove(new_filename)
+ except OSError as e:
+ if not e.errno == errno.ENOENT:
+                # We only want to ignore trying to remove
+ # a file that does not exist. If it fails
+ # for any other reason we should be propagating
+ # that exception.
+ raise
+ os.rename(current_filename, new_filename)
+else:
+ rename_file = os.rename
+
+if six.PY3:
+ def accepts_kwargs(func):
+        # In python3.4.1, there are backwards-incompatible
+        # changes when using getargspec with functools.partials.
+ return inspect.getfullargspec(func)[2]
+
+ # In python3, socket.error is OSError, which is too general
+ # for what we want (i.e FileNotFoundError is a subclass of OSError).
+ # In py3 all the socket related errors are in a newly created
+ # ConnectionError
+ SOCKET_ERROR = ConnectionError
+ MAXINT = None
+else:
+ def accepts_kwargs(func):
+ return inspect.getargspec(func)[2]
+
+ SOCKET_ERROR = socket.error
+ MAXINT = sys.maxint
+
+
+def seekable(fileobj):
+ """Backwards compat function to determine if a fileobj is seekable
+
+ :param fileobj: The file-like object to determine if seekable
+
+ :returns: True, if seekable. False, otherwise.
+ """
+ # If the fileobj has a seekable attr, try calling the seekable()
+ # method on it.
+ if hasattr(fileobj, 'seekable'):
+ return fileobj.seekable()
+    # If there is no seekable attr, check whether the object supports
+    # seek() and tell(). If it does, try to seek to the current position.
+ elif hasattr(fileobj, 'seek') and hasattr(fileobj, 'tell'):
+ try:
+ fileobj.seek(0, 1)
+ return True
+ except (OSError, IOError):
+ # If an io related error was thrown then it is not seekable.
+ return False
+ # Else, the fileobj is not seekable
+ return False
+
+
+def readable(fileobj):
+ """Determines whether or not a file-like object is readable.
+
+ :param fileobj: The file-like object to determine if readable
+
+ :returns: True, if readable. False otherwise.
+ """
+ if hasattr(fileobj, 'readable'):
+ return fileobj.readable()
+
+ return hasattr(fileobj, 'read')
def fallocate(fileobj, size):
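As an illustration (not part of the commit itself), a brief sketch of how the seekable() and readable() helpers above classify a few common objects; the WriteOnly class is a made-up example::

    import io
    from s3transfer.compat import seekable, readable

    class WriteOnly(object):
        def write(self, data):
            pass

    buf = io.BytesIO(b'data')
    assert seekable(buf)              # exposes a seekable() method
    assert readable(buf)              # exposes a readable() method
    assert not seekable(WriteOnly())  # no seekable(), seek() or tell()
    assert not readable(WriteOnly())  # no readable() or read()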
diff --git a/contrib/python/s3transfer/py2/s3transfer/copies.py b/contrib/python/s3transfer/py2/s3transfer/copies.py
index e9c2140407..4b4086c24e 100644
--- a/contrib/python/s3transfer/py2/s3transfer/copies.py
+++ b/contrib/python/s3transfer/py2/s3transfer/copies.py
@@ -1,327 +1,327 @@
-# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License"). You
-# may not use this file except in compliance with the License. A copy of
-# the License is located at
-#
-# http://aws.amazon.com/apache2.0/
-#
-# or in the "license" file accompanying this file. This file is
-# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
-# ANY KIND, either express or implied. See the License for the specific
-# language governing permissions and limitations under the License.
-import copy
-import math
-
-from s3transfer.tasks import Task
-from s3transfer.tasks import SubmissionTask
-from s3transfer.tasks import CreateMultipartUploadTask
-from s3transfer.tasks import CompleteMultipartUploadTask
-from s3transfer.utils import get_callbacks
-from s3transfer.utils import calculate_range_parameter
-from s3transfer.utils import get_filtered_dict
-from s3transfer.utils import ChunksizeAdjuster
-
-
-class CopySubmissionTask(SubmissionTask):
- """Task for submitting tasks to execute a copy"""
-
- EXTRA_ARGS_TO_HEAD_ARGS_MAPPING = {
- 'CopySourceIfMatch': 'IfMatch',
- 'CopySourceIfModifiedSince': 'IfModifiedSince',
- 'CopySourceIfNoneMatch': 'IfNoneMatch',
- 'CopySourceIfUnmodifiedSince': 'IfUnmodifiedSince',
- 'CopySourceSSECustomerKey': 'SSECustomerKey',
- 'CopySourceSSECustomerAlgorithm': 'SSECustomerAlgorithm',
- 'CopySourceSSECustomerKeyMD5': 'SSECustomerKeyMD5',
+# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You
+# may not use this file except in compliance with the License. A copy of
+# the License is located at
+#
+# http://aws.amazon.com/apache2.0/
+#
+# or in the "license" file accompanying this file. This file is
+# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific
+# language governing permissions and limitations under the License.
+import copy
+import math
+
+from s3transfer.tasks import Task
+from s3transfer.tasks import SubmissionTask
+from s3transfer.tasks import CreateMultipartUploadTask
+from s3transfer.tasks import CompleteMultipartUploadTask
+from s3transfer.utils import get_callbacks
+from s3transfer.utils import calculate_range_parameter
+from s3transfer.utils import get_filtered_dict
+from s3transfer.utils import ChunksizeAdjuster
+
+
+class CopySubmissionTask(SubmissionTask):
+ """Task for submitting tasks to execute a copy"""
+
+ EXTRA_ARGS_TO_HEAD_ARGS_MAPPING = {
+ 'CopySourceIfMatch': 'IfMatch',
+ 'CopySourceIfModifiedSince': 'IfModifiedSince',
+ 'CopySourceIfNoneMatch': 'IfNoneMatch',
+ 'CopySourceIfUnmodifiedSince': 'IfUnmodifiedSince',
+ 'CopySourceSSECustomerKey': 'SSECustomerKey',
+ 'CopySourceSSECustomerAlgorithm': 'SSECustomerAlgorithm',
+ 'CopySourceSSECustomerKeyMD5': 'SSECustomerKeyMD5',
'RequestPayer': 'RequestPayer',
'ExpectedBucketOwner': 'ExpectedBucketOwner'
- }
-
- UPLOAD_PART_COPY_ARGS = [
- 'CopySourceIfMatch',
- 'CopySourceIfModifiedSince',
- 'CopySourceIfNoneMatch',
- 'CopySourceIfUnmodifiedSince',
- 'CopySourceSSECustomerKey',
- 'CopySourceSSECustomerAlgorithm',
- 'CopySourceSSECustomerKeyMD5',
- 'SSECustomerKey',
- 'SSECustomerAlgorithm',
- 'SSECustomerKeyMD5',
- 'RequestPayer',
+ }
+
+ UPLOAD_PART_COPY_ARGS = [
+ 'CopySourceIfMatch',
+ 'CopySourceIfModifiedSince',
+ 'CopySourceIfNoneMatch',
+ 'CopySourceIfUnmodifiedSince',
+ 'CopySourceSSECustomerKey',
+ 'CopySourceSSECustomerAlgorithm',
+ 'CopySourceSSECustomerKeyMD5',
+ 'SSECustomerKey',
+ 'SSECustomerAlgorithm',
+ 'SSECustomerKeyMD5',
+ 'RequestPayer',
'ExpectedBucketOwner'
- ]
-
- CREATE_MULTIPART_ARGS_BLACKLIST = [
- 'CopySourceIfMatch',
- 'CopySourceIfModifiedSince',
- 'CopySourceIfNoneMatch',
- 'CopySourceIfUnmodifiedSince',
- 'CopySourceSSECustomerKey',
- 'CopySourceSSECustomerAlgorithm',
- 'CopySourceSSECustomerKeyMD5',
+ ]
+
+ CREATE_MULTIPART_ARGS_BLACKLIST = [
+ 'CopySourceIfMatch',
+ 'CopySourceIfModifiedSince',
+ 'CopySourceIfNoneMatch',
+ 'CopySourceIfUnmodifiedSince',
+ 'CopySourceSSECustomerKey',
+ 'CopySourceSSECustomerAlgorithm',
+ 'CopySourceSSECustomerKeyMD5',
'MetadataDirective',
'TaggingDirective',
- ]
-
- COMPLETE_MULTIPART_ARGS = [
+ ]
+
+ COMPLETE_MULTIPART_ARGS = [
'RequestPayer',
'ExpectedBucketOwner'
- ]
-
- def _submit(self, client, config, osutil, request_executor,
- transfer_future):
- """
- :param client: The client associated with the transfer manager
-
- :type config: s3transfer.manager.TransferConfig
- :param config: The transfer config associated with the transfer
- manager
-
- :type osutil: s3transfer.utils.OSUtil
- :param osutil: The os utility associated to the transfer manager
-
- :type request_executor: s3transfer.futures.BoundedExecutor
- :param request_executor: The request executor associated with the
- transfer manager
-
- :type transfer_future: s3transfer.futures.TransferFuture
- :param transfer_future: The transfer future associated with the
- transfer request that tasks are being submitted for
- """
- # Determine the size if it was not provided
- if transfer_future.meta.size is None:
- # If a size was not provided figure out the size for the
- # user. Note that we will only use the client provided to
- # the TransferManager. If the object is outside of the region
- # of the client, they may have to provide the file size themselves
- # with a completely new client.
- call_args = transfer_future.meta.call_args
- head_object_request = \
- self._get_head_object_request_from_copy_source(
- call_args.copy_source)
- extra_args = call_args.extra_args
-
- # Map any values that may be used in the head object that is
- # used in the copy object
- for param, value in extra_args.items():
- if param in self.EXTRA_ARGS_TO_HEAD_ARGS_MAPPING:
- head_object_request[
- self.EXTRA_ARGS_TO_HEAD_ARGS_MAPPING[param]] = value
-
- response = call_args.source_client.head_object(
- **head_object_request)
- transfer_future.meta.provide_transfer_size(
- response['ContentLength'])
-
- # If it is greater than threshold do a multipart copy, otherwise
- # do a regular copy object.
- if transfer_future.meta.size < config.multipart_threshold:
- self._submit_copy_request(
- client, config, osutil, request_executor, transfer_future)
- else:
- self._submit_multipart_request(
- client, config, osutil, request_executor, transfer_future)
-
- def _submit_copy_request(self, client, config, osutil, request_executor,
- transfer_future):
- call_args = transfer_future.meta.call_args
-
- # Get the needed progress callbacks for the task
- progress_callbacks = get_callbacks(transfer_future, 'progress')
-
- # Submit the request of a single copy.
- self._transfer_coordinator.submit(
- request_executor,
- CopyObjectTask(
- transfer_coordinator=self._transfer_coordinator,
- main_kwargs={
- 'client': client,
- 'copy_source': call_args.copy_source,
- 'bucket': call_args.bucket,
- 'key': call_args.key,
- 'extra_args': call_args.extra_args,
- 'callbacks': progress_callbacks,
- 'size': transfer_future.meta.size
- },
- is_final=True
- )
- )
-
- def _submit_multipart_request(self, client, config, osutil,
- request_executor, transfer_future):
- call_args = transfer_future.meta.call_args
-
- # Submit the request to create a multipart upload and make sure it
- # does not include any of the arguments used for copy part.
- create_multipart_extra_args = {}
- for param, val in call_args.extra_args.items():
- if param not in self.CREATE_MULTIPART_ARGS_BLACKLIST:
- create_multipart_extra_args[param] = val
-
- create_multipart_future = self._transfer_coordinator.submit(
- request_executor,
- CreateMultipartUploadTask(
- transfer_coordinator=self._transfer_coordinator,
- main_kwargs={
- 'client': client,
- 'bucket': call_args.bucket,
- 'key': call_args.key,
- 'extra_args': create_multipart_extra_args,
- }
- )
- )
-
- # Determine how many parts are needed based on filesize and
- # desired chunksize.
- part_size = config.multipart_chunksize
- adjuster = ChunksizeAdjuster()
- part_size = adjuster.adjust_chunksize(
- part_size, transfer_future.meta.size)
- num_parts = int(
- math.ceil(transfer_future.meta.size / float(part_size)))
-
- # Submit requests to upload the parts of the file.
- part_futures = []
- progress_callbacks = get_callbacks(transfer_future, 'progress')
-
- for part_number in range(1, num_parts + 1):
- extra_part_args = self._extra_upload_part_args(
- call_args.extra_args)
- # The part number for upload part starts at 1 while the
- # range parameter starts at zero, so just subtract 1 off of
- # the part number
- extra_part_args['CopySourceRange'] = calculate_range_parameter(
- part_size, part_number-1, num_parts, transfer_future.meta.size)
- # Get the size of the part copy as well for the progress
- # callbacks.
- size = self._get_transfer_size(
- part_size, part_number-1, num_parts, transfer_future.meta.size
- )
- part_futures.append(
- self._transfer_coordinator.submit(
- request_executor,
- CopyPartTask(
- transfer_coordinator=self._transfer_coordinator,
- main_kwargs={
- 'client': client,
- 'copy_source': call_args.copy_source,
- 'bucket': call_args.bucket,
- 'key': call_args.key,
- 'part_number': part_number,
- 'extra_args': extra_part_args,
- 'callbacks': progress_callbacks,
- 'size': size
- },
- pending_main_kwargs={
- 'upload_id': create_multipart_future
- }
- )
- )
- )
-
- complete_multipart_extra_args = self._extra_complete_multipart_args(
- call_args.extra_args)
- # Submit the request to complete the multipart upload.
- self._transfer_coordinator.submit(
- request_executor,
- CompleteMultipartUploadTask(
- transfer_coordinator=self._transfer_coordinator,
- main_kwargs={
- 'client': client,
- 'bucket': call_args.bucket,
- 'key': call_args.key,
- 'extra_args': complete_multipart_extra_args,
- },
- pending_main_kwargs={
- 'upload_id': create_multipart_future,
- 'parts': part_futures
- },
- is_final=True
- )
- )
-
- def _get_head_object_request_from_copy_source(self, copy_source):
- if isinstance(copy_source, dict):
- return copy.copy(copy_source)
- else:
- raise TypeError(
- 'Expecting dictionary formatted: '
- '{"Bucket": bucket_name, "Key": key} '
- 'but got %s or type %s.'
- % (copy_source, type(copy_source))
- )
-
- def _extra_upload_part_args(self, extra_args):
-        # Only the args in UPLOAD_PART_COPY_ARGS actually need to be passed
- # onto the upload_part_copy calls.
- return get_filtered_dict(extra_args, self.UPLOAD_PART_COPY_ARGS)
-
- def _extra_complete_multipart_args(self, extra_args):
- return get_filtered_dict(extra_args, self.COMPLETE_MULTIPART_ARGS)
-
- def _get_transfer_size(self, part_size, part_index, num_parts,
- total_transfer_size):
- if part_index == num_parts - 1:
-            # The last part may be different in size than the rest of the
- # parts.
- return total_transfer_size - (part_index * part_size)
- return part_size
-
-
-class CopyObjectTask(Task):
- """Task to do a nonmultipart copy"""
- def _main(self, client, copy_source, bucket, key, extra_args, callbacks,
- size):
- """
-        :param client: The client to use when calling CopyObject
- :param copy_source: The CopySource parameter to use
- :param bucket: The name of the bucket to copy to
- :param key: The name of the key to copy to
- :param extra_args: A dictionary of any extra arguments that may be
- used in the upload.
- :param callbacks: List of callbacks to call after copy
- :param size: The size of the transfer. This value is passed into
- the callbacks
-
- """
- client.copy_object(
- CopySource=copy_source, Bucket=bucket, Key=key, **extra_args)
- for callback in callbacks:
- callback(bytes_transferred=size)
-
-
-class CopyPartTask(Task):
- """Task to upload a part in a multipart copy"""
- def _main(self, client, copy_source, bucket, key, upload_id, part_number,
- extra_args, callbacks, size):
- """
-        :param client: The client to use when calling UploadPartCopy
- :param copy_source: The CopySource parameter to use
- :param bucket: The name of the bucket to upload to
- :param key: The name of the key to upload to
- :param upload_id: The id of the upload
- :param part_number: The number representing the part of the multipart
- upload
- :param extra_args: A dictionary of any extra arguments that may be
- used in the upload.
- :param callbacks: List of callbacks to call after copy part
- :param size: The size of the transfer. This value is passed into
- the callbacks
-
- :rtype: dict
- :returns: A dictionary representing a part::
-
-            {'ETag': etag_value, 'PartNumber': part_number}
-
- This value can be appended to a list to be used to complete
- the multipart upload.
- """
- response = client.upload_part_copy(
- CopySource=copy_source, Bucket=bucket, Key=key,
- UploadId=upload_id, PartNumber=part_number, **extra_args)
- for callback in callbacks:
- callback(bytes_transferred=size)
- etag = response['CopyPartResult']['ETag']
- return {'ETag': etag, 'PartNumber': part_number}
+ ]
+
+ def _submit(self, client, config, osutil, request_executor,
+ transfer_future):
+ """
+ :param client: The client associated with the transfer manager
+
+ :type config: s3transfer.manager.TransferConfig
+ :param config: The transfer config associated with the transfer
+ manager
+
+ :type osutil: s3transfer.utils.OSUtil
+ :param osutil: The os utility associated to the transfer manager
+
+ :type request_executor: s3transfer.futures.BoundedExecutor
+ :param request_executor: The request executor associated with the
+ transfer manager
+
+ :type transfer_future: s3transfer.futures.TransferFuture
+ :param transfer_future: The transfer future associated with the
+ transfer request that tasks are being submitted for
+ """
+ # Determine the size if it was not provided
+ if transfer_future.meta.size is None:
+ # If a size was not provided figure out the size for the
+ # user. Note that we will only use the client provided to
+ # the TransferManager. If the object is outside of the region
+ # of the client, they may have to provide the file size themselves
+ # with a completely new client.
+ call_args = transfer_future.meta.call_args
+ head_object_request = \
+ self._get_head_object_request_from_copy_source(
+ call_args.copy_source)
+ extra_args = call_args.extra_args
+
+ # Map any values that may be used in the head object that is
+ # used in the copy object
+ for param, value in extra_args.items():
+ if param in self.EXTRA_ARGS_TO_HEAD_ARGS_MAPPING:
+ head_object_request[
+ self.EXTRA_ARGS_TO_HEAD_ARGS_MAPPING[param]] = value
+
+ response = call_args.source_client.head_object(
+ **head_object_request)
+ transfer_future.meta.provide_transfer_size(
+ response['ContentLength'])
+
+ # If it is greater than threshold do a multipart copy, otherwise
+ # do a regular copy object.
+ if transfer_future.meta.size < config.multipart_threshold:
+ self._submit_copy_request(
+ client, config, osutil, request_executor, transfer_future)
+ else:
+ self._submit_multipart_request(
+ client, config, osutil, request_executor, transfer_future)
+
+ def _submit_copy_request(self, client, config, osutil, request_executor,
+ transfer_future):
+ call_args = transfer_future.meta.call_args
+
+ # Get the needed progress callbacks for the task
+ progress_callbacks = get_callbacks(transfer_future, 'progress')
+
+ # Submit the request of a single copy.
+ self._transfer_coordinator.submit(
+ request_executor,
+ CopyObjectTask(
+ transfer_coordinator=self._transfer_coordinator,
+ main_kwargs={
+ 'client': client,
+ 'copy_source': call_args.copy_source,
+ 'bucket': call_args.bucket,
+ 'key': call_args.key,
+ 'extra_args': call_args.extra_args,
+ 'callbacks': progress_callbacks,
+ 'size': transfer_future.meta.size
+ },
+ is_final=True
+ )
+ )
+
+ def _submit_multipart_request(self, client, config, osutil,
+ request_executor, transfer_future):
+ call_args = transfer_future.meta.call_args
+
+ # Submit the request to create a multipart upload and make sure it
+ # does not include any of the arguments used for copy part.
+ create_multipart_extra_args = {}
+ for param, val in call_args.extra_args.items():
+ if param not in self.CREATE_MULTIPART_ARGS_BLACKLIST:
+ create_multipart_extra_args[param] = val
+
+ create_multipart_future = self._transfer_coordinator.submit(
+ request_executor,
+ CreateMultipartUploadTask(
+ transfer_coordinator=self._transfer_coordinator,
+ main_kwargs={
+ 'client': client,
+ 'bucket': call_args.bucket,
+ 'key': call_args.key,
+ 'extra_args': create_multipart_extra_args,
+ }
+ )
+ )
+
+ # Determine how many parts are needed based on filesize and
+ # desired chunksize.
+ part_size = config.multipart_chunksize
+ adjuster = ChunksizeAdjuster()
+ part_size = adjuster.adjust_chunksize(
+ part_size, transfer_future.meta.size)
+ num_parts = int(
+ math.ceil(transfer_future.meta.size / float(part_size)))
+
+ # Submit requests to upload the parts of the file.
+ part_futures = []
+ progress_callbacks = get_callbacks(transfer_future, 'progress')
+
+ for part_number in range(1, num_parts + 1):
+ extra_part_args = self._extra_upload_part_args(
+ call_args.extra_args)
+ # The part number for upload part starts at 1 while the
+ # range parameter starts at zero, so just subtract 1 off of
+ # the part number
+ extra_part_args['CopySourceRange'] = calculate_range_parameter(
+ part_size, part_number-1, num_parts, transfer_future.meta.size)
+ # Get the size of the part copy as well for the progress
+ # callbacks.
+ size = self._get_transfer_size(
+ part_size, part_number-1, num_parts, transfer_future.meta.size
+ )
+ part_futures.append(
+ self._transfer_coordinator.submit(
+ request_executor,
+ CopyPartTask(
+ transfer_coordinator=self._transfer_coordinator,
+ main_kwargs={
+ 'client': client,
+ 'copy_source': call_args.copy_source,
+ 'bucket': call_args.bucket,
+ 'key': call_args.key,
+ 'part_number': part_number,
+ 'extra_args': extra_part_args,
+ 'callbacks': progress_callbacks,
+ 'size': size
+ },
+ pending_main_kwargs={
+ 'upload_id': create_multipart_future
+ }
+ )
+ )
+ )
+
+ complete_multipart_extra_args = self._extra_complete_multipart_args(
+ call_args.extra_args)
+ # Submit the request to complete the multipart upload.
+ self._transfer_coordinator.submit(
+ request_executor,
+ CompleteMultipartUploadTask(
+ transfer_coordinator=self._transfer_coordinator,
+ main_kwargs={
+ 'client': client,
+ 'bucket': call_args.bucket,
+ 'key': call_args.key,
+ 'extra_args': complete_multipart_extra_args,
+ },
+ pending_main_kwargs={
+ 'upload_id': create_multipart_future,
+ 'parts': part_futures
+ },
+ is_final=True
+ )
+ )
+
+ def _get_head_object_request_from_copy_source(self, copy_source):
+ if isinstance(copy_source, dict):
+ return copy.copy(copy_source)
+ else:
+ raise TypeError(
+ 'Expecting dictionary formatted: '
+ '{"Bucket": bucket_name, "Key": key} '
+ 'but got %s or type %s.'
+ % (copy_source, type(copy_source))
+ )
+
+ def _extra_upload_part_args(self, extra_args):
+        # Only the args in UPLOAD_PART_COPY_ARGS actually need to be passed
+ # onto the upload_part_copy calls.
+ return get_filtered_dict(extra_args, self.UPLOAD_PART_COPY_ARGS)
+
+ def _extra_complete_multipart_args(self, extra_args):
+ return get_filtered_dict(extra_args, self.COMPLETE_MULTIPART_ARGS)
+
+ def _get_transfer_size(self, part_size, part_index, num_parts,
+ total_transfer_size):
+ if part_index == num_parts - 1:
+            # The last part may be different in size than the rest of the
+ # parts.
+ return total_transfer_size - (part_index * part_size)
+ return part_size
+
+
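As a worked illustration (not part of the commit itself) of the part arithmetic used by _submit_multipart_request above; the object size and chunk size are assumed example values, and calculate_range_parameter is the helper imported from s3transfer.utils at the top of this module::

    import math
    from s3transfer.utils import calculate_range_parameter

    size = 100 * 1024 * 1024        # 100 MiB source object (assumed)
    part_size = 8 * 1024 * 1024     # 8 MiB chunks after adjustment (assumed)
    num_parts = int(math.ceil(size / float(part_size)))
    assert num_parts == 13
    # Parts 1..12 each copy part_size bytes; the final part is what remains.
    last_part_size = size - (num_parts - 1) * part_size
    assert last_part_size == 4 * 1024 * 1024
    # Part number N uses range index N - 1, e.g. for part 1:
    first_range = calculate_range_parameter(part_size, 0, num_parts, size)
    # Expected to be a 'bytes=start-end' string covering the first 8 MiB.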
+class CopyObjectTask(Task):
+ """Task to do a nonmultipart copy"""
+ def _main(self, client, copy_source, bucket, key, extra_args, callbacks,
+ size):
+ """
+        :param client: The client to use when calling CopyObject
+ :param copy_source: The CopySource parameter to use
+ :param bucket: The name of the bucket to copy to
+ :param key: The name of the key to copy to
+ :param extra_args: A dictionary of any extra arguments that may be
+ used in the upload.
+ :param callbacks: List of callbacks to call after copy
+ :param size: The size of the transfer. This value is passed into
+ the callbacks
+
+ """
+ client.copy_object(
+ CopySource=copy_source, Bucket=bucket, Key=key, **extra_args)
+ for callback in callbacks:
+ callback(bytes_transferred=size)
+
+
+class CopyPartTask(Task):
+ """Task to upload a part in a multipart copy"""
+ def _main(self, client, copy_source, bucket, key, upload_id, part_number,
+ extra_args, callbacks, size):
+ """
+        :param client: The client to use when calling UploadPartCopy
+ :param copy_source: The CopySource parameter to use
+ :param bucket: The name of the bucket to upload to
+ :param key: The name of the key to upload to
+ :param upload_id: The id of the upload
+ :param part_number: The number representing the part of the multipart
+ upload
+ :param extra_args: A dictionary of any extra arguments that may be
+ used in the upload.
+ :param callbacks: List of callbacks to call after copy part
+ :param size: The size of the transfer. This value is passed into
+ the callbacks
+
+ :rtype: dict
+ :returns: A dictionary representing a part::
+
+            {'ETag': etag_value, 'PartNumber': part_number}
+
+ This value can be appended to a list to be used to complete
+ the multipart upload.
+ """
+ response = client.upload_part_copy(
+ CopySource=copy_source, Bucket=bucket, Key=key,
+ UploadId=upload_id, PartNumber=part_number, **extra_args)
+ for callback in callbacks:
+ callback(bytes_transferred=size)
+ etag = response['CopyPartResult']['ETag']
+ return {'ETag': etag, 'PartNumber': part_number}
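As an illustration (not part of the commit itself), the dictionaries returned by CopyPartTask above are typically collected into the CompleteMultipartUpload call; the bucket, key, upload id, and ETag values below are placeholders::

    import boto3

    client = boto3.client('s3')
    parts = [
        {'ETag': '"etag-for-part-1"', 'PartNumber': 1},
        {'ETag': '"etag-for-part-2"', 'PartNumber': 2},
    ]
    client.complete_multipart_upload(
        Bucket='example-bucket', Key='example-key',
        UploadId='example-upload-id',
        MultipartUpload={'Parts': parts})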
diff --git a/contrib/python/s3transfer/py2/s3transfer/delete.py b/contrib/python/s3transfer/py2/s3transfer/delete.py
index 35b18a1139..ad9210b373 100644
--- a/contrib/python/s3transfer/py2/s3transfer/delete.py
+++ b/contrib/python/s3transfer/py2/s3transfer/delete.py
@@ -1,72 +1,72 @@
-# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License"). You
-# may not use this file except in compliance with the License. A copy of
-# the License is located at
-#
-# http://aws.amazon.com/apache2.0/
-#
-# or in the "license" file accompanying this file. This file is
-# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
-# ANY KIND, either express or implied. See the License for the specific
-# language governing permissions and limitations under the License.
-from s3transfer.tasks import Task
-from s3transfer.tasks import SubmissionTask
-
-
-class DeleteSubmissionTask(SubmissionTask):
- """Task for submitting tasks to execute an object deletion."""
-
- def _submit(self, client, request_executor, transfer_future, **kwargs):
- """
- :param client: The client associated with the transfer manager
-
- :type config: s3transfer.manager.TransferConfig
- :param config: The transfer config associated with the transfer
- manager
-
- :type osutil: s3transfer.utils.OSUtil
- :param osutil: The os utility associated to the transfer manager
-
- :type request_executor: s3transfer.futures.BoundedExecutor
- :param request_executor: The request executor associated with the
- transfer manager
-
- :type transfer_future: s3transfer.futures.TransferFuture
- :param transfer_future: The transfer future associated with the
- transfer request that tasks are being submitted for
- """
- call_args = transfer_future.meta.call_args
-
- self._transfer_coordinator.submit(
- request_executor,
- DeleteObjectTask(
- transfer_coordinator=self._transfer_coordinator,
- main_kwargs={
- 'client': client,
- 'bucket': call_args.bucket,
- 'key': call_args.key,
- 'extra_args': call_args.extra_args,
- },
- is_final=True
- )
- )
-
-
-class DeleteObjectTask(Task):
- def _main(self, client, bucket, key, extra_args):
- """
-
- :param client: The S3 client to use when calling DeleteObject
-
- :type bucket: str
- :param bucket: The name of the bucket.
-
- :type key: str
- :param key: The name of the object to delete.
-
- :type extra_args: dict
- :param extra_args: Extra arguments to pass to the DeleteObject call.
-
- """
- client.delete_object(Bucket=bucket, Key=key, **extra_args)
+# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You
+# may not use this file except in compliance with the License. A copy of
+# the License is located at
+#
+# http://aws.amazon.com/apache2.0/
+#
+# or in the "license" file accompanying this file. This file is
+# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific
+# language governing permissions and limitations under the License.
+from s3transfer.tasks import Task
+from s3transfer.tasks import SubmissionTask
+
+
+class DeleteSubmissionTask(SubmissionTask):
+ """Task for submitting tasks to execute an object deletion."""
+
+ def _submit(self, client, request_executor, transfer_future, **kwargs):
+ """
+ :param client: The client associated with the transfer manager
+
+ :type config: s3transfer.manager.TransferConfig
+ :param config: The transfer config associated with the transfer
+ manager
+
+ :type osutil: s3transfer.utils.OSUtil
+ :param osutil: The os utility associated to the transfer manager
+
+ :type request_executor: s3transfer.futures.BoundedExecutor
+ :param request_executor: The request executor associated with the
+ transfer manager
+
+ :type transfer_future: s3transfer.futures.TransferFuture
+ :param transfer_future: The transfer future associated with the
+ transfer request that tasks are being submitted for
+ """
+ call_args = transfer_future.meta.call_args
+
+ self._transfer_coordinator.submit(
+ request_executor,
+ DeleteObjectTask(
+ transfer_coordinator=self._transfer_coordinator,
+ main_kwargs={
+ 'client': client,
+ 'bucket': call_args.bucket,
+ 'key': call_args.key,
+ 'extra_args': call_args.extra_args,
+ },
+ is_final=True
+ )
+ )
+
+
+class DeleteObjectTask(Task):
+ def _main(self, client, bucket, key, extra_args):
+ """
+
+ :param client: The S3 client to use when calling DeleteObject
+
+ :type bucket: str
+ :param bucket: The name of the bucket.
+
+ :type key: str
+ :param key: The name of the object to delete.
+
+ :type extra_args: dict
+ :param extra_args: Extra arguments to pass to the DeleteObject call.
+
+ """
+ client.delete_object(Bucket=bucket, Key=key, **extra_args)
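As an illustration (not part of the commit itself), the task above reduces to a single DeleteObject call with extra_args splatted into the request; the bucket, key, and version id are placeholders, and VersionId is just one example of an extra argument::

    import boto3

    client = boto3.client('s3')
    extra_args = {'VersionId': 'example-version-id'}
    client.delete_object(Bucket='example-bucket', Key='example-key',
                         **extra_args)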
diff --git a/contrib/python/s3transfer/py2/s3transfer/download.py b/contrib/python/s3transfer/py2/s3transfer/download.py
index ffac23d683..0da00c4541 100644
--- a/contrib/python/s3transfer/py2/s3transfer/download.py
+++ b/contrib/python/s3transfer/py2/s3transfer/download.py
@@ -1,710 +1,710 @@
-# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License"). You
-# may not use this file except in compliance with the License. A copy of
-# the License is located at
-#
-# http://aws.amazon.com/apache2.0/
-#
-# or in the "license" file accompanying this file. This file is
-# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
-# ANY KIND, either express or implied. See the License for the specific
-# language governing permissions and limitations under the License.
-import logging
-import threading
+# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You
+# may not use this file except in compliance with the License. A copy of
+# the License is located at
+#
+# http://aws.amazon.com/apache2.0/
+#
+# or in the "license" file accompanying this file. This file is
+# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific
+# language governing permissions and limitations under the License.
+import logging
+import threading
import heapq
-
-
+
+
from botocore.compat import six
-from s3transfer.compat import seekable
-from s3transfer.exceptions import RetriesExceededError
-from s3transfer.futures import IN_MEMORY_DOWNLOAD_TAG
+from s3transfer.compat import seekable
+from s3transfer.exceptions import RetriesExceededError
+from s3transfer.futures import IN_MEMORY_DOWNLOAD_TAG
from s3transfer.utils import S3_RETRYABLE_DOWNLOAD_ERRORS
-from s3transfer.utils import get_callbacks
-from s3transfer.utils import invoke_progress_callbacks
+from s3transfer.utils import get_callbacks
+from s3transfer.utils import invoke_progress_callbacks
from s3transfer.utils import calculate_num_parts
-from s3transfer.utils import calculate_range_parameter
-from s3transfer.utils import FunctionContainer
-from s3transfer.utils import CountCallbackInvoker
-from s3transfer.utils import StreamReaderProgress
-from s3transfer.utils import DeferredOpenFile
-from s3transfer.tasks import Task
-from s3transfer.tasks import SubmissionTask
-
-
-logger = logging.getLogger(__name__)
-
-
-class DownloadOutputManager(object):
- """Base manager class for handling various types of files for downloads
-
- This class is typically used for the DownloadSubmissionTask class to help
- determine the following:
-
-    * Provides the fileobj to write the downloads to
-    * Gets a task to complete once everything downloaded has been written
-
- The answers/implementations differ for the various types of file outputs
- that may be accepted. All implementations must subclass and override
- public methods from this class.
- """
- def __init__(self, osutil, transfer_coordinator, io_executor):
- self._osutil = osutil
- self._transfer_coordinator = transfer_coordinator
- self._io_executor = io_executor
-
- @classmethod
- def is_compatible(cls, download_target, osutil):
- """Determines if the target for the download is compatible with manager
-
-        :param download_target: The target that the download will write
-            data to.
-
- :param osutil: The os utility to be used for the transfer
-
- :returns: True if the manager can handle the type of target specified
- otherwise returns False.
- """
- raise NotImplementedError('must implement is_compatible()')
-
- def get_download_task_tag(self):
- """Get the tag (if any) to associate all GetObjectTasks
-
- :rtype: s3transfer.futures.TaskTag
- :returns: The tag to associate all GetObjectTasks with
- """
- return None
-
- def get_fileobj_for_io_writes(self, transfer_future):
- """Get file-like object to use for io writes in the io executor
-
- :type transfer_future: s3transfer.futures.TransferFuture
-        :param transfer_future: The future associated with the download request
-
-        :returns: A file-like object to write to
- """
- raise NotImplementedError('must implement get_fileobj_for_io_writes()')
-
- def queue_file_io_task(self, fileobj, data, offset):
- """Queue IO write for submission to the IO executor.
-
-        This method accepts information about the downloaded data and
-        handles submitting a write for it to the IO executor.
-
- This method may defer submission to the IO executor if necessary.
-
- """
- self._transfer_coordinator.submit(
- self._io_executor,
- self.get_io_write_task(fileobj, data, offset)
+from s3transfer.utils import calculate_range_parameter
+from s3transfer.utils import FunctionContainer
+from s3transfer.utils import CountCallbackInvoker
+from s3transfer.utils import StreamReaderProgress
+from s3transfer.utils import DeferredOpenFile
+from s3transfer.tasks import Task
+from s3transfer.tasks import SubmissionTask
+
+
+logger = logging.getLogger(__name__)
+
+
+class DownloadOutputManager(object):
+ """Base manager class for handling various types of files for downloads
+
+ This class is typically used for the DownloadSubmissionTask class to help
+ determine the following:
+
+    * Provides the fileobj to write downloaded data to
+    * Gets a task to complete once everything downloaded has been written
+
+ The answers/implementations differ for the various types of file outputs
+ that may be accepted. All implementations must subclass and override
+ public methods from this class.
+ """
+ def __init__(self, osutil, transfer_coordinator, io_executor):
+ self._osutil = osutil
+ self._transfer_coordinator = transfer_coordinator
+ self._io_executor = io_executor
+
+ @classmethod
+ def is_compatible(cls, download_target, osutil):
+ """Determines if the target for the download is compatible with manager
+
+        :param download_target: The target that the download will write
+            data to.
+
+ :param osutil: The os utility to be used for the transfer
+
+ :returns: True if the manager can handle the type of target specified
+ otherwise returns False.
+ """
+ raise NotImplementedError('must implement is_compatible()')
+
+ def get_download_task_tag(self):
+        """Get the tag (if any) to associate with all GetObjectTasks
+
+ :rtype: s3transfer.futures.TaskTag
+ :returns: The tag to associate all GetObjectTasks with
+ """
+ return None
+
+ def get_fileobj_for_io_writes(self, transfer_future):
+ """Get file-like object to use for io writes in the io executor
+
+ :type transfer_future: s3transfer.futures.TransferFuture
+ :param transfer_future: The future associated with upload request
+
+        :returns: A file-like object to write to
+ """
+ raise NotImplementedError('must implement get_fileobj_for_io_writes()')
+
+ def queue_file_io_task(self, fileobj, data, offset):
+ """Queue IO write for submission to the IO executor.
+
+        This method accepts information about the downloaded data and
+        handles submitting a write for it to the IO executor.
+
+ This method may defer submission to the IO executor if necessary.
+
+ """
+ self._transfer_coordinator.submit(
+ self._io_executor,
+ self.get_io_write_task(fileobj, data, offset)
)
-
- def get_io_write_task(self, fileobj, data, offset):
- """Get an IO write task for the requested set of data
-
-        This task can be run immediately or be submitted to the IO executor
- for it to run.
-
- :type fileobj: file-like object
- :param fileobj: The file-like object to write to
-
- :type data: bytes
- :param data: The data to write out
-
- :type offset: integer
- :param offset: The offset to write the data to in the file-like object
-
- :returns: An IO task to be used to write data to a file-like object
- """
- return IOWriteTask(
- self._transfer_coordinator,
- main_kwargs={
- 'fileobj': fileobj,
- 'data': data,
- 'offset': offset,
- }
- )
-
- def get_final_io_task(self):
- """Get the final io task to complete the download
-
- This is needed because based on the architecture of the TransferManager
- the final tasks will be sent to the IO executor, but the executor
- needs a final task for it to signal that the transfer is done and
- all done callbacks can be run.
-
- :rtype: s3transfer.tasks.Task
-        :returns: A final task to be completed in the io executor
- """
- raise NotImplementedError(
- 'must implement get_final_io_task()')
-
- def _get_fileobj_from_filename(self, filename):
- f = DeferredOpenFile(
- filename, mode='wb', open_function=self._osutil.open)
- # Make sure the file gets closed and we remove the temporary file
- # if anything goes wrong during the process.
- self._transfer_coordinator.add_failure_cleanup(f.close)
- return f
-
-
-class DownloadFilenameOutputManager(DownloadOutputManager):
- def __init__(self, osutil, transfer_coordinator, io_executor):
- super(DownloadFilenameOutputManager, self).__init__(
- osutil, transfer_coordinator, io_executor)
- self._final_filename = None
- self._temp_filename = None
- self._temp_fileobj = None
-
- @classmethod
- def is_compatible(cls, download_target, osutil):
- return isinstance(download_target, six.string_types)
-
- def get_fileobj_for_io_writes(self, transfer_future):
- fileobj = transfer_future.meta.call_args.fileobj
- self._final_filename = fileobj
+
+ def get_io_write_task(self, fileobj, data, offset):
+ """Get an IO write task for the requested set of data
+
+        This task can be run immediately or be submitted to the IO executor
+ for it to run.
+
+ :type fileobj: file-like object
+ :param fileobj: The file-like object to write to
+
+ :type data: bytes
+ :param data: The data to write out
+
+ :type offset: integer
+ :param offset: The offset to write the data to in the file-like object
+
+ :returns: An IO task to be used to write data to a file-like object
+ """
+ return IOWriteTask(
+ self._transfer_coordinator,
+ main_kwargs={
+ 'fileobj': fileobj,
+ 'data': data,
+ 'offset': offset,
+ }
+ )
+
+ def get_final_io_task(self):
+ """Get the final io task to complete the download
+
+ This is needed because based on the architecture of the TransferManager
+ the final tasks will be sent to the IO executor, but the executor
+ needs a final task for it to signal that the transfer is done and
+ all done callbacks can be run.
+
+ :rtype: s3transfer.tasks.Task
+        :returns: A final task to be completed in the io executor
+ """
+ raise NotImplementedError(
+ 'must implement get_final_io_task()')
+
+ def _get_fileobj_from_filename(self, filename):
+ f = DeferredOpenFile(
+ filename, mode='wb', open_function=self._osutil.open)
+ # Make sure the file gets closed and we remove the temporary file
+ # if anything goes wrong during the process.
+ self._transfer_coordinator.add_failure_cleanup(f.close)
+ return f
+
+
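For illustration, a minimal sketch of how a custom output manager could slot into the resolver chain used by DownloadSubmissionTask below; InMemoryOutputManager is a hypothetical name, it assumes this module is importable as s3transfer.download, and for in-memory buffers it duplicates what DownloadSeekableOutputManager already provides:

import io

from s3transfer.download import CompleteDownloadNOOPTask, DownloadOutputManager


class InMemoryOutputManager(DownloadOutputManager):
    # Hypothetical example: route downloaded bytes into a BytesIO buffer.
    @classmethod
    def is_compatible(cls, download_target, osutil):
        return isinstance(download_target, io.BytesIO)

    def get_fileobj_for_io_writes(self, transfer_future):
        # Write directly into the buffer the caller passed in.
        return transfer_future.meta.call_args.fileobj

    def get_final_io_task(self):
        # Nothing to rename or close, so a NOOP task signals completion.
        return CompleteDownloadNOOPTask(
            transfer_coordinator=self._transfer_coordinator)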
+class DownloadFilenameOutputManager(DownloadOutputManager):
+ def __init__(self, osutil, transfer_coordinator, io_executor):
+ super(DownloadFilenameOutputManager, self).__init__(
+ osutil, transfer_coordinator, io_executor)
+ self._final_filename = None
+ self._temp_filename = None
+ self._temp_fileobj = None
+
+ @classmethod
+ def is_compatible(cls, download_target, osutil):
+ return isinstance(download_target, six.string_types)
+
+ def get_fileobj_for_io_writes(self, transfer_future):
+ fileobj = transfer_future.meta.call_args.fileobj
+ self._final_filename = fileobj
self._temp_filename = self._osutil.get_temp_filename(fileobj)
- self._temp_fileobj = self._get_temp_fileobj()
- return self._temp_fileobj
-
- def get_final_io_task(self):
- # A task to rename the file from the temporary file to its final
- # location is needed. This should be the last task needed to complete
- # the download.
- return IORenameFileTask(
- transfer_coordinator=self._transfer_coordinator,
- main_kwargs={
- 'fileobj': self._temp_fileobj,
- 'final_filename': self._final_filename,
- 'osutil': self._osutil
- },
- is_final=True
- )
-
- def _get_temp_fileobj(self):
- f = self._get_fileobj_from_filename(self._temp_filename)
- self._transfer_coordinator.add_failure_cleanup(
- self._osutil.remove_file, self._temp_filename)
- return f
-
-
-class DownloadSeekableOutputManager(DownloadOutputManager):
- @classmethod
- def is_compatible(cls, download_target, osutil):
- return seekable(download_target)
-
- def get_fileobj_for_io_writes(self, transfer_future):
- # Return the fileobj provided to the future.
- return transfer_future.meta.call_args.fileobj
-
- def get_final_io_task(self):
- # This task will serve the purpose of signaling when all of the io
- # writes have finished so done callbacks can be called.
- return CompleteDownloadNOOPTask(
- transfer_coordinator=self._transfer_coordinator)
-
-
-class DownloadNonSeekableOutputManager(DownloadOutputManager):
- def __init__(self, osutil, transfer_coordinator, io_executor,
- defer_queue=None):
- super(DownloadNonSeekableOutputManager, self).__init__(
- osutil, transfer_coordinator, io_executor)
- if defer_queue is None:
- defer_queue = DeferQueue()
- self._defer_queue = defer_queue
- self._io_submit_lock = threading.Lock()
-
- @classmethod
- def is_compatible(cls, download_target, osutil):
- return hasattr(download_target, 'write')
-
- def get_download_task_tag(self):
- return IN_MEMORY_DOWNLOAD_TAG
-
- def get_fileobj_for_io_writes(self, transfer_future):
- return transfer_future.meta.call_args.fileobj
-
- def get_final_io_task(self):
- return CompleteDownloadNOOPTask(
- transfer_coordinator=self._transfer_coordinator)
-
- def queue_file_io_task(self, fileobj, data, offset):
- with self._io_submit_lock:
- writes = self._defer_queue.request_writes(offset, data)
- for write in writes:
- data = write['data']
- logger.debug("Queueing IO offset %s for fileobj: %s",
- write['offset'], fileobj)
- super(
- DownloadNonSeekableOutputManager, self).queue_file_io_task(
- fileobj, data, offset)
-
- def get_io_write_task(self, fileobj, data, offset):
- return IOStreamingWriteTask(
- self._transfer_coordinator,
- main_kwargs={
- 'fileobj': fileobj,
- 'data': data,
- }
- )
-
-
-class DownloadSpecialFilenameOutputManager(DownloadNonSeekableOutputManager):
- def __init__(self, osutil, transfer_coordinator, io_executor,
- defer_queue=None):
- super(DownloadSpecialFilenameOutputManager, self).__init__(
- osutil, transfer_coordinator, io_executor, defer_queue)
- self._fileobj = None
-
- @classmethod
- def is_compatible(cls, download_target, osutil):
- return isinstance(download_target, six.string_types) and \
- osutil.is_special_file(download_target)
-
- def get_fileobj_for_io_writes(self, transfer_future):
- filename = transfer_future.meta.call_args.fileobj
- self._fileobj = self._get_fileobj_from_filename(filename)
- return self._fileobj
-
- def get_final_io_task(self):
- # Make sure the file gets closed once the transfer is done.
- return IOCloseTask(
- transfer_coordinator=self._transfer_coordinator,
- is_final=True,
- main_kwargs={'fileobj': self._fileobj})
-
-
-class DownloadSubmissionTask(SubmissionTask):
- """Task for submitting tasks to execute a download"""
-
- def _get_download_output_manager_cls(self, transfer_future, osutil):
- """Retrieves a class for managing output for a download
-
- :type transfer_future: s3transfer.futures.TransferFuture
- :param transfer_future: The transfer future for the request
-
- :type osutil: s3transfer.utils.OSUtils
- :param osutil: The os utility associated to the transfer
-
- :rtype: class of DownloadOutputManager
- :returns: The appropriate class to use for managing a specific type of
- input for downloads.
- """
- download_manager_resolver_chain = [
- DownloadSpecialFilenameOutputManager,
- DownloadFilenameOutputManager,
- DownloadSeekableOutputManager,
- DownloadNonSeekableOutputManager,
- ]
-
- fileobj = transfer_future.meta.call_args.fileobj
- for download_manager_cls in download_manager_resolver_chain:
- if download_manager_cls.is_compatible(fileobj, osutil):
- return download_manager_cls
- raise RuntimeError(
- 'Output %s of type: %s is not supported.' % (
- fileobj, type(fileobj)))
-
- def _submit(self, client, config, osutil, request_executor, io_executor,
- transfer_future, bandwidth_limiter=None):
- """
- :param client: The client associated with the transfer manager
-
- :type config: s3transfer.manager.TransferConfig
- :param config: The transfer config associated with the transfer
- manager
-
- :type osutil: s3transfer.utils.OSUtil
- :param osutil: The os utility associated to the transfer manager
-
- :type request_executor: s3transfer.futures.BoundedExecutor
- :param request_executor: The request executor associated with the
- transfer manager
-
- :type io_executor: s3transfer.futures.BoundedExecutor
- :param io_executor: The io executor associated with the
- transfer manager
-
- :type transfer_future: s3transfer.futures.TransferFuture
- :param transfer_future: The transfer future associated with the
- transfer request that tasks are being submitted for
-
- :type bandwidth_limiter: s3transfer.bandwidth.BandwidthLimiter
- :param bandwidth_limiter: The bandwidth limiter to use when
- downloading streams
- """
- if transfer_future.meta.size is None:
- # If a size was not provided figure out the size for the
- # user.
- response = client.head_object(
- Bucket=transfer_future.meta.call_args.bucket,
- Key=transfer_future.meta.call_args.key,
- **transfer_future.meta.call_args.extra_args
- )
- transfer_future.meta.provide_transfer_size(
- response['ContentLength'])
-
- download_output_manager = self._get_download_output_manager_cls(
- transfer_future, osutil)(osutil, self._transfer_coordinator,
- io_executor)
-
- # If it is greater than threshold do a ranged download, otherwise
- # do a regular GetObject download.
- if transfer_future.meta.size < config.multipart_threshold:
- self._submit_download_request(
- client, config, osutil, request_executor, io_executor,
- download_output_manager, transfer_future, bandwidth_limiter)
- else:
- self._submit_ranged_download_request(
- client, config, osutil, request_executor, io_executor,
- download_output_manager, transfer_future, bandwidth_limiter)
-
- def _submit_download_request(self, client, config, osutil,
- request_executor, io_executor,
- download_output_manager, transfer_future,
- bandwidth_limiter):
- call_args = transfer_future.meta.call_args
-
- # Get a handle to the file that will be used for writing downloaded
- # contents
- fileobj = download_output_manager.get_fileobj_for_io_writes(
- transfer_future)
-
- # Get the needed callbacks for the task
- progress_callbacks = get_callbacks(transfer_future, 'progress')
-
- # Get any associated tags for the get object task.
- get_object_tag = download_output_manager.get_download_task_tag()
-
- # Get the final io task to run once the download is complete.
- final_task = download_output_manager.get_final_io_task()
-
- # Submit the task to download the object.
- self._transfer_coordinator.submit(
- request_executor,
- ImmediatelyWriteIOGetObjectTask(
- transfer_coordinator=self._transfer_coordinator,
- main_kwargs={
- 'client': client,
- 'bucket': call_args.bucket,
- 'key': call_args.key,
- 'fileobj': fileobj,
- 'extra_args': call_args.extra_args,
- 'callbacks': progress_callbacks,
- 'max_attempts': config.num_download_attempts,
- 'download_output_manager': download_output_manager,
- 'io_chunksize': config.io_chunksize,
- 'bandwidth_limiter': bandwidth_limiter
- },
- done_callbacks=[final_task]
- ),
- tag=get_object_tag
- )
-
- def _submit_ranged_download_request(self, client, config, osutil,
- request_executor, io_executor,
- download_output_manager,
- transfer_future,
- bandwidth_limiter):
- call_args = transfer_future.meta.call_args
-
- # Get the needed progress callbacks for the task
- progress_callbacks = get_callbacks(transfer_future, 'progress')
-
- # Get a handle to the file that will be used for writing downloaded
- # contents
- fileobj = download_output_manager.get_fileobj_for_io_writes(
- transfer_future)
-
- # Determine the number of parts
- part_size = config.multipart_chunksize
+ self._temp_fileobj = self._get_temp_fileobj()
+ return self._temp_fileobj
+
+ def get_final_io_task(self):
+ # A task to rename the file from the temporary file to its final
+ # location is needed. This should be the last task needed to complete
+ # the download.
+ return IORenameFileTask(
+ transfer_coordinator=self._transfer_coordinator,
+ main_kwargs={
+ 'fileobj': self._temp_fileobj,
+ 'final_filename': self._final_filename,
+ 'osutil': self._osutil
+ },
+ is_final=True
+ )
+
+ def _get_temp_fileobj(self):
+ f = self._get_fileobj_from_filename(self._temp_filename)
+ self._transfer_coordinator.add_failure_cleanup(
+ self._osutil.remove_file, self._temp_filename)
+ return f
+
+
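A minimal sketch of the write-to-a-temp-file-then-rename pattern that DownloadFilenameOutputManager relies on; download_to_filename is a hypothetical helper for illustration, not part of s3transfer:

import os
import uuid


def download_to_filename(filename, chunks):
    # Write to a temporary sibling file and only rename it into place once
    # every (offset, data) chunk has landed, mirroring IORenameFileTask plus
    # the failure cleanup registered above.
    temp_filename = '%s.%s' % (filename, uuid.uuid4().hex[:8])
    try:
        with open(temp_filename, 'wb') as f:
            for offset, data in chunks:
                f.seek(offset)
                f.write(data)
        os.rename(temp_filename, filename)
    except Exception:
        if os.path.exists(temp_filename):
            os.remove(temp_filename)
        raise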
+class DownloadSeekableOutputManager(DownloadOutputManager):
+ @classmethod
+ def is_compatible(cls, download_target, osutil):
+ return seekable(download_target)
+
+ def get_fileobj_for_io_writes(self, transfer_future):
+ # Return the fileobj provided to the future.
+ return transfer_future.meta.call_args.fileobj
+
+ def get_final_io_task(self):
+ # This task will serve the purpose of signaling when all of the io
+ # writes have finished so done callbacks can be called.
+ return CompleteDownloadNOOPTask(
+ transfer_coordinator=self._transfer_coordinator)
+
+
+class DownloadNonSeekableOutputManager(DownloadOutputManager):
+ def __init__(self, osutil, transfer_coordinator, io_executor,
+ defer_queue=None):
+ super(DownloadNonSeekableOutputManager, self).__init__(
+ osutil, transfer_coordinator, io_executor)
+ if defer_queue is None:
+ defer_queue = DeferQueue()
+ self._defer_queue = defer_queue
+ self._io_submit_lock = threading.Lock()
+
+ @classmethod
+ def is_compatible(cls, download_target, osutil):
+ return hasattr(download_target, 'write')
+
+ def get_download_task_tag(self):
+ return IN_MEMORY_DOWNLOAD_TAG
+
+ def get_fileobj_for_io_writes(self, transfer_future):
+ return transfer_future.meta.call_args.fileobj
+
+ def get_final_io_task(self):
+ return CompleteDownloadNOOPTask(
+ transfer_coordinator=self._transfer_coordinator)
+
+ def queue_file_io_task(self, fileobj, data, offset):
+ with self._io_submit_lock:
+ writes = self._defer_queue.request_writes(offset, data)
+ for write in writes:
+ data = write['data']
+ logger.debug("Queueing IO offset %s for fileobj: %s",
+ write['offset'], fileobj)
+ super(
+ DownloadNonSeekableOutputManager, self).queue_file_io_task(
+ fileobj, data, offset)
+
+ def get_io_write_task(self, fileobj, data, offset):
+ return IOStreamingWriteTask(
+ self._transfer_coordinator,
+ main_kwargs={
+ 'fileobj': fileobj,
+ 'data': data,
+ }
+ )
+
+
+class DownloadSpecialFilenameOutputManager(DownloadNonSeekableOutputManager):
+ def __init__(self, osutil, transfer_coordinator, io_executor,
+ defer_queue=None):
+ super(DownloadSpecialFilenameOutputManager, self).__init__(
+ osutil, transfer_coordinator, io_executor, defer_queue)
+ self._fileobj = None
+
+ @classmethod
+ def is_compatible(cls, download_target, osutil):
+ return isinstance(download_target, six.string_types) and \
+ osutil.is_special_file(download_target)
+
+ def get_fileobj_for_io_writes(self, transfer_future):
+ filename = transfer_future.meta.call_args.fileobj
+ self._fileobj = self._get_fileobj_from_filename(filename)
+ return self._fileobj
+
+ def get_final_io_task(self):
+ # Make sure the file gets closed once the transfer is done.
+ return IOCloseTask(
+ transfer_coordinator=self._transfer_coordinator,
+ is_final=True,
+ main_kwargs={'fileobj': self._fileobj})
+
+
+class DownloadSubmissionTask(SubmissionTask):
+ """Task for submitting tasks to execute a download"""
+
+ def _get_download_output_manager_cls(self, transfer_future, osutil):
+ """Retrieves a class for managing output for a download
+
+ :type transfer_future: s3transfer.futures.TransferFuture
+ :param transfer_future: The transfer future for the request
+
+ :type osutil: s3transfer.utils.OSUtils
+ :param osutil: The os utility associated to the transfer
+
+ :rtype: class of DownloadOutputManager
+ :returns: The appropriate class to use for managing a specific type of
+ input for downloads.
+ """
+ download_manager_resolver_chain = [
+ DownloadSpecialFilenameOutputManager,
+ DownloadFilenameOutputManager,
+ DownloadSeekableOutputManager,
+ DownloadNonSeekableOutputManager,
+ ]
+
+ fileobj = transfer_future.meta.call_args.fileobj
+ for download_manager_cls in download_manager_resolver_chain:
+ if download_manager_cls.is_compatible(fileobj, osutil):
+ return download_manager_cls
+ raise RuntimeError(
+ 'Output %s of type: %s is not supported.' % (
+ fileobj, type(fileobj)))
+
+ def _submit(self, client, config, osutil, request_executor, io_executor,
+ transfer_future, bandwidth_limiter=None):
+ """
+ :param client: The client associated with the transfer manager
+
+ :type config: s3transfer.manager.TransferConfig
+ :param config: The transfer config associated with the transfer
+ manager
+
+ :type osutil: s3transfer.utils.OSUtil
+ :param osutil: The os utility associated to the transfer manager
+
+ :type request_executor: s3transfer.futures.BoundedExecutor
+ :param request_executor: The request executor associated with the
+ transfer manager
+
+ :type io_executor: s3transfer.futures.BoundedExecutor
+ :param io_executor: The io executor associated with the
+ transfer manager
+
+ :type transfer_future: s3transfer.futures.TransferFuture
+ :param transfer_future: The transfer future associated with the
+ transfer request that tasks are being submitted for
+
+ :type bandwidth_limiter: s3transfer.bandwidth.BandwidthLimiter
+ :param bandwidth_limiter: The bandwidth limiter to use when
+ downloading streams
+ """
+ if transfer_future.meta.size is None:
+ # If a size was not provided figure out the size for the
+ # user.
+ response = client.head_object(
+ Bucket=transfer_future.meta.call_args.bucket,
+ Key=transfer_future.meta.call_args.key,
+ **transfer_future.meta.call_args.extra_args
+ )
+ transfer_future.meta.provide_transfer_size(
+ response['ContentLength'])
+
+ download_output_manager = self._get_download_output_manager_cls(
+ transfer_future, osutil)(osutil, self._transfer_coordinator,
+ io_executor)
+
+ # If it is greater than threshold do a ranged download, otherwise
+ # do a regular GetObject download.
+ if transfer_future.meta.size < config.multipart_threshold:
+ self._submit_download_request(
+ client, config, osutil, request_executor, io_executor,
+ download_output_manager, transfer_future, bandwidth_limiter)
+ else:
+ self._submit_ranged_download_request(
+ client, config, osutil, request_executor, io_executor,
+ download_output_manager, transfer_future, bandwidth_limiter)
+
+ def _submit_download_request(self, client, config, osutil,
+ request_executor, io_executor,
+ download_output_manager, transfer_future,
+ bandwidth_limiter):
+ call_args = transfer_future.meta.call_args
+
+ # Get a handle to the file that will be used for writing downloaded
+ # contents
+ fileobj = download_output_manager.get_fileobj_for_io_writes(
+ transfer_future)
+
+ # Get the needed callbacks for the task
+ progress_callbacks = get_callbacks(transfer_future, 'progress')
+
+ # Get any associated tags for the get object task.
+ get_object_tag = download_output_manager.get_download_task_tag()
+
+ # Get the final io task to run once the download is complete.
+ final_task = download_output_manager.get_final_io_task()
+
+ # Submit the task to download the object.
+ self._transfer_coordinator.submit(
+ request_executor,
+ ImmediatelyWriteIOGetObjectTask(
+ transfer_coordinator=self._transfer_coordinator,
+ main_kwargs={
+ 'client': client,
+ 'bucket': call_args.bucket,
+ 'key': call_args.key,
+ 'fileobj': fileobj,
+ 'extra_args': call_args.extra_args,
+ 'callbacks': progress_callbacks,
+ 'max_attempts': config.num_download_attempts,
+ 'download_output_manager': download_output_manager,
+ 'io_chunksize': config.io_chunksize,
+ 'bandwidth_limiter': bandwidth_limiter
+ },
+ done_callbacks=[final_task]
+ ),
+ tag=get_object_tag
+ )
+
+ def _submit_ranged_download_request(self, client, config, osutil,
+ request_executor, io_executor,
+ download_output_manager,
+ transfer_future,
+ bandwidth_limiter):
+ call_args = transfer_future.meta.call_args
+
+ # Get the needed progress callbacks for the task
+ progress_callbacks = get_callbacks(transfer_future, 'progress')
+
+ # Get a handle to the file that will be used for writing downloaded
+ # contents
+ fileobj = download_output_manager.get_fileobj_for_io_writes(
+ transfer_future)
+
+ # Determine the number of parts
+ part_size = config.multipart_chunksize
num_parts = calculate_num_parts(transfer_future.meta.size, part_size)
-
- # Get any associated tags for the get object task.
- get_object_tag = download_output_manager.get_download_task_tag()
-
- # Callback invoker to submit the final io task once all downloads
- # are complete.
- finalize_download_invoker = CountCallbackInvoker(
- self._get_final_io_task_submission_callback(
- download_output_manager, io_executor
- )
- )
- for i in range(num_parts):
- # Calculate the range parameter
- range_parameter = calculate_range_parameter(
- part_size, i, num_parts)
-
- # Inject the Range parameter to the parameters to be passed in
- # as extra args
- extra_args = {'Range': range_parameter}
- extra_args.update(call_args.extra_args)
- finalize_download_invoker.increment()
- # Submit the ranged downloads
- self._transfer_coordinator.submit(
- request_executor,
- GetObjectTask(
- transfer_coordinator=self._transfer_coordinator,
- main_kwargs={
- 'client': client,
- 'bucket': call_args.bucket,
- 'key': call_args.key,
- 'fileobj': fileobj,
- 'extra_args': extra_args,
- 'callbacks': progress_callbacks,
- 'max_attempts': config.num_download_attempts,
- 'start_index': i * part_size,
- 'download_output_manager': download_output_manager,
- 'io_chunksize': config.io_chunksize,
- 'bandwidth_limiter': bandwidth_limiter
- },
- done_callbacks=[finalize_download_invoker.decrement]
- ),
- tag=get_object_tag
- )
- finalize_download_invoker.finalize()
-
- def _get_final_io_task_submission_callback(self, download_manager,
- io_executor):
- final_task = download_manager.get_final_io_task()
- return FunctionContainer(
- self._transfer_coordinator.submit, io_executor, final_task)
-
- def _calculate_range_param(self, part_size, part_index, num_parts):
- # Used to calculate the Range parameter
- start_range = part_index * part_size
- if part_index == num_parts - 1:
- end_range = ''
- else:
- end_range = start_range + part_size - 1
- range_param = 'bytes=%s-%s' % (start_range, end_range)
- return range_param
-
-
-class GetObjectTask(Task):
- def _main(self, client, bucket, key, fileobj, extra_args, callbacks,
- max_attempts, download_output_manager, io_chunksize,
- start_index=0, bandwidth_limiter=None):
- """Downloads an object and places content into io queue
-
- :param client: The client to use when calling GetObject
- :param bucket: The bucket to download from
- :param key: The key to download from
- :param fileobj: The file handle to write content to
-        :param extra_args: Any extra arguments to include in the GetObject request
- :param callbacks: List of progress callbacks to invoke on download
- :param max_attempts: The number of retries to do when downloading
- :param download_output_manager: The download output manager associated
- with the current download.
- :param io_chunksize: The size of each io chunk to read from the
- download stream and queue in the io queue.
- :param start_index: The location in the file to start writing the
- content of the key to.
- :param bandwidth_limiter: The bandwidth limiter to use when throttling
- the downloading of data in streams.
- """
- last_exception = None
- for i in range(max_attempts):
- try:
+
+ # Get any associated tags for the get object task.
+ get_object_tag = download_output_manager.get_download_task_tag()
+
+ # Callback invoker to submit the final io task once all downloads
+ # are complete.
+ finalize_download_invoker = CountCallbackInvoker(
+ self._get_final_io_task_submission_callback(
+ download_output_manager, io_executor
+ )
+ )
+ for i in range(num_parts):
+ # Calculate the range parameter
+ range_parameter = calculate_range_parameter(
+ part_size, i, num_parts)
+
+ # Inject the Range parameter to the parameters to be passed in
+ # as extra args
+ extra_args = {'Range': range_parameter}
+ extra_args.update(call_args.extra_args)
+ finalize_download_invoker.increment()
+ # Submit the ranged downloads
+ self._transfer_coordinator.submit(
+ request_executor,
+ GetObjectTask(
+ transfer_coordinator=self._transfer_coordinator,
+ main_kwargs={
+ 'client': client,
+ 'bucket': call_args.bucket,
+ 'key': call_args.key,
+ 'fileobj': fileobj,
+ 'extra_args': extra_args,
+ 'callbacks': progress_callbacks,
+ 'max_attempts': config.num_download_attempts,
+ 'start_index': i * part_size,
+ 'download_output_manager': download_output_manager,
+ 'io_chunksize': config.io_chunksize,
+ 'bandwidth_limiter': bandwidth_limiter
+ },
+ done_callbacks=[finalize_download_invoker.decrement]
+ ),
+ tag=get_object_tag
+ )
+ finalize_download_invoker.finalize()
+
+ def _get_final_io_task_submission_callback(self, download_manager,
+ io_executor):
+ final_task = download_manager.get_final_io_task()
+ return FunctionContainer(
+ self._transfer_coordinator.submit, io_executor, final_task)
+
+ def _calculate_range_param(self, part_size, part_index, num_parts):
+ # Used to calculate the Range parameter
+ start_range = part_index * part_size
+ if part_index == num_parts - 1:
+ end_range = ''
+ else:
+ end_range = start_range + part_size - 1
+ range_param = 'bytes=%s-%s' % (start_range, end_range)
+ return range_param
+
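To make the ranged-download math above concrete, a small sketch with assumed sizes showing how the part count and Range headers fall out of the utilities this submission task uses:

from s3transfer.utils import calculate_num_parts, calculate_range_parameter

object_size = 20 * 1024 * 1024   # assumed example object size: 20 MiB
part_size = 8 * 1024 * 1024      # assumed multipart_chunksize: 8 MiB

num_parts = calculate_num_parts(object_size, part_size)   # 3
for i in range(num_parts):
    print(calculate_range_parameter(part_size, i, num_parts))
# bytes=0-8388607
# bytes=8388608-16777215
# bytes=16777216-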
+
+class GetObjectTask(Task):
+ def _main(self, client, bucket, key, fileobj, extra_args, callbacks,
+ max_attempts, download_output_manager, io_chunksize,
+ start_index=0, bandwidth_limiter=None):
+ """Downloads an object and places content into io queue
+
+ :param client: The client to use when calling GetObject
+ :param bucket: The bucket to download from
+ :param key: The key to download from
+ :param fileobj: The file handle to write content to
+        :param extra_args: Any extra arguments to include in the GetObject request
+ :param callbacks: List of progress callbacks to invoke on download
+ :param max_attempts: The number of retries to do when downloading
+ :param download_output_manager: The download output manager associated
+ with the current download.
+ :param io_chunksize: The size of each io chunk to read from the
+ download stream and queue in the io queue.
+ :param start_index: The location in the file to start writing the
+ content of the key to.
+ :param bandwidth_limiter: The bandwidth limiter to use when throttling
+ the downloading of data in streams.
+ """
+ last_exception = None
+ for i in range(max_attempts):
+ try:
current_index = start_index
- response = client.get_object(
- Bucket=bucket, Key=key, **extra_args)
- streaming_body = StreamReaderProgress(
- response['Body'], callbacks)
- if bandwidth_limiter:
- streaming_body = \
- bandwidth_limiter.get_bandwith_limited_stream(
- streaming_body, self._transfer_coordinator)
-
- chunks = DownloadChunkIterator(streaming_body, io_chunksize)
- for chunk in chunks:
- # If the transfer is done because of a cancellation
- # or error somewhere else, stop trying to submit more
- # data to be written and break out of the download.
- if not self._transfer_coordinator.done():
- self._handle_io(
- download_output_manager, fileobj, chunk,
- current_index
- )
- current_index += len(chunk)
- else:
- return
- return
+ response = client.get_object(
+ Bucket=bucket, Key=key, **extra_args)
+ streaming_body = StreamReaderProgress(
+ response['Body'], callbacks)
+ if bandwidth_limiter:
+ streaming_body = \
+ bandwidth_limiter.get_bandwith_limited_stream(
+ streaming_body, self._transfer_coordinator)
+
+ chunks = DownloadChunkIterator(streaming_body, io_chunksize)
+ for chunk in chunks:
+ # If the transfer is done because of a cancellation
+ # or error somewhere else, stop trying to submit more
+ # data to be written and break out of the download.
+ if not self._transfer_coordinator.done():
+ self._handle_io(
+ download_output_manager, fileobj, chunk,
+ current_index
+ )
+ current_index += len(chunk)
+ else:
+ return
+ return
except S3_RETRYABLE_DOWNLOAD_ERRORS as e:
- logger.debug("Retrying exception caught (%s), "
- "retrying request, (attempt %s / %s)", e, i,
- max_attempts, exc_info=True)
- last_exception = e
- # Also invoke the progress callbacks to indicate that we
- # are trying to download the stream again and all progress
- # for this GetObject has been lost.
- invoke_progress_callbacks(
- callbacks, start_index - current_index)
- continue
- raise RetriesExceededError(last_exception)
-
- def _handle_io(self, download_output_manager, fileobj, chunk, index):
- download_output_manager.queue_file_io_task(fileobj, chunk, index)
-
-
-class ImmediatelyWriteIOGetObjectTask(GetObjectTask):
- """GetObjectTask that immediately writes to the provided file object
-
- This is useful for downloads where it is known only one thread is
- downloading the object so there is no reason to go through the
- overhead of using an IO queue and executor.
- """
- def _handle_io(self, download_output_manager, fileobj, chunk, index):
- task = download_output_manager.get_io_write_task(fileobj, chunk, index)
- task()
-
-
-class IOWriteTask(Task):
- def _main(self, fileobj, data, offset):
- """Pulls off an io queue to write contents to a file
-
- :param fileobj: The file handle to write content to
- :param data: The data to write
- :param offset: The offset to write the data to.
- """
- fileobj.seek(offset)
- fileobj.write(data)
-
-
-class IOStreamingWriteTask(Task):
- """Task for writing data to a non-seekable stream."""
-
- def _main(self, fileobj, data):
- """Write data to a fileobj.
-
-        Data will be written directly to the fileobj without
- any prior seeking.
-
- :param fileobj: The fileobj to write content to
- :param data: The data to write
-
- """
- fileobj.write(data)
-
-
-class IORenameFileTask(Task):
- """A task to rename a temporary file to its final filename
-
- :param fileobj: The file handle that content was written to.
- :param final_filename: The final name of the file to rename to
- upon completion of writing the contents.
- :param osutil: OS utility
- """
- def _main(self, fileobj, final_filename, osutil):
- fileobj.close()
- osutil.rename_file(fileobj.name, final_filename)
-
-
-class IOCloseTask(Task):
- """A task to close out a file once the download is complete.
-
- :param fileobj: The fileobj to close.
- """
- def _main(self, fileobj):
- fileobj.close()
-
-
-class CompleteDownloadNOOPTask(Task):
- """A NOOP task to serve as an indicator that the download is complete
-
- Note that the default for is_final is set to True because this should
- always be the last task.
- """
- def __init__(self, transfer_coordinator, main_kwargs=None,
- pending_main_kwargs=None, done_callbacks=None,
- is_final=True):
- super(CompleteDownloadNOOPTask, self).__init__(
- transfer_coordinator=transfer_coordinator,
- main_kwargs=main_kwargs,
- pending_main_kwargs=pending_main_kwargs,
- done_callbacks=done_callbacks,
- is_final=is_final
- )
-
- def _main(self):
- pass
-
-
-class DownloadChunkIterator(object):
- def __init__(self, body, chunksize):
- """Iterator to chunk out a downloaded S3 stream
-
- :param body: A readable file-like object
- :param chunksize: The amount to read each time
- """
- self._body = body
- self._chunksize = chunksize
- self._num_reads = 0
-
- def __iter__(self):
- return self
-
- def __next__(self):
- chunk = self._body.read(self._chunksize)
- self._num_reads += 1
- if chunk:
- return chunk
- elif self._num_reads == 1:
-            # Even though the response may not have had any
-            # content, we still want to account for an empty object's
-            # existence, so return the empty chunk for that initial
- # read.
- return chunk
- raise StopIteration()
-
- next = __next__
-
-
-class DeferQueue(object):
- """IO queue that defers write requests until they are queued sequentially.
-
- This class is used to track IO data for a *single* fileobj.
-
- You can send data to this queue, and it will defer any IO write requests
- until it has the next contiguous block available (starting at 0).
-
- """
- def __init__(self):
- self._writes = []
- self._pending_offsets = set()
- self._next_offset = 0
-
- def request_writes(self, offset, data):
- """Request any available writes given new incoming data.
-
- You call this method by providing new data along with the
- offset associated with the data. If that new data unlocks
- any contiguous writes that can now be submitted, this
- method will return all applicable writes.
-
-        This is done with one method call so you don't have to
-        make two method calls (put() and get()), each of which
-        acquires a lock.
-
- """
- if offset < self._next_offset:
- # This is a request for a write that we've already
- # seen. This can happen in the event of a retry
-            # where if we retry at offset N/2, we'll requeue
- # offsets 0-N/2 again.
- return []
- writes = []
- if offset in self._pending_offsets:
- # We've already queued this offset so this request is
- # a duplicate. In this case we should ignore
- # this request and prefer what's already queued.
- return []
- heapq.heappush(self._writes, (offset, data))
- self._pending_offsets.add(offset)
- while self._writes and self._writes[0][0] == self._next_offset:
- next_write = heapq.heappop(self._writes)
- writes.append({'offset': next_write[0], 'data': next_write[1]})
- self._pending_offsets.remove(next_write[0])
- self._next_offset += len(next_write[1])
- return writes
+ logger.debug("Retrying exception caught (%s), "
+ "retrying request, (attempt %s / %s)", e, i,
+ max_attempts, exc_info=True)
+ last_exception = e
+ # Also invoke the progress callbacks to indicate that we
+ # are trying to download the stream again and all progress
+ # for this GetObject has been lost.
+ invoke_progress_callbacks(
+ callbacks, start_index - current_index)
+ continue
+ raise RetriesExceededError(last_exception)
+
+ def _handle_io(self, download_output_manager, fileobj, chunk, index):
+ download_output_manager.queue_file_io_task(fileobj, chunk, index)
+
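The retry loop above rolls the progress callbacks back by the bytes already read before starting a fresh attempt. A self-contained sketch of that pattern; read_all_with_retries and open_stream are illustrative names, not s3transfer API:

from s3transfer.exceptions import RetriesExceededError
from s3transfer.utils import S3_RETRYABLE_DOWNLOAD_ERRORS


def read_all_with_retries(open_stream, on_progress, max_attempts=5,
                          chunksize=256 * 1024):
    # open_stream: callable returning a fresh readable stream per attempt.
    last_exception = None
    for _ in range(max_attempts):
        bytes_read = 0
        chunks = []
        try:
            stream = open_stream()
            while True:
                chunk = stream.read(chunksize)
                if not chunk:
                    return b''.join(chunks)
                chunks.append(chunk)
                bytes_read += len(chunk)
                on_progress(len(chunk))
        except S3_RETRYABLE_DOWNLOAD_ERRORS as e:
            last_exception = e
            # Roll the progress back so the retry is not double counted.
            on_progress(-bytes_read)
    raise RetriesExceededError(last_exception)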
+
+class ImmediatelyWriteIOGetObjectTask(GetObjectTask):
+ """GetObjectTask that immediately writes to the provided file object
+
+ This is useful for downloads where it is known only one thread is
+ downloading the object so there is no reason to go through the
+ overhead of using an IO queue and executor.
+ """
+ def _handle_io(self, download_output_manager, fileobj, chunk, index):
+ task = download_output_manager.get_io_write_task(fileobj, chunk, index)
+ task()
+
+
+class IOWriteTask(Task):
+ def _main(self, fileobj, data, offset):
+ """Pulls off an io queue to write contents to a file
+
+ :param fileobj: The file handle to write content to
+ :param data: The data to write
+ :param offset: The offset to write the data to.
+ """
+ fileobj.seek(offset)
+ fileobj.write(data)
+
+
+class IOStreamingWriteTask(Task):
+ """Task for writing data to a non-seekable stream."""
+
+ def _main(self, fileobj, data):
+ """Write data to a fileobj.
+
+        Data will be written directly to the fileobj without
+ any prior seeking.
+
+ :param fileobj: The fileobj to write content to
+ :param data: The data to write
+
+ """
+ fileobj.write(data)
+
+
+class IORenameFileTask(Task):
+ """A task to rename a temporary file to its final filename
+
+ :param fileobj: The file handle that content was written to.
+ :param final_filename: The final name of the file to rename to
+ upon completion of writing the contents.
+ :param osutil: OS utility
+ """
+ def _main(self, fileobj, final_filename, osutil):
+ fileobj.close()
+ osutil.rename_file(fileobj.name, final_filename)
+
+
+class IOCloseTask(Task):
+ """A task to close out a file once the download is complete.
+
+ :param fileobj: The fileobj to close.
+ """
+ def _main(self, fileobj):
+ fileobj.close()
+
+
+class CompleteDownloadNOOPTask(Task):
+ """A NOOP task to serve as an indicator that the download is complete
+
+ Note that the default for is_final is set to True because this should
+ always be the last task.
+ """
+ def __init__(self, transfer_coordinator, main_kwargs=None,
+ pending_main_kwargs=None, done_callbacks=None,
+ is_final=True):
+ super(CompleteDownloadNOOPTask, self).__init__(
+ transfer_coordinator=transfer_coordinator,
+ main_kwargs=main_kwargs,
+ pending_main_kwargs=pending_main_kwargs,
+ done_callbacks=done_callbacks,
+ is_final=is_final
+ )
+
+ def _main(self):
+ pass
+
+
+class DownloadChunkIterator(object):
+ def __init__(self, body, chunksize):
+ """Iterator to chunk out a downloaded S3 stream
+
+ :param body: A readable file-like object
+ :param chunksize: The amount to read each time
+ """
+ self._body = body
+ self._chunksize = chunksize
+ self._num_reads = 0
+
+ def __iter__(self):
+ return self
+
+ def __next__(self):
+ chunk = self._body.read(self._chunksize)
+ self._num_reads += 1
+ if chunk:
+ return chunk
+ elif self._num_reads == 1:
+            # Even though the response may not have had any
+            # content, we still want to account for an empty object's
+            # existence, so return the empty chunk for that initial
+ # read.
+ return chunk
+ raise StopIteration()
+
+ next = __next__
+
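A quick usage example for the iterator above with an in-memory body, assuming this module is importable as s3transfer.download:

import io

from s3transfer.download import DownloadChunkIterator

body = io.BytesIO(b'hello world')
for chunk in DownloadChunkIterator(body, chunksize=4):
    print(chunk)   # yields the stream in 4-byte chunks: hell / o wo / rld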
+
+class DeferQueue(object):
+ """IO queue that defers write requests until they are queued sequentially.
+
+ This class is used to track IO data for a *single* fileobj.
+
+ You can send data to this queue, and it will defer any IO write requests
+ until it has the next contiguous block available (starting at 0).
+
+ """
+ def __init__(self):
+ self._writes = []
+ self._pending_offsets = set()
+ self._next_offset = 0
+
+ def request_writes(self, offset, data):
+ """Request any available writes given new incoming data.
+
+ You call this method by providing new data along with the
+ offset associated with the data. If that new data unlocks
+ any contiguous writes that can now be submitted, this
+ method will return all applicable writes.
+
+        This is done with one method call so you don't have to
+        make two method calls (put() and get()), each of which
+        acquires a lock.
+
+ """
+ if offset < self._next_offset:
+ # This is a request for a write that we've already
+ # seen. This can happen in the event of a retry
+            # where if we retry at offset N/2, we'll requeue
+ # offsets 0-N/2 again.
+ return []
+ writes = []
+ if offset in self._pending_offsets:
+ # We've already queued this offset so this request is
+ # a duplicate. In this case we should ignore
+ # this request and prefer what's already queued.
+ return []
+ heapq.heappush(self._writes, (offset, data))
+ self._pending_offsets.add(offset)
+ while self._writes and self._writes[0][0] == self._next_offset:
+ next_write = heapq.heappop(self._writes)
+ writes.append({'offset': next_write[0], 'data': next_write[1]})
+ self._pending_offsets.remove(next_write[0])
+ self._next_offset += len(next_write[1])
+ return writes
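An illustration of the deferral behaviour described above: an out-of-order chunk is held back until the contiguous prefix arrives, at which point both are released in offset order (again assuming this module is importable as s3transfer.download):

from s3transfer.download import DeferQueue

q = DeferQueue()
print(q.request_writes(offset=5, data=b'world'))
# [] - offset 5 is not contiguous with 0 yet, so the write is deferred
print(q.request_writes(offset=0, data=b'hello'))
# [{'offset': 0, 'data': 'hello'}, {'offset': 5, 'data': 'world'}]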
diff --git a/contrib/python/s3transfer/py2/s3transfer/exceptions.py b/contrib/python/s3transfer/py2/s3transfer/exceptions.py
index 7756bfed93..7f3a138104 100644
--- a/contrib/python/s3transfer/py2/s3transfer/exceptions.py
+++ b/contrib/python/s3transfer/py2/s3transfer/exceptions.py
@@ -1,36 +1,36 @@
-# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License"). You
-# may not use this file except in compliance with the License. A copy of
-# the License is located at
-#
-# http://aws.amazon.com/apache2.0/
-#
-# or in the "license" file accompanying this file. This file is
-# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
-# ANY KIND, either express or implied. See the License for the specific
-# language governing permissions and limitations under the License.
-from concurrent.futures import CancelledError
-
-
-class RetriesExceededError(Exception):
- def __init__(self, last_exception, msg='Max Retries Exceeded'):
- super(RetriesExceededError, self).__init__(msg)
- self.last_exception = last_exception
-
-
-class S3UploadFailedError(Exception):
- pass
-
-
-class InvalidSubscriberMethodError(Exception):
- pass
-
-
-class TransferNotDoneError(Exception):
- pass
-
-
-class FatalError(CancelledError):
- """A CancelledError raised from an error in the TransferManager"""
- pass
+# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You
+# may not use this file except in compliance with the License. A copy of
+# the License is located at
+#
+# http://aws.amazon.com/apache2.0/
+#
+# or in the "license" file accompanying this file. This file is
+# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific
+# language governing permissions and limitations under the License.
+from concurrent.futures import CancelledError
+
+
+class RetriesExceededError(Exception):
+ def __init__(self, last_exception, msg='Max Retries Exceeded'):
+ super(RetriesExceededError, self).__init__(msg)
+ self.last_exception = last_exception
+
+
+class S3UploadFailedError(Exception):
+ pass
+
+
+class InvalidSubscriberMethodError(Exception):
+ pass
+
+
+class TransferNotDoneError(Exception):
+ pass
+
+
+class FatalError(CancelledError):
+ """A CancelledError raised from an error in the TransferManager"""
+ pass
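A small, self-contained illustration of how RetriesExceededError carries the underlying failure; the ValueError here is only a stand-in for a real read/socket error:

from s3transfer.exceptions import RetriesExceededError

try:
    raise RetriesExceededError(ValueError('simulated read timeout'))
except RetriesExceededError as e:
    print(e)                  # Max Retries Exceeded
    print(e.last_exception)   # simulated read timeout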
diff --git a/contrib/python/s3transfer/py2/s3transfer/futures.py b/contrib/python/s3transfer/py2/s3transfer/futures.py
index c868a08ae0..d08cb7b3b4 100644
--- a/contrib/python/s3transfer/py2/s3transfer/futures.py
+++ b/contrib/python/s3transfer/py2/s3transfer/futures.py
@@ -1,32 +1,32 @@
-# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License"). You
-# may not use this file except in compliance with the License. A copy of
-# the License is located at
-#
-# http://aws.amazon.com/apache2.0/
-#
-# or in the "license" file accompanying this file. This file is
-# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
-# ANY KIND, either express or implied. See the License for the specific
-# language governing permissions and limitations under the License.
-from concurrent import futures
-from collections import namedtuple
-import copy
-import logging
-import sys
-import threading
-
-from s3transfer.compat import MAXINT
-from s3transfer.compat import six
-from s3transfer.exceptions import CancelledError, TransferNotDoneError
-from s3transfer.utils import FunctionContainer
-from s3transfer.utils import TaskSemaphore
-
-
-logger = logging.getLogger(__name__)
-
-
+# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You
+# may not use this file except in compliance with the License. A copy of
+# the License is located at
+#
+# http://aws.amazon.com/apache2.0/
+#
+# or in the "license" file accompanying this file. This file is
+# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific
+# language governing permissions and limitations under the License.
+from concurrent import futures
+from collections import namedtuple
+import copy
+import logging
+import sys
+import threading
+
+from s3transfer.compat import MAXINT
+from s3transfer.compat import six
+from s3transfer.exceptions import CancelledError, TransferNotDoneError
+from s3transfer.utils import FunctionContainer
+from s3transfer.utils import TaskSemaphore
+
+
+logger = logging.getLogger(__name__)
+
+
class BaseTransferFuture(object):
@property
def meta(self):
@@ -72,523 +72,523 @@ class BaseTransferMeta(object):
class TransferFuture(BaseTransferFuture):
- def __init__(self, meta=None, coordinator=None):
- """The future associated to a submitted transfer request
-
- :type meta: TransferMeta
- :param meta: The metadata associated to the request. This object
- is visible to the requester.
-
- :type coordinator: TransferCoordinator
- :param coordinator: The coordinator associated to the request. This
- object is not visible to the requester.
- """
- self._meta = meta
- if meta is None:
- self._meta = TransferMeta()
-
- self._coordinator = coordinator
- if coordinator is None:
- self._coordinator = TransferCoordinator()
-
- @property
- def meta(self):
- return self._meta
-
- def done(self):
- return self._coordinator.done()
-
- def result(self):
- try:
- # Usually the result() method blocks until the transfer is done,
-            # however if a KeyboardInterrupt is raised we want to exit
-            # out of this and propagate the exception.
- return self._coordinator.result()
- except KeyboardInterrupt as e:
- self.cancel()
- raise e
-
- def cancel(self):
- self._coordinator.cancel()
-
- def set_exception(self, exception):
- """Sets the exception on the future."""
- if not self.done():
- raise TransferNotDoneError(
- 'set_exception can only be called once the transfer is '
- 'complete.')
- self._coordinator.set_exception(exception, override=True)
-
-
+ def __init__(self, meta=None, coordinator=None):
+ """The future associated to a submitted transfer request
+
+ :type meta: TransferMeta
+ :param meta: The metadata associated to the request. This object
+ is visible to the requester.
+
+ :type coordinator: TransferCoordinator
+ :param coordinator: The coordinator associated to the request. This
+ object is not visible to the requester.
+ """
+ self._meta = meta
+ if meta is None:
+ self._meta = TransferMeta()
+
+ self._coordinator = coordinator
+ if coordinator is None:
+ self._coordinator = TransferCoordinator()
+
+ @property
+ def meta(self):
+ return self._meta
+
+ def done(self):
+ return self._coordinator.done()
+
+ def result(self):
+ try:
+ # Usually the result() method blocks until the transfer is done,
+            # however if a KeyboardInterrupt is raised we want to exit
+            # out of this and propagate the exception.
+ return self._coordinator.result()
+ except KeyboardInterrupt as e:
+ self.cancel()
+ raise e
+
+ def cancel(self):
+ self._coordinator.cancel()
+
+ def set_exception(self, exception):
+ """Sets the exception on the future."""
+ if not self.done():
+ raise TransferNotDoneError(
+ 'set_exception can only be called once the transfer is '
+ 'complete.')
+ self._coordinator.set_exception(exception, override=True)
+
+
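A self-contained sketch of the future/coordinator relationship defined in this module; user code normally receives a TransferFuture from the TransferManager rather than wiring one up like this:

from s3transfer.futures import (
    TransferCoordinator, TransferFuture, TransferMeta
)

coordinator = TransferCoordinator(transfer_id=1)
future = TransferFuture(
    meta=TransferMeta(transfer_id=1), coordinator=coordinator)

coordinator.set_result('finished')
coordinator.announce_done()   # unblocks result() and runs done callbacks
print(future.done())          # True
print(future.result())        # 'finished'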
class TransferMeta(BaseTransferMeta):
- """Holds metadata about the TransferFuture"""
- def __init__(self, call_args=None, transfer_id=None):
- self._call_args = call_args
- self._transfer_id = transfer_id
- self._size = None
- self._user_context = {}
-
- @property
- def call_args(self):
- """The call args used in the transfer request"""
- return self._call_args
-
- @property
- def transfer_id(self):
- """The unique id of the transfer"""
- return self._transfer_id
-
- @property
- def size(self):
- """The size of the transfer request if known"""
- return self._size
-
- @property
- def user_context(self):
- """A dictionary that requesters can store data in"""
- return self._user_context
-
- def provide_transfer_size(self, size):
- """A method to provide the size of a transfer request
-
- By providing this value, the TransferManager will not try to
-        call HeadObject or use the OS to determine the size of the
- transfer.
- """
- self._size = size
-
-
-class TransferCoordinator(object):
- """A helper class for managing TransferFuture"""
- def __init__(self, transfer_id=None):
- self.transfer_id = transfer_id
- self._status = 'not-started'
- self._result = None
- self._exception = None
- self._associated_futures = set()
- self._failure_cleanups = []
- self._done_callbacks = []
- self._done_event = threading.Event()
- self._lock = threading.Lock()
- self._associated_futures_lock = threading.Lock()
- self._done_callbacks_lock = threading.Lock()
- self._failure_cleanups_lock = threading.Lock()
-
- def __repr__(self):
- return '%s(transfer_id=%s)' % (
- self.__class__.__name__, self.transfer_id)
-
- @property
- def exception(self):
- return self._exception
-
- @property
- def associated_futures(self):
- """The list of futures associated to the inprogress TransferFuture
-
- Once the transfer finishes this list becomes empty as the transfer
- is considered done and there should be no running futures left.
- """
- with self._associated_futures_lock:
-            # We return a copy of the list because we do not want the
-            # caller processing the returned list while another thread is adding
- # more futures to the actual list.
- return copy.copy(self._associated_futures)
-
- @property
- def failure_cleanups(self):
- """The list of callbacks to call when the TransferFuture fails"""
- return self._failure_cleanups
-
- @property
- def status(self):
- """The status of the TransferFuture
-
- The currently supported states are:
- * not-started - Has yet to start. If in this state, a transfer
- can be canceled immediately and nothing will happen.
- * queued - SubmissionTask is about to submit tasks
-            * running - Is in progress. In-progress as of now means that
-              the SubmissionTask that runs the transfer is being executed, so
-              there is no guarantee any transfer requests have been made to
-              S3 if this state is reached.
- * cancelled - Was cancelled
- * failed - An exception other than CancelledError was thrown
- * success - No exceptions were thrown and is done.
- """
- return self._status
-
- def set_result(self, result):
- """Set a result for the TransferFuture
-
- Implies that the TransferFuture succeeded. This will always set a
- result because it is invoked on the final task where there is only
-        ever one final task and it is run at the very end of a transfer
-        process. So if a result is being set for this final task, the transfer
-        succeeded even if something came along and canceled the transfer
- on the final task.
- """
- with self._lock:
- self._exception = None
- self._result = result
- self._status = 'success'
-
- def set_exception(self, exception, override=False):
- """Set an exception for the TransferFuture
-
- Implies the TransferFuture failed.
-
- :param exception: The exception that cause the transfer to fail.
- :param override: If True, override any existing state.
- """
- with self._lock:
- if not self.done() or override:
- self._exception = exception
- self._status = 'failed'
-
- def result(self):
- """Waits until TransferFuture is done and returns the result
-
- If the TransferFuture succeeded, it will return the result. If the
- TransferFuture failed, it will raise the exception associated to the
- failure.
- """
- # Doing a wait() with no timeout cannot be interrupted in python2 but
- # can be interrupted in python3 so we just wait with the largest
-        # possible integer value, which is on the scale of billions of
- # years...
- self._done_event.wait(MAXINT)
-
- # Once done waiting, raise an exception if present or return the
- # final result.
- if self._exception:
- raise self._exception
- return self._result
-
- def cancel(self, msg='', exc_type=CancelledError):
- """Cancels the TransferFuture
-
- :param msg: The message to attach to the cancellation
- :param exc_type: The type of exception to set for the cancellation
- """
- with self._lock:
- if not self.done():
- should_announce_done = False
- logger.debug('%s cancel(%s) called', self, msg)
- self._exception = exc_type(msg)
- if self._status == 'not-started':
- should_announce_done = True
- self._status = 'cancelled'
- if should_announce_done:
- self.announce_done()
-
- def set_status_to_queued(self):
-        """Sets the TransferFuture's status to queued"""
- self._transition_to_non_done_state('queued')
-
- def set_status_to_running(self):
- """Sets the TransferFuture's status to running"""
- self._transition_to_non_done_state('running')
-
- def _transition_to_non_done_state(self, desired_state):
- with self._lock:
- if self.done():
- raise RuntimeError(
- 'Unable to transition from done state %s to non-done '
- 'state %s.' % (self.status, desired_state))
- self._status = desired_state
-
- def submit(self, executor, task, tag=None):
- """Submits a task to a provided executor
-
- :type executor: s3transfer.futures.BoundedExecutor
- :param executor: The executor to submit the callable to
-
- :type task: s3transfer.tasks.Task
- :param task: The task to submit to the executor
-
- :type tag: s3transfer.futures.TaskTag
- :param tag: A tag to associate to the submitted task
-
- :rtype: concurrent.futures.Future
- :returns: A future representing the submitted task
- """
- logger.debug(
- "Submitting task %s to executor %s for transfer request: %s." % (
- task, executor, self.transfer_id)
- )
- future = executor.submit(task, tag=tag)
- # Add this created future to the list of associated future just
- # in case it is needed during cleanups.
- self.add_associated_future(future)
- future.add_done_callback(
- FunctionContainer(self.remove_associated_future, future))
- return future
-
- def done(self):
- """Determines if a TransferFuture has completed
-
- :returns: True if status is equal to 'failed', 'cancelled', or
- 'success'. False otherwise
- """
- return self.status in ['failed', 'cancelled', 'success']
-
- def add_associated_future(self, future):
- """Adds a future to be associated with the TransferFuture"""
- with self._associated_futures_lock:
- self._associated_futures.add(future)
-
- def remove_associated_future(self, future):
- """Removes a future's association to the TransferFuture"""
- with self._associated_futures_lock:
- self._associated_futures.remove(future)
-
- def add_done_callback(self, function, *args, **kwargs):
- """Add a done callback to be invoked when transfer is done"""
- with self._done_callbacks_lock:
- self._done_callbacks.append(
- FunctionContainer(function, *args, **kwargs)
- )
-
- def add_failure_cleanup(self, function, *args, **kwargs):
- """Adds a callback to call upon failure"""
- with self._failure_cleanups_lock:
- self._failure_cleanups.append(
- FunctionContainer(function, *args, **kwargs))
-
- def announce_done(self):
- """Announce that future is done running and run associated callbacks
-
- This will run any failure cleanups if the transfer failed and they
- have not already been run, allows result() to be unblocked, and will
- run any done callbacks associated with the TransferFuture that have
- not already been run.
- """
- if self.status != 'success':
- self._run_failure_cleanups()
- self._done_event.set()
- self._run_done_callbacks()
-
- def _run_done_callbacks(self):
- # Run the callbacks and remove the callbacks from the internal
- # list so they do not get run again if done is announced more than
- # once.
- with self._done_callbacks_lock:
- self._run_callbacks(self._done_callbacks)
- self._done_callbacks = []
-
- def _run_failure_cleanups(self):
- # Run the cleanup callbacks and remove the callbacks from the internal
- # list so they do not get run again if done is announced more than
- # once.
- with self._failure_cleanups_lock:
- self._run_callbacks(self.failure_cleanups)
- self._failure_cleanups = []
-
- def _run_callbacks(self, callbacks):
- for callback in callbacks:
- self._run_callback(callback)
-
- def _run_callback(self, callback):
- try:
- callback()
- # We do not want a callback interrupting the process, especially
- # in the failure cleanups. So we catch and log the exception.
- except Exception:
- logger.debug("Exception raised in %s." % callback, exc_info=True)
-
-
-class BoundedExecutor(object):
- EXECUTOR_CLS = futures.ThreadPoolExecutor
-
- def __init__(self, max_size, max_num_threads, tag_semaphores=None,
- executor_cls=None):
- """An executor implentation that has a maximum queued up tasks
-
- The executor will block if the number of tasks that have been
- submitted and is currently working on is past its maximum.
-
- :param max_size: The maximum number of inflight futures. An inflight
- future means that the task is either queued up or is currently
- being executed. A size of None or 0 means that the executor will
- have no bound in terms of the number of inflight futures.
-
- :param max_num_threads: The maximum number of threads the executor
- uses.
-
- :type tag_semaphores: dict
- :param tag_semaphores: A dictionary where the key is the name of the
- tag and the value is the semaphore to use when limiting the
- number of tasks the executor is processing at a time.
-
- :type executor_cls: BaseExecutor
- :param executor_cls: The executor class that
- gets bounded by this executor. If None is provided, the
- concurrent.futures.ThreadPoolExecutor class is used.
- """
- self._max_num_threads = max_num_threads
- if executor_cls is None:
- executor_cls = self.EXECUTOR_CLS
- self._executor = executor_cls(max_workers=self._max_num_threads)
- self._semaphore = TaskSemaphore(max_size)
- self._tag_semaphores = tag_semaphores
-
- def submit(self, task, tag=None, block=True):
- """Submit a task to complete
-
- :type task: s3transfer.tasks.Task
- :param task: The task to run __call__ on
-
-
- :type tag: s3transfer.futures.TaskTag
- :param tag: An optional tag to associate to the task. This
- is used to override which semaphore to use.
-
- :type block: boolean
- :param block: If True, wait until it is possible to submit a task.
- If False, do not wait and instead raise an error if the task
- cannot be submitted.
-
- :returns: The future associated with the submitted task
- """
- semaphore = self._semaphore
- # If a tag was provided, use the semaphore associated to that
- # tag.
- if tag:
- semaphore = self._tag_semaphores[tag]
-
- # Call acquire on the semaphore.
- acquire_token = semaphore.acquire(task.transfer_id, block)
- # Create a callback to invoke when task is done in order to call
- # release on the semaphore.
- release_callback = FunctionContainer(
- semaphore.release, task.transfer_id, acquire_token)
- # Submit the task to the underlying executor.
- future = ExecutorFuture(self._executor.submit(task))
- # Add the Semaphore.release() callback to the future such that
- # it is invoked once the future completes.
- future.add_done_callback(release_callback)
- return future
-
- def shutdown(self, wait=True):
- self._executor.shutdown(wait)
-
-
-class ExecutorFuture(object):
- def __init__(self, future):
- """A future returned from the executor
-
- Currently, it is just a wrapper around a concurrent.futures.Future.
- However, this can eventually grow to implement the needed functionality
- of concurrent.futures.Future if we move off of the library and not
- affect the rest of the codebase.
-
- :type future: concurrent.futures.Future
- :param future: The underlying future
- """
- self._future = future
-
- def result(self):
- return self._future.result()
-
- def add_done_callback(self, fn):
- """Adds a callback to be completed once future is done
-
- :param fn: A callable that takes no arguments. Note that this is
- different from concurrent.futures.Future.add_done_callback, which
- requires a single argument for the future.
- """
- # The done callback for concurrent.futures.Future will always pass
- # the future in as the only argument. So we need to create the
- # proper signature wrapper that will invoke the callback provided.
- def done_callback(future_passed_to_callback):
- return fn()
- self._future.add_done_callback(done_callback)
-
- def done(self):
- return self._future.done()
-
-
-class BaseExecutor(object):
- """Base Executor class implementation needed to work with s3transfer"""
- def __init__(self, max_workers=None):
- pass
-
- def submit(self, fn, *args, **kwargs):
- raise NotImplementedError('submit()')
-
- def shutdown(self, wait=True):
- raise NotImplementedError('shutdown()')
-
-
-class NonThreadedExecutor(BaseExecutor):
- """A drop-in replacement non-threaded version of ThreadPoolExecutor"""
- def submit(self, fn, *args, **kwargs):
- future = NonThreadedExecutorFuture()
- try:
- result = fn(*args, **kwargs)
- future.set_result(result)
- except Exception:
- e, tb = sys.exc_info()[1:]
- logger.debug(
- 'Setting exception for %s to %s with traceback %s',
- future, e, tb
- )
- future.set_exception_info(e, tb)
- return future
-
- def shutdown(self, wait=True):
- pass
-
-
-class NonThreadedExecutorFuture(object):
- """The Future returned from NonThreadedExecutor
-
- Note that this future is **not** thread-safe as it is being used
- from the context of a non-threaded environment.
- """
- def __init__(self):
- self._result = None
- self._exception = None
- self._traceback = None
- self._done = False
- self._done_callbacks = []
-
- def set_result(self, result):
- self._result = result
- self._set_done()
-
- def set_exception_info(self, exception, traceback):
- self._exception = exception
- self._traceback = traceback
- self._set_done()
-
- def result(self, timeout=None):
- if self._exception:
- six.reraise(
- type(self._exception), self._exception, self._traceback)
- return self._result
-
- def _set_done(self):
- self._done = True
- for done_callback in self._done_callbacks:
- self._invoke_done_callback(done_callback)
- self._done_callbacks = []
-
- def _invoke_done_callback(self, done_callback):
- return done_callback(self)
-
- def done(self):
- return self._done
-
- def add_done_callback(self, fn):
- if self._done:
- self._invoke_done_callback(fn)
- else:
- self._done_callbacks.append(fn)
-
-
-TaskTag = namedtuple('TaskTag', ['name'])
-
-IN_MEMORY_UPLOAD_TAG = TaskTag('in_memory_upload')
-IN_MEMORY_DOWNLOAD_TAG = TaskTag('in_memory_download')
+ """Holds metadata about the TransferFuture"""
+ def __init__(self, call_args=None, transfer_id=None):
+ self._call_args = call_args
+ self._transfer_id = transfer_id
+ self._size = None
+ self._user_context = {}
+
+ @property
+ def call_args(self):
+ """The call args used in the transfer request"""
+ return self._call_args
+
+ @property
+ def transfer_id(self):
+ """The unique id of the transfer"""
+ return self._transfer_id
+
+ @property
+ def size(self):
+ """The size of the transfer request if known"""
+ return self._size
+
+ @property
+ def user_context(self):
+ """A dictionary that requesters can store data in"""
+ return self._user_context
+
+ def provide_transfer_size(self, size):
+ """A method to provide the size of a transfer request
+
+ By providing this value, the TransferManager will not try to
+ call HeadObject or use the OS to determine the size of the
+ transfer.
+ """
+ self._size = size
+
+
+class TransferCoordinator(object):
+ """A helper class for managing TransferFuture"""
+ def __init__(self, transfer_id=None):
+ self.transfer_id = transfer_id
+ self._status = 'not-started'
+ self._result = None
+ self._exception = None
+ self._associated_futures = set()
+ self._failure_cleanups = []
+ self._done_callbacks = []
+ self._done_event = threading.Event()
+ self._lock = threading.Lock()
+ self._associated_futures_lock = threading.Lock()
+ self._done_callbacks_lock = threading.Lock()
+ self._failure_cleanups_lock = threading.Lock()
+
+ def __repr__(self):
+ return '%s(transfer_id=%s)' % (
+ self.__class__.__name__, self.transfer_id)
+
+ @property
+ def exception(self):
+ return self._exception
+
+ @property
+ def associated_futures(self):
+ """The list of futures associated to the inprogress TransferFuture
+
+ Once the transfer finishes this list becomes empty as the transfer
+ is considered done and there should be no running futures left.
+ """
+ with self._associated_futures_lock:
+ # We return a copy of the list because we do not want to
+ # process the returned list while another thread is adding
+ # more futures to the actual list.
+ return copy.copy(self._associated_futures)
+
+ @property
+ def failure_cleanups(self):
+ """The list of callbacks to call when the TransferFuture fails"""
+ return self._failure_cleanups
+
+ @property
+ def status(self):
+ """The status of the TransferFuture
+
+ The currently supported states are:
+ * not-started - Has yet to start. If in this state, a transfer
+ can be canceled immediately and nothing will happen.
+ * queued - SubmissionTask is about to submit tasks
+ * running - Is in progress. In-progress as of now means that
+ the SubmissionTask that runs the transfer is being executed. So
+ there is no guarantee any transfer requests have been made to
+ S3 if this state is reached.
+ * cancelled - Was cancelled
+ * failed - An exception other than CancelledError was thrown
+ * success - No exceptions were thrown and the transfer is done.
+ """
+ return self._status
+
+ def set_result(self, result):
+ """Set a result for the TransferFuture
+
+ Implies that the TransferFuture succeeded. This will always set a
+ result because it is invoked on the final task, of which there is
+ only ever one, and that task is run at the very end of the transfer
+ process. So if a result is being set for this final task, the transfer
+ succeeded even if something came along and canceled the transfer
+ on the final task.
+ """
+ with self._lock:
+ self._exception = None
+ self._result = result
+ self._status = 'success'
+
+ def set_exception(self, exception, override=False):
+ """Set an exception for the TransferFuture
+
+ Implies the TransferFuture failed.
+
+ :param exception: The exception that caused the transfer to fail.
+ :param override: If True, override any existing state.
+ """
+ with self._lock:
+ if not self.done() or override:
+ self._exception = exception
+ self._status = 'failed'
+
+ def result(self):
+ """Waits until TransferFuture is done and returns the result
+
+ If the TransferFuture succeeded, it will return the result. If the
+ TransferFuture failed, it will raise the exception associated to the
+ failure.
+ """
+ # Doing a wait() with no timeout cannot be interrupted in python2 but
+ # can be interrupted in python3 so we just wait with the largest
+ # possible integer value, which is on the scale of billions of
+ # years...
+ self._done_event.wait(MAXINT)
+
+ # Once done waiting, raise an exception if present or return the
+ # final result.
+ if self._exception:
+ raise self._exception
+ return self._result
+
+ def cancel(self, msg='', exc_type=CancelledError):
+ """Cancels the TransferFuture
+
+ :param msg: The message to attach to the cancellation
+ :param exc_type: The type of exception to set for the cancellation
+ """
+ with self._lock:
+ if not self.done():
+ should_announce_done = False
+ logger.debug('%s cancel(%s) called', self, msg)
+ self._exception = exc_type(msg)
+ if self._status == 'not-started':
+ should_announce_done = True
+ self._status = 'cancelled'
+ if should_announce_done:
+ self.announce_done()
+
+ def set_status_to_queued(self):
+ """Sets the TransferFutrue's status to running"""
+ self._transition_to_non_done_state('queued')
+
+ def set_status_to_running(self):
+ """Sets the TransferFuture's status to running"""
+ self._transition_to_non_done_state('running')
+
+ def _transition_to_non_done_state(self, desired_state):
+ with self._lock:
+ if self.done():
+ raise RuntimeError(
+ 'Unable to transition from done state %s to non-done '
+ 'state %s.' % (self.status, desired_state))
+ self._status = desired_state
+
+ def submit(self, executor, task, tag=None):
+ """Submits a task to a provided executor
+
+ :type executor: s3transfer.futures.BoundedExecutor
+ :param executor: The executor to submit the callable to
+
+ :type task: s3transfer.tasks.Task
+ :param task: The task to submit to the executor
+
+ :type tag: s3transfer.futures.TaskTag
+ :param tag: A tag to associate to the submitted task
+
+ :rtype: concurrent.futures.Future
+ :returns: A future representing the submitted task
+ """
+ logger.debug(
+ "Submitting task %s to executor %s for transfer request: %s." % (
+ task, executor, self.transfer_id)
+ )
+ future = executor.submit(task, tag=tag)
+ # Add this created future to the list of associated futures just
+ # in case it is needed during cleanups.
+ self.add_associated_future(future)
+ future.add_done_callback(
+ FunctionContainer(self.remove_associated_future, future))
+ return future
+
+ def done(self):
+ """Determines if a TransferFuture has completed
+
+ :returns: True if status is equal to 'failed', 'cancelled', or
+ 'success'. False otherwise
+ """
+ return self.status in ['failed', 'cancelled', 'success']
+
+ def add_associated_future(self, future):
+ """Adds a future to be associated with the TransferFuture"""
+ with self._associated_futures_lock:
+ self._associated_futures.add(future)
+
+ def remove_associated_future(self, future):
+ """Removes a future's association to the TransferFuture"""
+ with self._associated_futures_lock:
+ self._associated_futures.remove(future)
+
+ def add_done_callback(self, function, *args, **kwargs):
+ """Add a done callback to be invoked when transfer is done"""
+ with self._done_callbacks_lock:
+ self._done_callbacks.append(
+ FunctionContainer(function, *args, **kwargs)
+ )
+
+ def add_failure_cleanup(self, function, *args, **kwargs):
+ """Adds a callback to call upon failure"""
+ with self._failure_cleanups_lock:
+ self._failure_cleanups.append(
+ FunctionContainer(function, *args, **kwargs))
+
+ def announce_done(self):
+ """Announce that future is done running and run associated callbacks
+
+ This will run any failure cleanups if the transfer failed and they
+ have not already been run, allows result() to be unblocked, and will
+ run any done callbacks associated with the TransferFuture that have
+ not already been run.
+ """
+ if self.status != 'success':
+ self._run_failure_cleanups()
+ self._done_event.set()
+ self._run_done_callbacks()
+
+ def _run_done_callbacks(self):
+ # Run the callbacks and remove the callbacks from the internal
+ # list so they do not get run again if done is announced more than
+ # once.
+ with self._done_callbacks_lock:
+ self._run_callbacks(self._done_callbacks)
+ self._done_callbacks = []
+
+ def _run_failure_cleanups(self):
+ # Run the cleanup callbacks and remove the callbacks from the internal
+ # list so they do not get run again if done is announced more than
+ # once.
+ with self._failure_cleanups_lock:
+ self._run_callbacks(self.failure_cleanups)
+ self._failure_cleanups = []
+
+ def _run_callbacks(self, callbacks):
+ for callback in callbacks:
+ self._run_callback(callback)
+
+ def _run_callback(self, callback):
+ try:
+ callback()
+ # We do not want a callback interrupting the process, especially
+ # in the failure cleanups. So we catch and log the exception.
+ except Exception:
+ logger.debug("Exception raised in %s." % callback, exc_info=True)
+
+
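The coordinator's state machine is easier to follow with a concrete, if artificial, walk-through. The sketch below (illustrative only, not part of the diff; in real use the TransferManager and SubmissionTask drive these transitions) assembles a TransferMeta and TransferCoordinator into a TransferFuture the same way manager.py does, then steps the coordinator through the states described in the status docstring.

    from s3transfer.futures import TransferCoordinator, TransferFuture, TransferMeta
    from s3transfer.utils import CallArgs

    call_args = CallArgs(bucket='my-bucket', key='my-key', extra_args={},
                         subscribers=[])
    coordinator = TransferCoordinator(transfer_id=0)
    # This is what a caller would receive back from the manager.
    future = TransferFuture(meta=TransferMeta(call_args, transfer_id=0),
                            coordinator=coordinator)

    # Hand-driven here purely to illustrate not-started -> queued -> running -> success.
    coordinator.set_status_to_queued()
    coordinator.set_status_to_running()
    coordinator.set_result('transfer complete')
    coordinator.announce_done()
    assert coordinator.done() and coordinator.status == 'success'
    print(coordinator.result())  # -> 'transfer complete'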
+class BoundedExecutor(object):
+ EXECUTOR_CLS = futures.ThreadPoolExecutor
+
+ def __init__(self, max_size, max_num_threads, tag_semaphores=None,
+ executor_cls=None):
+ """An executor implentation that has a maximum queued up tasks
+
+ The executor will block if the number of tasks that have been
+ submitted and is currently working on is past its maximum.
+
+ :param max_size: The maximum number of inflight futures. An inflight
+ future means that the task is either queued up or is currently
+ being executed. A size of None or 0 means that the executor will
+ have no bound in terms of the number of inflight futures.
+
+ :param max_num_threads: The maximum number of threads the executor
+ uses.
+
+ :type tag_semaphores: dict
+ :param tag_semaphores: A dictionary where the key is the name of the
+ tag and the value is the semaphore to use when limiting the
+ number of tasks the executor is processing at a time.
+
+ :type executor_cls: BaseExecutor
+ :param executor_cls: The executor class that
+ gets bounded by this executor. If None is provided, the
+ concurrent.futures.ThreadPoolExecutor class is used.
+ """
+ self._max_num_threads = max_num_threads
+ if executor_cls is None:
+ executor_cls = self.EXECUTOR_CLS
+ self._executor = executor_cls(max_workers=self._max_num_threads)
+ self._semaphore = TaskSemaphore(max_size)
+ self._tag_semaphores = tag_semaphores
+
+ def submit(self, task, tag=None, block=True):
+ """Submit a task to complete
+
+ :type task: s3transfer.tasks.Task
+ :param task: The task to run __call__ on
+
+
+ :type tag: s3transfer.futures.TaskTag
+ :param tag: An optional tag to associate to the task. This
+ is used to override which semaphore to use.
+
+ :type block: boolean
+ :param block: If True, wait until it is possible to submit a task.
+ If False, do not wait and instead raise an error if the task
+ cannot be submitted.
+
+ :returns: The future associated with the submitted task
+ """
+ semaphore = self._semaphore
+ # If a tag was provided, use the semaphore associated to that
+ # tag.
+ if tag:
+ semaphore = self._tag_semaphores[tag]
+
+ # Call acquire on the semaphore.
+ acquire_token = semaphore.acquire(task.transfer_id, block)
+ # Create a callback to invoke when task is done in order to call
+ # release on the semaphore.
+ release_callback = FunctionContainer(
+ semaphore.release, task.transfer_id, acquire_token)
+ # Submit the task to the underlying executor.
+ future = ExecutorFuture(self._executor.submit(task))
+ # Add the Semaphore.release() callback to the future such that
+ # it is invoked once the future completes.
+ future.add_done_callback(release_callback)
+ return future
+
+ def shutdown(self, wait=True):
+ self._executor.shutdown(wait)
+
+
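To make the tag-semaphore behaviour of submit() concrete, here is a hedged sketch. The DummyTask class is an assumption made for the example (real tasks come from s3transfer.tasks); the executor only needs something callable that carries a transfer_id attribute, and the tag routes acquisition through the matching semaphore.

    from s3transfer.futures import BoundedExecutor, TaskTag
    from s3transfer.utils import TaskSemaphore

    class DummyTask(object):
        # Stand-in for a real s3transfer task: a transfer_id plus __call__.
        transfer_id = 0

        def __call__(self):
            return 'ran'

    SLOW_TAG = TaskTag('slow')
    executor = BoundedExecutor(
        max_size=10, max_num_threads=2,
        tag_semaphores={SLOW_TAG: TaskSemaphore(1)})
    future = executor.submit(DummyTask(), tag=SLOW_TAG)  # gated by the tag's semaphore
    print(future.result())  # -> 'ran'
    executor.shutdown()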
+class ExecutorFuture(object):
+ def __init__(self, future):
+ """A future returned from the executor
+
+ Currently, it is just a wrapper around a concurrent.futures.Future.
+ However, this can eventually grow to implement the needed functionality
+ of concurrent.futures.Future if we move off of the library and not
+ affect the rest of the codebase.
+
+ :type future: concurrent.futures.Future
+ :param future: The underlying future
+ """
+ self._future = future
+
+ def result(self):
+ return self._future.result()
+
+ def add_done_callback(self, fn):
+ """Adds a callback to be completed once future is done
+
+ :param fn: A callable that takes no arguments. Note that this is
+ different from concurrent.futures.Future.add_done_callback, which
+ requires a single argument for the future.
+ """
+ # The done callback for concurrent.futures.Future will always pass
+ # the future in as the only argument. So we need to create the
+ # proper signature wrapper that will invoke the callback provided.
+ def done_callback(future_passed_to_callback):
+ return fn()
+ self._future.add_done_callback(done_callback)
+
+ def done(self):
+ return self._future.done()
+
+
+class BaseExecutor(object):
+ """Base Executor class implementation needed to work with s3transfer"""
+ def __init__(self, max_workers=None):
+ pass
+
+ def submit(self, fn, *args, **kwargs):
+ raise NotImplementedError('submit()')
+
+ def shutdown(self, wait=True):
+ raise NotImplementedError('shutdown()')
+
+
+class NonThreadedExecutor(BaseExecutor):
+ """A drop-in replacement non-threaded version of ThreadPoolExecutor"""
+ def submit(self, fn, *args, **kwargs):
+ future = NonThreadedExecutorFuture()
+ try:
+ result = fn(*args, **kwargs)
+ future.set_result(result)
+ except Exception:
+ e, tb = sys.exc_info()[1:]
+ logger.debug(
+ 'Setting exception for %s to %s with traceback %s',
+ future, e, tb
+ )
+ future.set_exception_info(e, tb)
+ return future
+
+ def shutdown(self, wait=True):
+ pass
+
+
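A quick sketch of the non-threaded executor in isolation; nothing outside this module is assumed. Because submit() runs the callable synchronously, the returned future is already done by the time submit() returns.

    from s3transfer.futures import NonThreadedExecutor

    executor = NonThreadedExecutor()
    future = executor.submit(sum, [1, 2, 3])  # executes sum([1, 2, 3]) immediately
    print(future.done())    # -> True
    print(future.result())  # -> 6
    executor.shutdown()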
+class NonThreadedExecutorFuture(object):
+ """The Future returned from NonThreadedExecutor
+
+ Note that this future is **not** thread-safe as it is being used
+ from the context of a non-threaded environment.
+ """
+ def __init__(self):
+ self._result = None
+ self._exception = None
+ self._traceback = None
+ self._done = False
+ self._done_callbacks = []
+
+ def set_result(self, result):
+ self._result = result
+ self._set_done()
+
+ def set_exception_info(self, exception, traceback):
+ self._exception = exception
+ self._traceback = traceback
+ self._set_done()
+
+ def result(self, timeout=None):
+ if self._exception:
+ six.reraise(
+ type(self._exception), self._exception, self._traceback)
+ return self._result
+
+ def _set_done(self):
+ self._done = True
+ for done_callback in self._done_callbacks:
+ self._invoke_done_callback(done_callback)
+ self._done_callbacks = []
+
+ def _invoke_done_callback(self, done_callback):
+ return done_callback(self)
+
+ def done(self):
+ return self._done
+
+ def add_done_callback(self, fn):
+ if self._done:
+ self._invoke_done_callback(fn)
+ else:
+ self._done_callbacks.append(fn)
+
+
+TaskTag = namedtuple('TaskTag', ['name'])
+
+IN_MEMORY_UPLOAD_TAG = TaskTag('in_memory_upload')
+IN_MEMORY_DOWNLOAD_TAG = TaskTag('in_memory_download')
diff --git a/contrib/python/s3transfer/py2/s3transfer/manager.py b/contrib/python/s3transfer/py2/s3transfer/manager.py
index a7e7c33b00..c6e6eb47f6 100644
--- a/contrib/python/s3transfer/py2/s3transfer/manager.py
+++ b/contrib/python/s3transfer/py2/s3transfer/manager.py
@@ -1,208 +1,208 @@
-# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License"). You
-# may not use this file except in compliance with the License. A copy of
-# the License is located at
-#
-# http://aws.amazon.com/apache2.0/
-#
-# or in the "license" file accompanying this file. This file is
-# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
-# ANY KIND, either express or implied. See the License for the specific
-# language governing permissions and limitations under the License.
-import copy
-import logging
+# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You
+# may not use this file except in compliance with the License. A copy of
+# the License is located at
+#
+# http://aws.amazon.com/apache2.0/
+#
+# or in the "license" file accompanying this file. This file is
+# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific
+# language governing permissions and limitations under the License.
+import copy
+import logging
import re
-import threading
-
-from botocore.compat import six
-
+import threading
+
+from botocore.compat import six
+
from s3transfer.constants import KB, MB
from s3transfer.constants import ALLOWED_DOWNLOAD_ARGS
-from s3transfer.utils import get_callbacks
-from s3transfer.utils import signal_transferring
-from s3transfer.utils import signal_not_transferring
-from s3transfer.utils import CallArgs
-from s3transfer.utils import OSUtils
-from s3transfer.utils import TaskSemaphore
-from s3transfer.utils import SlidingWindowSemaphore
-from s3transfer.exceptions import CancelledError
-from s3transfer.exceptions import FatalError
-from s3transfer.futures import IN_MEMORY_DOWNLOAD_TAG
-from s3transfer.futures import IN_MEMORY_UPLOAD_TAG
-from s3transfer.futures import BoundedExecutor
-from s3transfer.futures import TransferFuture
-from s3transfer.futures import TransferMeta
-from s3transfer.futures import TransferCoordinator
-from s3transfer.download import DownloadSubmissionTask
-from s3transfer.upload import UploadSubmissionTask
-from s3transfer.copies import CopySubmissionTask
-from s3transfer.delete import DeleteSubmissionTask
-from s3transfer.bandwidth import LeakyBucket
-from s3transfer.bandwidth import BandwidthLimiter
-
-
-logger = logging.getLogger(__name__)
-
-
-class TransferConfig(object):
- def __init__(self,
- multipart_threshold=8 * MB,
- multipart_chunksize=8 * MB,
- max_request_concurrency=10,
- max_submission_concurrency=5,
- max_request_queue_size=1000,
- max_submission_queue_size=1000,
- max_io_queue_size=1000,
- io_chunksize=256 * KB,
- num_download_attempts=5,
- max_in_memory_upload_chunks=10,
- max_in_memory_download_chunks=10,
- max_bandwidth=None):
- """Configurations for the transfer mangager
-
- :param multipart_threshold: The threshold for which multipart
- transfers occur.
-
- :param max_request_concurrency: The maximum number of S3 API
- transfer-related requests that can happen at a time.
-
- :param max_submission_concurrency: The maximum number of threads
- processing a call to a TransferManager method. Processing a
- call usually entails determining which S3 API requests need
- to be enqueued, but does **not** entail making any of the
- S3 API data transferring requests needed to perform the transfer.
- The threads controlled by ``max_request_concurrency`` are
- responsible for that.
-
- :param multipart_chunksize: The size of each transfer if a request
- becomes a multipart transfer.
-
- :param max_request_queue_size: The maximum amount of S3 API requests
- that can be queued at a time. A value of zero means that there
- is no maximum.
-
- :param max_submission_queue_size: The maximum amount of
- TransferManager method calls that can be queued at a time. A value
- of zero means that there is no maximum.
-
- :param max_io_queue_size: The maximum amount of read parts that
- can be queued to be written to disk per download. A value of zero
- means that there is no maximum. The default size for each element
- in this queue is 8 KB.
-
- :param io_chunksize: The max size of each chunk in the io queue.
- Currently, this is the size used when reading from the downloaded
- stream as well.
-
- :param num_download_attempts: The number of download attempts that
- will be tried upon errors with downloading an object in S3. Note
- that these retries account for errors that occur when streaming
- down the data from s3 (i.e. socket errors and read timeouts that
- occur after receiving an OK response from s3).
- Other retryable exceptions such as throttling errors and 5xx errors
- are already retried by botocore (this default is 5). The
- ``num_download_attempts`` does not take into account the
- number of exceptions retried by botocore.
-
- :param max_in_memory_upload_chunks: The number of chunks that can
- be stored in memory at a time for all ongoing upload requests.
- This pertains to chunks of data that need to be stored in memory
- during an upload if the data is sourced from a file-like object.
- The total maximum memory footprint due to in-memory upload
- chunks is roughly equal to:
-
- max_in_memory_upload_chunks * multipart_chunksize
- + max_submission_concurrency * multipart_chunksize
-
- ``max_submission_concurrency`` has an effect on this value because
- each thread pulling data off of a file-like object may
- be waiting with a single read chunk to be submitted for upload
- because the ``max_in_memory_upload_chunks`` value has been reached
- by the threads making the upload request.
-
- :param max_in_memory_download_chunks: The number of chunks that can
- be buffered in memory and **not** in the io queue at a time for all
- ongoing download requests. This pertains specifically to file-like
- objects that are not seekable. The total maximum memory footprint
- due to in-memory download chunks is roughly equal to:
-
- max_in_memory_download_chunks * multipart_chunksize
-
- :param max_bandwidth: The maximum bandwidth that will be consumed
- in uploading and downloading file content. The value is in terms of
- bytes per second.
- """
- self.multipart_threshold = multipart_threshold
- self.multipart_chunksize = multipart_chunksize
- self.max_request_concurrency = max_request_concurrency
- self.max_submission_concurrency = max_submission_concurrency
- self.max_request_queue_size = max_request_queue_size
- self.max_submission_queue_size = max_submission_queue_size
- self.max_io_queue_size = max_io_queue_size
- self.io_chunksize = io_chunksize
- self.num_download_attempts = num_download_attempts
- self.max_in_memory_upload_chunks = max_in_memory_upload_chunks
- self.max_in_memory_download_chunks = max_in_memory_download_chunks
- self.max_bandwidth = max_bandwidth
- self._validate_attrs_are_nonzero()
-
- def _validate_attrs_are_nonzero(self):
- for attr, attr_val, in self.__dict__.items():
- if attr_val is not None and attr_val <= 0:
- raise ValueError(
- 'Provided parameter %s of value %s must be greater than '
- '0.' % (attr, attr_val))
-
-
-class TransferManager(object):
+from s3transfer.utils import get_callbacks
+from s3transfer.utils import signal_transferring
+from s3transfer.utils import signal_not_transferring
+from s3transfer.utils import CallArgs
+from s3transfer.utils import OSUtils
+from s3transfer.utils import TaskSemaphore
+from s3transfer.utils import SlidingWindowSemaphore
+from s3transfer.exceptions import CancelledError
+from s3transfer.exceptions import FatalError
+from s3transfer.futures import IN_MEMORY_DOWNLOAD_TAG
+from s3transfer.futures import IN_MEMORY_UPLOAD_TAG
+from s3transfer.futures import BoundedExecutor
+from s3transfer.futures import TransferFuture
+from s3transfer.futures import TransferMeta
+from s3transfer.futures import TransferCoordinator
+from s3transfer.download import DownloadSubmissionTask
+from s3transfer.upload import UploadSubmissionTask
+from s3transfer.copies import CopySubmissionTask
+from s3transfer.delete import DeleteSubmissionTask
+from s3transfer.bandwidth import LeakyBucket
+from s3transfer.bandwidth import BandwidthLimiter
+
+
+logger = logging.getLogger(__name__)
+
+
+class TransferConfig(object):
+ def __init__(self,
+ multipart_threshold=8 * MB,
+ multipart_chunksize=8 * MB,
+ max_request_concurrency=10,
+ max_submission_concurrency=5,
+ max_request_queue_size=1000,
+ max_submission_queue_size=1000,
+ max_io_queue_size=1000,
+ io_chunksize=256 * KB,
+ num_download_attempts=5,
+ max_in_memory_upload_chunks=10,
+ max_in_memory_download_chunks=10,
+ max_bandwidth=None):
+ """Configurations for the transfer mangager
+
+ :param multipart_threshold: The threshold for which multipart
+ transfers occur.
+
+ :param max_request_concurrency: The maximum number of S3 API
+ transfer-related requests that can happen at a time.
+
+ :param max_submission_concurrency: The maximum number of threads
+ processing a call to a TransferManager method. Processing a
+ call usually entails determining which S3 API requests need
+ to be enqueued, but does **not** entail making any of the
+ S3 API data transferring requests needed to perform the transfer.
+ The threads controlled by ``max_request_concurrency`` are
+ responsible for that.
+
+ :param multipart_chunksize: The size of each transfer if a request
+ becomes a multipart transfer.
+
+ :param max_request_queue_size: The maximum amount of S3 API requests
+ that can be queued at a time. A value of zero means that there
+ is no maximum.
+
+ :param max_submission_queue_size: The maximum amount of
+ TransferManager method calls that can be queued at a time. A value
+ of zero means that there is no maximum.
+
+ :param max_io_queue_size: The maximum amount of read parts that
+ can be queued to be written to disk per download. A value of zero
+ means that there is no maximum. The default size for each element
+ in this queue is 8 KB.
+
+ :param io_chunksize: The max size of each chunk in the io queue.
+ Currently, this is the size used when reading from the downloaded
+ stream as well.
+
+ :param num_download_attempts: The number of download attempts that
+ will be tried upon errors with downloading an object in S3. Note
+ that these retries account for errors that occur when streaming
+ down the data from s3 (i.e. socket errors and read timeouts that
+ occur after receiving an OK response from s3).
+ Other retryable exceptions such as throttling errors and 5xx errors
+ are already retried by botocore (this default is 5). The
+ ``num_download_attempts`` does not take into account the
+ number of exceptions retried by botocore.
+
+ :param max_in_memory_upload_chunks: The number of chunks that can
+ be stored in memory at a time for all ongoing upload requests.
+ This pertains to chunks of data that need to be stored in memory
+ during an upload if the data is sourced from a file-like object.
+ The total maximum memory footprint due to in-memory upload
+ chunks is roughly equal to:
+
+ max_in_memory_upload_chunks * multipart_chunksize
+ + max_submission_concurrency * multipart_chunksize
+
+ ``max_submission_concurrency`` has an effect on this value because
+ each thread pulling data off of a file-like object may
+ be waiting with a single read chunk to be submitted for upload
+ because the ``max_in_memory_upload_chunks`` value has been reached
+ by the threads making the upload request.
+
+ :param max_in_memory_download_chunks: The number of chunks that can
+ be buffered in memory and **not** in the io queue at a time for all
+ ongoing download requests. This pertains specifically to file-like
+ objects that are not seekable. The total maximum memory footprint
+ due to in-memory download chunks is roughly equal to:
+
+ max_in_memory_download_chunks * multipart_chunksize
+
+ :param max_bandwidth: The maximum bandwidth that will be consumed
+ in uploading and downloading file content. The value is in terms of
+ bytes per second.
+ """
+ self.multipart_threshold = multipart_threshold
+ self.multipart_chunksize = multipart_chunksize
+ self.max_request_concurrency = max_request_concurrency
+ self.max_submission_concurrency = max_submission_concurrency
+ self.max_request_queue_size = max_request_queue_size
+ self.max_submission_queue_size = max_submission_queue_size
+ self.max_io_queue_size = max_io_queue_size
+ self.io_chunksize = io_chunksize
+ self.num_download_attempts = num_download_attempts
+ self.max_in_memory_upload_chunks = max_in_memory_upload_chunks
+ self.max_in_memory_download_chunks = max_in_memory_download_chunks
+ self.max_bandwidth = max_bandwidth
+ self._validate_attrs_are_nonzero()
+
+ def _validate_attrs_are_nonzero(self):
+ for attr, attr_val, in self.__dict__.items():
+ if attr_val is not None and attr_val <= 0:
+ raise ValueError(
+ 'Provided parameter %s of value %s must be greater than '
+ '0.' % (attr, attr_val))
+
+
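To make the knobs above concrete, here is a hedged example of constructing a TransferConfig; the chosen values are illustrative only. Note that any configured attribute that is zero or negative is rejected by _validate_attrs_are_nonzero().

    from s3transfer.constants import MB
    from s3transfer.manager import TransferConfig

    config = TransferConfig(
        multipart_threshold=16 * MB,   # switch to multipart transfers above 16 MB
        multipart_chunksize=16 * MB,
        max_request_concurrency=4,
        max_bandwidth=10 * MB,         # cap transfers at roughly 10 MB/s
    )

    try:
        TransferConfig(io_chunksize=0)  # zero is rejected
    except ValueError as error:
        print(error)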
+class TransferManager(object):
ALLOWED_DOWNLOAD_ARGS = ALLOWED_DOWNLOAD_ARGS
-
- ALLOWED_UPLOAD_ARGS = [
- 'ACL',
- 'CacheControl',
- 'ContentDisposition',
- 'ContentEncoding',
- 'ContentLanguage',
- 'ContentType',
+
+ ALLOWED_UPLOAD_ARGS = [
+ 'ACL',
+ 'CacheControl',
+ 'ContentDisposition',
+ 'ContentEncoding',
+ 'ContentLanguage',
+ 'ContentType',
'ExpectedBucketOwner',
- 'Expires',
- 'GrantFullControl',
- 'GrantRead',
- 'GrantReadACP',
- 'GrantWriteACP',
- 'Metadata',
- 'RequestPayer',
- 'ServerSideEncryption',
- 'StorageClass',
- 'SSECustomerAlgorithm',
- 'SSECustomerKey',
- 'SSECustomerKeyMD5',
- 'SSEKMSKeyId',
+ 'Expires',
+ 'GrantFullControl',
+ 'GrantRead',
+ 'GrantReadACP',
+ 'GrantWriteACP',
+ 'Metadata',
+ 'RequestPayer',
+ 'ServerSideEncryption',
+ 'StorageClass',
+ 'SSECustomerAlgorithm',
+ 'SSECustomerKey',
+ 'SSECustomerKeyMD5',
+ 'SSEKMSKeyId',
'SSEKMSEncryptionContext',
'Tagging',
- 'WebsiteRedirectLocation'
- ]
-
- ALLOWED_COPY_ARGS = ALLOWED_UPLOAD_ARGS + [
- 'CopySourceIfMatch',
- 'CopySourceIfModifiedSince',
- 'CopySourceIfNoneMatch',
- 'CopySourceIfUnmodifiedSince',
- 'CopySourceSSECustomerAlgorithm',
- 'CopySourceSSECustomerKey',
- 'CopySourceSSECustomerKeyMD5',
+ 'WebsiteRedirectLocation'
+ ]
+
+ ALLOWED_COPY_ARGS = ALLOWED_UPLOAD_ARGS + [
+ 'CopySourceIfMatch',
+ 'CopySourceIfModifiedSince',
+ 'CopySourceIfNoneMatch',
+ 'CopySourceIfUnmodifiedSince',
+ 'CopySourceSSECustomerAlgorithm',
+ 'CopySourceSSECustomerKey',
+ 'CopySourceSSECustomerKeyMD5',
'MetadataDirective',
'TaggingDirective',
- ]
-
- ALLOWED_DELETE_ARGS = [
- 'MFA',
- 'VersionId',
- 'RequestPayer',
+ ]
+
+ ALLOWED_DELETE_ARGS = [
+ 'MFA',
+ 'VersionId',
+ 'RequestPayer',
'ExpectedBucketOwner'
- ]
-
+ ]
+
VALIDATE_SUPPORTED_BUCKET_VALUES = True
_UNSUPPORTED_BUCKET_PATTERNS = {
@@ -212,70 +212,70 @@ class TransferManager(object):
),
}
- def __init__(self, client, config=None, osutil=None, executor_cls=None):
- """A transfer manager interface for Amazon S3
-
- :param client: Client to be used by the manager
- :param config: TransferConfig to associate specific configurations
- :param osutil: OSUtils object to use for os-related behavior when
- using the transfer manager.
-
- :type executor_cls: s3transfer.futures.BaseExecutor
- :param executor_cls: The class of executor to use with the transfer
- manager. By default, concurrent.futures.ThreadPoolExecutor is used.
- """
- self._client = client
- self._config = config
- if config is None:
- self._config = TransferConfig()
- self._osutil = osutil
- if osutil is None:
- self._osutil = OSUtils()
- self._coordinator_controller = TransferCoordinatorController()
- # A counter to create unique id's for each transfer submitted.
- self._id_counter = 0
-
- # The executor responsible for making S3 API transfer requests
- self._request_executor = BoundedExecutor(
- max_size=self._config.max_request_queue_size,
- max_num_threads=self._config.max_request_concurrency,
- tag_semaphores={
- IN_MEMORY_UPLOAD_TAG: TaskSemaphore(
- self._config.max_in_memory_upload_chunks),
- IN_MEMORY_DOWNLOAD_TAG: SlidingWindowSemaphore(
- self._config.max_in_memory_download_chunks)
- },
- executor_cls=executor_cls
- )
-
- # The executor responsible for submitting the necessary tasks to
- # perform the desired transfer
- self._submission_executor = BoundedExecutor(
- max_size=self._config.max_submission_queue_size,
- max_num_threads=self._config.max_submission_concurrency,
- executor_cls=executor_cls
-
- )
-
- # There is one thread available for writing to disk. It will handle
- # downloads for all files.
- self._io_executor = BoundedExecutor(
- max_size=self._config.max_io_queue_size,
- max_num_threads=1,
- executor_cls=executor_cls
- )
-
- # The component responsible for limiting bandwidth usage if it
- # is configured.
- self._bandwidth_limiter = None
- if self._config.max_bandwidth is not None:
- logger.debug(
- 'Setting max_bandwidth to %s', self._config.max_bandwidth)
- leaky_bucket = LeakyBucket(self._config.max_bandwidth)
- self._bandwidth_limiter = BandwidthLimiter(leaky_bucket)
-
- self._register_handlers()
-
+ def __init__(self, client, config=None, osutil=None, executor_cls=None):
+ """A transfer manager interface for Amazon S3
+
+ :param client: Client to be used by the manager
+ :param config: TransferConfig to associate specific configurations
+ :param osutil: OSUtils object to use for os-related behavior when
+ using the transfer manager.
+
+ :type executor_cls: s3transfer.futures.BaseExecutor
+ :param executor_cls: The class of executor to use with the transfer
+ manager. By default, concurrent.futures.ThreadPoolExecutor is used.
+ """
+ self._client = client
+ self._config = config
+ if config is None:
+ self._config = TransferConfig()
+ self._osutil = osutil
+ if osutil is None:
+ self._osutil = OSUtils()
+ self._coordinator_controller = TransferCoordinatorController()
+ # A counter to create unique id's for each transfer submitted.
+ self._id_counter = 0
+
+ # The executor responsible for making S3 API transfer requests
+ self._request_executor = BoundedExecutor(
+ max_size=self._config.max_request_queue_size,
+ max_num_threads=self._config.max_request_concurrency,
+ tag_semaphores={
+ IN_MEMORY_UPLOAD_TAG: TaskSemaphore(
+ self._config.max_in_memory_upload_chunks),
+ IN_MEMORY_DOWNLOAD_TAG: SlidingWindowSemaphore(
+ self._config.max_in_memory_download_chunks)
+ },
+ executor_cls=executor_cls
+ )
+
+ # The executor responsible for submitting the necessary tasks to
+ # perform the desired transfer
+ self._submission_executor = BoundedExecutor(
+ max_size=self._config.max_submission_queue_size,
+ max_num_threads=self._config.max_submission_concurrency,
+ executor_cls=executor_cls
+
+ )
+
+ # There is one thread available for writing to disk. It will handle
+ # downloads for all files.
+ self._io_executor = BoundedExecutor(
+ max_size=self._config.max_io_queue_size,
+ max_num_threads=1,
+ executor_cls=executor_cls
+ )
+
+ # The component responsible for limiting bandwidth usage if it
+ # is configured.
+ self._bandwidth_limiter = None
+ if self._config.max_bandwidth is not None:
+ logger.debug(
+ 'Setting max_bandwidth to %s', self._config.max_bandwidth)
+ leaky_bucket = LeakyBucket(self._config.max_bandwidth)
+ self._bandwidth_limiter = BandwidthLimiter(leaky_bucket)
+
+ self._register_handlers()
+
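A minimal sketch of wiring the manager together, assuming botocore/boto3 credentials are already configured in the environment; passing NonThreadedExecutor as executor_cls (as the docstring above allows) keeps all work in the calling thread, which can be handy for debugging.

    import boto3

    from s3transfer.futures import NonThreadedExecutor
    from s3transfer.manager import TransferConfig, TransferManager

    s3_client = boto3.client('s3')
    manager = TransferManager(
        s3_client,
        config=TransferConfig(max_request_concurrency=4),
        executor_cls=NonThreadedExecutor,  # optional: run without worker threads
    )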
@property
def client(self):
return self._client
@@ -284,178 +284,178 @@ class TransferManager(object):
def config(self):
return self._config
- def upload(self, fileobj, bucket, key, extra_args=None, subscribers=None):
- """Uploads a file to S3
-
- :type fileobj: str or seekable file-like object
- :param fileobj: The name of a file to upload or a seekable file-like
- object to upload. It is recommended to use a filename because
- file-like objects may result in higher memory usage.
-
- :type bucket: str
- :param bucket: The name of the bucket to upload to
-
- :type key: str
- :param key: The name of the key to upload to
-
- :type extra_args: dict
- :param extra_args: Extra arguments that may be passed to the
- client operation
-
- :type subscribers: list(s3transfer.subscribers.BaseSubscriber)
- :param subscribers: The list of subscribers to be invoked in the
- order provided based on the events emitted during the process of
- the transfer request.
-
- :rtype: s3transfer.futures.TransferFuture
- :returns: Transfer future representing the upload
- """
- if extra_args is None:
- extra_args = {}
- if subscribers is None:
- subscribers = []
- self._validate_all_known_args(extra_args, self.ALLOWED_UPLOAD_ARGS)
+ def upload(self, fileobj, bucket, key, extra_args=None, subscribers=None):
+ """Uploads a file to S3
+
+ :type fileobj: str or seekable file-like object
+ :param fileobj: The name of a file to upload or a seekable file-like
+ object to upload. It is recommended to use a filename because
+ file-like objects may result in higher memory usage.
+
+ :type bucket: str
+ :param bucket: The name of the bucket to upload to
+
+ :type key: str
+ :param key: The name of the key to upload to
+
+ :type extra_args: dict
+ :param extra_args: Extra arguments that may be passed to the
+ client operation
+
+ :type subscribers: list(s3transfer.subscribers.BaseSubscriber)
+ :param subscribers: The list of subscribers to be invoked in the
+ order provided based on the events emitted during the process of
+ the transfer request.
+
+ :rtype: s3transfer.futures.TransferFuture
+ :returns: Transfer future representing the upload
+ """
+ if extra_args is None:
+ extra_args = {}
+ if subscribers is None:
+ subscribers = []
+ self._validate_all_known_args(extra_args, self.ALLOWED_UPLOAD_ARGS)
self._validate_if_bucket_supported(bucket)
- call_args = CallArgs(
- fileobj=fileobj, bucket=bucket, key=key, extra_args=extra_args,
- subscribers=subscribers
- )
- extra_main_kwargs = {}
- if self._bandwidth_limiter:
- extra_main_kwargs['bandwidth_limiter'] = self._bandwidth_limiter
- return self._submit_transfer(
- call_args, UploadSubmissionTask, extra_main_kwargs)
-
- def download(self, bucket, key, fileobj, extra_args=None,
- subscribers=None):
- """Downloads a file from S3
-
- :type bucket: str
- :param bucket: The name of the bucket to download from
-
- :type key: str
- :param key: The name of the key to download from
-
- :type fileobj: str or seekable file-like object
- :param fileobj: The name of a file to download or a seekable file-like
- object to download. It is recommended to use a filename because
- file-like objects may result in higher memory usage.
-
- :type extra_args: dict
- :param extra_args: Extra arguments that may be passed to the
- client operation
-
- :type subscribers: list(s3transfer.subscribers.BaseSubscriber)
- :param subscribers: The list of subscribers to be invoked in the
- order provided based on the events emitted during the process of
- the transfer request.
-
- :rtype: s3transfer.futures.TransferFuture
- :returns: Transfer future representing the download
- """
- if extra_args is None:
- extra_args = {}
- if subscribers is None:
- subscribers = []
- self._validate_all_known_args(extra_args, self.ALLOWED_DOWNLOAD_ARGS)
+ call_args = CallArgs(
+ fileobj=fileobj, bucket=bucket, key=key, extra_args=extra_args,
+ subscribers=subscribers
+ )
+ extra_main_kwargs = {}
+ if self._bandwidth_limiter:
+ extra_main_kwargs['bandwidth_limiter'] = self._bandwidth_limiter
+ return self._submit_transfer(
+ call_args, UploadSubmissionTask, extra_main_kwargs)
+
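Continuing the sketch started after __init__ above, a hedged usage example for upload(); the bucket, key, and file names are placeholders, and 'ContentType' is one of the ALLOWED_UPLOAD_ARGS listed earlier.

    future = manager.upload(
        '/tmp/report.csv', 'my-bucket', 'reports/report.csv',
        extra_args={'ContentType': 'text/csv'})
    future.result()  # blocks until the upload finishes or re-raises its failure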
+ def download(self, bucket, key, fileobj, extra_args=None,
+ subscribers=None):
+ """Downloads a file from S3
+
+ :type bucket: str
+ :param bucket: The name of the bucket to download from
+
+ :type key: str
+ :param key: The name of the key to download from
+
+ :type fileobj: str or seekable file-like object
+ :param fileobj: The name of a file to download or a seekable file-like
+ object to download. It is recommended to use a filename because
+ file-like objects may result in higher memory usage.
+
+ :type extra_args: dict
+ :param extra_args: Extra arguments that may be passed to the
+ client operation
+
+ :type subscribers: list(s3transfer.subscribers.BaseSubscriber)
+ :param subscribers: The list of subscribers to be invoked in the
+ order provided based on the events emitted during the process of
+ the transfer request.
+
+ :rtype: s3transfer.futures.TransferFuture
+ :returns: Transfer future representing the download
+ """
+ if extra_args is None:
+ extra_args = {}
+ if subscribers is None:
+ subscribers = []
+ self._validate_all_known_args(extra_args, self.ALLOWED_DOWNLOAD_ARGS)
self._validate_if_bucket_supported(bucket)
- call_args = CallArgs(
- bucket=bucket, key=key, fileobj=fileobj, extra_args=extra_args,
- subscribers=subscribers
- )
- extra_main_kwargs = {'io_executor': self._io_executor}
- if self._bandwidth_limiter:
- extra_main_kwargs['bandwidth_limiter'] = self._bandwidth_limiter
- return self._submit_transfer(
- call_args, DownloadSubmissionTask, extra_main_kwargs)
-
- def copy(self, copy_source, bucket, key, extra_args=None,
- subscribers=None, source_client=None):
- """Copies a file in S3
-
- :type copy_source: dict
- :param copy_source: The name of the source bucket, key name of the
- source object, and optional version ID of the source object. The
- dictionary format is:
- ``{'Bucket': 'bucket', 'Key': 'key', 'VersionId': 'id'}``. Note
- that the ``VersionId`` key is optional and may be omitted.
-
- :type bucket: str
- :param bucket: The name of the bucket to copy to
-
- :type key: str
- :param key: The name of the key to copy to
-
- :type extra_args: dict
- :param extra_args: Extra arguments that may be passed to the
- client operation
-
- :type subscribers: a list of subscribers
- :param subscribers: The list of subscribers to be invoked in the
- order provided based on the events emitted during the process of
- the transfer request.
-
- :type source_client: botocore or boto3 Client
- :param source_client: The client to be used for operations that
- may happen on the source object. For example, this client is
- used for the head_object that determines the size of the copy.
- If no client is provided, the transfer manager's client is used
- as the client for the source object.
-
- :rtype: s3transfer.futures.TransferFuture
- :returns: Transfer future representing the copy
- """
- if extra_args is None:
- extra_args = {}
- if subscribers is None:
- subscribers = []
- if source_client is None:
- source_client = self._client
- self._validate_all_known_args(extra_args, self.ALLOWED_COPY_ARGS)
+ call_args = CallArgs(
+ bucket=bucket, key=key, fileobj=fileobj, extra_args=extra_args,
+ subscribers=subscribers
+ )
+ extra_main_kwargs = {'io_executor': self._io_executor}
+ if self._bandwidth_limiter:
+ extra_main_kwargs['bandwidth_limiter'] = self._bandwidth_limiter
+ return self._submit_transfer(
+ call_args, DownloadSubmissionTask, extra_main_kwargs)
+
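A corresponding hedged sketch for download(), this time with a progress subscriber; the on_progress signature is assumed from s3transfer.subscribers.BaseSubscriber (it is not shown in this diff), so treat it as an assumption.

    from s3transfer.subscribers import BaseSubscriber

    class PrintProgress(BaseSubscriber):
        def on_progress(self, future, bytes_transferred, **kwargs):
            # Called as data is streamed; bytes_transferred is the increment.
            print('downloaded %d more bytes' % bytes_transferred)

    future = manager.download(
        'my-bucket', 'reports/report.csv', '/tmp/report.csv',
        subscribers=[PrintProgress()])
    future.result()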
+ def copy(self, copy_source, bucket, key, extra_args=None,
+ subscribers=None, source_client=None):
+ """Copies a file in S3
+
+ :type copy_source: dict
+ :param copy_source: The name of the source bucket, key name of the
+ source object, and optional version ID of the source object. The
+ dictionary format is:
+ ``{'Bucket': 'bucket', 'Key': 'key', 'VersionId': 'id'}``. Note
+ that the ``VersionId`` key is optional and may be omitted.
+
+ :type bucket: str
+ :param bucket: The name of the bucket to copy to
+
+ :type key: str
+ :param key: The name of the key to copy to
+
+ :type extra_args: dict
+ :param extra_args: Extra arguments that may be passed to the
+ client operation
+
+ :type subscribers: a list of subscribers
+ :param subscribers: The list of subscribers to be invoked in the
+ order provided based on the events emitted during the process of
+ the transfer request.
+
+ :type source_client: botocore or boto3 Client
+ :param source_client: The client to be used for operations that
+ may happen on the source object. For example, this client is
+ used for the head_object that determines the size of the copy.
+ If no client is provided, the transfer manager's client is used
+ as the client for the source object.
+
+ :rtype: s3transfer.futures.TransferFuture
+ :returns: Transfer future representing the copy
+ """
+ if extra_args is None:
+ extra_args = {}
+ if subscribers is None:
+ subscribers = []
+ if source_client is None:
+ source_client = self._client
+ self._validate_all_known_args(extra_args, self.ALLOWED_COPY_ARGS)
if isinstance(copy_source, dict):
self._validate_if_bucket_supported(copy_source.get('Bucket'))
self._validate_if_bucket_supported(bucket)
- call_args = CallArgs(
- copy_source=copy_source, bucket=bucket, key=key,
- extra_args=extra_args, subscribers=subscribers,
- source_client=source_client
- )
- return self._submit_transfer(call_args, CopySubmissionTask)
-
- def delete(self, bucket, key, extra_args=None, subscribers=None):
- """Delete an S3 object.
-
- :type bucket: str
- :param bucket: The name of the bucket.
-
- :type key: str
- :param key: The name of the S3 object to delete.
-
- :type extra_args: dict
- :param extra_args: Extra arguments that may be passed to the
- DeleteObject call.
-
- :type subscribers: list
- :param subscribers: A list of subscribers to be invoked during the
- process of the transfer request. Note that the ``on_progress``
- callback is not invoked during object deletion.
-
- :rtype: s3transfer.futures.TransferFuture
- :return: Transfer future representing the deletion.
-
- """
- if extra_args is None:
- extra_args = {}
- if subscribers is None:
- subscribers = []
- self._validate_all_known_args(extra_args, self.ALLOWED_DELETE_ARGS)
+ call_args = CallArgs(
+ copy_source=copy_source, bucket=bucket, key=key,
+ extra_args=extra_args, subscribers=subscribers,
+ source_client=source_client
+ )
+ return self._submit_transfer(call_args, CopySubmissionTask)
+
+ def delete(self, bucket, key, extra_args=None, subscribers=None):
+ """Delete an S3 object.
+
+ :type bucket: str
+ :param bucket: The name of the bucket.
+
+ :type key: str
+ :param key: The name of the S3 object to delete.
+
+ :type extra_args: dict
+ :param extra_args: Extra arguments that may be passed to the
+ DeleteObject call.
+
+ :type subscribers: list
+ :param subscribers: A list of subscribers to be invoked during the
+ process of the transfer request. Note that the ``on_progress``
+ callback is not invoked during object deletion.
+
+ :rtype: s3transfer.futures.TransferFuture
+ :return: Transfer future representing the deletion.
+
+ """
+ if extra_args is None:
+ extra_args = {}
+ if subscribers is None:
+ subscribers = []
+ self._validate_all_known_args(extra_args, self.ALLOWED_DELETE_ARGS)
self._validate_if_bucket_supported(bucket)
- call_args = CallArgs(
- bucket=bucket, key=key, extra_args=extra_args,
- subscribers=subscribers
- )
- return self._submit_transfer(call_args, DeleteSubmissionTask)
-
+ call_args = CallArgs(
+ bucket=bucket, key=key, extra_args=extra_args,
+ subscribers=subscribers
+ )
+ return self._submit_transfer(call_args, DeleteSubmissionTask)
+
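Hedged sketches for copy() and delete() in the same vein; the identifiers are placeholders, and 'RequestPayer' is one of the ALLOWED_DELETE_ARGS listed above.

    copy_future = manager.copy(
        {'Bucket': 'source-bucket', 'Key': 'reports/report.csv'},
        'my-bucket', 'backups/report.csv')
    copy_future.result()

    delete_future = manager.delete(
        'my-bucket', 'reports/report.csv',
        extra_args={'RequestPayer': 'requester'})
    delete_future.result()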
def _validate_if_bucket_supported(self, bucket):
# s3 high level operations don't support some resources
# (eg. S3 Object Lambda) only direct API calls are available
@@ -469,229 +469,229 @@ class TransferManager(object):
'resource. Use direct client calls instead.' % resource
)
- def _validate_all_known_args(self, actual, allowed):
- for kwarg in actual:
- if kwarg not in allowed:
- raise ValueError(
- "Invalid extra_args key '%s', "
- "must be one of: %s" % (
- kwarg, ', '.join(allowed)))
-
- def _submit_transfer(self, call_args, submission_task_cls,
- extra_main_kwargs=None):
- if not extra_main_kwargs:
- extra_main_kwargs = {}
-
- # Create a TransferFuture to return back to the user
- transfer_future, components = self._get_future_with_components(
- call_args)
-
- # Add any provided done callbacks to the created transfer future
- # to be invoked on the transfer future being complete.
- for callback in get_callbacks(transfer_future, 'done'):
- components['coordinator'].add_done_callback(callback)
-
- # Get the main kwargs needed to instantiate the submission task
- main_kwargs = self._get_submission_task_main_kwargs(
- transfer_future, extra_main_kwargs)
-
- # Submit a SubmissionTask that will submit all of the necessary
- # tasks needed to complete the S3 transfer.
- self._submission_executor.submit(
- submission_task_cls(
- transfer_coordinator=components['coordinator'],
- main_kwargs=main_kwargs
- )
- )
-
- # Increment the unique id counter for future transfer requests
- self._id_counter += 1
-
- return transfer_future
-
- def _get_future_with_components(self, call_args):
- transfer_id = self._id_counter
- # Creates a new transfer future along with its components
- transfer_coordinator = TransferCoordinator(transfer_id=transfer_id)
- # Track the transfer coordinator for transfers to manage.
- self._coordinator_controller.add_transfer_coordinator(
- transfer_coordinator)
- # Also make sure that the transfer coordinator is removed once
- # the transfer completes so it does not stick around in memory.
- transfer_coordinator.add_done_callback(
- self._coordinator_controller.remove_transfer_coordinator,
- transfer_coordinator)
- components = {
- 'meta': TransferMeta(call_args, transfer_id=transfer_id),
- 'coordinator': transfer_coordinator
- }
- transfer_future = TransferFuture(**components)
- return transfer_future, components
-
- def _get_submission_task_main_kwargs(
- self, transfer_future, extra_main_kwargs):
- main_kwargs = {
- 'client': self._client,
- 'config': self._config,
- 'osutil': self._osutil,
- 'request_executor': self._request_executor,
- 'transfer_future': transfer_future
- }
- main_kwargs.update(extra_main_kwargs)
- return main_kwargs
-
- def _register_handlers(self):
- # Register handlers to enable/disable callbacks on uploads.
- event_name = 'request-created.s3'
- self._client.meta.events.register_first(
- event_name, signal_not_transferring,
- unique_id='s3upload-not-transferring')
- self._client.meta.events.register_last(
- event_name, signal_transferring,
- unique_id='s3upload-transferring')
-
- def __enter__(self):
- return self
-
- def __exit__(self, exc_type, exc_value, *args):
- cancel = False
- cancel_msg = ''
- cancel_exc_type = FatalError
- # If an exception was raised in the context handler, signal to cancel
- # all of the inprogress futures in the shutdown.
- if exc_type:
- cancel = True
- cancel_msg = six.text_type(exc_value)
- if not cancel_msg:
- cancel_msg = repr(exc_value)
- # If it was a KeyboardInterrupt, the cancellation was initiated
- # by the user.
- if isinstance(exc_value, KeyboardInterrupt):
- cancel_exc_type = CancelledError
- self._shutdown(cancel, cancel_msg, cancel_exc_type)
-
- def shutdown(self, cancel=False, cancel_msg=''):
- """Shutdown the TransferManager
-
- It will wait till all transfers complete before it completely shuts
- down.
-
- :type cancel: boolean
- :param cancel: If True, calls TransferFuture.cancel() for
- all in-progress transfers. This is useful if you want the
- shutdown to happen quicker.
-
- :type cancel_msg: str
- :param cancel_msg: The message to specify if canceling all in-progress
- transfers.
- """
- self._shutdown(cancel, cancel_msg)
-
- def _shutdown(self, cancel, cancel_msg, exc_type=CancelledError):
- if cancel:
- # Cancel all in-flight transfers if requested, before waiting
- # for them to complete.
- self._coordinator_controller.cancel(cancel_msg, exc_type)
- try:
- # Wait until there are no more in-progress transfers. This is
- # wrapped in a try statement because this can be interrupted
- # with a KeyboardInterrupt that needs to be caught.
- self._coordinator_controller.wait()
- except KeyboardInterrupt:
- # If no errors were raised in the try block, the cancel should
- # have no coordinators it needs to run cancel on. If there was
- # an error raised in the try statement we want to cancel all of
- # the inflight transfers before shutting down to speed that
- # process up.
- self._coordinator_controller.cancel('KeyboardInterrupt()')
- raise
- finally:
- # Shutdown all of the executors.
- self._submission_executor.shutdown()
- self._request_executor.shutdown()
- self._io_executor.shutdown()
-
-
-class TransferCoordinatorController(object):
- def __init__(self):
- """Abstraction to control all transfer coordinators
-
- This abstraction allows the manager to wait for inprogress transfers
- to complete and cancel all inprogress transfers.
- """
- self._lock = threading.Lock()
- self._tracked_transfer_coordinators = set()
-
- @property
- def tracked_transfer_coordinators(self):
- """The set of transfer coordinators being tracked"""
- with self._lock:
- # We return a copy because the set is mutable and if you were to
- # iterate over the set, it may be changing in length due to
- # additions and removals of transfer coordinators.
- return copy.copy(self._tracked_transfer_coordinators)
-
- def add_transfer_coordinator(self, transfer_coordinator):
- """Adds a transfer coordinator of a transfer to be canceled if needed
-
- :type transfer_coordinator: s3transfer.futures.TransferCoordinator
- :param transfer_coordinator: The transfer coordinator for the
- particular transfer
- """
- with self._lock:
- self._tracked_transfer_coordinators.add(transfer_coordinator)
-
- def remove_transfer_coordinator(self, transfer_coordinator):
- """Remove a transfer coordinator from cancelation consideration
-
- Typically, this method is invoked by the transfer coordinator itself
- to remove itself when it completes its transfer.
-
- :type transfer_coordinator: s3transfer.futures.TransferCoordinator
- :param transfer_coordinator: The transfer coordinator for the
- particular transfer
- """
- with self._lock:
- self._tracked_transfer_coordinators.remove(transfer_coordinator)
-
- def cancel(self, msg='', exc_type=CancelledError):
- """Cancels all inprogress transfers
-
- This cancels the inprogress transfers by calling cancel() on all
- tracked transfer coordinators.
-
- :param msg: The message to pass on to each transfer coordinator that
- gets cancelled.
-
- :param exc_type: The type of exception to set for the cancellation
- """
- for transfer_coordinator in self.tracked_transfer_coordinators:
- transfer_coordinator.cancel(msg, exc_type)
-
- def wait(self):
- """Wait until there are no more inprogress transfers
-
- This will not stop when failures are encountered nor propagate any
- of these errors from failed transfers, but it can be interrupted with
- a KeyboardInterrupt.
- """
- try:
- transfer_coordinator = None
- for transfer_coordinator in self.tracked_transfer_coordinators:
- transfer_coordinator.result()
- except KeyboardInterrupt:
- logger.debug('Received KeyboardInterrupt in wait()')
- # If Keyboard interrupt is raised while waiting for
- # the result, then exit out of the wait and raise the
- # exception
- if transfer_coordinator:
- logger.debug(
- 'On KeyboardInterrupt was waiting for %s',
- transfer_coordinator)
- raise
- except Exception:
- # A general exception could have been thrown because
- # of result(). We just want to ignore this and continue
- # because we at least know that the transfer coordinator
- # has completed.
- pass
+ def _validate_all_known_args(self, actual, allowed):
+ for kwarg in actual:
+ if kwarg not in allowed:
+ raise ValueError(
+ "Invalid extra_args key '%s', "
+ "must be one of: %s" % (
+ kwarg, ', '.join(allowed)))
+
+ def _submit_transfer(self, call_args, submission_task_cls,
+ extra_main_kwargs=None):
+ if not extra_main_kwargs:
+ extra_main_kwargs = {}
+
+ # Create a TransferFuture to return back to the user
+ transfer_future, components = self._get_future_with_components(
+ call_args)
+
+ # Add any provided done callbacks to the created transfer future
+ # to be invoked on the transfer future being complete.
+ for callback in get_callbacks(transfer_future, 'done'):
+ components['coordinator'].add_done_callback(callback)
+
+ # Get the main kwargs needed to instantiate the submission task
+ main_kwargs = self._get_submission_task_main_kwargs(
+ transfer_future, extra_main_kwargs)
+
+ # Submit a SubmissionTask that will submit all of the necessary
+ # tasks needed to complete the S3 transfer.
+ self._submission_executor.submit(
+ submission_task_cls(
+ transfer_coordinator=components['coordinator'],
+ main_kwargs=main_kwargs
+ )
+ )
+
+ # Increment the unique id counter for future transfer requests
+ self._id_counter += 1
+
+ return transfer_future
+
+ def _get_future_with_components(self, call_args):
+ transfer_id = self._id_counter
+ # Creates a new transfer future along with its components
+ transfer_coordinator = TransferCoordinator(transfer_id=transfer_id)
+ # Track the transfer coordinator for transfers to manage.
+ self._coordinator_controller.add_transfer_coordinator(
+ transfer_coordinator)
+ # Also make sure that the transfer coordinator is removed once
+ # the transfer completes so it does not stick around in memory.
+ transfer_coordinator.add_done_callback(
+ self._coordinator_controller.remove_transfer_coordinator,
+ transfer_coordinator)
+ components = {
+ 'meta': TransferMeta(call_args, transfer_id=transfer_id),
+ 'coordinator': transfer_coordinator
+ }
+ transfer_future = TransferFuture(**components)
+ return transfer_future, components
+
+ def _get_submission_task_main_kwargs(
+ self, transfer_future, extra_main_kwargs):
+ main_kwargs = {
+ 'client': self._client,
+ 'config': self._config,
+ 'osutil': self._osutil,
+ 'request_executor': self._request_executor,
+ 'transfer_future': transfer_future
+ }
+ main_kwargs.update(extra_main_kwargs)
+ return main_kwargs
+
+ def _register_handlers(self):
+ # Register handlers to enable/disable callbacks on uploads.
+ event_name = 'request-created.s3'
+ self._client.meta.events.register_first(
+ event_name, signal_not_transferring,
+ unique_id='s3upload-not-transferring')
+ self._client.meta.events.register_last(
+ event_name, signal_transferring,
+ unique_id='s3upload-transferring')
+
+ def __enter__(self):
+ return self
+
+ def __exit__(self, exc_type, exc_value, *args):
+ cancel = False
+ cancel_msg = ''
+ cancel_exc_type = FatalError
+ # If an exception was raised in the context handler, signal to cancel
+ # all of the inprogress futures in the shutdown.
+ if exc_type:
+ cancel = True
+ cancel_msg = six.text_type(exc_value)
+ if not cancel_msg:
+ cancel_msg = repr(exc_value)
+ # If it was a KeyboardInterrupt, the cancellation was initiated
+ # by the user.
+ if isinstance(exc_value, KeyboardInterrupt):
+ cancel_exc_type = CancelledError
+ self._shutdown(cancel, cancel_msg, cancel_exc_type)
+
+ def shutdown(self, cancel=False, cancel_msg=''):
+ """Shutdown the TransferManager
+
+ It will wait till all transfers complete before it completely shuts
+ down.
+
+ :type cancel: boolean
+ :param cancel: If True, calls TransferFuture.cancel() for
+ all in-progress transfers. This is useful if you want the
+ shutdown to happen quicker.
+
+ :type cancel_msg: str
+ :param cancel_msg: The message to specify if canceling all in-progress
+ transfers.
+ """
+ self._shutdown(cancel, cancel_msg)
+
+ def _shutdown(self, cancel, cancel_msg, exc_type=CancelledError):
+ if cancel:
+ # Cancel all in-flight transfers if requested, before waiting
+ # for them to complete.
+ self._coordinator_controller.cancel(cancel_msg, exc_type)
+ try:
+ # Wait until there are no more in-progress transfers. This is
+ # wrapped in a try statement because this can be interrupted
+ # with a KeyboardInterrupt that needs to be caught.
+ self._coordinator_controller.wait()
+ except KeyboardInterrupt:
+ # If no errors were raised in the try block, the cancel should
+ # have no coordinators it needs to run cancel on. If there was
+ # an error raised in the try statement we want to cancel all of
+ # the inflight transfers before shutting down to speed that
+ # process up.
+ self._coordinator_controller.cancel('KeyboardInterrupt()')
+ raise
+ finally:
+ # Shutdown all of the executors.
+ self._submission_executor.shutdown()
+ self._request_executor.shutdown()
+ self._io_executor.shutdown()
+
+
+class TransferCoordinatorController(object):
+ def __init__(self):
+ """Abstraction to control all transfer coordinators
+
+ This abstraction allows the manager to wait for inprogress transfers
+ to complete and cancel all inprogress transfers.
+ """
+ self._lock = threading.Lock()
+ self._tracked_transfer_coordinators = set()
+
+ @property
+ def tracked_transfer_coordinators(self):
+ """The set of transfer coordinators being tracked"""
+ with self._lock:
+ # We return a copy because the set is mutable and if you were to
+ # iterate over the set, it may be changing in length due to
+ # additions and removals of transfer coordinators.
+ return copy.copy(self._tracked_transfer_coordinators)
+
+ def add_transfer_coordinator(self, transfer_coordinator):
+ """Adds a transfer coordinator of a transfer to be canceled if needed
+
+ :type transfer_coordinator: s3transfer.futures.TransferCoordinator
+ :param transfer_coordinator: The transfer coordinator for the
+ particular transfer
+ """
+ with self._lock:
+ self._tracked_transfer_coordinators.add(transfer_coordinator)
+
+ def remove_transfer_coordinator(self, transfer_coordinator):
+ """Remove a transfer coordinator from cancelation consideration
+
+ Typically, this method is invoked by the transfer coordinator itself
+ to remove itself when it completes its transfer.
+
+ :type transfer_coordinator: s3transfer.futures.TransferCoordinator
+ :param transfer_coordinator: The transfer coordinator for the
+ particular transfer
+ """
+ with self._lock:
+ self._tracked_transfer_coordinators.remove(transfer_coordinator)
+
+ def cancel(self, msg='', exc_type=CancelledError):
+ """Cancels all inprogress transfers
+
+ This cancels the inprogress transfers by calling cancel() on all
+ tracked transfer coordinators.
+
+ :param msg: The message to pass on to each transfer coordinator that
+ gets cancelled.
+
+ :param exc_type: The type of exception to set for the cancellation
+ """
+ for transfer_coordinator in self.tracked_transfer_coordinators:
+ transfer_coordinator.cancel(msg, exc_type)
+
+ def wait(self):
+ """Wait until there are no more inprogress transfers
+
+ This will not stop when failures are encountered nor propagate any
+ of these errors from failed transfers, but it can be interrupted with
+ a KeyboardInterrupt.
+ """
+ try:
+ transfer_coordinator = None
+ for transfer_coordinator in self.tracked_transfer_coordinators:
+ transfer_coordinator.result()
+ except KeyboardInterrupt:
+ logger.debug('Received KeyboardInterrupt in wait()')
+ # If Keyboard interrupt is raised while waiting for
+ # the result, then exit out of the wait and raise the
+ # exception
+ if transfer_coordinator:
+ logger.debug(
+ 'On KeyboardInterrupt was waiting for %s',
+ transfer_coordinator)
+ raise
+ except Exception:
+ # A general exception could have been thrown because
+ # of result(). We just want to ignore this and continue
+ # because we at least know that the transfer coordinator
+ # has completed.
+ pass
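
A minimal sketch of how the TransferManager surface restored in the hunk above is typically driven, assuming a botocore S3 client and placeholder bucket/key names (none of which come from this change):

import botocore.session

from s3transfer.manager import TransferManager

session = botocore.session.get_session()
client = session.create_client('s3')

# Using the manager as a context manager lets __exit__() handle shutdown,
# cancelling in-flight transfers if an exception escapes the block.
with TransferManager(client) as manager:
    future = manager.delete('my-bucket', 'my-key')
    future.result()  # blocks until the DeleteObject call completes

Outside a with block, calling shutdown() (optionally with cancel=True) performs the same cleanup.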
diff --git a/contrib/python/s3transfer/py2/s3transfer/subscribers.py b/contrib/python/s3transfer/py2/s3transfer/subscribers.py
index 41698cbbb1..329b01ec9d 100644
--- a/contrib/python/s3transfer/py2/s3transfer/subscribers.py
+++ b/contrib/python/s3transfer/py2/s3transfer/subscribers.py
@@ -1,95 +1,95 @@
-# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License"). You
-# may not use this file except in compliance with the License. A copy of
-# the License is located at
-#
-# http://aws.amazon.com/apache2.0/
-#
-# or in the "license" file accompanying this file. This file is
-# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
-# ANY KIND, either express or implied. See the License for the specific
-# language governing permissions and limitations under the License.
-from botocore.compat import six
-
-from s3transfer.compat import accepts_kwargs
-from s3transfer.exceptions import InvalidSubscriberMethodError
-
-
-class BaseSubscriber(object):
- """The base subscriber class
-
- It is recommended that all subscriber implementations subclass and then
- override the subscription methods (i.e. on_{subscribe_type}() methods).
- """
- VALID_SUBSCRIBER_TYPES = [
- 'queued',
- 'progress',
- 'done'
- ]
-
- def __new__(cls, *args, **kwargs):
- cls._validate_subscriber_methods()
- return super(BaseSubscriber, cls).__new__(cls)
-
- @classmethod
- def _validate_subscriber_methods(cls):
- for subscriber_type in cls.VALID_SUBSCRIBER_TYPES:
- subscriber_method = getattr(cls, 'on_' + subscriber_type)
- if not six.callable(subscriber_method):
- raise InvalidSubscriberMethodError(
- 'Subscriber method %s must be callable.' %
- subscriber_method)
-
- if not accepts_kwargs(subscriber_method):
- raise InvalidSubscriberMethodError(
- 'Subscriber method %s must accept keyword '
- 'arguments (**kwargs)' % subscriber_method)
-
- def on_queued(self, future, **kwargs):
- """Callback to be invoked when transfer request gets queued
-
- This callback can be useful for:
-
- * Keeping track of how many transfers have been requested
- * Providing the expected transfer size through
- future.meta.provide_transfer_size() so a HeadObject would not
- need to be made for copies and downloads.
-
- :type future: s3transfer.futures.TransferFuture
- :param future: The TransferFuture representing the requested transfer.
- """
- pass
-
- def on_progress(self, future, bytes_transferred, **kwargs):
- """Callback to be invoked when progress is made on transfer
-
- This callback can be useful for:
-
- * Recording and displaying progress
-
- :type future: s3transfer.futures.TransferFuture
- :param future: The TransferFuture representing the requested transfer.
-
- :type bytes_transferred: int
- :param bytes_transferred: The number of bytes transferred for that
- invocation of the callback. Note that a negative amount can be
- provided, which usually indicates that an in-progress request
- needed to be retried and thus progress was rewound.
- """
- pass
-
- def on_done(self, future, **kwargs):
- """Callback to be invoked once a transfer is done
-
- This callback can be useful for:
-
- * Recording and displaying whether the transfer succeeded or
- failed using future.result()
- * Running some task after the transfer completed like changing
- the last modified time of a downloaded file.
-
- :type future: s3transfer.futures.TransferFuture
- :param future: The TransferFuture representing the requested transfer.
- """
- pass
+# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You
+# may not use this file except in compliance with the License. A copy of
+# the License is located at
+#
+# http://aws.amazon.com/apache2.0/
+#
+# or in the "license" file accompanying this file. This file is
+# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific
+# language governing permissions and limitations under the License.
+from botocore.compat import six
+
+from s3transfer.compat import accepts_kwargs
+from s3transfer.exceptions import InvalidSubscriberMethodError
+
+
+class BaseSubscriber(object):
+ """The base subscriber class
+
+ It is recommended that all subscriber implementations subclass and then
+ override the subscription methods (i.e. on_{subscribe_type}() methods).
+ """
+ VALID_SUBSCRIBER_TYPES = [
+ 'queued',
+ 'progress',
+ 'done'
+ ]
+
+ def __new__(cls, *args, **kwargs):
+ cls._validate_subscriber_methods()
+ return super(BaseSubscriber, cls).__new__(cls)
+
+ @classmethod
+ def _validate_subscriber_methods(cls):
+ for subscriber_type in cls.VALID_SUBSCRIBER_TYPES:
+ subscriber_method = getattr(cls, 'on_' + subscriber_type)
+ if not six.callable(subscriber_method):
+ raise InvalidSubscriberMethodError(
+ 'Subscriber method %s must be callable.' %
+ subscriber_method)
+
+ if not accepts_kwargs(subscriber_method):
+ raise InvalidSubscriberMethodError(
+ 'Subscriber method %s must accept keyword '
+ 'arguments (**kwargs)' % subscriber_method)
+
+ def on_queued(self, future, **kwargs):
+ """Callback to be invoked when transfer request gets queued
+
+ This callback can be useful for:
+
+ * Keeping track of how many transfers have been requested
+ * Providing the expected transfer size through
+ future.meta.provide_transfer_size() so a HeadObject would not
+ need to be made for copies and downloads.
+
+ :type future: s3transfer.futures.TransferFuture
+ :param future: The TransferFuture representing the requested transfer.
+ """
+ pass
+
+ def on_progress(self, future, bytes_transferred, **kwargs):
+ """Callback to be invoked when progress is made on transfer
+
+ This callback can be useful for:
+
+ * Recording and displaying progress
+
+ :type future: s3transfer.futures.TransferFuture
+ :param future: The TransferFuture representing the requested transfer.
+
+ :type bytes_transferred: int
+ :param bytes_transferred: The number of bytes transferred for that
+ invocation of the callback. Note that a negative amount can be
+ provided, which usually indicates that an in-progress request
+ needed to be retried and thus progress was rewound.
+ """
+ pass
+
+ def on_done(self, future, **kwargs):
+ """Callback to be invoked once a transfer is done
+
+ This callback can be useful for:
+
+ * Recording and displaying whether the transfer succeeded or
+ failed using future.result()
+ * Running some task after the transfer completed like changing
+ the last modified time of a downloaded file.
+
+ :type future: s3transfer.futures.TransferFuture
+ :param future: The TransferFuture representing the requested transfer.
+ """
+ pass
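
A minimal subscriber sketch against the BaseSubscriber contract above; the ProgressPrinter name is illustrative only. Every hook must accept **kwargs, otherwise _validate_subscriber_methods() raises InvalidSubscriberMethodError:

from s3transfer.subscribers import BaseSubscriber


class ProgressPrinter(BaseSubscriber):
    """Prints a running total of bytes transferred for one transfer."""

    def __init__(self):
        self._total = 0

    def on_progress(self, future, bytes_transferred, **kwargs):
        # bytes_transferred may be negative when a retried request rewinds
        # progress, so accumulate rather than assume monotonic growth.
        self._total += bytes_transferred
        print('%s bytes transferred so far' % self._total)

Instances are passed per request through the subscribers argument of the transfer methods, e.g. subscribers=[ProgressPrinter()].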
diff --git a/contrib/python/s3transfer/py2/s3transfer/tasks.py b/contrib/python/s3transfer/py2/s3transfer/tasks.py
index dcb407f1a4..1d314216e4 100644
--- a/contrib/python/s3transfer/py2/s3transfer/tasks.py
+++ b/contrib/python/s3transfer/py2/s3transfer/tasks.py
@@ -1,364 +1,364 @@
-# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License"). You
-# may not use this file except in compliance with the License. A copy of
-# the License is located at
-#
-# http://aws.amazon.com/apache2.0/
-#
-# or in the "license" file accompanying this file. This file is
-# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
-# ANY KIND, either express or implied. See the License for the specific
-# language governing permissions and limitations under the License.
-import copy
-import logging
-
-from s3transfer.utils import get_callbacks
-
-
-logger = logging.getLogger(__name__)
-
-
-class Task(object):
- """A task associated to a TransferFuture request
-
- This is a base class for other classes to subclass from. All subclassed
- classes must implement the main() method.
- """
- def __init__(self, transfer_coordinator, main_kwargs=None,
- pending_main_kwargs=None, done_callbacks=None,
- is_final=False):
- """
- :type transfer_coordinator: s3transfer.futures.TransferCoordinator
- :param transfer_coordinator: The context associated to the
- TransferFuture for which this Task is associated with.
-
- :type main_kwargs: dict
- :param main_kwargs: The keyword args that can be immediately supplied
- to the _main() method of the task
-
- :type pending_main_kwargs: dict
- :param pending_main_kwargs: The keyword args that are depended upon
- by the result from a dependent future(s). The result returned by
- the future(s) will be used as the value for the keyword argument
- when _main() is called. The values for each key can be:
- * a single future - Once completed, its value will be the
- result of that single future
- * a list of futures - Once all of the futures complete, the
- value used will be a list of each completed future result
- value in order of when they were originally supplied.
-
- :type done_callbacks: list of callbacks
- :param done_callbacks: A list of callbacks to call once the task is
- done completing. Each callback will be called with no arguments
- and will be called no matter if the task succeeds or an exception
- is raised.
-
- :type is_final: boolean
- :param is_final: True, to indicate that this task is the final task
- for the TransferFuture request. By setting this value to True, it
- will set the result of the entire TransferFuture to the result
- returned by this task's main() method.
- """
- self._transfer_coordinator = transfer_coordinator
-
- self._main_kwargs = main_kwargs
- if self._main_kwargs is None:
- self._main_kwargs = {}
-
- self._pending_main_kwargs = pending_main_kwargs
- if pending_main_kwargs is None:
- self._pending_main_kwargs = {}
-
- self._done_callbacks = done_callbacks
- if self._done_callbacks is None:
- self._done_callbacks = []
-
- self._is_final = is_final
-
- def __repr__(self):
- # These are the general main_kwarg parameters that we want to
- # display in the repr.
- params_to_display = [
- 'bucket', 'key', 'part_number', 'final_filename',
- 'transfer_future', 'offset', 'extra_args'
- ]
- main_kwargs_to_display = self._get_kwargs_with_params_to_include(
- self._main_kwargs, params_to_display)
- return '%s(transfer_id=%s, %s)' % (
- self.__class__.__name__, self._transfer_coordinator.transfer_id,
- main_kwargs_to_display)
-
- @property
- def transfer_id(self):
- """The id for the transfer request that the task belongs to"""
- return self._transfer_coordinator.transfer_id
-
- def _get_kwargs_with_params_to_include(self, kwargs, include):
- filtered_kwargs = {}
- for param in include:
- if param in kwargs:
- filtered_kwargs[param] = kwargs[param]
- return filtered_kwargs
-
- def _get_kwargs_with_params_to_exclude(self, kwargs, exclude):
- filtered_kwargs = {}
- for param, value in kwargs.items():
- if param in exclude:
- continue
- filtered_kwargs[param] = value
- return filtered_kwargs
-
- def __call__(self):
- """The callable to use when submitting a Task to an executor"""
- try:
- # Wait for all of the futures this task depends on.
- self._wait_on_dependent_futures()
- # Gather up all of the main keyword arguments for main().
- # This includes the immediately provided main_kwargs and
- # the values for pending_main_kwargs that source from the return
- # values from the task's dependent futures.
- kwargs = self._get_all_main_kwargs()
- # If the task is not done (really only if some other related
- # task to the TransferFuture had failed) then execute the task's
- # main() method.
- if not self._transfer_coordinator.done():
- return self._execute_main(kwargs)
- except Exception as e:
- self._log_and_set_exception(e)
- finally:
- # Run any done callbacks associated to the task no matter what.
- for done_callback in self._done_callbacks:
- done_callback()
-
- if self._is_final:
- # If this is the final task announce that it is done if results
- # are waiting on its completion.
- self._transfer_coordinator.announce_done()
-
- def _execute_main(self, kwargs):
- # Do not display keyword args that should not be printed, especially
- # if they are going to make the logs hard to follow.
- params_to_exclude = ['data']
- kwargs_to_display = self._get_kwargs_with_params_to_exclude(
- kwargs, params_to_exclude)
- # Log what is about to be executed.
- logger.debug(
- "Executing task %s with kwargs %s" % (self, kwargs_to_display)
- )
-
- return_value = self._main(**kwargs)
- # If the task is the final task, then set the TransferFuture's
- # value to the return value from main().
- if self._is_final:
- self._transfer_coordinator.set_result(return_value)
- return return_value
-
- def _log_and_set_exception(self, exception):
- # If an exception is ever thrown then set the exception for the
- # entire TransferFuture.
- logger.debug("Exception raised.", exc_info=True)
- self._transfer_coordinator.set_exception(exception)
-
- def _main(self, **kwargs):
- """The method that will be ran in the executor
-
- This method must be implemented by subclasses from Task. main() can
- be implemented with any arguments decided upon by the subclass.
- """
- raise NotImplementedError('_main() must be implemented')
-
- def _wait_on_dependent_futures(self):
- # Gather all of the futures that main() depends on.
- futures_to_wait_on = []
- for _, future in self._pending_main_kwargs.items():
- # If the pending main keyword arg is a list then extend the list.
- if isinstance(future, list):
- futures_to_wait_on.extend(future)
- # If the pending main keyword arg is a future, append it to the list.
- else:
- futures_to_wait_on.append(future)
- # Now wait for all of the futures to complete.
- self._wait_until_all_complete(futures_to_wait_on)
-
- def _wait_until_all_complete(self, futures):
- # This is a basic implementation of the concurrent.futures.wait()
- #
- # concurrent.futures.wait() is not used instead because of this
- # reported issue: https://bugs.python.org/issue20319.
- # The issue would occasionally cause multipart uploads to hang
- # when wait() was called. With this approach, it avoids the
- # concurrency bug by removing any association with concurrent.futures
- # implementation of waiters.
- logger.debug(
- '%s about to wait for the following futures %s', self, futures)
- for future in futures:
- try:
- logger.debug('%s about to wait for %s', self, future)
- future.result()
- except Exception:
- # result() can also produce exceptions. We want to ignore
- # these to be deferred to error handling down the road.
- pass
- logger.debug('%s done waiting for dependent futures', self)
-
- def _get_all_main_kwargs(self):
- # Copy over all of the kwargs that we know are available.
- kwargs = copy.copy(self._main_kwargs)
-
- # Iterate through the kwargs whose values are pending on the result
- # of a future.
- for key, pending_value in self._pending_main_kwargs.items():
- # If the value is a list of futures, iterate through the list
- # appending on the result from each future.
- if isinstance(pending_value, list):
- result = []
- for future in pending_value:
- result.append(future.result())
- # Otherwise if the pending_value is a future, just wait for it.
- else:
- result = pending_value.result()
- # Add the retrieved value to the kwargs to be sent to the
- # main() call.
- kwargs[key] = result
- return kwargs
-
-
-class SubmissionTask(Task):
- """A base class for any submission task
-
- Submission tasks are the top-level task used to submit a series of tasks
- to execute a particular transfer.
- """
- def _main(self, transfer_future, **kwargs):
- """
- :type transfer_future: s3transfer.futures.TransferFuture
- :param transfer_future: The transfer future associated with the
- transfer request that tasks are being submitted for
-
- :param kwargs: Any additional kwargs that you may want to pass
- to the _submit() method
- """
- try:
- self._transfer_coordinator.set_status_to_queued()
-
- # Before submitting any tasks, run all of the on_queued callbacks
- on_queued_callbacks = get_callbacks(transfer_future, 'queued')
- for on_queued_callback in on_queued_callbacks:
- on_queued_callback()
-
- # Once callbacks have been run, set the status to running.
- self._transfer_coordinator.set_status_to_running()
-
- # Call the submit method to start submitting tasks to execute the
- # transfer.
- self._submit(transfer_future=transfer_future, **kwargs)
- except BaseException as e:
- # If there was an exception raised during the submission of the task,
- # there is a chance that the final task that signals that a transfer
- # is done and runs the cleanup may never have been submitted in
- # the first place so we need to account accordingly.
- #
- # Note that BaseException is caught, instead of Exception, because
- # for some implementations of executors, specifically the serial
- # implementation, the SubmissionTask is directly exposed to
- # KeyboardInterrupts and so needs to clean up and signal done
- # for those as well.
-
- # Set the exception, that caused the process to fail.
- self._log_and_set_exception(e)
-
- # Wait for all possibly associated futures that may have spawned
- # from this submission task to finish before we announce the
- # transfer done.
- self._wait_for_all_submitted_futures_to_complete()
-
- # Announce the transfer as done, which will run any cleanups
- # and done callbacks as well.
- self._transfer_coordinator.announce_done()
-
- def _submit(self, transfer_future, **kwargs):
- """The submition method to be implemented
-
- :type transfer_future: s3transfer.futures.TransferFuture
- :param transfer_future: The transfer future associated with the
- transfer request that tasks are being submitted for
-
- :param kwargs: Any additional keyword arguments you want to be passed
- in
- """
- raise NotImplementedError('_submit() must be implemented')
-
- def _wait_for_all_submitted_futures_to_complete(self):
- # We want to wait for all futures that were submitted to
- # complete as we do not want the cleanup callbacks or done callbacks
- # to be called too early. The main problem is any task that was
- # submitted may have submitted even more during its process and so
- # we need to account accordingly.
-
- # First get all of the futures that were submitted up to this point.
- submitted_futures = self._transfer_coordinator.associated_futures
- while submitted_futures:
- # Wait for those futures to complete.
- self._wait_until_all_complete(submitted_futures)
- # However, more futures may have been submitted as we waited so
- # we need to check again for any more associated futures.
- possibly_more_submitted_futures = \
- self._transfer_coordinator.associated_futures
- # If the current list of submitted futures is equal to the
- # list of associated futures after the wait completes,
- # we can ensure no more futures were submitted in waiting on
- # the current list of futures to complete ultimately meaning all
- # futures that may have spawned from the original submission task
- # have completed.
- if submitted_futures == possibly_more_submitted_futures:
- break
- submitted_futures = possibly_more_submitted_futures
-
-
-class CreateMultipartUploadTask(Task):
- """Task to initiate a multipart upload"""
- def _main(self, client, bucket, key, extra_args):
- """
- :param client: The client to use when calling CreateMultipartUpload
- :param bucket: The name of the bucket to upload to
- :param key: The name of the key to upload to
- :param extra_args: A dictionary of any extra arguments that may be
- # used in the initialization.
-
- :returns: The upload id of the multipart upload
- """
- # Create the multipart upload.
- response = client.create_multipart_upload(
- Bucket=bucket, Key=key, **extra_args)
- upload_id = response['UploadId']
-
- # Add a cleanup if the multipart upload fails at any point.
- self._transfer_coordinator.add_failure_cleanup(
- client.abort_multipart_upload, Bucket=bucket, Key=key,
- UploadId=upload_id
- )
- return upload_id
-
-
-class CompleteMultipartUploadTask(Task):
- """Task to complete a multipart upload"""
- def _main(self, client, bucket, key, upload_id, parts, extra_args):
- """
- :param client: The client to use when calling CompleteMultipartUpload
- :param bucket: The name of the bucket to upload to
- :param key: The name of the key to upload to
- :param upload_id: The id of the upload
- :param parts: A list of parts to use to complete the multipart upload::
-
- [{'ETag': etag_value, 'PartNumber': part_number}, ...]
-
- Each element in the list consists of a return value from
- ``UploadPartTask.main()``.
- :param extra_args: A dictionary of any extra arguments that may be
- used in completing the multipart transfer.
- """
- client.complete_multipart_upload(
- Bucket=bucket, Key=key, UploadId=upload_id,
- MultipartUpload={'Parts': parts},
- **extra_args)
+# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You
+# may not use this file except in compliance with the License. A copy of
+# the License is located at
+#
+# http://aws.amazon.com/apache2.0/
+#
+# or in the "license" file accompanying this file. This file is
+# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific
+# language governing permissions and limitations under the License.
+import copy
+import logging
+
+from s3transfer.utils import get_callbacks
+
+
+logger = logging.getLogger(__name__)
+
+
+class Task(object):
+ """A task associated to a TransferFuture request
+
+ This is a base class for other classes to subclass from. All subclassed
+ classes must implement the main() method.
+ """
+ def __init__(self, transfer_coordinator, main_kwargs=None,
+ pending_main_kwargs=None, done_callbacks=None,
+ is_final=False):
+ """
+ :type transfer_coordinator: s3transfer.futures.TransferCoordinator
+ :param transfer_coordinator: The context associated to the
+ TransferFuture for which this Task is associated with.
+
+ :type main_kwargs: dict
+ :param main_kwargs: The keyword args that can be immediately supplied
+ to the _main() method of the task
+
+ :type pending_main_kwargs: dict
+ :param pending_main_kwargs: The keyword args that are depended upon
+ by the result from a dependent future(s). The result returned by
+ the future(s) will be used as the value for the keyword argument
+ when _main() is called. The values for each key can be:
+ * a single future - Once completed, its value will be the
+ result of that single future
+ * a list of futures - Once all of the futures complete, the
+ value used will be a list of each completed future result
+ value in order of when they were originally supplied.
+
+ :type done_callbacks: list of callbacks
+ :param done_callbacks: A list of callbacks to call once the task is
+ done completing. Each callback will be called with no arguments
+ and will be called no matter if the task succeeds or an exception
+ is raised.
+
+ :type is_final: boolean
+ :param is_final: True, to indicate that this task is the final task
+ for the TransferFuture request. By setting this value to True, it
+ will set the result of the entire TransferFuture to the result
+ returned by this task's main() method.
+ """
+ self._transfer_coordinator = transfer_coordinator
+
+ self._main_kwargs = main_kwargs
+ if self._main_kwargs is None:
+ self._main_kwargs = {}
+
+ self._pending_main_kwargs = pending_main_kwargs
+ if pending_main_kwargs is None:
+ self._pending_main_kwargs = {}
+
+ self._done_callbacks = done_callbacks
+ if self._done_callbacks is None:
+ self._done_callbacks = []
+
+ self._is_final = is_final
+
+ def __repr__(self):
+ # These are the general main_kwarg parameters that we want to
+ # display in the repr.
+ params_to_display = [
+ 'bucket', 'key', 'part_number', 'final_filename',
+ 'transfer_future', 'offset', 'extra_args'
+ ]
+ main_kwargs_to_display = self._get_kwargs_with_params_to_include(
+ self._main_kwargs, params_to_display)
+ return '%s(transfer_id=%s, %s)' % (
+ self.__class__.__name__, self._transfer_coordinator.transfer_id,
+ main_kwargs_to_display)
+
+ @property
+ def transfer_id(self):
+ """The id for the transfer request that the task belongs to"""
+ return self._transfer_coordinator.transfer_id
+
+ def _get_kwargs_with_params_to_include(self, kwargs, include):
+ filtered_kwargs = {}
+ for param in include:
+ if param in kwargs:
+ filtered_kwargs[param] = kwargs[param]
+ return filtered_kwargs
+
+ def _get_kwargs_with_params_to_exclude(self, kwargs, exclude):
+ filtered_kwargs = {}
+ for param, value in kwargs.items():
+ if param in exclude:
+ continue
+ filtered_kwargs[param] = value
+ return filtered_kwargs
+
+ def __call__(self):
+ """The callable to use when submitting a Task to an executor"""
+ try:
+ # Wait for all of the futures this task depends on.
+ self._wait_on_dependent_futures()
+ # Gather up all of the main keyword arguments for main().
+ # This includes the immediately provided main_kwargs and
+ # the values for pending_main_kwargs that source from the return
+ # values from the task's dependent futures.
+ kwargs = self._get_all_main_kwargs()
+ # If the task is not done (really only if some other related
+ # task to the TransferFuture had failed) then execute the task's
+ # main() method.
+ if not self._transfer_coordinator.done():
+ return self._execute_main(kwargs)
+ except Exception as e:
+ self._log_and_set_exception(e)
+ finally:
+ # Run any done callbacks associated to the task no matter what.
+ for done_callback in self._done_callbacks:
+ done_callback()
+
+ if self._is_final:
+ # If this is the final task announce that it is done if results
+ # are waiting on its completion.
+ self._transfer_coordinator.announce_done()
+
+ def _execute_main(self, kwargs):
+ # Do not display keyword args that should not be printed, especially
+ # if they are going to make the logs hard to follow.
+ params_to_exclude = ['data']
+ kwargs_to_display = self._get_kwargs_with_params_to_exclude(
+ kwargs, params_to_exclude)
+ # Log what is about to be executed.
+ logger.debug(
+ "Executing task %s with kwargs %s" % (self, kwargs_to_display)
+ )
+
+ return_value = self._main(**kwargs)
+ # If the task is the final task, then set the TransferFuture's
+ # value to the return value from main().
+ if self._is_final:
+ self._transfer_coordinator.set_result(return_value)
+ return return_value
+
+ def _log_and_set_exception(self, exception):
+ # If an exception is ever thrown then set the exception for the
+ # entire TransferFuture.
+ logger.debug("Exception raised.", exc_info=True)
+ self._transfer_coordinator.set_exception(exception)
+
+ def _main(self, **kwargs):
+ """The method that will be ran in the executor
+
+ This method must be implemented by subclasses from Task. main() can
+ be implemented with any arguments decided upon by the subclass.
+ """
+ raise NotImplementedError('_main() must be implemented')
+
+ def _wait_on_dependent_futures(self):
+ # Gather all of the futures that main() depends on.
+ futures_to_wait_on = []
+ for _, future in self._pending_main_kwargs.items():
+ # If the pending main keyword arg is a list then extend the list.
+ if isinstance(future, list):
+ futures_to_wait_on.extend(future)
+ # If the pending main keyword arg is a future, append it to the list.
+ else:
+ futures_to_wait_on.append(future)
+ # Now wait for all of the futures to complete.
+ self._wait_until_all_complete(futures_to_wait_on)
+
+ def _wait_until_all_complete(self, futures):
+ # This is a basic implementation of the concurrent.futures.wait()
+ #
+ # concurrent.futures.wait() is not used instead because of this
+ # reported issue: https://bugs.python.org/issue20319.
+ # The issue would occasionally cause multipart uploads to hang
+ # when wait() was called. With this approach, it avoids the
+ # concurrency bug by removing any association with concurrent.futures
+ # implementation of waiters.
+ logger.debug(
+ '%s about to wait for the following futures %s', self, futures)
+ for future in futures:
+ try:
+ logger.debug('%s about to wait for %s', self, future)
+ future.result()
+ except Exception:
+ # result() can also produce exceptions. We want to ignore
+ # these to be deferred to error handling down the road.
+ pass
+ logger.debug('%s done waiting for dependent futures', self)
+
+ def _get_all_main_kwargs(self):
+ # Copy over all of the kwargs that we know are available.
+ kwargs = copy.copy(self._main_kwargs)
+
+ # Iterate through the kwargs whose values are pending on the result
+ # of a future.
+ for key, pending_value in self._pending_main_kwargs.items():
+ # If the value is a list of futures, iterate through the list
+ # appending on the result from each future.
+ if isinstance(pending_value, list):
+ result = []
+ for future in pending_value:
+ result.append(future.result())
+ # Otherwise if the pending_value is a future, just wait for it.
+ else:
+ result = pending_value.result()
+ # Add the retrieved value to the kwargs to be sent to the
+ # main() call.
+ kwargs[key] = result
+ return kwargs
+
+
+class SubmissionTask(Task):
+ """A base class for any submission task
+
+ Submission tasks are the top-level task used to submit a series of tasks
+ to execute a particular transfer.
+ """
+ def _main(self, transfer_future, **kwargs):
+ """
+ :type transfer_future: s3transfer.futures.TransferFuture
+ :param transfer_future: The transfer future associated with the
+ transfer request that tasks are being submitted for
+
+ :param kwargs: Any additional kwargs that you may want to pass
+ to the _submit() method
+ """
+ try:
+ self._transfer_coordinator.set_status_to_queued()
+
+ # Before submitting any tasks, run all of the on_queued callbacks
+ on_queued_callbacks = get_callbacks(transfer_future, 'queued')
+ for on_queued_callback in on_queued_callbacks:
+ on_queued_callback()
+
+ # Once callbacks have been run, set the status to running.
+ self._transfer_coordinator.set_status_to_running()
+
+ # Call the submit method to start submitting tasks to execute the
+ # transfer.
+ self._submit(transfer_future=transfer_future, **kwargs)
+ except BaseException as e:
+ # If there was an exception raised during the submission of the task,
+ # there is a chance that the final task that signals that a transfer
+ # is done and runs the cleanup may never have been submitted in
+ # the first place so we need to account accordingly.
+ #
+ # Note that BaseException is caught, instead of Exception, because
+ # for some implementations of executors, specifically the serial
+ # implementation, the SubmissionTask is directly exposed to
+ # KeyboardInterrupts and so needs to clean up and signal done
+ # for those as well.
+
+ # Set the exception, that caused the process to fail.
+ self._log_and_set_exception(e)
+
+ # Wait for all possibly associated futures that may have spawned
+ # from this submission task to finish before we announce the
+ # transfer done.
+ self._wait_for_all_submitted_futures_to_complete()
+
+ # Announce the transfer as done, which will run any cleanups
+ # and done callbacks as well.
+ self._transfer_coordinator.announce_done()
+
+ def _submit(self, transfer_future, **kwargs):
+ """The submition method to be implemented
+
+ :type transfer_future: s3transfer.futures.TransferFuture
+ :param transfer_future: The transfer future associated with the
+ transfer request that tasks are being submitted for
+
+ :param kwargs: Any additional keyword arguments you want to be passed
+ in
+ """
+ raise NotImplementedError('_submit() must be implemented')
+
+ def _wait_for_all_submitted_futures_to_complete(self):
+ # We want to wait for all futures that were submitted to
+ # complete as we do not want the cleanup callbacks or done callbacks
+ # to be called too early. The main problem is any task that was
+ # submitted may have submitted even more during its process and so
+ # we need to account accordingly.
+
+ # First get all of the futures that were submitted up to this point.
+ submitted_futures = self._transfer_coordinator.associated_futures
+ while submitted_futures:
+ # Wait for those futures to complete.
+ self._wait_until_all_complete(submitted_futures)
+ # However, more futures may have been submitted as we waited so
+ # we need to check again for any more associated futures.
+ possibly_more_submitted_futures = \
+ self._transfer_coordinator.associated_futures
+ # If the current list of submitted futures is equal to the
+ # list of associated futures after the wait completes,
+ # we can ensure no more futures were submitted in waiting on
+ # the current list of futures to complete ultimately meaning all
+ # futures that may have spawned from the original submission task
+ # have completed.
+ if submitted_futures == possibly_more_submitted_futures:
+ break
+ submitted_futures = possibly_more_submitted_futures
+
+
+class CreateMultipartUploadTask(Task):
+ """Task to initiate a multipart upload"""
+ def _main(self, client, bucket, key, extra_args):
+ """
+ :param client: The client to use when calling CreateMultipartUpload
+ :param bucket: The name of the bucket to upload to
+ :param key: The name of the key to upload to
+ :param extra_args: A dictionary of any extra arguments that may be
+ # used in the initialization.
+
+ :returns: The upload id of the multipart upload
+ """
+ # Create the multipart upload.
+ response = client.create_multipart_upload(
+ Bucket=bucket, Key=key, **extra_args)
+ upload_id = response['UploadId']
+
+ # Add a cleanup if the multipart upload fails at any point.
+ self._transfer_coordinator.add_failure_cleanup(
+ client.abort_multipart_upload, Bucket=bucket, Key=key,
+ UploadId=upload_id
+ )
+ return upload_id
+
+
+class CompleteMultipartUploadTask(Task):
+ """Task to complete a multipart upload"""
+ def _main(self, client, bucket, key, upload_id, parts, extra_args):
+ """
+ :param client: The client to use when calling CompleteMultipartUpload
+ :param bucket: The name of the bucket to upload to
+ :param key: The name of the key to upload to
+ :param upload_id: The id of the upload
+ :param parts: A list of parts to use to complete the multipart upload::
+
+ [{'ETag': etag_value, 'PartNumber': part_number}, ...]
+
+ Each element in the list consists of a return value from
+ ``UploadPartTask.main()``.
+ :param extra_args: A dictionary of any extra arguments that may be
+ used in completing the multipart transfer.
+ """
+ client.complete_multipart_upload(
+ Bucket=bucket, Key=key, UploadId=upload_id,
+ MultipartUpload={'Parts': parts},
+ **extra_args)
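
A rough illustration of the Task contract laid out above: a hypothetical task wrapping a single HeadObject call. The class name and keyword arguments are invented for this sketch and are not part of s3transfer:

from s3transfer.tasks import Task


class HeadObjectTask(Task):
    """Hypothetical task that returns the metadata of an S3 object."""

    def _main(self, client, bucket, key):
        # _main() receives exactly the keyword arguments supplied through
        # main_kwargs, plus any resolved pending_main_kwargs.
        return client.head_object(Bucket=bucket, Key=key)

A submission task would construct it with a transfer_coordinator and main_kwargs={'client': client, 'bucket': ..., 'key': ...} and submit it to the request executor; __call__() then runs _main() unless the transfer coordinator is already marked done.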
diff --git a/contrib/python/s3transfer/py2/s3transfer/upload.py b/contrib/python/s3transfer/py2/s3transfer/upload.py
index 7e13c58a34..27451e5eff 100644
--- a/contrib/python/s3transfer/py2/s3transfer/upload.py
+++ b/contrib/python/s3transfer/py2/s3transfer/upload.py
@@ -1,726 +1,726 @@
-# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License"). You
-# may not use this file except in compliance with the License. A copy of
-# the License is located at
-#
-# http://aws.amazon.com/apache2.0/
-#
-# or in the "license" file accompanying this file. This file is
-# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
-# ANY KIND, either express or implied. See the License for the specific
-# language governing permissions and limitations under the License.
-import math
-
-from botocore.compat import six
-
-from s3transfer.compat import seekable, readable
-from s3transfer.futures import IN_MEMORY_UPLOAD_TAG
-from s3transfer.tasks import Task
-from s3transfer.tasks import SubmissionTask
-from s3transfer.tasks import CreateMultipartUploadTask
-from s3transfer.tasks import CompleteMultipartUploadTask
-from s3transfer.utils import get_callbacks
-from s3transfer.utils import get_filtered_dict
-from s3transfer.utils import DeferredOpenFile, ChunksizeAdjuster
-
-
-class AggregatedProgressCallback(object):
- def __init__(self, callbacks, threshold=1024 * 256):
- """Aggregates progress updates for every provided progress callback
-
- :type callbacks: A list of functions that accepts bytes_transferred
- as a single argument
- :param callbacks: The callbacks to invoke when threshold is reached
-
- :type threshold: int
- :param threshold: The progress threshold in which to take the
- aggregated progress and invoke the progress callback with that
- aggregated progress total
- """
- self._callbacks = callbacks
- self._threshold = threshold
- self._bytes_seen = 0
-
- def __call__(self, bytes_transferred):
- self._bytes_seen += bytes_transferred
- if self._bytes_seen >= self._threshold:
- self._trigger_callbacks()
-
- def flush(self):
- """Flushes out any progress that has not been sent to its callbacks"""
- if self._bytes_seen > 0:
- self._trigger_callbacks()
-
- def _trigger_callbacks(self):
- for callback in self._callbacks:
- callback(bytes_transferred=self._bytes_seen)
- self._bytes_seen = 0
-
-
-class InterruptReader(object):
- """Wrapper that can interrupt reading using an error
-
+# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You
+# may not use this file except in compliance with the License. A copy of
+# the License is located at
+#
+# http://aws.amazon.com/apache2.0/
+#
+# or in the "license" file accompanying this file. This file is
+# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific
+# language governing permissions and limitations under the License.
+import math
+
+from botocore.compat import six
+
+from s3transfer.compat import seekable, readable
+from s3transfer.futures import IN_MEMORY_UPLOAD_TAG
+from s3transfer.tasks import Task
+from s3transfer.tasks import SubmissionTask
+from s3transfer.tasks import CreateMultipartUploadTask
+from s3transfer.tasks import CompleteMultipartUploadTask
+from s3transfer.utils import get_callbacks
+from s3transfer.utils import get_filtered_dict
+from s3transfer.utils import DeferredOpenFile, ChunksizeAdjuster
+
+
+class AggregatedProgressCallback(object):
+ def __init__(self, callbacks, threshold=1024 * 256):
+ """Aggregates progress updates for every provided progress callback
+
+ :type callbacks: A list of functions that accepts bytes_transferred
+ as a single argument
+ :param callbacks: The callbacks to invoke when threshold is reached
+
+ :type threshold: int
+ :param threshold: The progress threshold in which to take the
+ aggregated progress and invoke the progress callback with that
+ aggregated progress total
+ """
+ self._callbacks = callbacks
+ self._threshold = threshold
+ self._bytes_seen = 0
+
+ def __call__(self, bytes_transferred):
+ self._bytes_seen += bytes_transferred
+ if self._bytes_seen >= self._threshold:
+ self._trigger_callbacks()
+
+ def flush(self):
+ """Flushes out any progress that has not been sent to its callbacks"""
+ if self._bytes_seen > 0:
+ self._trigger_callbacks()
+
+ def _trigger_callbacks(self):
+ for callback in self._callbacks:
+ callback(bytes_transferred=self._bytes_seen)
+ self._bytes_seen = 0
+
+
+class InterruptReader(object):
+ """Wrapper that can interrupt reading using an error
+
It uses a transfer coordinator to propagate an error if it notices
- that a read is being made after an exception has been set on the coordinator.
-
- :type fileobj: file-like obj
- :param fileobj: The file-like object to read from
-
- :type transfer_coordinator: s3transfer.futures.TransferCoordinator
- :param transfer_coordinator: The transfer coordinator to use if the
- reader needs to be interrupted.
- """
- def __init__(self, fileobj, transfer_coordinator):
- self._fileobj = fileobj
- self._transfer_coordinator = transfer_coordinator
-
- def read(self, amount=None):
- # If there is an exception, then raise the exception.
- # We raise an error instead of returning no bytes because for
- # requests where the content length and md5 was sent, it will
- # cause md5 mismatches and retries as there was no indication that
- # the stream being read from encountered any issues.
- if self._transfer_coordinator.exception:
- raise self._transfer_coordinator.exception
- return self._fileobj.read(amount)
-
+ that a read is being made after an exception has been set on the coordinator.
+
+ :type fileobj: file-like obj
+ :param fileobj: The file-like object to read from
+
+ :type transfer_coordinator: s3transfer.futures.TransferCoordinator
+ :param transfer_coordinator: The transfer coordinator to use if the
+ reader needs to be interrupted.
+ """
+ def __init__(self, fileobj, transfer_coordinator):
+ self._fileobj = fileobj
+ self._transfer_coordinator = transfer_coordinator
+
+ def read(self, amount=None):
+ # If there is an exception, then raise the exception.
+ # We raise an error instead of returning no bytes because for
+ # requests where the content length and md5 was sent, it will
+ # cause md5 mismatches and retries as there was no indication that
+ # the stream being read from encountered any issues.
+ if self._transfer_coordinator.exception:
+ raise self._transfer_coordinator.exception
+ return self._fileobj.read(amount)
+
def seek(self, where, whence=0):
self._fileobj.seek(where, whence)
-
- def tell(self):
- return self._fileobj.tell()
-
- def close(self):
- self._fileobj.close()
-
- def __enter__(self):
- return self
-
- def __exit__(self, *args, **kwargs):
- self.close()
-
-
-class UploadInputManager(object):
- """Base manager class for handling various types of files for uploads
-
- This class is typically used for the UploadSubmissionTask class to help
- determine the following:
-
- * How to determine the size of the file
- * How to determine if a multipart upload is required
- * How to retrieve the body for a PutObject
- * How to retrieve the bodies for a set of UploadParts
-
- The answers/implementations differ for the various types of file inputs
- that may be accepted. All implementations must subclass and override
- public methods from this class.
- """
- def __init__(self, osutil, transfer_coordinator, bandwidth_limiter=None):
- self._osutil = osutil
- self._transfer_coordinator = transfer_coordinator
- self._bandwidth_limiter = bandwidth_limiter
-
- @classmethod
- def is_compatible(cls, upload_source):
- """Determines if the source for the upload is compatible with manager
-
- :param upload_source: The source for which the upload will pull data
- from.
-
- :returns: True if the manager can handle the type of source specified
- otherwise returns False.
- """
- raise NotImplementedError('must implement _is_compatible()')
-
- def stores_body_in_memory(self, operation_name):
- """Whether the body it provides are stored in-memory
-
- :type operation_name: str
- :param operation_name: The name of the client operation that the body
- is being used for. Valid operation_names are ``put_object`` and
- ``upload_part``.
-
- :rtype: boolean
- :returns: True if the body returned by the manager will be stored in
- memory. False if the manager will not directly store the body in
- memory.
- """
- raise NotImplementedError('must implement stores_body_in_memory()')
-
- def provide_transfer_size(self, transfer_future):
- """Provides the transfer size of an upload
-
- :type transfer_future: s3transfer.futures.TransferFuture
- :param transfer_future: The future associated with upload request
- """
- raise NotImplementedError('must implement provide_transfer_size()')
-
- def requires_multipart_upload(self, transfer_future, config):
- """Determines where a multipart upload is required
-
- :type transfer_future: s3transfer.futures.TransferFuture
- :param transfer_future: The future associated with upload request
-
- :type config: s3transfer.manager.TransferConfig
- :param config: The config associated to the transfer manager
-
- :rtype: boolean
- :returns: True, if the upload should be multipart based on
- configuration and size. False, otherwise.
- """
- raise NotImplementedError('must implement requires_multipart_upload()')
-
- def get_put_object_body(self, transfer_future):
- """Returns the body to use for PutObject
-
- :type transfer_future: s3transfer.futures.TransferFuture
- :param transfer_future: The future associated with upload request
-
- :type config: s3transfer.manager.TransferConfig
- :param config: The config associated to the transfer manager
-
- :rtype: s3transfer.utils.ReadFileChunk
- :returns: A ReadFileChunk including all progress callbacks
- associated with the transfer future.
- """
- raise NotImplementedError('must implement get_put_object_body()')
-
- def yield_upload_part_bodies(self, transfer_future, chunksize):
- """Yields the part number and body to use for each UploadPart
-
- :type transfer_future: s3transfer.futures.TransferFuture
- :param transfer_future: The future associated with upload request
-
- :type chunksize: int
- :param chunksize: The chunksize to use for this upload.
-
- :rtype: int, s3transfer.utils.ReadFileChunk
- :returns: Yields the part number and the ReadFileChunk including all
- progress callbacks associated with the transfer future for that
- specific yielded part.
- """
- raise NotImplementedError('must implement yield_upload_part_bodies()')
-
- def _wrap_fileobj(self, fileobj):
- fileobj = InterruptReader(fileobj, self._transfer_coordinator)
- if self._bandwidth_limiter:
- fileobj = self._bandwidth_limiter.get_bandwith_limited_stream(
- fileobj, self._transfer_coordinator, enabled=False)
- return fileobj
-
- def _get_progress_callbacks(self, transfer_future):
- callbacks = get_callbacks(transfer_future, 'progress')
- # We only want to be wrapping the callbacks if there are callbacks to
- # invoke because we do not want to be doing any unnecessary work if
- # there are no callbacks to invoke.
- if callbacks:
- return [AggregatedProgressCallback(callbacks)]
- return []
-
- def _get_close_callbacks(self, aggregated_progress_callbacks):
- return [callback.flush for callback in aggregated_progress_callbacks]
-
-
-class UploadFilenameInputManager(UploadInputManager):
- """Upload utility for filenames"""
- @classmethod
- def is_compatible(cls, upload_source):
- return isinstance(upload_source, six.string_types)
-
- def stores_body_in_memory(self, operation_name):
- return False
-
- def provide_transfer_size(self, transfer_future):
- transfer_future.meta.provide_transfer_size(
- self._osutil.get_file_size(
- transfer_future.meta.call_args.fileobj))
-
- def requires_multipart_upload(self, transfer_future, config):
- return transfer_future.meta.size >= config.multipart_threshold
-
- def get_put_object_body(self, transfer_future):
- # Get a file-like object for the given input
- fileobj, full_size = self._get_put_object_fileobj_with_full_size(
- transfer_future)
-
- # Wrap fileobj with interrupt reader that will quickly cancel
- # uploads if needed instead of having to wait for the socket
- # to completely read all of the data.
- fileobj = self._wrap_fileobj(fileobj)
-
- callbacks = self._get_progress_callbacks(transfer_future)
- close_callbacks = self._get_close_callbacks(callbacks)
- size = transfer_future.meta.size
- # Return the file-like object wrapped into a ReadFileChunk to get
- # progress.
- return self._osutil.open_file_chunk_reader_from_fileobj(
- fileobj=fileobj, chunk_size=size, full_file_size=full_size,
- callbacks=callbacks, close_callbacks=close_callbacks)
-
- def yield_upload_part_bodies(self, transfer_future, chunksize):
- full_file_size = transfer_future.meta.size
- num_parts = self._get_num_parts(transfer_future, chunksize)
- for part_number in range(1, num_parts + 1):
- callbacks = self._get_progress_callbacks(transfer_future)
- close_callbacks = self._get_close_callbacks(callbacks)
- start_byte = chunksize * (part_number - 1)
- # Get a file-like object for that part and the size of the full
- # file size for the associated file-like object for that part.
- fileobj, full_size = self._get_upload_part_fileobj_with_full_size(
- transfer_future.meta.call_args.fileobj, start_byte=start_byte,
- part_size=chunksize, full_file_size=full_file_size)
-
- # Wrap fileobj with interrupt reader that will quickly cancel
- # uploads if needed instead of having to wait for the socket
- # to completely read all of the data.
- fileobj = self._wrap_fileobj(fileobj)
-
- # Wrap the file-like object into a ReadFileChunk to get progress.
- read_file_chunk = self._osutil.open_file_chunk_reader_from_fileobj(
- fileobj=fileobj, chunk_size=chunksize,
- full_file_size=full_size, callbacks=callbacks,
- close_callbacks=close_callbacks)
- yield part_number, read_file_chunk
-
- def _get_deferred_open_file(self, fileobj, start_byte):
- fileobj = DeferredOpenFile(
- fileobj, start_byte, open_function=self._osutil.open)
- return fileobj
-
- def _get_put_object_fileobj_with_full_size(self, transfer_future):
- fileobj = transfer_future.meta.call_args.fileobj
- size = transfer_future.meta.size
- return self._get_deferred_open_file(fileobj, 0), size
-
- def _get_upload_part_fileobj_with_full_size(self, fileobj, **kwargs):
- start_byte = kwargs['start_byte']
- full_size = kwargs['full_file_size']
- return self._get_deferred_open_file(fileobj, start_byte), full_size
-
- def _get_num_parts(self, transfer_future, part_size):
- return int(
- math.ceil(transfer_future.meta.size / float(part_size)))
-
-
-class UploadSeekableInputManager(UploadFilenameInputManager):
- """Upload utility for an open file object"""
- @classmethod
- def is_compatible(cls, upload_source):
- return readable(upload_source) and seekable(upload_source)
-
- def stores_body_in_memory(self, operation_name):
- if operation_name == 'put_object':
- return False
- else:
- return True
-
- def provide_transfer_size(self, transfer_future):
- fileobj = transfer_future.meta.call_args.fileobj
- # To determine size, first determine the starting position
- # Seek to the end and then find the difference in the length
- # between the end and start positions.
- start_position = fileobj.tell()
- fileobj.seek(0, 2)
- end_position = fileobj.tell()
- fileobj.seek(start_position)
- transfer_future.meta.provide_transfer_size(
- end_position - start_position)
-
- def _get_upload_part_fileobj_with_full_size(self, fileobj, **kwargs):
- # Note: It is unfortunate that in order to do a multithreaded
- # multipart upload we cannot simply copy the filelike object
- # since there is not really a mechanism in python (i.e. os.dup
- # points to the same OS filehandle which causes concurrency
- # issues). So instead we need to read from the fileobj and
+
+ def tell(self):
+ return self._fileobj.tell()
+
+ def close(self):
+ self._fileobj.close()
+
+ def __enter__(self):
+ return self
+
+ def __exit__(self, *args, **kwargs):
+ self.close()
+
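For illustration, a minimal sketch of the interruption behaviour implemented above; the SimpleCoordinator class is a hypothetical stand-in that exposes only the exception attribute InterruptReader checks:

import io

from s3transfer.upload import InterruptReader  # defined in the module shown above


class SimpleCoordinator(object):
    """Hypothetical stand-in exposing only the attribute InterruptReader reads."""
    def __init__(self):
        self.exception = None


coordinator = SimpleCoordinator()
reader = InterruptReader(io.BytesIO(b'0123456789'), coordinator)
print(reader.read(4))                      # reads pass through normally
coordinator.exception = ValueError('transfer cancelled')
try:
    reader.read(4)                         # now raises instead of returning bytes
except ValueError as e:
    print('interrupted: %s' % e)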
+
+class UploadInputManager(object):
+ """Base manager class for handling various types of files for uploads
+
+ This class is typically used for the UploadSubmissionTask class to help
+ determine the following:
+
+ * How to determine the size of the file
+ * How to determine if a multipart upload is required
+ * How to retrieve the body for a PutObject
+ * How to retrieve the bodies for a set of UploadParts
+
+ The answers/implementations differ for the various types of file inputs
+ that may be accepted. All implementations must subclass and override
+ public methods from this class.
+ """
+ def __init__(self, osutil, transfer_coordinator, bandwidth_limiter=None):
+ self._osutil = osutil
+ self._transfer_coordinator = transfer_coordinator
+ self._bandwidth_limiter = bandwidth_limiter
+
+ @classmethod
+ def is_compatible(cls, upload_source):
+ """Determines if the source for the upload is compatible with manager
+
+ :param upload_source: The source from which the upload will pull
+ data.
+
+ :returns: True if the manager can handle the type of source specified,
+ otherwise returns False.
+ """
+ raise NotImplementedError('must implement is_compatible()')
+
+ def stores_body_in_memory(self, operation_name):
+ """Whether the body it provides are stored in-memory
+
+ :type operation_name: str
+ :param operation_name: The name of the client operation that the body
+ is being used for. Valid operation_names are ``put_object`` and
+ ``upload_part``.
+
+ :rtype: boolean
+ :returns: True if the body returned by the manager will be stored in
+ memory. False if the manager will not directly store the body in
+ memory.
+ """
+ raise NotImplementedError('must implement stores_body_in_memory()')
+
+ def provide_transfer_size(self, transfer_future):
+ """Provides the transfer size of an upload
+
+ :type transfer_future: s3transfer.futures.TransferFuture
+ :param transfer_future: The future associated with upload request
+ """
+ raise NotImplementedError('must implement provide_transfer_size()')
+
+ def requires_multipart_upload(self, transfer_future, config):
+ """Determines where a multipart upload is required
+
+ :type transfer_future: s3transfer.futures.TransferFuture
+ :param transfer_future: The future associated with upload request
+
+ :type config: s3transfer.manager.TransferConfig
+ :param config: The config associated to the transfer manager
+
+ :rtype: boolean
+ :returns: True, if the upload should be multipart based on
+ configuration and size. False, otherwise.
+ """
+ raise NotImplementedError('must implement requires_multipart_upload()')
+
+ def get_put_object_body(self, transfer_future):
+ """Returns the body to use for PutObject
+
+ :type transfer_future: s3transfer.futures.TransferFuture
+ :param transfer_future: The future associated with upload request
+
+ :type config: s3transfer.manager.TransferConfig
+ :param config: The config associated to the transfer manager
+
+ :rtype: s3transfer.utils.ReadFileChunk
+ :returns: A ReadFileChunk including all progress callbacks
+ associated with the transfer future.
+ """
+ raise NotImplementedError('must implement get_put_object_body()')
+
+ def yield_upload_part_bodies(self, transfer_future, chunksize):
+ """Yields the part number and body to use for each UploadPart
+
+ :type transfer_future: s3transfer.futures.TransferFuture
+ :param transfer_future: The future associated with upload request
+
+ :type chunksize: int
+ :param chunksize: The chunksize to use for this upload.
+
+ :rtype: int, s3transfer.utils.ReadFileChunk
+ :returns: Yields the part number and the ReadFileChunk including all
+ progress callbacks associated with the transfer future for that
+ specific yielded part.
+ """
+ raise NotImplementedError('must implement yield_upload_part_bodies()')
+
+ def _wrap_fileobj(self, fileobj):
+ fileobj = InterruptReader(fileobj, self._transfer_coordinator)
+ if self._bandwidth_limiter:
+ fileobj = self._bandwidth_limiter.get_bandwith_limited_stream(
+ fileobj, self._transfer_coordinator, enabled=False)
+ return fileobj
+
+ def _get_progress_callbacks(self, transfer_future):
+ callbacks = get_callbacks(transfer_future, 'progress')
+ # We only want to be wrapping the callbacks if there are callbacks to
+ # invoke because we do not want to be doing any unnecessary work if
+ # there are no callbacks to invoke.
+ if callbacks:
+ return [AggregatedProgressCallback(callbacks)]
+ return []
+
+ def _get_close_callbacks(self, aggregated_progress_callbacks):
+ return [callback.flush for callback in aggregated_progress_callbacks]
+
+
+class UploadFilenameInputManager(UploadInputManager):
+ """Upload utility for filenames"""
+ @classmethod
+ def is_compatible(cls, upload_source):
+ return isinstance(upload_source, six.string_types)
+
+ def stores_body_in_memory(self, operation_name):
+ return False
+
+ def provide_transfer_size(self, transfer_future):
+ transfer_future.meta.provide_transfer_size(
+ self._osutil.get_file_size(
+ transfer_future.meta.call_args.fileobj))
+
+ def requires_multipart_upload(self, transfer_future, config):
+ return transfer_future.meta.size >= config.multipart_threshold
+
+ def get_put_object_body(self, transfer_future):
+ # Get a file-like object for the given input
+ fileobj, full_size = self._get_put_object_fileobj_with_full_size(
+ transfer_future)
+
+ # Wrap fileobj with interrupt reader that will quickly cancel
+ # uploads if needed instead of having to wait for the socket
+ # to completely read all of the data.
+ fileobj = self._wrap_fileobj(fileobj)
+
+ callbacks = self._get_progress_callbacks(transfer_future)
+ close_callbacks = self._get_close_callbacks(callbacks)
+ size = transfer_future.meta.size
+ # Return the file-like object wrapped into a ReadFileChunk to get
+ # progress.
+ return self._osutil.open_file_chunk_reader_from_fileobj(
+ fileobj=fileobj, chunk_size=size, full_file_size=full_size,
+ callbacks=callbacks, close_callbacks=close_callbacks)
+
+ def yield_upload_part_bodies(self, transfer_future, chunksize):
+ full_file_size = transfer_future.meta.size
+ num_parts = self._get_num_parts(transfer_future, chunksize)
+ for part_number in range(1, num_parts + 1):
+ callbacks = self._get_progress_callbacks(transfer_future)
+ close_callbacks = self._get_close_callbacks(callbacks)
+ start_byte = chunksize * (part_number - 1)
+ # Get a file-like object for that part and the size of the full
+ # file size for the associated file-like object for that part.
+ fileobj, full_size = self._get_upload_part_fileobj_with_full_size(
+ transfer_future.meta.call_args.fileobj, start_byte=start_byte,
+ part_size=chunksize, full_file_size=full_file_size)
+
+ # Wrap fileobj with interrupt reader that will quickly cancel
+ # uploads if needed instead of having to wait for the socket
+ # to completely read all of the data.
+ fileobj = self._wrap_fileobj(fileobj)
+
+ # Wrap the file-like object into a ReadFileChunk to get progress.
+ read_file_chunk = self._osutil.open_file_chunk_reader_from_fileobj(
+ fileobj=fileobj, chunk_size=chunksize,
+ full_file_size=full_size, callbacks=callbacks,
+ close_callbacks=close_callbacks)
+ yield part_number, read_file_chunk
+
+ def _get_deferred_open_file(self, fileobj, start_byte):
+ fileobj = DeferredOpenFile(
+ fileobj, start_byte, open_function=self._osutil.open)
+ return fileobj
+
+ def _get_put_object_fileobj_with_full_size(self, transfer_future):
+ fileobj = transfer_future.meta.call_args.fileobj
+ size = transfer_future.meta.size
+ return self._get_deferred_open_file(fileobj, 0), size
+
+ def _get_upload_part_fileobj_with_full_size(self, fileobj, **kwargs):
+ start_byte = kwargs['start_byte']
+ full_size = kwargs['full_file_size']
+ return self._get_deferred_open_file(fileobj, start_byte), full_size
+
+ def _get_num_parts(self, transfer_future, part_size):
+ return int(
+ math.ceil(transfer_future.meta.size / float(part_size)))
+
+
+class UploadSeekableInputManager(UploadFilenameInputManager):
+ """Upload utility for an open file object"""
+ @classmethod
+ def is_compatible(cls, upload_source):
+ return readable(upload_source) and seekable(upload_source)
+
+ def stores_body_in_memory(self, operation_name):
+ if operation_name == 'put_object':
+ return False
+ else:
+ return True
+
+ def provide_transfer_size(self, transfer_future):
+ fileobj = transfer_future.meta.call_args.fileobj
+ # To determine size, first determine the starting position
+ # Seek to the end and then find the difference in the length
+ # between the end and start positions.
+ start_position = fileobj.tell()
+ fileobj.seek(0, 2)
+ end_position = fileobj.tell()
+ fileobj.seek(start_position)
+ transfer_future.meta.provide_transfer_size(
+ end_position - start_position)
+
+ def _get_upload_part_fileobj_with_full_size(self, fileobj, **kwargs):
+ # Note: It is unfortunate that in order to do a multithreaded
+ # multipart upload we cannot simply copy the filelike object
+ # since there is not really a mechanism in python (i.e. os.dup
+ # points to the same OS filehandle which causes concurrency
+ # issues). So instead we need to read from the fileobj and
# chunk the data out to separate file-like objects in memory.
- data = fileobj.read(kwargs['part_size'])
- # We return the length of the data instead of the full_file_size
+ data = fileobj.read(kwargs['part_size'])
+ # We return the length of the data instead of the full_file_size
# because we partitioned the data into separate BytesIO objects
- # meaning the BytesIO object has no knowledge of its start position
- # relative the input source nor access to the rest of the input
- # source. So we must treat it as its own standalone file.
- return six.BytesIO(data), len(data)
-
- def _get_put_object_fileobj_with_full_size(self, transfer_future):
- fileobj = transfer_future.meta.call_args.fileobj
- # The current position needs to be taken into account when retrieving
- # the full size of the file.
- size = fileobj.tell() + transfer_future.meta.size
- return fileobj, size
-
-
-class UploadNonSeekableInputManager(UploadInputManager):
- """Upload utility for a file-like object that cannot seek."""
- def __init__(self, osutil, transfer_coordinator, bandwidth_limiter=None):
- super(UploadNonSeekableInputManager, self).__init__(
- osutil, transfer_coordinator, bandwidth_limiter)
- self._initial_data = b''
-
- @classmethod
- def is_compatible(cls, upload_source):
- return readable(upload_source)
-
- def stores_body_in_memory(self, operation_name):
- return True
-
- def provide_transfer_size(self, transfer_future):
- # No-op because there is no way to do this short of reading the entire
- # body into memory.
- return
-
- def requires_multipart_upload(self, transfer_future, config):
- # If the user has set the size, we can use that.
- if transfer_future.meta.size is not None:
- return transfer_future.meta.size >= config.multipart_threshold
-
- # This is tricky to determine in this case because we can't know how
- # large the input is. So to figure it out, we read data into memory
- # up until the threshold and compare how much data was actually read
- # against the threshold.
- fileobj = transfer_future.meta.call_args.fileobj
- threshold = config.multipart_threshold
- self._initial_data = self._read(fileobj, threshold, False)
- if len(self._initial_data) < threshold:
- return False
- else:
- return True
-
- def get_put_object_body(self, transfer_future):
- callbacks = self._get_progress_callbacks(transfer_future)
- close_callbacks = self._get_close_callbacks(callbacks)
- fileobj = transfer_future.meta.call_args.fileobj
-
- body = self._wrap_data(
- self._initial_data + fileobj.read(), callbacks, close_callbacks)
-
- # Zero out the stored data so we don't have additional copies
- # hanging around in memory.
- self._initial_data = None
- return body
-
- def yield_upload_part_bodies(self, transfer_future, chunksize):
- file_object = transfer_future.meta.call_args.fileobj
- part_number = 0
-
- # Continue reading parts from the file-like object until it is empty.
- while True:
- callbacks = self._get_progress_callbacks(transfer_future)
- close_callbacks = self._get_close_callbacks(callbacks)
- part_number += 1
- part_content = self._read(file_object, chunksize)
- if not part_content:
- break
- part_object = self._wrap_data(
- part_content, callbacks, close_callbacks)
-
- # Zero out part_content to avoid hanging on to additional data.
- part_content = None
- yield part_number, part_object
-
- def _read(self, fileobj, amount, truncate=True):
- """
- Reads a specific amount of data from a stream and returns it. If there
- is any data in initial_data, that will be popped out first.
-
- :type fileobj: A file-like object that implements read
- :param fileobj: The stream to read from.
-
- :type amount: int
- :param amount: The number of bytes to read from the stream.
-
- :type truncate: bool
- :param truncate: Whether or not to truncate initial_data after
- reading from it.
-
- :return: The requested data, taken from initial_data first and then the stream.
- """
- # If the initial data is empty, we simply read from the fileobj
- if len(self._initial_data) == 0:
- return fileobj.read(amount)
-
+ # meaning the BytesIO object has no knowledge of its start position
+ # relative to the input source nor access to the rest of the input
+ # source. So we must treat it as its own standalone file.
+ return six.BytesIO(data), len(data)
+
+ def _get_put_object_fileobj_with_full_size(self, transfer_future):
+ fileobj = transfer_future.meta.call_args.fileobj
+ # The current position needs to be taken into account when retrieving
+ # the full size of the file.
+ size = fileobj.tell() + transfer_future.meta.size
+ return fileobj, size
+
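To make the os.dup comment above concrete, here is a rough standalone sketch, not part of the library, of chunking a seekable stream into independent in-memory parts, which is the per-part approach the method takes:

import io


def chunk_into_memory(fileobj, part_size):
    # Split a seekable stream into standalone BytesIO objects; each part has
    # no knowledge of its position in, or access to, the original source.
    parts = []
    while True:
        data = fileobj.read(part_size)
        if not data:
            break
        parts.append((io.BytesIO(data), len(data)))
    return parts


source = io.BytesIO(b'a' * 10 + b'b' * 10 + b'c' * 4)
for body, size in chunk_into_memory(source, 10):
    print('%d %s' % (size, body.read()))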
+
+class UploadNonSeekableInputManager(UploadInputManager):
+ """Upload utility for a file-like object that cannot seek."""
+ def __init__(self, osutil, transfer_coordinator, bandwidth_limiter=None):
+ super(UploadNonSeekableInputManager, self).__init__(
+ osutil, transfer_coordinator, bandwidth_limiter)
+ self._initial_data = b''
+
+ @classmethod
+ def is_compatible(cls, upload_source):
+ return readable(upload_source)
+
+ def stores_body_in_memory(self, operation_name):
+ return True
+
+ def provide_transfer_size(self, transfer_future):
+ # No-op because there is no way to do this short of reading the entire
+ # body into memory.
+ return
+
+ def requires_multipart_upload(self, transfer_future, config):
+ # If the user has set the size, we can use that.
+ if transfer_future.meta.size is not None:
+ return transfer_future.meta.size >= config.multipart_threshold
+
+ # This is tricky to determine in this case because we can't know how
+ # large the input is. So to figure it out, we read data into memory
+ # up until the threshold and compare how much data was actually read
+ # against the threshold.
+ fileobj = transfer_future.meta.call_args.fileobj
+ threshold = config.multipart_threshold
+ self._initial_data = self._read(fileobj, threshold, False)
+ if len(self._initial_data) < threshold:
+ return False
+ else:
+ return True
+
+ def get_put_object_body(self, transfer_future):
+ callbacks = self._get_progress_callbacks(transfer_future)
+ close_callbacks = self._get_close_callbacks(callbacks)
+ fileobj = transfer_future.meta.call_args.fileobj
+
+ body = self._wrap_data(
+ self._initial_data + fileobj.read(), callbacks, close_callbacks)
+
+ # Zero out the stored data so we don't have additional copies
+ # hanging around in memory.
+ self._initial_data = None
+ return body
+
+ def yield_upload_part_bodies(self, transfer_future, chunksize):
+ file_object = transfer_future.meta.call_args.fileobj
+ part_number = 0
+
+ # Continue reading parts from the file-like object until it is empty.
+ while True:
+ callbacks = self._get_progress_callbacks(transfer_future)
+ close_callbacks = self._get_close_callbacks(callbacks)
+ part_number += 1
+ part_content = self._read(file_object, chunksize)
+ if not part_content:
+ break
+ part_object = self._wrap_data(
+ part_content, callbacks, close_callbacks)
+
+ # Zero out part_content to avoid hanging on to additional data.
+ part_content = None
+ yield part_number, part_object
+
+ def _read(self, fileobj, amount, truncate=True):
+ """
+ Reads a specific amount of data from a stream and returns it. If there
+ is any data in initial_data, that will be popped out first.
+
+ :type fileobj: A file-like object that implements read
+ :param fileobj: The stream to read from.
+
+ :type amount: int
+ :param amount: The number of bytes to read from the stream.
+
+ :type truncate: bool
+ :param truncate: Whether or not to truncate initial_data after
+ reading from it.
+
+ :return: The requested data, taken from initial_data first and then the stream.
+ """
+ # If the initial data is empty, we simply read from the fileobj
+ if len(self._initial_data) == 0:
+ return fileobj.read(amount)
+
# If the requested number of bytes is less than the amount of
- # initial data, pull entirely from initial data.
- if amount <= len(self._initial_data):
- data = self._initial_data[:amount]
- # Truncate initial data so we don't hang onto the data longer
- # than we need.
- if truncate:
- self._initial_data = self._initial_data[amount:]
- return data
-
- # At this point there is some initial data left, but not enough to
- # satisfy the number of bytes requested. Pull out the remaining
- # initial data and read the rest from the fileobj.
- amount_to_read = amount - len(self._initial_data)
- data = self._initial_data + fileobj.read(amount_to_read)
-
- # Zero out initial data so we don't hang onto the data any more.
- if truncate:
- self._initial_data = b''
- return data
-
- def _wrap_data(self, data, callbacks, close_callbacks):
- """
- Wraps data with the interrupt reader and the file chunk reader.
-
- :type data: bytes
- :param data: The data to wrap.
-
- :type callbacks: list
- :param callbacks: The callbacks associated with the transfer future.
-
- :type close_callbacks: list
- :param close_callbacks: The callbacks to be called when closing the
- wrapper for the data.
-
- :return: Fully wrapped data.
- """
- fileobj = self._wrap_fileobj(six.BytesIO(data))
- return self._osutil.open_file_chunk_reader_from_fileobj(
- fileobj=fileobj, chunk_size=len(data), full_file_size=len(data),
- callbacks=callbacks, close_callbacks=close_callbacks)
-
-
-class UploadSubmissionTask(SubmissionTask):
- """Task for submitting tasks to execute an upload"""
-
- UPLOAD_PART_ARGS = [
- 'SSECustomerKey',
- 'SSECustomerAlgorithm',
- 'SSECustomerKeyMD5',
- 'RequestPayer',
+ # initial data, pull entirely from initial data.
+ if amount <= len(self._initial_data):
+ data = self._initial_data[:amount]
+ # Truncate initial data so we don't hang onto the data longer
+ # than we need.
+ if truncate:
+ self._initial_data = self._initial_data[amount:]
+ return data
+
+ # At this point there is some initial data left, but not enough to
+ # satisfy the number of bytes requested. Pull out the remaining
+ # initial data and read the rest from the fileobj.
+ amount_to_read = amount - len(self._initial_data)
+ data = self._initial_data + fileobj.read(amount_to_read)
+
+ # Zero out initial data so we don't hang onto the data any more.
+ if truncate:
+ self._initial_data = b''
+ return data
+
+ def _wrap_data(self, data, callbacks, close_callbacks):
+ """
+ Wraps data with the interrupt reader and the file chunk reader.
+
+ :type data: bytes
+ :param data: The data to wrap.
+
+ :type callbacks: list
+ :param callbacks: The callbacks associated with the transfer future.
+
+ :type close_callbacks: list
+ :param close_callbacks: The callbacks to be called when closing the
+ wrapper for the data.
+
+ :return: Fully wrapped data.
+ """
+ fileobj = self._wrap_fileobj(six.BytesIO(data))
+ return self._osutil.open_file_chunk_reader_from_fileobj(
+ fileobj=fileobj, chunk_size=len(data), full_file_size=len(data),
+ callbacks=callbacks, close_callbacks=close_callbacks)
+
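As a rough restatement of the _read() buffering strategy above (always truncating the buffer), assuming some bytes were already consumed while probing the multipart threshold:

import io


def read_with_buffer(fileobj, buffered, amount):
    # Serve the request from previously buffered bytes first, then the stream.
    if not buffered:
        return fileobj.read(amount), buffered
    if amount <= len(buffered):
        return buffered[:amount], buffered[amount:]
    data = buffered + fileobj.read(amount - len(buffered))
    return data, b''


stream = io.BytesIO(b'remaining-bytes')
buffered = b'peeked-'   # bytes read earlier while checking the threshold
chunk, buffered = read_with_buffer(stream, buffered, 10)
print(chunk)            # 'peeked-rem'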
+
+class UploadSubmissionTask(SubmissionTask):
+ """Task for submitting tasks to execute an upload"""
+
+ UPLOAD_PART_ARGS = [
+ 'SSECustomerKey',
+ 'SSECustomerAlgorithm',
+ 'SSECustomerKeyMD5',
+ 'RequestPayer',
'ExpectedBucketOwner'
- ]
-
- COMPLETE_MULTIPART_ARGS = [
+ ]
+
+ COMPLETE_MULTIPART_ARGS = [
'RequestPayer',
'ExpectedBucketOwner'
- ]
-
- def _get_upload_input_manager_cls(self, transfer_future):
+ ]
+
+ def _get_upload_input_manager_cls(self, transfer_future):
"""Retrieves a class for managing input for an upload based on file type
-
- :type transfer_future: s3transfer.futures.TransferFuture
- :param transfer_future: The transfer future for the request
-
- :rtype: class of UploadInputManager
- :returns: The appropriate class to use for managing a specific type of
- input for uploads.
- """
- upload_manager_resolver_chain = [
- UploadFilenameInputManager,
- UploadSeekableInputManager,
- UploadNonSeekableInputManager
- ]
-
- fileobj = transfer_future.meta.call_args.fileobj
- for upload_manager_cls in upload_manager_resolver_chain:
- if upload_manager_cls.is_compatible(fileobj):
- return upload_manager_cls
- raise RuntimeError(
- 'Input %s of type: %s is not supported.' % (
- fileobj, type(fileobj)))
-
- def _submit(self, client, config, osutil, request_executor,
- transfer_future, bandwidth_limiter=None):
- """
- :param client: The client associated with the transfer manager
-
- :type config: s3transfer.manager.TransferConfig
- :param config: The transfer config associated with the transfer
- manager
-
- :type osutil: s3transfer.utils.OSUtil
- :param osutil: The os utility associated to the transfer manager
-
- :type request_executor: s3transfer.futures.BoundedExecutor
- :param request_executor: The request executor associated with the
- transfer manager
-
- :type transfer_future: s3transfer.futures.TransferFuture
- :param transfer_future: The transfer future associated with the
- transfer request that tasks are being submitted for
- """
- upload_input_manager = self._get_upload_input_manager_cls(
- transfer_future)(
- osutil, self._transfer_coordinator, bandwidth_limiter)
-
- # Determine the size if it was not provided
- if transfer_future.meta.size is None:
- upload_input_manager.provide_transfer_size(transfer_future)
-
- # Do a multipart upload if needed, otherwise do a regular put object.
- if not upload_input_manager.requires_multipart_upload(
- transfer_future, config):
- self._submit_upload_request(
- client, config, osutil, request_executor, transfer_future,
- upload_input_manager)
- else:
- self._submit_multipart_request(
- client, config, osutil, request_executor, transfer_future,
- upload_input_manager)
-
- def _submit_upload_request(self, client, config, osutil, request_executor,
- transfer_future, upload_input_manager):
- call_args = transfer_future.meta.call_args
-
- # Get any tags that need to be associated to the put object task
- put_object_tag = self._get_upload_task_tag(
- upload_input_manager, 'put_object')
-
- # Submit the request of a single upload.
- self._transfer_coordinator.submit(
- request_executor,
- PutObjectTask(
- transfer_coordinator=self._transfer_coordinator,
- main_kwargs={
- 'client': client,
- 'fileobj': upload_input_manager.get_put_object_body(
- transfer_future),
- 'bucket': call_args.bucket,
- 'key': call_args.key,
- 'extra_args': call_args.extra_args
- },
- is_final=True
- ),
- tag=put_object_tag
- )
-
- def _submit_multipart_request(self, client, config, osutil,
- request_executor, transfer_future,
- upload_input_manager):
- call_args = transfer_future.meta.call_args
-
- # Submit the request to create a multipart upload.
- create_multipart_future = self._transfer_coordinator.submit(
- request_executor,
- CreateMultipartUploadTask(
- transfer_coordinator=self._transfer_coordinator,
- main_kwargs={
- 'client': client,
- 'bucket': call_args.bucket,
- 'key': call_args.key,
- 'extra_args': call_args.extra_args,
- }
- )
- )
-
- # Submit requests to upload the parts of the file.
- part_futures = []
- extra_part_args = self._extra_upload_part_args(call_args.extra_args)
-
- # Get any tags that need to be associated to the submitted task
- # for uploading the data
- upload_part_tag = self._get_upload_task_tag(
- upload_input_manager, 'upload_part')
-
- size = transfer_future.meta.size
- adjuster = ChunksizeAdjuster()
- chunksize = adjuster.adjust_chunksize(config.multipart_chunksize, size)
- part_iterator = upload_input_manager.yield_upload_part_bodies(
- transfer_future, chunksize)
-
- for part_number, fileobj in part_iterator:
- part_futures.append(
- self._transfer_coordinator.submit(
- request_executor,
- UploadPartTask(
- transfer_coordinator=self._transfer_coordinator,
- main_kwargs={
- 'client': client,
- 'fileobj': fileobj,
- 'bucket': call_args.bucket,
- 'key': call_args.key,
- 'part_number': part_number,
- 'extra_args': extra_part_args
- },
- pending_main_kwargs={
- 'upload_id': create_multipart_future
- }
- ),
- tag=upload_part_tag
- )
- )
-
- complete_multipart_extra_args = self._extra_complete_multipart_args(
- call_args.extra_args)
- # Submit the request to complete the multipart upload.
- self._transfer_coordinator.submit(
- request_executor,
- CompleteMultipartUploadTask(
- transfer_coordinator=self._transfer_coordinator,
- main_kwargs={
- 'client': client,
- 'bucket': call_args.bucket,
- 'key': call_args.key,
- 'extra_args': complete_multipart_extra_args,
- },
- pending_main_kwargs={
- 'upload_id': create_multipart_future,
- 'parts': part_futures
- },
- is_final=True
- )
- )
-
- def _extra_upload_part_args(self, extra_args):
- # Only the args in UPLOAD_PART_ARGS actually need to be passed
- # onto the upload_part calls.
- return get_filtered_dict(extra_args, self.UPLOAD_PART_ARGS)
-
- def _extra_complete_multipart_args(self, extra_args):
- return get_filtered_dict(extra_args, self.COMPLETE_MULTIPART_ARGS)
-
- def _get_upload_task_tag(self, upload_input_manager, operation_name):
- tag = None
- if upload_input_manager.stores_body_in_memory(operation_name):
- tag = IN_MEMORY_UPLOAD_TAG
- return tag
-
-
-class PutObjectTask(Task):
- """Task to do a nonmultipart upload"""
- def _main(self, client, fileobj, bucket, key, extra_args):
- """
- :param client: The client to use when calling PutObject
- :param fileobj: The file to upload.
- :param bucket: The name of the bucket to upload to
- :param key: The name of the key to upload to
- :param extra_args: A dictionary of any extra arguments that may be
- used in the upload.
- """
- with fileobj as body:
- client.put_object(Bucket=bucket, Key=key, Body=body, **extra_args)
-
-
-class UploadPartTask(Task):
- """Task to upload a part in a multipart upload"""
- def _main(self, client, fileobj, bucket, key, upload_id, part_number,
- extra_args):
- """
- :param client: The client to use when calling UploadPart
- :param fileobj: The file to upload.
- :param bucket: The name of the bucket to upload to
- :param key: The name of the key to upload to
- :param upload_id: The id of the upload
- :param part_number: The number representing the part of the multipart
- upload
- :param extra_args: A dictionary of any extra arguments that may be
- used in the upload.
-
- :rtype: dict
- :returns: A dictionary representing a part::
-
- {'ETag': etag_value, 'PartNumber': part_number}
-
- This value can be appended to a list to be used to complete
- the multipart upload.
- """
- with fileobj as body:
- response = client.upload_part(
- Bucket=bucket, Key=key,
- UploadId=upload_id, PartNumber=part_number,
- Body=body, **extra_args)
- etag = response['ETag']
- return {'ETag': etag, 'PartNumber': part_number}
+
+ :type transfer_future: s3transfer.futures.TransferFuture
+ :param transfer_future: The transfer future for the request
+
+ :rtype: class of UploadInputManager
+ :returns: The appropriate class to use for managing a specific type of
+ input for uploads.
+ """
+ upload_manager_resolver_chain = [
+ UploadFilenameInputManager,
+ UploadSeekableInputManager,
+ UploadNonSeekableInputManager
+ ]
+
+ fileobj = transfer_future.meta.call_args.fileobj
+ for upload_manager_cls in upload_manager_resolver_chain:
+ if upload_manager_cls.is_compatible(fileobj):
+ return upload_manager_cls
+ raise RuntimeError(
+ 'Input %s of type: %s is not supported.' % (
+ fileobj, type(fileobj)))
+
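A minimal sketch of how this resolver chain behaves for two common inputs; the classes come from the module shown above and the sample path is illustrative:

import io

from s3transfer.upload import (
    UploadFilenameInputManager, UploadSeekableInputManager,
    UploadNonSeekableInputManager)

CHAIN = [UploadFilenameInputManager, UploadSeekableInputManager,
         UploadNonSeekableInputManager]


def resolve(source):
    # The first compatible manager in the chain wins, as in the method above.
    return next(cls for cls in CHAIN if cls.is_compatible(source))


print(resolve('/tmp/example.txt').__name__)   # UploadFilenameInputManager
print(resolve(io.BytesIO(b'data')).__name__)  # UploadSeekableInputManager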
+ def _submit(self, client, config, osutil, request_executor,
+ transfer_future, bandwidth_limiter=None):
+ """
+ :param client: The client associated with the transfer manager
+
+ :type config: s3transfer.manager.TransferConfig
+ :param config: The transfer config associated with the transfer
+ manager
+
+ :type osutil: s3transfer.utils.OSUtil
+ :param osutil: The os utility associated to the transfer manager
+
+ :type request_executor: s3transfer.futures.BoundedExecutor
+ :param request_executor: The request executor associated with the
+ transfer manager
+
+ :type transfer_future: s3transfer.futures.TransferFuture
+ :param transfer_future: The transfer future associated with the
+ transfer request that tasks are being submitted for
+ """
+ upload_input_manager = self._get_upload_input_manager_cls(
+ transfer_future)(
+ osutil, self._transfer_coordinator, bandwidth_limiter)
+
+ # Determine the size if it was not provided
+ if transfer_future.meta.size is None:
+ upload_input_manager.provide_transfer_size(transfer_future)
+
+ # Do a multipart upload if needed, otherwise do a regular put object.
+ if not upload_input_manager.requires_multipart_upload(
+ transfer_future, config):
+ self._submit_upload_request(
+ client, config, osutil, request_executor, transfer_future,
+ upload_input_manager)
+ else:
+ self._submit_multipart_request(
+ client, config, osutil, request_executor, transfer_future,
+ upload_input_manager)
+
+ def _submit_upload_request(self, client, config, osutil, request_executor,
+ transfer_future, upload_input_manager):
+ call_args = transfer_future.meta.call_args
+
+ # Get any tags that need to be associated to the put object task
+ put_object_tag = self._get_upload_task_tag(
+ upload_input_manager, 'put_object')
+
+ # Submit the request of a single upload.
+ self._transfer_coordinator.submit(
+ request_executor,
+ PutObjectTask(
+ transfer_coordinator=self._transfer_coordinator,
+ main_kwargs={
+ 'client': client,
+ 'fileobj': upload_input_manager.get_put_object_body(
+ transfer_future),
+ 'bucket': call_args.bucket,
+ 'key': call_args.key,
+ 'extra_args': call_args.extra_args
+ },
+ is_final=True
+ ),
+ tag=put_object_tag
+ )
+
+ def _submit_multipart_request(self, client, config, osutil,
+ request_executor, transfer_future,
+ upload_input_manager):
+ call_args = transfer_future.meta.call_args
+
+ # Submit the request to create a multipart upload.
+ create_multipart_future = self._transfer_coordinator.submit(
+ request_executor,
+ CreateMultipartUploadTask(
+ transfer_coordinator=self._transfer_coordinator,
+ main_kwargs={
+ 'client': client,
+ 'bucket': call_args.bucket,
+ 'key': call_args.key,
+ 'extra_args': call_args.extra_args,
+ }
+ )
+ )
+
+ # Submit requests to upload the parts of the file.
+ part_futures = []
+ extra_part_args = self._extra_upload_part_args(call_args.extra_args)
+
+ # Get any tags that need to be associated to the submitted task
+ # for uploading the data
+ upload_part_tag = self._get_upload_task_tag(
+ upload_input_manager, 'upload_part')
+
+ size = transfer_future.meta.size
+ adjuster = ChunksizeAdjuster()
+ chunksize = adjuster.adjust_chunksize(config.multipart_chunksize, size)
+ part_iterator = upload_input_manager.yield_upload_part_bodies(
+ transfer_future, chunksize)
+
+ for part_number, fileobj in part_iterator:
+ part_futures.append(
+ self._transfer_coordinator.submit(
+ request_executor,
+ UploadPartTask(
+ transfer_coordinator=self._transfer_coordinator,
+ main_kwargs={
+ 'client': client,
+ 'fileobj': fileobj,
+ 'bucket': call_args.bucket,
+ 'key': call_args.key,
+ 'part_number': part_number,
+ 'extra_args': extra_part_args
+ },
+ pending_main_kwargs={
+ 'upload_id': create_multipart_future
+ }
+ ),
+ tag=upload_part_tag
+ )
+ )
+
+ complete_multipart_extra_args = self._extra_complete_multipart_args(
+ call_args.extra_args)
+ # Submit the request to complete the multipart upload.
+ self._transfer_coordinator.submit(
+ request_executor,
+ CompleteMultipartUploadTask(
+ transfer_coordinator=self._transfer_coordinator,
+ main_kwargs={
+ 'client': client,
+ 'bucket': call_args.bucket,
+ 'key': call_args.key,
+ 'extra_args': complete_multipart_extra_args,
+ },
+ pending_main_kwargs={
+ 'upload_id': create_multipart_future,
+ 'parts': part_futures
+ },
+ is_final=True
+ )
+ )
+
+ def _extra_upload_part_args(self, extra_args):
+ # Only the args in UPLOAD_PART_ARGS actually need to be passed
+ # onto the upload_part calls.
+ return get_filtered_dict(extra_args, self.UPLOAD_PART_ARGS)
+
+ def _extra_complete_multipart_args(self, extra_args):
+ return get_filtered_dict(extra_args, self.COMPLETE_MULTIPART_ARGS)
+
+ def _get_upload_task_tag(self, upload_input_manager, operation_name):
+ tag = None
+ if upload_input_manager.stores_body_in_memory(operation_name):
+ tag = IN_MEMORY_UPLOAD_TAG
+ return tag
+
+
+class PutObjectTask(Task):
+ """Task to do a nonmultipart upload"""
+ def _main(self, client, fileobj, bucket, key, extra_args):
+ """
+ :param client: The client to use when calling PutObject
+ :param fileobj: The file to upload.
+ :param bucket: The name of the bucket to upload to
+ :param key: The name of the key to upload to
+ :param extra_args: A dictionary of any extra arguments that may be
+ used in the upload.
+ """
+ with fileobj as body:
+ client.put_object(Bucket=bucket, Key=key, Body=body, **extra_args)
+
+
+class UploadPartTask(Task):
+ """Task to upload a part in a multipart upload"""
+ def _main(self, client, fileobj, bucket, key, upload_id, part_number,
+ extra_args):
+ """
+ :param client: The client to use when calling UploadPart
+ :param fileobj: The file to upload.
+ :param bucket: The name of the bucket to upload to
+ :param key: The name of the key to upload to
+ :param upload_id: The id of the upload
+ :param part_number: The number representing the part of the multipart
+ upload
+ :param extra_args: A dictionary of any extra arguments that may be
+ used in the upload.
+
+ :rtype: dict
+ :returns: A dictionary representing a part::
+
+ {'ETag': etag_value, 'PartNumber': part_number}
+
+ This value can be appended to a list to be used to complete
+ the multipart upload.
+ """
+ with fileobj as body:
+ response = client.upload_part(
+ Bucket=bucket, Key=key,
+ UploadId=upload_id, PartNumber=part_number,
+ Body=body, **extra_args)
+ etag = response['ETag']
+ return {'ETag': etag, 'PartNumber': part_number}
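To show how the per-part dictionaries returned above are consumed, here is a hedged manual walk-through of the multipart sequence these tasks coordinate; the names client, bucket, key and part_bodies are assumptions made for the example, not part of the diff:

# Assumes: client is a boto3 S3 client, bucket/key name an existing target,
# and part_bodies is an iterable of pre-chunked part payloads.
upload_id = client.create_multipart_upload(Bucket=bucket, Key=key)['UploadId']
parts = []
for part_number, body in enumerate(part_bodies, start=1):
    response = client.upload_part(
        Bucket=bucket, Key=key, UploadId=upload_id,
        PartNumber=part_number, Body=body)
    parts.append({'ETag': response['ETag'], 'PartNumber': part_number})
client.complete_multipart_upload(
    Bucket=bucket, Key=key, UploadId=upload_id,
    MultipartUpload={'Parts': parts})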
diff --git a/contrib/python/s3transfer/py2/s3transfer/utils.py b/contrib/python/s3transfer/py2/s3transfer/utils.py
index b93cd5cf94..67f7e028fc 100644
--- a/contrib/python/s3transfer/py2/s3transfer/utils.py
+++ b/contrib/python/s3transfer/py2/s3transfer/utils.py
@@ -1,313 +1,313 @@
-# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License"). You
-# may not use this file except in compliance with the License. A copy of
-# the License is located at
-#
-# http://aws.amazon.com/apache2.0/
-#
-# or in the "license" file accompanying this file. This file is
-# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
-# ANY KIND, either express or implied. See the License for the specific
-# language governing permissions and limitations under the License.
-import random
-import time
-import functools
-import math
-import os
+# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You
+# may not use this file except in compliance with the License. A copy of
+# the License is located at
+#
+# http://aws.amazon.com/apache2.0/
+#
+# or in the "license" file accompanying this file. This file is
+# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific
+# language governing permissions and limitations under the License.
+import random
+import time
+import functools
+import math
+import os
import socket
-import stat
-import string
-import logging
-import threading
-import io
-from collections import defaultdict
-
+import stat
+import string
+import logging
+import threading
+import io
+from collections import defaultdict
+
from botocore.exceptions import IncompleteReadError
from botocore.exceptions import ReadTimeoutError
from s3transfer.compat import SOCKET_ERROR
-from s3transfer.compat import rename_file
-from s3transfer.compat import seekable
+from s3transfer.compat import rename_file
+from s3transfer.compat import seekable
from s3transfer.compat import fallocate
-
-
-MAX_PARTS = 10000
-# The maximum file size you can upload via S3 per request.
-# See: http://docs.aws.amazon.com/AmazonS3/latest/dev/UploadingObjects.html
-# and: http://docs.aws.amazon.com/AmazonS3/latest/dev/qfacts.html
-MAX_SINGLE_UPLOAD_SIZE = 5 * (1024 ** 3)
-MIN_UPLOAD_CHUNKSIZE = 5 * (1024 ** 2)
-logger = logging.getLogger(__name__)
-
-
+
+
+MAX_PARTS = 10000
+# The maximum file size you can upload via S3 per request.
+# See: http://docs.aws.amazon.com/AmazonS3/latest/dev/UploadingObjects.html
+# and: http://docs.aws.amazon.com/AmazonS3/latest/dev/qfacts.html
+MAX_SINGLE_UPLOAD_SIZE = 5 * (1024 ** 3)
+MIN_UPLOAD_CHUNKSIZE = 5 * (1024 ** 2)
+logger = logging.getLogger(__name__)
+
+
S3_RETRYABLE_DOWNLOAD_ERRORS = (
socket.timeout, SOCKET_ERROR, ReadTimeoutError, IncompleteReadError
)
-def random_file_extension(num_digits=8):
- return ''.join(random.choice(string.hexdigits) for _ in range(num_digits))
-
-
-def signal_not_transferring(request, operation_name, **kwargs):
- if operation_name in ['PutObject', 'UploadPart'] and \
- hasattr(request.body, 'signal_not_transferring'):
- request.body.signal_not_transferring()
-
-
-def signal_transferring(request, operation_name, **kwargs):
- if operation_name in ['PutObject', 'UploadPart'] and \
- hasattr(request.body, 'signal_transferring'):
- request.body.signal_transferring()
-
-
+def random_file_extension(num_digits=8):
+ return ''.join(random.choice(string.hexdigits) for _ in range(num_digits))
+
+
+def signal_not_transferring(request, operation_name, **kwargs):
+ if operation_name in ['PutObject', 'UploadPart'] and \
+ hasattr(request.body, 'signal_not_transferring'):
+ request.body.signal_not_transferring()
+
+
+def signal_transferring(request, operation_name, **kwargs):
+ if operation_name in ['PutObject', 'UploadPart'] and \
+ hasattr(request.body, 'signal_transferring'):
+ request.body.signal_transferring()
+
+
def calculate_num_parts(size, part_size):
return int(math.ceil(size / float(part_size)))
-def calculate_range_parameter(part_size, part_index, num_parts,
- total_size=None):
- """Calculate the range parameter for multipart downloads/copies
-
- :type part_size: int
- :param part_size: The size of the part
-
- :type part_index: int
- :param part_index: The index for which this parts starts. This index starts
- at zero
-
- :type num_parts: int
- :param num_parts: The total number of parts in the transfer
-
- :returns: The value to use for Range parameter on downloads or
- the CopySourceRange parameter for copies
- """
- # Used to calculate the Range parameter
- start_range = part_index * part_size
- if part_index == num_parts - 1:
- end_range = ''
- if total_size is not None:
- end_range = str(total_size - 1)
- else:
- end_range = start_range + part_size - 1
- range_param = 'bytes=%s-%s' % (start_range, end_range)
- return range_param
-
-
-def get_callbacks(transfer_future, callback_type):
- """Retrieves callbacks from a subscriber
-
- :type transfer_future: s3transfer.futures.TransferFuture
- :param transfer_future: The transfer future the subscriber is associated
- to.
-
- :type callback_type: str
- :param callback_type: The type of callback to retrieve from the subscriber.
- Valid types include:
- * 'queued'
- * 'progress'
- * 'done'
-
- :returns: A list of callbacks for the type specified. All callbacks are
- preinjected with the transfer future.
- """
- callbacks = []
- for subscriber in transfer_future.meta.call_args.subscribers:
- callback_name = 'on_' + callback_type
- if hasattr(subscriber, callback_name):
- callbacks.append(
- functools.partial(
- getattr(subscriber, callback_name),
- future=transfer_future
- )
- )
- return callbacks
-
-
-def invoke_progress_callbacks(callbacks, bytes_transferred):
- """Calls all progress callbacks
-
- :param callbacks: A list of progress callbacks to invoke
- :param bytes_transferred: The number of bytes transferred. This is passed
- to the callbacks. If no bytes were transferred the callbacks will not
- be invoked because no progress was achieved. It is also possible
- to receive a negative amount which comes from retrying a transfer
- request.
- """
- # Only invoke the callbacks if bytes were actually transferred.
- if bytes_transferred:
- for callback in callbacks:
- callback(bytes_transferred=bytes_transferred)
-
-
-def get_filtered_dict(original_dict, whitelisted_keys):
- """Gets a dictionary filtered by whitelisted keys
-
- :param original_dict: The original dictionary of arguments to source keys
- and values.
- :param whitelisted_keys: A list of keys to include in the filtered
- dictionary.
-
- :returns: A dictionary containing key/values from the original dictionary
- whose key was included in the whitelist
- """
- filtered_dict = {}
- for key, value in original_dict.items():
- if key in whitelisted_keys:
- filtered_dict[key] = value
- return filtered_dict
-
-
-class CallArgs(object):
- def __init__(self, **kwargs):
- """A class that records call arguments
-
- The call arguments must be passed as keyword arguments. It will set
- each keyword argument as an attribute of the object along with its
- associated value.
- """
- for arg, value in kwargs.items():
- setattr(self, arg, value)
-
-
-class FunctionContainer(object):
- """An object that contains a function and any args or kwargs to call it
-
- When called the provided function will be called with provided args
- and kwargs.
- """
- def __init__(self, func, *args, **kwargs):
- self._func = func
- self._args = args
- self._kwargs = kwargs
-
- def __repr__(self):
- return 'Function: %s with args %s and kwargs %s' % (
- self._func, self._args, self._kwargs)
-
- def __call__(self):
- return self._func(*self._args, **self._kwargs)
-
-
-class CountCallbackInvoker(object):
- """An abstraction to invoke a callback when a shared count reaches zero
-
- :param callback: Callback to invoke when the finalized count reaches zero
- """
- def __init__(self, callback):
- self._lock = threading.Lock()
- self._callback = callback
- self._count = 0
- self._is_finalized = False
-
- @property
- def current_count(self):
- with self._lock:
- return self._count
-
- def increment(self):
- """Increment the count by one"""
- with self._lock:
- if self._is_finalized:
- raise RuntimeError(
- 'Counter has been finalized it can no longer be '
- 'incremented.'
- )
- self._count += 1
-
- def decrement(self):
- """Decrement the count by one"""
- with self._lock:
- if self._count == 0:
- raise RuntimeError(
- 'Counter is at zero. It cannot dip below zero')
- self._count -= 1
- if self._is_finalized and self._count == 0:
- self._callback()
-
- def finalize(self):
- """Finalize the counter
-
- Once finalized, the counter can never be incremented, and the callback
- will be invoked once the count reaches zero
- """
- with self._lock:
- self._is_finalized = True
- if self._count == 0:
- self._callback()
-
-
-class OSUtils(object):
+def calculate_range_parameter(part_size, part_index, num_parts,
+ total_size=None):
+ """Calculate the range parameter for multipart downloads/copies
+
+ :type part_size: int
+ :param part_size: The size of the part
+
+ :type part_index: int
+ :param part_index: The index for which this parts starts. This index starts
+ at zero
+
+ :type num_parts: int
+ :param num_parts: The total number of parts in the transfer
+
+ :returns: The value to use for Range parameter on downloads or
+ the CopySourceRange parameter for copies
+ """
+ # Used to calculate the Range parameter
+ start_range = part_index * part_size
+ if part_index == num_parts - 1:
+ end_range = ''
+ if total_size is not None:
+ end_range = str(total_size - 1)
+ else:
+ end_range = start_range + part_size - 1
+ range_param = 'bytes=%s-%s' % (start_range, end_range)
+ return range_param
+
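For example, with calculate_range_parameter as defined above, a transfer split into three 5 MiB parts produces the following range strings (the last part is open-ended unless total_size is supplied):

part_size = 5 * 1024 * 1024
print(calculate_range_parameter(part_size, 0, 3))  # bytes=0-5242879
print(calculate_range_parameter(part_size, 1, 3))  # bytes=5242880-10485759
print(calculate_range_parameter(part_size, 2, 3))  # bytes=10485760-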
+
+def get_callbacks(transfer_future, callback_type):
+ """Retrieves callbacks from a subscriber
+
+ :type transfer_future: s3transfer.futures.TransferFuture
+ :param transfer_future: The transfer future the subscriber is associated
+ to.
+
+ :type callback_type: str
+ :param callback_type: The type of callback to retrieve from the subscriber.
+ Valid types include:
+ * 'queued'
+ * 'progress'
+ * 'done'
+
+ :returns: A list of callbacks for the type specified. All callbacks are
+ preinjected with the transfer future.
+ """
+ callbacks = []
+ for subscriber in transfer_future.meta.call_args.subscribers:
+ callback_name = 'on_' + callback_type
+ if hasattr(subscriber, callback_name):
+ callbacks.append(
+ functools.partial(
+ getattr(subscriber, callback_name),
+ future=transfer_future
+ )
+ )
+ return callbacks
+
+
+def invoke_progress_callbacks(callbacks, bytes_transferred):
+ """Calls all progress callbacks
+
+ :param callbacks: A list of progress callbacks to invoke
+ :param bytes_transferred: The number of bytes transferred. This is passed
+ to the callbacks. If no bytes were transferred the callbacks will not
+ be invoked because no progress was achieved. It is also possible
+ to receive a negative amount which comes from retrying a transfer
+ request.
+ """
+ # Only invoke the callbacks if bytes were actually transferred.
+ if bytes_transferred:
+ for callback in callbacks:
+ callback(bytes_transferred=bytes_transferred)
+
+
+def get_filtered_dict(original_dict, whitelisted_keys):
+ """Gets a dictionary filtered by whitelisted keys
+
+ :param original_dict: The original dictionary of arguments to source keys
+ and values.
+ :param whitelisted_keys: A list of keys to include in the filtered
+ dictionary.
+
+ :returns: A dictionary containing key/values from the original dictionary
+ whose key was included in the whitelist
+ """
+ filtered_dict = {}
+ for key, value in original_dict.items():
+ if key in whitelisted_keys:
+ filtered_dict[key] = value
+ return filtered_dict
+
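Applied to the whitelists used by UploadSubmissionTask earlier in this diff, a small illustrative call (the argument values are made up):

extra_args = {
    'ACL': 'private',
    'RequestPayer': 'requester',
    'SSECustomerAlgorithm': 'AES256',
}
upload_part_args = get_filtered_dict(
    extra_args,
    ['SSECustomerKey', 'SSECustomerAlgorithm', 'SSECustomerKeyMD5',
     'RequestPayer', 'ExpectedBucketOwner'])
# upload_part_args now holds only RequestPayer and SSECustomerAlgorithm;
# ACL is dropped because it is not whitelisted for upload_part calls.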
+
+class CallArgs(object):
+ def __init__(self, **kwargs):
+ """A class that records call arguments
+
+ The call arguments must be passed as keyword arguments. It will set
+ each keyword argument as an attribute of the object along with its
+ associated value.
+ """
+ for arg, value in kwargs.items():
+ setattr(self, arg, value)
+
+
+class FunctionContainer(object):
+ """An object that contains a function and any args or kwargs to call it
+
+ When called the provided function will be called with provided args
+ and kwargs.
+ """
+ def __init__(self, func, *args, **kwargs):
+ self._func = func
+ self._args = args
+ self._kwargs = kwargs
+
+ def __repr__(self):
+ return 'Function: %s with args %s and kwargs %s' % (
+ self._func, self._args, self._kwargs)
+
+ def __call__(self):
+ return self._func(*self._args, **self._kwargs)
+
+
+class CountCallbackInvoker(object):
+ """An abstraction to invoke a callback when a shared count reaches zero
+
+ :param callback: Callback to invoke when the finalized count reaches zero
+ """
+ def __init__(self, callback):
+ self._lock = threading.Lock()
+ self._callback = callback
+ self._count = 0
+ self._is_finalized = False
+
+ @property
+ def current_count(self):
+ with self._lock:
+ return self._count
+
+ def increment(self):
+ """Increment the count by one"""
+ with self._lock:
+ if self._is_finalized:
+ raise RuntimeError(
+ 'Counter has been finalized it can no longer be '
+ 'incremented.'
+ )
+ self._count += 1
+
+ def decrement(self):
+ """Decrement the count by one"""
+ with self._lock:
+ if self._count == 0:
+ raise RuntimeError(
+ 'Counter is at zero. It cannot dip below zero')
+ self._count -= 1
+ if self._is_finalized and self._count == 0:
+ self._callback()
+
+ def finalize(self):
+ """Finalize the counter
+
+ Once finalized, the counter can never be incremented, and the callback
+ will be invoked once the count reaches zero
+ """
+ with self._lock:
+ self._is_finalized = True
+ if self._count == 0:
+ self._callback()
+
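A short usage sketch of CountCallbackInvoker as defined above: the callback fires only after finalize() has been called and every increment has been matched by a decrement:

from s3transfer.utils import CountCallbackInvoker  # defined in the module shown above


def on_done():
    print('all work done')


invoker = CountCallbackInvoker(on_done)
invoker.increment()    # two units of work submitted
invoker.increment()
invoker.decrement()    # one unit finished; callback not yet eligible
invoker.finalize()     # no further increments allowed
invoker.decrement()    # count reaches zero, so the callback fires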
+
+class OSUtils(object):
_MAX_FILENAME_LEN = 255
- def get_file_size(self, filename):
- return os.path.getsize(filename)
-
- def open_file_chunk_reader(self, filename, start_byte, size, callbacks):
- return ReadFileChunk.from_filename(filename, start_byte,
- size, callbacks,
- enable_callbacks=False)
-
- def open_file_chunk_reader_from_fileobj(self, fileobj, chunk_size,
- full_file_size, callbacks,
- close_callbacks=None):
- return ReadFileChunk(
- fileobj, chunk_size, full_file_size,
- callbacks=callbacks, enable_callbacks=False,
- close_callbacks=close_callbacks)
-
- def open(self, filename, mode):
- return open(filename, mode)
-
- def remove_file(self, filename):
- """Remove a file, noop if file does not exist."""
- # Unlike os.remove, if the file does not exist,
- # then this method does nothing.
- try:
- os.remove(filename)
- except OSError:
- pass
-
- def rename_file(self, current_filename, new_filename):
- rename_file(current_filename, new_filename)
-
- def is_special_file(cls, filename):
- """Checks to see if a file is a special UNIX file.
-
- It checks if the file is a character special device, block special
- device, FIFO, or socket.
-
- :param filename: Name of the file
-
- :returns: True if the file is a special file. False, if it is not.
- """
- # If it does not exist, it must be a new file so it cannot be
- # a special file.
- if not os.path.exists(filename):
- return False
- mode = os.stat(filename).st_mode
- # Character special device.
- if stat.S_ISCHR(mode):
- return True
- # Block special device
- if stat.S_ISBLK(mode):
- return True
- # Named pipe / FIFO
- if stat.S_ISFIFO(mode):
- return True
- # Socket.
- if stat.S_ISSOCK(mode):
- return True
- return False
-
+ def get_file_size(self, filename):
+ return os.path.getsize(filename)
+
+ def open_file_chunk_reader(self, filename, start_byte, size, callbacks):
+ return ReadFileChunk.from_filename(filename, start_byte,
+ size, callbacks,
+ enable_callbacks=False)
+
+ def open_file_chunk_reader_from_fileobj(self, fileobj, chunk_size,
+ full_file_size, callbacks,
+ close_callbacks=None):
+ return ReadFileChunk(
+ fileobj, chunk_size, full_file_size,
+ callbacks=callbacks, enable_callbacks=False,
+ close_callbacks=close_callbacks)
+
+ def open(self, filename, mode):
+ return open(filename, mode)
+
+ def remove_file(self, filename):
+ """Remove a file, noop if file does not exist."""
+ # Unlike os.remove, if the file does not exist,
+ # then this method does nothing.
+ try:
+ os.remove(filename)
+ except OSError:
+ pass
+
+ def rename_file(self, current_filename, new_filename):
+ rename_file(current_filename, new_filename)
+
+ def is_special_file(cls, filename):
+ """Checks to see if a file is a special UNIX file.
+
+ It checks if the file is a character special device, block special
+ device, FIFO, or socket.
+
+ :param filename: Name of the file
+
+ :returns: True if the file is a special file. False, if it is not.
+ """
+ # If it does not exist, it must be a new file so it cannot be
+ # a special file.
+ if not os.path.exists(filename):
+ return False
+ mode = os.stat(filename).st_mode
+ # Character special device.
+ if stat.S_ISCHR(mode):
+ return True
+ # Block special device
+ if stat.S_ISBLK(mode):
+ return True
+ # Named pipe / FIFO
+ if stat.S_ISFIFO(mode):
+ return True
+ # Socket.
+ if stat.S_ISSOCK(mode):
+ return True
+ return False
+
def get_temp_filename(self, filename):
suffix = os.extsep + random_file_extension()
path = os.path.dirname(filename)
name = os.path.basename(filename)
temp_filename = name[:self._MAX_FILENAME_LEN - len(suffix)] + suffix
return os.path.join(path, temp_filename)
-
+
def allocate(self, filename, size):
try:
with self.open(filename, 'wb') as f:
@@ -317,191 +317,191 @@ class OSUtils(object):
raise
-class DeferredOpenFile(object):
- def __init__(self, filename, start_byte=0, mode='rb', open_function=open):
- """A class that defers the opening of a file till needed
-
- This is useful for deferring opening of a file till it is needed
- in a separate thread, as there is a limit of how many open files
- there can be in a single thread for most operating systems. The
- file gets opened in the following methods: ``read()``, ``seek()``,
- and ``__enter__()``
-
- :type filename: str
- :param filename: The name of the file to open
-
- :type start_byte: int
- :param start_byte: The byte to seek to when the file is opened.
-
- :type mode: str
- :param mode: The mode to use to open the file
-
- :type open_function: function
- :param open_function: The function to use to open the file
- """
- self._filename = filename
- self._fileobj = None
- self._start_byte = start_byte
- self._mode = mode
- self._open_function = open_function
-
- def _open_if_needed(self):
- if self._fileobj is None:
- self._fileobj = self._open_function(self._filename, self._mode)
- if self._start_byte != 0:
- self._fileobj.seek(self._start_byte)
-
- @property
- def name(self):
- return self._filename
-
- def read(self, amount=None):
- self._open_if_needed()
- return self._fileobj.read(amount)
-
- def write(self, data):
- self._open_if_needed()
- self._fileobj.write(data)
-
+class DeferredOpenFile(object):
+ def __init__(self, filename, start_byte=0, mode='rb', open_function=open):
+ """A class that defers the opening of a file till needed
+
+ This is useful for deferring opening of a file till it is needed
+ in a separate thread, as there is a limit of how many open files
+ there can be in a single thread for most operating systems. The
+ file gets opened in the following methods: ``read()``, ``seek()``,
+ and ``__enter__()``
+
+ :type filename: str
+ :param filename: The name of the file to open
+
+ :type start_byte: int
+ :param start_byte: The byte to seek to when the file is opened.
+
+ :type mode: str
+ :param mode: The mode to use to open the file
+
+ :type open_function: function
+ :param open_function: The function to use to open the file
+ """
+ self._filename = filename
+ self._fileobj = None
+ self._start_byte = start_byte
+ self._mode = mode
+ self._open_function = open_function
+
+ def _open_if_needed(self):
+ if self._fileobj is None:
+ self._fileobj = self._open_function(self._filename, self._mode)
+ if self._start_byte != 0:
+ self._fileobj.seek(self._start_byte)
+
+ @property
+ def name(self):
+ return self._filename
+
+ def read(self, amount=None):
+ self._open_if_needed()
+ return self._fileobj.read(amount)
+
+ def write(self, data):
+ self._open_if_needed()
+ self._fileobj.write(data)
+
def seek(self, where, whence=0):
- self._open_if_needed()
+ self._open_if_needed()
self._fileobj.seek(where, whence)
-
- def tell(self):
- if self._fileobj is None:
- return self._start_byte
- return self._fileobj.tell()
-
- def close(self):
- if self._fileobj:
- self._fileobj.close()
-
- def __enter__(self):
- self._open_if_needed()
- return self
-
- def __exit__(self, *args, **kwargs):
- self.close()
-
-
-class ReadFileChunk(object):
- def __init__(self, fileobj, chunk_size, full_file_size,
- callbacks=None, enable_callbacks=True, close_callbacks=None):
- """
-
- Given a file object shown below::
-
- |___________________________________________________|
- 0 | | full_file_size
- |----chunk_size---|
- f.tell()
-
- :type fileobj: file
- :param fileobj: File like object
-
- :type chunk_size: int
- :param chunk_size: The max chunk size to read. Trying to read
- past the end of the chunk size will behave like you've
- reached the end of the file.
-
- :type full_file_size: int
- :param full_file_size: The entire content length associated
- with ``fileobj``.
-
- :type callbacks: A list of function(amount_read)
- :param callbacks: Called whenever data is read from this object in the
- order provided.
-
- :type enable_callbacks: boolean
- :param enable_callbacks: True to run callbacks. Otherwise, do not
- run callbacks.
-
- :type close_callbacks: A list of function()
- :param close_callbacks: Called when close is called. The function
- should take no arguments.
- """
- self._fileobj = fileobj
- self._start_byte = self._fileobj.tell()
- self._size = self._calculate_file_size(
- self._fileobj, requested_size=chunk_size,
- start_byte=self._start_byte, actual_file_size=full_file_size)
+
+ def tell(self):
+ if self._fileobj is None:
+ return self._start_byte
+ return self._fileobj.tell()
+
+ def close(self):
+ if self._fileobj:
+ self._fileobj.close()
+
+ def __enter__(self):
+ self._open_if_needed()
+ return self
+
+ def __exit__(self, *args, **kwargs):
+ self.close()
+
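+# Illustrative sketch (editorial addition, not part of the upstream module;
+# the path below is hypothetical): no file descriptor is held until the
+# object is first read from, seeked, or entered as a context manager.
+#
+#     lazy = DeferredOpenFile('/tmp/example.bin', start_byte=1024)
+#     lazy.tell()             # -> 1024, still without opening the file
+#     data = lazy.read(512)   # opens the file, seeks to 1024, then reads
+#     lazy.close()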
+
+class ReadFileChunk(object):
+ def __init__(self, fileobj, chunk_size, full_file_size,
+ callbacks=None, enable_callbacks=True, close_callbacks=None):
+ """
+
+ Given a file object shown below::
+
+ |___________________________________________________|
+ 0 | | full_file_size
+ |----chunk_size---|
+ f.tell()
+
+ :type fileobj: file
+ :param fileobj: File like object
+
+ :type chunk_size: int
+ :param chunk_size: The max chunk size to read. Trying to read
+ past the end of the chunk size will behave like you've
+ reached the end of the file.
+
+ :type full_file_size: int
+ :param full_file_size: The entire content length associated
+ with ``fileobj``.
+
+ :type callbacks: A list of function(amount_read)
+ :param callbacks: Called whenever data is read from this object in the
+ order provided.
+
+ :type enable_callbacks: boolean
+ :param enable_callbacks: True to run callbacks. Otherwise, do not
+ run callbacks.
+
+ :type close_callbacks: A list of function()
+ :param close_callbacks: Called when close is called. The function
+ should take no arguments.
+ """
+ self._fileobj = fileobj
+ self._start_byte = self._fileobj.tell()
+ self._size = self._calculate_file_size(
+ self._fileobj, requested_size=chunk_size,
+ start_byte=self._start_byte, actual_file_size=full_file_size)
# _amount_read represents the position in the chunk and may exceed
# the chunk size, but won't allow reads out of bounds.
- self._amount_read = 0
- self._callbacks = callbacks
- if callbacks is None:
- self._callbacks = []
- self._callbacks_enabled = enable_callbacks
- self._close_callbacks = close_callbacks
- if close_callbacks is None:
- self._close_callbacks = []
-
- @classmethod
- def from_filename(cls, filename, start_byte, chunk_size, callbacks=None,
- enable_callbacks=True):
- """Convenience factory function to create from a filename.
-
- :type start_byte: int
- :param start_byte: The first byte from which to start reading.
-
- :type chunk_size: int
- :param chunk_size: The max chunk size to read. Trying to read
- past the end of the chunk size will behave like you've
- reached the end of the file.
-
- :type full_file_size: int
- :param full_file_size: The entire content length associated
- with ``fileobj``.
-
- :type callbacks: function(amount_read)
- :param callbacks: Called whenever data is read from this object.
-
- :type enable_callbacks: bool
- :param enable_callbacks: Indicate whether to invoke callback
- during read() calls.
-
- :rtype: ``ReadFileChunk``
- :return: A new instance of ``ReadFileChunk``
-
- """
- f = open(filename, 'rb')
- f.seek(start_byte)
- file_size = os.fstat(f.fileno()).st_size
- return cls(f, chunk_size, file_size, callbacks, enable_callbacks)
-
- def _calculate_file_size(self, fileobj, requested_size, start_byte,
- actual_file_size):
- max_chunk_size = actual_file_size - start_byte
- return min(max_chunk_size, requested_size)
-
- def read(self, amount=None):
+ self._amount_read = 0
+ self._callbacks = callbacks
+ if callbacks is None:
+ self._callbacks = []
+ self._callbacks_enabled = enable_callbacks
+ self._close_callbacks = close_callbacks
+ if close_callbacks is None:
+ self._close_callbacks = []
+
+ @classmethod
+ def from_filename(cls, filename, start_byte, chunk_size, callbacks=None,
+ enable_callbacks=True):
+ """Convenience factory function to create from a filename.
+
+ :type start_byte: int
+ :param start_byte: The first byte from which to start reading.
+
+ :type chunk_size: int
+ :param chunk_size: The max chunk size to read. Trying to read
+ past the end of the chunk size will behave like you've
+ reached the end of the file.
+
+ :type full_file_size: int
+ :param full_file_size: The entire content length associated
+ with ``fileobj``.
+
+ :type callbacks: function(amount_read)
+ :param callbacks: Called whenever data is read from this object.
+
+ :type enable_callbacks: bool
+ :param enable_callbacks: Indicate whether to invoke callback
+ during read() calls.
+
+ :rtype: ``ReadFileChunk``
+ :return: A new instance of ``ReadFileChunk``
+
+ """
+ f = open(filename, 'rb')
+ f.seek(start_byte)
+ file_size = os.fstat(f.fileno()).st_size
+ return cls(f, chunk_size, file_size, callbacks, enable_callbacks)
+
+ def _calculate_file_size(self, fileobj, requested_size, start_byte,
+ actual_file_size):
+ max_chunk_size = actual_file_size - start_byte
+ return min(max_chunk_size, requested_size)
+
+ def read(self, amount=None):
amount_left = max(self._size - self._amount_read, 0)
- if amount is None:
+ if amount is None:
amount_to_read = amount_left
- else:
+ else:
amount_to_read = min(amount_left, amount)
- data = self._fileobj.read(amount_to_read)
- self._amount_read += len(data)
- if self._callbacks is not None and self._callbacks_enabled:
- invoke_progress_callbacks(self._callbacks, len(data))
- return data
-
- def signal_transferring(self):
- self.enable_callback()
- if hasattr(self._fileobj, 'signal_transferring'):
- self._fileobj.signal_transferring()
-
- def signal_not_transferring(self):
- self.disable_callback()
- if hasattr(self._fileobj, 'signal_not_transferring'):
- self._fileobj.signal_not_transferring()
-
- def enable_callback(self):
- self._callbacks_enabled = True
-
- def disable_callback(self):
- self._callbacks_enabled = False
-
+ data = self._fileobj.read(amount_to_read)
+ self._amount_read += len(data)
+ if self._callbacks is not None and self._callbacks_enabled:
+ invoke_progress_callbacks(self._callbacks, len(data))
+ return data
+
+ def signal_transferring(self):
+ self.enable_callback()
+ if hasattr(self._fileobj, 'signal_transferring'):
+ self._fileobj.signal_transferring()
+
+ def signal_not_transferring(self):
+ self.disable_callback()
+ if hasattr(self._fileobj, 'signal_not_transferring'):
+ self._fileobj.signal_not_transferring()
+
+ def enable_callback(self):
+ self._callbacks_enabled = True
+
+ def disable_callback(self):
+ self._callbacks_enabled = False
+
def seek(self, where, whence=0):
if whence not in (0, 1, 2):
# Mimic io's error for invalid whence values
@@ -517,247 +517,247 @@ class ReadFileChunk(object):
where += self._size
self._fileobj.seek(max(where, self._start_byte))
- if self._callbacks is not None and self._callbacks_enabled:
- # To also rewind the callback() for an accurate progress report
+ if self._callbacks is not None and self._callbacks_enabled:
+ # To also rewind the callback() for an accurate progress report
bounded_where = max(min(where - self._start_byte, self._size), 0)
bounded_amount_read = min(self._amount_read, self._size)
amount = bounded_where - bounded_amount_read
- invoke_progress_callbacks(
+ invoke_progress_callbacks(
self._callbacks, bytes_transferred=amount)
self._amount_read = max(where - self._start_byte, 0)
-
- def close(self):
- if self._close_callbacks is not None and self._callbacks_enabled:
- for callback in self._close_callbacks:
- callback()
- self._fileobj.close()
-
- def tell(self):
- return self._amount_read
-
- def __len__(self):
- # __len__ is defined because requests will try to determine the length
- # of the stream to set a content length. In the normal case
- # of the file it will just stat the file, but we need to change that
- # behavior. By providing a __len__, requests will use that instead
- # of stat'ing the file.
- return self._size
-
- def __enter__(self):
- return self
-
- def __exit__(self, *args, **kwargs):
- self.close()
-
- def __iter__(self):
- # This is a workaround for http://bugs.python.org/issue17575
- # Basically httplib will try to iterate over the contents, even
- # if it's a file-like object. This wasn't noticed because we've
- # already exhausted the stream so iterating over the file immediately
- # stops, which is what we're simulating here.
- return iter([])
-
-
-class StreamReaderProgress(object):
- """Wrapper for a read only stream that adds progress callbacks."""
- def __init__(self, stream, callbacks=None):
- self._stream = stream
- self._callbacks = callbacks
- if callbacks is None:
- self._callbacks = []
-
- def read(self, *args, **kwargs):
- value = self._stream.read(*args, **kwargs)
- invoke_progress_callbacks(self._callbacks, len(value))
- return value
-
-
-class NoResourcesAvailable(Exception):
- pass
-
-
-class TaskSemaphore(object):
- def __init__(self, count):
- """A semaphore for the purpose of limiting the number of tasks
-
- :param count: The size of semaphore
- """
- self._semaphore = threading.Semaphore(count)
-
- def acquire(self, tag, blocking=True):
- """Acquire the semaphore
-
- :param tag: A tag identifying what is acquiring the semaphore. Note
- that this is not really needed to directly use this class but is
- needed for API compatibility with the SlidingWindowSemaphore
- implementation.
- :param blocking: If True, block until it can be acquired. If False,
- do not block and raise an exception if it cannot be acquired.
-
- :returns: A token (can be None) to use when releasing the semaphore
- """
- logger.debug("Acquiring %s", tag)
- if not self._semaphore.acquire(blocking):
- raise NoResourcesAvailable("Cannot acquire tag '%s'" % tag)
-
- def release(self, tag, acquire_token):
- """Release the semaphore
-
- :param tag: A tag identifying what is releasing the semaphore
- :param acquire_token: The token returned from when the semaphore was
- acquired. Note that this is not really needed to directly use this
- class but is needed for API compatibility with the
- SlidingWindowSemaphore implementation.
- """
- logger.debug("Releasing acquire %s/%s" % (tag, acquire_token))
- self._semaphore.release()
-
-
-class SlidingWindowSemaphore(TaskSemaphore):
- """A semaphore used to coordinate sequential resource access.
-
- This class is similar to the stdlib BoundedSemaphore:
-
- * It's initialized with a count.
- * Each call to ``acquire()`` decrements the counter.
- * If the count is at zero, then ``acquire()`` will either block until the
- count increases, or if ``blocking=False``, then it will raise
- a NoResourcesAvailable exception indicating that it failed to acquire the
- semaphore.
-
- The main difference is that this semaphore is used to limit
- access to a resource that requires sequential access. For example,
- if I want to access resource R that has 20 subresources R_0 - R_19,
- this semaphore can also enforce that you only have a max range of
- 10 at any given point in time. You must also specify a tag name
- when you acquire the semaphore. The sliding window semantics apply
- on a per tag basis. The internal count will only be incremented
- when the minimum sequence number for a tag is released.
-
- """
- def __init__(self, count):
- self._count = count
- # Dict[tag, next_sequence_number].
- self._tag_sequences = defaultdict(int)
- self._lowest_sequence = {}
- self._lock = threading.Lock()
- self._condition = threading.Condition(self._lock)
- # Dict[tag, List[sequence_number]]
- self._pending_release = {}
-
- def current_count(self):
- with self._lock:
- return self._count
-
- def acquire(self, tag, blocking=True):
- logger.debug("Acquiring %s", tag)
- self._condition.acquire()
- try:
- if self._count == 0:
- if not blocking:
- raise NoResourcesAvailable("Cannot acquire tag '%s'" % tag)
- else:
- while self._count == 0:
- self._condition.wait()
- # self._count is no longer zero.
- # First, check if this is the first time we're seeing this tag.
- sequence_number = self._tag_sequences[tag]
- if sequence_number == 0:
- # First time seeing the tag, so record we're at 0.
- self._lowest_sequence[tag] = sequence_number
- self._tag_sequences[tag] += 1
- self._count -= 1
- return sequence_number
- finally:
- self._condition.release()
-
- def release(self, tag, acquire_token):
- sequence_number = acquire_token
- logger.debug("Releasing acquire %s/%s", tag, sequence_number)
- self._condition.acquire()
- try:
- if tag not in self._tag_sequences:
- raise ValueError("Attempted to release unknown tag: %s" % tag)
- max_sequence = self._tag_sequences[tag]
- if self._lowest_sequence[tag] == sequence_number:
- # We can immediately process this request and free up
- # resources.
- self._lowest_sequence[tag] += 1
- self._count += 1
- self._condition.notify()
- queued = self._pending_release.get(tag, [])
- while queued:
- if self._lowest_sequence[tag] == queued[-1]:
- queued.pop()
- self._lowest_sequence[tag] += 1
- self._count += 1
- else:
- break
- elif self._lowest_sequence[tag] < sequence_number < max_sequence:
- # We can't do anything right now because we're still waiting
- # for the min sequence for the tag to be released. We have
- # to queue this for pending release.
- self._pending_release.setdefault(
- tag, []).append(sequence_number)
- self._pending_release[tag].sort(reverse=True)
- else:
- raise ValueError(
- "Attempted to release unknown sequence number "
- "%s for tag: %s" % (sequence_number, tag))
- finally:
- self._condition.release()
-
-
-class ChunksizeAdjuster(object):
- def __init__(self, max_size=MAX_SINGLE_UPLOAD_SIZE,
- min_size=MIN_UPLOAD_CHUNKSIZE, max_parts=MAX_PARTS):
- self.max_size = max_size
- self.min_size = min_size
- self.max_parts = max_parts
-
- def adjust_chunksize(self, current_chunksize, file_size=None):
- """Get a chunksize close to current that fits within all S3 limits.
-
- :type current_chunksize: int
- :param current_chunksize: The currently configured chunksize.
-
- :type file_size: int or None
- :param file_size: The size of the file to upload. This might be None
- if the object being transferred has an unknown size.
-
- :returns: A valid chunksize that fits within configured limits.
- """
- chunksize = current_chunksize
- if file_size is not None:
- chunksize = self._adjust_for_max_parts(chunksize, file_size)
- return self._adjust_for_chunksize_limits(chunksize)
-
- def _adjust_for_chunksize_limits(self, current_chunksize):
- if current_chunksize > self.max_size:
- logger.debug(
- "Chunksize greater than maximum chunksize. "
- "Setting to %s from %s." % (self.max_size, current_chunksize))
- return self.max_size
- elif current_chunksize < self.min_size:
- logger.debug(
- "Chunksize less than minimum chunksize. "
- "Setting to %s from %s." % (self.min_size, current_chunksize))
- return self.min_size
- else:
- return current_chunksize
-
- def _adjust_for_max_parts(self, current_chunksize, file_size):
- chunksize = current_chunksize
- num_parts = int(math.ceil(file_size / float(chunksize)))
-
- while num_parts > self.max_parts:
- chunksize *= 2
- num_parts = int(math.ceil(file_size / float(chunksize)))
-
- if chunksize != current_chunksize:
- logger.debug(
- "Chunksize would result in the number of parts exceeding the "
- "maximum. Setting to %s from %s." %
- (chunksize, current_chunksize))
-
- return chunksize
+
+ def close(self):
+ if self._close_callbacks is not None and self._callbacks_enabled:
+ for callback in self._close_callbacks:
+ callback()
+ self._fileobj.close()
+
+ def tell(self):
+ return self._amount_read
+
+ def __len__(self):
+ # __len__ is defined because requests will try to determine the length
+ # of the stream to set a content length. In the normal case
+ # of the file it will just stat the file, but we need to change that
+ # behavior. By providing a __len__, requests will use that instead
+ # of stat'ing the file.
+ return self._size
+
+ def __enter__(self):
+ return self
+
+ def __exit__(self, *args, **kwargs):
+ self.close()
+
+ def __iter__(self):
+ # This is a workaround for http://bugs.python.org/issue17575
+ # Basically httplib will try to iterate over the contents, even
+ # if it's a file-like object. This wasn't noticed because we've
+ # already exhausted the stream so iterating over the file immediately
+ # stops, which is what we're simulating here.
+ return iter([])
+
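+# Illustrative sketch (editorial addition, not part of the upstream module;
+# the path is hypothetical): exposing a bounded window of a file with
+# progress callbacks, as used for individual multipart upload parts.
+#
+#     def on_progress(bytes_transferred):
+#         print('read %s bytes' % bytes_transferred)
+#
+#     chunk = ReadFileChunk.from_filename(
+#         '/tmp/example.bin', start_byte=0, chunk_size=1024,
+#         callbacks=[on_progress], enable_callbacks=True)
+#     data = chunk.read()    # at most 1024 bytes; on_progress is invoked
+#     chunk.close()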
+
+class StreamReaderProgress(object):
+ """Wrapper for a read only stream that adds progress callbacks."""
+ def __init__(self, stream, callbacks=None):
+ self._stream = stream
+ self._callbacks = callbacks
+ if callbacks is None:
+ self._callbacks = []
+
+ def read(self, *args, **kwargs):
+ value = self._stream.read(*args, **kwargs)
+ invoke_progress_callbacks(self._callbacks, len(value))
+ return value
+
+
+class NoResourcesAvailable(Exception):
+ pass
+
+
+class TaskSemaphore(object):
+ def __init__(self, count):
+ """A semaphore for the purpose of limiting the number of tasks
+
+ :param count: The size of semaphore
+ """
+ self._semaphore = threading.Semaphore(count)
+
+ def acquire(self, tag, blocking=True):
+ """Acquire the semaphore
+
+ :param tag: A tag identifying what is acquiring the semaphore. Note
+ that this is not really needed to directly use this class but is
+ needed for API compatibility with the SlidingWindowSemaphore
+ implementation.
+ :param blocking: If True, block until it can be acquired. If False,
+ do not block and raise an exception if it cannot be acquired.
+
+ :returns: A token (can be None) to use when releasing the semaphore
+ """
+ logger.debug("Acquiring %s", tag)
+ if not self._semaphore.acquire(blocking):
+ raise NoResourcesAvailable("Cannot acquire tag '%s'" % tag)
+
+ def release(self, tag, acquire_token):
+ """Release the semaphore
+
+ :param tag: A tag identifying what is releasing the semaphore
+ :param acquire_token: The token returned from when the semaphore was
+ acquired. Note that this is not really needed to directly use this
+ class but is needed for API compatibility with the
+ SlidingWindowSemaphore implementation.
+ """
+ logger.debug("Releasing acquire %s/%s" % (tag, acquire_token))
+ self._semaphore.release()
+
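+# Illustrative sketch (editorial addition, not part of the upstream module):
+# a plain counting semaphore behind the tag/token API used elsewhere in
+# s3transfer.
+#
+#     sem = TaskSemaphore(1)
+#     token = sem.acquire('upload-tag')          # returns None for this class
+#     try:
+#         sem.acquire('upload-tag', blocking=False)
+#     except NoResourcesAvailable:
+#         pass                                   # the only slot is taken
+#     sem.release('upload-tag', token)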
+
+class SlidingWindowSemaphore(TaskSemaphore):
+ """A semaphore used to coordinate sequential resource access.
+
+ This class is similar to the stdlib BoundedSemaphore:
+
+ * It's initialized with a count.
+ * Each call to ``acquire()`` decrements the counter.
+ * If the count is at zero, then ``acquire()`` will either block until the
+ count increases, or if ``blocking=False``, then it will raise
+ a NoResourcesAvailable exception indicating that it failed to acquire the
+ semaphore.
+
+ The main difference is that this semaphore is used to limit
+ access to a resource that requires sequential access. For example,
+ if I want to access resource R that has 20 subresources R_0 - R_19,
+ this semaphore can also enforce that you only have a max range of
+ 10 at any given point in time. You must also specify a tag name
+ when you acquire the semaphore. The sliding window semantics apply
+ on a per tag basis. The internal count will only be incremented
+ when the minimum sequence number for a tag is released.
+
+ """
+ def __init__(self, count):
+ self._count = count
+ # Dict[tag, next_sequence_number].
+ self._tag_sequences = defaultdict(int)
+ self._lowest_sequence = {}
+ self._lock = threading.Lock()
+ self._condition = threading.Condition(self._lock)
+ # Dict[tag, List[sequence_number]]
+ self._pending_release = {}
+
+ def current_count(self):
+ with self._lock:
+ return self._count
+
+ def acquire(self, tag, blocking=True):
+ logger.debug("Acquiring %s", tag)
+ self._condition.acquire()
+ try:
+ if self._count == 0:
+ if not blocking:
+ raise NoResourcesAvailable("Cannot acquire tag '%s'" % tag)
+ else:
+ while self._count == 0:
+ self._condition.wait()
+ # self._count is no longer zero.
+ # First, check if this is the first time we're seeing this tag.
+ sequence_number = self._tag_sequences[tag]
+ if sequence_number == 0:
+ # First time seeing the tag, so record we're at 0.
+ self._lowest_sequence[tag] = sequence_number
+ self._tag_sequences[tag] += 1
+ self._count -= 1
+ return sequence_number
+ finally:
+ self._condition.release()
+
+ def release(self, tag, acquire_token):
+ sequence_number = acquire_token
+ logger.debug("Releasing acquire %s/%s", tag, sequence_number)
+ self._condition.acquire()
+ try:
+ if tag not in self._tag_sequences:
+ raise ValueError("Attempted to release unknown tag: %s" % tag)
+ max_sequence = self._tag_sequences[tag]
+ if self._lowest_sequence[tag] == sequence_number:
+ # We can immediately process this request and free up
+ # resources.
+ self._lowest_sequence[tag] += 1
+ self._count += 1
+ self._condition.notify()
+ queued = self._pending_release.get(tag, [])
+ while queued:
+ if self._lowest_sequence[tag] == queued[-1]:
+ queued.pop()
+ self._lowest_sequence[tag] += 1
+ self._count += 1
+ else:
+ break
+ elif self._lowest_sequence[tag] < sequence_number < max_sequence:
+ # We can't do anything right now because we're still waiting
+ # for the min sequence for the tag to be released. We have
+ # to queue this for pending release.
+ self._pending_release.setdefault(
+ tag, []).append(sequence_number)
+ self._pending_release[tag].sort(reverse=True)
+ else:
+ raise ValueError(
+ "Attempted to release unknown sequence number "
+ "%s for tag: %s" % (sequence_number, tag))
+ finally:
+ self._condition.release()
+
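+# Illustrative sketch (editorial addition, not part of the upstream module):
+# the count only returns to the pool once the lowest outstanding sequence
+# number for a tag is released.
+#
+#     sem = SlidingWindowSemaphore(2)
+#     first = sem.acquire('parts')    # sequence 0
+#     second = sem.acquire('parts')   # sequence 1; window is now full
+#     sem.release('parts', second)    # queued: the window does not move yet
+#     sem.release('parts', first)     # lowest released: both slots free again
+#     sem.current_count()             # -> 2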
+
+class ChunksizeAdjuster(object):
+ def __init__(self, max_size=MAX_SINGLE_UPLOAD_SIZE,
+ min_size=MIN_UPLOAD_CHUNKSIZE, max_parts=MAX_PARTS):
+ self.max_size = max_size
+ self.min_size = min_size
+ self.max_parts = max_parts
+
+ def adjust_chunksize(self, current_chunksize, file_size=None):
+ """Get a chunksize close to current that fits within all S3 limits.
+
+ :type current_chunksize: int
+ :param current_chunksize: The currently configured chunksize.
+
+ :type file_size: int or None
+ :param file_size: The size of the file to upload. This might be None
+ if the object being transferred has an unknown size.
+
+ :returns: A valid chunksize that fits within configured limits.
+ """
+ chunksize = current_chunksize
+ if file_size is not None:
+ chunksize = self._adjust_for_max_parts(chunksize, file_size)
+ return self._adjust_for_chunksize_limits(chunksize)
+
+ def _adjust_for_chunksize_limits(self, current_chunksize):
+ if current_chunksize > self.max_size:
+ logger.debug(
+ "Chunksize greater than maximum chunksize. "
+ "Setting to %s from %s." % (self.max_size, current_chunksize))
+ return self.max_size
+ elif current_chunksize < self.min_size:
+ logger.debug(
+ "Chunksize less than minimum chunksize. "
+ "Setting to %s from %s." % (self.min_size, current_chunksize))
+ return self.min_size
+ else:
+ return current_chunksize
+
+ def _adjust_for_max_parts(self, current_chunksize, file_size):
+ chunksize = current_chunksize
+ num_parts = int(math.ceil(file_size / float(chunksize)))
+
+ while num_parts > self.max_parts:
+ chunksize *= 2
+ num_parts = int(math.ceil(file_size / float(chunksize)))
+
+ if chunksize != current_chunksize:
+ logger.debug(
+ "Chunksize would result in the number of parts exceeding the "
+ "maximum. Setting to %s from %s." %
+ (chunksize, current_chunksize))
+
+ return chunksize
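+
+# Illustrative sketch (editorial addition, not part of the upstream module),
+# assuming the library default of at most 10,000 parts: an 8 MiB chunksize
+# is doubled until a 1 TiB upload fits under the part limit.
+#
+#     adjuster = ChunksizeAdjuster()
+#     adjuster.adjust_chunksize(8 * 1024 * 1024, file_size=1024 ** 4)
+#     # -> 134217728 (128 MiB), i.e. 8192 parts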
diff --git a/contrib/python/s3transfer/py2/ya.make b/contrib/python/s3transfer/py2/ya.make
index 4a40935092..45eb28f88c 100644
--- a/contrib/python/s3transfer/py2/ya.make
+++ b/contrib/python/s3transfer/py2/ya.make
@@ -1,51 +1,51 @@
# Generated by devtools/yamaker (pypi).
PY2_LIBRARY()
-
+
OWNER(gebetix g:python-contrib)
VERSION(0.4.2)
-
+
LICENSE(Apache-2.0)
-
-PEERDIR(
- contrib/python/botocore
+
+PEERDIR(
+ contrib/python/botocore
contrib/python/futures
-)
-
+)
+
NO_LINT()
NO_CHECK_IMPORTS(
s3transfer.crt
)
-PY_SRCS(
- TOP_LEVEL
- s3transfer/__init__.py
- s3transfer/bandwidth.py
- s3transfer/compat.py
+PY_SRCS(
+ TOP_LEVEL
+ s3transfer/__init__.py
+ s3transfer/bandwidth.py
+ s3transfer/compat.py
s3transfer/constants.py
s3transfer/copies.py
s3transfer/crt.py
- s3transfer/delete.py
- s3transfer/download.py
- s3transfer/exceptions.py
- s3transfer/futures.py
- s3transfer/manager.py
+ s3transfer/delete.py
+ s3transfer/download.py
+ s3transfer/exceptions.py
+ s3transfer/futures.py
+ s3transfer/manager.py
s3transfer/processpool.py
- s3transfer/subscribers.py
- s3transfer/tasks.py
- s3transfer/upload.py
- s3transfer/utils.py
-)
-
+ s3transfer/subscribers.py
+ s3transfer/tasks.py
+ s3transfer/upload.py
+ s3transfer/utils.py
+)
+
RESOURCE_FILES(
PREFIX contrib/python/s3transfer/py2/
.dist-info/METADATA
.dist-info/top_level.txt
)
-END()
+END()
RECURSE_FOR_TESTS(
tests
diff --git a/contrib/python/s3transfer/py3/s3transfer/__init__.py b/contrib/python/s3transfer/py3/s3transfer/__init__.py
index 15db8ce5fc..1a749c712e 100644
--- a/contrib/python/s3transfer/py3/s3transfer/__init__.py
+++ b/contrib/python/s3transfer/py3/s3transfer/__init__.py
@@ -1,186 +1,186 @@
-# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License"). You
-# may not use this file except in compliance with the License. A copy of
-# the License is located at
-#
-# http://aws.amazon.com/apache2.0/
-#
-# or in the "license" file accompanying this file. This file is
-# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
-# ANY KIND, either express or implied. See the License for the specific
-# language governing permissions and limitations under the License.
-"""Abstractions over S3's upload/download operations.
-
-This module provides high level abstractions for efficient
-uploads/downloads. It handles several things for the user:
-
-* Automatically switching to multipart transfers when
- a file is over a specific size threshold
-* Uploading/downloading a file in parallel
-* Throttling based on max bandwidth
-* Progress callbacks to monitor transfers
-* Retries. While botocore handles retries for streaming uploads,
- it is not possible for it to handle retries for streaming
- downloads. This module handles retries for both cases so
- you don't need to implement any retry logic yourself.
-
-This module has a reasonable set of defaults. It also allows you
-to configure many aspects of the transfer process including:
-
-* Multipart threshold size
-* Max parallel downloads
-* Max bandwidth
-* Socket timeouts
-* Retry amounts
-
-There is no support for s3->s3 multipart copies at this
-time.
-
-
-.. _ref_s3transfer_usage:
-
-Usage
-=====
-
-The simplest way to use this module is:
-
-.. code-block:: python
-
- client = boto3.client('s3', 'us-west-2')
- transfer = S3Transfer(client)
- # Upload /tmp/myfile to s3://bucket/key
- transfer.upload_file('/tmp/myfile', 'bucket', 'key')
-
- # Download s3://bucket/key to /tmp/myfile
- transfer.download_file('bucket', 'key', '/tmp/myfile')
-
-The ``upload_file`` and ``download_file`` methods also accept
-``**kwargs``, which will be forwarded through to the corresponding
-client operation. Here are a few examples using ``upload_file``::
-
- # Making the object public
- transfer.upload_file('/tmp/myfile', 'bucket', 'key',
- extra_args={'ACL': 'public-read'})
-
- # Setting metadata
- transfer.upload_file('/tmp/myfile', 'bucket', 'key',
- extra_args={'Metadata': {'a': 'b', 'c': 'd'}})
-
- # Setting content type
- transfer.upload_file('/tmp/myfile.json', 'bucket', 'key',
- extra_args={'ContentType': "application/json"})
-
-
+# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You
+# may not use this file except in compliance with the License. A copy of
+# the License is located at
+#
+# http://aws.amazon.com/apache2.0/
+#
+# or in the "license" file accompanying this file. This file is
+# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific
+# language governing permissions and limitations under the License.
+"""Abstractions over S3's upload/download operations.
+
+This module provides high level abstractions for efficient
+uploads/downloads. It handles several things for the user:
+
+* Automatically switching to multipart transfers when
+ a file is over a specific size threshold
+* Uploading/downloading a file in parallel
+* Throttling based on max bandwidth
+* Progress callbacks to monitor transfers
+* Retries. While botocore handles retries for streaming uploads,
+ it is not possible for it to handle retries for streaming
+ downloads. This module handles retries for both cases so
+ you don't need to implement any retry logic yourself.
+
+This module has a reasonable set of defaults. It also allows you
+to configure many aspects of the transfer process including:
+
+* Multipart threshold size
+* Max parallel downloads
+* Max bandwidth
+* Socket timeouts
+* Retry amounts
+
+There is no support for s3->s3 multipart copies at this
+time.
+
+
+.. _ref_s3transfer_usage:
+
+Usage
+=====
+
+The simplest way to use this module is:
+
+.. code-block:: python
+
+ client = boto3.client('s3', 'us-west-2')
+ transfer = S3Transfer(client)
+ # Upload /tmp/myfile to s3://bucket/key
+ transfer.upload_file('/tmp/myfile', 'bucket', 'key')
+
+ # Download s3://bucket/key to /tmp/myfile
+ transfer.download_file('bucket', 'key', '/tmp/myfile')
+
+The ``upload_file`` and ``download_file`` methods also accept
+``**kwargs``, which will be forwarded through to the corresponding
+client operation. Here are a few examples using ``upload_file``::
+
+ # Making the object public
+ transfer.upload_file('/tmp/myfile', 'bucket', 'key',
+ extra_args={'ACL': 'public-read'})
+
+ # Setting metadata
+ transfer.upload_file('/tmp/myfile', 'bucket', 'key',
+ extra_args={'Metadata': {'a': 'b', 'c': 'd'}})
+
+ # Setting content type
+ transfer.upload_file('/tmp/myfile.json', 'bucket', 'key',
+ extra_args={'ContentType': "application/json"})
+
+
The ``S3Transfer`` class also supports progress callbacks so you can
-provide transfer progress to users. Both the ``upload_file`` and
-``download_file`` methods take an optional ``callback`` parameter.
-Here's an example of how to print a simple progress percentage
-to the user:
-
-.. code-block:: python
-
- class ProgressPercentage(object):
- def __init__(self, filename):
- self._filename = filename
- self._size = float(os.path.getsize(filename))
- self._seen_so_far = 0
- self._lock = threading.Lock()
-
- def __call__(self, bytes_amount):
- # To simplify we'll assume this is hooked up
- # to a single filename.
- with self._lock:
- self._seen_so_far += bytes_amount
- percentage = (self._seen_so_far / self._size) * 100
- sys.stdout.write(
- "\r%s %s / %s (%.2f%%)" % (self._filename, self._seen_so_far,
- self._size, percentage))
- sys.stdout.flush()
-
-
- transfer = S3Transfer(boto3.client('s3', 'us-west-2'))
- # Upload /tmp/myfile to s3://bucket/key and print upload progress.
- transfer.upload_file('/tmp/myfile', 'bucket', 'key',
- callback=ProgressPercentage('/tmp/myfile'))
-
-
-
-You can also provide a TransferConfig object to the S3Transfer
-object that gives you more fine grained control over the
-transfer. For example:
-
-.. code-block:: python
-
- client = boto3.client('s3', 'us-west-2')
- config = TransferConfig(
- multipart_threshold=8 * 1024 * 1024,
- max_concurrency=10,
- num_download_attempts=10,
- )
- transfer = S3Transfer(client, config)
- transfer.upload_file('/tmp/foo', 'bucket', 'key')
-
-
-"""
+provide transfer progress to users. Both the ``upload_file`` and
+``download_file`` methods take an optional ``callback`` parameter.
+Here's an example of how to print a simple progress percentage
+to the user:
+
+.. code-block:: python
+
+ class ProgressPercentage(object):
+ def __init__(self, filename):
+ self._filename = filename
+ self._size = float(os.path.getsize(filename))
+ self._seen_so_far = 0
+ self._lock = threading.Lock()
+
+ def __call__(self, bytes_amount):
+ # To simplify we'll assume this is hooked up
+ # to a single filename.
+ with self._lock:
+ self._seen_so_far += bytes_amount
+ percentage = (self._seen_so_far / self._size) * 100
+ sys.stdout.write(
+ "\r%s %s / %s (%.2f%%)" % (self._filename, self._seen_so_far,
+ self._size, percentage))
+ sys.stdout.flush()
+
+
+ transfer = S3Transfer(boto3.client('s3', 'us-west-2'))
+ # Upload /tmp/myfile to s3://bucket/key and print upload progress.
+ transfer.upload_file('/tmp/myfile', 'bucket', 'key',
+ callback=ProgressPercentage('/tmp/myfile'))
+
+
+
+You can also provide a TransferConfig object to the S3Transfer
+object that gives you more fine grained control over the
+transfer. For example:
+
+.. code-block:: python
+
+ client = boto3.client('s3', 'us-west-2')
+ config = TransferConfig(
+ multipart_threshold=8 * 1024 * 1024,
+ max_concurrency=10,
+ num_download_attempts=10,
+ )
+ transfer = S3Transfer(client, config)
+ transfer.upload_file('/tmp/foo', 'bucket', 'key')
+
+
+"""
import concurrent.futures
-import functools
-import logging
+import functools
+import logging
import math
import os
import queue
import random
-import socket
+import socket
import string
-import threading
-
+import threading
+
from botocore.compat import six # noqa: F401
-from botocore.exceptions import IncompleteReadError
+from botocore.exceptions import IncompleteReadError
from botocore.vendored.requests.packages.urllib3.exceptions import (
ReadTimeoutError,
)
-
-import s3transfer.compat
-from s3transfer.exceptions import RetriesExceededError, S3UploadFailedError
-
-__author__ = 'Amazon Web Services'
+
+import s3transfer.compat
+from s3transfer.exceptions import RetriesExceededError, S3UploadFailedError
+
+__author__ = 'Amazon Web Services'
__version__ = '0.5.1'
-
-
-class NullHandler(logging.Handler):
- def emit(self, record):
- pass
-
-
-logger = logging.getLogger(__name__)
-logger.addHandler(NullHandler())
-
-MB = 1024 * 1024
-SHUTDOWN_SENTINEL = object()
-
-
-def random_file_extension(num_digits=8):
- return ''.join(random.choice(string.hexdigits) for _ in range(num_digits))
-
-
-def disable_upload_callbacks(request, operation_name, **kwargs):
+
+
+class NullHandler(logging.Handler):
+ def emit(self, record):
+ pass
+
+
+logger = logging.getLogger(__name__)
+logger.addHandler(NullHandler())
+
+MB = 1024 * 1024
+SHUTDOWN_SENTINEL = object()
+
+
+def random_file_extension(num_digits=8):
+ return ''.join(random.choice(string.hexdigits) for _ in range(num_digits))
+
+
+def disable_upload_callbacks(request, operation_name, **kwargs):
if operation_name in ['PutObject', 'UploadPart'] and hasattr(
request.body, 'disable_callback'
):
- request.body.disable_callback()
-
-
-def enable_upload_callbacks(request, operation_name, **kwargs):
+ request.body.disable_callback()
+
+
+def enable_upload_callbacks(request, operation_name, **kwargs):
if operation_name in ['PutObject', 'UploadPart'] and hasattr(
request.body, 'enable_callback'
):
- request.body.enable_callback()
-
-
-class QueueShutdownError(Exception):
- pass
-
-
+ request.body.enable_callback()
+
+
+class QueueShutdownError(Exception):
+ pass
+
+
class ReadFileChunk:
def __init__(
self,
@@ -191,48 +191,48 @@ class ReadFileChunk:
callback=None,
enable_callback=True,
):
- """
-
- Given a file object shown below:
-
- |___________________________________________________|
- 0 | | full_file_size
- |----chunk_size---|
- start_byte
-
- :type fileobj: file
- :param fileobj: File like object
-
- :type start_byte: int
- :param start_byte: The first byte from which to start reading.
-
- :type chunk_size: int
- :param chunk_size: The max chunk size to read. Trying to read
- past the end of the chunk size will behave like you've
- reached the end of the file.
-
- :type full_file_size: int
- :param full_file_size: The entire content length associated
- with ``fileobj``.
-
- :type callback: function(amount_read)
- :param callback: Called whenever data is read from this object.
-
- """
- self._fileobj = fileobj
- self._start_byte = start_byte
- self._size = self._calculate_file_size(
+ """
+
+ Given a file object shown below:
+
+ |___________________________________________________|
+ 0 | | full_file_size
+ |----chunk_size---|
+ start_byte
+
+ :type fileobj: file
+ :param fileobj: File like object
+
+ :type start_byte: int
+ :param start_byte: The first byte from which to start reading.
+
+ :type chunk_size: int
+ :param chunk_size: The max chunk size to read. Trying to read
+ past the end of the chunk size will behave like you've
+ reached the end of the file.
+
+ :type full_file_size: int
+ :param full_file_size: The entire content length associated
+ with ``fileobj``.
+
+ :type callback: function(amount_read)
+ :param callback: Called whenever data is read from this object.
+
+ """
+ self._fileobj = fileobj
+ self._start_byte = start_byte
+ self._size = self._calculate_file_size(
self._fileobj,
requested_size=chunk_size,
start_byte=start_byte,
actual_file_size=full_file_size,
)
- self._fileobj.seek(self._start_byte)
- self._amount_read = 0
- self._callback = callback
- self._callback_enabled = enable_callback
-
- @classmethod
+ self._fileobj.seek(self._start_byte)
+ self._amount_read = 0
+ self._callback = callback
+ self._callback_enabled = enable_callback
+
+ @classmethod
def from_filename(
cls,
filename,
@@ -241,145 +241,145 @@ class ReadFileChunk:
callback=None,
enable_callback=True,
):
- """Convenience factory function to create from a filename.
-
- :type start_byte: int
- :param start_byte: The first byte from which to start reading.
-
- :type chunk_size: int
- :param chunk_size: The max chunk size to read. Trying to read
- past the end of the chunk size will behave like you've
- reached the end of the file.
-
- :type full_file_size: int
- :param full_file_size: The entire content length associated
- with ``fileobj``.
-
- :type callback: function(amount_read)
- :param callback: Called whenever data is read from this object.
-
- :type enable_callback: bool
- :param enable_callback: Indicate whether to invoke callback
- during read() calls.
-
- :rtype: ``ReadFileChunk``
- :return: A new instance of ``ReadFileChunk``
-
- """
- f = open(filename, 'rb')
- file_size = os.fstat(f.fileno()).st_size
+ """Convenience factory function to create from a filename.
+
+ :type start_byte: int
+ :param start_byte: The first byte from which to start reading.
+
+ :type chunk_size: int
+ :param chunk_size: The max chunk size to read. Trying to read
+ past the end of the chunk size will behave like you've
+ reached the end of the file.
+
+ :type full_file_size: int
+ :param full_file_size: The entire content length associated
+ with ``fileobj``.
+
+ :type callback: function(amount_read)
+ :param callback: Called whenever data is read from this object.
+
+ :type enable_callback: bool
+ :param enable_callback: Indicate whether to invoke callback
+ during read() calls.
+
+ :rtype: ``ReadFileChunk``
+ :return: A new instance of ``ReadFileChunk``
+
+ """
+ f = open(filename, 'rb')
+ file_size = os.fstat(f.fileno()).st_size
return cls(
f, start_byte, chunk_size, file_size, callback, enable_callback
)
-
+
def _calculate_file_size(
self, fileobj, requested_size, start_byte, actual_file_size
):
- max_chunk_size = actual_file_size - start_byte
- return min(max_chunk_size, requested_size)
-
- def read(self, amount=None):
- if amount is None:
- amount_to_read = self._size - self._amount_read
- else:
- amount_to_read = min(self._size - self._amount_read, amount)
- data = self._fileobj.read(amount_to_read)
- self._amount_read += len(data)
- if self._callback is not None and self._callback_enabled:
- self._callback(len(data))
- return data
-
- def enable_callback(self):
- self._callback_enabled = True
-
- def disable_callback(self):
- self._callback_enabled = False
-
- def seek(self, where):
- self._fileobj.seek(self._start_byte + where)
- if self._callback is not None and self._callback_enabled:
- # To also rewind the callback() for an accurate progress report
- self._callback(where - self._amount_read)
- self._amount_read = where
-
- def close(self):
- self._fileobj.close()
-
- def tell(self):
- return self._amount_read
-
- def __len__(self):
- # __len__ is defined because requests will try to determine the length
- # of the stream to set a content length. In the normal case
- # of the file it will just stat the file, but we need to change that
- # behavior. By providing a __len__, requests will use that instead
- # of stat'ing the file.
- return self._size
-
- def __enter__(self):
- return self
-
- def __exit__(self, *args, **kwargs):
- self.close()
-
- def __iter__(self):
- # This is a workaround for http://bugs.python.org/issue17575
- # Basically httplib will try to iterate over the contents, even
- # if it's a file-like object. This wasn't noticed because we've
- # already exhausted the stream so iterating over the file immediately
- # stops, which is what we're simulating here.
- return iter([])
-
-
+ max_chunk_size = actual_file_size - start_byte
+ return min(max_chunk_size, requested_size)
+
+ def read(self, amount=None):
+ if amount is None:
+ amount_to_read = self._size - self._amount_read
+ else:
+ amount_to_read = min(self._size - self._amount_read, amount)
+ data = self._fileobj.read(amount_to_read)
+ self._amount_read += len(data)
+ if self._callback is not None and self._callback_enabled:
+ self._callback(len(data))
+ return data
+
+ def enable_callback(self):
+ self._callback_enabled = True
+
+ def disable_callback(self):
+ self._callback_enabled = False
+
+ def seek(self, where):
+ self._fileobj.seek(self._start_byte + where)
+ if self._callback is not None and self._callback_enabled:
+ # To also rewind the callback() for an accurate progress report
+ self._callback(where - self._amount_read)
+ self._amount_read = where
+
+ def close(self):
+ self._fileobj.close()
+
+ def tell(self):
+ return self._amount_read
+
+ def __len__(self):
+ # __len__ is defined because requests will try to determine the length
+ # of the stream to set a content length. In the normal case
+ # of the file it will just stat the file, but we need to change that
+ # behavior. By providing a __len__, requests will use that instead
+ # of stat'ing the file.
+ return self._size
+
+ def __enter__(self):
+ return self
+
+ def __exit__(self, *args, **kwargs):
+ self.close()
+
+ def __iter__(self):
+ # This is a workaround for http://bugs.python.org/issue17575
+ # Basically httplib will try to iterate over the contents, even
+ # if its a file like object. This wasn't noticed because we've
+ # already exhausted the stream so iterating over the file immediately
+ # stops, which is what we're simulating here.
+ return iter([])
+
+
class StreamReaderProgress:
- """Wrapper for a read only stream that adds progress callbacks."""
-
- def __init__(self, stream, callback=None):
- self._stream = stream
- self._callback = callback
-
- def read(self, *args, **kwargs):
- value = self._stream.read(*args, **kwargs)
- if self._callback is not None:
- self._callback(len(value))
- return value
-
-
+ """Wrapper for a read only stream that adds progress callbacks."""
+
+ def __init__(self, stream, callback=None):
+ self._stream = stream
+ self._callback = callback
+
+ def read(self, *args, **kwargs):
+ value = self._stream.read(*args, **kwargs)
+ if self._callback is not None:
+ self._callback(len(value))
+ return value
+
+
class OSUtils:
- def get_file_size(self, filename):
- return os.path.getsize(filename)
-
- def open_file_chunk_reader(self, filename, start_byte, size, callback):
+ def get_file_size(self, filename):
+ return os.path.getsize(filename)
+
+ def open_file_chunk_reader(self, filename, start_byte, size, callback):
return ReadFileChunk.from_filename(
filename, start_byte, size, callback, enable_callback=False
)
-
- def open(self, filename, mode):
- return open(filename, mode)
-
- def remove_file(self, filename):
- """Remove a file, noop if file does not exist."""
- # Unlike os.remove, if the file does not exist,
- # then this method does nothing.
- try:
- os.remove(filename)
- except OSError:
- pass
-
- def rename_file(self, current_filename, new_filename):
- s3transfer.compat.rename_file(current_filename, new_filename)
-
-
+
+ def open(self, filename, mode):
+ return open(filename, mode)
+
+ def remove_file(self, filename):
+ """Remove a file, noop if file does not exist."""
+ # Unlike os.remove, if the file does not exist,
+ # then this method does nothing.
+ try:
+ os.remove(filename)
+ except OSError:
+ pass
+
+ def rename_file(self, current_filename, new_filename):
+ s3transfer.compat.rename_file(current_filename, new_filename)
+
+
class MultipartUploader:
- # These are the extra_args that need to be forwarded onto
- # subsequent upload_parts.
- UPLOAD_PART_ARGS = [
- 'SSECustomerKey',
- 'SSECustomerAlgorithm',
- 'SSECustomerKeyMD5',
- 'RequestPayer',
- ]
-
+ # These are the extra_args that need to be forwarded onto
+ # subsequent upload_parts.
+ UPLOAD_PART_ARGS = [
+ 'SSECustomerKey',
+ 'SSECustomerAlgorithm',
+ 'SSECustomerKeyMD5',
+ 'RequestPayer',
+ ]
+
def __init__(
self,
client,
@@ -387,62 +387,62 @@ class MultipartUploader:
osutil,
executor_cls=concurrent.futures.ThreadPoolExecutor,
):
- self._client = client
- self._config = config
- self._os = osutil
- self._executor_cls = executor_cls
-
- def _extra_upload_part_args(self, extra_args):
- # Only the args in UPLOAD_PART_ARGS actually need to be passed
- # onto the upload_part calls.
- upload_parts_args = {}
- for key, value in extra_args.items():
- if key in self.UPLOAD_PART_ARGS:
- upload_parts_args[key] = value
- return upload_parts_args
-
- def upload_file(self, filename, bucket, key, callback, extra_args):
+ self._client = client
+ self._config = config
+ self._os = osutil
+ self._executor_cls = executor_cls
+
+ def _extra_upload_part_args(self, extra_args):
+ # Only the args in UPLOAD_PART_ARGS actually need to be passed
+ # onto the upload_part calls.
+ upload_parts_args = {}
+ for key, value in extra_args.items():
+ if key in self.UPLOAD_PART_ARGS:
+ upload_parts_args[key] = value
+ return upload_parts_args
+
+ def upload_file(self, filename, bucket, key, callback, extra_args):
response = self._client.create_multipart_upload(
Bucket=bucket, Key=key, **extra_args
)
- upload_id = response['UploadId']
- try:
+ upload_id = response['UploadId']
+ try:
parts = self._upload_parts(
upload_id, filename, bucket, key, callback, extra_args
)
- except Exception as e:
+ except Exception as e:
logger.debug(
"Exception raised while uploading parts, "
"aborting multipart upload.",
exc_info=True,
)
- self._client.abort_multipart_upload(
+ self._client.abort_multipart_upload(
Bucket=bucket, Key=key, UploadId=upload_id
)
- raise S3UploadFailedError(
+ raise S3UploadFailedError(
"Failed to upload {} to {}: {}".format(
filename, '/'.join([bucket, key]), e
)
)
- self._client.complete_multipart_upload(
+ self._client.complete_multipart_upload(
Bucket=bucket,
Key=key,
UploadId=upload_id,
MultipartUpload={'Parts': parts},
)
-
+
def _upload_parts(
self, upload_id, filename, bucket, key, callback, extra_args
):
- upload_parts_extra_args = self._extra_upload_part_args(extra_args)
- parts = []
- part_size = self._config.multipart_chunksize
- num_parts = int(
+ upload_parts_extra_args = self._extra_upload_part_args(extra_args)
+ parts = []
+ part_size = self._config.multipart_chunksize
+ num_parts = int(
math.ceil(self._os.get_file_size(filename) / float(part_size))
)
- max_workers = self._config.max_concurrency
- with self._executor_cls(max_workers=max_workers) as executor:
- upload_partial = functools.partial(
+ max_workers = self._config.max_concurrency
+ with self._executor_cls(max_workers=max_workers) as executor:
+ upload_partial = functools.partial(
self._upload_one_part,
filename,
bucket,
@@ -452,10 +452,10 @@ class MultipartUploader:
upload_parts_extra_args,
callback,
)
- for part in executor.map(upload_partial, range(1, num_parts + 1)):
- parts.append(part)
- return parts
-
+ for part in executor.map(upload_partial, range(1, num_parts + 1)):
+ parts.append(part)
+ return parts
+
def _upload_one_part(
self,
filename,
@@ -467,11 +467,11 @@ class MultipartUploader:
callback,
part_number,
):
- open_chunk_reader = self._os.open_file_chunk_reader
+ open_chunk_reader = self._os.open_file_chunk_reader
with open_chunk_reader(
filename, part_size * (part_number - 1), part_size, callback
) as body:
- response = self._client.upload_part(
+ response = self._client.upload_part(
Bucket=bucket,
Key=key,
UploadId=upload_id,
@@ -479,44 +479,44 @@ class MultipartUploader:
Body=body,
**extra_args,
)
- etag = response['ETag']
- return {'ETag': etag, 'PartNumber': part_number}
-
-
-class ShutdownQueue(queue.Queue):
- """A queue implementation that can be shutdown.
-
- Shutting down a queue means that this class adds a
- trigger_shutdown method that will trigger all subsequent
- calls to put() to fail with a ``QueueShutdownError``.
-
- It purposefully deviates from queue.Queue, and is *not* meant
- to be a drop-in replacement for ``queue.Queue``.
-
- """
-
- def _init(self, maxsize):
- self._shutdown = False
- self._shutdown_lock = threading.Lock()
- # queue.Queue is an old style class so we don't use super().
- return queue.Queue._init(self, maxsize)
-
- def trigger_shutdown(self):
- with self._shutdown_lock:
- self._shutdown = True
- logger.debug("The IO queue is now shutdown.")
-
- def put(self, item):
- # Note: this is not sufficient, it's still possible to deadlock!
- # Need to hook into the condition vars used by this class.
- with self._shutdown_lock:
- if self._shutdown:
+ etag = response['ETag']
+ return {'ETag': etag, 'PartNumber': part_number}
+
+
+class ShutdownQueue(queue.Queue):
+ """A queue implementation that can be shutdown.
+
+ Shutting down a queue means that this class adds a
+ trigger_shutdown method that will trigger all subsequent
+ calls to put() to fail with a ``QueueShutdownError``.
+
+ It purposefully deviates from queue.Queue, and is *not* meant
+ to be a drop-in replacement for ``queue.Queue``.
+
+ """
+
+ def _init(self, maxsize):
+ self._shutdown = False
+ self._shutdown_lock = threading.Lock()
+ # queue.Queue is an old style class so we don't use super().
+ return queue.Queue._init(self, maxsize)
+
+ def trigger_shutdown(self):
+ with self._shutdown_lock:
+ self._shutdown = True
+ logger.debug("The IO queue is now shutdown.")
+
+ def put(self, item):
+ # Note: this is not sufficient, it's still possible to deadlock!
+ # Need to hook into the condition vars used by this class.
+ with self._shutdown_lock:
+ if self._shutdown:
raise QueueShutdownError(
"Cannot put item to queue when " "queue has been shutdown."
)
- return queue.Queue.put(self, item)
-
-
+ return queue.Queue.put(self, item)
+
+
class MultipartDownloader:
def __init__(
self,
@@ -525,50 +525,50 @@ class MultipartDownloader:
osutil,
executor_cls=concurrent.futures.ThreadPoolExecutor,
):
- self._client = client
- self._config = config
- self._os = osutil
- self._executor_cls = executor_cls
- self._ioqueue = ShutdownQueue(self._config.max_io_queue)
-
+ self._client = client
+ self._config = config
+ self._os = osutil
+ self._executor_cls = executor_cls
+ self._ioqueue = ShutdownQueue(self._config.max_io_queue)
+
def download_file(
self, bucket, key, filename, object_size, extra_args, callback=None
):
- with self._executor_cls(max_workers=2) as controller:
- # 1 thread for the future that manages the uploading of files
- # 1 thread for the future that manages IO writes.
- download_parts_handler = functools.partial(
- self._download_file_as_future,
+ with self._executor_cls(max_workers=2) as controller:
+ # 1 thread for the future that manages the uploading of files
+ # 1 thread for the future that manages IO writes.
+ download_parts_handler = functools.partial(
+ self._download_file_as_future,
bucket,
key,
filename,
object_size,
callback,
)
- parts_future = controller.submit(download_parts_handler)
-
- io_writes_handler = functools.partial(
+ parts_future = controller.submit(download_parts_handler)
+
+ io_writes_handler = functools.partial(
self._perform_io_writes, filename
)
- io_future = controller.submit(io_writes_handler)
- results = concurrent.futures.wait(
- [parts_future, io_future],
+ io_future = controller.submit(io_writes_handler)
+ results = concurrent.futures.wait(
+ [parts_future, io_future],
return_when=concurrent.futures.FIRST_EXCEPTION,
)
- self._process_future_results(results)
-
- def _process_future_results(self, futures):
- finished, unfinished = futures
- for future in finished:
- future.result()
-
+ self._process_future_results(results)
+
+ def _process_future_results(self, futures):
+ finished, unfinished = futures
+ for future in finished:
+ future.result()
+
def _download_file_as_future(
self, bucket, key, filename, object_size, callback
):
- part_size = self._config.multipart_chunksize
- num_parts = int(math.ceil(object_size / float(part_size)))
- max_workers = self._config.max_concurrency
- download_partial = functools.partial(
+ part_size = self._config.multipart_chunksize
+ num_parts = int(math.ceil(object_size / float(part_size)))
+ max_workers = self._config.max_concurrency
+ download_partial = functools.partial(
self._download_range,
bucket,
key,
@@ -577,48 +577,48 @@ class MultipartDownloader:
num_parts,
callback,
)
- try:
- with self._executor_cls(max_workers=max_workers) as executor:
- list(executor.map(download_partial, range(num_parts)))
- finally:
- self._ioqueue.put(SHUTDOWN_SENTINEL)
-
- def _calculate_range_param(self, part_size, part_index, num_parts):
- start_range = part_index * part_size
- if part_index == num_parts - 1:
- end_range = ''
- else:
- end_range = start_range + part_size - 1
+ try:
+ with self._executor_cls(max_workers=max_workers) as executor:
+ list(executor.map(download_partial, range(num_parts)))
+ finally:
+ self._ioqueue.put(SHUTDOWN_SENTINEL)
+
+ def _calculate_range_param(self, part_size, part_index, num_parts):
+ start_range = part_index * part_size
+ if part_index == num_parts - 1:
+ end_range = ''
+ else:
+ end_range = start_range + part_size - 1
range_param = f'bytes={start_range}-{end_range}'
- return range_param
-
+ return range_param
+
def _download_range(
self, bucket, key, filename, part_size, num_parts, callback, part_index
):
- try:
- range_param = self._calculate_range_param(
+ try:
+ range_param = self._calculate_range_param(
part_size, part_index, num_parts
)
-
- max_attempts = self._config.num_download_attempts
- last_exception = None
- for i in range(max_attempts):
- try:
- logger.debug("Making get_object call.")
- response = self._client.get_object(
+
+ max_attempts = self._config.num_download_attempts
+ last_exception = None
+ for i in range(max_attempts):
+ try:
+ logger.debug("Making get_object call.")
+ response = self._client.get_object(
Bucket=bucket, Key=key, Range=range_param
)
- streaming_body = StreamReaderProgress(
+ streaming_body = StreamReaderProgress(
response['Body'], callback
)
- buffer_size = 1024 * 16
- current_index = part_size * part_index
+ buffer_size = 1024 * 16
+ current_index = part_size * part_index
for chunk in iter(
lambda: streaming_body.read(buffer_size), b''
):
- self._ioqueue.put((current_index, chunk))
- current_index += len(chunk)
- return
+ self._ioqueue.put((current_index, chunk))
+ current_index += len(chunk)
+ return
except (
socket.timeout,
OSError,
@@ -633,37 +633,37 @@ class MultipartDownloader:
max_attempts,
exc_info=True,
)
- last_exception = e
- continue
- raise RetriesExceededError(last_exception)
- finally:
- logger.debug("EXITING _download_range for part: %s", part_index)
-
- def _perform_io_writes(self, filename):
- with self._os.open(filename, 'wb') as f:
- while True:
- task = self._ioqueue.get()
- if task is SHUTDOWN_SENTINEL:
+ last_exception = e
+ continue
+ raise RetriesExceededError(last_exception)
+ finally:
+ logger.debug("EXITING _download_range for part: %s", part_index)
+
+ def _perform_io_writes(self, filename):
+ with self._os.open(filename, 'wb') as f:
+ while True:
+ task = self._ioqueue.get()
+ if task is SHUTDOWN_SENTINEL:
logger.debug(
"Shutdown sentinel received in IO handler, "
"shutting down IO handler."
)
- return
- else:
- try:
- offset, data = task
- f.seek(offset)
- f.write(data)
- except Exception as e:
+ return
+ else:
+ try:
+ offset, data = task
+ f.seek(offset)
+ f.write(data)
+ except Exception as e:
logger.debug(
"Caught exception in IO thread: %s",
e,
exc_info=True,
)
- self._ioqueue.trigger_shutdown()
- raise
-
-
+ self._ioqueue.trigger_shutdown()
+ raise
+
+
class TransferConfig:
def __init__(
self,
@@ -673,68 +673,68 @@ class TransferConfig:
num_download_attempts=5,
max_io_queue=100,
):
- self.multipart_threshold = multipart_threshold
- self.max_concurrency = max_concurrency
- self.multipart_chunksize = multipart_chunksize
- self.num_download_attempts = num_download_attempts
- self.max_io_queue = max_io_queue
-
-
+ self.multipart_threshold = multipart_threshold
+ self.max_concurrency = max_concurrency
+ self.multipart_chunksize = multipart_chunksize
+ self.num_download_attempts = num_download_attempts
+ self.max_io_queue = max_io_queue
+
+
class S3Transfer:
-
- ALLOWED_DOWNLOAD_ARGS = [
- 'VersionId',
- 'SSECustomerAlgorithm',
- 'SSECustomerKey',
- 'SSECustomerKeyMD5',
- 'RequestPayer',
- ]
-
- ALLOWED_UPLOAD_ARGS = [
- 'ACL',
- 'CacheControl',
- 'ContentDisposition',
- 'ContentEncoding',
- 'ContentLanguage',
- 'ContentType',
- 'Expires',
- 'GrantFullControl',
- 'GrantRead',
- 'GrantReadACP',
- 'GrantWriteACL',
- 'Metadata',
- 'RequestPayer',
- 'ServerSideEncryption',
- 'StorageClass',
- 'SSECustomerAlgorithm',
- 'SSECustomerKey',
- 'SSECustomerKeyMD5',
- 'SSEKMSKeyId',
+
+ ALLOWED_DOWNLOAD_ARGS = [
+ 'VersionId',
+ 'SSECustomerAlgorithm',
+ 'SSECustomerKey',
+ 'SSECustomerKeyMD5',
+ 'RequestPayer',
+ ]
+
+ ALLOWED_UPLOAD_ARGS = [
+ 'ACL',
+ 'CacheControl',
+ 'ContentDisposition',
+ 'ContentEncoding',
+ 'ContentLanguage',
+ 'ContentType',
+ 'Expires',
+ 'GrantFullControl',
+ 'GrantRead',
+ 'GrantReadACP',
+ 'GrantWriteACL',
+ 'Metadata',
+ 'RequestPayer',
+ 'ServerSideEncryption',
+ 'StorageClass',
+ 'SSECustomerAlgorithm',
+ 'SSECustomerKey',
+ 'SSECustomerKeyMD5',
+ 'SSEKMSKeyId',
'SSEKMSEncryptionContext',
'Tagging',
- ]
-
- def __init__(self, client, config=None, osutil=None):
- self._client = client
- if config is None:
- config = TransferConfig()
- self._config = config
- if osutil is None:
- osutil = OSUtils()
- self._osutil = osutil
-
+ ]
+
+ def __init__(self, client, config=None, osutil=None):
+ self._client = client
+ if config is None:
+ config = TransferConfig()
+ self._config = config
+ if osutil is None:
+ osutil = OSUtils()
+ self._osutil = osutil
+
def upload_file(
self, filename, bucket, key, callback=None, extra_args=None
):
- """Upload a file to an S3 object.
-
- Variants have also been injected into S3 client, Bucket and Object.
- You don't have to use S3Transfer.upload_file() directly.
- """
- if extra_args is None:
- extra_args = {}
- self._validate_all_known_args(extra_args, self.ALLOWED_UPLOAD_ARGS)
- events = self._client.meta.events
+ """Upload a file to an S3 object.
+
+ Variants have also been injected into S3 client, Bucket and Object.
+ You don't have to use S3Transfer.upload_file() directly.
+ """
+ if extra_args is None:
+ extra_args = {}
+ self._validate_all_known_args(extra_args, self.ALLOWED_UPLOAD_ARGS)
+ events = self._client.meta.events
events.register_first(
'request-created.s3',
disable_upload_callbacks,
@@ -749,14 +749,14 @@ class S3Transfer:
self._osutil.get_file_size(filename)
>= self._config.multipart_threshold
):
- self._multipart_upload(filename, bucket, key, callback, extra_args)
- else:
- self._put_object(filename, bucket, key, callback, extra_args)
-
- def _put_object(self, filename, bucket, key, callback, extra_args):
- # We're using open_file_chunk_reader so we can take advantage of the
- # progress callback functionality.
- open_chunk_reader = self._osutil.open_file_chunk_reader
+ self._multipart_upload(filename, bucket, key, callback, extra_args)
+ else:
+ self._put_object(filename, bucket, key, callback, extra_args)
+
+ def _put_object(self, filename, bucket, key, callback, extra_args):
+ # We're using open_file_chunk_reader so we can take advantage of the
+ # progress callback functionality.
+ open_chunk_reader = self._osutil.open_file_chunk_reader
with open_chunk_reader(
filename,
0,
@@ -766,57 +766,57 @@ class S3Transfer:
self._client.put_object(
Bucket=bucket, Key=key, Body=body, **extra_args
)
-
+
def download_file(
self, bucket, key, filename, extra_args=None, callback=None
):
- """Download an S3 object to a file.
-
- Variants have also been injected into S3 client, Bucket and Object.
- You don't have to use S3Transfer.download_file() directly.
- """
- # This method will issue a ``head_object`` request to determine
- # the size of the S3 object. This is used to determine if the
- # object is downloaded in parallel.
- if extra_args is None:
- extra_args = {}
- self._validate_all_known_args(extra_args, self.ALLOWED_DOWNLOAD_ARGS)
- object_size = self._object_size(bucket, key, extra_args)
- temp_filename = filename + os.extsep + random_file_extension()
- try:
+ """Download an S3 object to a file.
+
+ Variants have also been injected into S3 client, Bucket and Object.
+ You don't have to use S3Transfer.download_file() directly.
+ """
+ # This method will issue a ``head_object`` request to determine
+ # the size of the S3 object. This is used to determine if the
+ # object is downloaded in parallel.
+ if extra_args is None:
+ extra_args = {}
+ self._validate_all_known_args(extra_args, self.ALLOWED_DOWNLOAD_ARGS)
+ object_size = self._object_size(bucket, key, extra_args)
+ temp_filename = filename + os.extsep + random_file_extension()
+ try:
self._download_file(
bucket, key, temp_filename, object_size, extra_args, callback
)
- except Exception:
+ except Exception:
logger.debug(
"Exception caught in download_file, removing partial "
"file: %s",
temp_filename,
exc_info=True,
)
- self._osutil.remove_file(temp_filename)
- raise
- else:
- self._osutil.rename_file(temp_filename, filename)
-
+ self._osutil.remove_file(temp_filename)
+ raise
+ else:
+ self._osutil.rename_file(temp_filename, filename)
+
def _download_file(
self, bucket, key, filename, object_size, extra_args, callback
):
- if object_size >= self._config.multipart_threshold:
+ if object_size >= self._config.multipart_threshold:
self._ranged_download(
bucket, key, filename, object_size, extra_args, callback
)
- else:
- self._get_object(bucket, key, filename, extra_args, callback)
-
- def _validate_all_known_args(self, actual, allowed):
- for kwarg in actual:
- if kwarg not in allowed:
- raise ValueError(
- "Invalid extra_args key '%s', "
+ else:
+ self._get_object(bucket, key, filename, extra_args, callback)
+
+ def _validate_all_known_args(self, actual, allowed):
+ for kwarg in actual:
+ if kwarg not in allowed:
+ raise ValueError(
+ "Invalid extra_args key '%s', "
"must be one of: %s" % (kwarg, ', '.join(allowed))
)
-
+
def _ranged_download(
self, bucket, key, filename, object_size, extra_args, callback
):
@@ -826,13 +826,13 @@ class S3Transfer:
downloader.download_file(
bucket, key, filename, object_size, extra_args, callback
)
-
- def _get_object(self, bucket, key, filename, extra_args, callback):
- # precondition: num_download_attempts > 0
- max_attempts = self._config.num_download_attempts
- last_exception = None
- for i in range(max_attempts):
- try:
+
+ def _get_object(self, bucket, key, filename, extra_args, callback):
+ # precondition: num_download_attempts > 0
+ max_attempts = self._config.num_download_attempts
+ last_exception = None
+ for i in range(max_attempts):
+ try:
return self._do_get_object(
bucket, key, filename, extra_args, callback
)
@@ -842,8 +842,8 @@ class S3Transfer:
ReadTimeoutError,
IncompleteReadError,
) as e:
- # TODO: we need a way to reset the callback if the
- # download failed.
+ # TODO: we need a way to reset the callback if the
+ # download failed.
logger.debug(
"Retrying exception caught (%s), "
"retrying request, (attempt %s / %s)",
@@ -852,24 +852,24 @@ class S3Transfer:
max_attempts,
exc_info=True,
)
- last_exception = e
- continue
- raise RetriesExceededError(last_exception)
-
- def _do_get_object(self, bucket, key, filename, extra_args, callback):
+ last_exception = e
+ continue
+ raise RetriesExceededError(last_exception)
+
+ def _do_get_object(self, bucket, key, filename, extra_args, callback):
response = self._client.get_object(
Bucket=bucket, Key=key, **extra_args
)
streaming_body = StreamReaderProgress(response['Body'], callback)
- with self._osutil.open(filename, 'wb') as f:
- for chunk in iter(lambda: streaming_body.read(8192), b''):
- f.write(chunk)
-
- def _object_size(self, bucket, key, extra_args):
+ with self._osutil.open(filename, 'wb') as f:
+ for chunk in iter(lambda: streaming_body.read(8192), b''):
+ f.write(chunk)
+
+ def _object_size(self, bucket, key, extra_args):
return self._client.head_object(Bucket=bucket, Key=key, **extra_args)[
'ContentLength'
]
-
- def _multipart_upload(self, filename, bucket, key, callback, extra_args):
- uploader = MultipartUploader(self._client, self._config, self._osutil)
- uploader.upload_file(filename, bucket, key, callback, extra_args)
+
+ def _multipart_upload(self, filename, bucket, key, callback, extra_args):
+ uploader = MultipartUploader(self._client, self._config, self._osutil)
+ uploader.upload_file(filename, bucket, key, callback, extra_args)
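The module diffed above keeps the legacy S3Transfer interface together with TransferConfig, MultipartUploader and MultipartDownloader. For reference, a minimal usage sketch (not part of the diff) is shown below; the bucket name, key, local paths and config numbers are illustrative assumptions, and only the class and method names come from the code above.

    # Illustrative wiring of the classes diffed above; values are placeholders.
    import boto3

    from s3transfer import S3Transfer, TransferConfig

    client = boto3.client('s3')
    config = TransferConfig(
        multipart_threshold=8 * 1024 * 1024,   # assumed example threshold
        max_concurrency=4,
        multipart_chunksize=8 * 1024 * 1024,
    )
    transfer = S3Transfer(client, config)

    # Files at or above multipart_threshold go through MultipartUploader;
    # smaller files are sent with a single put_object call.
    transfer.upload_file(
        '/tmp/report.bin', 'example-bucket', 'reports/report.bin',
        extra_args={'ACL': 'private'},
    )

    # download_file sizes the object with head_object, then either streams a
    # single get_object or fans out ranged GETs through MultipartDownloader.
    transfer.download_file(
        'example-bucket', 'reports/report.bin', '/tmp/report.bin.copy'
    )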
diff --git a/contrib/python/s3transfer/py3/s3transfer/bandwidth.py b/contrib/python/s3transfer/py3/s3transfer/bandwidth.py
index e3ff9c2717..9bac5885e1 100644
--- a/contrib/python/s3transfer/py3/s3transfer/bandwidth.py
+++ b/contrib/python/s3transfer/py3/s3transfer/bandwidth.py
@@ -1,103 +1,103 @@
-# Copyright 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License"). You
-# may not use this file except in compliance with the License. A copy of
-# the License is located at
-#
-# http://aws.amazon.com/apache2.0/
-#
-# or in the "license" file accompanying this file. This file is
-# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
-# ANY KIND, either express or implied. See the License for the specific
-# language governing permissions and limitations under the License.
+# Copyright 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You
+# may not use this file except in compliance with the License. A copy of
+# the License is located at
+#
+# http://aws.amazon.com/apache2.0/
+#
+# or in the "license" file accompanying this file. This file is
+# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific
+# language governing permissions and limitations under the License.
import threading
-import time
-
-
-class RequestExceededException(Exception):
- def __init__(self, requested_amt, retry_time):
- """Error when requested amount exceeds what is allowed
-
- The request that raised this error should be retried after waiting
- the time specified by ``retry_time``.
-
- :type requested_amt: int
- :param requested_amt: The originally requested byte amount
-
- :type retry_time: float
- :param retry_time: The length of time to wait before retrying for the
- requested amount
- """
- self.requested_amt = requested_amt
- self.retry_time = retry_time
+import time
+
+
+class RequestExceededException(Exception):
+ def __init__(self, requested_amt, retry_time):
+ """Error when requested amount exceeds what is allowed
+
+ The request that raised this error should be retried after waiting
+ the time specified by ``retry_time``.
+
+ :type requested_amt: int
+ :param requested_amt: The originally requested byte amount
+
+ :type retry_time: float
+ :param retry_time: The length of time to wait before retrying for the
+ requested amount
+ """
+ self.requested_amt = requested_amt
+ self.retry_time = retry_time
msg = 'Request amount {} exceeded the amount available. Retry in {}'.format(
requested_amt, retry_time
- )
+ )
super().__init__(msg)
-
-
+
+
class RequestToken:
- """A token to pass as an identifier when consuming from the LeakyBucket"""
+ """A token to pass as an identifier when consuming from the LeakyBucket"""
+
+ pass
+
- pass
-
-
class TimeUtils:
- def time(self):
- """Get the current time back
-
- :rtype: float
- :returns: The current time in seconds
- """
- return time.time()
-
- def sleep(self, value):
- """Sleep for a designated time
-
- :type value: float
- :param value: The time to sleep for in seconds
- """
- return time.sleep(value)
-
-
+ def time(self):
+ """Get the current time back
+
+ :rtype: float
+ :returns: The current time in seconds
+ """
+ return time.time()
+
+ def sleep(self, value):
+ """Sleep for a designated time
+
+ :type value: float
+ :param value: The time to sleep for in seconds
+ """
+ return time.sleep(value)
+
+
class BandwidthLimiter:
- def __init__(self, leaky_bucket, time_utils=None):
- """Limits bandwidth for shared S3 transfers
-
- :type leaky_bucket: LeakyBucket
- :param leaky_bucket: The leaky bucket to use to limit bandwidth
-
- :type time_utils: TimeUtils
- :param time_utils: Time utility to use for interacting with time.
- """
- self._leaky_bucket = leaky_bucket
- self._time_utils = time_utils
- if time_utils is None:
- self._time_utils = TimeUtils()
-
+ def __init__(self, leaky_bucket, time_utils=None):
+ """Limits bandwidth for shared S3 transfers
+
+ :type leaky_bucket: LeakyBucket
+ :param leaky_bucket: The leaky bucket to use to limit bandwidth
+
+ :type time_utils: TimeUtils
+ :param time_utils: Time utility to use for interacting with time.
+ """
+ self._leaky_bucket = leaky_bucket
+ self._time_utils = time_utils
+ if time_utils is None:
+ self._time_utils = TimeUtils()
+
def get_bandwith_limited_stream(
self, fileobj, transfer_coordinator, enabled=True
):
- """Wraps a fileobj in a bandwidth limited stream wrapper
-
- :type fileobj: file-like obj
- :param fileobj: The file-like obj to wrap
-
- :type transfer_coordinator: s3transfer.futures.TransferCoordinator
- :param transfer_coordinator: The coordinator for the general transfer
- that the wrapped stream is a part of
-
- :type enabled: boolean
- :param enabled: Whether bandwidth limiting should be enabled to start
- """
- stream = BandwidthLimitedStream(
+ """Wraps a fileobj in a bandwidth limited stream wrapper
+
+ :type fileobj: file-like obj
+ :param fileobj: The file-like obj to wrap
+
+ :type transfer_coordinator: s3transfer.futures.TransferCoordinator
+ :param transfer_coordinator: The coordinator for the general transfer
+ that the wrapped stream is a part of
+
+ :type enabled: boolean
+ :param enabled: Whether bandwidth limiting should be enabled to start
+ """
+ stream = BandwidthLimitedStream(
fileobj, self._leaky_bucket, transfer_coordinator, self._time_utils
)
- if not enabled:
- stream.disable_bandwidth_limiting()
- return stream
-
-
+ if not enabled:
+ stream.disable_bandwidth_limiting()
+ return stream
+
+
class BandwidthLimitedStream:
def __init__(
self,
@@ -107,110 +107,110 @@ class BandwidthLimitedStream:
time_utils=None,
bytes_threshold=256 * 1024,
):
- """Limits bandwidth for reads on a wrapped stream
-
- :type fileobj: file-like object
- :param fileobj: The file like object to wrap
-
- :type leaky_bucket: LeakyBucket
- :param leaky_bucket: The leaky bucket to use to throttle reads on
- the stream
-
- :type transfer_coordinator: s3transfer.futures.TransferCoordinator
- :param transfer_coordinator: The coordinator for the general transfer
- that the wrapped stream is a part of
-
- :type time_utils: TimeUtils
- :param time_utils: The time utility to use for interacting with time
- """
- self._fileobj = fileobj
- self._leaky_bucket = leaky_bucket
- self._transfer_coordinator = transfer_coordinator
- self._time_utils = time_utils
- if time_utils is None:
- self._time_utils = TimeUtils()
- self._bandwidth_limiting_enabled = True
- self._request_token = RequestToken()
- self._bytes_seen = 0
- self._bytes_threshold = bytes_threshold
-
- def enable_bandwidth_limiting(self):
- """Enable bandwidth limiting on reads to the stream"""
- self._bandwidth_limiting_enabled = True
-
- def disable_bandwidth_limiting(self):
- """Disable bandwidth limiting on reads to the stream"""
- self._bandwidth_limiting_enabled = False
-
- def read(self, amount):
- """Read a specified amount
-
- Reads will only be throttled if bandwidth limiting is enabled.
- """
- if not self._bandwidth_limiting_enabled:
- return self._fileobj.read(amount)
-
- # We do not want to be calling consume on every read as the read
- # amounts can be small causing the lock of the leaky bucket to
- # introduce noticeable overhead. So instead we keep track of
- # how many bytes we have seen and only call consume once we pass a
- # certain threshold.
- self._bytes_seen += amount
- if self._bytes_seen < self._bytes_threshold:
- return self._fileobj.read(amount)
-
- self._consume_through_leaky_bucket()
- return self._fileobj.read(amount)
-
- def _consume_through_leaky_bucket(self):
+ """Limits bandwidth for reads on a wrapped stream
+
+ :type fileobj: file-like object
+ :param fileobj: The file like object to wrap
+
+ :type leaky_bucket: LeakyBucket
+ :param leaky_bucket: The leaky bucket to use to throttle reads on
+ the stream
+
+ :type transfer_coordinator: s3transfer.futures.TransferCoordinator
+ :param transfer_coordinator: The coordinator for the general transfer
+ that the wrapped stream is a part of
+
+ :type time_utils: TimeUtils
+ :param time_utils: The time utility to use for interacting with time
+ """
+ self._fileobj = fileobj
+ self._leaky_bucket = leaky_bucket
+ self._transfer_coordinator = transfer_coordinator
+ self._time_utils = time_utils
+ if time_utils is None:
+ self._time_utils = TimeUtils()
+ self._bandwidth_limiting_enabled = True
+ self._request_token = RequestToken()
+ self._bytes_seen = 0
+ self._bytes_threshold = bytes_threshold
+
+ def enable_bandwidth_limiting(self):
+ """Enable bandwidth limiting on reads to the stream"""
+ self._bandwidth_limiting_enabled = True
+
+ def disable_bandwidth_limiting(self):
+ """Disable bandwidth limiting on reads to the stream"""
+ self._bandwidth_limiting_enabled = False
+
+ def read(self, amount):
+ """Read a specified amount
+
+ Reads will only be throttled if bandwidth limiting is enabled.
+ """
+ if not self._bandwidth_limiting_enabled:
+ return self._fileobj.read(amount)
+
+ # We do not want to be calling consume on every read as the read
+ # amounts can be small causing the lock of the leaky bucket to
+ # introduce noticeable overhead. So instead we keep track of
+ # how many bytes we have seen and only call consume once we pass a
+ # certain threshold.
+ self._bytes_seen += amount
+ if self._bytes_seen < self._bytes_threshold:
+ return self._fileobj.read(amount)
+
+ self._consume_through_leaky_bucket()
+ return self._fileobj.read(amount)
+
+ def _consume_through_leaky_bucket(self):
 # NOTE: If the read amounts on the stream are high, it will result
- # in large bursty behavior as there is not an interface for partial
- # reads. However, given that reads on this abstraction are at most 256KB
- # (via downloads), it reduces the burstiness to be small KB bursts at
- # worst.
- while not self._transfer_coordinator.exception:
- try:
- self._leaky_bucket.consume(
+ # in large bursty behavior as there is not an interface for partial
+ # reads. However, given that reads on this abstraction are at most 256KB
+ # (via downloads), it reduces the burstiness to be small KB bursts at
+ # worst.
+ while not self._transfer_coordinator.exception:
+ try:
+ self._leaky_bucket.consume(
self._bytes_seen, self._request_token
)
- self._bytes_seen = 0
- return
- except RequestExceededException as e:
- self._time_utils.sleep(e.retry_time)
- else:
- raise self._transfer_coordinator.exception
-
- def signal_transferring(self):
- """Signal that data being read is being transferred to S3"""
- self.enable_bandwidth_limiting()
-
- def signal_not_transferring(self):
- """Signal that data being read is not being transferred to S3"""
- self.disable_bandwidth_limiting()
-
+ self._bytes_seen = 0
+ return
+ except RequestExceededException as e:
+ self._time_utils.sleep(e.retry_time)
+ else:
+ raise self._transfer_coordinator.exception
+
+ def signal_transferring(self):
+ """Signal that data being read is being transferred to S3"""
+ self.enable_bandwidth_limiting()
+
+ def signal_not_transferring(self):
+ """Signal that data being read is not being transferred to S3"""
+ self.disable_bandwidth_limiting()
+
def seek(self, where, whence=0):
self._fileobj.seek(where, whence)
-
- def tell(self):
- return self._fileobj.tell()
-
- def close(self):
- if self._bandwidth_limiting_enabled and self._bytes_seen:
- # This handles the case where the file is small enough to never
- # trigger the threshold and thus is never subjected to the
- # leaky bucket on read(). This specifically happens for small
- # uploads. So instead to account for those bytes, have
- # it go through the leaky bucket when the file gets closed.
- self._consume_through_leaky_bucket()
- self._fileobj.close()
-
- def __enter__(self):
- return self
-
- def __exit__(self, *args, **kwargs):
- self.close()
-
-
+
+ def tell(self):
+ return self._fileobj.tell()
+
+ def close(self):
+ if self._bandwidth_limiting_enabled and self._bytes_seen:
+ # This handles the case where the file is small enough to never
+ # trigger the threshold and thus is never subjected to the
+ # leaky bucket on read(). This specifically happens for small
+ # uploads. So instead to account for those bytes, have
+ # it go through the leaky bucket when the file gets closed.
+ self._consume_through_leaky_bucket()
+ self._fileobj.close()
+
+ def __enter__(self):
+ return self
+
+ def __exit__(self, *args, **kwargs):
+ self.close()
+
+
class LeakyBucket:
def __init__(
self,
@@ -219,221 +219,221 @@ class LeakyBucket:
rate_tracker=None,
consumption_scheduler=None,
):
- """A leaky bucket abstraction to limit bandwidth consumption
-
- :type max_rate: int
- :param max_rate: The maximum rate to allow. This rate is in terms of
- bytes per second.
-
- :type time_utils: TimeUtils
- :param time_utils: The time utility to use for interacting with time
-
- :type rate_tracker: BandwidthRateTracker
- :param rate_tracker: Tracks bandwidth consumption
-
- :type consumption_scheduler: ConsumptionScheduler
- :param consumption_scheduler: Schedules consumption retries when
- necessary
- """
- self._max_rate = float(max_rate)
- self._time_utils = time_utils
- if time_utils is None:
- self._time_utils = TimeUtils()
- self._lock = threading.Lock()
- self._rate_tracker = rate_tracker
- if rate_tracker is None:
- self._rate_tracker = BandwidthRateTracker()
- self._consumption_scheduler = consumption_scheduler
- if consumption_scheduler is None:
- self._consumption_scheduler = ConsumptionScheduler()
-
- def consume(self, amt, request_token):
- """Consume an a requested amount
-
- :type amt: int
- :param amt: The amount of bytes to request to consume
-
- :type request_token: RequestToken
- :param request_token: The token associated to the consumption
- request that is used to identify the request. So if a
- RequestExceededException is raised the token should be used
- in subsequent retry consume() request.
-
- :raises RequestExceededException: If the consumption amount would
- exceed the maximum allocated bandwidth
-
- :rtype: int
- :returns: The amount consumed
- """
- with self._lock:
- time_now = self._time_utils.time()
- if self._consumption_scheduler.is_scheduled(request_token):
- return self._release_requested_amt_for_scheduled_request(
+ """A leaky bucket abstraction to limit bandwidth consumption
+
+ :type max_rate: int
+ :param max_rate: The maximum rate to allow. This rate is in terms of
+ bytes per second.
+
+ :type time_utils: TimeUtils
+ :param time_utils: The time utility to use for interacting with time
+
+ :type rate_tracker: BandwidthRateTracker
+ :param rate_tracker: Tracks bandwidth consumption
+
+ :type consumption_scheduler: ConsumptionScheduler
+ :param consumption_scheduler: Schedules consumption retries when
+ necessary
+ """
+ self._max_rate = float(max_rate)
+ self._time_utils = time_utils
+ if time_utils is None:
+ self._time_utils = TimeUtils()
+ self._lock = threading.Lock()
+ self._rate_tracker = rate_tracker
+ if rate_tracker is None:
+ self._rate_tracker = BandwidthRateTracker()
+ self._consumption_scheduler = consumption_scheduler
+ if consumption_scheduler is None:
+ self._consumption_scheduler = ConsumptionScheduler()
+
+ def consume(self, amt, request_token):
+ """Consume an a requested amount
+
+ :type amt: int
+ :param amt: The amount of bytes to request to consume
+
+ :type request_token: RequestToken
+ :param request_token: The token associated to the consumption
+ request that is used to identify the request. So if a
+ RequestExceededException is raised the token should be used
+ in subsequent retry consume() request.
+
+ :raises RequestExceededException: If the consumption amount would
+ exceed the maximum allocated bandwidth
+
+ :rtype: int
+ :returns: The amount consumed
+ """
+ with self._lock:
+ time_now = self._time_utils.time()
+ if self._consumption_scheduler.is_scheduled(request_token):
+ return self._release_requested_amt_for_scheduled_request(
amt, request_token, time_now
)
- elif self._projected_to_exceed_max_rate(amt, time_now):
- self._raise_request_exceeded_exception(
+ elif self._projected_to_exceed_max_rate(amt, time_now):
+ self._raise_request_exceeded_exception(
amt, request_token, time_now
)
- else:
- return self._release_requested_amt(amt, time_now)
-
- def _projected_to_exceed_max_rate(self, amt, time_now):
- projected_rate = self._rate_tracker.get_projected_rate(amt, time_now)
- return projected_rate > self._max_rate
-
+ else:
+ return self._release_requested_amt(amt, time_now)
+
+ def _projected_to_exceed_max_rate(self, amt, time_now):
+ projected_rate = self._rate_tracker.get_projected_rate(amt, time_now)
+ return projected_rate > self._max_rate
+
def _release_requested_amt_for_scheduled_request(
self, amt, request_token, time_now
):
- self._consumption_scheduler.process_scheduled_consumption(
+ self._consumption_scheduler.process_scheduled_consumption(
request_token
)
- return self._release_requested_amt(amt, time_now)
-
- def _raise_request_exceeded_exception(self, amt, request_token, time_now):
+ return self._release_requested_amt(amt, time_now)
+
+ def _raise_request_exceeded_exception(self, amt, request_token, time_now):
allocated_time = amt / float(self._max_rate)
- retry_time = self._consumption_scheduler.schedule_consumption(
+ retry_time = self._consumption_scheduler.schedule_consumption(
amt, request_token, allocated_time
)
- raise RequestExceededException(
+ raise RequestExceededException(
requested_amt=amt, retry_time=retry_time
)
-
- def _release_requested_amt(self, amt, time_now):
- self._rate_tracker.record_consumption_rate(amt, time_now)
- return amt
-
-
+
+ def _release_requested_amt(self, amt, time_now):
+ self._rate_tracker.record_consumption_rate(amt, time_now)
+ return amt
+
+
class ConsumptionScheduler:
- def __init__(self):
- """Schedules when to consume a desired amount"""
- self._tokens_to_scheduled_consumption = {}
- self._total_wait = 0
-
- def is_scheduled(self, token):
- """Indicates if a consumption request has been scheduled
-
- :type token: RequestToken
- :param token: The token associated to the consumption
- request that is used to identify the request.
- """
- return token in self._tokens_to_scheduled_consumption
-
- def schedule_consumption(self, amt, token, time_to_consume):
- """Schedules a wait time to be able to consume an amount
-
- :type amt: int
- :param amt: The amount of bytes scheduled to be consumed
-
- :type token: RequestToken
- :param token: The token associated to the consumption
- request that is used to identify the request.
-
- :type time_to_consume: float
- :param time_to_consume: The desired time it should take for that
- specific request amount to be consumed, regardless of previously
- scheduled consumption requests
-
- :rtype: float
- :returns: The amount of time to wait for the specific request before
- actually consuming the specified amount.
- """
- self._total_wait += time_to_consume
- self._tokens_to_scheduled_consumption[token] = {
- 'wait_duration': self._total_wait,
- 'time_to_consume': time_to_consume,
- }
- return self._total_wait
-
- def process_scheduled_consumption(self, token):
- """Processes a scheduled consumption request that has completed
-
- :type token: RequestToken
- :param token: The token associated to the consumption
- request that is used to identify the request.
- """
- scheduled_retry = self._tokens_to_scheduled_consumption.pop(token)
- self._total_wait = max(
+ def __init__(self):
+ """Schedules when to consume a desired amount"""
+ self._tokens_to_scheduled_consumption = {}
+ self._total_wait = 0
+
+ def is_scheduled(self, token):
+ """Indicates if a consumption request has been scheduled
+
+ :type token: RequestToken
+ :param token: The token associated to the consumption
+ request that is used to identify the request.
+ """
+ return token in self._tokens_to_scheduled_consumption
+
+ def schedule_consumption(self, amt, token, time_to_consume):
+ """Schedules a wait time to be able to consume an amount
+
+ :type amt: int
+ :param amt: The amount of bytes scheduled to be consumed
+
+ :type token: RequestToken
+ :param token: The token associated to the consumption
+ request that is used to identify the request.
+
+ :type time_to_consume: float
+ :param time_to_consume: The desired time it should take for that
+ specific request amount to be consumed, regardless of previously
+ scheduled consumption requests
+
+ :rtype: float
+ :returns: The amount of time to wait for the specific request before
+ actually consuming the specified amount.
+ """
+ self._total_wait += time_to_consume
+ self._tokens_to_scheduled_consumption[token] = {
+ 'wait_duration': self._total_wait,
+ 'time_to_consume': time_to_consume,
+ }
+ return self._total_wait
+
+ def process_scheduled_consumption(self, token):
+ """Processes a scheduled consumption request that has completed
+
+ :type token: RequestToken
+ :param token: The token associated to the consumption
+ request that is used to identify the request.
+ """
+ scheduled_retry = self._tokens_to_scheduled_consumption.pop(token)
+ self._total_wait = max(
self._total_wait - scheduled_retry['time_to_consume'], 0
)
-
-
+
+
class BandwidthRateTracker:
- def __init__(self, alpha=0.8):
- """Tracks the rate of bandwidth consumption
-
- :type alpha: float
- :param alpha: The constant to use in calculating the exponential moving
- average of the bandwidth rate. Specifically it is used in the
- following calculation:
-
- current_rate = alpha * new_rate + (1 - alpha) * current_rate
-
- The value of this constant should be between 0 and 1.
- """
- self._alpha = alpha
- self._last_time = None
- self._current_rate = None
-
- @property
- def current_rate(self):
- """The current transfer rate
-
- :rtype: float
- :returns: The current tracked transfer rate
- """
- if self._last_time is None:
- return 0.0
- return self._current_rate
-
- def get_projected_rate(self, amt, time_at_consumption):
- """Get the projected rate using a provided amount and time
-
- :type amt: int
- :param amt: The proposed amount to consume
-
- :type time_at_consumption: float
- :param time_at_consumption: The proposed time to consume at
-
- :rtype: float
- :returns: The consumption rate if that amt and time were consumed
- """
- if self._last_time is None:
- return 0.0
- return self._calculate_exponential_moving_average_rate(
+ def __init__(self, alpha=0.8):
+ """Tracks the rate of bandwidth consumption
+
+ :type alpha: float
+ :param alpha: The constant to use in calculating the exponential moving
+ average of the bandwidth rate. Specifically it is used in the
+ following calculation:
+
+ current_rate = alpha * new_rate + (1 - alpha) * current_rate
+
+ The value of this constant should be between 0 and 1.
+ """
+ self._alpha = alpha
+ self._last_time = None
+ self._current_rate = None
+
+ @property
+ def current_rate(self):
+ """The current transfer rate
+
+ :rtype: float
+ :returns: The current tracked transfer rate
+ """
+ if self._last_time is None:
+ return 0.0
+ return self._current_rate
+
+ def get_projected_rate(self, amt, time_at_consumption):
+ """Get the projected rate using a provided amount and time
+
+ :type amt: int
+ :param amt: The proposed amount to consume
+
+ :type time_at_consumption: float
+ :param time_at_consumption: The proposed time to consume at
+
+ :rtype: float
+ :returns: The consumption rate if that amt and time were consumed
+ """
+ if self._last_time is None:
+ return 0.0
+ return self._calculate_exponential_moving_average_rate(
amt, time_at_consumption
)
-
- def record_consumption_rate(self, amt, time_at_consumption):
- """Record the consumption rate based off amount and time point
-
- :type amt: int
- :param amt: The amount that got consumed
-
- :type time_at_consumption: float
- :param time_at_consumption: The time at which the amount was consumed
- """
- if self._last_time is None:
- self._last_time = time_at_consumption
- self._current_rate = 0.0
- return
- self._current_rate = self._calculate_exponential_moving_average_rate(
+
+ def record_consumption_rate(self, amt, time_at_consumption):
+ """Record the consumption rate based off amount and time point
+
+ :type amt: int
+ :param amt: The amount that got consumed
+
+ :type time_at_consumption: float
+ :param time_at_consumption: The time at which the amount was consumed
+ """
+ if self._last_time is None:
+ self._last_time = time_at_consumption
+ self._current_rate = 0.0
+ return
+ self._current_rate = self._calculate_exponential_moving_average_rate(
amt, time_at_consumption
)
- self._last_time = time_at_consumption
-
- def _calculate_rate(self, amt, time_at_consumption):
- time_delta = time_at_consumption - self._last_time
- if time_delta <= 0:
+ self._last_time = time_at_consumption
+
+ def _calculate_rate(self, amt, time_at_consumption):
+ time_delta = time_at_consumption - self._last_time
+ if time_delta <= 0:
# While it is really unlikely to see this in an actual transfer,
- # we do not want to be returning back a negative rate or try to
- # divide the amount by zero. So instead return back an infinite
- # rate as the time delta is infinitesimally small.
- return float('inf')
- return amt / (time_delta)
-
+ # we do not want to be returning back a negative rate or try to
+ # divide the amount by zero. So instead return back an infinite
+ # rate as the time delta is infinitesimally small.
+ return float('inf')
+ return amt / (time_delta)
+
def _calculate_exponential_moving_average_rate(
self, amt, time_at_consumption
):
- new_rate = self._calculate_rate(amt, time_at_consumption)
- return self._alpha * new_rate + (1 - self._alpha) * self._current_rate
+ new_rate = self._calculate_rate(amt, time_at_consumption)
+ return self._alpha * new_rate + (1 - self._alpha) * self._current_rate
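The bandwidth.py file diffed above implements client-side throttling: LeakyBucket gates how many bytes may be released per second, ConsumptionScheduler queues retries, and BandwidthRateTracker keeps an exponential moving average of the observed rate. A minimal sketch of the consume/retry contract described in LeakyBucket.consume() follows (not part of the diff); the rate and byte counts are arbitrary example values, and the retry loop is the pattern the docstring describes rather than code from the library.

    import time

    from s3transfer.bandwidth import (
        LeakyBucket,
        RequestExceededException,
        RequestToken,
    )

    bucket = LeakyBucket(max_rate=1024 * 1024)  # assumed cap of roughly 1 MiB/s
    token = RequestToken()                      # identifies this consumer across retries

    while True:
        try:
            bucket.consume(256 * 1024, token)   # returns the amount once it is released
            break
        except RequestExceededException as e:
            # The bucket scheduled this token; wait the suggested time and
            # retry with the same token so the scheduled slot is honored.
            time.sleep(e.retry_time)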
diff --git a/contrib/python/s3transfer/py3/s3transfer/compat.py b/contrib/python/s3transfer/py3/s3transfer/compat.py
index dc0c2fee3e..68267ad0e2 100644
--- a/contrib/python/s3transfer/py3/s3transfer/compat.py
+++ b/contrib/python/s3transfer/py3/s3transfer/compat.py
@@ -1,86 +1,86 @@
-# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License"). You
-# may not use this file except in compliance with the License. A copy of
-# the License is located at
-#
-# http://aws.amazon.com/apache2.0/
-#
-# or in the "license" file accompanying this file. This file is
-# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
-# ANY KIND, either express or implied. See the License for the specific
-# language governing permissions and limitations under the License.
+# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You
+# may not use this file except in compliance with the License. A copy of
+# the License is located at
+#
+# http://aws.amazon.com/apache2.0/
+#
+# or in the "license" file accompanying this file. This file is
+# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific
+# language governing permissions and limitations under the License.
import errno
-import inspect
-import os
-import socket
+import inspect
+import os
+import socket
import sys
-
-from botocore.compat import six
-
-if sys.platform.startswith('win'):
- def rename_file(current_filename, new_filename):
- try:
- os.remove(new_filename)
- except OSError as e:
- if not e.errno == errno.ENOENT:
- # We only want to ignore trying to remove
- # a file that does not exist. If it fails
- # for any other reason we should be propagating
- # that exception.
- raise
- os.rename(current_filename, new_filename)
-else:
- rename_file = os.rename
-
-
+
+from botocore.compat import six
+
+if sys.platform.startswith('win'):
+ def rename_file(current_filename, new_filename):
+ try:
+ os.remove(new_filename)
+ except OSError as e:
+ if not e.errno == errno.ENOENT:
+ # We only want to ignore trying to remove
+ # a file that does not exist. If it fails
+ # for any other reason we should be propagating
+ # that exception.
+ raise
+ os.rename(current_filename, new_filename)
+else:
+ rename_file = os.rename
+
+
def accepts_kwargs(func):
return inspect.getfullargspec(func)[2]
-
-
+
+
# In python 3, socket.error is OSError, which is too general
# for what we want (i.e FileNotFoundError is a subclass of OSError).
# In python 3, all the socket related errors are in a newly created
# ConnectionError.
SOCKET_ERROR = ConnectionError
MAXINT = None
-
-
-def seekable(fileobj):
- """Backwards compat function to determine if a fileobj is seekable
-
- :param fileobj: The file-like object to determine if seekable
-
- :returns: True, if seekable. False, otherwise.
- """
- # If the fileobj has a seekable attr, try calling the seekable()
- # method on it.
- if hasattr(fileobj, 'seekable'):
- return fileobj.seekable()
- # If there is no seekable attr, check if the object can be seeked
- # or telled. If it can, try to seek to the current position.
- elif hasattr(fileobj, 'seek') and hasattr(fileobj, 'tell'):
- try:
- fileobj.seek(0, 1)
- return True
+
+
+def seekable(fileobj):
+ """Backwards compat function to determine if a fileobj is seekable
+
+ :param fileobj: The file-like object to determine if seekable
+
+ :returns: True, if seekable. False, otherwise.
+ """
+ # If the fileobj has a seekable attr, try calling the seekable()
+ # method on it.
+ if hasattr(fileobj, 'seekable'):
+ return fileobj.seekable()
+ # If there is no seekable attr, check if the object can be seeked
+ # or telled. If it can, try to seek to the current position.
+ elif hasattr(fileobj, 'seek') and hasattr(fileobj, 'tell'):
+ try:
+ fileobj.seek(0, 1)
+ return True
except OSError:
- # If an io related error was thrown then it is not seekable.
- return False
- # Else, the fileobj is not seekable
- return False
-
-
-def readable(fileobj):
- """Determines whether or not a file-like object is readable.
-
- :param fileobj: The file-like object to determine if readable
-
- :returns: True, if readable. False otherwise.
- """
- if hasattr(fileobj, 'readable'):
- return fileobj.readable()
-
- return hasattr(fileobj, 'read')
+ # If an io related error was thrown then it is not seekable.
+ return False
+ # Else, the fileobj is not seekable
+ return False
+
+
+def readable(fileobj):
+ """Determines whether or not a file-like object is readable.
+
+ :param fileobj: The file-like object to determine if readable
+
+ :returns: True, if readable. False otherwise.
+ """
+ if hasattr(fileobj, 'readable'):
+ return fileobj.readable()
+
+ return hasattr(fileobj, 'read')
def fallocate(fileobj, size):
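The compat.py diff above ends at the fallocate() definition, whose body lies outside this hunk. Its seekable() and readable() helpers are duck-typing checks; the sketch below (not part of the diff) shows how they classify file-like objects, where BodyLike is a hypothetical stand-in for a streaming response body rather than a class from the library.

    import io

    from s3transfer.compat import readable, seekable

    buf = io.BytesIO(b'example')
    print(seekable(buf))   # True  - BytesIO exposes seekable()
    print(readable(buf))   # True  - BytesIO exposes readable()

    class BodyLike:
        """Hypothetical stream with read() but no seek()/tell()."""
        def read(self, amt=None):
            return b''

    print(seekable(BodyLike()))  # False - no seekable(), seek() or tell()
    print(readable(BodyLike()))  # True  - falls back to hasattr(obj, 'read')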
diff --git a/contrib/python/s3transfer/py3/s3transfer/copies.py b/contrib/python/s3transfer/py3/s3transfer/copies.py
index 2ef3d06b2d..a1dfdc8ba3 100644
--- a/contrib/python/s3transfer/py3/s3transfer/copies.py
+++ b/contrib/python/s3transfer/py3/s3transfer/copies.py
@@ -1,18 +1,18 @@
-# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License"). You
-# may not use this file except in compliance with the License. A copy of
-# the License is located at
-#
-# http://aws.amazon.com/apache2.0/
-#
-# or in the "license" file accompanying this file. This file is
-# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
-# ANY KIND, either express or implied. See the License for the specific
-# language governing permissions and limitations under the License.
-import copy
-import math
-
+# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You
+# may not use this file except in compliance with the License. A copy of
+# the License is located at
+#
+# http://aws.amazon.com/apache2.0/
+#
+# or in the "license" file accompanying this file. This file is
+# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific
+# language governing permissions and limitations under the License.
+import copy
+import math
+
from s3transfer.tasks import (
CompleteMultipartUploadTask,
CreateMultipartUploadTask,
@@ -25,300 +25,300 @@ from s3transfer.utils import (
get_callbacks,
get_filtered_dict,
)
-
-
-class CopySubmissionTask(SubmissionTask):
- """Task for submitting tasks to execute a copy"""
-
- EXTRA_ARGS_TO_HEAD_ARGS_MAPPING = {
- 'CopySourceIfMatch': 'IfMatch',
- 'CopySourceIfModifiedSince': 'IfModifiedSince',
- 'CopySourceIfNoneMatch': 'IfNoneMatch',
- 'CopySourceIfUnmodifiedSince': 'IfUnmodifiedSince',
- 'CopySourceSSECustomerKey': 'SSECustomerKey',
- 'CopySourceSSECustomerAlgorithm': 'SSECustomerAlgorithm',
- 'CopySourceSSECustomerKeyMD5': 'SSECustomerKeyMD5',
+
+
+class CopySubmissionTask(SubmissionTask):
+ """Task for submitting tasks to execute a copy"""
+
+ EXTRA_ARGS_TO_HEAD_ARGS_MAPPING = {
+ 'CopySourceIfMatch': 'IfMatch',
+ 'CopySourceIfModifiedSince': 'IfModifiedSince',
+ 'CopySourceIfNoneMatch': 'IfNoneMatch',
+ 'CopySourceIfUnmodifiedSince': 'IfUnmodifiedSince',
+ 'CopySourceSSECustomerKey': 'SSECustomerKey',
+ 'CopySourceSSECustomerAlgorithm': 'SSECustomerAlgorithm',
+ 'CopySourceSSECustomerKeyMD5': 'SSECustomerKeyMD5',
'RequestPayer': 'RequestPayer',
'ExpectedBucketOwner': 'ExpectedBucketOwner',
- }
-
- UPLOAD_PART_COPY_ARGS = [
- 'CopySourceIfMatch',
- 'CopySourceIfModifiedSince',
- 'CopySourceIfNoneMatch',
- 'CopySourceIfUnmodifiedSince',
- 'CopySourceSSECustomerKey',
- 'CopySourceSSECustomerAlgorithm',
- 'CopySourceSSECustomerKeyMD5',
- 'SSECustomerKey',
- 'SSECustomerAlgorithm',
- 'SSECustomerKeyMD5',
- 'RequestPayer',
+ }
+
+ UPLOAD_PART_COPY_ARGS = [
+ 'CopySourceIfMatch',
+ 'CopySourceIfModifiedSince',
+ 'CopySourceIfNoneMatch',
+ 'CopySourceIfUnmodifiedSince',
+ 'CopySourceSSECustomerKey',
+ 'CopySourceSSECustomerAlgorithm',
+ 'CopySourceSSECustomerKeyMD5',
+ 'SSECustomerKey',
+ 'SSECustomerAlgorithm',
+ 'SSECustomerKeyMD5',
+ 'RequestPayer',
'ExpectedBucketOwner',
- ]
-
- CREATE_MULTIPART_ARGS_BLACKLIST = [
- 'CopySourceIfMatch',
- 'CopySourceIfModifiedSince',
- 'CopySourceIfNoneMatch',
- 'CopySourceIfUnmodifiedSince',
- 'CopySourceSSECustomerKey',
- 'CopySourceSSECustomerAlgorithm',
- 'CopySourceSSECustomerKeyMD5',
+ ]
+
+ CREATE_MULTIPART_ARGS_BLACKLIST = [
+ 'CopySourceIfMatch',
+ 'CopySourceIfModifiedSince',
+ 'CopySourceIfNoneMatch',
+ 'CopySourceIfUnmodifiedSince',
+ 'CopySourceSSECustomerKey',
+ 'CopySourceSSECustomerAlgorithm',
+ 'CopySourceSSECustomerKeyMD5',
'MetadataDirective',
'TaggingDirective',
- ]
-
+ ]
+
COMPLETE_MULTIPART_ARGS = ['RequestPayer', 'ExpectedBucketOwner']
-
+
def _submit(
self, client, config, osutil, request_executor, transfer_future
):
- """
- :param client: The client associated with the transfer manager
-
- :type config: s3transfer.manager.TransferConfig
- :param config: The transfer config associated with the transfer
- manager
-
- :type osutil: s3transfer.utils.OSUtil
- :param osutil: The os utility associated to the transfer manager
-
- :type request_executor: s3transfer.futures.BoundedExecutor
- :param request_executor: The request executor associated with the
- transfer manager
-
- :type transfer_future: s3transfer.futures.TransferFuture
- :param transfer_future: The transfer future associated with the
- transfer request that tasks are being submitted for
- """
- # Determine the size if it was not provided
- if transfer_future.meta.size is None:
- # If a size was not provided figure out the size for the
- # user. Note that we will only use the client provided to
- # the TransferManager. If the object is outside of the region
- # of the client, they may have to provide the file size themselves
- # with a completely new client.
- call_args = transfer_future.meta.call_args
+ """
+ :param client: The client associated with the transfer manager
+
+ :type config: s3transfer.manager.TransferConfig
+ :param config: The transfer config associated with the transfer
+ manager
+
+ :type osutil: s3transfer.utils.OSUtil
+ :param osutil: The os utility associated to the transfer manager
+
+ :type request_executor: s3transfer.futures.BoundedExecutor
+ :param request_executor: The request executor associated with the
+ transfer manager
+
+ :type transfer_future: s3transfer.futures.TransferFuture
+ :param transfer_future: The transfer future associated with the
+ transfer request that tasks are being submitted for
+ """
+ # Determine the size if it was not provided
+ if transfer_future.meta.size is None:
+ # If a size was not provided figure out the size for the
+ # user. Note that we will only use the client provided to
+ # the TransferManager. If the object is outside of the region
+ # of the client, they may have to provide the file size themselves
+ # with a completely new client.
+ call_args = transfer_future.meta.call_args
head_object_request = (
- self._get_head_object_request_from_copy_source(
+ self._get_head_object_request_from_copy_source(
call_args.copy_source
)
)
- extra_args = call_args.extra_args
-
- # Map any values that may be used in the head object that is
- # used in the copy object
- for param, value in extra_args.items():
- if param in self.EXTRA_ARGS_TO_HEAD_ARGS_MAPPING:
- head_object_request[
+ extra_args = call_args.extra_args
+
+ # Map any values that may be used in the head object that is
+ # used in the copy object
+ for param, value in extra_args.items():
+ if param in self.EXTRA_ARGS_TO_HEAD_ARGS_MAPPING:
+ head_object_request[
self.EXTRA_ARGS_TO_HEAD_ARGS_MAPPING[param]
] = value
-
- response = call_args.source_client.head_object(
+
+ response = call_args.source_client.head_object(
**head_object_request
)
- transfer_future.meta.provide_transfer_size(
+ transfer_future.meta.provide_transfer_size(
response['ContentLength']
)
-
- # If it is greater than threshold do a multipart copy, otherwise
- # do a regular copy object.
- if transfer_future.meta.size < config.multipart_threshold:
- self._submit_copy_request(
+
+ # If it is greater than threshold do a multipart copy, otherwise
+ # do a regular copy object.
+ if transfer_future.meta.size < config.multipart_threshold:
+ self._submit_copy_request(
client, config, osutil, request_executor, transfer_future
)
- else:
- self._submit_multipart_request(
+ else:
+ self._submit_multipart_request(
client, config, osutil, request_executor, transfer_future
)
-
+
def _submit_copy_request(
self, client, config, osutil, request_executor, transfer_future
):
- call_args = transfer_future.meta.call_args
-
- # Get the needed progress callbacks for the task
- progress_callbacks = get_callbacks(transfer_future, 'progress')
-
- # Submit the request of a single copy.
- self._transfer_coordinator.submit(
- request_executor,
- CopyObjectTask(
- transfer_coordinator=self._transfer_coordinator,
- main_kwargs={
- 'client': client,
- 'copy_source': call_args.copy_source,
- 'bucket': call_args.bucket,
- 'key': call_args.key,
- 'extra_args': call_args.extra_args,
- 'callbacks': progress_callbacks,
+ call_args = transfer_future.meta.call_args
+
+ # Get the needed progress callbacks for the task
+ progress_callbacks = get_callbacks(transfer_future, 'progress')
+
+ # Submit the request of a single copy.
+ self._transfer_coordinator.submit(
+ request_executor,
+ CopyObjectTask(
+ transfer_coordinator=self._transfer_coordinator,
+ main_kwargs={
+ 'client': client,
+ 'copy_source': call_args.copy_source,
+ 'bucket': call_args.bucket,
+ 'key': call_args.key,
+ 'extra_args': call_args.extra_args,
+ 'callbacks': progress_callbacks,
'size': transfer_future.meta.size,
- },
+ },
is_final=True,
),
- )
-
+ )
+
def _submit_multipart_request(
self, client, config, osutil, request_executor, transfer_future
):
- call_args = transfer_future.meta.call_args
-
- # Submit the request to create a multipart upload and make sure it
- # does not include any of the arguments used for copy part.
- create_multipart_extra_args = {}
- for param, val in call_args.extra_args.items():
- if param not in self.CREATE_MULTIPART_ARGS_BLACKLIST:
- create_multipart_extra_args[param] = val
-
- create_multipart_future = self._transfer_coordinator.submit(
- request_executor,
- CreateMultipartUploadTask(
- transfer_coordinator=self._transfer_coordinator,
- main_kwargs={
- 'client': client,
- 'bucket': call_args.bucket,
- 'key': call_args.key,
- 'extra_args': create_multipart_extra_args,
+ call_args = transfer_future.meta.call_args
+
+ # Submit the request to create a multipart upload and make sure it
+ # does not include any of the arguments used for copy part.
+ create_multipart_extra_args = {}
+ for param, val in call_args.extra_args.items():
+ if param not in self.CREATE_MULTIPART_ARGS_BLACKLIST:
+ create_multipart_extra_args[param] = val
+
+ create_multipart_future = self._transfer_coordinator.submit(
+ request_executor,
+ CreateMultipartUploadTask(
+ transfer_coordinator=self._transfer_coordinator,
+ main_kwargs={
+ 'client': client,
+ 'bucket': call_args.bucket,
+ 'key': call_args.key,
+ 'extra_args': create_multipart_extra_args,
},
),
- )
-
- # Determine how many parts are needed based on filesize and
- # desired chunksize.
- part_size = config.multipart_chunksize
- adjuster = ChunksizeAdjuster()
- part_size = adjuster.adjust_chunksize(
+ )
+
+ # Determine how many parts are needed based on filesize and
+ # desired chunksize.
+ part_size = config.multipart_chunksize
+ adjuster = ChunksizeAdjuster()
+ part_size = adjuster.adjust_chunksize(
part_size, transfer_future.meta.size
)
- num_parts = int(
+ num_parts = int(
math.ceil(transfer_future.meta.size / float(part_size))
)
-
- # Submit requests to upload the parts of the file.
- part_futures = []
- progress_callbacks = get_callbacks(transfer_future, 'progress')
-
- for part_number in range(1, num_parts + 1):
- extra_part_args = self._extra_upload_part_args(
+
+ # Submit requests to upload the parts of the file.
+ part_futures = []
+ progress_callbacks = get_callbacks(transfer_future, 'progress')
+
+ for part_number in range(1, num_parts + 1):
+ extra_part_args = self._extra_upload_part_args(
call_args.extra_args
)
-            # The part number for upload part starts at 1 while the
-            # range parameter starts at zero, so just subtract 1 from
-            # the part number.
- extra_part_args['CopySourceRange'] = calculate_range_parameter(
+            # The part number for upload part starts at 1 while the
+            # range parameter starts at zero, so just subtract 1 from
+            # the part number.
+ extra_part_args['CopySourceRange'] = calculate_range_parameter(
part_size,
part_number - 1,
num_parts,
transfer_future.meta.size,
)
- # Get the size of the part copy as well for the progress
- # callbacks.
- size = self._get_transfer_size(
+ # Get the size of the part copy as well for the progress
+ # callbacks.
+ size = self._get_transfer_size(
part_size,
part_number - 1,
num_parts,
transfer_future.meta.size,
- )
- part_futures.append(
- self._transfer_coordinator.submit(
- request_executor,
- CopyPartTask(
- transfer_coordinator=self._transfer_coordinator,
- main_kwargs={
- 'client': client,
- 'copy_source': call_args.copy_source,
- 'bucket': call_args.bucket,
- 'key': call_args.key,
- 'part_number': part_number,
- 'extra_args': extra_part_args,
- 'callbacks': progress_callbacks,
+ )
+ part_futures.append(
+ self._transfer_coordinator.submit(
+ request_executor,
+ CopyPartTask(
+ transfer_coordinator=self._transfer_coordinator,
+ main_kwargs={
+ 'client': client,
+ 'copy_source': call_args.copy_source,
+ 'bucket': call_args.bucket,
+ 'key': call_args.key,
+ 'part_number': part_number,
+ 'extra_args': extra_part_args,
+ 'callbacks': progress_callbacks,
'size': size,
- },
- pending_main_kwargs={
- 'upload_id': create_multipart_future
+ },
+ pending_main_kwargs={
+ 'upload_id': create_multipart_future
},
),
- )
- )
-
- complete_multipart_extra_args = self._extra_complete_multipart_args(
+ )
+ )
+
+ complete_multipart_extra_args = self._extra_complete_multipart_args(
call_args.extra_args
)
- # Submit the request to complete the multipart upload.
- self._transfer_coordinator.submit(
- request_executor,
- CompleteMultipartUploadTask(
- transfer_coordinator=self._transfer_coordinator,
- main_kwargs={
- 'client': client,
- 'bucket': call_args.bucket,
- 'key': call_args.key,
- 'extra_args': complete_multipart_extra_args,
- },
- pending_main_kwargs={
- 'upload_id': create_multipart_future,
+ # Submit the request to complete the multipart upload.
+ self._transfer_coordinator.submit(
+ request_executor,
+ CompleteMultipartUploadTask(
+ transfer_coordinator=self._transfer_coordinator,
+ main_kwargs={
+ 'client': client,
+ 'bucket': call_args.bucket,
+ 'key': call_args.key,
+ 'extra_args': complete_multipart_extra_args,
+ },
+ pending_main_kwargs={
+ 'upload_id': create_multipart_future,
'parts': part_futures,
- },
+ },
is_final=True,
),
- )
-
- def _get_head_object_request_from_copy_source(self, copy_source):
- if isinstance(copy_source, dict):
- return copy.copy(copy_source)
- else:
- raise TypeError(
- 'Expecting dictionary formatted: '
- '{"Bucket": bucket_name, "Key": key} '
+ )
+
+ def _get_head_object_request_from_copy_source(self, copy_source):
+ if isinstance(copy_source, dict):
+ return copy.copy(copy_source)
+ else:
+ raise TypeError(
+ 'Expecting dictionary formatted: '
+ '{"Bucket": bucket_name, "Key": key} '
'but got %s or type %s.' % (copy_source, type(copy_source))
- )
-
- def _extra_upload_part_args(self, extra_args):
-        # Only the args in UPLOAD_PART_COPY_ARGS actually need to be
-        # passed on to the upload_part_copy calls.
- return get_filtered_dict(extra_args, self.UPLOAD_PART_COPY_ARGS)
-
- def _extra_complete_multipart_args(self, extra_args):
- return get_filtered_dict(extra_args, self.COMPLETE_MULTIPART_ARGS)
-
+ )
+
+ def _extra_upload_part_args(self, extra_args):
+        # Only the args in UPLOAD_PART_COPY_ARGS actually need to be
+        # passed on to the upload_part_copy calls.
+ return get_filtered_dict(extra_args, self.UPLOAD_PART_COPY_ARGS)
+
+ def _extra_complete_multipart_args(self, extra_args):
+ return get_filtered_dict(extra_args, self.COMPLETE_MULTIPART_ARGS)
+
def _get_transfer_size(
self, part_size, part_index, num_parts, total_transfer_size
):
- if part_index == num_parts - 1:
-            # The last part may be different in size than the rest of the
- # parts.
- return total_transfer_size - (part_index * part_size)
- return part_size
-
-
-class CopyObjectTask(Task):
- """Task to do a nonmultipart copy"""
+ if part_index == num_parts - 1:
+            # The last part may be different in size than the rest of the
+ # parts.
+ return total_transfer_size - (part_index * part_size)
+ return part_size
+
+
+class CopyObjectTask(Task):
+ """Task to do a nonmultipart copy"""
def _main(
self, client, copy_source, bucket, key, extra_args, callbacks, size
):
- """
-        :param client: The client to use when calling CopyObject
- :param copy_source: The CopySource parameter to use
- :param bucket: The name of the bucket to copy to
- :param key: The name of the key to copy to
- :param extra_args: A dictionary of any extra arguments that may be
- used in the upload.
- :param callbacks: List of callbacks to call after copy
- :param size: The size of the transfer. This value is passed into
- the callbacks
-
- """
- client.copy_object(
+ """
+        :param client: The client to use when calling CopyObject
+ :param copy_source: The CopySource parameter to use
+ :param bucket: The name of the bucket to copy to
+ :param key: The name of the key to copy to
+ :param extra_args: A dictionary of any extra arguments that may be
+ used in the upload.
+ :param callbacks: List of callbacks to call after copy
+ :param size: The size of the transfer. This value is passed into
+ the callbacks
+
+ """
+ client.copy_object(
CopySource=copy_source, Bucket=bucket, Key=key, **extra_args
)
- for callback in callbacks:
- callback(bytes_transferred=size)
-
-
-class CopyPartTask(Task):
- """Task to upload a part in a multipart copy"""
+ for callback in callbacks:
+ callback(bytes_transferred=size)
+
+
+class CopyPartTask(Task):
+ """Task to upload a part in a multipart copy"""
def _main(
self,
@@ -332,29 +332,29 @@ class CopyPartTask(Task):
callbacks,
size,
):
- """
-        :param client: The client to use when calling UploadPartCopy
- :param copy_source: The CopySource parameter to use
- :param bucket: The name of the bucket to upload to
- :param key: The name of the key to upload to
- :param upload_id: The id of the upload
- :param part_number: The number representing the part of the multipart
- upload
- :param extra_args: A dictionary of any extra arguments that may be
- used in the upload.
- :param callbacks: List of callbacks to call after copy part
- :param size: The size of the transfer. This value is passed into
- the callbacks
-
- :rtype: dict
- :returns: A dictionary representing a part::
-
- {'Etag': etag_value, 'PartNumber': part_number}
-
- This value can be appended to a list to be used to complete
- the multipart upload.
- """
- response = client.upload_part_copy(
+ """
+        :param client: The client to use when calling UploadPartCopy
+ :param copy_source: The CopySource parameter to use
+ :param bucket: The name of the bucket to upload to
+ :param key: The name of the key to upload to
+ :param upload_id: The id of the upload
+ :param part_number: The number representing the part of the multipart
+ upload
+ :param extra_args: A dictionary of any extra arguments that may be
+ used in the upload.
+ :param callbacks: List of callbacks to call after copy part
+ :param size: The size of the transfer. This value is passed into
+ the callbacks
+
+ :rtype: dict
+ :returns: A dictionary representing a part::
+
+ {'Etag': etag_value, 'PartNumber': part_number}
+
+ This value can be appended to a list to be used to complete
+ the multipart upload.
+ """
+ response = client.upload_part_copy(
CopySource=copy_source,
Bucket=bucket,
Key=key,
@@ -362,7 +362,7 @@ class CopyPartTask(Task):
PartNumber=part_number,
**extra_args
)
- for callback in callbacks:
- callback(bytes_transferred=size)
- etag = response['CopyPartResult']['ETag']
- return {'ETag': etag, 'PartNumber': part_number}
+ for callback in callbacks:
+ callback(bytes_transferred=size)
+ etag = response['CopyPartResult']['ETag']
+ return {'ETag': etag, 'PartNumber': part_number}
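A minimal sketch of the part arithmetic used by _submit_multipart_request() and
_get_transfer_size() above; the object size and chunk size below are made-up
values for illustration only, not library defaults:

    import math

    total_size = 25 * 1024 * 1024   # hypothetical 25 MiB source object
    part_size = 8 * 1024 * 1024     # hypothetical config.multipart_chunksize

    num_parts = int(math.ceil(total_size / float(part_size)))  # -> 4

    # Mirrors _get_transfer_size(): every part is part_size except the last,
    # which is whatever remains of the object.
    sizes = [
        total_size - (i * part_size) if i == num_parts - 1 else part_size
        for i in range(num_parts)
    ]
    # sizes -> [8 MiB, 8 MiB, 8 MiB, 1 MiB]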
diff --git a/contrib/python/s3transfer/py3/s3transfer/delete.py b/contrib/python/s3transfer/py3/s3transfer/delete.py
index 3d8e6ec9b2..74084d312a 100644
--- a/contrib/python/s3transfer/py3/s3transfer/delete.py
+++ b/contrib/python/s3transfer/py3/s3transfer/delete.py
@@ -1,71 +1,71 @@
-# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License"). You
-# may not use this file except in compliance with the License. A copy of
-# the License is located at
-#
-# http://aws.amazon.com/apache2.0/
-#
-# or in the "license" file accompanying this file. This file is
-# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
-# ANY KIND, either express or implied. See the License for the specific
-# language governing permissions and limitations under the License.
+# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You
+# may not use this file except in compliance with the License. A copy of
+# the License is located at
+#
+# http://aws.amazon.com/apache2.0/
+#
+# or in the "license" file accompanying this file. This file is
+# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific
+# language governing permissions and limitations under the License.
from s3transfer.tasks import SubmissionTask, Task
-
-
-class DeleteSubmissionTask(SubmissionTask):
- """Task for submitting tasks to execute an object deletion."""
-
- def _submit(self, client, request_executor, transfer_future, **kwargs):
- """
- :param client: The client associated with the transfer manager
-
- :type config: s3transfer.manager.TransferConfig
- :param config: The transfer config associated with the transfer
- manager
-
- :type osutil: s3transfer.utils.OSUtil
- :param osutil: The os utility associated to the transfer manager
-
- :type request_executor: s3transfer.futures.BoundedExecutor
- :param request_executor: The request executor associated with the
- transfer manager
-
- :type transfer_future: s3transfer.futures.TransferFuture
- :param transfer_future: The transfer future associated with the
- transfer request that tasks are being submitted for
- """
- call_args = transfer_future.meta.call_args
-
- self._transfer_coordinator.submit(
- request_executor,
- DeleteObjectTask(
- transfer_coordinator=self._transfer_coordinator,
- main_kwargs={
- 'client': client,
- 'bucket': call_args.bucket,
- 'key': call_args.key,
- 'extra_args': call_args.extra_args,
- },
+
+
+class DeleteSubmissionTask(SubmissionTask):
+ """Task for submitting tasks to execute an object deletion."""
+
+ def _submit(self, client, request_executor, transfer_future, **kwargs):
+ """
+ :param client: The client associated with the transfer manager
+
+ :type config: s3transfer.manager.TransferConfig
+ :param config: The transfer config associated with the transfer
+ manager
+
+ :type osutil: s3transfer.utils.OSUtil
+ :param osutil: The os utility associated to the transfer manager
+
+ :type request_executor: s3transfer.futures.BoundedExecutor
+ :param request_executor: The request executor associated with the
+ transfer manager
+
+ :type transfer_future: s3transfer.futures.TransferFuture
+ :param transfer_future: The transfer future associated with the
+ transfer request that tasks are being submitted for
+ """
+ call_args = transfer_future.meta.call_args
+
+ self._transfer_coordinator.submit(
+ request_executor,
+ DeleteObjectTask(
+ transfer_coordinator=self._transfer_coordinator,
+ main_kwargs={
+ 'client': client,
+ 'bucket': call_args.bucket,
+ 'key': call_args.key,
+ 'extra_args': call_args.extra_args,
+ },
is_final=True,
),
- )
-
-
-class DeleteObjectTask(Task):
- def _main(self, client, bucket, key, extra_args):
- """
-
- :param client: The S3 client to use when calling DeleteObject
-
- :type bucket: str
- :param bucket: The name of the bucket.
-
- :type key: str
- :param key: The name of the object to delete.
-
- :type extra_args: dict
- :param extra_args: Extra arguments to pass to the DeleteObject call.
-
- """
- client.delete_object(Bucket=bucket, Key=key, **extra_args)
+ )
+
+
+class DeleteObjectTask(Task):
+ def _main(self, client, bucket, key, extra_args):
+ """
+
+ :param client: The S3 client to use when calling DeleteObject
+
+ :type bucket: str
+ :param bucket: The name of the bucket.
+
+ :type key: str
+ :param key: The name of the object to delete.
+
+ :type extra_args: dict
+ :param extra_args: Extra arguments to pass to the DeleteObject call.
+
+ """
+ client.delete_object(Bucket=bucket, Key=key, **extra_args)
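DeleteSubmissionTask has no size or multipart logic: it submits a single
DeleteObjectTask, which reduces to one DeleteObject call. A minimal sketch of
the equivalent direct call with a plain boto3 client; the bucket and key names
are placeholders:

    import boto3

    s3 = boto3.client('s3')
    # Equivalent of what DeleteObjectTask._main() ends up issuing.
    s3.delete_object(Bucket='example-bucket', Key='example-key')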
diff --git a/contrib/python/s3transfer/py3/s3transfer/download.py b/contrib/python/s3transfer/py3/s3transfer/download.py
index e1f429065d..dc8980d4ed 100644
--- a/contrib/python/s3transfer/py3/s3transfer/download.py
+++ b/contrib/python/s3transfer/py3/s3transfer/download.py
@@ -1,22 +1,22 @@
-# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License"). You
-# may not use this file except in compliance with the License. A copy of
-# the License is located at
-#
-# http://aws.amazon.com/apache2.0/
-#
-# or in the "license" file accompanying this file. This file is
-# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
-# ANY KIND, either express or implied. See the License for the specific
-# language governing permissions and limitations under the License.
+# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You
+# may not use this file except in compliance with the License. A copy of
+# the License is located at
+#
+# http://aws.amazon.com/apache2.0/
+#
+# or in the "license" file accompanying this file. This file is
+# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific
+# language governing permissions and limitations under the License.
import heapq
-import logging
-import threading
-
-from s3transfer.compat import seekable
-from s3transfer.exceptions import RetriesExceededError
-from s3transfer.futures import IN_MEMORY_DOWNLOAD_TAG
+import logging
+import threading
+
+from s3transfer.compat import seekable
+from s3transfer.exceptions import RetriesExceededError
+from s3transfer.futures import IN_MEMORY_DOWNLOAD_TAG
from s3transfer.tasks import SubmissionTask, Task
from s3transfer.utils import (
S3_RETRYABLE_DOWNLOAD_ERRORS,
@@ -29,289 +29,289 @@ from s3transfer.utils import (
get_callbacks,
invoke_progress_callbacks,
)
-
-logger = logging.getLogger(__name__)
-
-
+
+logger = logging.getLogger(__name__)
+
+
class DownloadOutputManager:
- """Base manager class for handling various types of files for downloads
-
-    This class is typically used by the DownloadSubmissionTask class to help
-    determine the following:
-
-    * Provide the fileobj to write downloaded data to
-    * Get a task to complete once everything downloaded has been written
-
- The answers/implementations differ for the various types of file outputs
- that may be accepted. All implementations must subclass and override
- public methods from this class.
- """
-
- def __init__(self, osutil, transfer_coordinator, io_executor):
- self._osutil = osutil
- self._transfer_coordinator = transfer_coordinator
- self._io_executor = io_executor
-
- @classmethod
- def is_compatible(cls, download_target, osutil):
- """Determines if the target for the download is compatible with manager
-
-        :param download_target: The target to which the download will write
-            data.
-
- :param osutil: The os utility to be used for the transfer
-
- :returns: True if the manager can handle the type of target specified
- otherwise returns False.
- """
- raise NotImplementedError('must implement is_compatible()')
-
- def get_download_task_tag(self):
- """Get the tag (if any) to associate all GetObjectTasks
-
- :rtype: s3transfer.futures.TaskTag
- :returns: The tag to associate all GetObjectTasks with
- """
- return None
-
- def get_fileobj_for_io_writes(self, transfer_future):
- """Get file-like object to use for io writes in the io executor
-
- :type transfer_future: s3transfer.futures.TransferFuture
- :param transfer_future: The future associated with upload request
-
- returns: A file-like object to write to
- """
- raise NotImplementedError('must implement get_fileobj_for_io_writes()')
-
- def queue_file_io_task(self, fileobj, data, offset):
- """Queue IO write for submission to the IO executor.
-
- This method accepts an IO executor and information about the
- downloaded data, and handles submitting this to the IO executor.
-
- This method may defer submission to the IO executor if necessary.
-
- """
- self._transfer_coordinator.submit(
+ """Base manager class for handling various types of files for downloads
+
+    This class is typically used by the DownloadSubmissionTask class to help
+    determine the following:
+
+    * Provide the fileobj to write downloaded data to
+    * Get a task to complete once everything downloaded has been written
+
+ The answers/implementations differ for the various types of file outputs
+ that may be accepted. All implementations must subclass and override
+ public methods from this class.
+ """
+
+ def __init__(self, osutil, transfer_coordinator, io_executor):
+ self._osutil = osutil
+ self._transfer_coordinator = transfer_coordinator
+ self._io_executor = io_executor
+
+ @classmethod
+ def is_compatible(cls, download_target, osutil):
+ """Determines if the target for the download is compatible with manager
+
+        :param download_target: The target to which the download will write
+            data.
+
+ :param osutil: The os utility to be used for the transfer
+
+ :returns: True if the manager can handle the type of target specified
+ otherwise returns False.
+ """
+ raise NotImplementedError('must implement is_compatible()')
+
+ def get_download_task_tag(self):
+ """Get the tag (if any) to associate all GetObjectTasks
+
+ :rtype: s3transfer.futures.TaskTag
+ :returns: The tag to associate all GetObjectTasks with
+ """
+ return None
+
+ def get_fileobj_for_io_writes(self, transfer_future):
+ """Get file-like object to use for io writes in the io executor
+
+ :type transfer_future: s3transfer.futures.TransferFuture
+ :param transfer_future: The future associated with upload request
+
+ returns: A file-like object to write to
+ """
+ raise NotImplementedError('must implement get_fileobj_for_io_writes()')
+
+ def queue_file_io_task(self, fileobj, data, offset):
+ """Queue IO write for submission to the IO executor.
+
+ This method accepts an IO executor and information about the
+ downloaded data, and handles submitting this to the IO executor.
+
+ This method may defer submission to the IO executor if necessary.
+
+ """
+ self._transfer_coordinator.submit(
self._io_executor, self.get_io_write_task(fileobj, data, offset)
)
-
- def get_io_write_task(self, fileobj, data, offset):
- """Get an IO write task for the requested set of data
-
-        This task can be run immediately or be submitted to the IO executor
- for it to run.
-
- :type fileobj: file-like object
- :param fileobj: The file-like object to write to
-
- :type data: bytes
- :param data: The data to write out
-
- :type offset: integer
- :param offset: The offset to write the data to in the file-like object
-
- :returns: An IO task to be used to write data to a file-like object
- """
- return IOWriteTask(
- self._transfer_coordinator,
- main_kwargs={
- 'fileobj': fileobj,
- 'data': data,
- 'offset': offset,
+
+ def get_io_write_task(self, fileobj, data, offset):
+ """Get an IO write task for the requested set of data
+
+        This task can be run immediately or be submitted to the IO executor
+ for it to run.
+
+ :type fileobj: file-like object
+ :param fileobj: The file-like object to write to
+
+ :type data: bytes
+ :param data: The data to write out
+
+ :type offset: integer
+ :param offset: The offset to write the data to in the file-like object
+
+ :returns: An IO task to be used to write data to a file-like object
+ """
+ return IOWriteTask(
+ self._transfer_coordinator,
+ main_kwargs={
+ 'fileobj': fileobj,
+ 'data': data,
+ 'offset': offset,
},
- )
-
- def get_final_io_task(self):
- """Get the final io task to complete the download
-
- This is needed because based on the architecture of the TransferManager
- the final tasks will be sent to the IO executor, but the executor
- needs a final task for it to signal that the transfer is done and
- all done callbacks can be run.
-
- :rtype: s3transfer.tasks.Task
-        :returns: A final task to be completed in the io executor
- """
+ )
+
+ def get_final_io_task(self):
+ """Get the final io task to complete the download
+
+ This is needed because based on the architecture of the TransferManager
+ the final tasks will be sent to the IO executor, but the executor
+ needs a final task for it to signal that the transfer is done and
+ all done callbacks can be run.
+
+ :rtype: s3transfer.tasks.Task
+ :returns: A final task to completed in the io executor
+ """
raise NotImplementedError('must implement get_final_io_task()')
-
- def _get_fileobj_from_filename(self, filename):
- f = DeferredOpenFile(
+
+ def _get_fileobj_from_filename(self, filename):
+ f = DeferredOpenFile(
filename, mode='wb', open_function=self._osutil.open
)
- # Make sure the file gets closed and we remove the temporary file
- # if anything goes wrong during the process.
- self._transfer_coordinator.add_failure_cleanup(f.close)
- return f
-
-
-class DownloadFilenameOutputManager(DownloadOutputManager):
- def __init__(self, osutil, transfer_coordinator, io_executor):
+ # Make sure the file gets closed and we remove the temporary file
+ # if anything goes wrong during the process.
+ self._transfer_coordinator.add_failure_cleanup(f.close)
+ return f
+
+
+class DownloadFilenameOutputManager(DownloadOutputManager):
+ def __init__(self, osutil, transfer_coordinator, io_executor):
super().__init__(osutil, transfer_coordinator, io_executor)
- self._final_filename = None
- self._temp_filename = None
- self._temp_fileobj = None
-
- @classmethod
- def is_compatible(cls, download_target, osutil):
+ self._final_filename = None
+ self._temp_filename = None
+ self._temp_fileobj = None
+
+ @classmethod
+ def is_compatible(cls, download_target, osutil):
return isinstance(download_target, str)
-
- def get_fileobj_for_io_writes(self, transfer_future):
- fileobj = transfer_future.meta.call_args.fileobj
- self._final_filename = fileobj
+
+ def get_fileobj_for_io_writes(self, transfer_future):
+ fileobj = transfer_future.meta.call_args.fileobj
+ self._final_filename = fileobj
self._temp_filename = self._osutil.get_temp_filename(fileobj)
- self._temp_fileobj = self._get_temp_fileobj()
- return self._temp_fileobj
-
- def get_final_io_task(self):
- # A task to rename the file from the temporary file to its final
- # location is needed. This should be the last task needed to complete
- # the download.
- return IORenameFileTask(
- transfer_coordinator=self._transfer_coordinator,
- main_kwargs={
- 'fileobj': self._temp_fileobj,
- 'final_filename': self._final_filename,
+ self._temp_fileobj = self._get_temp_fileobj()
+ return self._temp_fileobj
+
+ def get_final_io_task(self):
+ # A task to rename the file from the temporary file to its final
+ # location is needed. This should be the last task needed to complete
+ # the download.
+ return IORenameFileTask(
+ transfer_coordinator=self._transfer_coordinator,
+ main_kwargs={
+ 'fileobj': self._temp_fileobj,
+ 'final_filename': self._final_filename,
'osutil': self._osutil,
- },
+ },
is_final=True,
- )
-
- def _get_temp_fileobj(self):
- f = self._get_fileobj_from_filename(self._temp_filename)
- self._transfer_coordinator.add_failure_cleanup(
+ )
+
+ def _get_temp_fileobj(self):
+ f = self._get_fileobj_from_filename(self._temp_filename)
+ self._transfer_coordinator.add_failure_cleanup(
self._osutil.remove_file, self._temp_filename
)
- return f
-
-
-class DownloadSeekableOutputManager(DownloadOutputManager):
- @classmethod
- def is_compatible(cls, download_target, osutil):
- return seekable(download_target)
-
- def get_fileobj_for_io_writes(self, transfer_future):
- # Return the fileobj provided to the future.
- return transfer_future.meta.call_args.fileobj
-
- def get_final_io_task(self):
- # This task will serve the purpose of signaling when all of the io
- # writes have finished so done callbacks can be called.
- return CompleteDownloadNOOPTask(
+ return f
+
+
+class DownloadSeekableOutputManager(DownloadOutputManager):
+ @classmethod
+ def is_compatible(cls, download_target, osutil):
+ return seekable(download_target)
+
+ def get_fileobj_for_io_writes(self, transfer_future):
+ # Return the fileobj provided to the future.
+ return transfer_future.meta.call_args.fileobj
+
+ def get_final_io_task(self):
+ # This task will serve the purpose of signaling when all of the io
+ # writes have finished so done callbacks can be called.
+ return CompleteDownloadNOOPTask(
transfer_coordinator=self._transfer_coordinator
)
-
-
-class DownloadNonSeekableOutputManager(DownloadOutputManager):
+
+
+class DownloadNonSeekableOutputManager(DownloadOutputManager):
def __init__(
self, osutil, transfer_coordinator, io_executor, defer_queue=None
):
super().__init__(osutil, transfer_coordinator, io_executor)
- if defer_queue is None:
- defer_queue = DeferQueue()
- self._defer_queue = defer_queue
- self._io_submit_lock = threading.Lock()
-
- @classmethod
- def is_compatible(cls, download_target, osutil):
- return hasattr(download_target, 'write')
-
- def get_download_task_tag(self):
- return IN_MEMORY_DOWNLOAD_TAG
-
- def get_fileobj_for_io_writes(self, transfer_future):
- return transfer_future.meta.call_args.fileobj
-
- def get_final_io_task(self):
- return CompleteDownloadNOOPTask(
+ if defer_queue is None:
+ defer_queue = DeferQueue()
+ self._defer_queue = defer_queue
+ self._io_submit_lock = threading.Lock()
+
+ @classmethod
+ def is_compatible(cls, download_target, osutil):
+ return hasattr(download_target, 'write')
+
+ def get_download_task_tag(self):
+ return IN_MEMORY_DOWNLOAD_TAG
+
+ def get_fileobj_for_io_writes(self, transfer_future):
+ return transfer_future.meta.call_args.fileobj
+
+ def get_final_io_task(self):
+ return CompleteDownloadNOOPTask(
transfer_coordinator=self._transfer_coordinator
)
-
- def queue_file_io_task(self, fileobj, data, offset):
- with self._io_submit_lock:
- writes = self._defer_queue.request_writes(offset, data)
- for write in writes:
- data = write['data']
+
+ def queue_file_io_task(self, fileobj, data, offset):
+ with self._io_submit_lock:
+ writes = self._defer_queue.request_writes(offset, data)
+ for write in writes:
+ data = write['data']
logger.debug(
"Queueing IO offset %s for fileobj: %s",
write['offset'],
fileobj,
)
super().queue_file_io_task(fileobj, data, offset)
-
- def get_io_write_task(self, fileobj, data, offset):
- return IOStreamingWriteTask(
- self._transfer_coordinator,
- main_kwargs={
- 'fileobj': fileobj,
- 'data': data,
+
+ def get_io_write_task(self, fileobj, data, offset):
+ return IOStreamingWriteTask(
+ self._transfer_coordinator,
+ main_kwargs={
+ 'fileobj': fileobj,
+ 'data': data,
},
- )
-
-
-class DownloadSpecialFilenameOutputManager(DownloadNonSeekableOutputManager):
+ )
+
+
+class DownloadSpecialFilenameOutputManager(DownloadNonSeekableOutputManager):
def __init__(
self, osutil, transfer_coordinator, io_executor, defer_queue=None
):
super().__init__(
osutil, transfer_coordinator, io_executor, defer_queue
)
- self._fileobj = None
-
- @classmethod
- def is_compatible(cls, download_target, osutil):
+ self._fileobj = None
+
+ @classmethod
+ def is_compatible(cls, download_target, osutil):
return isinstance(download_target, str) and osutil.is_special_file(
download_target
)
-
- def get_fileobj_for_io_writes(self, transfer_future):
- filename = transfer_future.meta.call_args.fileobj
- self._fileobj = self._get_fileobj_from_filename(filename)
- return self._fileobj
-
- def get_final_io_task(self):
- # Make sure the file gets closed once the transfer is done.
- return IOCloseTask(
- transfer_coordinator=self._transfer_coordinator,
- is_final=True,
+
+ def get_fileobj_for_io_writes(self, transfer_future):
+ filename = transfer_future.meta.call_args.fileobj
+ self._fileobj = self._get_fileobj_from_filename(filename)
+ return self._fileobj
+
+ def get_final_io_task(self):
+ # Make sure the file gets closed once the transfer is done.
+ return IOCloseTask(
+ transfer_coordinator=self._transfer_coordinator,
+ is_final=True,
main_kwargs={'fileobj': self._fileobj},
)
-
-
-class DownloadSubmissionTask(SubmissionTask):
- """Task for submitting tasks to execute a download"""
-
- def _get_download_output_manager_cls(self, transfer_future, osutil):
- """Retrieves a class for managing output for a download
-
- :type transfer_future: s3transfer.futures.TransferFuture
- :param transfer_future: The transfer future for the request
-
- :type osutil: s3transfer.utils.OSUtils
- :param osutil: The os utility associated to the transfer
-
- :rtype: class of DownloadOutputManager
- :returns: The appropriate class to use for managing a specific type of
- input for downloads.
- """
- download_manager_resolver_chain = [
- DownloadSpecialFilenameOutputManager,
- DownloadFilenameOutputManager,
- DownloadSeekableOutputManager,
- DownloadNonSeekableOutputManager,
- ]
-
- fileobj = transfer_future.meta.call_args.fileobj
- for download_manager_cls in download_manager_resolver_chain:
- if download_manager_cls.is_compatible(fileobj, osutil):
- return download_manager_cls
- raise RuntimeError(
+
+
+class DownloadSubmissionTask(SubmissionTask):
+ """Task for submitting tasks to execute a download"""
+
+ def _get_download_output_manager_cls(self, transfer_future, osutil):
+ """Retrieves a class for managing output for a download
+
+ :type transfer_future: s3transfer.futures.TransferFuture
+ :param transfer_future: The transfer future for the request
+
+ :type osutil: s3transfer.utils.OSUtils
+ :param osutil: The os utility associated to the transfer
+
+ :rtype: class of DownloadOutputManager
+ :returns: The appropriate class to use for managing a specific type of
+ input for downloads.
+ """
+ download_manager_resolver_chain = [
+ DownloadSpecialFilenameOutputManager,
+ DownloadFilenameOutputManager,
+ DownloadSeekableOutputManager,
+ DownloadNonSeekableOutputManager,
+ ]
+
+ fileobj = transfer_future.meta.call_args.fileobj
+ for download_manager_cls in download_manager_resolver_chain:
+ if download_manager_cls.is_compatible(fileobj, osutil):
+ return download_manager_cls
+ raise RuntimeError(
'Output {} of type: {} is not supported.'.format(
fileobj, type(fileobj)
)
)
-
+
def _submit(
self,
client,
@@ -322,52 +322,52 @@ class DownloadSubmissionTask(SubmissionTask):
transfer_future,
bandwidth_limiter=None,
):
- """
- :param client: The client associated with the transfer manager
-
- :type config: s3transfer.manager.TransferConfig
- :param config: The transfer config associated with the transfer
- manager
-
- :type osutil: s3transfer.utils.OSUtil
- :param osutil: The os utility associated to the transfer manager
-
- :type request_executor: s3transfer.futures.BoundedExecutor
- :param request_executor: The request executor associated with the
- transfer manager
-
- :type io_executor: s3transfer.futures.BoundedExecutor
- :param io_executor: The io executor associated with the
- transfer manager
-
- :type transfer_future: s3transfer.futures.TransferFuture
- :param transfer_future: The transfer future associated with the
- transfer request that tasks are being submitted for
-
- :type bandwidth_limiter: s3transfer.bandwidth.BandwidthLimiter
- :param bandwidth_limiter: The bandwidth limiter to use when
- downloading streams
- """
- if transfer_future.meta.size is None:
- # If a size was not provided figure out the size for the
- # user.
- response = client.head_object(
- Bucket=transfer_future.meta.call_args.bucket,
- Key=transfer_future.meta.call_args.key,
+ """
+ :param client: The client associated with the transfer manager
+
+ :type config: s3transfer.manager.TransferConfig
+ :param config: The transfer config associated with the transfer
+ manager
+
+ :type osutil: s3transfer.utils.OSUtil
+ :param osutil: The os utility associated to the transfer manager
+
+ :type request_executor: s3transfer.futures.BoundedExecutor
+ :param request_executor: The request executor associated with the
+ transfer manager
+
+ :type io_executor: s3transfer.futures.BoundedExecutor
+ :param io_executor: The io executor associated with the
+ transfer manager
+
+ :type transfer_future: s3transfer.futures.TransferFuture
+ :param transfer_future: The transfer future associated with the
+ transfer request that tasks are being submitted for
+
+ :type bandwidth_limiter: s3transfer.bandwidth.BandwidthLimiter
+ :param bandwidth_limiter: The bandwidth limiter to use when
+ downloading streams
+ """
+ if transfer_future.meta.size is None:
+ # If a size was not provided figure out the size for the
+ # user.
+ response = client.head_object(
+ Bucket=transfer_future.meta.call_args.bucket,
+ Key=transfer_future.meta.call_args.key,
**transfer_future.meta.call_args.extra_args,
- )
- transfer_future.meta.provide_transfer_size(
+ )
+ transfer_future.meta.provide_transfer_size(
response['ContentLength']
)
-
- download_output_manager = self._get_download_output_manager_cls(
+
+ download_output_manager = self._get_download_output_manager_cls(
transfer_future, osutil
)(osutil, self._transfer_coordinator, io_executor)
-
-        # If it is greater than the threshold do a ranged download, otherwise
- # do a regular GetObject download.
- if transfer_future.meta.size < config.multipart_threshold:
- self._submit_download_request(
+
+        # If it is greater than the threshold do a ranged download, otherwise
+ # do a regular GetObject download.
+ if transfer_future.meta.size < config.multipart_threshold:
+ self._submit_download_request(
client,
config,
osutil,
@@ -377,8 +377,8 @@ class DownloadSubmissionTask(SubmissionTask):
transfer_future,
bandwidth_limiter,
)
- else:
- self._submit_ranged_download_request(
+ else:
+ self._submit_ranged_download_request(
client,
config,
osutil,
@@ -388,7 +388,7 @@ class DownloadSubmissionTask(SubmissionTask):
transfer_future,
bandwidth_limiter,
)
-
+
def _submit_download_request(
self,
client,
@@ -400,45 +400,45 @@ class DownloadSubmissionTask(SubmissionTask):
transfer_future,
bandwidth_limiter,
):
- call_args = transfer_future.meta.call_args
-
- # Get a handle to the file that will be used for writing downloaded
- # contents
- fileobj = download_output_manager.get_fileobj_for_io_writes(
+ call_args = transfer_future.meta.call_args
+
+ # Get a handle to the file that will be used for writing downloaded
+ # contents
+ fileobj = download_output_manager.get_fileobj_for_io_writes(
transfer_future
)
-
- # Get the needed callbacks for the task
- progress_callbacks = get_callbacks(transfer_future, 'progress')
-
- # Get any associated tags for the get object task.
- get_object_tag = download_output_manager.get_download_task_tag()
-
- # Get the final io task to run once the download is complete.
- final_task = download_output_manager.get_final_io_task()
-
- # Submit the task to download the object.
- self._transfer_coordinator.submit(
- request_executor,
- ImmediatelyWriteIOGetObjectTask(
- transfer_coordinator=self._transfer_coordinator,
- main_kwargs={
- 'client': client,
- 'bucket': call_args.bucket,
- 'key': call_args.key,
- 'fileobj': fileobj,
- 'extra_args': call_args.extra_args,
- 'callbacks': progress_callbacks,
- 'max_attempts': config.num_download_attempts,
- 'download_output_manager': download_output_manager,
- 'io_chunksize': config.io_chunksize,
+
+ # Get the needed callbacks for the task
+ progress_callbacks = get_callbacks(transfer_future, 'progress')
+
+ # Get any associated tags for the get object task.
+ get_object_tag = download_output_manager.get_download_task_tag()
+
+ # Get the final io task to run once the download is complete.
+ final_task = download_output_manager.get_final_io_task()
+
+ # Submit the task to download the object.
+ self._transfer_coordinator.submit(
+ request_executor,
+ ImmediatelyWriteIOGetObjectTask(
+ transfer_coordinator=self._transfer_coordinator,
+ main_kwargs={
+ 'client': client,
+ 'bucket': call_args.bucket,
+ 'key': call_args.key,
+ 'fileobj': fileobj,
+ 'extra_args': call_args.extra_args,
+ 'callbacks': progress_callbacks,
+ 'max_attempts': config.num_download_attempts,
+ 'download_output_manager': download_output_manager,
+ 'io_chunksize': config.io_chunksize,
'bandwidth_limiter': bandwidth_limiter,
- },
+ },
done_callbacks=[final_task],
- ),
+ ),
tag=get_object_tag,
- )
-
+ )
+
def _submit_ranged_download_request(
self,
client,
@@ -450,86 +450,86 @@ class DownloadSubmissionTask(SubmissionTask):
transfer_future,
bandwidth_limiter,
):
- call_args = transfer_future.meta.call_args
-
- # Get the needed progress callbacks for the task
- progress_callbacks = get_callbacks(transfer_future, 'progress')
-
- # Get a handle to the file that will be used for writing downloaded
- # contents
- fileobj = download_output_manager.get_fileobj_for_io_writes(
+ call_args = transfer_future.meta.call_args
+
+ # Get the needed progress callbacks for the task
+ progress_callbacks = get_callbacks(transfer_future, 'progress')
+
+ # Get a handle to the file that will be used for writing downloaded
+ # contents
+ fileobj = download_output_manager.get_fileobj_for_io_writes(
transfer_future
)
-
- # Determine the number of parts
- part_size = config.multipart_chunksize
+
+ # Determine the number of parts
+ part_size = config.multipart_chunksize
num_parts = calculate_num_parts(transfer_future.meta.size, part_size)
-
- # Get any associated tags for the get object task.
- get_object_tag = download_output_manager.get_download_task_tag()
-
- # Callback invoker to submit the final io task once all downloads
- # are complete.
- finalize_download_invoker = CountCallbackInvoker(
- self._get_final_io_task_submission_callback(
- download_output_manager, io_executor
- )
- )
- for i in range(num_parts):
- # Calculate the range parameter
- range_parameter = calculate_range_parameter(
+
+ # Get any associated tags for the get object task.
+ get_object_tag = download_output_manager.get_download_task_tag()
+
+ # Callback invoker to submit the final io task once all downloads
+ # are complete.
+ finalize_download_invoker = CountCallbackInvoker(
+ self._get_final_io_task_submission_callback(
+ download_output_manager, io_executor
+ )
+ )
+ for i in range(num_parts):
+ # Calculate the range parameter
+ range_parameter = calculate_range_parameter(
part_size, i, num_parts
)
-
- # Inject the Range parameter to the parameters to be passed in
- # as extra args
- extra_args = {'Range': range_parameter}
- extra_args.update(call_args.extra_args)
- finalize_download_invoker.increment()
- # Submit the ranged downloads
- self._transfer_coordinator.submit(
- request_executor,
- GetObjectTask(
- transfer_coordinator=self._transfer_coordinator,
- main_kwargs={
- 'client': client,
- 'bucket': call_args.bucket,
- 'key': call_args.key,
- 'fileobj': fileobj,
- 'extra_args': extra_args,
- 'callbacks': progress_callbacks,
- 'max_attempts': config.num_download_attempts,
- 'start_index': i * part_size,
- 'download_output_manager': download_output_manager,
- 'io_chunksize': config.io_chunksize,
+
+ # Inject the Range parameter to the parameters to be passed in
+ # as extra args
+ extra_args = {'Range': range_parameter}
+ extra_args.update(call_args.extra_args)
+ finalize_download_invoker.increment()
+ # Submit the ranged downloads
+ self._transfer_coordinator.submit(
+ request_executor,
+ GetObjectTask(
+ transfer_coordinator=self._transfer_coordinator,
+ main_kwargs={
+ 'client': client,
+ 'bucket': call_args.bucket,
+ 'key': call_args.key,
+ 'fileobj': fileobj,
+ 'extra_args': extra_args,
+ 'callbacks': progress_callbacks,
+ 'max_attempts': config.num_download_attempts,
+ 'start_index': i * part_size,
+ 'download_output_manager': download_output_manager,
+ 'io_chunksize': config.io_chunksize,
'bandwidth_limiter': bandwidth_limiter,
- },
+ },
done_callbacks=[finalize_download_invoker.decrement],
- ),
+ ),
tag=get_object_tag,
- )
- finalize_download_invoker.finalize()
-
+ )
+ finalize_download_invoker.finalize()
+
def _get_final_io_task_submission_callback(
self, download_manager, io_executor
):
- final_task = download_manager.get_final_io_task()
- return FunctionContainer(
+ final_task = download_manager.get_final_io_task()
+ return FunctionContainer(
self._transfer_coordinator.submit, io_executor, final_task
)
-
- def _calculate_range_param(self, part_size, part_index, num_parts):
- # Used to calculate the Range parameter
- start_range = part_index * part_size
- if part_index == num_parts - 1:
- end_range = ''
- else:
- end_range = start_range + part_size - 1
+
+ def _calculate_range_param(self, part_size, part_index, num_parts):
+ # Used to calculate the Range parameter
+ start_range = part_index * part_size
+ if part_index == num_parts - 1:
+ end_range = ''
+ else:
+ end_range = start_range + part_size - 1
range_param = f'bytes={start_range}-{end_range}'
- return range_param
-
-
-class GetObjectTask(Task):
+ return range_param
+
+
+class GetObjectTask(Task):
def _main(
self,
client,
@@ -544,57 +544,57 @@ class GetObjectTask(Task):
start_index=0,
bandwidth_limiter=None,
):
- """Downloads an object and places content into io queue
-
- :param client: The client to use when calling GetObject
- :param bucket: The bucket to download from
- :param key: The key to download from
- :param fileobj: The file handle to write content to
+ """Downloads an object and places content into io queue
+
+ :param client: The client to use when calling GetObject
+ :param bucket: The bucket to download from
+ :param key: The key to download from
+ :param fileobj: The file handle to write content to
         :param extra_args: Any extra arguments to include in GetObject request
- :param callbacks: List of progress callbacks to invoke on download
- :param max_attempts: The number of retries to do when downloading
- :param download_output_manager: The download output manager associated
- with the current download.
- :param io_chunksize: The size of each io chunk to read from the
- download stream and queue in the io queue.
- :param start_index: The location in the file to start writing the
- content of the key to.
- :param bandwidth_limiter: The bandwidth limiter to use when throttling
- the downloading of data in streams.
- """
- last_exception = None
- for i in range(max_attempts):
- try:
+ :param callbacks: List of progress callbacks to invoke on download
+ :param max_attempts: The number of retries to do when downloading
+ :param download_output_manager: The download output manager associated
+ with the current download.
+ :param io_chunksize: The size of each io chunk to read from the
+ download stream and queue in the io queue.
+ :param start_index: The location in the file to start writing the
+ content of the key to.
+ :param bandwidth_limiter: The bandwidth limiter to use when throttling
+ the downloading of data in streams.
+ """
+ last_exception = None
+ for i in range(max_attempts):
+ try:
current_index = start_index
- response = client.get_object(
+ response = client.get_object(
Bucket=bucket, Key=key, **extra_args
)
- streaming_body = StreamReaderProgress(
+ streaming_body = StreamReaderProgress(
response['Body'], callbacks
)
- if bandwidth_limiter:
+ if bandwidth_limiter:
streaming_body = (
- bandwidth_limiter.get_bandwith_limited_stream(
+ bandwidth_limiter.get_bandwith_limited_stream(
streaming_body, self._transfer_coordinator
)
)
-
- chunks = DownloadChunkIterator(streaming_body, io_chunksize)
- for chunk in chunks:
- # If the transfer is done because of a cancellation
- # or error somewhere else, stop trying to submit more
- # data to be written and break out of the download.
- if not self._transfer_coordinator.done():
- self._handle_io(
+
+ chunks = DownloadChunkIterator(streaming_body, io_chunksize)
+ for chunk in chunks:
+ # If the transfer is done because of a cancellation
+ # or error somewhere else, stop trying to submit more
+ # data to be written and break out of the download.
+ if not self._transfer_coordinator.done():
+ self._handle_io(
download_output_manager,
fileobj,
chunk,
current_index,
- )
- current_index += len(chunk)
- else:
- return
- return
+ )
+ current_index += len(chunk)
+ else:
+ return
+ return
except S3_RETRYABLE_DOWNLOAD_ERRORS as e:
logger.debug(
"Retrying exception caught (%s), "
@@ -604,91 +604,91 @@ class GetObjectTask(Task):
max_attempts,
exc_info=True,
)
- last_exception = e
- # Also invoke the progress callbacks to indicate that we
- # are trying to download the stream again and all progress
- # for this GetObject has been lost.
- invoke_progress_callbacks(
+ last_exception = e
+ # Also invoke the progress callbacks to indicate that we
+ # are trying to download the stream again and all progress
+ # for this GetObject has been lost.
+ invoke_progress_callbacks(
callbacks, start_index - current_index
)
- continue
- raise RetriesExceededError(last_exception)
-
- def _handle_io(self, download_output_manager, fileobj, chunk, index):
- download_output_manager.queue_file_io_task(fileobj, chunk, index)
-
-
-class ImmediatelyWriteIOGetObjectTask(GetObjectTask):
- """GetObjectTask that immediately writes to the provided file object
-
- This is useful for downloads where it is known only one thread is
- downloading the object so there is no reason to go through the
- overhead of using an IO queue and executor.
- """
-
- def _handle_io(self, download_output_manager, fileobj, chunk, index):
- task = download_output_manager.get_io_write_task(fileobj, chunk, index)
- task()
-
-
-class IOWriteTask(Task):
- def _main(self, fileobj, data, offset):
- """Pulls off an io queue to write contents to a file
-
- :param fileobj: The file handle to write content to
- :param data: The data to write
- :param offset: The offset to write the data to.
- """
- fileobj.seek(offset)
- fileobj.write(data)
-
-
-class IOStreamingWriteTask(Task):
- """Task for writing data to a non-seekable stream."""
-
- def _main(self, fileobj, data):
- """Write data to a fileobj.
-
+ continue
+ raise RetriesExceededError(last_exception)
+
+ def _handle_io(self, download_output_manager, fileobj, chunk, index):
+ download_output_manager.queue_file_io_task(fileobj, chunk, index)
+
+
+class ImmediatelyWriteIOGetObjectTask(GetObjectTask):
+ """GetObjectTask that immediately writes to the provided file object
+
+ This is useful for downloads where it is known only one thread is
+ downloading the object so there is no reason to go through the
+ overhead of using an IO queue and executor.
+ """
+
+ def _handle_io(self, download_output_manager, fileobj, chunk, index):
+ task = download_output_manager.get_io_write_task(fileobj, chunk, index)
+ task()
+
+
+class IOWriteTask(Task):
+ def _main(self, fileobj, data, offset):
+ """Pulls off an io queue to write contents to a file
+
+ :param fileobj: The file handle to write content to
+ :param data: The data to write
+ :param offset: The offset to write the data to.
+ """
+ fileobj.seek(offset)
+ fileobj.write(data)
+
+
+class IOStreamingWriteTask(Task):
+ """Task for writing data to a non-seekable stream."""
+
+ def _main(self, fileobj, data):
+ """Write data to a fileobj.
+
Data will be written directly to the fileobj without
- any prior seeking.
-
- :param fileobj: The fileobj to write content to
- :param data: The data to write
-
- """
- fileobj.write(data)
-
-
-class IORenameFileTask(Task):
- """A task to rename a temporary file to its final filename
-
- :param fileobj: The file handle that content was written to.
- :param final_filename: The final name of the file to rename to
- upon completion of writing the contents.
- :param osutil: OS utility
- """
-
- def _main(self, fileobj, final_filename, osutil):
- fileobj.close()
- osutil.rename_file(fileobj.name, final_filename)
-
-
-class IOCloseTask(Task):
- """A task to close out a file once the download is complete.
-
- :param fileobj: The fileobj to close.
- """
-
- def _main(self, fileobj):
- fileobj.close()
-
-
-class CompleteDownloadNOOPTask(Task):
- """A NOOP task to serve as an indicator that the download is complete
-
- Note that the default for is_final is set to True because this should
- always be the last task.
- """
+ any prior seeking.
+
+ :param fileobj: The fileobj to write content to
+ :param data: The data to write
+
+ """
+ fileobj.write(data)
+
+
+class IORenameFileTask(Task):
+ """A task to rename a temporary file to its final filename
+
+ :param fileobj: The file handle that content was written to.
+ :param final_filename: The final name of the file to rename to
+ upon completion of writing the contents.
+ :param osutil: OS utility
+ """
+
+ def _main(self, fileobj, final_filename, osutil):
+ fileobj.close()
+ osutil.rename_file(fileobj.name, final_filename)
+
+
+class IOCloseTask(Task):
+ """A task to close out a file once the download is complete.
+
+ :param fileobj: The fileobj to close.
+ """
+
+ def _main(self, fileobj):
+ fileobj.close()
+
+
+class CompleteDownloadNOOPTask(Task):
+ """A NOOP task to serve as an indicator that the download is complete
+
+ Note that the default for is_final is set to True because this should
+ always be the last task.
+ """
def __init__(
self,
@@ -699,92 +699,92 @@ class CompleteDownloadNOOPTask(Task):
is_final=True,
):
super().__init__(
- transfer_coordinator=transfer_coordinator,
- main_kwargs=main_kwargs,
- pending_main_kwargs=pending_main_kwargs,
- done_callbacks=done_callbacks,
+ transfer_coordinator=transfer_coordinator,
+ main_kwargs=main_kwargs,
+ pending_main_kwargs=pending_main_kwargs,
+ done_callbacks=done_callbacks,
is_final=is_final,
- )
-
- def _main(self):
- pass
-
-
+ )
+
+ def _main(self):
+ pass
+
+
class DownloadChunkIterator:
- def __init__(self, body, chunksize):
- """Iterator to chunk out a downloaded S3 stream
-
- :param body: A readable file-like object
- :param chunksize: The amount to read each time
- """
- self._body = body
- self._chunksize = chunksize
- self._num_reads = 0
-
- def __iter__(self):
- return self
-
- def __next__(self):
- chunk = self._body.read(self._chunksize)
- self._num_reads += 1
- if chunk:
- return chunk
- elif self._num_reads == 1:
-            # Even though the response may not have had any
- # content, we still want to account for an empty object's
+ def __init__(self, body, chunksize):
+ """Iterator to chunk out a downloaded S3 stream
+
+ :param body: A readable file-like object
+ :param chunksize: The amount to read each time
+ """
+ self._body = body
+ self._chunksize = chunksize
+ self._num_reads = 0
+
+ def __iter__(self):
+ return self
+
+ def __next__(self):
+ chunk = self._body.read(self._chunksize)
+ self._num_reads += 1
+ if chunk:
+ return chunk
+ elif self._num_reads == 1:
+            # Even though the response may not have had any
+ # content, we still want to account for an empty object's
# existence so return the empty chunk for that initial
- # read.
- return chunk
- raise StopIteration()
-
- next = __next__
-
-
+ # read.
+ return chunk
+ raise StopIteration()
+
+ next = __next__
+
+
class DeferQueue:
- """IO queue that defers write requests until they are queued sequentially.
-
- This class is used to track IO data for a *single* fileobj.
-
- You can send data to this queue, and it will defer any IO write requests
- until it has the next contiguous block available (starting at 0).
-
- """
-
- def __init__(self):
- self._writes = []
- self._pending_offsets = set()
- self._next_offset = 0
-
- def request_writes(self, offset, data):
- """Request any available writes given new incoming data.
-
- You call this method by providing new data along with the
- offset associated with the data. If that new data unlocks
- any contiguous writes that can now be submitted, this
- method will return all applicable writes.
-
-        This is done with a single method call so you don't have to
-        make two method calls (put(), get()), each of which acquires
-        a lock.
-
- """
- if offset < self._next_offset:
- # This is a request for a write that we've already
- # seen. This can happen in the event of a retry
-            # where if we retry at offset N/2, we'll requeue
- # offsets 0-N/2 again.
- return []
- writes = []
- if offset in self._pending_offsets:
- # We've already queued this offset so this request is
- # a duplicate. In this case we should ignore
- # this request and prefer what's already queued.
- return []
- heapq.heappush(self._writes, (offset, data))
- self._pending_offsets.add(offset)
- while self._writes and self._writes[0][0] == self._next_offset:
- next_write = heapq.heappop(self._writes)
- writes.append({'offset': next_write[0], 'data': next_write[1]})
- self._pending_offsets.remove(next_write[0])
- self._next_offset += len(next_write[1])
- return writes
+ """IO queue that defers write requests until they are queued sequentially.
+
+ This class is used to track IO data for a *single* fileobj.
+
+ You can send data to this queue, and it will defer any IO write requests
+ until it has the next contiguous block available (starting at 0).
+
+ """
+
+ def __init__(self):
+ self._writes = []
+ self._pending_offsets = set()
+ self._next_offset = 0
+
+ def request_writes(self, offset, data):
+ """Request any available writes given new incoming data.
+
+ You call this method by providing new data along with the
+ offset associated with the data. If that new data unlocks
+ any contiguous writes that can now be submitted, this
+ method will return all applicable writes.
+
+        This is done with a single method call so you don't have to
+        make two method calls (put(), get()), each of which acquires
+        a lock.
+
+ """
+ if offset < self._next_offset:
+ # This is a request for a write that we've already
+ # seen. This can happen in the event of a retry
+            # where if we retry at offset N/2, we'll requeue
+ # offsets 0-N/2 again.
+ return []
+ writes = []
+ if offset in self._pending_offsets:
+ # We've already queued this offset so this request is
+ # a duplicate. In this case we should ignore
+ # this request and prefer what's already queued.
+ return []
+ heapq.heappush(self._writes, (offset, data))
+ self._pending_offsets.add(offset)
+ while self._writes and self._writes[0][0] == self._next_offset:
+ next_write = heapq.heappop(self._writes)
+ writes.append({'offset': next_write[0], 'data': next_write[1]})
+ self._pending_offsets.remove(next_write[0])
+ self._next_offset += len(next_write[1])
+ return writes
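A minimal sketch of the DeferQueue behaviour documented above; the offsets and
byte strings are arbitrary examples:

    q = DeferQueue()

    # Offset 8 arrives first; nothing can be written until offset 0 is seen.
    q.request_writes(offset=8, data=b'world!')          # -> []

    # Offset 0 (8 bytes) unlocks both pending writes, returned in order.
    q.request_writes(offset=0, data=b'hello, w')
    # -> [{'offset': 0, 'data': b'hello, w'}, {'offset': 8, 'data': b'world!'}]

    # Re-sending an already-written offset is ignored.
    q.request_writes(offset=0, data=b'hello, w')        # -> []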
diff --git a/contrib/python/s3transfer/py3/s3transfer/exceptions.py b/contrib/python/s3transfer/py3/s3transfer/exceptions.py
index ef48766f5f..6150fe650d 100644
--- a/contrib/python/s3transfer/py3/s3transfer/exceptions.py
+++ b/contrib/python/s3transfer/py3/s3transfer/exceptions.py
@@ -1,37 +1,37 @@
-# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License"). You
-# may not use this file except in compliance with the License. A copy of
-# the License is located at
-#
-# http://aws.amazon.com/apache2.0/
-#
-# or in the "license" file accompanying this file. This file is
-# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
-# ANY KIND, either express or implied. See the License for the specific
-# language governing permissions and limitations under the License.
-from concurrent.futures import CancelledError
-
-
-class RetriesExceededError(Exception):
- def __init__(self, last_exception, msg='Max Retries Exceeded'):
+# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You
+# may not use this file except in compliance with the License. A copy of
+# the License is located at
+#
+# http://aws.amazon.com/apache2.0/
+#
+# or in the "license" file accompanying this file. This file is
+# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific
+# language governing permissions and limitations under the License.
+from concurrent.futures import CancelledError
+
+
+class RetriesExceededError(Exception):
+ def __init__(self, last_exception, msg='Max Retries Exceeded'):
super().__init__(msg)
- self.last_exception = last_exception
-
-
-class S3UploadFailedError(Exception):
- pass
-
-
-class InvalidSubscriberMethodError(Exception):
- pass
-
-
-class TransferNotDoneError(Exception):
- pass
-
-
-class FatalError(CancelledError):
- """A CancelledError raised from an error in the TransferManager"""
-
- pass
+ self.last_exception = last_exception
+
+
+class S3UploadFailedError(Exception):
+ pass
+
+
+class InvalidSubscriberMethodError(Exception):
+ pass
+
+
+class TransferNotDoneError(Exception):
+ pass
+
+
+class FatalError(CancelledError):
+ """A CancelledError raised from an error in the TransferManager"""
+
+ pass
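
The exception classes above carry no behaviour beyond their attributes, so their contract is easiest to see in a short hedged sketch; retry_transfer() below is an illustrative stand-in for a caller, not part of s3transfer's API:

from s3transfer.exceptions import RetriesExceededError


def retry_transfer(attempt, max_attempts=3):
    last_exc = None
    for _ in range(max_attempts):
        try:
            return attempt()
        except ConnectionError as e:  # stand-in for a streaming failure
            last_exc = e
    # Preserve the final underlying error so the caller can inspect it.
    raise RetriesExceededError(last_exc)


def flaky_download():
    raise ConnectionError('connection reset while streaming')


try:
    retry_transfer(flaky_download)
except RetriesExceededError as e:
    print('Giving up:', e, '| last error:', e.last_exception)
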
diff --git a/contrib/python/s3transfer/py3/s3transfer/futures.py b/contrib/python/s3transfer/py3/s3transfer/futures.py
index a56c9e3215..39e071fb60 100644
--- a/contrib/python/s3transfer/py3/s3transfer/futures.py
+++ b/contrib/python/s3transfer/py3/s3transfer/futures.py
@@ -1,29 +1,29 @@
-# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License"). You
-# may not use this file except in compliance with the License. A copy of
-# the License is located at
-#
-# http://aws.amazon.com/apache2.0/
-#
-# or in the "license" file accompanying this file. This file is
-# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
-# ANY KIND, either express or implied. See the License for the specific
-# language governing permissions and limitations under the License.
-import copy
-import logging
-import sys
-import threading
+# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You
+# may not use this file except in compliance with the License. A copy of
+# the License is located at
+#
+# http://aws.amazon.com/apache2.0/
+#
+# or in the "license" file accompanying this file. This file is
+# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific
+# language governing permissions and limitations under the License.
+import copy
+import logging
+import sys
+import threading
from collections import namedtuple
from concurrent import futures
-
-from s3transfer.compat import MAXINT
-from s3transfer.exceptions import CancelledError, TransferNotDoneError
+
+from s3transfer.compat import MAXINT
+from s3transfer.exceptions import CancelledError, TransferNotDoneError
from s3transfer.utils import FunctionContainer, TaskSemaphore
-
-logger = logging.getLogger(__name__)
-
-
+
+logger = logging.getLogger(__name__)
+
+
class BaseTransferFuture:
@property
def meta(self):
@@ -69,538 +69,538 @@ class BaseTransferMeta:
class TransferFuture(BaseTransferFuture):
- def __init__(self, meta=None, coordinator=None):
- """The future associated to a submitted transfer request
-
- :type meta: TransferMeta
- :param meta: The metadata associated to the request. This object
- is visible to the requester.
-
- :type coordinator: TransferCoordinator
- :param coordinator: The coordinator associated to the request. This
- object is not visible to the requester.
- """
- self._meta = meta
- if meta is None:
- self._meta = TransferMeta()
-
- self._coordinator = coordinator
- if coordinator is None:
- self._coordinator = TransferCoordinator()
-
- @property
- def meta(self):
- return self._meta
-
- def done(self):
- return self._coordinator.done()
-
- def result(self):
- try:
- # Usually the result() method blocks until the transfer is done,
- # however if a KeyboardInterrupt is raised we want to exit
+ def __init__(self, meta=None, coordinator=None):
+ """The future associated to a submitted transfer request
+
+ :type meta: TransferMeta
+ :param meta: The metadata associated to the request. This object
+ is visible to the requester.
+
+ :type coordinator: TransferCoordinator
+ :param coordinator: The coordinator associated to the request. This
+ object is not visible to the requester.
+ """
+ self._meta = meta
+ if meta is None:
+ self._meta = TransferMeta()
+
+ self._coordinator = coordinator
+ if coordinator is None:
+ self._coordinator = TransferCoordinator()
+
+ @property
+ def meta(self):
+ return self._meta
+
+ def done(self):
+ return self._coordinator.done()
+
+ def result(self):
+ try:
+ # Usually the result() method blocks until the transfer is done,
+ # however if a KeyboardInterrupt is raised we want to exit
# out of this and propagate the exception.
- return self._coordinator.result()
- except KeyboardInterrupt as e:
- self.cancel()
- raise e
-
- def cancel(self):
- self._coordinator.cancel()
-
- def set_exception(self, exception):
- """Sets the exception on the future."""
- if not self.done():
- raise TransferNotDoneError(
- 'set_exception can only be called once the transfer is '
+ return self._coordinator.result()
+ except KeyboardInterrupt as e:
+ self.cancel()
+ raise e
+
+ def cancel(self):
+ self._coordinator.cancel()
+
+ def set_exception(self, exception):
+ """Sets the exception on the future."""
+ if not self.done():
+ raise TransferNotDoneError(
+ 'set_exception can only be called once the transfer is '
'complete.'
)
- self._coordinator.set_exception(exception, override=True)
-
-
+ self._coordinator.set_exception(exception, override=True)
+
+
class TransferMeta(BaseTransferMeta):
- """Holds metadata about the TransferFuture"""
-
- def __init__(self, call_args=None, transfer_id=None):
- self._call_args = call_args
- self._transfer_id = transfer_id
- self._size = None
- self._user_context = {}
-
- @property
- def call_args(self):
- """The call args used in the transfer request"""
- return self._call_args
-
- @property
- def transfer_id(self):
- """The unique id of the transfer"""
- return self._transfer_id
-
- @property
- def size(self):
- """The size of the transfer request if known"""
- return self._size
-
- @property
- def user_context(self):
- """A dictionary that requesters can store data in"""
- return self._user_context
-
- def provide_transfer_size(self, size):
- """A method to provide the size of a transfer request
-
- By providing this value, the TransferManager will not try to
- call HeadObject or use the OS to determine the size of the
- transfer.
- """
- self._size = size
-
-
+ """Holds metadata about the TransferFuture"""
+
+ def __init__(self, call_args=None, transfer_id=None):
+ self._call_args = call_args
+ self._transfer_id = transfer_id
+ self._size = None
+ self._user_context = {}
+
+ @property
+ def call_args(self):
+ """The call args used in the transfer request"""
+ return self._call_args
+
+ @property
+ def transfer_id(self):
+ """The unique id of the transfer"""
+ return self._transfer_id
+
+ @property
+ def size(self):
+ """The size of the transfer request if known"""
+ return self._size
+
+ @property
+ def user_context(self):
+ """A dictionary that requesters can store data in"""
+ return self._user_context
+
+ def provide_transfer_size(self, size):
+ """A method to provide the size of a transfer request
+
+ By providing this value, the TransferManager will not try to
+ call HeadObject or use the OS to determine the size of the
+ transfer.
+ """
+ self._size = size
+
+
class TransferCoordinator:
- """A helper class for managing TransferFuture"""
-
- def __init__(self, transfer_id=None):
- self.transfer_id = transfer_id
- self._status = 'not-started'
- self._result = None
- self._exception = None
- self._associated_futures = set()
- self._failure_cleanups = []
- self._done_callbacks = []
- self._done_event = threading.Event()
- self._lock = threading.Lock()
- self._associated_futures_lock = threading.Lock()
- self._done_callbacks_lock = threading.Lock()
- self._failure_cleanups_lock = threading.Lock()
-
- def __repr__(self):
+ """A helper class for managing TransferFuture"""
+
+ def __init__(self, transfer_id=None):
+ self.transfer_id = transfer_id
+ self._status = 'not-started'
+ self._result = None
+ self._exception = None
+ self._associated_futures = set()
+ self._failure_cleanups = []
+ self._done_callbacks = []
+ self._done_event = threading.Event()
+ self._lock = threading.Lock()
+ self._associated_futures_lock = threading.Lock()
+ self._done_callbacks_lock = threading.Lock()
+ self._failure_cleanups_lock = threading.Lock()
+
+ def __repr__(self):
return '{}(transfer_id={})'.format(
self.__class__.__name__, self.transfer_id
)
-
- @property
- def exception(self):
- return self._exception
-
- @property
- def associated_futures(self):
- """The list of futures associated to the inprogress TransferFuture
-
- Once the transfer finishes this list becomes empty as the transfer
- is considered done and there should be no running futures left.
- """
- with self._associated_futures_lock:
- # We return a copy of the list because we do not want to
- # be processing the returned list while another thread is adding
- # more futures to the actual list.
- return copy.copy(self._associated_futures)
-
- @property
- def failure_cleanups(self):
- """The list of callbacks to call when the TransferFuture fails"""
- return self._failure_cleanups
-
- @property
- def status(self):
- """The status of the TransferFuture
-
- The currently supported states are:
- * not-started - Has yet to start. If in this state, a transfer
- can be canceled immediately and nothing will happen.
- * queued - SubmissionTask is about to submit tasks
- * running - Is in progress. In-progress currently means that
- the SubmissionTask that runs the transfer is being executed. So
- there is no guarantee any transfer requests have been made to
- S3 when this state is reached.
- * cancelled - Was cancelled
- * failed - An exception other than CancelledError was thrown
- * success - No exceptions were thrown and is done.
- """
- return self._status
-
- def set_result(self, result):
- """Set a result for the TransferFuture
-
- Implies that the TransferFuture succeeded. This will always set a
- result because it is invoked on the final task where there is only
- ever one final task and it is run at the very end of a transfer
- process. So if a result is being set for this final task, the transfer
- succeeded even if something came along and canceled the transfer
- on the final task.
- """
- with self._lock:
- self._exception = None
- self._result = result
- self._status = 'success'
-
- def set_exception(self, exception, override=False):
- """Set an exception for the TransferFuture
-
- Implies the TransferFuture failed.
-
- :param exception: The exception that caused the transfer to fail.
- :param override: If True, override any existing state.
- """
- with self._lock:
- if not self.done() or override:
- self._exception = exception
- self._status = 'failed'
-
- def result(self):
- """Waits until TransferFuture is done and returns the result
-
- If the TransferFuture succeeded, it will return the result. If the
- TransferFuture failed, it will raise the exception associated to the
- failure.
- """
- # Doing a wait() with no timeout cannot be interrupted in python2 but
- # can be interrupted in python3 so we just wait with the largest
- # possible integer value, which is on the scale of billions of
- # years...
- self._done_event.wait(MAXINT)
-
- # Once done waiting, raise an exception if present or return the
- # final result.
- if self._exception:
- raise self._exception
- return self._result
-
- def cancel(self, msg='', exc_type=CancelledError):
- """Cancels the TransferFuture
-
- :param msg: The message to attach to the cancellation
- :param exc_type: The type of exception to set for the cancellation
- """
- with self._lock:
- if not self.done():
- should_announce_done = False
- logger.debug('%s cancel(%s) called', self, msg)
- self._exception = exc_type(msg)
- if self._status == 'not-started':
- should_announce_done = True
- self._status = 'cancelled'
- if should_announce_done:
- self.announce_done()
-
- def set_status_to_queued(self):
- """Sets the TransferFuture's status to queued"""
- self._transition_to_non_done_state('queued')
-
- def set_status_to_running(self):
- """Sets the TransferFuture's status to running"""
- self._transition_to_non_done_state('running')
-
- def _transition_to_non_done_state(self, desired_state):
- with self._lock:
- if self.done():
- raise RuntimeError(
- 'Unable to transition from done state %s to non-done '
+
+ @property
+ def exception(self):
+ return self._exception
+
+ @property
+ def associated_futures(self):
+ """The list of futures associated to the inprogress TransferFuture
+
+ Once the transfer finishes this list becomes empty as the transfer
+ is considered done and there should be no running futures left.
+ """
+ with self._associated_futures_lock:
+ # We return a copy of the list because we do not want to
+ # be processing the returned list while another thread is adding
+ # more futures to the actual list.
+ return copy.copy(self._associated_futures)
+
+ @property
+ def failure_cleanups(self):
+ """The list of callbacks to call when the TransferFuture fails"""
+ return self._failure_cleanups
+
+ @property
+ def status(self):
+ """The status of the TransferFuture
+
+ The currently supported states are:
+ * not-started - Has yet to start. If in this state, a transfer
+ can be canceled immediately and nothing will happen.
+ * queued - SubmissionTask is about to submit tasks
+ * running - Is in progress. In-progress currently means that
+ the SubmissionTask that runs the transfer is being executed. So
+ there is no guarantee any transfer requests have been made to
+ S3 when this state is reached.
+ * cancelled - Was cancelled
+ * failed - An exception other than CancelledError was thrown
+ * success - No exceptions were thrown and is done.
+ """
+ return self._status
+
+ def set_result(self, result):
+ """Set a result for the TransferFuture
+
+ Implies that the TransferFuture succeeded. This will always set a
+ result because it is invoked on the final task where there is only
+ ever one final task and it is run at the very end of a transfer
+ process. So if a result is being set for this final task, the transfer
+ succeeded even if something came along and canceled the transfer
+ on the final task.
+ """
+ with self._lock:
+ self._exception = None
+ self._result = result
+ self._status = 'success'
+
+ def set_exception(self, exception, override=False):
+ """Set an exception for the TransferFuture
+
+ Implies the TransferFuture failed.
+
+ :param exception: The exception that caused the transfer to fail.
+ :param override: If True, override any existing state.
+ """
+ with self._lock:
+ if not self.done() or override:
+ self._exception = exception
+ self._status = 'failed'
+
+ def result(self):
+ """Waits until TransferFuture is done and returns the result
+
+ If the TransferFuture succeeded, it will return the result. If the
+ TransferFuture failed, it will raise the exception associated to the
+ failure.
+ """
+ # Doing a wait() with no timeout cannot be interrupted in python2 but
+ # can be interrupted in python3 so we just wait with the largest
+ # possible integer value, which is on the scale of billions of
+ # years...
+ self._done_event.wait(MAXINT)
+
+ # Once done waiting, raise an exception if present or return the
+ # final result.
+ if self._exception:
+ raise self._exception
+ return self._result
+
+ def cancel(self, msg='', exc_type=CancelledError):
+ """Cancels the TransferFuture
+
+ :param msg: The message to attach to the cancellation
+ :param exc_type: The type of exception to set for the cancellation
+ """
+ with self._lock:
+ if not self.done():
+ should_announce_done = False
+ logger.debug('%s cancel(%s) called', self, msg)
+ self._exception = exc_type(msg)
+ if self._status == 'not-started':
+ should_announce_done = True
+ self._status = 'cancelled'
+ if should_announce_done:
+ self.announce_done()
+
+ def set_status_to_queued(self):
+ """Sets the TransferFuture's status to queued"""
+ self._transition_to_non_done_state('queued')
+
+ def set_status_to_running(self):
+ """Sets the TransferFuture's status to running"""
+ self._transition_to_non_done_state('running')
+
+ def _transition_to_non_done_state(self, desired_state):
+ with self._lock:
+ if self.done():
+ raise RuntimeError(
+ 'Unable to transition from done state %s to non-done '
'state %s.' % (self.status, desired_state)
)
- self._status = desired_state
-
- def submit(self, executor, task, tag=None):
- """Submits a task to a provided executor
-
- :type executor: s3transfer.futures.BoundedExecutor
- :param executor: The executor to submit the callable to
-
- :type task: s3transfer.tasks.Task
- :param task: The task to submit to the executor
-
- :type tag: s3transfer.futures.TaskTag
- :param tag: A tag to associate to the submitted task
-
- :rtype: concurrent.futures.Future
- :returns: A future representing the submitted task
- """
- logger.debug(
+ self._status = desired_state
+
+ def submit(self, executor, task, tag=None):
+ """Submits a task to a provided executor
+
+ :type executor: s3transfer.futures.BoundedExecutor
+ :param executor: The executor to submit the callable to
+
+ :type task: s3transfer.tasks.Task
+ :param task: The task to submit to the executor
+
+ :type tag: s3transfer.futures.TaskTag
+ :param tag: A tag to associate to the submitted task
+
+ :rtype: concurrent.futures.Future
+ :returns: A future representing the submitted task
+ """
+ logger.debug(
"Submitting task {} to executor {} for transfer request: {}.".format(
task, executor, self.transfer_id
)
- )
- future = executor.submit(task, tag=tag)
- # Add this created future to the list of associated future just
- # in case it is needed during cleanups.
- self.add_associated_future(future)
- future.add_done_callback(
+ )
+ future = executor.submit(task, tag=tag)
+ # Add this created future to the list of associated future just
+ # in case it is needed during cleanups.
+ self.add_associated_future(future)
+ future.add_done_callback(
FunctionContainer(self.remove_associated_future, future)
)
- return future
-
- def done(self):
- """Determines if a TransferFuture has completed
-
- :returns: False if status is equal to 'failed', 'cancelled', or
- 'success'. True otherwise.
- """
- return self.status in ['failed', 'cancelled', 'success']
-
- def add_associated_future(self, future):
- """Adds a future to be associated with the TransferFuture"""
- with self._associated_futures_lock:
- self._associated_futures.add(future)
-
- def remove_associated_future(self, future):
- """Removes a future's association to the TransferFuture"""
- with self._associated_futures_lock:
- self._associated_futures.remove(future)
-
- def add_done_callback(self, function, *args, **kwargs):
- """Add a done callback to be invoked when transfer is done"""
- with self._done_callbacks_lock:
- self._done_callbacks.append(
- FunctionContainer(function, *args, **kwargs)
- )
-
- def add_failure_cleanup(self, function, *args, **kwargs):
- """Adds a callback to call upon failure"""
- with self._failure_cleanups_lock:
- self._failure_cleanups.append(
+ return future
+
+ def done(self):
+ """Determines if a TransferFuture has completed
+
+ :returns: False if status is equal to 'failed', 'cancelled', or
+ 'success'. True otherwise.
+ """
+ return self.status in ['failed', 'cancelled', 'success']
+
+ def add_associated_future(self, future):
+ """Adds a future to be associated with the TransferFuture"""
+ with self._associated_futures_lock:
+ self._associated_futures.add(future)
+
+ def remove_associated_future(self, future):
+ """Removes a future's association to the TransferFuture"""
+ with self._associated_futures_lock:
+ self._associated_futures.remove(future)
+
+ def add_done_callback(self, function, *args, **kwargs):
+ """Add a done callback to be invoked when transfer is done"""
+ with self._done_callbacks_lock:
+ self._done_callbacks.append(
+ FunctionContainer(function, *args, **kwargs)
+ )
+
+ def add_failure_cleanup(self, function, *args, **kwargs):
+ """Adds a callback to call upon failure"""
+ with self._failure_cleanups_lock:
+ self._failure_cleanups.append(
FunctionContainer(function, *args, **kwargs)
)
-
- def announce_done(self):
- """Announce that future is done running and run associated callbacks
-
- This will run any failure cleanups if the transfer failed and
- they have not been run, allows the result() to be unblocked, and will
- run any done callbacks associated to the TransferFuture if they have
- not already been run.
- """
- if self.status != 'success':
- self._run_failure_cleanups()
- self._done_event.set()
- self._run_done_callbacks()
-
- def _run_done_callbacks(self):
- # Run the callbacks and remove the callbacks from the internal
- # list so they do not get run again if done is announced more than
- # once.
- with self._done_callbacks_lock:
- self._run_callbacks(self._done_callbacks)
- self._done_callbacks = []
-
- def _run_failure_cleanups(self):
- # Run the cleanup callbacks and remove the callbacks from the internal
- # list so they do not get run again if done is announced more than
- # once.
- with self._failure_cleanups_lock:
- self._run_callbacks(self.failure_cleanups)
- self._failure_cleanups = []
-
- def _run_callbacks(self, callbacks):
- for callback in callbacks:
- self._run_callback(callback)
-
- def _run_callback(self, callback):
- try:
- callback()
- # We do not want a callback interrupting the process, especially
+
+ def announce_done(self):
+ """Announce that future is done running and run associated callbacks
+
+ This will run any failure cleanups if the transfer failed and
+ they have not been run, allows the result() to be unblocked, and will
+ run any done callbacks associated to the TransferFuture if they have
+ not already been run.
+ """
+ if self.status != 'success':
+ self._run_failure_cleanups()
+ self._done_event.set()
+ self._run_done_callbacks()
+
+ def _run_done_callbacks(self):
+ # Run the callbacks and remove the callbacks from the internal
+ # list so they do not get run again if done is announced more than
+ # once.
+ with self._done_callbacks_lock:
+ self._run_callbacks(self._done_callbacks)
+ self._done_callbacks = []
+
+ def _run_failure_cleanups(self):
+ # Run the cleanup callbacks and remove the callbacks from the internal
+ # list so they do not get run again if done is announced more than
+ # once.
+ with self._failure_cleanups_lock:
+ self._run_callbacks(self.failure_cleanups)
+ self._failure_cleanups = []
+
+ def _run_callbacks(self, callbacks):
+ for callback in callbacks:
+ self._run_callback(callback)
+
+ def _run_callback(self, callback):
+ try:
+ callback()
+ # We do not want a callback interrupting the process, especially
# in the failure cleanups. So log and catch the exception.
- except Exception:
- logger.debug("Exception raised in %s." % callback, exc_info=True)
-
-
+ except Exception:
+ logger.debug("Exception raised in %s." % callback, exc_info=True)
+
+
class BoundedExecutor:
- EXECUTOR_CLS = futures.ThreadPoolExecutor
-
+ EXECUTOR_CLS = futures.ThreadPoolExecutor
+
def __init__(
self, max_size, max_num_threads, tag_semaphores=None, executor_cls=None
):
"""An executor implementation that has a maximum number of queued up tasks
-
- The executor will block if the number of tasks that have been
- submitted and are currently being worked on is past its maximum.
-
- :params max_size: The maximum number of inflight futures. An inflight
- future means that the task is either queued up or is currently
- being executed. A size of None or 0 means that the executor will
- have no bound in terms of the number of inflight futures.
-
- :params max_num_threads: The maximum number of threads the executor
- uses.
-
- :type tag_semaphores: dict
- :params tag_semaphores: A dictionary where the key is the name of the
- tag and the value is the semaphore to use when limiting the
- number of tasks the executor is processing at a time.
-
- :type executor_cls: BaseExecutor
- :param executor_cls: The executor class that
- gets bounded by this executor. If None is provided, the
- concurrent.futures.ThreadPoolExecutor class is used.
- """
- self._max_num_threads = max_num_threads
- if executor_cls is None:
- executor_cls = self.EXECUTOR_CLS
- self._executor = executor_cls(max_workers=self._max_num_threads)
- self._semaphore = TaskSemaphore(max_size)
- self._tag_semaphores = tag_semaphores
-
- def submit(self, task, tag=None, block=True):
- """Submit a task to complete
-
- :type task: s3transfer.tasks.Task
- :param task: The task to run __call__ on
-
-
- :type tag: s3transfer.futures.TaskTag
- :param tag: An optional tag to associate to the task. This
- is used to override which semaphore to use.
-
- :type block: boolean
- :param block: True to wait until it is possible to submit a task.
- False to raise an error if a task cannot be submitted
- without waiting.
-
+
+ The executor will block if the number of tasks that have been
+ submitted and are currently being worked on is past its maximum.
+
+ :params max_size: The maximum number of inflight futures. An inflight
+ future means that the task is either queued up or is currently
+ being executed. A size of None or 0 means that the executor will
+ have no bound in terms of the number of inflight futures.
+
+ :params max_num_threads: The maximum number of threads the executor
+ uses.
+
+ :type tag_semaphores: dict
+ :params tag_semaphores: A dictionary where the key is the name of the
+ tag and the value is the semaphore to use when limiting the
+ number of tasks the executor is processing at a time.
+
+ :type executor_cls: BaseExecutor
+ :param executor_cls: The executor class that
+ gets bounded by this executor. If None is provided, the
+ concurrent.futures.ThreadPoolExecutor class is used.
+ """
+ self._max_num_threads = max_num_threads
+ if executor_cls is None:
+ executor_cls = self.EXECUTOR_CLS
+ self._executor = executor_cls(max_workers=self._max_num_threads)
+ self._semaphore = TaskSemaphore(max_size)
+ self._tag_semaphores = tag_semaphores
+
+ def submit(self, task, tag=None, block=True):
+ """Submit a task to complete
+
+ :type task: s3transfer.tasks.Task
+ :param task: The task to run __call__ on
+
+
+ :type tag: s3transfer.futures.TaskTag
+ :param tag: An optional tag to associate to the task. This
+ is used to override which semaphore to use.
+
+ :type block: boolean
+ :param block: True to wait until it is possible to submit a task.
+ False to raise an error if a task cannot be submitted
+ without waiting.
+
:returns: The future associated to the submitted task
- """
- semaphore = self._semaphore
- # If a tag was provided, use the semaphore associated to that
- # tag.
- if tag:
- semaphore = self._tag_semaphores[tag]
-
- # Call acquire on the semaphore.
- acquire_token = semaphore.acquire(task.transfer_id, block)
- # Create a callback to invoke when task is done in order to call
- # release on the semaphore.
- release_callback = FunctionContainer(
+ """
+ semaphore = self._semaphore
+ # If a tag was provided, use the semaphore associated to that
+ # tag.
+ if tag:
+ semaphore = self._tag_semaphores[tag]
+
+ # Call acquire on the semaphore.
+ acquire_token = semaphore.acquire(task.transfer_id, block)
+ # Create a callback to invoke when task is done in order to call
+ # release on the semaphore.
+ release_callback = FunctionContainer(
semaphore.release, task.transfer_id, acquire_token
)
- # Submit the task to the underlying executor.
- future = ExecutorFuture(self._executor.submit(task))
- # Add the Semaphore.release() callback to the future such that
- # it is invoked once the future completes.
- future.add_done_callback(release_callback)
- return future
-
- def shutdown(self, wait=True):
- self._executor.shutdown(wait)
-
-
+ # Submit the task to the underlying executor.
+ future = ExecutorFuture(self._executor.submit(task))
+ # Add the Semaphore.release() callback to the future such that
+ # it is invoked once the future completes.
+ future.add_done_callback(release_callback)
+ return future
+
+ def shutdown(self, wait=True):
+ self._executor.shutdown(wait)
+
+
class ExecutorFuture:
- def __init__(self, future):
- """A future returned from the executor
-
- Currently, it is just a wrapper around a concurrent.futures.Future.
- However, this can eventually grow to implement the needed functionality
- of concurrent.futures.Future if we move off of the library without
- affecting the rest of the codebase.
-
- :type future: concurrent.futures.Future
- :param future: The underlying future
- """
- self._future = future
-
- def result(self):
- return self._future.result()
-
- def add_done_callback(self, fn):
- """Adds a callback to be completed once future is done
-
+ def __init__(self, future):
+ """A future returned from the executor
+
+ Currently, it is just a wrapper around a concurrent.futures.Future.
+ However, this can eventually grow to implement the needed functionality
+ of concurrent.futures.Future if we move off of the library without
+ affecting the rest of the codebase.
+
+ :type future: concurrent.futures.Future
+ :param future: The underlying future
+ """
+ self._future = future
+
+ def result(self):
+ return self._future.result()
+
+ def add_done_callback(self, fn):
+ """Adds a callback to be completed once future is done
+
:param fn: A callable that takes no arguments. Note that this is different
- than concurrent.futures.Future.add_done_callback that requires
- a single argument for the future.
- """
- # The done callback for concurrent.futures.Future will always pass
- # the future in as the only argument. So we need to create the
- # proper signature wrapper that will invoke the callback provided.
- def done_callback(future_passed_to_callback):
- return fn()
-
- self._future.add_done_callback(done_callback)
-
- def done(self):
- return self._future.done()
-
-
+ than concurrent.futures.Future.add_done_callback that requires
+ a single argument for the future.
+ """
+ # The done callback for concurrent.futures.Future will always pass
+ # the future in as the only argument. So we need to create the
+ # proper signature wrapper that will invoke the callback provided.
+ def done_callback(future_passed_to_callback):
+ return fn()
+
+ self._future.add_done_callback(done_callback)
+
+ def done(self):
+ return self._future.done()
+
+
class BaseExecutor:
- """Base Executor class implementation needed to work with s3transfer"""
-
- def __init__(self, max_workers=None):
- pass
-
- def submit(self, fn, *args, **kwargs):
- raise NotImplementedError('submit()')
-
- def shutdown(self, wait=True):
- raise NotImplementedError('shutdown()')
-
-
-class NonThreadedExecutor(BaseExecutor):
- """A drop-in replacement non-threaded version of ThreadPoolExecutor"""
-
- def submit(self, fn, *args, **kwargs):
- future = NonThreadedExecutorFuture()
- try:
- result = fn(*args, **kwargs)
- future.set_result(result)
- except Exception:
- e, tb = sys.exc_info()[1:]
- logger.debug(
- 'Setting exception for %s to %s with traceback %s',
+ """Base Executor class implementation needed to work with s3transfer"""
+
+ def __init__(self, max_workers=None):
+ pass
+
+ def submit(self, fn, *args, **kwargs):
+ raise NotImplementedError('submit()')
+
+ def shutdown(self, wait=True):
+ raise NotImplementedError('shutdown()')
+
+
+class NonThreadedExecutor(BaseExecutor):
+ """A drop-in replacement non-threaded version of ThreadPoolExecutor"""
+
+ def submit(self, fn, *args, **kwargs):
+ future = NonThreadedExecutorFuture()
+ try:
+ result = fn(*args, **kwargs)
+ future.set_result(result)
+ except Exception:
+ e, tb = sys.exc_info()[1:]
+ logger.debug(
+ 'Setting exception for %s to %s with traceback %s',
future,
e,
tb,
- )
- future.set_exception_info(e, tb)
- return future
-
- def shutdown(self, wait=True):
- pass
-
-
+ )
+ future.set_exception_info(e, tb)
+ return future
+
+ def shutdown(self, wait=True):
+ pass
+
+
class NonThreadedExecutorFuture:
- """The Future returned from NonThreadedExecutor
-
- Note that this future is **not** thread-safe as it is being used
- from the context of a non-threaded environment.
- """
-
- def __init__(self):
- self._result = None
- self._exception = None
- self._traceback = None
- self._done = False
- self._done_callbacks = []
-
- def set_result(self, result):
- self._result = result
- self._set_done()
-
- def set_exception_info(self, exception, traceback):
- self._exception = exception
- self._traceback = traceback
- self._set_done()
-
- def result(self, timeout=None):
- if self._exception:
+ """The Future returned from NonThreadedExecutor
+
+ Note that this future is **not** thread-safe as it is being used
+ from the context of a non-threaded environment.
+ """
+
+ def __init__(self):
+ self._result = None
+ self._exception = None
+ self._traceback = None
+ self._done = False
+ self._done_callbacks = []
+
+ def set_result(self, result):
+ self._result = result
+ self._set_done()
+
+ def set_exception_info(self, exception, traceback):
+ self._exception = exception
+ self._traceback = traceback
+ self._set_done()
+
+ def result(self, timeout=None):
+ if self._exception:
raise self._exception.with_traceback(self._traceback)
- return self._result
-
- def _set_done(self):
- self._done = True
- for done_callback in self._done_callbacks:
- self._invoke_done_callback(done_callback)
- self._done_callbacks = []
-
- def _invoke_done_callback(self, done_callback):
- return done_callback(self)
-
- def done(self):
- return self._done
-
- def add_done_callback(self, fn):
- if self._done:
- self._invoke_done_callback(fn)
- else:
- self._done_callbacks.append(fn)
-
-
-TaskTag = namedtuple('TaskTag', ['name'])
-
-IN_MEMORY_UPLOAD_TAG = TaskTag('in_memory_upload')
-IN_MEMORY_DOWNLOAD_TAG = TaskTag('in_memory_download')
+ return self._result
+
+ def _set_done(self):
+ self._done = True
+ for done_callback in self._done_callbacks:
+ self._invoke_done_callback(done_callback)
+ self._done_callbacks = []
+
+ def _invoke_done_callback(self, done_callback):
+ return done_callback(self)
+
+ def done(self):
+ return self._done
+
+ def add_done_callback(self, fn):
+ if self._done:
+ self._invoke_done_callback(fn)
+ else:
+ self._done_callbacks.append(fn)
+
+
+TaskTag = namedtuple('TaskTag', ['name'])
+
+IN_MEMORY_UPLOAD_TAG = TaskTag('in_memory_upload')
+IN_MEMORY_DOWNLOAD_TAG = TaskTag('in_memory_download')
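
Taken together, the classes in this module split a transfer into a user-facing TransferFuture and an internal TransferCoordinator, with executors running the actual work. A hedged sketch of how they interact, using only behaviour visible in this file (in s3transfer itself the final task, not user code, sets the result and announces completion):

from s3transfer.futures import (
    NonThreadedExecutor,
    TransferCoordinator,
    TransferFuture,
)

executor = NonThreadedExecutor()                  # runs submitted work inline
coordinator = TransferCoordinator(transfer_id=1)
future = TransferFuture(coordinator=coordinator)  # meta defaults to TransferMeta()

coordinator.add_done_callback(lambda: print('transfer finished'))
coordinator.set_status_to_queued()
coordinator.set_status_to_running()

# Simulate the final task completing the transfer.
executor.submit(coordinator.set_result, 'etag-123')
coordinator.announce_done()  # unblocks result() and fires the done callback

assert future.done()
assert future.result() == 'etag-123'
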
diff --git a/contrib/python/s3transfer/py3/s3transfer/manager.py b/contrib/python/s3transfer/py3/s3transfer/manager.py
index 6ed7ec8711..ff6afa12c1 100644
--- a/contrib/python/s3transfer/py3/s3transfer/manager.py
+++ b/contrib/python/s3transfer/py3/s3transfer/manager.py
@@ -1,25 +1,25 @@
-# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License"). You
-# may not use this file except in compliance with the License. A copy of
-# the License is located at
-#
-# http://aws.amazon.com/apache2.0/
-#
-# or in the "license" file accompanying this file. This file is
-# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
-# ANY KIND, either express or implied. See the License for the specific
-# language governing permissions and limitations under the License.
-import copy
-import logging
+# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You
+# may not use this file except in compliance with the License. A copy of
+# the License is located at
+#
+# http://aws.amazon.com/apache2.0/
+#
+# or in the "license" file accompanying this file. This file is
+# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific
+# language governing permissions and limitations under the License.
+import copy
+import logging
import re
-import threading
-
+import threading
+
from s3transfer.bandwidth import BandwidthLimiter, LeakyBucket
from s3transfer.constants import ALLOWED_DOWNLOAD_ARGS, KB, MB
from s3transfer.copies import CopySubmissionTask
from s3transfer.delete import DeleteSubmissionTask
-from s3transfer.download import DownloadSubmissionTask
+from s3transfer.download import DownloadSubmissionTask
from s3transfer.exceptions import CancelledError, FatalError
from s3transfer.futures import (
IN_MEMORY_DOWNLOAD_TAG,
@@ -29,7 +29,7 @@ from s3transfer.futures import (
TransferFuture,
TransferMeta,
)
-from s3transfer.upload import UploadSubmissionTask
+from s3transfer.upload import UploadSubmissionTask
from s3transfer.utils import (
CallArgs,
OSUtils,
@@ -39,10 +39,10 @@ from s3transfer.utils import (
signal_not_transferring,
signal_transferring,
)
-
-logger = logging.getLogger(__name__)
-
-
+
+logger = logging.getLogger(__name__)
+
+
class TransferConfig:
def __init__(
self,
@@ -60,147 +60,147 @@ class TransferConfig:
max_bandwidth=None,
):
"""Configurations for the transfer manager
-
- :param multipart_threshold: The threshold for which multipart
- transfers occur.
-
- :param max_request_concurrency: The maximum number of S3 API
- transfer-related requests that can happen at a time.
-
- :param max_submission_concurrency: The maximum number of threads
- processing a call to a TransferManager method. Processing a
- call usually entails determining which S3 API requests that need
- to be enqueued, but does **not** entail making any of the
+
+ :param multipart_threshold: The threshold for which multipart
+ transfers occur.
+
+ :param max_request_concurrency: The maximum number of S3 API
+ transfer-related requests that can happen at a time.
+
+ :param max_submission_concurrency: The maximum number of threads
+ processing a call to a TransferManager method. Processing a
+ call usually entails determining which S3 API requests that need
+ to be enqueued, but does **not** entail making any of the
S3 API data transferring requests needed to perform the transfer.
- The threads controlled by ``max_request_concurrency`` are
- responsible for that.
-
- :param multipart_chunksize: The size of each transfer if a request
- becomes a multipart transfer.
-
- :param max_request_queue_size: The maximum number of S3 API requests
+ The threads controlled by ``max_request_concurrency`` are
+ responsible for that.
+
+ :param multipart_chunksize: The size of each transfer if a request
+ becomes a multipart transfer.
+
+ :param max_request_queue_size: The maximum number of S3 API requests
that can be queued at a time.
-
- :param max_submission_queue_size: The maximum number of
+
+ :param max_submission_queue_size: The maximum number of
TransferManager method calls that can be queued at a time.
-
- :param max_io_queue_size: The maximum number of read parts that
+
+ :param max_io_queue_size: The maximum number of read parts that
can be queued to be written to disk per download. The default
size for each element in this queue is 8 KB.
-
- :param io_chunksize: The max size of each chunk in the io queue.
- Currently, this is size used when reading from the downloaded
- stream as well.
-
- :param num_download_attempts: The number of download attempts that
- will be tried upon errors with downloading an object in S3. Note
+
+ :param io_chunksize: The max size of each chunk in the io queue.
+ Currently, this is size used when reading from the downloaded
+ stream as well.
+
+ :param num_download_attempts: The number of download attempts that
+ will be tried upon errors with downloading an object in S3. Note
that these retries account for errors that occur when streaming
- down the data from s3 (i.e. socket errors and read timeouts that
+ down the data from s3 (i.e. socket errors and read timeouts that
occur after receiving an OK response from s3).
- Other retryable exceptions such as throttling errors and 5xx errors
- are already retried by botocore (this default is 5). The
- ``num_download_attempts`` does not take into account the
- number of exceptions retried by botocore.
-
- :param max_in_memory_upload_chunks: The number of chunks that can
- be stored in memory at a time for all ongoing upload requests.
- This pertains to chunks of data that need to be stored in memory
- during an upload if the data is sourced from a file-like object.
- The total maximum memory footprint due to in-memory upload
- chunks is roughly equal to:
-
- max_in_memory_upload_chunks * multipart_chunksize
- + max_submission_concurrency * multipart_chunksize
-
- ``max_submission_concurrency`` has an effect on this value because
- each thread pulling data off of a file-like object may
- be waiting with a single read chunk to be submitted for upload
- because the ``max_in_memory_upload_chunks`` value has been reached
- by the threads making the upload request.
-
- :param max_in_memory_download_chunks: The number of chunks that can
- be buffered in memory and **not** in the io queue at a time for all
+ Other retryable exceptions such as throttling errors and 5xx errors
+ are already retried by botocore (this default is 5). The
+ ``num_download_attempts`` does not take into account the
+ number of exceptions retried by botocore.
+
+ :param max_in_memory_upload_chunks: The number of chunks that can
+ be stored in memory at a time for all ongoing upload requests.
+ This pertains to chunks of data that need to be stored in memory
+ during an upload if the data is sourced from a file-like object.
+ The total maximum memory footprint due to in-memory upload
+ chunks is roughly equal to:
+
+ max_in_memory_upload_chunks * multipart_chunksize
+ + max_submission_concurrency * multipart_chunksize
+
+ ``max_submission_concurrency`` has an effect on this value because
+ each thread pulling data off of a file-like object may
+ be waiting with a single read chunk to be submitted for upload
+ because the ``max_in_memory_upload_chunks`` value has been reached
+ by the threads making the upload request.
+
+ :param max_in_memory_download_chunks: The number of chunks that can
+ be buffered in memory and **not** in the io queue at a time for all
ongoing download requests. This pertains specifically to file-like
- objects that are not seekable. The total maximum memory footprint
- due to in-memory download chunks is roughly equal to:
-
- max_in_memory_download_chunks * multipart_chunksize
-
- :param max_bandwidth: The maximum bandwidth that will be consumed
- in uploading and downloading file content. The value is in terms of
- bytes per second.
- """
- self.multipart_threshold = multipart_threshold
- self.multipart_chunksize = multipart_chunksize
- self.max_request_concurrency = max_request_concurrency
- self.max_submission_concurrency = max_submission_concurrency
- self.max_request_queue_size = max_request_queue_size
- self.max_submission_queue_size = max_submission_queue_size
- self.max_io_queue_size = max_io_queue_size
- self.io_chunksize = io_chunksize
- self.num_download_attempts = num_download_attempts
- self.max_in_memory_upload_chunks = max_in_memory_upload_chunks
- self.max_in_memory_download_chunks = max_in_memory_download_chunks
- self.max_bandwidth = max_bandwidth
- self._validate_attrs_are_nonzero()
-
- def _validate_attrs_are_nonzero(self):
+ objects that are not seekable. The total maximum memory footprint
+ due to in-memory download chunks is roughly equal to:
+
+ max_in_memory_download_chunks * multipart_chunksize
+
+ :param max_bandwidth: The maximum bandwidth that will be consumed
+ in uploading and downloading file content. The value is in terms of
+ bytes per second.
+ """
+ self.multipart_threshold = multipart_threshold
+ self.multipart_chunksize = multipart_chunksize
+ self.max_request_concurrency = max_request_concurrency
+ self.max_submission_concurrency = max_submission_concurrency
+ self.max_request_queue_size = max_request_queue_size
+ self.max_submission_queue_size = max_submission_queue_size
+ self.max_io_queue_size = max_io_queue_size
+ self.io_chunksize = io_chunksize
+ self.num_download_attempts = num_download_attempts
+ self.max_in_memory_upload_chunks = max_in_memory_upload_chunks
+ self.max_in_memory_download_chunks = max_in_memory_download_chunks
+ self.max_bandwidth = max_bandwidth
+ self._validate_attrs_are_nonzero()
+
+ def _validate_attrs_are_nonzero(self):
for attr, attr_val in self.__dict__.items():
- if attr_val is not None and attr_val <= 0:
- raise ValueError(
- 'Provided parameter %s of value %s must be greater than '
+ if attr_val is not None and attr_val <= 0:
+ raise ValueError(
+ 'Provided parameter %s of value %s must be greater than '
'0.' % (attr, attr_val)
)
-
-
+
+
class TransferManager:
ALLOWED_DOWNLOAD_ARGS = ALLOWED_DOWNLOAD_ARGS
-
- ALLOWED_UPLOAD_ARGS = [
- 'ACL',
- 'CacheControl',
- 'ContentDisposition',
- 'ContentEncoding',
- 'ContentLanguage',
- 'ContentType',
+
+ ALLOWED_UPLOAD_ARGS = [
+ 'ACL',
+ 'CacheControl',
+ 'ContentDisposition',
+ 'ContentEncoding',
+ 'ContentLanguage',
+ 'ContentType',
'ExpectedBucketOwner',
- 'Expires',
- 'GrantFullControl',
- 'GrantRead',
- 'GrantReadACP',
- 'GrantWriteACP',
- 'Metadata',
- 'RequestPayer',
- 'ServerSideEncryption',
- 'StorageClass',
- 'SSECustomerAlgorithm',
- 'SSECustomerKey',
- 'SSECustomerKeyMD5',
- 'SSEKMSKeyId',
+ 'Expires',
+ 'GrantFullControl',
+ 'GrantRead',
+ 'GrantReadACP',
+ 'GrantWriteACP',
+ 'Metadata',
+ 'RequestPayer',
+ 'ServerSideEncryption',
+ 'StorageClass',
+ 'SSECustomerAlgorithm',
+ 'SSECustomerKey',
+ 'SSECustomerKeyMD5',
+ 'SSEKMSKeyId',
'SSEKMSEncryptionContext',
'Tagging',
'WebsiteRedirectLocation',
- ]
-
- ALLOWED_COPY_ARGS = ALLOWED_UPLOAD_ARGS + [
- 'CopySourceIfMatch',
- 'CopySourceIfModifiedSince',
- 'CopySourceIfNoneMatch',
- 'CopySourceIfUnmodifiedSince',
- 'CopySourceSSECustomerAlgorithm',
- 'CopySourceSSECustomerKey',
- 'CopySourceSSECustomerKeyMD5',
+ ]
+
+ ALLOWED_COPY_ARGS = ALLOWED_UPLOAD_ARGS + [
+ 'CopySourceIfMatch',
+ 'CopySourceIfModifiedSince',
+ 'CopySourceIfNoneMatch',
+ 'CopySourceIfUnmodifiedSince',
+ 'CopySourceSSECustomerAlgorithm',
+ 'CopySourceSSECustomerKey',
+ 'CopySourceSSECustomerKeyMD5',
'MetadataDirective',
'TaggingDirective',
- ]
-
- ALLOWED_DELETE_ARGS = [
- 'MFA',
- 'VersionId',
- 'RequestPayer',
+ ]
+
+ ALLOWED_DELETE_ARGS = [
+ 'MFA',
+ 'VersionId',
+ 'RequestPayer',
'ExpectedBucketOwner',
- ]
-
+ ]
+
VALIDATE_SUPPORTED_BUCKET_VALUES = True
_UNSUPPORTED_BUCKET_PATTERNS = {
@@ -210,72 +210,72 @@ class TransferManager:
),
}
- def __init__(self, client, config=None, osutil=None, executor_cls=None):
- """A transfer manager interface for Amazon S3
-
- :param client: Client to be used by the manager
- :param config: TransferConfig to associate specific configurations
- :param osutil: OSUtils object to use for os-related behavior when
- used with the transfer manager.
-
- :type executor_cls: s3transfer.futures.BaseExecutor
- :param executor_cls: The class of executor to use with the transfer
- manager. By default, concurrent.futures.ThreadPoolExecutor is used.
- """
- self._client = client
- self._config = config
- if config is None:
- self._config = TransferConfig()
- self._osutil = osutil
- if osutil is None:
- self._osutil = OSUtils()
- self._coordinator_controller = TransferCoordinatorController()
- # A counter to create unique id's for each transfer submitted.
- self._id_counter = 0
-
- # The executor responsible for making S3 API transfer requests
- self._request_executor = BoundedExecutor(
- max_size=self._config.max_request_queue_size,
- max_num_threads=self._config.max_request_concurrency,
- tag_semaphores={
- IN_MEMORY_UPLOAD_TAG: TaskSemaphore(
+ def __init__(self, client, config=None, osutil=None, executor_cls=None):
+ """A transfer manager interface for Amazon S3
+
+ :param client: Client to be used by the manager
+ :param config: TransferConfig to associate specific configurations
+ :param osutil: OSUtils object to use for os-related behavior when
+ used with the transfer manager.
+
+ :type executor_cls: s3transfer.futures.BaseExecutor
+ :param executor_cls: The class of executor to use with the transfer
+ manager. By default, concurrent.futures.ThreadPoolExecutor is used.
+ """
+ self._client = client
+ self._config = config
+ if config is None:
+ self._config = TransferConfig()
+ self._osutil = osutil
+ if osutil is None:
+ self._osutil = OSUtils()
+ self._coordinator_controller = TransferCoordinatorController()
+ # A counter to create unique id's for each transfer submitted.
+ self._id_counter = 0
+
+ # The executor responsible for making S3 API transfer requests
+ self._request_executor = BoundedExecutor(
+ max_size=self._config.max_request_queue_size,
+ max_num_threads=self._config.max_request_concurrency,
+ tag_semaphores={
+ IN_MEMORY_UPLOAD_TAG: TaskSemaphore(
self._config.max_in_memory_upload_chunks
),
- IN_MEMORY_DOWNLOAD_TAG: SlidingWindowSemaphore(
+ IN_MEMORY_DOWNLOAD_TAG: SlidingWindowSemaphore(
self._config.max_in_memory_download_chunks
),
- },
+ },
executor_cls=executor_cls,
- )
-
- # The executor responsible for submitting the necessary tasks to
- # perform the desired transfer
- self._submission_executor = BoundedExecutor(
- max_size=self._config.max_submission_queue_size,
- max_num_threads=self._config.max_submission_concurrency,
+ )
+
+ # The executor responsible for submitting the necessary tasks to
+ # perform the desired transfer
+ self._submission_executor = BoundedExecutor(
+ max_size=self._config.max_submission_queue_size,
+ max_num_threads=self._config.max_submission_concurrency,
executor_cls=executor_cls,
- )
-
- # There is one thread available for writing to disk. It will handle
- # downloads for all files.
- self._io_executor = BoundedExecutor(
- max_size=self._config.max_io_queue_size,
- max_num_threads=1,
+ )
+
+ # There is one thread available for writing to disk. It will handle
+ # downloads for all files.
+ self._io_executor = BoundedExecutor(
+ max_size=self._config.max_io_queue_size,
+ max_num_threads=1,
executor_cls=executor_cls,
- )
-
- # The component responsible for limiting bandwidth usage if it
- # is configured.
- self._bandwidth_limiter = None
- if self._config.max_bandwidth is not None:
- logger.debug(
+ )
+
+ # The component responsible for limiting bandwidth usage if it
+ # is configured.
+ self._bandwidth_limiter = None
+ if self._config.max_bandwidth is not None:
+ logger.debug(
'Setting max_bandwidth to %s', self._config.max_bandwidth
)
- leaky_bucket = LeakyBucket(self._config.max_bandwidth)
- self._bandwidth_limiter = BandwidthLimiter(leaky_bucket)
-
- self._register_handlers()
-
+ leaky_bucket = LeakyBucket(self._config.max_bandwidth)
+ self._bandwidth_limiter = BandwidthLimiter(leaky_bucket)
+
+ self._register_handlers()
+
@property
def client(self):
return self._client
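
A hedged usage sketch for the configuration and manager defined above: the bucket, key and file names are illustrative, boto3 is assumed to be available, and executor shutdown is omitted for brevity. The comment restates the TransferConfig docstring's upper bound on buffered upload data with concrete numbers.

import boto3
from s3transfer.manager import TransferConfig, TransferManager

MB = 1024 ** 2
config = TransferConfig(
    multipart_threshold=16 * MB,
    multipart_chunksize=8 * MB,
    max_request_concurrency=10,
    max_submission_concurrency=5,
    max_in_memory_upload_chunks=10,
)

# Worked example of the docstring formula:
#   max_in_memory_upload_chunks * multipart_chunksize
#     + max_submission_concurrency * multipart_chunksize
#   = (10 + 5) * 8 MB = 120 MB of buffered upload data at most
upload_memory_bound = (
    config.max_in_memory_upload_chunks + config.max_submission_concurrency
) * config.multipart_chunksize

manager = TransferManager(boto3.client('s3'), config=config)
future = manager.upload('/tmp/example.bin', 'example-bucket', 'example-key')
future.result()  # blocks until the upload finishes or raises
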
@@ -284,100 +284,100 @@ class TransferManager:
def config(self):
return self._config
- def upload(self, fileobj, bucket, key, extra_args=None, subscribers=None):
- """Uploads a file to S3
-
- :type fileobj: str or seekable file-like object
- :param fileobj: The name of a file to upload or a seekable file-like
- object to upload. It is recommended to use a filename because
- file-like objects may result in higher memory usage.
-
- :type bucket: str
- :param bucket: The name of the bucket to upload to
-
- :type key: str
- :param key: The name of the key to upload to
-
- :type extra_args: dict
- :param extra_args: Extra arguments that may be passed to the
- client operation
-
- :type subscribers: list(s3transfer.subscribers.BaseSubscriber)
- :param subscribers: The list of subscribers to be invoked in the
- order provided based on the events emitted during the process of
- the transfer request.
-
- :rtype: s3transfer.futures.TransferFuture
- :returns: Transfer future representing the upload
- """
- if extra_args is None:
- extra_args = {}
- if subscribers is None:
- subscribers = []
- self._validate_all_known_args(extra_args, self.ALLOWED_UPLOAD_ARGS)
+ def upload(self, fileobj, bucket, key, extra_args=None, subscribers=None):
+ """Uploads a file to S3
+
+ :type fileobj: str or seekable file-like object
+ :param fileobj: The name of a file to upload or a seekable file-like
+ object to upload. It is recommended to use a filename because
+ file-like objects may result in higher memory usage.
+
+ :type bucket: str
+ :param bucket: The name of the bucket to upload to
+
+ :type key: str
+ :param key: The name of the key to upload to
+
+ :type extra_args: dict
+ :param extra_args: Extra arguments that may be passed to the
+ client operation
+
+ :type subscribers: list(s3transfer.subscribers.BaseSubscriber)
+ :param subscribers: The list of subscribers to be invoked in the
+ order provided based on the events emitted during the process of
+ the transfer request.
+
+ :rtype: s3transfer.futures.TransferFuture
+ :returns: Transfer future representing the upload
+ """
+ if extra_args is None:
+ extra_args = {}
+ if subscribers is None:
+ subscribers = []
+ self._validate_all_known_args(extra_args, self.ALLOWED_UPLOAD_ARGS)
self._validate_if_bucket_supported(bucket)
- call_args = CallArgs(
+ call_args = CallArgs(
fileobj=fileobj,
bucket=bucket,
key=key,
extra_args=extra_args,
subscribers=subscribers,
- )
- extra_main_kwargs = {}
- if self._bandwidth_limiter:
- extra_main_kwargs['bandwidth_limiter'] = self._bandwidth_limiter
- return self._submit_transfer(
+ )
+ extra_main_kwargs = {}
+ if self._bandwidth_limiter:
+ extra_main_kwargs['bandwidth_limiter'] = self._bandwidth_limiter
+ return self._submit_transfer(
call_args, UploadSubmissionTask, extra_main_kwargs
)
-
+
def download(
self, bucket, key, fileobj, extra_args=None, subscribers=None
):
- """Downloads a file from S3
-
- :type bucket: str
- :param bucket: The name of the bucket to download from
-
- :type key: str
- :param key: The name of the key to download from
-
- :type fileobj: str or seekable file-like object
- :param fileobj: The name of a file to download or a seekable file-like
- object to download. It is recommended to use a filename because
- file-like objects may result in higher memory usage.
-
- :type extra_args: dict
- :param extra_args: Extra arguments that may be passed to the
- client operation
-
- :type subscribers: list(s3transfer.subscribers.BaseSubscriber)
- :param subscribers: The list of subscribers to be invoked in the
- order provided based on the events emitted during the process of
- the transfer request.
-
- :rtype: s3transfer.futures.TransferFuture
- :returns: Transfer future representing the download
- """
- if extra_args is None:
- extra_args = {}
- if subscribers is None:
- subscribers = []
- self._validate_all_known_args(extra_args, self.ALLOWED_DOWNLOAD_ARGS)
+ """Downloads a file from S3
+
+ :type bucket: str
+ :param bucket: The name of the bucket to download from
+
+ :type key: str
+ :param key: The name of the key to download from
+
+ :type fileobj: str or seekable file-like object
+ :param fileobj: The name of a file to download or a seekable file-like
+ object to download. It is recommended to use a filename because
+ file-like objects may result in higher memory usage.
+
+ :type extra_args: dict
+ :param extra_args: Extra arguments that may be passed to the
+ client operation
+
+ :type subscribers: list(s3transfer.subscribers.BaseSubscriber)
+ :param subscribers: The list of subscribers to be invoked in the
+ order provided based on the events emitted during the process of
+ the transfer request.
+
+ :rtype: s3transfer.futures.TransferFuture
+ :returns: Transfer future representing the download
+ """
+ if extra_args is None:
+ extra_args = {}
+ if subscribers is None:
+ subscribers = []
+ self._validate_all_known_args(extra_args, self.ALLOWED_DOWNLOAD_ARGS)
self._validate_if_bucket_supported(bucket)
- call_args = CallArgs(
+ call_args = CallArgs(
bucket=bucket,
key=key,
fileobj=fileobj,
extra_args=extra_args,
subscribers=subscribers,
- )
- extra_main_kwargs = {'io_executor': self._io_executor}
- if self._bandwidth_limiter:
- extra_main_kwargs['bandwidth_limiter'] = self._bandwidth_limiter
- return self._submit_transfer(
+ )
+ extra_main_kwargs = {'io_executor': self._io_executor}
+ if self._bandwidth_limiter:
+ extra_main_kwargs['bandwidth_limiter'] = self._bandwidth_limiter
+ return self._submit_transfer(
call_args, DownloadSubmissionTask, extra_main_kwargs
)
-
+
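Similarly, a minimal sketch of download(); the names and paths are placeholders, and as the code above shows, the written output is fed through the manager's io executor:

import boto3
from s3transfer.manager import TransferManager

s3 = boto3.client('s3')
with TransferManager(s3) as manager:
    future = manager.download(
        'example-bucket',               # placeholder bucket
        'example-key',                  # placeholder key
        '/tmp/downloaded.bin',          # placeholder destination path
    )
    future.result()  # wait for the download to complete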
def copy(
self,
copy_source,
@@ -387,96 +387,96 @@ class TransferManager:
subscribers=None,
source_client=None,
):
- """Copies a file in S3
-
- :type copy_source: dict
- :param copy_source: The name of the source bucket, key name of the
- source object, and optional version ID of the source object. The
- dictionary format is:
- ``{'Bucket': 'bucket', 'Key': 'key', 'VersionId': 'id'}``. Note
- that the ``VersionId`` key is optional and may be omitted.
-
- :type bucket: str
- :param bucket: The name of the bucket to copy to
-
- :type key: str
- :param key: The name of the key to copy to
-
- :type extra_args: dict
- :param extra_args: Extra arguments that may be passed to the
- client operation
-
- :type subscribers: a list of subscribers
- :param subscribers: The list of subscribers to be invoked in the
- order provided, based on the events emitted during the process of
- the transfer request.
-
- :type source_client: botocore or boto3 Client
- :param source_client: The client to be used for operations that
- may happen on the source object. For example, this client is
- used for the head_object that determines the size of the copy.
- If no client is provided, the transfer manager's client is used
- as the client for the source object.
-
- :rtype: s3transfer.futures.TransferFuture
- :returns: Transfer future representing the copy
- """
- if extra_args is None:
- extra_args = {}
- if subscribers is None:
- subscribers = []
- if source_client is None:
- source_client = self._client
- self._validate_all_known_args(extra_args, self.ALLOWED_COPY_ARGS)
+ """Copies a file in S3
+
+ :type copy_source: dict
+ :param copy_source: The name of the source bucket, key name of the
+ source object, and optional version ID of the source object. The
+ dictionary format is:
+ ``{'Bucket': 'bucket', 'Key': 'key', 'VersionId': 'id'}``. Note
+ that the ``VersionId`` key is optional and may be omitted.
+
+ :type bucket: str
+ :param bucket: The name of the bucket to copy to
+
+ :type key: str
+ :param key: The name of the key to copy to
+
+ :type extra_args: dict
+ :param extra_args: Extra arguments that may be passed to the
+ client operation
+
+ :type subscribers: a list of subscribers
+ :param subscribers: The list of subscribers to be invoked in the
+ order provided, based on the events emitted during the process of
+ the transfer request.
+
+ :type source_client: botocore or boto3 Client
+ :param source_client: The client to be used for operations that
+ may happen on the source object. For example, this client is
+ used for the head_object that determines the size of the copy.
+ If no client is provided, the transfer manager's client is used
+ as the client for the source object.
+
+ :rtype: s3transfer.futures.TransferFuture
+ :returns: Transfer future representing the copy
+ """
+ if extra_args is None:
+ extra_args = {}
+ if subscribers is None:
+ subscribers = []
+ if source_client is None:
+ source_client = self._client
+ self._validate_all_known_args(extra_args, self.ALLOWED_COPY_ARGS)
if isinstance(copy_source, dict):
self._validate_if_bucket_supported(copy_source.get('Bucket'))
self._validate_if_bucket_supported(bucket)
- call_args = CallArgs(
+ call_args = CallArgs(
copy_source=copy_source,
bucket=bucket,
key=key,
extra_args=extra_args,
subscribers=subscribers,
source_client=source_client,
- )
- return self._submit_transfer(call_args, CopySubmissionTask)
-
- def delete(self, bucket, key, extra_args=None, subscribers=None):
- """Delete an S3 object.
-
- :type bucket: str
- :param bucket: The name of the bucket.
-
- :type key: str
- :param key: The name of the S3 object to delete.
-
- :type extra_args: dict
- :param extra_args: Extra arguments that may be passed to the
- DeleteObject call.
-
- :type subscribers: list
- :param subscribers: A list of subscribers to be invoked during the
- process of the transfer request. Note that the ``on_progress``
- callback is not invoked during object deletion.
-
- :rtype: s3transfer.futures.TransferFuture
- :return: Transfer future representing the deletion.
-
- """
- if extra_args is None:
- extra_args = {}
- if subscribers is None:
- subscribers = []
- self._validate_all_known_args(extra_args, self.ALLOWED_DELETE_ARGS)
+ )
+ return self._submit_transfer(call_args, CopySubmissionTask)
+
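A rough usage sketch for copy(); the source and destination names are placeholders. If the source object is only reachable through a different client, that client can be passed as source_client, as described above:

import boto3
from s3transfer.manager import TransferManager

s3 = boto3.client('s3')
copy_source = {'Bucket': 'source-bucket', 'Key': 'source-key'}  # VersionId is optional
with TransferManager(s3) as manager:
    future = manager.copy(copy_source, 'dest-bucket', 'dest-key')
    future.result()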
+ def delete(self, bucket, key, extra_args=None, subscribers=None):
+ """Delete an S3 object.
+
+ :type bucket: str
+ :param bucket: The name of the bucket.
+
+ :type key: str
+ :param key: The name of the S3 object to delete.
+
+ :type extra_args: dict
+ :param extra_args: Extra arguments that may be passed to the
+ DeleteObject call.
+
+ :type subscribers: list
+ :param subscribers: A list of subscribers to be invoked during the
+ process of the transfer request. Note that the ``on_progress``
+ callback is not invoked during object deletion.
+
+ :rtype: s3transfer.futures.TransferFuture
+ :return: Transfer future representing the deletion.
+
+ """
+ if extra_args is None:
+ extra_args = {}
+ if subscribers is None:
+ subscribers = []
+ self._validate_all_known_args(extra_args, self.ALLOWED_DELETE_ARGS)
self._validate_if_bucket_supported(bucket)
- call_args = CallArgs(
+ call_args = CallArgs(
bucket=bucket,
key=key,
extra_args=extra_args,
subscribers=subscribers,
- )
- return self._submit_transfer(call_args, DeleteSubmissionTask)
-
+ )
+ return self._submit_transfer(call_args, DeleteSubmissionTask)
+
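And a minimal delete() sketch (placeholder names); note from the code above that it routes through the same future machinery as the other operations:

import boto3
from s3transfer.manager import TransferManager

s3 = boto3.client('s3')
with TransferManager(s3) as manager:
    future = manager.delete('example-bucket', 'stale-key')
    future.result()  # completes once the DeleteObject call has been made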
def _validate_if_bucket_supported(self, bucket):
# s3 high level operations don't support some resources
# (eg. S3 Object Lambda) only direct API calls are available
@@ -490,238 +490,238 @@ class TransferManager:
'resource. Use direct client calls instead.' % resource
)
- def _validate_all_known_args(self, actual, allowed):
- for kwarg in actual:
- if kwarg not in allowed:
- raise ValueError(
- "Invalid extra_args key '%s', "
+ def _validate_all_known_args(self, actual, allowed):
+ for kwarg in actual:
+ if kwarg not in allowed:
+ raise ValueError(
+ "Invalid extra_args key '%s', "
"must be one of: %s" % (kwarg, ', '.join(allowed))
)
-
+
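As an illustration of the validation above, an unrecognized extra_args key is rejected synchronously, before any task is submitted (placeholder names; 'NotARealArg' is deliberately invalid):

import boto3
from s3transfer.manager import TransferManager

s3 = boto3.client('s3')
manager = TransferManager(s3)
try:
    manager.delete('example-bucket', 'example-key', extra_args={'NotARealArg': 1})
except ValueError as err:
    print(err)  # "Invalid extra_args key 'NotARealArg', must be one of: ..."
finally:
    manager.shutdown()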
def _submit_transfer(
self, call_args, submission_task_cls, extra_main_kwargs=None
):
- if not extra_main_kwargs:
- extra_main_kwargs = {}
-
- # Create a TransferFuture to return back to the user
- transfer_future, components = self._get_future_with_components(
+ if not extra_main_kwargs:
+ extra_main_kwargs = {}
+
+ # Create a TransferFuture to return back to the user
+ transfer_future, components = self._get_future_with_components(
call_args
)
-
- # Add any provided done callbacks to the created transfer future
- # to be invoked on the transfer future being complete.
- for callback in get_callbacks(transfer_future, 'done'):
- components['coordinator'].add_done_callback(callback)
-
- # Get the main kwargs needed to instantiate the submission task
- main_kwargs = self._get_submission_task_main_kwargs(
+
+ # Add any provided done callbacks to the created transfer future
+ # to be invoked on the transfer future being complete.
+ for callback in get_callbacks(transfer_future, 'done'):
+ components['coordinator'].add_done_callback(callback)
+
+ # Get the main kwargs needed to instantiate the submission task
+ main_kwargs = self._get_submission_task_main_kwargs(
transfer_future, extra_main_kwargs
)
-
- # Submit a SubmissionTask that will submit all of the necessary
- # tasks needed to complete the S3 transfer.
- self._submission_executor.submit(
- submission_task_cls(
- transfer_coordinator=components['coordinator'],
+
+ # Submit a SubmissionTask that will submit all of the necessary
+ # tasks needed to complete the S3 transfer.
+ self._submission_executor.submit(
+ submission_task_cls(
+ transfer_coordinator=components['coordinator'],
main_kwargs=main_kwargs,
- )
- )
-
- # Increment the unique id counter for future transfer requests
- self._id_counter += 1
-
- return transfer_future
-
- def _get_future_with_components(self, call_args):
- transfer_id = self._id_counter
- # Creates a new transfer future along with its components
- transfer_coordinator = TransferCoordinator(transfer_id=transfer_id)
- # Track the transfer coordinator for transfers to manage.
- self._coordinator_controller.add_transfer_coordinator(
+ )
+ )
+
+ # Increment the unique id counter for future transfer requests
+ self._id_counter += 1
+
+ return transfer_future
+
+ def _get_future_with_components(self, call_args):
+ transfer_id = self._id_counter
+ # Creates a new transfer future along with its components
+ transfer_coordinator = TransferCoordinator(transfer_id=transfer_id)
+ # Track the transfer coordinator for transfers to manage.
+ self._coordinator_controller.add_transfer_coordinator(
transfer_coordinator
)
- # Also make sure that the transfer coordinator is removed once
- # the transfer completes so it does not stick around in memory.
- transfer_coordinator.add_done_callback(
- self._coordinator_controller.remove_transfer_coordinator,
+ # Also make sure that the transfer coordinator is removed once
+ # the transfer completes so it does not stick around in memory.
+ transfer_coordinator.add_done_callback(
+ self._coordinator_controller.remove_transfer_coordinator,
transfer_coordinator,
)
- components = {
- 'meta': TransferMeta(call_args, transfer_id=transfer_id),
+ components = {
+ 'meta': TransferMeta(call_args, transfer_id=transfer_id),
'coordinator': transfer_coordinator,
- }
- transfer_future = TransferFuture(**components)
- return transfer_future, components
-
- def _get_submission_task_main_kwargs(
+ }
+ transfer_future = TransferFuture(**components)
+ return transfer_future, components
+
+ def _get_submission_task_main_kwargs(
self, transfer_future, extra_main_kwargs
):
- main_kwargs = {
- 'client': self._client,
- 'config': self._config,
- 'osutil': self._osutil,
- 'request_executor': self._request_executor,
+ main_kwargs = {
+ 'client': self._client,
+ 'config': self._config,
+ 'osutil': self._osutil,
+ 'request_executor': self._request_executor,
'transfer_future': transfer_future,
- }
- main_kwargs.update(extra_main_kwargs)
- return main_kwargs
-
- def _register_handlers(self):
- # Register handlers to enable/disable callbacks on uploads.
- event_name = 'request-created.s3'
- self._client.meta.events.register_first(
+ }
+ main_kwargs.update(extra_main_kwargs)
+ return main_kwargs
+
+ def _register_handlers(self):
+ # Register handlers to enable/disable callbacks on uploads.
+ event_name = 'request-created.s3'
+ self._client.meta.events.register_first(
event_name,
signal_not_transferring,
unique_id='s3upload-not-transferring',
)
- self._client.meta.events.register_last(
+ self._client.meta.events.register_last(
event_name, signal_transferring, unique_id='s3upload-transferring'
)
-
- def __enter__(self):
- return self
-
- def __exit__(self, exc_type, exc_value, *args):
- cancel = False
- cancel_msg = ''
- cancel_exc_type = FatalError
- # If an exception was raised in the context handler, signal to cancel
- # all of the inprogress futures in the shutdown.
- if exc_type:
- cancel = True
+
+ def __enter__(self):
+ return self
+
+ def __exit__(self, exc_type, exc_value, *args):
+ cancel = False
+ cancel_msg = ''
+ cancel_exc_type = FatalError
+ # If an exception was raised in the context handler, signal to cancel
+ # all of the inprogress futures in the shutdown.
+ if exc_type:
+ cancel = True
cancel_msg = str(exc_value)
- if not cancel_msg:
- cancel_msg = repr(exc_value)
- # If it was a KeyboardInterrupt, the cancellation was initiated
- # by the user.
- if isinstance(exc_value, KeyboardInterrupt):
- cancel_exc_type = CancelledError
- self._shutdown(cancel, cancel_msg, cancel_exc_type)
-
- def shutdown(self, cancel=False, cancel_msg=''):
- """Shutdown the TransferManager
-
- It will wait till all transfers complete before it completely shuts
- down.
-
- :type cancel: boolean
- :param cancel: If True, calls TransferFuture.cancel() for
- all in-progress transfers. This is useful if you want the
- shutdown to happen quicker.
-
- :type cancel_msg: str
- :param cancel_msg: The message to specify if canceling all in-progress
- transfers.
- """
- self._shutdown(cancel, cancel_msg)
-
- def _shutdown(self, cancel, cancel_msg, exc_type=CancelledError):
- if cancel:
- # Cancel all in-flight transfers if requested, before waiting
- # for them to complete.
- self._coordinator_controller.cancel(cancel_msg, exc_type)
- try:
- # Wait until there are no more in-progress transfers. This is
- # wrapped in a try statement because this can be interrupted
- # with a KeyboardInterrupt that needs to be caught.
- self._coordinator_controller.wait()
- except KeyboardInterrupt:
- # If no errors were raised in the try block, the cancel should
- # have no coordinators it needs to run cancel on. If there was
- # an error raised in the try statement we want to cancel all of
- # the inflight transfers before shutting down to speed that
- # process up.
- self._coordinator_controller.cancel('KeyboardInterrupt()')
- raise
- finally:
- # Shutdown all of the executors.
- self._submission_executor.shutdown()
- self._request_executor.shutdown()
- self._io_executor.shutdown()
-
-
+ if not cancel_msg:
+ cancel_msg = repr(exc_value)
+ # If it was a KeyboardInterrupt, the cancellation was initiated
+ # by the user.
+ if isinstance(exc_value, KeyboardInterrupt):
+ cancel_exc_type = CancelledError
+ self._shutdown(cancel, cancel_msg, cancel_exc_type)
+
+ def shutdown(self, cancel=False, cancel_msg=''):
+ """Shutdown the TransferManager
+
+ It will wait till all transfers complete before it completely shuts
+ down.
+
+ :type cancel: boolean
+ :param cancel: If True, calls TransferFuture.cancel() for
+ all in-progress transfers. This is useful if you want the
+ shutdown to happen quicker.
+
+ :type cancel_msg: str
+ :param cancel_msg: The message to specify if canceling all in-progress
+ transfers.
+ """
+ self._shutdown(cancel, cancel_msg)
+
+ def _shutdown(self, cancel, cancel_msg, exc_type=CancelledError):
+ if cancel:
+ # Cancel all in-flight transfers if requested, before waiting
+ # for them to complete.
+ self._coordinator_controller.cancel(cancel_msg, exc_type)
+ try:
+ # Wait until there are no more in-progress transfers. This is
+ # wrapped in a try statement because this can be interrupted
+ # with a KeyboardInterrupt that needs to be caught.
+ self._coordinator_controller.wait()
+ except KeyboardInterrupt:
+ # If no errors were raised in the try block, the cancel should
+ # have no coordinators it needs to run cancel on. If there was
+ # an error raised in the try statement we want to cancel all of
+ # the inflight transfers before shutting down to speed that
+ # process up.
+ self._coordinator_controller.cancel('KeyboardInterrupt()')
+ raise
+ finally:
+ # Shutdown all of the executors.
+ self._submission_executor.shutdown()
+ self._request_executor.shutdown()
+ self._io_executor.shutdown()
+
+
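To tie the shutdown logic above together, a small sketch of explicit shutdown; entering the manager as a context manager gives the same cleanup automatically, cancelling in-flight transfers if the block raises (names and paths are placeholders):

import boto3
from s3transfer.manager import TransferManager

s3 = boto3.client('s3')
manager = TransferManager(s3)
try:
    future = manager.upload('/tmp/a.bin', 'example-bucket', 'a')   # placeholders
    future.result()
finally:
    # Waits for any remaining transfers; pass cancel=True to abort them instead.
    manager.shutdown()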
class TransferCoordinatorController:
- def __init__(self):
- """Abstraction to control all transfer coordinators
-
- This abstraction allows the manager to wait for inprogress transfers
- to complete and cancel all inprogress transfers.
- """
- self._lock = threading.Lock()
- self._tracked_transfer_coordinators = set()
-
- @property
- def tracked_transfer_coordinators(self):
- """The set of transfer coordinators being tracked"""
- with self._lock:
- # We return a copy because the set is mutable and if you were to
- # iterate over the set, it may be changing in length due to
- # additions and removals of transfer coordinators.
- return copy.copy(self._tracked_transfer_coordinators)
-
- def add_transfer_coordinator(self, transfer_coordinator):
- """Adds a transfer coordinator of a transfer to be canceled if needed
-
- :type transfer_coordinator: s3transfer.futures.TransferCoordinator
- :param transfer_coordinator: The transfer coordinator for the
- particular transfer
- """
- with self._lock:
- self._tracked_transfer_coordinators.add(transfer_coordinator)
-
- def remove_transfer_coordinator(self, transfer_coordinator):
+ def __init__(self):
+ """Abstraction to control all transfer coordinators
+
+ This abstraction allows the manager to wait for inprogress transfers
+ to complete and cancel all inprogress transfers.
+ """
+ self._lock = threading.Lock()
+ self._tracked_transfer_coordinators = set()
+
+ @property
+ def tracked_transfer_coordinators(self):
+ """The set of transfer coordinators being tracked"""
+ with self._lock:
+ # We return a copy because the set is mutable and if you were to
+ # iterate over the set, it may be changing in length due to
+ # additions and removals of transfer coordinators.
+ return copy.copy(self._tracked_transfer_coordinators)
+
+ def add_transfer_coordinator(self, transfer_coordinator):
+ """Adds a transfer coordinator of a transfer to be canceled if needed
+
+ :type transfer_coordinator: s3transfer.futures.TransferCoordinator
+ :param transfer_coordinator: The transfer coordinator for the
+ particular transfer
+ """
+ with self._lock:
+ self._tracked_transfer_coordinators.add(transfer_coordinator)
+
+ def remove_transfer_coordinator(self, transfer_coordinator):
"""Remove a transfer coordinator from cancellation consideration
-
- Typically, this method is invoked by the transfer coordinator itself
- to remove itself when it completes its transfer.
-
- :type transfer_coordinator: s3transfer.futures.TransferCoordinator
- :param transfer_coordinator: The transfer coordinator for the
- particular transfer
- """
- with self._lock:
- self._tracked_transfer_coordinators.remove(transfer_coordinator)
-
- def cancel(self, msg='', exc_type=CancelledError):
- """Cancels all inprogress transfers
-
- This cancels the inprogress transfers by calling cancel() on all
- tracked transfer coordinators.
-
- :param msg: The message to pass on to each transfer coordinator that
- gets cancelled.
-
- :param exc_type: The type of exception to set for the cancellation
- """
- for transfer_coordinator in self.tracked_transfer_coordinators:
- transfer_coordinator.cancel(msg, exc_type)
-
- def wait(self):
- """Wait until there are no more inprogress transfers
-
+
+ Typically, this method is invoked by the transfer coordinator itself
+ to remove itself when it completes its transfer.
+
+ :type transfer_coordinator: s3transfer.futures.TransferCoordinator
+ :param transfer_coordinator: The transfer coordinator for the
+ particular transfer
+ """
+ with self._lock:
+ self._tracked_transfer_coordinators.remove(transfer_coordinator)
+
+ def cancel(self, msg='', exc_type=CancelledError):
+ """Cancels all inprogress transfers
+
+ This cancels the inprogress transfers by calling cancel() on all
+ tracked transfer coordinators.
+
+ :param msg: The message to pass on to each transfer coordinator that
+ gets cancelled.
+
+ :param exc_type: The type of exception to set for the cancellation
+ """
+ for transfer_coordinator in self.tracked_transfer_coordinators:
+ transfer_coordinator.cancel(msg, exc_type)
+
+ def wait(self):
+ """Wait until there are no more inprogress transfers
+
This will not stop when failures are encountered and not propagate any
- of these errors from failed transfers, but it can be interrupted with
- a KeyboardInterrupt.
- """
- try:
- transfer_coordinator = None
- for transfer_coordinator in self.tracked_transfer_coordinators:
- transfer_coordinator.result()
- except KeyboardInterrupt:
- logger.debug('Received KeyboardInterrupt in wait()')
- # If Keyboard interrupt is raised while waiting for
- # the result, then exit out of the wait and raise the
- # exception
- if transfer_coordinator:
- logger.debug(
- 'On KeyboardInterrupt was waiting for %s',
+ of these errors from failed transfers, but it can be interrupted with
+ a KeyboardInterrupt.
+ """
+ try:
+ transfer_coordinator = None
+ for transfer_coordinator in self.tracked_transfer_coordinators:
+ transfer_coordinator.result()
+ except KeyboardInterrupt:
+ logger.debug('Received KeyboardInterrupt in wait()')
+ # If Keyboard interrupt is raised while waiting for
+ # the result, then exit out of the wait and raise the
+ # exception
+ if transfer_coordinator:
+ logger.debug(
+ 'On KeyboardInterrupt was waiting for %s',
transfer_coordinator,
)
- raise
- except Exception:
- # A general exception could have been thrown because
- # of result(). We just want to ignore this and continue
- # because we at least know that the transfer coordinator
- # has completed.
- pass
+ raise
+ except Exception:
+ # A general exception could have been thrown because
+ # of result(). We just want to ignore this and continue
+ # because we at least know that the transfer coordinator
+ # has completed.
+ pass
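A small sketch of how this controller is exercised by the manager above; instantiating it directly like this is purely for illustration:

from s3transfer.futures import TransferCoordinator
from s3transfer.manager import TransferCoordinatorController

controller = TransferCoordinatorController()
coordinator = TransferCoordinator(transfer_id=1)
controller.add_transfer_coordinator(coordinator)

# Cancel every tracked transfer, then wait for the coordinators to settle.
controller.cancel('operator requested cancel')
controller.wait()
print(coordinator.status)  # 'cancelled'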
diff --git a/contrib/python/s3transfer/py3/s3transfer/subscribers.py b/contrib/python/s3transfer/py3/s3transfer/subscribers.py
index f7697aef3b..cf0dbaa0d7 100644
--- a/contrib/python/s3transfer/py3/s3transfer/subscribers.py
+++ b/contrib/python/s3transfer/py3/s3transfer/subscribers.py
@@ -1,92 +1,92 @@
-# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License"). You
-# may not use this file except in compliance with the License. A copy of
-# the License is located at
-#
-# http://aws.amazon.com/apache2.0/
-#
-# or in the "license" file accompanying this file. This file is
-# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
-# ANY KIND, either express or implied. See the License for the specific
-# language governing permissions and limitations under the License.
-from s3transfer.compat import accepts_kwargs
-from s3transfer.exceptions import InvalidSubscriberMethodError
-
-
+# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You
+# may not use this file except in compliance with the License. A copy of
+# the License is located at
+#
+# http://aws.amazon.com/apache2.0/
+#
+# or in the "license" file accompanying this file. This file is
+# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific
+# language governing permissions and limitations under the License.
+from s3transfer.compat import accepts_kwargs
+from s3transfer.exceptions import InvalidSubscriberMethodError
+
+
class BaseSubscriber:
- """The base subscriber class
-
- It is recommended that all subscriber implementations subclass and then
- override the subscription methods (i.e. on_{subscribe_type}() methods).
- """
-
+ """The base subscriber class
+
+ It is recommended that all subscriber implementations subclass and then
+ override the subscription methods (i.e. on_{subscribe_type}() methods).
+ """
+
VALID_SUBSCRIBER_TYPES = ['queued', 'progress', 'done']
- def __new__(cls, *args, **kwargs):
- cls._validate_subscriber_methods()
+ def __new__(cls, *args, **kwargs):
+ cls._validate_subscriber_methods()
return super().__new__(cls)
-
- @classmethod
- def _validate_subscriber_methods(cls):
- for subscriber_type in cls.VALID_SUBSCRIBER_TYPES:
- subscriber_method = getattr(cls, 'on_' + subscriber_type)
+
+ @classmethod
+ def _validate_subscriber_methods(cls):
+ for subscriber_type in cls.VALID_SUBSCRIBER_TYPES:
+ subscriber_method = getattr(cls, 'on_' + subscriber_type)
if not callable(subscriber_method):
- raise InvalidSubscriberMethodError(
+ raise InvalidSubscriberMethodError(
'Subscriber method %s must be callable.'
% subscriber_method
)
-
- if not accepts_kwargs(subscriber_method):
- raise InvalidSubscriberMethodError(
- 'Subscriber method %s must accept keyword '
+
+ if not accepts_kwargs(subscriber_method):
+ raise InvalidSubscriberMethodError(
+ 'Subscriber method %s must accept keyword '
'arguments (**kwargs)' % subscriber_method
)
-
- def on_queued(self, future, **kwargs):
- """Callback to be invoked when transfer request gets queued
-
- This callback can be useful for:
-
- * Keeping track of how many transfers have been requested
- * Providing the expected transfer size through
- future.meta.provide_transfer_size() so a HeadObject would not
- need to be made for copies and downloads.
-
- :type future: s3transfer.futures.TransferFuture
- :param future: The TransferFuture representing the requested transfer.
- """
- pass
-
- def on_progress(self, future, bytes_transferred, **kwargs):
- """Callback to be invoked when progress is made on transfer
-
- This callback can be useful for:
-
- * Recording and displaying progress
-
- :type future: s3transfer.futures.TransferFuture
- :param future: The TransferFuture representing the requested transfer.
-
- :type bytes_transferred: int
- :param bytes_transferred: The number of bytes transferred for that
- invocation of the callback. Note that a negative amount can be
- provided, which usually indicates that an in-progress request
- needed to be retried and thus progress was rewound.
- """
- pass
-
- def on_done(self, future, **kwargs):
- """Callback to be invoked once a transfer is done
-
- This callback can be useful for:
-
- * Recording and displaying whether the transfer succeeded or
- failed using future.result()
- * Running some task after the transfer completed like changing
- the last modified time of a downloaded file.
-
- :type future: s3transfer.futures.TransferFuture
- :param future: The TransferFuture representing the requested transfer.
- """
- pass
+
+ def on_queued(self, future, **kwargs):
+ """Callback to be invoked when transfer request gets queued
+
+ This callback can be useful for:
+
+ * Keeping track of how many transfers have been requested
+ * Providing the expected transfer size through
+ future.meta.provide_transfer_size() so a HeadObject would not
+ need to be made for copies and downloads.
+
+ :type future: s3transfer.futures.TransferFuture
+ :param future: The TransferFuture representing the requested transfer.
+ """
+ pass
+
+ def on_progress(self, future, bytes_transferred, **kwargs):
+ """Callback to be invoked when progress is made on transfer
+
+ This callback can be useful for:
+
+ * Recording and displaying progress
+
+ :type future: s3transfer.futures.TransferFuture
+ :param future: The TransferFuture representing the requested transfer.
+
+ :type bytes_transferred: int
+ :param bytes_transferred: The number of bytes transferred for that
+ invocation of the callback. Note that a negative amount can be
+ provided, which usually indicates that an in-progress request
+ needed to be retried and thus progress was rewound.
+ """
+ pass
+
+ def on_done(self, future, **kwargs):
+ """Callback to be invoked once a transfer is done
+
+ This callback can be useful for:
+
+ * Recording and displaying whether the transfer succeeded or
+ failed using future.result()
+ * Running some task after the transfer completed like changing
+ the last modified time of a downloaded file.
+
+ :type future: s3transfer.futures.TransferFuture
+ :param future: The TransferFuture representing the requested transfer.
+ """
+ pass
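For example, a small subscriber that tracks cumulative progress might look like the following sketch; note that every overridden hook must accept **kwargs, which __new__ above enforces:

from s3transfer.subscribers import BaseSubscriber

class ProgressPrinter(BaseSubscriber):
    """Example subscriber that prints cumulative bytes transferred."""

    def __init__(self):
        self._seen = 0

    def on_progress(self, future, bytes_transferred, **kwargs):
        self._seen += bytes_transferred
        print(f'{self._seen} bytes transferred so far')

# Attached per request, e.g.:
# manager.upload('/tmp/a.bin', 'example-bucket', 'a', subscribers=[ProgressPrinter()])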
diff --git a/contrib/python/s3transfer/py3/s3transfer/tasks.py b/contrib/python/s3transfer/py3/s3transfer/tasks.py
index 2d2402862c..1bad981264 100644
--- a/contrib/python/s3transfer/py3/s3transfer/tasks.py
+++ b/contrib/python/s3transfer/py3/s3transfer/tasks.py
@@ -1,29 +1,29 @@
-# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License"). You
-# may not use this file except in compliance with the License. A copy of
-# the License is located at
-#
-# http://aws.amazon.com/apache2.0/
-#
-# or in the "license" file accompanying this file. This file is
-# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
-# ANY KIND, either express or implied. See the License for the specific
-# language governing permissions and limitations under the License.
-import copy
-import logging
-
-from s3transfer.utils import get_callbacks
-
-logger = logging.getLogger(__name__)
-
-
+# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You
+# may not use this file except in compliance with the License. A copy of
+# the License is located at
+#
+# http://aws.amazon.com/apache2.0/
+#
+# or in the "license" file accompanying this file. This file is
+# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific
+# language governing permissions and limitations under the License.
+import copy
+import logging
+
+from s3transfer.utils import get_callbacks
+
+logger = logging.getLogger(__name__)
+
+
class Task:
- """A task associated to a TransferFuture request
-
- This is a base class for other classes to subclass from. All subclassed
- classes must implement the main() method.
- """
+ """A task associated to a TransferFuture request
+
+ This is a base class for other classes to subclass from. All subclassed
+ classes must implement the main() method.
+ """
def __init__(
self,
@@ -33,58 +33,58 @@ class Task:
done_callbacks=None,
is_final=False,
):
- """
- :type transfer_coordinator: s3transfer.futures.TransferCoordinator
- :param transfer_coordinator: The context of the
- TransferFuture that this Task is associated with.
-
- :type main_kwargs: dict
- :param main_kwargs: The keyword args that can be immediately supplied
- to the _main() method of the task
-
- :type pending_main_kwargs: dict
- :param pending_main_kwargs: The keyword args that are depended upon
- by the result from a dependent future(s). The result returned by
- the future(s) will be used as the value for the keyword argument
- when _main() is called. The values for each key can be:
- * a single future - Once completed, its value will be the
- result of that single future
- * a list of futures - Once all of the futures complete, the
- value used will be a list of each completed future result
- value in order of when they were originally supplied.
-
- :type done_callbacks: list of callbacks
- :param done_callbacks: A list of callbacks to call once the task is
- done completing. Each callback will be called with no arguments
- and will be called no matter if the task succeeds or an exception
- is raised.
-
- :type is_final: boolean
- :param is_final: True, to indicate that this task is the final task
- for the TransferFuture request. By setting this value to True, it
- will set the result of the entire TransferFuture to the result
- returned by this task's main() method.
- """
- self._transfer_coordinator = transfer_coordinator
-
- self._main_kwargs = main_kwargs
- if self._main_kwargs is None:
- self._main_kwargs = {}
-
- self._pending_main_kwargs = pending_main_kwargs
- if pending_main_kwargs is None:
- self._pending_main_kwargs = {}
-
- self._done_callbacks = done_callbacks
- if self._done_callbacks is None:
- self._done_callbacks = []
-
- self._is_final = is_final
-
- def __repr__(self):
- # These are the general main_kwarg parameters that we want to
- # display in the repr.
- params_to_display = [
+ """
+ :type transfer_coordinator: s3transfer.futures.TransferCoordinator
+ :param transfer_coordinator: The context of the
+ TransferFuture that this Task is associated with.
+
+ :type main_kwargs: dict
+ :param main_kwargs: The keyword args that can be immediately supplied
+ to the _main() method of the task
+
+ :type pending_main_kwargs: dict
+ :param pending_main_kwargs: The keyword args that are depended upon
+ by the result from a dependent future(s). The result returned by
+ the future(s) will be used as the value for the keyword argument
+ when _main() is called. The values for each key can be:
+ * a single future - Once completed, its value will be the
+ result of that single future
+ * a list of futures - Once all of the futures complete, the
+ value used will be a list of each completed future result
+ value in order of when they were originally supplied.
+
+ :type done_callbacks: list of callbacks
+ :param done_callbacks: A list of callbacks to call once the task is
+ done completing. Each callback will be called with no arguments
+ and will be called no matter if the task succeeds or an exception
+ is raised.
+
+ :type is_final: boolean
+ :param is_final: True, to indicate that this task is the final task
+ for the TransferFuture request. By setting this value to True, it
+ will set the result of the entire TransferFuture to the result
+ returned by this task's main() method.
+ """
+ self._transfer_coordinator = transfer_coordinator
+
+ self._main_kwargs = main_kwargs
+ if self._main_kwargs is None:
+ self._main_kwargs = {}
+
+ self._pending_main_kwargs = pending_main_kwargs
+ if pending_main_kwargs is None:
+ self._pending_main_kwargs = {}
+
+ self._done_callbacks = done_callbacks
+ if self._done_callbacks is None:
+ self._done_callbacks = []
+
+ self._is_final = is_final
+
+ def __repr__(self):
+ # These are the general main_kwarg parameters that we want to
+ # display in the repr.
+ params_to_display = [
'bucket',
'key',
'part_number',
@@ -92,8 +92,8 @@ class Task:
'transfer_future',
'offset',
'extra_args',
- ]
- main_kwargs_to_display = self._get_kwargs_with_params_to_include(
+ ]
+ main_kwargs_to_display = self._get_kwargs_with_params_to_include(
self._main_kwargs, params_to_display
)
return '{}(transfer_id={}, {})'.format(
@@ -101,287 +101,287 @@ class Task:
self._transfer_coordinator.transfer_id,
main_kwargs_to_display,
)
-
- @property
- def transfer_id(self):
- """The id for the transfer request that the task belongs to"""
- return self._transfer_coordinator.transfer_id
-
- def _get_kwargs_with_params_to_include(self, kwargs, include):
- filtered_kwargs = {}
- for param in include:
- if param in kwargs:
- filtered_kwargs[param] = kwargs[param]
- return filtered_kwargs
-
- def _get_kwargs_with_params_to_exclude(self, kwargs, exclude):
- filtered_kwargs = {}
- for param, value in kwargs.items():
- if param in exclude:
- continue
- filtered_kwargs[param] = value
- return filtered_kwargs
-
- def __call__(self):
- """The callable to use when submitting a Task to an executor"""
- try:
- # Wait for all of futures this task depends on.
- self._wait_on_dependent_futures()
- # Gather up all of the main keyword arguments for main().
- # This includes the immediately provided main_kwargs and
- # the values for pending_main_kwargs that source from the return
+
+ @property
+ def transfer_id(self):
+ """The id for the transfer request that the task belongs to"""
+ return self._transfer_coordinator.transfer_id
+
+ def _get_kwargs_with_params_to_include(self, kwargs, include):
+ filtered_kwargs = {}
+ for param in include:
+ if param in kwargs:
+ filtered_kwargs[param] = kwargs[param]
+ return filtered_kwargs
+
+ def _get_kwargs_with_params_to_exclude(self, kwargs, exclude):
+ filtered_kwargs = {}
+ for param, value in kwargs.items():
+ if param in exclude:
+ continue
+ filtered_kwargs[param] = value
+ return filtered_kwargs
+
+ def __call__(self):
+ """The callable to use when submitting a Task to an executor"""
+ try:
+ # Wait for all of futures this task depends on.
+ self._wait_on_dependent_futures()
+ # Gather up all of the main keyword arguments for main().
+ # This includes the immediately provided main_kwargs and
+ # the values for pending_main_kwargs that source from the return
# values from the task's dependent futures.
- kwargs = self._get_all_main_kwargs()
- # If the task is not done (really only if some other related
- # task to the TransferFuture had failed) then execute the task's
- # main() method.
- if not self._transfer_coordinator.done():
- return self._execute_main(kwargs)
- except Exception as e:
- self._log_and_set_exception(e)
- finally:
- # Run any done callbacks associated to the task no matter what.
- for done_callback in self._done_callbacks:
- done_callback()
-
- if self._is_final:
- # If this is the final task announce that it is done if results
- # are waiting on its completion.
- self._transfer_coordinator.announce_done()
-
- def _execute_main(self, kwargs):
- # Do not display keyword args that should not be printed, especially
- # if they are going to make the logs hard to follow.
- params_to_exclude = ['data']
- kwargs_to_display = self._get_kwargs_with_params_to_exclude(
+ kwargs = self._get_all_main_kwargs()
+ # If the task is not done (really only if some other related
+ # task to the TransferFuture had failed) then execute the task's
+ # main() method.
+ if not self._transfer_coordinator.done():
+ return self._execute_main(kwargs)
+ except Exception as e:
+ self._log_and_set_exception(e)
+ finally:
+ # Run any done callbacks associated to the task no matter what.
+ for done_callback in self._done_callbacks:
+ done_callback()
+
+ if self._is_final:
+ # If this is the final task announce that it is done if results
+ # are waiting on its completion.
+ self._transfer_coordinator.announce_done()
+
+ def _execute_main(self, kwargs):
+ # Do not display keyword args that should not be printed, especially
+ # if they are going to make the logs hard to follow.
+ params_to_exclude = ['data']
+ kwargs_to_display = self._get_kwargs_with_params_to_exclude(
kwargs, params_to_exclude
)
- # Log what is about to be executed.
+ # Log what is about to be executed.
logger.debug(f"Executing task {self} with kwargs {kwargs_to_display}")
-
- return_value = self._main(**kwargs)
- # If the task is the final task, then set the TransferFuture's
- # value to the return value from main().
- if self._is_final:
- self._transfer_coordinator.set_result(return_value)
- return return_value
-
- def _log_and_set_exception(self, exception):
- # If an exception is ever thrown then set the exception for the
- # entire TransferFuture.
- logger.debug("Exception raised.", exc_info=True)
- self._transfer_coordinator.set_exception(exception)
-
- def _main(self, **kwargs):
- """The method that will be ran in the executor
-
- This method must be implemented by subclasses from Task. main() can
- be implemented with any arguments decided upon by the subclass.
- """
- raise NotImplementedError('_main() must be implemented')
-
- def _wait_on_dependent_futures(self):
- # Gather all of the futures that main() depends on.
- futures_to_wait_on = []
- for _, future in self._pending_main_kwargs.items():
- # If the pending main keyword arg is a list then extend the list.
- if isinstance(future, list):
- futures_to_wait_on.extend(future)
+
+ return_value = self._main(**kwargs)
+ # If the task is the final task, then set the TransferFuture's
+ # value to the return value from main().
+ if self._is_final:
+ self._transfer_coordinator.set_result(return_value)
+ return return_value
+
+ def _log_and_set_exception(self, exception):
+ # If an exception is ever thrown then set the exception for the
+ # entire TransferFuture.
+ logger.debug("Exception raised.", exc_info=True)
+ self._transfer_coordinator.set_exception(exception)
+
+ def _main(self, **kwargs):
+ """The method that will be ran in the executor
+
+ This method must be implemented by subclasses from Task. main() can
+ be implemented with any arguments decided upon by the subclass.
+ """
+ raise NotImplementedError('_main() must be implemented')
+
+ def _wait_on_dependent_futures(self):
+ # Gather all of the futures that main() depends on.
+ futures_to_wait_on = []
+ for _, future in self._pending_main_kwargs.items():
+ # If the pending main keyword arg is a list then extend the list.
+ if isinstance(future, list):
+ futures_to_wait_on.extend(future)
# If the pending main keyword arg is a future append it to the list.
- else:
- futures_to_wait_on.append(future)
- # Now wait for all of the futures to complete.
- self._wait_until_all_complete(futures_to_wait_on)
-
- def _wait_until_all_complete(self, futures):
- # This is a basic implementation of the concurrent.futures.wait()
- #
- # concurrent.futures.wait() is not used instead because of this
- # reported issue: https://bugs.python.org/issue20319.
+ else:
+ futures_to_wait_on.append(future)
+ # Now wait for all of the futures to complete.
+ self._wait_until_all_complete(futures_to_wait_on)
+
+ def _wait_until_all_complete(self, futures):
+ # This is a basic implementation of the concurrent.futures.wait()
+ #
+ # concurrent.futures.wait() is not used instead because of this
+ # reported issue: https://bugs.python.org/issue20319.
# The issue would occasionally cause multipart uploads to hang
- # when wait() was called. With this approach, it avoids the
- # concurrency bug by removing any association with concurrent.futures
- # implementation of waiters.
- logger.debug(
+ # when wait() was called. With this approach, it avoids the
+ # concurrency bug by removing any association with concurrent.futures
+ # implementation of waiters.
+ logger.debug(
'%s about to wait for the following futures %s', self, futures
)
- for future in futures:
- try:
- logger.debug('%s about to wait for %s', self, future)
- future.result()
- except Exception:
- # result() can also produce exceptions. We want to ignore
+ for future in futures:
+ try:
+ logger.debug('%s about to wait for %s', self, future)
+ future.result()
+ except Exception:
+ # result() can also produce exceptions. We want to ignore
# these to be deferred to error handling down the road.
- pass
- logger.debug('%s done waiting for dependent futures', self)
-
- def _get_all_main_kwargs(self):
- # Copy over all of the kwargs that we know is available.
- kwargs = copy.copy(self._main_kwargs)
-
- # Iterate through the kwargs whose values are pending on the result
- # of a future.
- for key, pending_value in self._pending_main_kwargs.items():
- # If the value is a list of futures, iterate though the list
- # appending on the result from each future.
- if isinstance(pending_value, list):
- result = []
- for future in pending_value:
- result.append(future.result())
- # Otherwise if the pending_value is a future, just wait for it.
- else:
- result = pending_value.result()
- # Add the retrieved value to the kwargs to be sent to the
- # main() call.
- kwargs[key] = result
- return kwargs
-
-
-class SubmissionTask(Task):
- """A base class for any submission task
-
- Submission tasks are the top-level task used to submit a series of tasks
- to execute a particular transfer.
- """
-
- def _main(self, transfer_future, **kwargs):
- """
- :type transfer_future: s3transfer.futures.TransferFuture
- :param transfer_future: The transfer future associated with the
- transfer request that tasks are being submitted for
-
- :param kwargs: Any additional kwargs that you may want to pass
- to the _submit() method
- """
- try:
- self._transfer_coordinator.set_status_to_queued()
-
- # Before submitting any tasks, run all of the on_queued callbacks
- on_queued_callbacks = get_callbacks(transfer_future, 'queued')
- for on_queued_callback in on_queued_callbacks:
- on_queued_callback()
-
- # Once callbacks have been ran set the status to running.
- self._transfer_coordinator.set_status_to_running()
-
- # Call the submit method to start submitting tasks to execute the
- # transfer.
- self._submit(transfer_future=transfer_future, **kwargs)
- except BaseException as e:
- # If there was an exception raised during the submission of task
- # there is a chance that the final task that signals if a transfer
- # is done and to run the cleanup may never have been submitted in
- # the first place so we need to account accordingly.
- #
- # Note that BaseException is caught, instead of Exception, because
+ pass
+ logger.debug('%s done waiting for dependent futures', self)
+
+ def _get_all_main_kwargs(self):
+ # Copy over all of the kwargs that we know is available.
+ kwargs = copy.copy(self._main_kwargs)
+
+ # Iterate through the kwargs whose values are pending on the result
+ # of a future.
+ for key, pending_value in self._pending_main_kwargs.items():
+ # If the value is a list of futures, iterate though the list
+ # appending on the result from each future.
+ if isinstance(pending_value, list):
+ result = []
+ for future in pending_value:
+ result.append(future.result())
+ # Otherwise if the pending_value is a future, just wait for it.
+ else:
+ result = pending_value.result()
+ # Add the retrieved value to the kwargs to be sent to the
+ # main() call.
+ kwargs[key] = result
+ return kwargs
+
+
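To make the Task contract above concrete, a toy subclass; real tasks are created and submitted by the submission tasks, so constructing one by hand like this is purely illustrative:

from s3transfer.futures import TransferCoordinator
from s3transfer.tasks import Task

class AddTask(Task):
    """Toy task whose _main() just adds the two supplied kwargs."""

    def _main(self, a, b):
        return a + b

coordinator = TransferCoordinator(transfer_id=0)
task = AddTask(
    transfer_coordinator=coordinator,
    main_kwargs={'a': 1, 'b': 2},
    is_final=True,   # the return value becomes the coordinator's result
)
task()               # tasks are plain callables, normally handed to an executor
print(coordinator.result())  # 3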
+class SubmissionTask(Task):
+ """A base class for any submission task
+
+ Submission tasks are the top-level task used to submit a series of tasks
+ to execute a particular transfer.
+ """
+
+ def _main(self, transfer_future, **kwargs):
+ """
+ :type transfer_future: s3transfer.futures.TransferFuture
+ :param transfer_future: The transfer future associated with the
+ transfer request that tasks are being submitted for
+
+ :param kwargs: Any additional kwargs that you may want to pass
+ to the _submit() method
+ """
+ try:
+ self._transfer_coordinator.set_status_to_queued()
+
+ # Before submitting any tasks, run all of the on_queued callbacks
+ on_queued_callbacks = get_callbacks(transfer_future, 'queued')
+ for on_queued_callback in on_queued_callbacks:
+ on_queued_callback()
+
+ # Once callbacks have been ran set the status to running.
+ self._transfer_coordinator.set_status_to_running()
+
+ # Call the submit method to start submitting tasks to execute the
+ # transfer.
+ self._submit(transfer_future=transfer_future, **kwargs)
+ except BaseException as e:
+ # If there was an exception raised during the submission of task
+ # there is a chance that the final task that signals if a transfer
+ # is done and to run the cleanup may never have been submitted in
+ # the first place so we need to account accordingly.
+ #
+ # Note that BaseException is caught, instead of Exception, because
# for some implementations of executors, specifically the serial
- # implementation, the SubmissionTask is directly exposed to
- # KeyboardInterupts and so needs to cleanup and signal done
- # for those as well.
-
- # Set the exception, that caused the process to fail.
- self._log_and_set_exception(e)
-
- # Wait for all possibly associated futures that may have spawned
+ # implementation, the SubmissionTask is directly exposed to
+ # KeyboardInterrupts and so needs to clean up and signal done
+ # for those as well.
+
+ # Set the exception, that caused the process to fail.
+ self._log_and_set_exception(e)
+
+ # Wait for all possibly associated futures that may have spawned
# from this submission task have finished before we announce the
- # transfer done.
- self._wait_for_all_submitted_futures_to_complete()
-
- # Announce the transfer as done, which will run any cleanups
- # and done callbacks as well.
- self._transfer_coordinator.announce_done()
-
- def _submit(self, transfer_future, **kwargs):
+ # transfer done.
+ self._wait_for_all_submitted_futures_to_complete()
+
+ # Announce the transfer as done, which will run any cleanups
+ # and done callbacks as well.
+ self._transfer_coordinator.announce_done()
+
+ def _submit(self, transfer_future, **kwargs):
"""The submission method to be implemented
-
- :type transfer_future: s3transfer.futures.TransferFuture
- :param transfer_future: The transfer future associated with the
- transfer request that tasks are being submitted for
-
- :param kwargs: Any additional keyword arguments you want to be passed
- in
- """
- raise NotImplementedError('_submit() must be implemented')
-
- def _wait_for_all_submitted_futures_to_complete(self):
- # We want to wait for all futures that were submitted to
- # complete as we do not want the cleanup callbacks or done callbacks
- # to be called too early. The main problem is any task that was
- # submitted may have submitted even more during its process and so
- # we need to account accordingly.
-
- # First get all of the futures that were submitted up to this point.
- submitted_futures = self._transfer_coordinator.associated_futures
- while submitted_futures:
- # Wait for those futures to complete.
- self._wait_until_all_complete(submitted_futures)
- # However, more futures may have been submitted as we waited so
- # we need to check again for any more associated futures.
+
+ :type transfer_future: s3transfer.futures.TransferFuture
+ :param transfer_future: The transfer future associated with the
+ transfer request that tasks are being submitted for
+
+ :param kwargs: Any additional keyword arguments you want to be passed
+ in
+ """
+ raise NotImplementedError('_submit() must be implemented')
+
+ def _wait_for_all_submitted_futures_to_complete(self):
+ # We want to wait for all futures that were submitted to
+ # complete as we do not want the cleanup callbacks or done callbacks
+ # to be called too early. The main problem is any task that was
+ # submitted may have submitted even more during its process and so
+ # we need to account accordingly.
+
+ # First get all of the futures that were submitted up to this point.
+ submitted_futures = self._transfer_coordinator.associated_futures
+ while submitted_futures:
+ # Wait for those futures to complete.
+ self._wait_until_all_complete(submitted_futures)
+ # However, more futures may have been submitted as we waited so
+ # we need to check again for any more associated futures.
possibly_more_submitted_futures = (
- self._transfer_coordinator.associated_futures
+ self._transfer_coordinator.associated_futures
)
- # If the current list of submitted futures is equal to the
- # list of associated futures after the wait completes,
- # we can ensure no more futures were submitted in waiting on
- # the current list of futures to complete ultimately meaning all
- # futures that may have spawned from the original submission task
- # have completed.
- if submitted_futures == possibly_more_submitted_futures:
- break
- submitted_futures = possibly_more_submitted_futures
-
-
-class CreateMultipartUploadTask(Task):
- """Task to initiate a multipart upload"""
-
- def _main(self, client, bucket, key, extra_args):
- """
- :param client: The client to use when calling CreateMultipartUpload
- :param bucket: The name of the bucket to upload to
- :param key: The name of the key to upload to
- :param extra_args: A dictionary of any extra arguments that may be
+ # If the current list of submitted futures is equal to the
+ # list of associated futures after the wait completes,
+ # we can ensure no more futures were submitted in waiting on
+ # the current list of futures to complete ultimately meaning all
+ # futures that may have spawned from the original submission task
+ # have completed.
+ if submitted_futures == possibly_more_submitted_futures:
+ break
+ submitted_futures = possibly_more_submitted_futures
+
+
+class CreateMultipartUploadTask(Task):
+ """Task to initiate a multipart upload"""
+
+ def _main(self, client, bucket, key, extra_args):
+ """
+ :param client: The client to use when calling CreateMultipartUpload
+ :param bucket: The name of the bucket to upload to
+ :param key: The name of the key to upload to
+ :param extra_args: A dictionary of any extra arguments that may be
used in the initialization.
-
- :returns: The upload id of the multipart upload
- """
- # Create the multipart upload.
- response = client.create_multipart_upload(
+
+ :returns: The upload id of the multipart upload
+ """
+ # Create the multipart upload.
+ response = client.create_multipart_upload(
Bucket=bucket, Key=key, **extra_args
)
- upload_id = response['UploadId']
-
- # Add a cleanup if the multipart upload fails at any point.
- self._transfer_coordinator.add_failure_cleanup(
+ upload_id = response['UploadId']
+
+ # Add a cleanup if the multipart upload fails at any point.
+ self._transfer_coordinator.add_failure_cleanup(
client.abort_multipart_upload,
Bucket=bucket,
Key=key,
UploadId=upload_id,
- )
- return upload_id
-
-
-class CompleteMultipartUploadTask(Task):
- """Task to complete a multipart upload"""
-
- def _main(self, client, bucket, key, upload_id, parts, extra_args):
- """
- :param client: The client to use when calling CompleteMultipartUpload
- :param bucket: The name of the bucket to upload to
- :param key: The name of the key to upload to
- :param upload_id: The id of the upload
- :param parts: A list of parts to use to complete the multipart upload::
-
- [{'ETag': etag_value, 'PartNumber': part_number}, ...]
-
- Each element in the list consists of a return value from
- ``UploadPartTask.main()``.
- :param extra_args: A dictionary of any extra arguments that may be
- used in completing the multipart transfer.
- """
- client.complete_multipart_upload(
+ )
+ return upload_id
+
+
+class CompleteMultipartUploadTask(Task):
+ """Task to complete a multipart upload"""
+
+ def _main(self, client, bucket, key, upload_id, parts, extra_args):
+ """
+ :param client: The client to use when calling CompleteMultipartUpload
+ :param bucket: The name of the bucket to upload to
+ :param key: The name of the key to upload to
+ :param upload_id: The id of the upload
+ :param parts: A list of parts to use to complete the multipart upload::
+
+ [{'ETag': etag_value, 'PartNumber': part_number}, ...]
+
+ Each element in the list consists of a return value from
+ ``UploadPartTask.main()``.
+ :param extra_args: A dictionary of any extra arguments that may be
+ used in completing the multipart transfer.
+ """
+ client.complete_multipart_upload(
Bucket=bucket,
Key=key,
UploadId=upload_id,
- MultipartUpload={'Parts': parts},
+ MultipartUpload={'Parts': parts},
**extra_args,
)
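The two tasks above are thin wrappers over the corresponding client calls; the lifecycle they automate looks roughly like this plain boto3 sketch (placeholder names, a single small part). Note the completed-parts list uses the 'ETag' spelling that the API returns:

import boto3

s3 = boto3.client('s3')
bucket, key = 'example-bucket', 'example-key'   # placeholders

resp = s3.create_multipart_upload(Bucket=bucket, Key=key)
upload_id = resp['UploadId']

part = s3.upload_part(
    Bucket=bucket, Key=key, UploadId=upload_id, PartNumber=1, Body=b'hello'
)
parts = [{'ETag': part['ETag'], 'PartNumber': 1}]

s3.complete_multipart_upload(
    Bucket=bucket,
    Key=key,
    UploadId=upload_id,
    MultipartUpload={'Parts': parts},
)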
diff --git a/contrib/python/s3transfer/py3/s3transfer/upload.py b/contrib/python/s3transfer/py3/s3transfer/upload.py
index 17a950fa46..31ade051d7 100644
--- a/contrib/python/s3transfer/py3/s3transfer/upload.py
+++ b/contrib/python/s3transfer/py3/s3transfer/upload.py
@@ -1,20 +1,20 @@
-# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License"). You
-# may not use this file except in compliance with the License. A copy of
-# the License is located at
-#
-# http://aws.amazon.com/apache2.0/
-#
-# or in the "license" file accompanying this file. This file is
-# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
-# ANY KIND, either express or implied. See the License for the specific
-# language governing permissions and limitations under the License.
-import math
+# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You
+# may not use this file except in compliance with the License. A copy of
+# the License is located at
+#
+# http://aws.amazon.com/apache2.0/
+#
+# or in the "license" file accompanying this file. This file is
+# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific
+# language governing permissions and limitations under the License.
+import math
from io import BytesIO
-
+
from s3transfer.compat import readable, seekable
-from s3transfer.futures import IN_MEMORY_UPLOAD_TAG
+from s3transfer.futures import IN_MEMORY_UPLOAD_TAG
from s3transfer.tasks import (
CompleteMultipartUploadTask,
CreateMultipartUploadTask,
@@ -27,527 +27,527 @@ from s3transfer.utils import (
get_callbacks,
get_filtered_dict,
)
-
-
+
+
class AggregatedProgressCallback:
- def __init__(self, callbacks, threshold=1024 * 256):
- """Aggregates progress updates for every provided progress callback
-
- :type callbacks: A list of functions that accepts bytes_transferred
- as a single argument
- :param callbacks: The callbacks to invoke when threshold is reached
-
- :type threshold: int
- :param threshold: The progress threshold in which to take the
- aggregated progress and invoke the progress callback with that
- aggregated progress total
- """
- self._callbacks = callbacks
- self._threshold = threshold
- self._bytes_seen = 0
-
- def __call__(self, bytes_transferred):
- self._bytes_seen += bytes_transferred
- if self._bytes_seen >= self._threshold:
- self._trigger_callbacks()
-
- def flush(self):
- """Flushes out any progress that has not been sent to its callbacks"""
- if self._bytes_seen > 0:
- self._trigger_callbacks()
-
- def _trigger_callbacks(self):
- for callback in self._callbacks:
- callback(bytes_transferred=self._bytes_seen)
- self._bytes_seen = 0
-
-
+ def __init__(self, callbacks, threshold=1024 * 256):
+ """Aggregates progress updates for every provided progress callback
+
+ :type callbacks: A list of functions that accepts bytes_transferred
+ as a single argument
+ :param callbacks: The callbacks to invoke when threshold is reached
+
+ :type threshold: int
+ :param threshold: The progress threshold in which to take the
+ aggregated progress and invoke the progress callback with that
+ aggregated progress total
+ """
+ self._callbacks = callbacks
+ self._threshold = threshold
+ self._bytes_seen = 0
+
+ def __call__(self, bytes_transferred):
+ self._bytes_seen += bytes_transferred
+ if self._bytes_seen >= self._threshold:
+ self._trigger_callbacks()
+
+ def flush(self):
+ """Flushes out any progress that has not been sent to its callbacks"""
+ if self._bytes_seen > 0:
+ self._trigger_callbacks()
+
+ def _trigger_callbacks(self):
+ for callback in self._callbacks:
+ callback(bytes_transferred=self._bytes_seen)
+ self._bytes_seen = 0
+
+
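A minimal usage sketch of AggregatedProgressCallback, assuming the s3transfer package in this tree is importable; the byte counts and threshold below are arbitrary illustration values. The aggregator buffers progress and only invokes the wrapped callbacks once the running total crosses the threshold (or on flush()).

from s3transfer.upload import AggregatedProgressCallback

seen = []
aggregator = AggregatedProgressCallback(
    callbacks=[lambda bytes_transferred: seen.append(bytes_transferred)],
    threshold=1024,
)
aggregator(bytes_transferred=512)  # below threshold, nothing reported yet
aggregator(bytes_transferred=600)  # total 1112 >= 1024, callbacks fire once
aggregator(bytes_transferred=100)
aggregator.flush()                 # report the trailing 100 bytes
assert seen == [1112, 100]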
class InterruptReader:
- """Wrapper that can interrupt reading using an error
-
+ """Wrapper that can interrupt reading using an error
+
It uses a transfer coordinator to propagate an error if it notices
- that a read is being made while the file is being read from.
-
- :type fileobj: file-like obj
- :param fileobj: The file-like object to read from
-
- :type transfer_coordinator: s3transfer.futures.TransferCoordinator
- :param transfer_coordinator: The transfer coordinator to use if the
- reader needs to be interrupted.
- """
-
- def __init__(self, fileobj, transfer_coordinator):
- self._fileobj = fileobj
- self._transfer_coordinator = transfer_coordinator
-
- def read(self, amount=None):
- # If there is an exception, then raise the exception.
- # We raise an error instead of returning no bytes because for
- # requests where the content length and md5 was sent, it will
- # cause md5 mismatches and retries as there was no indication that
- # the stream being read from encountered any issues.
- if self._transfer_coordinator.exception:
- raise self._transfer_coordinator.exception
- return self._fileobj.read(amount)
-
+ that a read is being made while the file is being read from.
+
+ :type fileobj: file-like obj
+ :param fileobj: The file-like object to read from
+
+ :type transfer_coordinator: s3transfer.futures.TransferCoordinator
+ :param transfer_coordinator: The transfer coordinator to use if the
+ reader needs to be interrupted.
+ """
+
+ def __init__(self, fileobj, transfer_coordinator):
+ self._fileobj = fileobj
+ self._transfer_coordinator = transfer_coordinator
+
+ def read(self, amount=None):
+ # If there is an exception, then raise the exception.
+ # We raise an error instead of returning no bytes because for
+ # requests where the content length and md5 was sent, it will
+ # cause md5 mismatches and retries as there was no indication that
+ # the stream being read from encountered any issues.
+ if self._transfer_coordinator.exception:
+ raise self._transfer_coordinator.exception
+ return self._fileobj.read(amount)
+
def seek(self, where, whence=0):
self._fileobj.seek(where, whence)
-
- def tell(self):
- return self._fileobj.tell()
-
- def close(self):
- self._fileobj.close()
-
- def __enter__(self):
- return self
-
- def __exit__(self, *args, **kwargs):
- self.close()
-
-
+
+ def tell(self):
+ return self._fileobj.tell()
+
+ def close(self):
+ self._fileobj.close()
+
+ def __enter__(self):
+ return self
+
+ def __exit__(self, *args, **kwargs):
+ self.close()
+
+
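An illustrative sketch of how InterruptReader propagates a coordinator error mid-read. The _FakeCoordinator below is a hypothetical stand-in exposing only the `exception` attribute the reader checks; in real transfers this role is played by s3transfer.futures.TransferCoordinator.

from io import BytesIO

from s3transfer.upload import InterruptReader


class _FakeCoordinator:
    """Stand-in with just the attribute InterruptReader inspects."""

    def __init__(self):
        self.exception = None


coordinator = _FakeCoordinator()
reader = InterruptReader(BytesIO(b'0123456789'), coordinator)
assert reader.read(4) == b'0123'          # normal pass-through read
coordinator.exception = RuntimeError('transfer cancelled')
try:
    reader.read(4)                        # raises instead of returning bytes
except RuntimeError as exc:
    print('read interrupted:', exc)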
class UploadInputManager:
- """Base manager class for handling various types of files for uploads
-
- This class is typically used for the UploadSubmissionTask class to help
- determine the following:
-
- * How to determine the size of the file
- * How to determine if a multipart upload is required
- * How to retrieve the body for a PutObject
- * How to retrieve the bodies for a set of UploadParts
-
- The answers/implementations differ for the various types of file inputs
- that may be accepted. All implementations must subclass and override
- public methods from this class.
- """
-
- def __init__(self, osutil, transfer_coordinator, bandwidth_limiter=None):
- self._osutil = osutil
- self._transfer_coordinator = transfer_coordinator
- self._bandwidth_limiter = bandwidth_limiter
-
- @classmethod
- def is_compatible(cls, upload_source):
- """Determines if the source for the upload is compatible with manager
-
- :param upload_source: The source for which the upload will pull data
- from.
-
- :returns: True if the manager can handle the type of source specified
- otherwise returns False.
- """
- raise NotImplementedError('must implement _is_compatible()')
-
- def stores_body_in_memory(self, operation_name):
-        """Whether the body it provides is stored in memory
-
- :type operation_name: str
- :param operation_name: The name of the client operation that the body
- is being used for. Valid operation_names are ``put_object`` and
- ``upload_part``.
-
- :rtype: boolean
- :returns: True if the body returned by the manager will be stored in
- memory. False if the manager will not directly store the body in
- memory.
- """
+ """Base manager class for handling various types of files for uploads
+
+ This class is typically used for the UploadSubmissionTask class to help
+ determine the following:
+
+ * How to determine the size of the file
+ * How to determine if a multipart upload is required
+ * How to retrieve the body for a PutObject
+ * How to retrieve the bodies for a set of UploadParts
+
+ The answers/implementations differ for the various types of file inputs
+ that may be accepted. All implementations must subclass and override
+ public methods from this class.
+ """
+
+ def __init__(self, osutil, transfer_coordinator, bandwidth_limiter=None):
+ self._osutil = osutil
+ self._transfer_coordinator = transfer_coordinator
+ self._bandwidth_limiter = bandwidth_limiter
+
+ @classmethod
+ def is_compatible(cls, upload_source):
+ """Determines if the source for the upload is compatible with manager
+
+ :param upload_source: The source for which the upload will pull data
+ from.
+
+ :returns: True if the manager can handle the type of source specified
+ otherwise returns False.
+ """
+ raise NotImplementedError('must implement _is_compatible()')
+
+ def stores_body_in_memory(self, operation_name):
+        """Whether the body it provides is stored in memory
+
+ :type operation_name: str
+ :param operation_name: The name of the client operation that the body
+ is being used for. Valid operation_names are ``put_object`` and
+ ``upload_part``.
+
+ :rtype: boolean
+ :returns: True if the body returned by the manager will be stored in
+ memory. False if the manager will not directly store the body in
+ memory.
+ """
raise NotImplementedError('must implement store_body_in_memory()')
-
- def provide_transfer_size(self, transfer_future):
- """Provides the transfer size of an upload
-
- :type transfer_future: s3transfer.futures.TransferFuture
- :param transfer_future: The future associated with upload request
- """
- raise NotImplementedError('must implement provide_transfer_size()')
-
- def requires_multipart_upload(self, transfer_future, config):
-        """Determines whether a multipart upload is required
-
- :type transfer_future: s3transfer.futures.TransferFuture
- :param transfer_future: The future associated with upload request
-
- :type config: s3transfer.manager.TransferConfig
- :param config: The config associated to the transfer manager
-
- :rtype: boolean
- :returns: True, if the upload should be multipart based on
+
+ def provide_transfer_size(self, transfer_future):
+ """Provides the transfer size of an upload
+
+ :type transfer_future: s3transfer.futures.TransferFuture
+ :param transfer_future: The future associated with upload request
+ """
+ raise NotImplementedError('must implement provide_transfer_size()')
+
+ def requires_multipart_upload(self, transfer_future, config):
+        """Determines whether a multipart upload is required
+
+ :type transfer_future: s3transfer.futures.TransferFuture
+ :param transfer_future: The future associated with upload request
+
+ :type config: s3transfer.manager.TransferConfig
+ :param config: The config associated to the transfer manager
+
+ :rtype: boolean
+ :returns: True, if the upload should be multipart based on
configuration and size. False, otherwise.
- """
- raise NotImplementedError('must implement requires_multipart_upload()')
-
- def get_put_object_body(self, transfer_future):
- """Returns the body to use for PutObject
-
- :type transfer_future: s3transfer.futures.TransferFuture
- :param transfer_future: The future associated with upload request
-
- :type config: s3transfer.manager.TransferConfig
- :param config: The config associated to the transfer manager
-
- :rtype: s3transfer.utils.ReadFileChunk
- :returns: A ReadFileChunk including all progress callbacks
- associated with the transfer future.
- """
- raise NotImplementedError('must implement get_put_object_body()')
-
- def yield_upload_part_bodies(self, transfer_future, chunksize):
- """Yields the part number and body to use for each UploadPart
-
- :type transfer_future: s3transfer.futures.TransferFuture
- :param transfer_future: The future associated with upload request
-
- :type chunksize: int
- :param chunksize: The chunksize to use for this upload.
-
- :rtype: int, s3transfer.utils.ReadFileChunk
- :returns: Yields the part number and the ReadFileChunk including all
- progress callbacks associated with the transfer future for that
- specific yielded part.
- """
- raise NotImplementedError('must implement yield_upload_part_bodies()')
-
- def _wrap_fileobj(self, fileobj):
- fileobj = InterruptReader(fileobj, self._transfer_coordinator)
- if self._bandwidth_limiter:
- fileobj = self._bandwidth_limiter.get_bandwith_limited_stream(
+ """
+ raise NotImplementedError('must implement requires_multipart_upload()')
+
+ def get_put_object_body(self, transfer_future):
+ """Returns the body to use for PutObject
+
+ :type transfer_future: s3transfer.futures.TransferFuture
+ :param transfer_future: The future associated with upload request
+
+ :type config: s3transfer.manager.TransferConfig
+ :param config: The config associated to the transfer manager
+
+ :rtype: s3transfer.utils.ReadFileChunk
+ :returns: A ReadFileChunk including all progress callbacks
+ associated with the transfer future.
+ """
+ raise NotImplementedError('must implement get_put_object_body()')
+
+ def yield_upload_part_bodies(self, transfer_future, chunksize):
+ """Yields the part number and body to use for each UploadPart
+
+ :type transfer_future: s3transfer.futures.TransferFuture
+ :param transfer_future: The future associated with upload request
+
+ :type chunksize: int
+ :param chunksize: The chunksize to use for this upload.
+
+ :rtype: int, s3transfer.utils.ReadFileChunk
+ :returns: Yields the part number and the ReadFileChunk including all
+ progress callbacks associated with the transfer future for that
+ specific yielded part.
+ """
+ raise NotImplementedError('must implement yield_upload_part_bodies()')
+
+ def _wrap_fileobj(self, fileobj):
+ fileobj = InterruptReader(fileobj, self._transfer_coordinator)
+ if self._bandwidth_limiter:
+ fileobj = self._bandwidth_limiter.get_bandwith_limited_stream(
fileobj, self._transfer_coordinator, enabled=False
)
- return fileobj
-
- def _get_progress_callbacks(self, transfer_future):
- callbacks = get_callbacks(transfer_future, 'progress')
- # We only want to be wrapping the callbacks if there are callbacks to
- # invoke because we do not want to be doing any unnecessary work if
- # there are no callbacks to invoke.
- if callbacks:
- return [AggregatedProgressCallback(callbacks)]
- return []
-
- def _get_close_callbacks(self, aggregated_progress_callbacks):
- return [callback.flush for callback in aggregated_progress_callbacks]
-
-
-class UploadFilenameInputManager(UploadInputManager):
- """Upload utility for filenames"""
-
- @classmethod
- def is_compatible(cls, upload_source):
+ return fileobj
+
+ def _get_progress_callbacks(self, transfer_future):
+ callbacks = get_callbacks(transfer_future, 'progress')
+ # We only want to be wrapping the callbacks if there are callbacks to
+ # invoke because we do not want to be doing any unnecessary work if
+ # there are no callbacks to invoke.
+ if callbacks:
+ return [AggregatedProgressCallback(callbacks)]
+ return []
+
+ def _get_close_callbacks(self, aggregated_progress_callbacks):
+ return [callback.flush for callback in aggregated_progress_callbacks]
+
+
+class UploadFilenameInputManager(UploadInputManager):
+ """Upload utility for filenames"""
+
+ @classmethod
+ def is_compatible(cls, upload_source):
return isinstance(upload_source, str)
-
- def stores_body_in_memory(self, operation_name):
- return False
-
- def provide_transfer_size(self, transfer_future):
- transfer_future.meta.provide_transfer_size(
+
+ def stores_body_in_memory(self, operation_name):
+ return False
+
+ def provide_transfer_size(self, transfer_future):
+ transfer_future.meta.provide_transfer_size(
self._osutil.get_file_size(transfer_future.meta.call_args.fileobj)
)
-
- def requires_multipart_upload(self, transfer_future, config):
- return transfer_future.meta.size >= config.multipart_threshold
-
- def get_put_object_body(self, transfer_future):
- # Get a file-like object for the given input
- fileobj, full_size = self._get_put_object_fileobj_with_full_size(
+
+ def requires_multipart_upload(self, transfer_future, config):
+ return transfer_future.meta.size >= config.multipart_threshold
+
+ def get_put_object_body(self, transfer_future):
+ # Get a file-like object for the given input
+ fileobj, full_size = self._get_put_object_fileobj_with_full_size(
transfer_future
)
-
- # Wrap fileobj with interrupt reader that will quickly cancel
- # uploads if needed instead of having to wait for the socket
- # to completely read all of the data.
- fileobj = self._wrap_fileobj(fileobj)
-
- callbacks = self._get_progress_callbacks(transfer_future)
- close_callbacks = self._get_close_callbacks(callbacks)
- size = transfer_future.meta.size
- # Return the file-like object wrapped into a ReadFileChunk to get
- # progress.
- return self._osutil.open_file_chunk_reader_from_fileobj(
+
+ # Wrap fileobj with interrupt reader that will quickly cancel
+ # uploads if needed instead of having to wait for the socket
+ # to completely read all of the data.
+ fileobj = self._wrap_fileobj(fileobj)
+
+ callbacks = self._get_progress_callbacks(transfer_future)
+ close_callbacks = self._get_close_callbacks(callbacks)
+ size = transfer_future.meta.size
+ # Return the file-like object wrapped into a ReadFileChunk to get
+ # progress.
+ return self._osutil.open_file_chunk_reader_from_fileobj(
fileobj=fileobj,
chunk_size=size,
full_file_size=full_size,
callbacks=callbacks,
close_callbacks=close_callbacks,
)
-
- def yield_upload_part_bodies(self, transfer_future, chunksize):
- full_file_size = transfer_future.meta.size
- num_parts = self._get_num_parts(transfer_future, chunksize)
- for part_number in range(1, num_parts + 1):
- callbacks = self._get_progress_callbacks(transfer_future)
- close_callbacks = self._get_close_callbacks(callbacks)
- start_byte = chunksize * (part_number - 1)
- # Get a file-like object for that part and the size of the full
- # file size for the associated file-like object for that part.
- fileobj, full_size = self._get_upload_part_fileobj_with_full_size(
+
+ def yield_upload_part_bodies(self, transfer_future, chunksize):
+ full_file_size = transfer_future.meta.size
+ num_parts = self._get_num_parts(transfer_future, chunksize)
+ for part_number in range(1, num_parts + 1):
+ callbacks = self._get_progress_callbacks(transfer_future)
+ close_callbacks = self._get_close_callbacks(callbacks)
+ start_byte = chunksize * (part_number - 1)
+ # Get a file-like object for that part and the size of the full
+ # file size for the associated file-like object for that part.
+ fileobj, full_size = self._get_upload_part_fileobj_with_full_size(
transfer_future.meta.call_args.fileobj,
start_byte=start_byte,
part_size=chunksize,
full_file_size=full_file_size,
)
-
- # Wrap fileobj with interrupt reader that will quickly cancel
- # uploads if needed instead of having to wait for the socket
- # to completely read all of the data.
- fileobj = self._wrap_fileobj(fileobj)
-
- # Wrap the file-like object into a ReadFileChunk to get progress.
- read_file_chunk = self._osutil.open_file_chunk_reader_from_fileobj(
+
+ # Wrap fileobj with interrupt reader that will quickly cancel
+ # uploads if needed instead of having to wait for the socket
+ # to completely read all of the data.
+ fileobj = self._wrap_fileobj(fileobj)
+
+ # Wrap the file-like object into a ReadFileChunk to get progress.
+ read_file_chunk = self._osutil.open_file_chunk_reader_from_fileobj(
fileobj=fileobj,
chunk_size=chunksize,
full_file_size=full_size,
callbacks=callbacks,
close_callbacks=close_callbacks,
)
- yield part_number, read_file_chunk
-
- def _get_deferred_open_file(self, fileobj, start_byte):
- fileobj = DeferredOpenFile(
+ yield part_number, read_file_chunk
+
+ def _get_deferred_open_file(self, fileobj, start_byte):
+ fileobj = DeferredOpenFile(
fileobj, start_byte, open_function=self._osutil.open
)
- return fileobj
-
- def _get_put_object_fileobj_with_full_size(self, transfer_future):
- fileobj = transfer_future.meta.call_args.fileobj
- size = transfer_future.meta.size
- return self._get_deferred_open_file(fileobj, 0), size
-
- def _get_upload_part_fileobj_with_full_size(self, fileobj, **kwargs):
- start_byte = kwargs['start_byte']
- full_size = kwargs['full_file_size']
- return self._get_deferred_open_file(fileobj, start_byte), full_size
-
- def _get_num_parts(self, transfer_future, part_size):
+ return fileobj
+
+ def _get_put_object_fileobj_with_full_size(self, transfer_future):
+ fileobj = transfer_future.meta.call_args.fileobj
+ size = transfer_future.meta.size
+ return self._get_deferred_open_file(fileobj, 0), size
+
+ def _get_upload_part_fileobj_with_full_size(self, fileobj, **kwargs):
+ start_byte = kwargs['start_byte']
+ full_size = kwargs['full_file_size']
+ return self._get_deferred_open_file(fileobj, start_byte), full_size
+
+ def _get_num_parts(self, transfer_future, part_size):
return int(math.ceil(transfer_future.meta.size / float(part_size)))
-
-
-class UploadSeekableInputManager(UploadFilenameInputManager):
- """Upload utility for an open file object"""
-
- @classmethod
- def is_compatible(cls, upload_source):
- return readable(upload_source) and seekable(upload_source)
-
- def stores_body_in_memory(self, operation_name):
- if operation_name == 'put_object':
- return False
- else:
- return True
-
- def provide_transfer_size(self, transfer_future):
- fileobj = transfer_future.meta.call_args.fileobj
- # To determine size, first determine the starting position
- # Seek to the end and then find the difference in the length
- # between the end and start positions.
- start_position = fileobj.tell()
- fileobj.seek(0, 2)
- end_position = fileobj.tell()
- fileobj.seek(start_position)
- transfer_future.meta.provide_transfer_size(
+
+
+class UploadSeekableInputManager(UploadFilenameInputManager):
+ """Upload utility for an open file object"""
+
+ @classmethod
+ def is_compatible(cls, upload_source):
+ return readable(upload_source) and seekable(upload_source)
+
+ def stores_body_in_memory(self, operation_name):
+ if operation_name == 'put_object':
+ return False
+ else:
+ return True
+
+ def provide_transfer_size(self, transfer_future):
+ fileobj = transfer_future.meta.call_args.fileobj
+ # To determine size, first determine the starting position
+ # Seek to the end and then find the difference in the length
+ # between the end and start positions.
+ start_position = fileobj.tell()
+ fileobj.seek(0, 2)
+ end_position = fileobj.tell()
+ fileobj.seek(start_position)
+ transfer_future.meta.provide_transfer_size(
end_position - start_position
)
-
- def _get_upload_part_fileobj_with_full_size(self, fileobj, **kwargs):
- # Note: It is unfortunate that in order to do a multithreaded
- # multipart upload we cannot simply copy the filelike object
- # since there is not really a mechanism in python (i.e. os.dup
- # points to the same OS filehandle which causes concurrency
- # issues). So instead we need to read from the fileobj and
+
+ def _get_upload_part_fileobj_with_full_size(self, fileobj, **kwargs):
+ # Note: It is unfortunate that in order to do a multithreaded
+ # multipart upload we cannot simply copy the filelike object
+ # since there is not really a mechanism in python (i.e. os.dup
+ # points to the same OS filehandle which causes concurrency
+ # issues). So instead we need to read from the fileobj and
# chunk the data out to separate file-like objects in memory.
- data = fileobj.read(kwargs['part_size'])
- # We return the length of the data instead of the full_file_size
+ data = fileobj.read(kwargs['part_size'])
+ # We return the length of the data instead of the full_file_size
# because we partitioned the data into separate BytesIO objects
- # meaning the BytesIO object has no knowledge of its start position
- # relative the input source nor access to the rest of the input
- # source. So we must treat it as its own standalone file.
+ # meaning the BytesIO object has no knowledge of its start position
+ # relative the input source nor access to the rest of the input
+ # source. So we must treat it as its own standalone file.
return BytesIO(data), len(data)
-
- def _get_put_object_fileobj_with_full_size(self, transfer_future):
- fileobj = transfer_future.meta.call_args.fileobj
- # The current position needs to be taken into account when retrieving
- # the full size of the file.
- size = fileobj.tell() + transfer_future.meta.size
- return fileobj, size
-
-
-class UploadNonSeekableInputManager(UploadInputManager):
- """Upload utility for a file-like object that cannot seek."""
-
- def __init__(self, osutil, transfer_coordinator, bandwidth_limiter=None):
+
+ def _get_put_object_fileobj_with_full_size(self, transfer_future):
+ fileobj = transfer_future.meta.call_args.fileobj
+ # The current position needs to be taken into account when retrieving
+ # the full size of the file.
+ size = fileobj.tell() + transfer_future.meta.size
+ return fileobj, size
+
+
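A small self-contained sketch of the tell/seek trick provide_transfer_size uses above: only the bytes from the current position to end-of-file count toward the transfer size.

from io import BytesIO

fileobj = BytesIO(b'x' * 100)
fileobj.seek(40)                 # pretend the caller already consumed 40 bytes

start_position = fileobj.tell()
fileobj.seek(0, 2)               # whence=2: seek relative to end of file
end_position = fileobj.tell()
fileobj.seek(start_position)     # restore the original position

assert end_position - start_position == 60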
+class UploadNonSeekableInputManager(UploadInputManager):
+ """Upload utility for a file-like object that cannot seek."""
+
+ def __init__(self, osutil, transfer_coordinator, bandwidth_limiter=None):
super().__init__(osutil, transfer_coordinator, bandwidth_limiter)
- self._initial_data = b''
-
- @classmethod
- def is_compatible(cls, upload_source):
- return readable(upload_source)
-
- def stores_body_in_memory(self, operation_name):
- return True
-
- def provide_transfer_size(self, transfer_future):
- # No-op because there is no way to do this short of reading the entire
- # body into memory.
- return
-
- def requires_multipart_upload(self, transfer_future, config):
- # If the user has set the size, we can use that.
- if transfer_future.meta.size is not None:
- return transfer_future.meta.size >= config.multipart_threshold
-
- # This is tricky to determine in this case because we can't know how
- # large the input is. So to figure it out, we read data into memory
- # up until the threshold and compare how much data was actually read
- # against the threshold.
- fileobj = transfer_future.meta.call_args.fileobj
- threshold = config.multipart_threshold
- self._initial_data = self._read(fileobj, threshold, False)
- if len(self._initial_data) < threshold:
- return False
- else:
- return True
-
- def get_put_object_body(self, transfer_future):
- callbacks = self._get_progress_callbacks(transfer_future)
- close_callbacks = self._get_close_callbacks(callbacks)
- fileobj = transfer_future.meta.call_args.fileobj
-
- body = self._wrap_data(
+ self._initial_data = b''
+
+ @classmethod
+ def is_compatible(cls, upload_source):
+ return readable(upload_source)
+
+ def stores_body_in_memory(self, operation_name):
+ return True
+
+ def provide_transfer_size(self, transfer_future):
+ # No-op because there is no way to do this short of reading the entire
+ # body into memory.
+ return
+
+ def requires_multipart_upload(self, transfer_future, config):
+ # If the user has set the size, we can use that.
+ if transfer_future.meta.size is not None:
+ return transfer_future.meta.size >= config.multipart_threshold
+
+ # This is tricky to determine in this case because we can't know how
+ # large the input is. So to figure it out, we read data into memory
+ # up until the threshold and compare how much data was actually read
+ # against the threshold.
+ fileobj = transfer_future.meta.call_args.fileobj
+ threshold = config.multipart_threshold
+ self._initial_data = self._read(fileobj, threshold, False)
+ if len(self._initial_data) < threshold:
+ return False
+ else:
+ return True
+
+ def get_put_object_body(self, transfer_future):
+ callbacks = self._get_progress_callbacks(transfer_future)
+ close_callbacks = self._get_close_callbacks(callbacks)
+ fileobj = transfer_future.meta.call_args.fileobj
+
+ body = self._wrap_data(
self._initial_data + fileobj.read(), callbacks, close_callbacks
)
-
- # Zero out the stored data so we don't have additional copies
- # hanging around in memory.
- self._initial_data = None
- return body
-
- def yield_upload_part_bodies(self, transfer_future, chunksize):
- file_object = transfer_future.meta.call_args.fileobj
- part_number = 0
-
- # Continue reading parts from the file-like object until it is empty.
- while True:
- callbacks = self._get_progress_callbacks(transfer_future)
- close_callbacks = self._get_close_callbacks(callbacks)
- part_number += 1
- part_content = self._read(file_object, chunksize)
- if not part_content:
- break
- part_object = self._wrap_data(
+
+ # Zero out the stored data so we don't have additional copies
+ # hanging around in memory.
+ self._initial_data = None
+ return body
+
+ def yield_upload_part_bodies(self, transfer_future, chunksize):
+ file_object = transfer_future.meta.call_args.fileobj
+ part_number = 0
+
+ # Continue reading parts from the file-like object until it is empty.
+ while True:
+ callbacks = self._get_progress_callbacks(transfer_future)
+ close_callbacks = self._get_close_callbacks(callbacks)
+ part_number += 1
+ part_content = self._read(file_object, chunksize)
+ if not part_content:
+ break
+ part_object = self._wrap_data(
part_content, callbacks, close_callbacks
)
-
- # Zero out part_content to avoid hanging on to additional data.
- part_content = None
- yield part_number, part_object
-
- def _read(self, fileobj, amount, truncate=True):
- """
- Reads a specific amount of data from a stream and returns it. If there
- is any data in initial_data, that will be popped out first.
-
- :type fileobj: A file-like object that implements read
- :param fileobj: The stream to read from.
-
- :type amount: int
- :param amount: The number of bytes to read from the stream.
-
- :type truncate: bool
- :param truncate: Whether or not to truncate initial_data after
- reading from it.
-
-        :return: The bytes read, drawn from initial_data first and then from the stream.
- """
-        # If the initial data is empty, we simply read from the fileobj
- if len(self._initial_data) == 0:
- return fileobj.read(amount)
-
+
+ # Zero out part_content to avoid hanging on to additional data.
+ part_content = None
+ yield part_number, part_object
+
+ def _read(self, fileobj, amount, truncate=True):
+ """
+ Reads a specific amount of data from a stream and returns it. If there
+ is any data in initial_data, that will be popped out first.
+
+ :type fileobj: A file-like object that implements read
+ :param fileobj: The stream to read from.
+
+ :type amount: int
+ :param amount: The number of bytes to read from the stream.
+
+ :type truncate: bool
+ :param truncate: Whether or not to truncate initial_data after
+ reading from it.
+
+        :return: The bytes read, drawn from initial_data first and then from the stream.
+ """
+        # If the initial data is empty, we simply read from the fileobj
+ if len(self._initial_data) == 0:
+ return fileobj.read(amount)
+
# If the requested number of bytes is less than the amount of
- # initial data, pull entirely from initial data.
- if amount <= len(self._initial_data):
- data = self._initial_data[:amount]
- # Truncate initial data so we don't hang onto the data longer
- # than we need.
- if truncate:
- self._initial_data = self._initial_data[amount:]
- return data
-
- # At this point there is some initial data left, but not enough to
- # satisfy the number of bytes requested. Pull out the remaining
- # initial data and read the rest from the fileobj.
- amount_to_read = amount - len(self._initial_data)
- data = self._initial_data + fileobj.read(amount_to_read)
-
- # Zero out initial data so we don't hang onto the data any more.
- if truncate:
- self._initial_data = b''
- return data
-
- def _wrap_data(self, data, callbacks, close_callbacks):
- """
- Wraps data with the interrupt reader and the file chunk reader.
-
- :type data: bytes
- :param data: The data to wrap.
-
- :type callbacks: list
- :param callbacks: The callbacks associated with the transfer future.
-
- :type close_callbacks: list
- :param close_callbacks: The callbacks to be called when closing the
- wrapper for the data.
-
- :return: Fully wrapped data.
- """
+ # initial data, pull entirely from initial data.
+ if amount <= len(self._initial_data):
+ data = self._initial_data[:amount]
+ # Truncate initial data so we don't hang onto the data longer
+ # than we need.
+ if truncate:
+ self._initial_data = self._initial_data[amount:]
+ return data
+
+ # At this point there is some initial data left, but not enough to
+ # satisfy the number of bytes requested. Pull out the remaining
+ # initial data and read the rest from the fileobj.
+ amount_to_read = amount - len(self._initial_data)
+ data = self._initial_data + fileobj.read(amount_to_read)
+
+ # Zero out initial data so we don't hang onto the data any more.
+ if truncate:
+ self._initial_data = b''
+ return data
+
+ def _wrap_data(self, data, callbacks, close_callbacks):
+ """
+ Wraps data with the interrupt reader and the file chunk reader.
+
+ :type data: bytes
+ :param data: The data to wrap.
+
+ :type callbacks: list
+ :param callbacks: The callbacks associated with the transfer future.
+
+ :type close_callbacks: list
+ :param close_callbacks: The callbacks to be called when closing the
+ wrapper for the data.
+
+ :return: Fully wrapped data.
+ """
fileobj = self._wrap_fileobj(BytesIO(data))
- return self._osutil.open_file_chunk_reader_from_fileobj(
+ return self._osutil.open_file_chunk_reader_from_fileobj(
fileobj=fileobj,
chunk_size=len(data),
full_file_size=len(data),
callbacks=callbacks,
close_callbacks=close_callbacks,
)
-
-
-class UploadSubmissionTask(SubmissionTask):
- """Task for submitting tasks to execute an upload"""
-
- UPLOAD_PART_ARGS = [
- 'SSECustomerKey',
- 'SSECustomerAlgorithm',
- 'SSECustomerKeyMD5',
- 'RequestPayer',
+
+
+class UploadSubmissionTask(SubmissionTask):
+ """Task for submitting tasks to execute an upload"""
+
+ UPLOAD_PART_ARGS = [
+ 'SSECustomerKey',
+ 'SSECustomerAlgorithm',
+ 'SSECustomerKeyMD5',
+ 'RequestPayer',
'ExpectedBucketOwner',
- ]
-
+ ]
+
COMPLETE_MULTIPART_ARGS = ['RequestPayer', 'ExpectedBucketOwner']
-
- def _get_upload_input_manager_cls(self, transfer_future):
+
+ def _get_upload_input_manager_cls(self, transfer_future):
"""Retrieves a class for managing input for an upload based on file type
-
- :type transfer_future: s3transfer.futures.TransferFuture
- :param transfer_future: The transfer future for the request
-
- :rtype: class of UploadInputManager
- :returns: The appropriate class to use for managing a specific type of
- input for uploads.
- """
- upload_manager_resolver_chain = [
- UploadFilenameInputManager,
- UploadSeekableInputManager,
+
+ :type transfer_future: s3transfer.futures.TransferFuture
+ :param transfer_future: The transfer future for the request
+
+ :rtype: class of UploadInputManager
+ :returns: The appropriate class to use for managing a specific type of
+ input for uploads.
+ """
+ upload_manager_resolver_chain = [
+ UploadFilenameInputManager,
+ UploadSeekableInputManager,
UploadNonSeekableInputManager,
- ]
-
- fileobj = transfer_future.meta.call_args.fileobj
- for upload_manager_cls in upload_manager_resolver_chain:
- if upload_manager_cls.is_compatible(fileobj):
- return upload_manager_cls
- raise RuntimeError(
+ ]
+
+ fileobj = transfer_future.meta.call_args.fileobj
+ for upload_manager_cls in upload_manager_resolver_chain:
+ if upload_manager_cls.is_compatible(fileobj):
+ return upload_manager_cls
+ raise RuntimeError(
'Input {} of type: {} is not supported.'.format(
fileobj, type(fileobj)
)
)
-
+
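A sketch of the resolver-chain idea behind _get_upload_input_manager_cls: the first manager whose is_compatible() accepts the input wins. The manager classes are the ones defined above; the inputs are made-up examples.

from io import BytesIO

from s3transfer.upload import (
    UploadFilenameInputManager,
    UploadNonSeekableInputManager,
    UploadSeekableInputManager,
)

chain = [
    UploadFilenameInputManager,
    UploadSeekableInputManager,
    UploadNonSeekableInputManager,
]


def resolve(upload_source):
    for manager_cls in chain:
        if manager_cls.is_compatible(upload_source):
            return manager_cls
    raise RuntimeError(f'Unsupported input: {upload_source!r}')


assert resolve('/tmp/file.txt') is UploadFilenameInputManager     # a path string
assert resolve(BytesIO(b'data')) is UploadSeekableInputManager    # open, seekable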
def _submit(
self,
client,
@@ -557,37 +557,37 @@ class UploadSubmissionTask(SubmissionTask):
transfer_future,
bandwidth_limiter=None,
):
- """
- :param client: The client associated with the transfer manager
-
- :type config: s3transfer.manager.TransferConfig
- :param config: The transfer config associated with the transfer
- manager
-
- :type osutil: s3transfer.utils.OSUtil
- :param osutil: The os utility associated to the transfer manager
-
- :type request_executor: s3transfer.futures.BoundedExecutor
- :param request_executor: The request executor associated with the
- transfer manager
-
- :type transfer_future: s3transfer.futures.TransferFuture
- :param transfer_future: The transfer future associated with the
- transfer request that tasks are being submitted for
- """
- upload_input_manager = self._get_upload_input_manager_cls(
+ """
+ :param client: The client associated with the transfer manager
+
+ :type config: s3transfer.manager.TransferConfig
+ :param config: The transfer config associated with the transfer
+ manager
+
+ :type osutil: s3transfer.utils.OSUtil
+ :param osutil: The os utility associated to the transfer manager
+
+ :type request_executor: s3transfer.futures.BoundedExecutor
+ :param request_executor: The request executor associated with the
+ transfer manager
+
+ :type transfer_future: s3transfer.futures.TransferFuture
+ :param transfer_future: The transfer future associated with the
+ transfer request that tasks are being submitted for
+ """
+ upload_input_manager = self._get_upload_input_manager_cls(
transfer_future
)(osutil, self._transfer_coordinator, bandwidth_limiter)
-
- # Determine the size if it was not provided
- if transfer_future.meta.size is None:
- upload_input_manager.provide_transfer_size(transfer_future)
-
- # Do a multipart upload if needed, otherwise do a regular put object.
- if not upload_input_manager.requires_multipart_upload(
+
+ # Determine the size if it was not provided
+ if transfer_future.meta.size is None:
+ upload_input_manager.provide_transfer_size(transfer_future)
+
+ # Do a multipart upload if needed, otherwise do a regular put object.
+ if not upload_input_manager.requires_multipart_upload(
transfer_future, config
):
- self._submit_upload_request(
+ self._submit_upload_request(
client,
config,
osutil,
@@ -595,8 +595,8 @@ class UploadSubmissionTask(SubmissionTask):
transfer_future,
upload_input_manager,
)
- else:
- self._submit_multipart_request(
+ else:
+ self._submit_multipart_request(
client,
config,
osutil,
@@ -604,7 +604,7 @@ class UploadSubmissionTask(SubmissionTask):
transfer_future,
upload_input_manager,
)
-
+
def _submit_upload_request(
self,
client,
@@ -614,32 +614,32 @@ class UploadSubmissionTask(SubmissionTask):
transfer_future,
upload_input_manager,
):
- call_args = transfer_future.meta.call_args
-
- # Get any tags that need to be associated to the put object task
- put_object_tag = self._get_upload_task_tag(
+ call_args = transfer_future.meta.call_args
+
+ # Get any tags that need to be associated to the put object task
+ put_object_tag = self._get_upload_task_tag(
upload_input_manager, 'put_object'
)
-
- # Submit the request of a single upload.
- self._transfer_coordinator.submit(
- request_executor,
- PutObjectTask(
- transfer_coordinator=self._transfer_coordinator,
- main_kwargs={
- 'client': client,
- 'fileobj': upload_input_manager.get_put_object_body(
+
+ # Submit the request of a single upload.
+ self._transfer_coordinator.submit(
+ request_executor,
+ PutObjectTask(
+ transfer_coordinator=self._transfer_coordinator,
+ main_kwargs={
+ 'client': client,
+ 'fileobj': upload_input_manager.get_put_object_body(
transfer_future
),
- 'bucket': call_args.bucket,
- 'key': call_args.key,
+ 'bucket': call_args.bucket,
+ 'key': call_args.key,
'extra_args': call_args.extra_args,
- },
+ },
is_final=True,
- ),
+ ),
tag=put_object_tag,
- )
-
+ )
+
def _submit_multipart_request(
self,
client,
@@ -649,141 +649,141 @@ class UploadSubmissionTask(SubmissionTask):
transfer_future,
upload_input_manager,
):
- call_args = transfer_future.meta.call_args
-
- # Submit the request to create a multipart upload.
- create_multipart_future = self._transfer_coordinator.submit(
- request_executor,
- CreateMultipartUploadTask(
- transfer_coordinator=self._transfer_coordinator,
- main_kwargs={
- 'client': client,
- 'bucket': call_args.bucket,
- 'key': call_args.key,
- 'extra_args': call_args.extra_args,
+ call_args = transfer_future.meta.call_args
+
+ # Submit the request to create a multipart upload.
+ create_multipart_future = self._transfer_coordinator.submit(
+ request_executor,
+ CreateMultipartUploadTask(
+ transfer_coordinator=self._transfer_coordinator,
+ main_kwargs={
+ 'client': client,
+ 'bucket': call_args.bucket,
+ 'key': call_args.key,
+ 'extra_args': call_args.extra_args,
},
),
- )
-
- # Submit requests to upload the parts of the file.
- part_futures = []
- extra_part_args = self._extra_upload_part_args(call_args.extra_args)
-
- # Get any tags that need to be associated to the submitted task
- # for upload the data
- upload_part_tag = self._get_upload_task_tag(
+ )
+
+ # Submit requests to upload the parts of the file.
+ part_futures = []
+ extra_part_args = self._extra_upload_part_args(call_args.extra_args)
+
+ # Get any tags that need to be associated to the submitted task
+ # for upload the data
+ upload_part_tag = self._get_upload_task_tag(
upload_input_manager, 'upload_part'
)
-
- size = transfer_future.meta.size
- adjuster = ChunksizeAdjuster()
- chunksize = adjuster.adjust_chunksize(config.multipart_chunksize, size)
- part_iterator = upload_input_manager.yield_upload_part_bodies(
+
+ size = transfer_future.meta.size
+ adjuster = ChunksizeAdjuster()
+ chunksize = adjuster.adjust_chunksize(config.multipart_chunksize, size)
+ part_iterator = upload_input_manager.yield_upload_part_bodies(
transfer_future, chunksize
)
-
- for part_number, fileobj in part_iterator:
- part_futures.append(
- self._transfer_coordinator.submit(
- request_executor,
- UploadPartTask(
- transfer_coordinator=self._transfer_coordinator,
- main_kwargs={
- 'client': client,
- 'fileobj': fileobj,
- 'bucket': call_args.bucket,
- 'key': call_args.key,
- 'part_number': part_number,
+
+ for part_number, fileobj in part_iterator:
+ part_futures.append(
+ self._transfer_coordinator.submit(
+ request_executor,
+ UploadPartTask(
+ transfer_coordinator=self._transfer_coordinator,
+ main_kwargs={
+ 'client': client,
+ 'fileobj': fileobj,
+ 'bucket': call_args.bucket,
+ 'key': call_args.key,
+ 'part_number': part_number,
'extra_args': extra_part_args,
- },
- pending_main_kwargs={
- 'upload_id': create_multipart_future
},
- ),
+ pending_main_kwargs={
+ 'upload_id': create_multipart_future
+ },
+ ),
tag=upload_part_tag,
- )
- )
-
- complete_multipart_extra_args = self._extra_complete_multipart_args(
+ )
+ )
+
+ complete_multipart_extra_args = self._extra_complete_multipart_args(
call_args.extra_args
)
- # Submit the request to complete the multipart upload.
- self._transfer_coordinator.submit(
- request_executor,
- CompleteMultipartUploadTask(
- transfer_coordinator=self._transfer_coordinator,
- main_kwargs={
- 'client': client,
- 'bucket': call_args.bucket,
- 'key': call_args.key,
- 'extra_args': complete_multipart_extra_args,
- },
- pending_main_kwargs={
- 'upload_id': create_multipart_future,
+ # Submit the request to complete the multipart upload.
+ self._transfer_coordinator.submit(
+ request_executor,
+ CompleteMultipartUploadTask(
+ transfer_coordinator=self._transfer_coordinator,
+ main_kwargs={
+ 'client': client,
+ 'bucket': call_args.bucket,
+ 'key': call_args.key,
+ 'extra_args': complete_multipart_extra_args,
+ },
+ pending_main_kwargs={
+ 'upload_id': create_multipart_future,
'parts': part_futures,
- },
+ },
is_final=True,
),
- )
-
- def _extra_upload_part_args(self, extra_args):
- # Only the args in UPLOAD_PART_ARGS actually need to be passed
- # onto the upload_part calls.
- return get_filtered_dict(extra_args, self.UPLOAD_PART_ARGS)
-
- def _extra_complete_multipart_args(self, extra_args):
- return get_filtered_dict(extra_args, self.COMPLETE_MULTIPART_ARGS)
-
- def _get_upload_task_tag(self, upload_input_manager, operation_name):
- tag = None
- if upload_input_manager.stores_body_in_memory(operation_name):
- tag = IN_MEMORY_UPLOAD_TAG
- return tag
-
-
-class PutObjectTask(Task):
- """Task to do a nonmultipart upload"""
-
- def _main(self, client, fileobj, bucket, key, extra_args):
- """
- :param client: The client to use when calling PutObject
- :param fileobj: The file to upload.
- :param bucket: The name of the bucket to upload to
- :param key: The name of the key to upload to
- :param extra_args: A dictionary of any extra arguments that may be
- used in the upload.
- """
- with fileobj as body:
- client.put_object(Bucket=bucket, Key=key, Body=body, **extra_args)
-
-
-class UploadPartTask(Task):
- """Task to upload a part in a multipart upload"""
+ )
+
+ def _extra_upload_part_args(self, extra_args):
+ # Only the args in UPLOAD_PART_ARGS actually need to be passed
+ # onto the upload_part calls.
+ return get_filtered_dict(extra_args, self.UPLOAD_PART_ARGS)
+
+ def _extra_complete_multipart_args(self, extra_args):
+ return get_filtered_dict(extra_args, self.COMPLETE_MULTIPART_ARGS)
+
+ def _get_upload_task_tag(self, upload_input_manager, operation_name):
+ tag = None
+ if upload_input_manager.stores_body_in_memory(operation_name):
+ tag = IN_MEMORY_UPLOAD_TAG
+ return tag
+
+
+class PutObjectTask(Task):
+ """Task to do a nonmultipart upload"""
+
+ def _main(self, client, fileobj, bucket, key, extra_args):
+ """
+ :param client: The client to use when calling PutObject
+ :param fileobj: The file to upload.
+ :param bucket: The name of the bucket to upload to
+ :param key: The name of the key to upload to
+ :param extra_args: A dictionary of any extra arguments that may be
+ used in the upload.
+ """
+ with fileobj as body:
+ client.put_object(Bucket=bucket, Key=key, Body=body, **extra_args)
+
+
+class UploadPartTask(Task):
+ """Task to upload a part in a multipart upload"""
def _main(
self, client, fileobj, bucket, key, upload_id, part_number, extra_args
):
- """
- :param client: The client to use when calling PutObject
- :param fileobj: The file to upload.
- :param bucket: The name of the bucket to upload to
- :param key: The name of the key to upload to
- :param upload_id: The id of the upload
- :param part_number: The number representing the part of the multipart
- upload
- :param extra_args: A dictionary of any extra arguments that may be
- used in the upload.
-
- :rtype: dict
- :returns: A dictionary representing a part::
-
- {'Etag': etag_value, 'PartNumber': part_number}
-
- This value can be appended to a list to be used to complete
- the multipart upload.
- """
- with fileobj as body:
- response = client.upload_part(
+ """
+ :param client: The client to use when calling PutObject
+ :param fileobj: The file to upload.
+ :param bucket: The name of the bucket to upload to
+ :param key: The name of the key to upload to
+ :param upload_id: The id of the upload
+ :param part_number: The number representing the part of the multipart
+ upload
+ :param extra_args: A dictionary of any extra arguments that may be
+ used in the upload.
+
+ :rtype: dict
+ :returns: A dictionary representing a part::
+
+ {'Etag': etag_value, 'PartNumber': part_number}
+
+ This value can be appended to a list to be used to complete
+ the multipart upload.
+ """
+ with fileobj as body:
+ response = client.upload_part(
Bucket=bucket,
Key=key,
UploadId=upload_id,
@@ -791,5 +791,5 @@ class UploadPartTask(Task):
Body=body,
**extra_args
)
- etag = response['ETag']
- return {'ETag': etag, 'PartNumber': part_number}
+ etag = response['ETag']
+ return {'ETag': etag, 'PartNumber': part_number}
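An end-to-end sketch of how the tasks in this file are normally driven: a TransferManager picks between the single PutObject path and the multipart path based on TransferConfig.multipart_threshold. The bucket, key, and local path are placeholders, and this assumes botocore credentials are configured.

import boto3

from s3transfer.manager import TransferConfig, TransferManager

client = boto3.client('s3')
config = TransferConfig(
    multipart_threshold=8 * 1024 * 1024,  # switch to multipart above 8 MiB
    multipart_chunksize=8 * 1024 * 1024,
)

with TransferManager(client, config) as manager:
    future = manager.upload(
        fileobj='/tmp/example-large-file.bin',    # hypothetical local path
        bucket='example-bucket',                  # placeholder bucket
        key='uploads/example-large-file.bin',     # placeholder key
    )
    future.result()  # block until the upload (single or multipart) completes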
diff --git a/contrib/python/s3transfer/py3/s3transfer/utils.py b/contrib/python/s3transfer/py3/s3transfer/utils.py
index 6901f9fcbd..ba881c67dd 100644
--- a/contrib/python/s3transfer/py3/s3transfer/utils.py
+++ b/contrib/python/s3transfer/py3/s3transfer/utils.py
@@ -1,39 +1,39 @@
-# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License"). You
-# may not use this file except in compliance with the License. A copy of
-# the License is located at
-#
-# http://aws.amazon.com/apache2.0/
-#
-# or in the "license" file accompanying this file. This file is
-# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
-# ANY KIND, either express or implied. See the License for the specific
-# language governing permissions and limitations under the License.
-import functools
+# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You
+# may not use this file except in compliance with the License. A copy of
+# the License is located at
+#
+# http://aws.amazon.com/apache2.0/
+#
+# or in the "license" file accompanying this file. This file is
+# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific
+# language governing permissions and limitations under the License.
+import functools
import logging
-import math
-import os
+import math
+import os
import random
import socket
-import stat
-import string
-import threading
-from collections import defaultdict
-
+import stat
+import string
+import threading
+from collections import defaultdict
+
from botocore.exceptions import IncompleteReadError, ReadTimeoutError
from s3transfer.compat import SOCKET_ERROR, fallocate, rename_file
-
-MAX_PARTS = 10000
-# The maximum file size you can upload via S3 per request.
-# See: http://docs.aws.amazon.com/AmazonS3/latest/dev/UploadingObjects.html
-# and: http://docs.aws.amazon.com/AmazonS3/latest/dev/qfacts.html
-MAX_SINGLE_UPLOAD_SIZE = 5 * (1024 ** 3)
-MIN_UPLOAD_CHUNKSIZE = 5 * (1024 ** 2)
-logger = logging.getLogger(__name__)
-
-
+
+MAX_PARTS = 10000
+# The maximum file size you can upload via S3 per request.
+# See: http://docs.aws.amazon.com/AmazonS3/latest/dev/UploadingObjects.html
+# and: http://docs.aws.amazon.com/AmazonS3/latest/dev/qfacts.html
+MAX_SINGLE_UPLOAD_SIZE = 5 * (1024 ** 3)
+MIN_UPLOAD_CHUNKSIZE = 5 * (1024 ** 2)
+logger = logging.getLogger(__name__)
+
+
S3_RETRYABLE_DOWNLOAD_ERRORS = (
socket.timeout,
SOCKET_ERROR,
@@ -42,24 +42,24 @@ S3_RETRYABLE_DOWNLOAD_ERRORS = (
)
-def random_file_extension(num_digits=8):
- return ''.join(random.choice(string.hexdigits) for _ in range(num_digits))
-
-
-def signal_not_transferring(request, operation_name, **kwargs):
+def random_file_extension(num_digits=8):
+ return ''.join(random.choice(string.hexdigits) for _ in range(num_digits))
+
+
+def signal_not_transferring(request, operation_name, **kwargs):
if operation_name in ['PutObject', 'UploadPart'] and hasattr(
request.body, 'signal_not_transferring'
):
- request.body.signal_not_transferring()
-
-
-def signal_transferring(request, operation_name, **kwargs):
+ request.body.signal_not_transferring()
+
+
+def signal_transferring(request, operation_name, **kwargs):
if operation_name in ['PutObject', 'UploadPart'] and hasattr(
request.body, 'signal_transferring'
):
- request.body.signal_transferring()
-
-
+ request.body.signal_transferring()
+
+
def calculate_num_parts(size, part_size):
return int(math.ceil(size / float(part_size)))
@@ -67,190 +67,190 @@ def calculate_num_parts(size, part_size):
def calculate_range_parameter(
part_size, part_index, num_parts, total_size=None
):
- """Calculate the range parameter for multipart downloads/copies
-
- :type part_size: int
- :param part_size: The size of the part
-
- :type part_index: int
-    :param part_index: The index at which this part starts. This index starts
- at zero
-
- :type num_parts: int
- :param num_parts: The total number of parts in the transfer
-
- :returns: The value to use for Range parameter on downloads or
- the CopySourceRange parameter for copies
- """
- # Used to calculate the Range parameter
- start_range = part_index * part_size
- if part_index == num_parts - 1:
- end_range = ''
- if total_size is not None:
- end_range = str(total_size - 1)
- else:
- end_range = start_range + part_size - 1
+ """Calculate the range parameter for multipart downloads/copies
+
+ :type part_size: int
+ :param part_size: The size of the part
+
+ :type part_index: int
+    :param part_index: The index at which this part starts. This index starts
+ at zero
+
+ :type num_parts: int
+ :param num_parts: The total number of parts in the transfer
+
+ :returns: The value to use for Range parameter on downloads or
+ the CopySourceRange parameter for copies
+ """
+ # Used to calculate the Range parameter
+ start_range = part_index * part_size
+ if part_index == num_parts - 1:
+ end_range = ''
+ if total_size is not None:
+ end_range = str(total_size - 1)
+ else:
+ end_range = start_range + part_size - 1
range_param = f'bytes={start_range}-{end_range}'
- return range_param
-
-
-def get_callbacks(transfer_future, callback_type):
- """Retrieves callbacks from a subscriber
-
- :type transfer_future: s3transfer.futures.TransferFuture
- :param transfer_future: The transfer future the subscriber is associated
- to.
-
- :type callback_type: str
- :param callback_type: The type of callback to retrieve from the subscriber.
- Valid types include:
- * 'queued'
- * 'progress'
- * 'done'
-
- :returns: A list of callbacks for the type specified. All callbacks are
- preinjected with the transfer future.
- """
- callbacks = []
- for subscriber in transfer_future.meta.call_args.subscribers:
- callback_name = 'on_' + callback_type
- if hasattr(subscriber, callback_name):
- callbacks.append(
- functools.partial(
+ return range_param
+
+
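A worked example for calculate_range_parameter: three 5 MiB parts of a 12 MiB object. The final part gets an open-ended range unless the total size is supplied.

from s3transfer.utils import calculate_range_parameter

part_size = 5 * 1024 * 1024       # 5242880 bytes
total_size = 12 * 1024 * 1024     # 12582912 bytes
num_parts = 3

print(calculate_range_parameter(part_size, 0, num_parts))
# bytes=0-5242879
print(calculate_range_parameter(part_size, 1, num_parts))
# bytes=5242880-10485759
print(calculate_range_parameter(part_size, 2, num_parts))
# bytes=10485760-
print(calculate_range_parameter(part_size, 2, num_parts, total_size))
# bytes=10485760-12582911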
+def get_callbacks(transfer_future, callback_type):
+ """Retrieves callbacks from a subscriber
+
+ :type transfer_future: s3transfer.futures.TransferFuture
+ :param transfer_future: The transfer future the subscriber is associated
+ to.
+
+ :type callback_type: str
+ :param callback_type: The type of callback to retrieve from the subscriber.
+ Valid types include:
+ * 'queued'
+ * 'progress'
+ * 'done'
+
+ :returns: A list of callbacks for the type specified. All callbacks are
+ preinjected with the transfer future.
+ """
+ callbacks = []
+ for subscriber in transfer_future.meta.call_args.subscribers:
+ callback_name = 'on_' + callback_type
+ if hasattr(subscriber, callback_name):
+ callbacks.append(
+ functools.partial(
getattr(subscriber, callback_name), future=transfer_future
- )
- )
- return callbacks
-
-
-def invoke_progress_callbacks(callbacks, bytes_transferred):
- """Calls all progress callbacks
-
- :param callbacks: A list of progress callbacks to invoke
- :param bytes_transferred: The number of bytes transferred. This is passed
- to the callbacks. If no bytes were transferred the callbacks will not
- be invoked because no progress was achieved. It is also possible
- to receive a negative amount which comes from retrying a transfer
- request.
- """
- # Only invoke the callbacks if bytes were actually transferred.
- if bytes_transferred:
- for callback in callbacks:
- callback(bytes_transferred=bytes_transferred)
-
-
-def get_filtered_dict(original_dict, whitelisted_keys):
- """Gets a dictionary filtered by whitelisted keys
-
- :param original_dict: The original dictionary of arguments to source keys
- and values.
-    :param whitelisted_keys: A list of keys to include in the filtered
- dictionary.
-
- :returns: A dictionary containing key/values from the original dictionary
- whose key was included in the whitelist
- """
- filtered_dict = {}
- for key, value in original_dict.items():
- if key in whitelisted_keys:
- filtered_dict[key] = value
- return filtered_dict
-
-
+ )
+ )
+ return callbacks
+
+
+def invoke_progress_callbacks(callbacks, bytes_transferred):
+ """Calls all progress callbacks
+
+ :param callbacks: A list of progress callbacks to invoke
+ :param bytes_transferred: The number of bytes transferred. This is passed
+ to the callbacks. If no bytes were transferred the callbacks will not
+ be invoked because no progress was achieved. It is also possible
+ to receive a negative amount which comes from retrying a transfer
+ request.
+ """
+ # Only invoke the callbacks if bytes were actually transferred.
+ if bytes_transferred:
+ for callback in callbacks:
+ callback(bytes_transferred=bytes_transferred)
+
+
+def get_filtered_dict(original_dict, whitelisted_keys):
+ """Gets a dictionary filtered by whitelisted keys
+
+ :param original_dict: The original dictionary of arguments to source keys
+ and values.
+    :param whitelisted_keys: A list of keys to include in the filtered
+ dictionary.
+
+ :returns: A dictionary containing key/values from the original dictionary
+ whose key was included in the whitelist
+ """
+ filtered_dict = {}
+ for key, value in original_dict.items():
+ if key in whitelisted_keys:
+ filtered_dict[key] = value
+ return filtered_dict
+
+
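A quick example of get_filtered_dict, which is how the upload submission task trims user-supplied ExtraArgs down to what upload_part actually accepts; the argument values are placeholders.

from s3transfer.utils import get_filtered_dict

extra_args = {
    'ACL': 'private',
    'RequestPayer': 'requester',
    'SSECustomerAlgorithm': 'AES256',
}
upload_part_whitelist = [
    'SSECustomerKey',
    'SSECustomerAlgorithm',
    'SSECustomerKeyMD5',
    'RequestPayer',
    'ExpectedBucketOwner',
]
print(get_filtered_dict(extra_args, upload_part_whitelist))
# {'RequestPayer': 'requester', 'SSECustomerAlgorithm': 'AES256'}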
class CallArgs:
- def __init__(self, **kwargs):
- """A class that records call arguments
-
- The call arguments must be passed as keyword arguments. It will set
- each keyword argument as an attribute of the object along with its
- associated value.
- """
- for arg, value in kwargs.items():
- setattr(self, arg, value)
-
-
+ def __init__(self, **kwargs):
+ """A class that records call arguments
+
+ The call arguments must be passed as keyword arguments. It will set
+ each keyword argument as an attribute of the object along with its
+ associated value.
+ """
+ for arg, value in kwargs.items():
+ setattr(self, arg, value)
+
+
class FunctionContainer:
- """An object that contains a function and any args or kwargs to call it
-
- When called the provided function will be called with provided args
- and kwargs.
- """
-
- def __init__(self, func, *args, **kwargs):
- self._func = func
- self._args = args
- self._kwargs = kwargs
-
- def __repr__(self):
+ """An object that contains a function and any args or kwargs to call it
+
+ When called the provided function will be called with provided args
+ and kwargs.
+ """
+
+ def __init__(self, func, *args, **kwargs):
+ self._func = func
+ self._args = args
+ self._kwargs = kwargs
+
+ def __repr__(self):
return 'Function: {} with args {} and kwargs {}'.format(
self._func, self._args, self._kwargs
)
-
- def __call__(self):
- return self._func(*self._args, **self._kwargs)
-
-
+
+ def __call__(self):
+ return self._func(*self._args, **self._kwargs)
+
+
class CountCallbackInvoker:
- """An abstraction to invoke a callback when a shared count reaches zero
-
-    :param callback: Callback to invoke when the finalized count reaches zero
- """
-
- def __init__(self, callback):
- self._lock = threading.Lock()
- self._callback = callback
- self._count = 0
- self._is_finalized = False
-
- @property
- def current_count(self):
- with self._lock:
- return self._count
-
- def increment(self):
- """Increment the count by one"""
- with self._lock:
- if self._is_finalized:
- raise RuntimeError(
- 'Counter has been finalized it can no longer be '
- 'incremented.'
- )
- self._count += 1
-
- def decrement(self):
- """Decrement the count by one"""
- with self._lock:
- if self._count == 0:
- raise RuntimeError(
+ """An abstraction to invoke a callback when a shared count reaches zero
+
+    :param callback: Callback to invoke when the finalized count reaches zero
+ """
+
+ def __init__(self, callback):
+ self._lock = threading.Lock()
+ self._callback = callback
+ self._count = 0
+ self._is_finalized = False
+
+ @property
+ def current_count(self):
+ with self._lock:
+ return self._count
+
+ def increment(self):
+ """Increment the count by one"""
+ with self._lock:
+ if self._is_finalized:
+ raise RuntimeError(
+ 'Counter has been finalized it can no longer be '
+ 'incremented.'
+ )
+ self._count += 1
+
+ def decrement(self):
+ """Decrement the count by one"""
+ with self._lock:
+ if self._count == 0:
+ raise RuntimeError(
'Counter is at zero. It cannot dip below zero'
)
- self._count -= 1
- if self._is_finalized and self._count == 0:
- self._callback()
-
- def finalize(self):
- """Finalize the counter
-
-        Once finalized, the counter can never be incremented and the callback
- can be invoked once the count reaches zero
- """
- with self._lock:
- self._is_finalized = True
- if self._count == 0:
- self._callback()
-
-
+ self._count -= 1
+ if self._is_finalized and self._count == 0:
+ self._callback()
+
+ def finalize(self):
+ """Finalize the counter
+
+        Once finalized, the counter can never be incremented, and the callback
+        will be invoked once the count reaches zero
+ """
+ with self._lock:
+ self._is_finalized = True
+ if self._count == 0:
+ self._callback()
+
+
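A short sketch of the increment/decrement/finalize flow documented above (import path assumed): the callback only fires once the counter has been finalized and the count returns to zero.

    from s3transfer.utils import CountCallbackInvoker

    fired = []
    invoker = CountCallbackInvoker(lambda: fired.append(True))
    invoker.increment()   # one outstanding unit of work
    invoker.finalize()    # no further increments allowed; callback still pending
    invoker.decrement()   # count reaches zero after finalize, so the callback fires
    assert fired == [True]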
class OSUtils:
_MAX_FILENAME_LEN = 255
- def get_file_size(self, filename):
- return os.path.getsize(filename)
-
- def open_file_chunk_reader(self, filename, start_byte, size, callbacks):
+ def get_file_size(self, filename):
+ return os.path.getsize(filename)
+
+ def open_file_chunk_reader(self, filename, start_byte, size, callbacks):
return ReadFileChunk.from_filename(
filename, start_byte, size, callbacks, enable_callbacks=False
)
-
+
def open_file_chunk_reader_from_fileobj(
self,
fileobj,
@@ -259,7 +259,7 @@ class OSUtils:
callbacks,
close_callbacks=None,
):
- return ReadFileChunk(
+ return ReadFileChunk(
fileobj,
chunk_size,
full_file_size,
@@ -267,58 +267,58 @@ class OSUtils:
enable_callbacks=False,
close_callbacks=close_callbacks,
)
-
- def open(self, filename, mode):
- return open(filename, mode)
-
- def remove_file(self, filename):
- """Remove a file, noop if file does not exist."""
- # Unlike os.remove, if the file does not exist,
- # then this method does nothing.
- try:
- os.remove(filename)
- except OSError:
- pass
-
- def rename_file(self, current_filename, new_filename):
- rename_file(current_filename, new_filename)
-
- def is_special_file(cls, filename):
- """Checks to see if a file is a special UNIX file.
-
- It checks if the file is a character special device, block special
- device, FIFO, or socket.
-
- :param filename: Name of the file
-
-        :returns: True if the file is a special file. False if it is not.
- """
- # If it does not exist, it must be a new file so it cannot be
- # a special file.
- if not os.path.exists(filename):
- return False
- mode = os.stat(filename).st_mode
- # Character special device.
- if stat.S_ISCHR(mode):
- return True
- # Block special device
- if stat.S_ISBLK(mode):
- return True
- # Named pipe / FIFO
- if stat.S_ISFIFO(mode):
- return True
- # Socket.
- if stat.S_ISSOCK(mode):
- return True
- return False
-
+
+ def open(self, filename, mode):
+ return open(filename, mode)
+
+ def remove_file(self, filename):
+ """Remove a file, noop if file does not exist."""
+ # Unlike os.remove, if the file does not exist,
+ # then this method does nothing.
+ try:
+ os.remove(filename)
+ except OSError:
+ pass
+
+ def rename_file(self, current_filename, new_filename):
+ rename_file(current_filename, new_filename)
+
+ def is_special_file(cls, filename):
+ """Checks to see if a file is a special UNIX file.
+
+ It checks if the file is a character special device, block special
+ device, FIFO, or socket.
+
+ :param filename: Name of the file
+
+        :returns: True if the file is a special file. False if it is not.
+ """
+ # If it does not exist, it must be a new file so it cannot be
+ # a special file.
+ if not os.path.exists(filename):
+ return False
+ mode = os.stat(filename).st_mode
+ # Character special device.
+ if stat.S_ISCHR(mode):
+ return True
+ # Block special device
+ if stat.S_ISBLK(mode):
+ return True
+ # Named pipe / FIFO
+ if stat.S_ISFIFO(mode):
+ return True
+ # Socket.
+ if stat.S_ISSOCK(mode):
+ return True
+ return False
+
def get_temp_filename(self, filename):
suffix = os.extsep + random_file_extension()
path = os.path.dirname(filename)
name = os.path.basename(filename)
temp_filename = name[: self._MAX_FILENAME_LEN - len(suffix)] + suffix
return os.path.join(path, temp_filename)
-
+
def allocate(self, filename, size):
try:
with self.open(filename, 'wb') as f:
@@ -329,72 +329,72 @@ class OSUtils:
class DeferredOpenFile:
- def __init__(self, filename, start_byte=0, mode='rb', open_function=open):
- """A class that defers the opening of a file till needed
-
- This is useful for deferring opening of a file till it is needed
- in a separate thread, as there is a limit of how many open files
- there can be in a single thread for most operating systems. The
- file gets opened in the following methods: ``read()``, ``seek()``,
- and ``__enter__()``
-
- :type filename: str
- :param filename: The name of the file to open
-
- :type start_byte: int
- :param start_byte: The byte to seek to when the file is opened.
-
- :type mode: str
- :param mode: The mode to use to open the file
-
- :type open_function: function
- :param open_function: The function to use to open the file
- """
- self._filename = filename
- self._fileobj = None
- self._start_byte = start_byte
- self._mode = mode
- self._open_function = open_function
-
- def _open_if_needed(self):
- if self._fileobj is None:
- self._fileobj = self._open_function(self._filename, self._mode)
- if self._start_byte != 0:
- self._fileobj.seek(self._start_byte)
-
- @property
- def name(self):
- return self._filename
-
- def read(self, amount=None):
- self._open_if_needed()
- return self._fileobj.read(amount)
-
- def write(self, data):
- self._open_if_needed()
- self._fileobj.write(data)
-
+ def __init__(self, filename, start_byte=0, mode='rb', open_function=open):
+ """A class that defers the opening of a file till needed
+
+ This is useful for deferring opening of a file till it is needed
+ in a separate thread, as there is a limit of how many open files
+ there can be in a single thread for most operating systems. The
+ file gets opened in the following methods: ``read()``, ``seek()``,
+ and ``__enter__()``
+
+ :type filename: str
+ :param filename: The name of the file to open
+
+ :type start_byte: int
+ :param start_byte: The byte to seek to when the file is opened.
+
+ :type mode: str
+ :param mode: The mode to use to open the file
+
+ :type open_function: function
+ :param open_function: The function to use to open the file
+ """
+ self._filename = filename
+ self._fileobj = None
+ self._start_byte = start_byte
+ self._mode = mode
+ self._open_function = open_function
+
+ def _open_if_needed(self):
+ if self._fileobj is None:
+ self._fileobj = self._open_function(self._filename, self._mode)
+ if self._start_byte != 0:
+ self._fileobj.seek(self._start_byte)
+
+ @property
+ def name(self):
+ return self._filename
+
+ def read(self, amount=None):
+ self._open_if_needed()
+ return self._fileobj.read(amount)
+
+ def write(self, data):
+ self._open_if_needed()
+ self._fileobj.write(data)
+
def seek(self, where, whence=0):
- self._open_if_needed()
+ self._open_if_needed()
self._fileobj.seek(where, whence)
-
- def tell(self):
- if self._fileobj is None:
- return self._start_byte
- return self._fileobj.tell()
-
- def close(self):
- if self._fileobj:
- self._fileobj.close()
-
- def __enter__(self):
- self._open_if_needed()
- return self
-
- def __exit__(self, *args, **kwargs):
- self.close()
-
-
+
+ def tell(self):
+ if self._fileobj is None:
+ return self._start_byte
+ return self._fileobj.tell()
+
+ def close(self):
+ if self._fileobj:
+ self._fileobj.close()
+
+ def __enter__(self):
+ self._open_if_needed()
+ return self
+
+ def __exit__(self, *args, **kwargs):
+ self.close()
+
+
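A sketch of the deferred-open behaviour described in the docstring above; the filename is a placeholder and the import path is assumed:

    from s3transfer.utils import DeferredOpenFile

    deferred = DeferredOpenFile('example.bin', start_byte=1024)
    # Nothing has been opened yet; the open happens on read()/seek()/__enter__().
    with deferred as f:
        header = f.read(16)  # the file is opened here and seeked to byte 1024 first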
class ReadFileChunk:
def __init__(
self,
@@ -405,42 +405,42 @@ class ReadFileChunk:
enable_callbacks=True,
close_callbacks=None,
):
- """
-
- Given a file object shown below::
-
- |___________________________________________________|
- 0 | | full_file_size
- |----chunk_size---|
- f.tell()
-
- :type fileobj: file
- :param fileobj: File like object
-
- :type chunk_size: int
- :param chunk_size: The max chunk size to read. Trying to read
-            past the end of the chunk size will behave like you've
- reached the end of the file.
-
- :type full_file_size: int
- :param full_file_size: The entire content length associated
- with ``fileobj``.
-
- :type callbacks: A list of function(amount_read)
- :param callbacks: Called whenever data is read from this object in the
- order provided.
-
- :type enable_callbacks: boolean
-        :param enable_callbacks: True to run callbacks. Otherwise, do not
-            run callbacks
-
- :type close_callbacks: A list of function()
- :param close_callbacks: Called when close is called. The function
- should take no arguments.
- """
- self._fileobj = fileobj
- self._start_byte = self._fileobj.tell()
- self._size = self._calculate_file_size(
+ """
+
+ Given a file object shown below::
+
+ |___________________________________________________|
+ 0 | | full_file_size
+ |----chunk_size---|
+ f.tell()
+
+ :type fileobj: file
+ :param fileobj: File like object
+
+ :type chunk_size: int
+ :param chunk_size: The max chunk size to read. Trying to read
+            past the end of the chunk size will behave like you've
+ reached the end of the file.
+
+ :type full_file_size: int
+ :param full_file_size: The entire content length associated
+ with ``fileobj``.
+
+ :type callbacks: A list of function(amount_read)
+ :param callbacks: Called whenever data is read from this object in the
+ order provided.
+
+ :type enable_callbacks: boolean
+        :param enable_callbacks: True to run callbacks. Otherwise, do not
+            run callbacks
+
+ :type close_callbacks: A list of function()
+ :param close_callbacks: Called when close is called. The function
+ should take no arguments.
+ """
+ self._fileobj = fileobj
+ self._start_byte = self._fileobj.tell()
+ self._size = self._calculate_file_size(
self._fileobj,
requested_size=chunk_size,
start_byte=self._start_byte,
@@ -448,16 +448,16 @@ class ReadFileChunk:
)
# _amount_read represents the position in the chunk and may exceed
# the chunk size, but won't allow reads out of bounds.
- self._amount_read = 0
- self._callbacks = callbacks
- if callbacks is None:
- self._callbacks = []
- self._callbacks_enabled = enable_callbacks
- self._close_callbacks = close_callbacks
- if close_callbacks is None:
- self._close_callbacks = close_callbacks
-
- @classmethod
+ self._amount_read = 0
+ self._callbacks = callbacks
+ if callbacks is None:
+ self._callbacks = []
+ self._callbacks_enabled = enable_callbacks
+ self._close_callbacks = close_callbacks
+ if close_callbacks is None:
+ self._close_callbacks = close_callbacks
+
+ @classmethod
def from_filename(
cls,
filename,
@@ -466,70 +466,70 @@ class ReadFileChunk:
callbacks=None,
enable_callbacks=True,
):
- """Convenience factory function to create from a filename.
-
- :type start_byte: int
- :param start_byte: The first byte from which to start reading.
-
- :type chunk_size: int
- :param chunk_size: The max chunk size to read. Trying to read
-            past the end of the chunk size will behave like you've
- reached the end of the file.
-
- :type full_file_size: int
- :param full_file_size: The entire content length associated
- with ``fileobj``.
-
- :type callbacks: function(amount_read)
- :param callbacks: Called whenever data is read from this object.
-
- :type enable_callbacks: bool
- :param enable_callbacks: Indicate whether to invoke callback
- during read() calls.
-
- :rtype: ``ReadFileChunk``
- :return: A new instance of ``ReadFileChunk``
-
- """
- f = open(filename, 'rb')
- f.seek(start_byte)
- file_size = os.fstat(f.fileno()).st_size
- return cls(f, chunk_size, file_size, callbacks, enable_callbacks)
-
+ """Convenience factory function to create from a filename.
+
+ :type start_byte: int
+ :param start_byte: The first byte from which to start reading.
+
+ :type chunk_size: int
+ :param chunk_size: The max chunk size to read. Trying to read
+            past the end of the chunk size will behave like you've
+ reached the end of the file.
+
+ :type full_file_size: int
+ :param full_file_size: The entire content length associated
+ with ``fileobj``.
+
+ :type callbacks: function(amount_read)
+ :param callbacks: Called whenever data is read from this object.
+
+ :type enable_callbacks: bool
+ :param enable_callbacks: Indicate whether to invoke callback
+ during read() calls.
+
+ :rtype: ``ReadFileChunk``
+ :return: A new instance of ``ReadFileChunk``
+
+ """
+ f = open(filename, 'rb')
+ f.seek(start_byte)
+ file_size = os.fstat(f.fileno()).st_size
+ return cls(f, chunk_size, file_size, callbacks, enable_callbacks)
+
def _calculate_file_size(
self, fileobj, requested_size, start_byte, actual_file_size
):
- max_chunk_size = actual_file_size - start_byte
- return min(max_chunk_size, requested_size)
-
- def read(self, amount=None):
+ max_chunk_size = actual_file_size - start_byte
+ return min(max_chunk_size, requested_size)
+
+ def read(self, amount=None):
amount_left = max(self._size - self._amount_read, 0)
- if amount is None:
+ if amount is None:
amount_to_read = amount_left
- else:
+ else:
amount_to_read = min(amount_left, amount)
- data = self._fileobj.read(amount_to_read)
- self._amount_read += len(data)
- if self._callbacks is not None and self._callbacks_enabled:
- invoke_progress_callbacks(self._callbacks, len(data))
- return data
-
- def signal_transferring(self):
- self.enable_callback()
- if hasattr(self._fileobj, 'signal_transferring'):
- self._fileobj.signal_transferring()
-
- def signal_not_transferring(self):
- self.disable_callback()
- if hasattr(self._fileobj, 'signal_not_transferring'):
- self._fileobj.signal_not_transferring()
-
- def enable_callback(self):
- self._callbacks_enabled = True
-
- def disable_callback(self):
- self._callbacks_enabled = False
-
+ data = self._fileobj.read(amount_to_read)
+ self._amount_read += len(data)
+ if self._callbacks is not None and self._callbacks_enabled:
+ invoke_progress_callbacks(self._callbacks, len(data))
+ return data
+
+ def signal_transferring(self):
+ self.enable_callback()
+ if hasattr(self._fileobj, 'signal_transferring'):
+ self._fileobj.signal_transferring()
+
+ def signal_not_transferring(self):
+ self.disable_callback()
+ if hasattr(self._fileobj, 'signal_not_transferring'):
+ self._fileobj.signal_not_transferring()
+
+ def enable_callback(self):
+ self._callbacks_enabled = True
+
+ def disable_callback(self):
+ self._callbacks_enabled = False
+
def seek(self, where, whence=0):
if whence not in (0, 1, 2):
# Mimic io's error for invalid whence values
@@ -544,202 +544,202 @@ class ReadFileChunk:
where += self._size
self._fileobj.seek(max(where, self._start_byte))
- if self._callbacks is not None and self._callbacks_enabled:
- # To also rewind the callback() for an accurate progress report
+ if self._callbacks is not None and self._callbacks_enabled:
+ # To also rewind the callback() for an accurate progress report
bounded_where = max(min(where - self._start_byte, self._size), 0)
bounded_amount_read = min(self._amount_read, self._size)
amount = bounded_where - bounded_amount_read
- invoke_progress_callbacks(
+ invoke_progress_callbacks(
self._callbacks, bytes_transferred=amount
)
self._amount_read = max(where - self._start_byte, 0)
-
- def close(self):
- if self._close_callbacks is not None and self._callbacks_enabled:
- for callback in self._close_callbacks:
- callback()
- self._fileobj.close()
-
- def tell(self):
- return self._amount_read
-
- def __len__(self):
- # __len__ is defined because requests will try to determine the length
- # of the stream to set a content length. In the normal case
- # of the file it will just stat the file, but we need to change that
- # behavior. By providing a __len__, requests will use that instead
- # of stat'ing the file.
- return self._size
-
- def __enter__(self):
- return self
-
- def __exit__(self, *args, **kwargs):
- self.close()
-
- def __iter__(self):
- # This is a workaround for http://bugs.python.org/issue17575
- # Basically httplib will try to iterate over the contents, even
- # if its a file like object. This wasn't noticed because we've
- # already exhausted the stream so iterating over the file immediately
- # stops, which is what we're simulating here.
- return iter([])
-
-
+
+ def close(self):
+ if self._close_callbacks is not None and self._callbacks_enabled:
+ for callback in self._close_callbacks:
+ callback()
+ self._fileobj.close()
+
+ def tell(self):
+ return self._amount_read
+
+ def __len__(self):
+ # __len__ is defined because requests will try to determine the length
+ # of the stream to set a content length. In the normal case
+ # of the file it will just stat the file, but we need to change that
+ # behavior. By providing a __len__, requests will use that instead
+ # of stat'ing the file.
+ return self._size
+
+ def __enter__(self):
+ return self
+
+ def __exit__(self, *args, **kwargs):
+ self.close()
+
+ def __iter__(self):
+ # This is a workaround for http://bugs.python.org/issue17575
+ # Basically httplib will try to iterate over the contents, even
+ # if its a file like object. This wasn't noticed because we've
+ # already exhausted the stream so iterating over the file immediately
+ # stops, which is what we're simulating here.
+ return iter([])
+
+
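A usage sketch of ReadFileChunk.from_filename, assuming the filename exists and that progress callbacks are invoked with a bytes_transferred keyword argument (both are assumptions here):

    from s3transfer.utils import ReadFileChunk

    def on_progress(bytes_transferred):
        print('read', bytes_transferred, 'bytes')

    chunk = ReadFileChunk.from_filename(
        'example.bin', start_byte=0, chunk_size=8 * 1024 * 1024,
        callbacks=[on_progress],
    )
    data = chunk.read(1024)  # never reads past the chunk boundary
    chunk.close()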
class StreamReaderProgress:
- """Wrapper for a read only stream that adds progress callbacks."""
-
- def __init__(self, stream, callbacks=None):
- self._stream = stream
- self._callbacks = callbacks
- if callbacks is None:
- self._callbacks = []
-
- def read(self, *args, **kwargs):
- value = self._stream.read(*args, **kwargs)
- invoke_progress_callbacks(self._callbacks, len(value))
- return value
-
-
-class NoResourcesAvailable(Exception):
- pass
-
-
+ """Wrapper for a read only stream that adds progress callbacks."""
+
+ def __init__(self, stream, callbacks=None):
+ self._stream = stream
+ self._callbacks = callbacks
+ if callbacks is None:
+ self._callbacks = []
+
+ def read(self, *args, **kwargs):
+ value = self._stream.read(*args, **kwargs)
+ invoke_progress_callbacks(self._callbacks, len(value))
+ return value
+
+
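A sketch that wraps an ordinary file object with StreamReaderProgress; the filename is a placeholder and the callback signature assumes a bytes_transferred keyword:

    from s3transfer.utils import StreamReaderProgress

    def on_progress(bytes_transferred):
        print(bytes_transferred, 'bytes read')

    with open('example.bin', 'rb') as raw:
        stream = StreamReaderProgress(raw, callbacks=[on_progress])
        while stream.read(4096):
            pass  # each read() reports its size through the callback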
+class NoResourcesAvailable(Exception):
+ pass
+
+
class TaskSemaphore:
- def __init__(self, count):
- """A semaphore for the purpose of limiting the number of tasks
-
- :param count: The size of semaphore
- """
- self._semaphore = threading.Semaphore(count)
-
- def acquire(self, tag, blocking=True):
- """Acquire the semaphore
-
- :param tag: A tag identifying what is acquiring the semaphore. Note
- that this is not really needed to directly use this class but is
- needed for API compatibility with the SlidingWindowSemaphore
- implementation.
-        :param blocking: If True, block until it can be acquired. If False,
+ def __init__(self, count):
+ """A semaphore for the purpose of limiting the number of tasks
+
+ :param count: The size of semaphore
+ """
+ self._semaphore = threading.Semaphore(count)
+
+ def acquire(self, tag, blocking=True):
+ """Acquire the semaphore
+
+ :param tag: A tag identifying what is acquiring the semaphore. Note
+ that this is not really needed to directly use this class but is
+ needed for API compatibility with the SlidingWindowSemaphore
+ implementation.
+        :param blocking: If True, block until it can be acquired. If False,
            do not block and raise an exception if it cannot be acquired.
-
- :returns: A token (can be None) to use when releasing the semaphore
- """
- logger.debug("Acquiring %s", tag)
- if not self._semaphore.acquire(blocking):
- raise NoResourcesAvailable("Cannot acquire tag '%s'" % tag)
-
- def release(self, tag, acquire_token):
- """Release the semaphore
-
- :param tag: A tag identifying what is releasing the semaphore
- :param acquire_token: The token returned from when the semaphore was
- acquired. Note that this is not really needed to directly use this
- class but is needed for API compatibility with the
- SlidingWindowSemaphore implementation.
- """
+
+ :returns: A token (can be None) to use when releasing the semaphore
+ """
+ logger.debug("Acquiring %s", tag)
+ if not self._semaphore.acquire(blocking):
+ raise NoResourcesAvailable("Cannot acquire tag '%s'" % tag)
+
+ def release(self, tag, acquire_token):
+ """Release the semaphore
+
+ :param tag: A tag identifying what is releasing the semaphore
+ :param acquire_token: The token returned from when the semaphore was
+ acquired. Note that this is not really needed to directly use this
+ class but is needed for API compatibility with the
+ SlidingWindowSemaphore implementation.
+ """
logger.debug(f"Releasing acquire {tag}/{acquire_token}")
- self._semaphore.release()
-
-
-class SlidingWindowSemaphore(TaskSemaphore):
- """A semaphore used to coordinate sequential resource access.
-
- This class is similar to the stdlib BoundedSemaphore:
-
- * It's initialized with a count.
- * Each call to ``acquire()`` decrements the counter.
- * If the count is at zero, then ``acquire()`` will either block until the
- count increases, or if ``blocking=False``, then it will raise
- a NoResourcesAvailable exception indicating that it failed to acquire the
- semaphore.
-
- The main difference is that this semaphore is used to limit
- access to a resource that requires sequential access. For example,
- if I want to access resource R that has 20 subresources R_0 - R_19,
- this semaphore can also enforce that you only have a max range of
- 10 at any given point in time. You must also specify a tag name
- when you acquire the semaphore. The sliding window semantics apply
- on a per tag basis. The internal count will only be incremented
- when the minimum sequence number for a tag is released.
-
- """
-
- def __init__(self, count):
- self._count = count
- # Dict[tag, next_sequence_number].
- self._tag_sequences = defaultdict(int)
- self._lowest_sequence = {}
- self._lock = threading.Lock()
- self._condition = threading.Condition(self._lock)
- # Dict[tag, List[sequence_number]]
- self._pending_release = {}
-
- def current_count(self):
- with self._lock:
- return self._count
-
- def acquire(self, tag, blocking=True):
- logger.debug("Acquiring %s", tag)
- self._condition.acquire()
- try:
- if self._count == 0:
- if not blocking:
- raise NoResourcesAvailable("Cannot acquire tag '%s'" % tag)
- else:
- while self._count == 0:
- self._condition.wait()
- # self._count is no longer zero.
- # First, check if this is the first time we're seeing this tag.
- sequence_number = self._tag_sequences[tag]
- if sequence_number == 0:
- # First time seeing the tag, so record we're at 0.
- self._lowest_sequence[tag] = sequence_number
- self._tag_sequences[tag] += 1
- self._count -= 1
- return sequence_number
- finally:
- self._condition.release()
-
- def release(self, tag, acquire_token):
- sequence_number = acquire_token
- logger.debug("Releasing acquire %s/%s", tag, sequence_number)
- self._condition.acquire()
- try:
- if tag not in self._tag_sequences:
- raise ValueError("Attempted to release unknown tag: %s" % tag)
- max_sequence = self._tag_sequences[tag]
- if self._lowest_sequence[tag] == sequence_number:
- # We can immediately process this request and free up
- # resources.
- self._lowest_sequence[tag] += 1
- self._count += 1
- self._condition.notify()
- queued = self._pending_release.get(tag, [])
- while queued:
- if self._lowest_sequence[tag] == queued[-1]:
- queued.pop()
- self._lowest_sequence[tag] += 1
- self._count += 1
- else:
- break
- elif self._lowest_sequence[tag] < sequence_number < max_sequence:
- # We can't do anything right now because we're still waiting
- # for the min sequence for the tag to be released. We have
- # to queue this for pending release.
+ self._semaphore.release()
+
+
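A sketch of the acquire/release contract for TaskSemaphore (import path assumed); for this class the tag is informational and the acquire token is None:

    from s3transfer.utils import NoResourcesAvailable, TaskSemaphore

    semaphore = TaskSemaphore(2)
    token = semaphore.acquire('upload-part')   # blocks if both slots are in use
    try:
        pass  # do the bounded unit of work here
    finally:
        semaphore.release('upload-part', token)  # token is None for this class

    # With blocking=False, a failed acquire raises NoResourcesAvailable
    # instead of waiting.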
+class SlidingWindowSemaphore(TaskSemaphore):
+ """A semaphore used to coordinate sequential resource access.
+
+ This class is similar to the stdlib BoundedSemaphore:
+
+ * It's initialized with a count.
+ * Each call to ``acquire()`` decrements the counter.
+ * If the count is at zero, then ``acquire()`` will either block until the
+ count increases, or if ``blocking=False``, then it will raise
+ a NoResourcesAvailable exception indicating that it failed to acquire the
+ semaphore.
+
+ The main difference is that this semaphore is used to limit
+ access to a resource that requires sequential access. For example,
+ if I want to access resource R that has 20 subresources R_0 - R_19,
+ this semaphore can also enforce that you only have a max range of
+ 10 at any given point in time. You must also specify a tag name
+ when you acquire the semaphore. The sliding window semantics apply
+ on a per tag basis. The internal count will only be incremented
+ when the minimum sequence number for a tag is released.
+
+ """
+
+ def __init__(self, count):
+ self._count = count
+ # Dict[tag, next_sequence_number].
+ self._tag_sequences = defaultdict(int)
+ self._lowest_sequence = {}
+ self._lock = threading.Lock()
+ self._condition = threading.Condition(self._lock)
+ # Dict[tag, List[sequence_number]]
+ self._pending_release = {}
+
+ def current_count(self):
+ with self._lock:
+ return self._count
+
+ def acquire(self, tag, blocking=True):
+ logger.debug("Acquiring %s", tag)
+ self._condition.acquire()
+ try:
+ if self._count == 0:
+ if not blocking:
+ raise NoResourcesAvailable("Cannot acquire tag '%s'" % tag)
+ else:
+ while self._count == 0:
+ self._condition.wait()
+ # self._count is no longer zero.
+ # First, check if this is the first time we're seeing this tag.
+ sequence_number = self._tag_sequences[tag]
+ if sequence_number == 0:
+ # First time seeing the tag, so record we're at 0.
+ self._lowest_sequence[tag] = sequence_number
+ self._tag_sequences[tag] += 1
+ self._count -= 1
+ return sequence_number
+ finally:
+ self._condition.release()
+
+ def release(self, tag, acquire_token):
+ sequence_number = acquire_token
+ logger.debug("Releasing acquire %s/%s", tag, sequence_number)
+ self._condition.acquire()
+ try:
+ if tag not in self._tag_sequences:
+ raise ValueError("Attempted to release unknown tag: %s" % tag)
+ max_sequence = self._tag_sequences[tag]
+ if self._lowest_sequence[tag] == sequence_number:
+ # We can immediately process this request and free up
+ # resources.
+ self._lowest_sequence[tag] += 1
+ self._count += 1
+ self._condition.notify()
+ queued = self._pending_release.get(tag, [])
+ while queued:
+ if self._lowest_sequence[tag] == queued[-1]:
+ queued.pop()
+ self._lowest_sequence[tag] += 1
+ self._count += 1
+ else:
+ break
+ elif self._lowest_sequence[tag] < sequence_number < max_sequence:
+ # We can't do anything right now because we're still waiting
+ # for the min sequence for the tag to be released. We have
+ # to queue this for pending release.
self._pending_release.setdefault(tag, []).append(
sequence_number
)
- self._pending_release[tag].sort(reverse=True)
- else:
- raise ValueError(
- "Attempted to release unknown sequence number "
+ self._pending_release[tag].sort(reverse=True)
+ else:
+ raise ValueError(
+ "Attempted to release unknown sequence number "
"%s for tag: %s" % (sequence_number, tag)
)
- finally:
- self._condition.release()
-
-
+ finally:
+ self._condition.release()
+
+
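A sketch of the per-tag sliding window described in the docstring (import path assumed): releasing an out-of-order sequence number is queued until the lowest outstanding sequence for that tag is released.

    from s3transfer.utils import SlidingWindowSemaphore

    window = SlidingWindowSemaphore(3)
    seq0 = window.acquire('part')   # sequence 0
    seq1 = window.acquire('part')   # sequence 1
    seq2 = window.acquire('part')   # sequence 2
    window.release('part', seq1)    # queued; the count stays at 0
    window.release('part', seq0)    # lowest sequence released, frees sequences 0 and 1
    assert window.current_count() == 2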
class ChunksizeAdjuster:
def __init__(
self,
@@ -747,56 +747,56 @@ class ChunksizeAdjuster:
min_size=MIN_UPLOAD_CHUNKSIZE,
max_parts=MAX_PARTS,
):
- self.max_size = max_size
- self.min_size = min_size
- self.max_parts = max_parts
-
- def adjust_chunksize(self, current_chunksize, file_size=None):
- """Get a chunksize close to current that fits within all S3 limits.
-
- :type current_chunksize: int
- :param current_chunksize: The currently configured chunksize.
-
- :type file_size: int or None
- :param file_size: The size of the file to upload. This might be None
- if the object being transferred has an unknown size.
-
- :returns: A valid chunksize that fits within configured limits.
- """
- chunksize = current_chunksize
- if file_size is not None:
- chunksize = self._adjust_for_max_parts(chunksize, file_size)
- return self._adjust_for_chunksize_limits(chunksize)
-
- def _adjust_for_chunksize_limits(self, current_chunksize):
- if current_chunksize > self.max_size:
- logger.debug(
- "Chunksize greater than maximum chunksize. "
+ self.max_size = max_size
+ self.min_size = min_size
+ self.max_parts = max_parts
+
+ def adjust_chunksize(self, current_chunksize, file_size=None):
+ """Get a chunksize close to current that fits within all S3 limits.
+
+ :type current_chunksize: int
+ :param current_chunksize: The currently configured chunksize.
+
+ :type file_size: int or None
+ :param file_size: The size of the file to upload. This might be None
+ if the object being transferred has an unknown size.
+
+ :returns: A valid chunksize that fits within configured limits.
+ """
+ chunksize = current_chunksize
+ if file_size is not None:
+ chunksize = self._adjust_for_max_parts(chunksize, file_size)
+ return self._adjust_for_chunksize_limits(chunksize)
+
+ def _adjust_for_chunksize_limits(self, current_chunksize):
+ if current_chunksize > self.max_size:
+ logger.debug(
+ "Chunksize greater than maximum chunksize. "
"Setting to %s from %s." % (self.max_size, current_chunksize)
)
- return self.max_size
- elif current_chunksize < self.min_size:
- logger.debug(
- "Chunksize less than minimum chunksize. "
+ return self.max_size
+ elif current_chunksize < self.min_size:
+ logger.debug(
+ "Chunksize less than minimum chunksize. "
"Setting to %s from %s." % (self.min_size, current_chunksize)
)
- return self.min_size
- else:
- return current_chunksize
-
- def _adjust_for_max_parts(self, current_chunksize, file_size):
- chunksize = current_chunksize
- num_parts = int(math.ceil(file_size / float(chunksize)))
-
- while num_parts > self.max_parts:
- chunksize *= 2
- num_parts = int(math.ceil(file_size / float(chunksize)))
-
- if chunksize != current_chunksize:
- logger.debug(
- "Chunksize would result in the number of parts exceeding the "
+ return self.min_size
+ else:
+ return current_chunksize
+
+ def _adjust_for_max_parts(self, current_chunksize, file_size):
+ chunksize = current_chunksize
+ num_parts = int(math.ceil(file_size / float(chunksize)))
+
+ while num_parts > self.max_parts:
+ chunksize *= 2
+ num_parts = int(math.ceil(file_size / float(chunksize)))
+
+ if chunksize != current_chunksize:
+ logger.debug(
+ "Chunksize would result in the number of parts exceeding the "
"maximum. Setting to %s from %s."
% (chunksize, current_chunksize)
)
-
- return chunksize
+
+ return chunksize
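A sketch of adjust_chunksize, assuming the default constants correspond to the usual S3-style limits (5 MiB minimum part size, 10,000 part maximum); those values are assumptions here:

    from s3transfer.utils import ChunksizeAdjuster

    adjuster = ChunksizeAdjuster()
    # 8 MiB parts for a 5 TiB object would exceed the part limit, so the
    # chunksize is doubled until the part count fits (1 GiB under a
    # 10,000-part maximum).
    new_chunksize = adjuster.adjust_chunksize(8 * 1024 * 1024, file_size=5 * 1024 ** 4)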
diff --git a/contrib/python/s3transfer/py3/ya.make b/contrib/python/s3transfer/py3/ya.make
index 8759ec0761..964a630639 100644
--- a/contrib/python/s3transfer/py3/ya.make
+++ b/contrib/python/s3transfer/py3/ya.make
@@ -1,50 +1,50 @@
# Generated by devtools/yamaker (pypi).
PY3_LIBRARY()
-
+
OWNER(gebetix g:python-contrib)
VERSION(0.5.1)
-
+
LICENSE(Apache-2.0)
-
-PEERDIR(
- contrib/python/botocore
-)
-
+
+PEERDIR(
+ contrib/python/botocore
+)
+
NO_LINT()
NO_CHECK_IMPORTS(
s3transfer.crt
)
-PY_SRCS(
- TOP_LEVEL
- s3transfer/__init__.py
- s3transfer/bandwidth.py
- s3transfer/compat.py
+PY_SRCS(
+ TOP_LEVEL
+ s3transfer/__init__.py
+ s3transfer/bandwidth.py
+ s3transfer/compat.py
s3transfer/constants.py
s3transfer/copies.py
s3transfer/crt.py
- s3transfer/delete.py
- s3transfer/download.py
- s3transfer/exceptions.py
- s3transfer/futures.py
- s3transfer/manager.py
+ s3transfer/delete.py
+ s3transfer/download.py
+ s3transfer/exceptions.py
+ s3transfer/futures.py
+ s3transfer/manager.py
s3transfer/processpool.py
- s3transfer/subscribers.py
- s3transfer/tasks.py
- s3transfer/upload.py
- s3transfer/utils.py
-)
-
+ s3transfer/subscribers.py
+ s3transfer/tasks.py
+ s3transfer/upload.py
+ s3transfer/utils.py
+)
+
RESOURCE_FILES(
PREFIX contrib/python/s3transfer/py3/
.dist-info/METADATA
.dist-info/top_level.txt
)
-END()
+END()
RECURSE_FOR_TESTS(
tests
diff --git a/contrib/python/s3transfer/ya.make b/contrib/python/s3transfer/ya.make
index 657af5f9f8..bf4a234edb 100644
--- a/contrib/python/s3transfer/ya.make
+++ b/contrib/python/s3transfer/ya.make
@@ -1,18 +1,18 @@
PY23_LIBRARY()
-
+
LICENSE(Service-Py23-Proxy)
OWNER(g:python-contrib)
-
+
IF (PYTHON2)
PEERDIR(contrib/python/s3transfer/py2)
ELSE()
PEERDIR(contrib/python/s3transfer/py3)
ENDIF()
-
-NO_LINT()
-
-END()
+
+NO_LINT()
+
+END()
RECURSE(
py2
diff --git a/contrib/python/ya.make b/contrib/python/ya.make
index b44a2928e8..d01ced9f3a 100644
--- a/contrib/python/ya.make
+++ b/contrib/python/ya.make
@@ -960,7 +960,7 @@ RECURSE(
ruamel.yaml
Rx
s3-tests
- s3transfer
+ s3transfer
sacrebleu
salt-pepper
saneyaml
@@ -1022,7 +1022,7 @@ RECURSE(
spintop-openhtf
splunk-sdk-python
sqlalchemy
- SQLAlchemy-Continuum
+ SQLAlchemy-Continuum
sqlalchemy-stubs
SQLAlchemy-Utils
sqlparse
@@ -1086,7 +1086,7 @@ RECURSE(
transitions
transliterate
trollius
- trollsift
+ trollsift
Twiggy
twiggy-goodies
Twisted