diff options
| author | alexv-smirnov <[email protected]> | 2023-03-28 22:25:04 +0300 | 
|---|---|---|
| committer | alexv-smirnov <[email protected]> | 2023-03-28 22:25:04 +0300 | 
| commit | b8a17f9b1c166d2e9a26b99348a4c29d972caf55 (patch) | |
| tree | 1a2d881f1a9452b9c6103dbf69d73da7624e98e5 /build/scripts/fetch_from_sandbox.py | |
| parent | 25659221f18577ea38430a8ec3349836f5626b6a (diff) | |
Revert ymake build from ydb oss export
Diffstat (limited to 'build/scripts/fetch_from_sandbox.py')
| -rwxr-xr-x | build/scripts/fetch_from_sandbox.py | 269 | 
1 files changed, 0 insertions, 269 deletions
| diff --git a/build/scripts/fetch_from_sandbox.py b/build/scripts/fetch_from_sandbox.py deleted file mode 100755 index a99542e1743..00000000000 --- a/build/scripts/fetch_from_sandbox.py +++ /dev/null @@ -1,269 +0,0 @@ -import itertools -import json -import logging -import argparse -import os -import random -import subprocess -import sys -import time -import urllib2 -import uuid - -import fetch_from - - -ORIGIN_SUFFIX = '?origin=fetch-from-sandbox' -MDS_PREFIX = 'http://storage-int.mds.yandex.net/get-sandbox/' -TEMPORARY_ERROR_CODES = (429, 500, 503, 504) - - -def parse_args(): -    parser = argparse.ArgumentParser() -    fetch_from.add_common_arguments(parser) -    parser.add_argument('--resource-id', type=int, required=True) -    parser.add_argument('--custom-fetcher') -    parser.add_argument('--resource-file') -    return parser.parse_args() - - -class ResourceInfoError(Exception): -    pass - - -class UnsupportedProtocolException(Exception): -    pass - - -def _sky_path(): -    return "/usr/local/bin/sky" - - -def _is_skynet_avaliable(): -    if not os.path.exists(_sky_path()): -        return False -    try: -        subprocess.check_output([_sky_path(), "--version"]) -        return True -    except subprocess.CalledProcessError: -        return False -    except OSError: -        return False - - -def download_by_skynet(resource_info, file_name): -    def sky_get(skynet_id, target_dir, timeout=None): -        cmd_args = [_sky_path(), 'get', "-N", "Backbone", "--user", "--wait", "--dir", target_dir, skynet_id] -        if timeout is not None: -            cmd_args += ["--timeout", str(timeout)] -        logging.info('Call skynet with args: %s', cmd_args) -        stdout = subprocess.check_output(cmd_args).strip() -        logging.debug('Skynet call with args %s is finished, result is %s', cmd_args, stdout) -        return stdout - -    if not _is_skynet_avaliable(): -        raise UnsupportedProtocolException("Skynet is not available") - -    skynet_id = resource_info.get("skynet_id") -    if not skynet_id: -        raise ValueError("Resource does not have skynet_id") - -    temp_dir = os.path.abspath(fetch_from.uniq_string_generator()) -    os.mkdir(temp_dir) -    sky_get(skynet_id, temp_dir) -    return os.path.join(temp_dir, file_name) - - -def _urlopen(url, data=None, headers=None): -    n = 10 -    tout = 30 -    started = time.time() -    reqid = uuid.uuid4() - -    request = urllib2.Request(url, data=data, headers=headers or {}) -    request.add_header('X-Request-Timeout', str(tout)) -    request.add_header('X-Request-Id', str(reqid)) -    request.add_header('User-Agent', 'fetch_from_sandbox.py') -    for i in xrange(n): -        retry_after = i -        try: -            request.add_header('X-Request-Duration', str(int(time.time() - started))) -            return urllib2.urlopen(request, timeout=tout).read() - -        except urllib2.HTTPError as e: -            logging.warning('failed to fetch URL %s with HTTP code %d: %s', url, e.code, e) -            retry_after = int(e.headers.get('Retry-After', str(retry_after))) - -            if e.code not in TEMPORARY_ERROR_CODES: -                raise - -        except Exception as e: -            logging.warning('failed to fetch URL %s: %s', url, e) - -        if i + 1 == n: -            raise e - -        time.sleep(retry_after) - - -def _query(url): -    return json.loads(_urlopen(url)) - - -_SANDBOX_BASE_URL = 'https://sandbox.yandex-team.ru/api/v1.0' - - -def get_resource_info(resource_id, touch=False, no_links=False): -    url = ''.join((_SANDBOX_BASE_URL, '/resource/', str(resource_id))) -    headers = {} -    if touch: -        headers.update({'X-Touch-Resource': '1'}) -    if no_links: -        headers.update({'X-No-Links': '1'}) -    return _query(url) - - -def get_resource_http_links(resource_id): -    url = ''.join((_SANDBOX_BASE_URL, '/resource/', str(resource_id), '/data/http')) -    return [r['url'] + ORIGIN_SUFFIX for r in _query(url)] - - -def fetch_via_script(script, resource_id): -    return subprocess.check_output([script, str(resource_id)]).rstrip() - - -def fetch(resource_id, custom_fetcher): -    try: -        resource_info = get_resource_info(resource_id, touch=True, no_links=True) -    except Exception as e: -        sys.stderr.write( -            "Failed to fetch resource {}: {}\n".format(resource_id, str(e)) -        ) -        raise - -    if resource_info.get('state', 'DELETED') != 'READY': -        raise ResourceInfoError("Resource {} is not READY".format(resource_id)) - -    logging.info('Resource %s info %s', str(resource_id), json.dumps(resource_info)) - -    resource_file_name = os.path.basename(resource_info["file_name"]) -    expected_md5 = resource_info.get('md5') - -    proxy_link = resource_info['http']['proxy'] + ORIGIN_SUFFIX - -    mds_id = resource_info.get('attributes', {}).get('mds') -    mds_link = MDS_PREFIX + mds_id if mds_id else None - -    def get_storage_links(): -        storage_links = get_resource_http_links(resource_id) -        random.shuffle(storage_links) -        return storage_links - -    skynet = _is_skynet_avaliable() - -    if not skynet: -        logging.info("Skynet is not available, will try other protocols") - -    def iter_tries(): -        if skynet: -            yield lambda: download_by_skynet(resource_info, resource_file_name) - -        if custom_fetcher: -            yield lambda: fetch_via_script(custom_fetcher, resource_id) - -        # Don't try too hard here: we will get back to proxy later on -        yield lambda: fetch_from.fetch_url(proxy_link, False, resource_file_name, expected_md5, tries=2) -        for x in get_storage_links(): -            # Don't spend too much time connecting single host -            yield lambda: fetch_from.fetch_url(x, False, resource_file_name, expected_md5, tries=1) -            if mds_link is not None: -                # Don't try too hard here: we will get back to MDS later on -                yield lambda: fetch_from.fetch_url(mds_link, True, resource_file_name, expected_md5, tries=2) -        yield lambda: fetch_from.fetch_url(proxy_link, False, resource_file_name, expected_md5) -        if mds_link is not None: -            yield lambda: fetch_from.fetch_url(mds_link, True, resource_file_name, expected_md5) - -    if resource_info.get('attributes', {}).get('ttl') != 'inf': -        sys.stderr.write('WARNING: resource {} ttl is not "inf".\n'.format(resource_id)) - -    exc_info = None -    for i, action in enumerate(itertools.islice(iter_tries(), 0, 10)): -        try: -            fetched_file = action() -            break -        except UnsupportedProtocolException: -            pass -        except subprocess.CalledProcessError as e: -            logging.warning('failed to fetch resource %s with subprocess: %s', resource_id, e) -            time.sleep(i) -        except urllib2.HTTPError as e: -            logging.warning('failed to fetch resource %s with HTTP code %d: %s', resource_id, e.code, e) -            if e.code not in TEMPORARY_ERROR_CODES: -                exc_info = exc_info or sys.exc_info() -            time.sleep(i) -        except Exception as e: -            logging.exception(e) -            exc_info = exc_info or sys.exc_info() -            time.sleep(i) -    else: -        if exc_info: -            raise exc_info[0], exc_info[1], exc_info[2] -        else: -            raise Exception("No available protocol and/or server to fetch resource") - -    return fetched_file, resource_info['file_name'] - - -def _get_resource_info_from_file(resource_file): -    if resource_file is None or not os.path.exists(resource_file): -        return None - -    RESOURCE_INFO_JSON = "resource_info.json" -    RESOURCE_CONTENT_FILE_NAME = "resource" - -    resource_dir, resource_file = os.path.split(resource_file) -    if resource_file != RESOURCE_CONTENT_FILE_NAME: -        return None - -    resource_json = os.path.join(resource_dir, RESOURCE_INFO_JSON) -    if not os.path.isfile(resource_json): -        return None - -    try: -        with open(resource_json, 'r') as j: -            resource_info = json.load(j) -        resource_info['file_name']  # check consistency -        return resource_info -    except: -        logging.debug('Invalid %s in %s', RESOURCE_INFO_JSON, resource_dir) - -    return None - - -def main(args): -    custom_fetcher = os.environ.get('YA_CUSTOM_FETCHER') - -    resource_info = _get_resource_info_from_file(args.resource_file) -    if resource_info: -        fetched_file = args.resource_file -        file_name = resource_info['file_name'] -    else: -        # This code should be merged to ya and removed. -        fetched_file, file_name = fetch(args.resource_id, custom_fetcher) - -    fetch_from.process(fetched_file, file_name, args, remove=not custom_fetcher and not resource_info) - - -if __name__ == '__main__': -    args = parse_args() -    fetch_from.setup_logging(args, os.path.basename(__file__)) - -    try: -        main(args) -    except Exception as e: -        logging.exception(e) -        print >>sys.stderr, open(args.abs_log_path).read() -        sys.stderr.flush() - -        import error -        sys.exit(error.ExitCodes.INFRASTRUCTURE_ERROR if fetch_from.is_temporary(e) else 1) | 
