aboutsummaryrefslogblamecommitdiffstats
path: root/ya
blob: 307ca2d49ff746d11113dd479bbffaa5fbf87739 (plain) (tree)
1
2
3
4
5
6
7
8
9
10
11
12
13











                                                                                                         
                                                             

                   
                                                      
                     
                                                 
             
                         
                                                      
                     
                                                 
             
                          
                                                      
                     
                                                 
             
                           
                                                      
                     
                                                 
             
                  
                                                      
                     
                                                 
             
     
                  










































































































































































































































































































                                                                                                                        
#!/usr/bin/env python

# Please, keep this script in sync with arcadia/ya

import os
import sys
import platform

# Maximum number of download attempts made by _get() before the last error is re-raised.
RETRIES = 5
# Number of leading characters of an archive's MD5 hex digest used to name its cache entry in TOOLS_DIR.
HASH_PREFIX = 10

# Base URL the archive URLs below are built from; can be overridden with $YA_REGISTRY_ENDPOINT.
REGISTRY_ENDPOINT = os.environ.get("YA_REGISTRY_ENDPOINT", "https://s3.mds.yandex.net/devtools-registry")

# PLATFORM_MAP["data"] maps a platform key to the expected MD5 of its archive ("md5")
# and the list of URLs the archive may be downloaded from ("urls").
# Please do not change this dict, it is updated automatically
# Start of mapping
PLATFORM_MAP = {
    "data": {
        "darwin": {
            "md5": "6a4ae484c012b7d9b749e429cfec790b",
            "urls": [
                f"{REGISTRY_ENDPOINT}/5043396962"
            ]
        },
        "darwin-arm64": {
            "md5": "10e63fe43fa462a086fa1a3bada78835",
            "urls": [
                f"{REGISTRY_ENDPOINT}/5043396524"
            ]
        },
        "linux-aarch64": {
            "md5": "f3ceae756e70718853a2d97abb2979f8",
            "urls": [
                f"{REGISTRY_ENDPOINT}/5043395848"
            ]
        },
        "win32-clang-cl": {
            "md5": "c78b53797bb61a48d3157371e036d7b0",
            "urls": [
                f"{REGISTRY_ENDPOINT}/5043397428"
            ]
        },
        "linux": {
            "md5": "704629ae536e120412074a5be8bb0440",
            "urls": [
                f"{REGISTRY_ENDPOINT}/5043397896"
            ]
        }
    }
} # End of mapping


def create_dirs(path):
    """Create directory *path* (including missing parents) and return *path*.

    An already existing directory is not an error. Unlike a blanket EEXIST check,
    ``exist_ok=True`` still raises ``FileExistsError`` (an ``OSError``) when *path*
    exists but is not a directory, so a misconfigured cache location is reported
    here instead of failing later with a less obvious error.

    :param path: directory to create
    :return: the same *path*, for call chaining
    :raises OSError: if the directory cannot be created
    """
    os.makedirs(path, exist_ok=True)
    return path


def home_dir():
    """Return the current user's home directory, preferring the system's answer over $HOME.

    $HOME is considered unreliable in certain environments, so it is removed from
    os.environ for the duration of the lookup to make os.path.expanduser("~")
    resolve the home directory from /etc/passwd instead. The variable is put back
    before returning.

    :return: the expanded home directory when it is an absolute path, otherwise
             the original value of $HOME (None if $HOME was not set)
    """
    saved_home = os.environ.pop("HOME", None)
    try:
        resolved = os.path.expanduser("~")
        # An absolute result means the lookup succeeded; prefer it over $HOME.
        # Otherwise fall back to $HOME: when python is built with musl (this is
        # quite weird though), only users from /etc/passwd are properly resolved,
        # as musl does not have an nss module for LDAP integration.
        return resolved if os.path.isabs(resolved) else saved_home
    finally:
        # Restore the environment exactly as it was found
        if saved_home is not None:
            os.environ["HOME"] = saved_home


def misc_root():
    """Return the ya cache directory, creating it if needed.

    $YA_CACHE_DIR is used when set to a non-empty value; otherwise the directory is
    `.ya` inside the user's home directory.
    """
    cache_dir = os.getenv('YA_CACHE_DIR')
    if not cache_dir:
        cache_dir = os.path.join(home_dir(), '.ya')
    return create_dirs(cache_dir)


def tool_root():
    """Return the directory downloaded tools are cached in, creating it if needed.

    $YA_CACHE_DIR_TOOLS is used when set to a non-empty value; otherwise the directory
    is `tools` inside misc_root().
    """
    tools_dir = os.getenv('YA_CACHE_DIR_TOOLS')
    if not tools_dir:
        tools_dir = os.path.join(misc_root(), 'tools')
    return create_dirs(tools_dir)


# TODO: remove when switched to S3, won't be needed in OSS
def ya_token():
    """Return the OAuth token to send with download requests, if any.

    $YA_TOKEN wins when it is set to a non-empty value. Otherwise the token is read
    (with surrounding whitespace stripped) from the file named by $YA_TOKEN_PATH, or
    from `.ya_token` in the user's home directory when that variable is not set.

    :return: the token string, or None when no token could be read
    """

    def get_token_from_file():
        # Best effort: a missing or unreadable token file simply means "no token".
        # Only Exception is caught, so Ctrl-C / SystemExit are not swallowed.
        try:
            token_path = os.environ.get('YA_TOKEN_PATH')
            if token_path is None:
                # Resolve the home directory only when it is actually needed
                token_path = os.path.join(home_dir(), '.ya_token')
            with open(token_path, 'r') as f:
                return f.read().strip()
        except Exception:
            return None

    return os.getenv('YA_TOKEN') or get_token_from_file()


# Evaluated once at import time: tool_root() also creates the directory on disk.
# _get() and _get_dir() place downloaded archives and unpacked tools under it.
TOOLS_DIR = tool_root()


def uniq(size=6):
    """Return a random string of *size* lowercase ASCII letters and digits.

    Used to build temporary file and directory names that are unlikely to collide.
    """
    import random
    import string

    alphabet = string.ascii_lowercase + string.digits
    picked = [random.choice(alphabet) for _ in range(size)]
    return ''.join(picked)


# Set to True after the first _tune_ssl() call so the tuning is applied only once
_ssl_is_tuned = False


def _tune_ssl():
    """Switch the default HTTPS context to an unverified one and silence urllib3 warnings.

    Runs its body only on the first call; later calls return immediately.

    NOTE(review): this turns off TLS certificate verification for default HTTPS
    contexts in the whole process. Downloads are checked against an MD5 afterwards
    (see _atomic_fetch), but confirm this is still intended.
    """
    global _ssl_is_tuned
    if not _ssl_is_tuned:
        try:
            import ssl

            ssl._create_default_https_context = ssl._create_unverified_context
        except AttributeError:
            # This ssl module does not expose the private hooks; nothing to tune
            pass

        try:
            import urllib3

            urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
        except (AttributeError, ImportError):
            # urllib3 is optional
            pass

        _ssl_is_tuned = True


def _fetch(url, into):
    """Download *url* into the file *into* and return the MD5 hex digest of the data.

    Progress is reported on stderr: the URL, then one dot per read attempt between
    square brackets. The HTTP response is closed when the function exits, whether
    the download succeeded or not.

    :param url: URL to download
    :param into: path of the file to write (opened in 'wb' mode)
    :return: hex MD5 digest of the bytes written
    :raises Exception: whatever urlopen, the read loop or the file write raises
    """
    import hashlib

    _tune_ssl()

    from urllib.request import urlopen
    from urllib.request import Request

    request = Request(str(url))
    # TODO: Remove when switched to S3 distribution
    request.add_header('User-Agent', 'ya-bootstrap')
    token = ya_token()
    if token:
        request.add_header('Authorization', 'OAuth {}'.format(token))

    md5 = hashlib.md5()
    sys.stderr.write('Downloading %s ' % url)
    sys.stderr.flush()
    # The response is a context manager; using it guarantees the connection is released
    with urlopen(request, timeout=10) as conn:
        sys.stderr.write('[')
        sys.stderr.flush()
        try:
            with open(into, 'wb') as f:
                while True:
                    block = conn.read(1024 * 1024)
                    sys.stderr.write('.')
                    sys.stderr.flush()
                    if not block:
                        # An empty read means the body is exhausted
                        break
                    md5.update(block)
                    f.write(block)
            return md5.hexdigest()

        finally:
            # Always close the progress bracket, even when the download fails midway
            sys.stderr.write('] ')
            sys.stderr.flush()


def _atomic_fetch(url, into, md5):
    """Download *url* to *into*, publishing the file only if its MD5 matches *md5*.

    The data is first written to a uniquely named temporary file next to *into* and
    moved into place after the checksum has been verified, so *into* never holds a
    partial or corrupt download. The outcome ('OK' or 'ERROR: ...') is written to
    stderr.

    :param url: URL to download
    :param into: final destination path
    :param md5: expected hex MD5 digest of the downloaded data
    :raises Exception: on download failure or checksum mismatch (re-raised after logging)
    """
    tmp_dest = into + '.' + uniq()
    try:
        real_md5 = _fetch(url, tmp_dest)
        if real_md5 != md5:
            raise Exception('MD5 mismatched: %s differs from %s' % (real_md5, md5))
        # os.replace overwrites an existing destination on every platform, whereas
        # os.rename fails on Windows if another process has already put `into` in place
        os.replace(tmp_dest, into)
        sys.stderr.write('OK\n')
    except Exception as e:
        sys.stderr.write('ERROR: ' + str(e) + '\n')
        raise
    finally:
        # Drop the temporary file if it is still around (it is gone after a successful move)
        try:
            os.remove(tmp_dest)
        except OSError:
            pass


def _extract(path, into):
    """Unpack the tar archive at *path* into the directory *into*.

    The archive is opened with errorlevel=2, and is closed even if extraction fails.

    :param path: path of the tar archive (compression is auto-detected by tarfile.open)
    :param into: directory to extract into
    :raises tarfile.TarError: if the archive cannot be read
    :raises OSError: if a member cannot be written
    """
    import tarfile

    with tarfile.open(path, errorlevel=2) as tar:
        # tar.extractall() will try to set file ownership according to the attributes stored in the archive
        # by calling TarFile.chown() method.
        # As this information is hardly relevant to the point of deployment / extraction,
        # it will just fail (python2) if ya is executed with root euid, or silently set non-existent numeric owner
        # (python3) to the files being extracted.
        # mock it with noop to retain current user ownership.
        tar.chown = lambda *args, **kwargs: None

        tar.extractall(path=into)


def _get(urls, md5):
    """Return the path of the cached archive with checksum *md5*, downloading it if absent.

    The cache entry lives in TOOLS_DIR and is named after the first HASH_PREFIX
    characters of *md5*. Up to RETRIES download attempts are made, cycling through
    *urls*; after a failed attempt the function sleeps for as many seconds as the
    zero-based attempt number before trying again.

    :param urls: list of candidate download URLs
    :param md5: expected hex MD5 digest of the archive
    :return: path of the archive inside TOOLS_DIR
    :raises Exception: the error of the final attempt when every attempt fails
    """
    import time

    dest_path = os.path.join(TOOLS_DIR, md5[:HASH_PREFIX])
    if os.path.exists(dest_path):
        return dest_path

    for attempt in range(RETRIES):
        candidate_url_index = attempt % len(urls) if False else None  # placeholder removed below
        break

    for attempt in range(RETRIES):
        try:
            _atomic_fetch(urls[attempt % len(urls)], dest_path, md5)
        except Exception:
            # Out of attempts: let the caller see the last failure
            if attempt + 1 == RETRIES:
                raise
            time.sleep(attempt)
        else:
            break

    return dest_path


def _get_dir(urls, md5, ya_name):
    """Return the directory holding the unpacked tool, downloading and extracting it if needed.

    The directory lives in TOOLS_DIR and is named after the first HASH_PREFIX
    characters of *md5* plus '_d'. It is considered ready when it contains a file
    called *ya_name*. The archive is extracted into a uniquely named temporary
    directory which is then renamed into place; if the download or the extraction
    fails but the final directory has become ready in the meantime, that directory
    is returned instead of raising.

    :param urls: list of candidate download URLs for the archive
    :param md5: expected hex MD5 digest of the archive
    :param ya_name: file name whose presence marks the directory as ready
    :return: path of the directory with the unpacked tool
    :raises Exception: if the archive cannot be downloaded or extracted and no ready directory exists
    """
    import errno
    import shutil

    dest_dir = os.path.join(TOOLS_DIR, md5[:HASH_PREFIX] + '_d')

    if os.path.isfile(os.path.join(dest_dir, ya_name)):
        return dest_dir

    try:
        packed_path = _get(urls, md5)
    except Exception:
        if os.path.isfile(os.path.join(dest_dir, ya_name)):
            return dest_dir
        raise

    tmp_dir = dest_dir + '.' + uniq()
    try:
        try:
            _extract(packed_path, tmp_dir)
        except Exception:
            if os.path.isfile(os.path.join(dest_dir, ya_name)):
                return dest_dir
            raise

        try:
            os.rename(tmp_dir, dest_dir)
        except OSError as e:
            # A non-empty dest_dir means somebody else has already unpacked the tool.
            # POSIX lets rename() report that as either ENOTEMPTY or EEXIST, and
            # Windows reports an existing target as EEXIST, so accept both.
            if e.errno not in (errno.ENOTEMPTY, errno.EEXIST):
                raise

        return dest_dir
    finally:
        # Best-effort cleanup of the temporary directory and of the downloaded archive
        shutil.rmtree(tmp_dir, ignore_errors=True)
        try:
            os.remove(packed_path)
        except Exception:
            pass


def _mine_repo_root():
    """Return the directory containing this script, with symlinks resolved.

    The script is assumed to be located in the root of the repository.
    """
    script_path = os.path.realpath(__file__)
    return os.path.dirname(script_path)


def main():
    """Locate (downloading if necessary) the ya binary for this platform and run it.

    All command-line arguments are forwarded unchanged. On Windows the binary is run
    as a child process and its exit code is propagated; elsewhere the current process
    is replaced with it via os.execve.
    """
    if not os.path.exists(TOOLS_DIR):
        os.makedirs(TOOLS_DIR)

    forwarded_args = sys.argv[1:]
    platforms = PLATFORM_MAP['data']

    # Work out the platform name to look up in the map
    my_platform = platform.system().lower()
    my_machine = platform.machine().lower()
    if my_platform == 'linux':
        platform_string = platform.platform()
        if 'ppc64le' in platform_string:
            my_platform = 'linux-ppc64le'
        elif 'aarch64' in platform_string:
            my_platform = 'linux-aarch64'
        else:
            my_platform = 'linux_musl'
    elif my_platform == 'darwin' and my_machine == 'arm64':
        my_platform = 'darwin-arm64'

    def match_score(candidate):
        # Match by max common prefix length; among equal prefixes prefer the shortest key.
        # NOTE(review): a platform sharing no prefix with any key still gets an entry
        # (the shortest key wins) — confirm this fallback is intended.
        shared_prefix = os.path.commonprefix([my_platform, candidate])
        return len(shared_prefix), -len(candidate)

    best_key = max(platforms, key=match_score)
    value = platforms[best_key]

    # XXX: Windows builds ship the binary with an .exe suffix
    if best_key in ('win32', 'win32-clang-cl'):
        ya_name = 'ya-bin.exe'
    else:
        ya_name = 'ya-bin'
    ya_dir = _get_dir(value['urls'], value['md5'], ya_name)

    # Popen `args` must have `str` type
    ya_path = str(os.path.join(ya_dir, ya_name))

    # The child gets a copy of our environment, plus YA_SOURCE_ROOT unless already set
    env = os.environ.copy()
    if 'YA_SOURCE_ROOT' not in env:
        src_root = _mine_repo_root()
        if src_root is not None:
            env['YA_SOURCE_ROOT'] = src_root

    # Warn about variables that may interfere with the binary
    suspicious_vars = ('LD_PRELOAD', 'Y_PYTHON_SOURCE_ROOT')
    for env_name in suspicious_vars:
        if env_name not in os.environ:
            continue
        warning = "Warn: {}='{}' is specified and may affect the correct operation of the ya\n".format(
            env_name, env[env_name]
        )
        sys.stderr.write(warning)

    command = [ya_path] + forwarded_args
    if os.name == 'nt':
        import subprocess

        child = subprocess.Popen(command, env=env)
        child.wait()
        sys.exit(child.returncode)
    else:
        os.execve(ya_path, command, env)


# Script entry point: run main() and turn any unhandled exception into
# an error message plus traceback on stderr and exit code 1.
if __name__ == '__main__':
    try:
        main()
    except Exception as error:
        sys.stderr.write('ERROR: ' + str(error) + '\n')
        import traceback

        details = traceback.format_exc()
        sys.stderr.write(details + "\n")
        sys.exit(1)