import itertools
import json
import logging
import argparse
import os
import random
import subprocess
import sys
import time
import urllib2
import uuid

import fetch_from


# Query string appended to the proxy link and to every storage link before downloading.
ORIGIN_SUFFIX = '?origin=fetch-from-sandbox'
# Base URL that the resource's 'mds' attribute is appended to in order to build the MDS link.
MDS_PREFIX = 'http://storage-int.mds.yandex.net/get-sandbox/'
# HTTP status codes treated as temporary: _urlopen() retries on them, and fetch()
# does not record them as the error to re-raise.
TEMPORARY_ERROR_CODES = (429, 500, 503, 504)


def parse_args():
    """Build the command-line parser and parse the process arguments.

    The arguments shared between fetchers are registered by
    ``fetch_from.add_common_arguments``; this script adds a mandatory integer
    ``--resource-id`` and the optional ``--custom-fetcher`` and
    ``--resource-file`` string options.

    Returns:
        The namespace produced by ``ArgumentParser.parse_args()``.
    """
    arg_parser = argparse.ArgumentParser()
    fetch_from.add_common_arguments(arg_parser)

    arg_parser.add_argument('--resource-id', type=int, required=True)
    for optional_flag in ('--custom-fetcher', '--resource-file'):
        arg_parser.add_argument(optional_flag)

    return arg_parser.parse_args()


class ResourceInfoError(Exception):
    """Raised by fetch() when the resource state is anything other than READY."""


class UnsupportedProtocolException(Exception):
    """Raised when a transport cannot be used here (e.g. skynet is not available).

    fetch() silently skips to the next transport when it sees this exception.
    """


def _sky_path():
    return "/usr/local/bin/sky"


def _is_skynet_avaliable():
    """Tell whether the skynet binary exists and can be executed.

    Returns:
        True when the binary is present and ``sky --version`` exits with
        status zero; False when the file is missing, cannot be launched, or
        exits with a non-zero status.
    """
    if os.path.exists(_sky_path()):
        try:
            subprocess.check_output([_sky_path(), "--version"])
        except (subprocess.CalledProcessError, OSError):
            # Non-zero exit status or failure to start the process at all.
            return False
        return True
    return False


def download_by_skynet(resource_info, file_name):
    """Download a resource with ``sky get`` into a freshly created directory.

    Args:
        resource_info: mapping describing the resource; its ``skynet_id``
            entry identifies what to download.
        file_name: name of the file expected inside the download directory.

    Returns:
        The path ``<new temp dir>/<file_name>``.

    Raises:
        UnsupportedProtocolException: skynet is not available on this host.
        ValueError: the resource has no ``skynet_id``.
        subprocess.CalledProcessError: ``sky get`` exited with non-zero status.
    """
    if not _is_skynet_avaliable():
        raise UnsupportedProtocolException("Skynet is not available")

    resource_skynet_id = resource_info.get("skynet_id")
    if not resource_skynet_id:
        raise ValueError("Resource does not have skynet_id")

    def sky_get(skynet_id, target_dir, timeout=None):
        # Runs the skynet client and returns its stripped stdout.
        cmd_args = [
            _sky_path(), 'get',
            "-N", "Backbone",
            "--user",
            "--wait",
            "--dir", target_dir,
            skynet_id,
        ]
        if timeout is not None:
            cmd_args.extend(["--timeout", str(timeout)])
        logging.info('Call skynet with args: %s', cmd_args)
        output = subprocess.check_output(cmd_args).strip()
        logging.debug('Skynet call with args %s is finished, result is %s', cmd_args, output)
        return output

    # The directory name comes from fetch_from and is resolved against the
    # current working directory.
    download_dir = os.path.abspath(fetch_from.uniq_string_generator())
    os.mkdir(download_dir)
    sky_get(resource_skynet_id, download_dir)
    return os.path.join(download_dir, file_name)


def _urlopen(url, data=None, headers=None):
    """Fetch *url* and return the response body, retrying temporary failures.

    Up to 10 attempts are made, each with a 30 second timeout. Every request
    carries X-Request-Timeout, X-Request-Id (one id shared by all attempts),
    User-Agent and X-Request-Duration (whole seconds elapsed since the first
    attempt started) headers in addition to the caller-supplied ones.

    Between attempts the function sleeps for the attempt index in seconds,
    unless an HTTP error response supplies a usable integer ``Retry-After``.

    Args:
        url: address to request.
        data: optional request body passed to ``urllib2.Request``.
        headers: optional mapping of extra request headers.

    Returns:
        The response body as returned by ``read()``.

    Raises:
        urllib2.HTTPError: immediately for a status code that is not in
            TEMPORARY_ERROR_CODES, or after the last attempt.
        Exception: whatever the last attempt raised, once attempts run out.
    """
    n = 10
    tout = 30
    started = time.time()
    reqid = uuid.uuid4()

    request = urllib2.Request(url, data=data, headers=headers or {})
    request.add_header('X-Request-Timeout', str(tout))
    request.add_header('X-Request-Id', str(reqid))
    request.add_header('User-Agent', 'fetch_from_sandbox.py')
    for i in xrange(n):
        retry_after = i
        is_last_attempt = i + 1 == n
        try:
            request.add_header('X-Request-Duration', str(int(time.time() - started)))
            response = urllib2.urlopen(request, timeout=tout)
            try:
                return response.read()
            finally:
                response.close()

        except urllib2.HTTPError as e:
            logging.warning('failed to fetch URL %s with HTTP code %d: %s', url, e.code, e)

            # Bare raise keeps the original traceback. It must come before the
            # nested try below so that the HTTPError is still the active exception.
            if e.code not in TEMPORARY_ERROR_CODES or is_last_attempt:
                raise

            # Retry-After may be missing, an HTTP-date or garbage; in those
            # cases keep the default delay instead of failing the retry loop.
            try:
                retry_after = max(0, int(e.headers.get('Retry-After', retry_after)))
            except (AttributeError, TypeError, ValueError):
                pass

        except Exception as e:
            logging.warning('failed to fetch URL %s: %s', url, e)
            if is_last_attempt:
                raise

        time.sleep(retry_after)


def _query(url):
    """Fetch *url* with _urlopen() and return the body decoded from JSON."""
    payload = _urlopen(url)
    return json.loads(payload)


# Root of the Sandbox REST API; resource endpoints are built as <root>/resource/<id>[...].
_SANDBOX_BASE_URL = 'https://sandbox.yandex-team.ru/api/v1.0'


def get_resource_info(resource_id, touch=False, no_links=False):
    """Request the description of a Sandbox resource.

    Args:
        resource_id: identifier of the resource; converted with ``str()``.
        touch: when true, send the ``X-Touch-Resource: 1`` request header.
        no_links: when true, send the ``X-No-Links: 1`` request header.
            NOTE(review): the exact server-side effect of both headers is
            defined by the Sandbox API, not by this file - confirm there.

    Returns:
        The decoded JSON response.

    Raises:
        Whatever ``_urlopen`` raises on a failed request, or ``ValueError``
        if the body is not valid JSON.
    """
    url = ''.join((_SANDBOX_BASE_URL, '/resource/', str(resource_id)))
    headers = {}
    if touch:
        headers['X-Touch-Resource'] = '1'
    if no_links:
        headers['X-No-Links'] = '1'
    # The headers used to be built and then dropped, which made both flags
    # no-ops. _query() takes no headers, so call _urlopen() directly.
    return json.loads(_urlopen(url, headers=headers))


def get_resource_http_links(resource_id):
    """Return the HTTP download links of a resource, each with ORIGIN_SUFFIX appended.

    The links are read from the ``url`` field of every entry returned by the
    ``/resource/<id>/data/http`` endpoint, in the order they were received.
    """
    url = _SANDBOX_BASE_URL + '/resource/' + str(resource_id) + '/data/http'
    links = []
    for entry in _query(url):
        links.append(entry['url'] + ORIGIN_SUFFIX)
    return links


def fetch_via_script(script, resource_id):
    """Run *script* with the resource id as its only argument.

    Returns:
        The script's standard output with trailing whitespace removed.

    Raises:
        subprocess.CalledProcessError: the script exited with non-zero status.
    """
    command = [script, str(resource_id)]
    output = subprocess.check_output(command)
    return output.rstrip()


def fetch(resource_id, custom_fetcher):
    try:
        resource_info = get_resource_info(resource_id, touch=True, no_links=True)
    except Exception as e:
        sys.stderr.write(
            "Failed to fetch resource {}: {}\n".format(resource_id, str(e))
        )
        raise

    if resource_info.get('state', 'DELETED') != 'READY':
        raise ResourceInfoError("Resource {} is not READY".format(resource_id))

    logging.info('Resource %s info %s', str(resource_id), json.dumps(resource_info))

    is_multifile = resource_info.get('multifile', False)
    resource_file_name = os.path.basename(resource_info["file_name"])
    expected_md5 = resource_info.get('md5')

    proxy_link = resource_info['http']['proxy'] + ORIGIN_SUFFIX
    if is_multifile:
        proxy_link += '&stream=tgz'

    mds_id = resource_info.get('attributes', {}).get('mds')
    mds_link = MDS_PREFIX + mds_id if mds_id else None

    def get_storage_links():
        storage_links = get_resource_http_links(resource_id)
        random.shuffle(storage_links)
        return storage_links

    skynet = _is_skynet_avaliable()

    if not skynet:
        logging.info("Skynet is not available, will try other protocols")

    def iter_tries():
        if skynet:
            yield lambda: download_by_skynet(resource_info, resource_file_name)

        if custom_fetcher:
            yield lambda: fetch_via_script(custom_fetcher, resource_id)

        # Don't try too hard here: we will get back to proxy later on
        yield lambda: fetch_from.fetch_url(proxy_link, False, resource_file_name, expected_md5, tries=2)
        for x in get_storage_links():
            # Don't spend too much time connecting single host
            yield lambda: fetch_from.fetch_url(x, False, resource_file_name, expected_md5, tries=1)
            if mds_link is not None:
                # Don't try too hard here: we will get back to MDS later on
                yield lambda: fetch_from.fetch_url(mds_link, True, resource_file_name, expected_md5, tries=2)
        yield lambda: fetch_from.fetch_url(proxy_link, False, resource_file_name, expected_md5)
        if mds_link is not None:
            yield lambda: fetch_from.fetch_url(mds_link, True, resource_file_name, expected_md5)

    if resource_info.get('attributes', {}).get('ttl') != 'inf':
        sys.stderr.write('WARNING: resource {} ttl is not "inf".\n'.format(resource_id))

    exc_info = None
    for i, action in enumerate(itertools.islice(iter_tries(), 0, 10)):
        try:
            fetched_file = action()
            break
        except UnsupportedProtocolException:
            pass
        except subprocess.CalledProcessError as e:
            logging.warning('failed to fetch resource %s with subprocess: %s', resource_id, e)
            time.sleep(i)
        except urllib2.HTTPError as e:
            logging.warning('failed to fetch resource %s with HTTP code %d: %s', resource_id, e.code, e)
            if e.code not in TEMPORARY_ERROR_CODES:
                exc_info = exc_info or sys.exc_info()
            time.sleep(i)
        except Exception as e:
            logging.exception(e)
            exc_info = exc_info or sys.exc_info()
            time.sleep(i)
    else:
        if exc_info:
            raise exc_info[0], exc_info[1], exc_info[2]
        else:
            raise Exception("No available protocol and/or server to fetch resource")

    return fetched_file, resource_info['file_name']


def _get_resource_info_from_file(resource_file):
    if resource_file is None or not os.path.exists(resource_file):
        return None

    RESOURCE_INFO_JSON = "resource_info.json"
    RESOURCE_CONTENT_FILE_NAME = "resource"

    resource_dir, resource_file = os.path.split(resource_file)
    if resource_file != RESOURCE_CONTENT_FILE_NAME:
        return None

    resource_json = os.path.join(resource_dir, RESOURCE_INFO_JSON)
    if not os.path.isfile(resource_json):
        return None

    try:
        with open(resource_json, 'r') as j:
            resource_info = json.load(j)
        resource_info['file_name']  # check consistency
        return resource_info
    except:
        logging.debug('Invalid %s in %s', RESOURCE_INFO_JSON, resource_dir)

    return None


def main(args):
    """Obtain the resource and hand it over to ``fetch_from.process``.

    A resource already present on disk (``args.resource_file`` with a valid
    description next to it) is used as is; otherwise the resource is
    downloaded with fetch(), passing the fetcher script named by the
    ``YA_CUSTOM_FETCHER`` environment variable, if any.

    ``remove=True`` is passed to ``fetch_from.process`` only when neither a
    custom fetcher nor a pre-fetched resource was used.
    """
    custom_fetcher = os.environ.get('YA_CUSTOM_FETCHER')
    local_info = _get_resource_info_from_file(args.resource_file)

    if not local_info:
        # This code should be merged to ya and removed.
        fetched_file, file_name = fetch(args.resource_id, custom_fetcher)
    else:
        fetched_file, file_name = args.resource_file, local_info['file_name']

    should_remove = not (custom_fetcher or local_info)
    fetch_from.process(fetched_file, file_name, args, remove=should_remove)


if __name__ == '__main__':
    # Script entry point: parse arguments, set up logging, run main() and map
    # any failure to a process exit code.
    args = parse_args()
    fetch_from.setup_logging(args, os.path.basename(__file__))

    try:
        main(args)
    except Exception as e:
        logging.exception(e)

        # Dump the log file to stderr. The file is closed explicitly, and a
        # failure to read it must not replace the exit code chosen below.
        try:
            with open(args.abs_log_path) as log_file:
                sys.stderr.write(log_file.read() + '\n')
        except EnvironmentError as log_error:
            sys.stderr.write('Failed to read log file {}: {}\n'.format(args.abs_log_path, log_error))
        sys.stderr.flush()

        # Imported only on the failure path, as in the original code.
        import error
        sys.exit(error.ExitCodes.INFRASTRUCTURE_ERROR if fetch_from.is_temporary(e) else 1)