aboutsummaryrefslogtreecommitdiffstats
path: root/build/scripts/process_from_http.py
diff options
context:
space:
mode:
author zaverden <zaverden@yandex-team.com> 2024-05-28 10:37:17 +0300
committer zaverden <zaverden@yandex-team.com> 2024-05-28 10:54:22 +0300
commit a7e30949b36f26cf7637a65c3c152445de8de3e7 (patch)
tree fd59c0c2b817911e5d1651abc2455125a8f6f7ac /build/scripts/process_from_http.py
parent e912133f8e9c98c98f7cfd18caa71b3a0c4ac82d (diff)
download ydb-a7e30949b36f26cf7637a65c3c152445de8de3e7.tar.gz
feat(local fetcher): http and https URIs are under common http schema
Both `http:` and `https:` are recognized as `http` resource type deda06fcadd25ab57d1b64d39c17349a33c1a55e
Diffstat (limited to 'build/scripts/process_from_http.py')
-rw-r--r-- build/scripts/process_from_http.py 95
1 files changed, 95 insertions, 0 deletions
diff --git a/build/scripts/process_from_http.py b/build/scripts/process_from_http.py
new file mode 100644
index 0000000000..a88eb1995a
--- /dev/null
+++ b/build/scripts/process_from_http.py
@@ -0,0 +1,95 @@
+import argparse
+import errno
+import hashlib
+import os
+import platform
+import shutil
+import sys
+
+
class CliArgs:
    """Plain holder for the three values this script works with.

    Attributes:
        resource_root: value stored from the ``resource_root`` argument.
        uri: value stored from the ``uri`` argument.
        out: value stored from the ``out`` argument.
    """

    def __init__(self, resource_root, uri, out):  # type: (str,str,str) -> None
        # The values are stored as given; no validation or normalisation.
        self.resource_root, self.uri, self.out = resource_root, uri, out
+
+
def parse_args(argv=None):  # type: (list[str] | None) -> CliArgs
    """Parse the command line into a ``CliArgs`` instance.

    Args:
        argv: Argument list to parse. ``None`` (the default) makes argparse
            read ``sys.argv[1:]``, which is what the no-argument call does.

    Returns:
        A ``CliArgs`` carrying the ``--resource-root``, ``--uri`` and ``--out``
        values.

    All three options are required; argparse terminates the process with a
    usage message when one is missing.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--resource-root", required=True)
    parser.add_argument("--uri", required=True)
    parser.add_argument("--out", required=True)
    parsed = parser.parse_args(argv)

    # Return the declared type instead of leaking argparse.Namespace; the
    # attribute names are the same, so existing callers keep working.
    return CliArgs(resource_root=parsed.resource_root, uri=parsed.uri, out=parsed.out)
+
+
def print_err_msg(msg):  # type: (str) -> None
    """Print *msg* to stderr, prefixed with the script tag.

    The text is wrapped in ``[[bad]]`` / ``[[rst]]`` markers (presumably
    colour markup understood by the build tool's output handling -- confirm).
    """
    decorated = "[[bad]]process_from_https: {}[[rst]]".format(msg)
    print(decorated, file=sys.stderr)
+
+
def link_or_copy(src, dst):  # type: (str,str) -> None
    """Make *dst* refer to the contents of *src*, hard-linking when possible.

    On Windows the file is always copied. Elsewhere a hard link is attempted
    first; if the filesystem cannot provide one (different device, link count
    exhausted, or linking not permitted) the file is copied instead.

    Args:
        src: Path of the existing file.
        dst: Path to create.

    Raises:
        OSError: Re-raised after reporting when *dst* already exists
            (``EEXIST``) or *src* is missing (``ENOENT``); any other
            ``OSError`` is re-raised without a message.
    """
    # errno values that mean "a hard link is impossible here", not "the
    # operation is wrong", so a plain copy is a valid substitute.
    copy_fallback_errnos = (errno.EXDEV, errno.EMLINK, errno.EPERM)

    try:
        if platform.system().lower() == "windows":
            shutil.copy(src, dst)
        else:
            try:
                os.link(src, dst)
            except OSError as link_error:
                if link_error.errno not in copy_fallback_errnos:
                    raise
                shutil.copy(src, dst)
    except OSError as e:
        # The two conditions are mutually exclusive, hence elif.
        if e.errno == errno.EEXIST:
            print_err_msg("destination file already exists: {}".format(dst))
        elif e.errno == errno.ENOENT:
            print_err_msg("source file does not exists: {}".format(src))
        raise
+
+
def md5_hex(string):  # type: (str) -> str
    """Return the hexadecimal MD5 digest of *string* (encoded with ``str.encode()``)."""
    hasher = hashlib.md5()
    hasher.update(string.encode())
    return hasher.hexdigest()
+
+
def get_integrity_from_meta(meta_str):  # type: (str) -> str | None
    """Extract the ``integrity`` value from an ``&``-separated meta string.

    Args:
        meta_str: Text of the form ``key=value&key=value``.

    Returns:
        Everything after ``integrity=`` in the first field that starts with
        that prefix (possibly an empty string), or ``None`` when no field does.
    """
    key = "integrity="
    candidates = (
        field[len(key):]
        for field in meta_str.split("&")
        if field.startswith(key)
    )
    # Only the first matching field counts; later ones are ignored.
    return next(candidates, None)
+
+
def get_path_from_uri(resource_uri):  # type: (str) -> str | None
    """Map an http(s) resource URI to its path relative to the resource root.

    The URI fragment (the part after ``#``) is treated as an ``&``-separated
    meta string; its ``integrity`` value is MD5-hashed to form the resource id.

    Args:
        resource_uri: URI starting with ``http://`` or ``https://`` and
            carrying ``integrity=...`` in its fragment.

    Returns:
        ``"http/<md5 of integrity>/resource"``, or ``None`` (after printing an
        error) when the scheme is wrong or no non-empty integrity is present.
    """
    if not resource_uri.startswith(("https://", "http://")):
        print_err_msg("Uri has to start with 'https:' or 'http:', got {}".format(resource_uri))
        return None

    # partition() never raises: a URI without '#' yields an empty meta string
    # and is reported below, instead of crashing on tuple unpacking.
    _, _, meta_str = resource_uri.partition("#")
    integrity = get_integrity_from_meta(meta_str)

    if not integrity:
        print_err_msg("Uri meta has to have integrity field, got {}".format(resource_uri))
        return None

    resource_id = md5_hex(integrity)

    return "http/{}/resource".format(resource_id)
+
+
def main():  # type: () -> int
    """Place the pre-fetched resource for ``--uri`` at ``--out``.

    The resource is looked up under ``--resource-root`` at the relative path
    derived from the URI, then hard-linked or copied to the output path.

    Returns:
        ``0`` on success, ``1`` when the URI cannot be mapped to a path or the
        resource file is absent. Errors from linking or copying propagate as
        ``OSError``.
    """
    args = parse_args()
    relative_resource_path = get_path_from_uri(args.uri)

    # Validate before joining: os.path.join(root, None) raises TypeError, and
    # the joined path can never be empty because --resource-root is required.
    if not relative_resource_path:
        print_err_msg("Cannot get filepath from uri")
        return 1

    resource_path = os.path.join(args.resource_root, relative_resource_path)

    if not os.path.exists(resource_path):
        print_err_msg("File {} not found in $(RESOURCE_ROOT)".format(relative_resource_path))
        return 1

    # dirname is "" for a bare file name; makedirs("") would fail.
    out_dirname = os.path.dirname(args.out)
    if out_dirname:
        os.makedirs(out_dirname, exist_ok=True)

    link_or_copy(resource_path, args.out)
    return 0
+
+
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())