diff options
| author | shadchin <[email protected]> | 2022-02-10 16:44:39 +0300 |
|---|---|---|
| committer | Daniil Cherednik <[email protected]> | 2022-02-10 16:44:39 +0300 |
| commit | e9656aae26e0358d5378e5b63dcac5c8dbe0e4d0 (patch) | |
| tree | 64175d5cadab313b3e7039ebaa06c5bc3295e274 /contrib/python/charset-normalizer/charset_normalizer/cli | |
| parent | 2598ef1d0aee359b4b6d5fdd1758916d5907d04f (diff) | |
Restoring authorship annotation for <[email protected]>. Commit 2 of 2.
Diffstat (limited to 'contrib/python/charset-normalizer/charset_normalizer/cli')
| -rw-r--r-- | contrib/python/charset-normalizer/charset_normalizer/cli/normalizer.py | 274 |
1 files changed, 137 insertions, 137 deletions
diff --git a/contrib/python/charset-normalizer/charset_normalizer/cli/normalizer.py b/contrib/python/charset-normalizer/charset_normalizer/cli/normalizer.py index ac0177cff3d..5f912c923b7 100644 --- a/contrib/python/charset-normalizer/charset_normalizer/cli/normalizer.py +++ b/contrib/python/charset-normalizer/charset_normalizer/cli/normalizer.py @@ -1,61 +1,61 @@ -import argparse -import sys +import argparse +import sys from json import dumps -from os.path import abspath +from os.path import abspath from platform import python_version from typing import List - -from charset_normalizer import from_fp -from charset_normalizer.models import CliDetectionResult -from charset_normalizer.version import __version__ - - + +from charset_normalizer import from_fp +from charset_normalizer.models import CliDetectionResult +from charset_normalizer.version import __version__ + + def query_yes_no(question: str, default: str = "yes") -> bool: - """Ask a yes/no question via input() and return their answer. - - "question" is a string that is presented to the user. - "default" is the presumed answer if the user just hits <Enter>. - It must be "yes" (the default), "no" or None (meaning - an answer is required of the user). - - The "answer" return value is True for "yes" or False for "no". - - Credit goes to (c) https://stackoverflow.com/questions/3041986/apt-command-line-interface-like-yes-no-input - """ + """Ask a yes/no question via input() and return their answer. + + "question" is a string that is presented to the user. + "default" is the presumed answer if the user just hits <Enter>. + It must be "yes" (the default), "no" or None (meaning + an answer is required of the user). + + The "answer" return value is True for "yes" or False for "no". + + Credit goes to (c) https://stackoverflow.com/questions/3041986/apt-command-line-interface-like-yes-no-input + """ valid = {"yes": True, "y": True, "ye": True, "no": False, "n": False} - if default is None: - prompt = " [y/n] " - elif default == "yes": - prompt = " [Y/n] " - elif default == "no": - prompt = " [y/N] " - else: - raise ValueError("invalid default answer: '%s'" % default) - - while True: - sys.stdout.write(question + prompt) - choice = input().lower() + if default is None: + prompt = " [y/n] " + elif default == "yes": + prompt = " [Y/n] " + elif default == "no": + prompt = " [y/N] " + else: + raise ValueError("invalid default answer: '%s'" % default) + + while True: + sys.stdout.write(question + prompt) + choice = input().lower() if default is not None and choice == "": - return valid[default] - elif choice in valid: - return valid[choice] - else: + return valid[default] + elif choice in valid: + return valid[choice] + else: sys.stdout.write("Please respond with 'yes' or 'no' " "(or 'y' or 'n').\n") - - + + def cli_detect(argv: List[str] = None) -> int: - """ - CLI assistant using ARGV and ArgumentParser - :param argv: - :return: 0 if everything is fine, anything else equal trouble - """ - parser = argparse.ArgumentParser( - description="The Real First Universal Charset Detector. " + """ + CLI assistant using ARGV and ArgumentParser + :param argv: + :return: 0 if everything is fine, anything else equal trouble + """ + parser = argparse.ArgumentParser( + description="The Real First Universal Charset Detector. " "Discover originating encoding used on text file. " "Normalize text to unicode." - ) - - parser.add_argument( + ) + + parser.add_argument( "files", type=argparse.FileType("rb"), nargs="+", help="File(s) to be analysed" ) parser.add_argument( @@ -117,34 +117,34 @@ def cli_detect(argv: List[str] = None) -> int: help="Define a custom maximum amount of chaos allowed in decoded content. 0. <= chaos <= 1.", ) parser.add_argument( - "--version", - action="version", + "--version", + action="version", version="Charset-Normalizer {} - Python {}".format( __version__, python_version() ), help="Show version information and exit.", - ) - - args = parser.parse_args(argv) - - if args.replace is True and args.normalize is False: + ) + + args = parser.parse_args(argv) + + if args.replace is True and args.normalize is False: print("Use --replace in addition of --normalize only.", file=sys.stderr) - return 1 - - if args.force is True and args.replace is False: + return 1 + + if args.force is True and args.replace is False: print("Use --force in addition of --replace only.", file=sys.stderr) - return 1 - + return 1 + if args.threshold < 0.0 or args.threshold > 1.0: print("--threshold VALUE should be between 0. AND 1.", file=sys.stderr) - return 1 - - x_ = [] - - for my_file in args.files: - + return 1 + + x_ = [] + + for my_file in args.files: + matches = from_fp(my_file, threshold=args.threshold, explain=args.verbose) - + best_guess = matches.best() if best_guess is None: @@ -157,25 +157,25 @@ def cli_detect(argv: List[str] = None) -> int: ), file=sys.stderr, ) - x_.append( - CliDetectionResult( - abspath(my_file.name), - None, - [], - [], - "Unknown", - [], - False, + x_.append( + CliDetectionResult( + abspath(my_file.name), + None, + [], + [], + "Unknown", + [], + False, 1.0, 0.0, - None, + None, True, - ) - ) - else: - x_.append( - CliDetectionResult( - abspath(my_file.name), + ) + ) + else: + x_.append( + CliDetectionResult( + abspath(my_file.name), best_guess.encoding, best_guess.encoding_aliases, [ @@ -188,36 +188,36 @@ def cli_detect(argv: List[str] = None) -> int: best_guess.bom, best_guess.percent_chaos, best_guess.percent_coherence, - None, + None, True, - ) - ) - - if len(matches) > 1 and args.alternatives: - for el in matches: + ) + ) + + if len(matches) > 1 and args.alternatives: + for el in matches: if el != best_guess: - x_.append( - CliDetectionResult( - abspath(my_file.name), - el.encoding, - el.encoding_aliases, + x_.append( + CliDetectionResult( + abspath(my_file.name), + el.encoding, + el.encoding_aliases, [ cp for cp in el.could_be_from_charset if cp != el.encoding ], - el.language, - el.alphabets, - el.bom, - el.percent_chaos, - el.percent_coherence, - None, + el.language, + el.alphabets, + el.bom, + el.percent_chaos, + el.percent_coherence, + None, False, - ) - ) - - if args.normalize is True: - + ) + ) + + if args.normalize is True: + if best_guess.encoding.startswith("utf") is True: print( '"{}" file does not need to be normalized, as it already came from unicode.'.format( @@ -225,16 +225,16 @@ def cli_detect(argv: List[str] = None) -> int: ), file=sys.stderr, ) - if my_file.closed is False: - my_file.close() - continue - + if my_file.closed is False: + my_file.close() + continue + o_ = my_file.name.split(".") # type: List[str] - - if args.replace is False: + + if args.replace is False: o_.insert(-1, best_guess.encoding) - if my_file.closed is False: - my_file.close() + if my_file.closed is False: + my_file.close() elif ( args.force is False and query_yes_no( @@ -248,30 +248,30 @@ def cli_detect(argv: List[str] = None) -> int: if my_file.closed is False: my_file.close() continue - - try: + + try: x_[0].unicode_path = abspath("./{}".format(".".join(o_))) - + with open(x_[0].unicode_path, "w", encoding="utf-8") as fp: fp.write(str(best_guess)) - except IOError as e: - print(str(e), file=sys.stderr) - if my_file.closed is False: - my_file.close() - return 2 - - if my_file.closed is False: - my_file.close() - - if args.minimal is False: - print( - dumps( + except IOError as e: + print(str(e), file=sys.stderr) + if my_file.closed is False: + my_file.close() + return 2 + + if my_file.closed is False: + my_file.close() + + if args.minimal is False: + print( + dumps( [el.__dict__ for el in x_] if len(x_) > 1 else x_[0].__dict__, - ensure_ascii=True, + ensure_ascii=True, indent=4, - ) - ) - else: + ) + ) + else: for my_file in args.files: print( ", ".join( @@ -282,9 +282,9 @@ def cli_detect(argv: List[str] = None) -> int: ] ) ) - - return 0 - - + + return 0 + + if __name__ == "__main__": - cli_detect() + cli_detect() |
