summary | refs | log | tree | commit | diff | stats
path: root/contrib/python/charset-normalizer/charset_normalizer/cli
diff options
context:
space:
mode:
author: shadchin <[email protected]> 2022-02-10 16:44:39 +0300
committer: Daniil Cherednik <[email protected]> 2022-02-10 16:44:39 +0300
commit: e9656aae26e0358d5378e5b63dcac5c8dbe0e4d0 (patch)
tree: 64175d5cadab313b3e7039ebaa06c5bc3295e274 /contrib/python/charset-normalizer/charset_normalizer/cli
parent: 2598ef1d0aee359b4b6d5fdd1758916d5907d04f (diff)
Restoring authorship annotation for <[email protected]>. Commit 2 of 2.
Diffstat (limited to 'contrib/python/charset-normalizer/charset_normalizer/cli')
-rw-r--r-- contrib/python/charset-normalizer/charset_normalizer/cli/normalizer.py 274
1 files changed, 137 insertions, 137 deletions
diff --git a/contrib/python/charset-normalizer/charset_normalizer/cli/normalizer.py b/contrib/python/charset-normalizer/charset_normalizer/cli/normalizer.py
index ac0177cff3d..5f912c923b7 100644
--- a/contrib/python/charset-normalizer/charset_normalizer/cli/normalizer.py
+++ b/contrib/python/charset-normalizer/charset_normalizer/cli/normalizer.py
@@ -1,61 +1,61 @@
-import argparse
-import sys
+import argparse
+import sys
from json import dumps
-from os.path import abspath
+from os.path import abspath
from platform import python_version
from typing import List
-
-from charset_normalizer import from_fp
-from charset_normalizer.models import CliDetectionResult
-from charset_normalizer.version import __version__
-
-
+
+from charset_normalizer import from_fp
+from charset_normalizer.models import CliDetectionResult
+from charset_normalizer.version import __version__
+
+
def query_yes_no(question: str, default: str = "yes") -> bool:
- """Ask a yes/no question via input() and return their answer.
-
- "question" is a string that is presented to the user.
- "default" is the presumed answer if the user just hits <Enter>.
- It must be "yes" (the default), "no" or None (meaning
- an answer is required of the user).
-
- The "answer" return value is True for "yes" or False for "no".
-
- Credit goes to (c) https://stackoverflow.com/questions/3041986/apt-command-line-interface-like-yes-no-input
- """
+ """Ask a yes/no question via input() and return their answer.
+
+ "question" is a string that is presented to the user.
+ "default" is the presumed answer if the user just hits <Enter>.
+ It must be "yes" (the default), "no" or None (meaning
+ an answer is required of the user).
+
+ The "answer" return value is True for "yes" or False for "no".
+
+ Credit goes to (c) https://stackoverflow.com/questions/3041986/apt-command-line-interface-like-yes-no-input
+ """
valid = {"yes": True, "y": True, "ye": True, "no": False, "n": False}
- if default is None:
- prompt = " [y/n] "
- elif default == "yes":
- prompt = " [Y/n] "
- elif default == "no":
- prompt = " [y/N] "
- else:
- raise ValueError("invalid default answer: '%s'" % default)
-
- while True:
- sys.stdout.write(question + prompt)
- choice = input().lower()
+ if default is None:
+ prompt = " [y/n] "
+ elif default == "yes":
+ prompt = " [Y/n] "
+ elif default == "no":
+ prompt = " [y/N] "
+ else:
+ raise ValueError("invalid default answer: '%s'" % default)
+
+ while True:
+ sys.stdout.write(question + prompt)
+ choice = input().lower()
if default is not None and choice == "":
- return valid[default]
- elif choice in valid:
- return valid[choice]
- else:
+ return valid[default]
+ elif choice in valid:
+ return valid[choice]
+ else:
sys.stdout.write("Please respond with 'yes' or 'no' " "(or 'y' or 'n').\n")
-
-
+
+
def cli_detect(argv: List[str] = None) -> int:
- """
- CLI assistant using ARGV and ArgumentParser
- :param argv:
- :return: 0 if everything is fine, anything else equal trouble
- """
- parser = argparse.ArgumentParser(
- description="The Real First Universal Charset Detector. "
+ """
+ CLI assistant using ARGV and ArgumentParser
+ :param argv:
+ :return: 0 if everything is fine, anything else equal trouble
+ """
+ parser = argparse.ArgumentParser(
+ description="The Real First Universal Charset Detector. "
"Discover originating encoding used on text file. "
"Normalize text to unicode."
- )
-
- parser.add_argument(
+ )
+
+ parser.add_argument(
"files", type=argparse.FileType("rb"), nargs="+", help="File(s) to be analysed"
)
parser.add_argument(
@@ -117,34 +117,34 @@ def cli_detect(argv: List[str] = None) -> int:
help="Define a custom maximum amount of chaos allowed in decoded content. 0. <= chaos <= 1.",
)
parser.add_argument(
- "--version",
- action="version",
+ "--version",
+ action="version",
version="Charset-Normalizer {} - Python {}".format(
__version__, python_version()
),
help="Show version information and exit.",
- )
-
- args = parser.parse_args(argv)
-
- if args.replace is True and args.normalize is False:
+ )
+
+ args = parser.parse_args(argv)
+
+ if args.replace is True and args.normalize is False:
print("Use --replace in addition of --normalize only.", file=sys.stderr)
- return 1
-
- if args.force is True and args.replace is False:
+ return 1
+
+ if args.force is True and args.replace is False:
print("Use --force in addition of --replace only.", file=sys.stderr)
- return 1
-
+ return 1
+
if args.threshold < 0.0 or args.threshold > 1.0:
print("--threshold VALUE should be between 0. AND 1.", file=sys.stderr)
- return 1
-
- x_ = []
-
- for my_file in args.files:
-
+ return 1
+
+ x_ = []
+
+ for my_file in args.files:
+
matches = from_fp(my_file, threshold=args.threshold, explain=args.verbose)
-
+
best_guess = matches.best()
if best_guess is None:
@@ -157,25 +157,25 @@ def cli_detect(argv: List[str] = None) -> int:
),
file=sys.stderr,
)
- x_.append(
- CliDetectionResult(
- abspath(my_file.name),
- None,
- [],
- [],
- "Unknown",
- [],
- False,
+ x_.append(
+ CliDetectionResult(
+ abspath(my_file.name),
+ None,
+ [],
+ [],
+ "Unknown",
+ [],
+ False,
1.0,
0.0,
- None,
+ None,
True,
- )
- )
- else:
- x_.append(
- CliDetectionResult(
- abspath(my_file.name),
+ )
+ )
+ else:
+ x_.append(
+ CliDetectionResult(
+ abspath(my_file.name),
best_guess.encoding,
best_guess.encoding_aliases,
[
@@ -188,36 +188,36 @@ def cli_detect(argv: List[str] = None) -> int:
best_guess.bom,
best_guess.percent_chaos,
best_guess.percent_coherence,
- None,
+ None,
True,
- )
- )
-
- if len(matches) > 1 and args.alternatives:
- for el in matches:
+ )
+ )
+
+ if len(matches) > 1 and args.alternatives:
+ for el in matches:
if el != best_guess:
- x_.append(
- CliDetectionResult(
- abspath(my_file.name),
- el.encoding,
- el.encoding_aliases,
+ x_.append(
+ CliDetectionResult(
+ abspath(my_file.name),
+ el.encoding,
+ el.encoding_aliases,
[
cp
for cp in el.could_be_from_charset
if cp != el.encoding
],
- el.language,
- el.alphabets,
- el.bom,
- el.percent_chaos,
- el.percent_coherence,
- None,
+ el.language,
+ el.alphabets,
+ el.bom,
+ el.percent_chaos,
+ el.percent_coherence,
+ None,
False,
- )
- )
-
- if args.normalize is True:
-
+ )
+ )
+
+ if args.normalize is True:
+
if best_guess.encoding.startswith("utf") is True:
print(
'"{}" file does not need to be normalized, as it already came from unicode.'.format(
@@ -225,16 +225,16 @@ def cli_detect(argv: List[str] = None) -> int:
),
file=sys.stderr,
)
- if my_file.closed is False:
- my_file.close()
- continue
-
+ if my_file.closed is False:
+ my_file.close()
+ continue
+
o_ = my_file.name.split(".") # type: List[str]
-
- if args.replace is False:
+
+ if args.replace is False:
o_.insert(-1, best_guess.encoding)
- if my_file.closed is False:
- my_file.close()
+ if my_file.closed is False:
+ my_file.close()
elif (
args.force is False
and query_yes_no(
@@ -248,30 +248,30 @@ def cli_detect(argv: List[str] = None) -> int:
if my_file.closed is False:
my_file.close()
continue
-
- try:
+
+ try:
x_[0].unicode_path = abspath("./{}".format(".".join(o_)))
-
+
with open(x_[0].unicode_path, "w", encoding="utf-8") as fp:
fp.write(str(best_guess))
- except IOError as e:
- print(str(e), file=sys.stderr)
- if my_file.closed is False:
- my_file.close()
- return 2
-
- if my_file.closed is False:
- my_file.close()
-
- if args.minimal is False:
- print(
- dumps(
+ except IOError as e:
+ print(str(e), file=sys.stderr)
+ if my_file.closed is False:
+ my_file.close()
+ return 2
+
+ if my_file.closed is False:
+ my_file.close()
+
+ if args.minimal is False:
+ print(
+ dumps(
[el.__dict__ for el in x_] if len(x_) > 1 else x_[0].__dict__,
- ensure_ascii=True,
+ ensure_ascii=True,
indent=4,
- )
- )
- else:
+ )
+ )
+ else:
for my_file in args.files:
print(
", ".join(
@@ -282,9 +282,9 @@ def cli_detect(argv: List[str] = None) -> int:
]
)
)
-
- return 0
-
-
+
+ return 0
+
+
if __name__ == "__main__":
- cli_detect()
+ cli_detect()