# -*- coding: utf-8 -*-

"""
binaryornot
-----------

Guess whether a file is binary or text by inspecting its first bytes.

This is a readable rendering of the ``tools/external/binaryornot`` package
added by the patch (``__init__.py``, ``helpers.py`` and ``check.py``).
"""

# ---- tools/external/binaryornot/__init__.py --------------------------------

__author__ = 'Audrey Roy'
__email__ = 'audreyr@gmail.com'
__version__ = '0.3.0'


# ---- tools/external/binaryornot/helpers.py ---------------------------------

def print_as_hex(s):
    """
    Print a string as colon-separated hex bytes, e.g. ``41:42``.

    :param s: Text string or byte string (``bytes``/``bytearray``) to dump.
    """
    if isinstance(s, (bytes, bytearray)):
        # Iterating Python 3 bytes yields ints, for which ord() raises
        # TypeError. bytearray yields ints on both Python 2 and Python 3.
        codes = bytearray(s)
    else:
        codes = [ord(char) for char in s]
    print(":".join("{0:x}".format(code) for code in codes))


def get_starting_chunk(filename, length=1024):
    """
    :param filename: File to open and get the first little chunk of.
    :param length: Number of bytes to read, default 1024.
    :returns: Starting chunk of bytes (shorter than ``length`` if the file
        is smaller, empty if the file is empty).
    """
    # Binary mode so the bytes come back undecoded and untranslated.
    with open(filename, 'rb') as f:
        return f.read(length)


# Bytes that count as "text": newline, carriage return, tab, form feed,
# backspace, plus every byte value from 32 through 255. Going through
# bytearray builds the same byte string on Python 2 and Python 3, so no
# version switch is needed.
_printable_extended_ascii = b'\n\r\t\f\b' + bytes(bytearray(range(32, 256)))


def is_binary_string(bytes_to_check):
    """
    :param bytes_to_check: A chunk of bytes to check.
    :returns: True if appears to be a binary, otherwise False.
    """
    # Uses a simplified version of the Perl detection algorithm,
    # based roughly on Eli Bendersky's translation to Python:
    # http://eli.thegreenplace.net/2011/10/19/perls-guess-if-file-is-text-or-binary-implemented-in-python/

    # This is biased slightly more in favour of deeming files as text
    # files than the Perl algorithm, since all ASCII compatible character
    # sets are accepted as text, not just utf-8.

    # Empty files are considered text files.
    if not bytes_to_check:
        return False

    # A NUL byte is treated as conclusive evidence of binary content.
    if b'\x00' in bytes_to_check:
        return True

    # Delete every "printable" byte; what is left are control characters.
    # Binary if control chars are > 30% of the chunk.
    control_chars = bytes_to_check.translate(None, _printable_extended_ascii)
    nontext_ratio = float(len(control_chars)) / float(len(bytes_to_check))
    return nontext_ratio > 0.3


# ---- tools/external/binaryornot/check.py -----------------------------------
# In the package layout check.py obtains the two helpers with
# ``from .helpers import get_starting_chunk, is_binary_string``.

def is_binary(filename):
    """
    Main entry point for checking if a file is binary or text.

    :param filename: File to check.
    :returns: True if it's a binary file, otherwise False.
    """
    return is_binary_string(get_starting_chunk(filename))


# ---- tools/external/cookiecutter/config.py (partial hunk) ------------------
# The same patch removes the module-level ``import yaml`` from config.py and
# instead performs ``import yaml`` inside get_config(), after the check that
# raises ConfigDoesNotExistException for a missing config path. The hunk is
# cut off in this view, so get_config() itself is not reproduced here.
{0}'.format(config_path)) with io.open(config_path, encoding='utf-8') as file_handle: try: