toolchain: add binaryornot and defer the yaml import in cookiecutter until a config file is actually loaded

This commit is contained in:
Mathieu Virbel 2015-02-18 03:01:06 +01:00
parent 3d9240261d
commit f912e9bd59
4 changed files with 85 additions and 2 deletions

View file

@ -0,0 +1,3 @@
__author__ = 'Audrey Roy'
__email__ = 'audreyr@gmail.com'
__version__ = '0.3.0'

19
tools/external/binaryornot/check.py vendored Normal file
View file

@ -0,0 +1,19 @@
# -*- coding: utf-8 -*-
"""
binaryornot.check
-----------------
Main code for checking if a file is binary or text.
"""
from .helpers import get_starting_chunk, is_binary_string
def is_binary(filename):
    """
    Classify a file as binary or text.

    The chunk returned by ``get_starting_chunk`` for the file is handed
    to ``is_binary_string``, whose verdict is returned unchanged.

    :param filename: File to check.
    :returns: True if it's a binary file, otherwise False.
    """
    return is_binary_string(get_starting_chunk(filename))

62
tools/external/binaryornot/helpers.py vendored Normal file
View file

@ -0,0 +1,62 @@
# -*- coding: utf-8 -*-
"""
binaryornot.helpers
-------------------
Helper utilities used by BinaryOrNot.
"""
def print_as_hex(s):
    """
    Print a string as colon-separated hex bytes.

    Works for text strings and for byte strings on both Python 2 and
    Python 3.

    :param s: String or bytes object whose elements should be printed.
    """
    # Iterating a Python 3 ``bytes`` object yields ints, for which ``ord()``
    # raises TypeError; Python 2 ``str`` and text strings yield one-character
    # strings. Normalise both cases to integer code points before formatting.
    codes = (c if isinstance(c, int) else ord(c) for c in s)
    print(":".join("{0:x}".format(code) for code in codes))
def get_starting_chunk(filename, length=1024):
    """
    Read the leading bytes of a file.

    :param filename: File to open and get the first little chunk of.
    :param length: Number of bytes to read, default 1024.
    :returns: Starting chunk of bytes.
    """
    # Binary mode guarantees raw bytes come back, with no text decoding
    # or newline translation applied.
    with open(filename, 'rb') as handle:
        return handle.read(length)
# Bytes treated as "text": a handful of whitespace/control characters
# plus every byte value from 32 through 255.
if bytes is not str:
    # Python 3: bytes() accepts an iterable of integers directly
    _printable_extended_ascii = b'\n\r\t\f\b' + bytes(range(32, 256))
else:
    # Python 2: each code must be turned into a one-char str via chr()
    _printable_extended_ascii = b'\n\r\t\f\b' + b''.join(
        map(chr, range(32, 256)))


def is_binary_string(bytes_to_check):
    """
    Guess whether a chunk of bytes comes from a binary file.

    :param bytes_to_check: A chunk of bytes to check.
    :returns: True if appears to be a binary, otherwise False.
    """
    # Simplified version of the Perl detection algorithm, based roughly
    # on Eli Bendersky's translation to Python:
    # http://eli.thegreenplace.net/2011/10/19/perls-guess-if-file-is-text-or-binary-implemented-in-python/
    # Compared with the Perl algorithm this leans slightly towards calling
    # a file text, because every ASCII compatible character set is
    # accepted as text, not only utf-8.

    # An empty chunk is treated as text
    if not bytes_to_check:
        return False

    # Any NUL byte is taken as proof of binary content
    if b'\x00' in bytes_to_check:
        return True

    # Delete every "printable" byte; whatever is left over are control
    # characters. The chunk is binary when they exceed 30% of it.
    leftover = bytes_to_check.translate(None, _printable_extended_ascii)
    return float(len(leftover)) / float(len(bytes_to_check)) > 0.3

View file

@ -14,8 +14,6 @@ import logging
import os
import io
import yaml
from .exceptions import ConfigDoesNotExistException
from .exceptions import InvalidConfiguration
@ -36,6 +34,7 @@ def get_config(config_path):
if not os.path.exists(config_path):
raise ConfigDoesNotExistException
import yaml
logger.debug('config_path is {0}'.format(config_path))
with io.open(config_path, encoding='utf-8') as file_handle:
try: