# -*- coding: utf-8 -*-

"""
binaryornot.helpers
-------------------

Helper utilities used by BinaryOrNot.
"""


def print_as_hex(s):
    """
    Print a string as colon-separated lowercase hex values.

    :param s: Text string or bytes-like object (``bytes``/``bytearray``)
        to dump.
    :returns: None; the hex dump is written to standard output.
    """
    if isinstance(s, (bytes, bytearray)):
        # Iterating ``bytes`` yields 1-char strings on Python 2 but ints on
        # Python 3 (where ord() would raise TypeError). ``bytearray``
        # yields ints on both, so normalise through it.
        codes = bytearray(s)
    else:
        # Text string: every item is a single character.
        codes = (ord(c) for c in s)
    print(":".join("{0:x}".format(code) for code in codes))


def get_starting_chunk(filename, length=1024):
    """
    Read the first few bytes of a file.

    :param filename: File to open and get the first little chunk of.
    :param length: Number of bytes to read, default 1024.
    :returns: Starting chunk of bytes (shorter than ``length`` if the file
        is smaller).
    """
    # Ensure we open the file in binary mode so no decoding or newline
    # translation is applied to the data.
    with open(filename, 'rb') as f:
        chunk = f.read(length)
        return chunk


# Byte values that count as "text": a handful of whitespace/control
# characters plus every value from 32 (space) through 255.
_printable_extended_ascii = b'\n\r\t\f\b'
if bytes is str:
    # Python 2 means we need to invoke chr() explicitly
    _printable_extended_ascii += b''.join(map(chr, range(32, 256)))
else:
    # Python 3 means bytes accepts integer input directly
    _printable_extended_ascii += bytes(range(32, 256))


def is_binary_string(bytes_to_check):
    """
    Guess whether a chunk of bytes comes from a binary file.

    :param bytes_to_check: A chunk of bytes to check.
    :returns: True if appears to be a binary, otherwise False.
    """
    # Uses a simplified version of the Perl detection algorithm,
    # based roughly on Eli Bendersky's translation to Python:
    # http://eli.thegreenplace.net/2011/10/19/perls-guess-if-file-is-text-or-binary-implemented-in-python/

    # This is biased slightly more in favour of deeming files as text
    # files than the Perl algorithm, since all ASCII compatible character
    # sets are accepted as text, not just utf-8

    # Empty files are considered text files
    if not bytes_to_check:
        return False

    # Check for NUL bytes first
    if b'\x00' in bytes_to_check:
        return True

    # Now check for a high percentage of ASCII control characters
    # Binary if control chars are > 30% of the string
    # translate(None, delete) drops every "text" byte, leaving only the
    # bytes that are not in _printable_extended_ascii.
    control_chars = bytes_to_check.translate(None, _printable_extended_ascii)
    # float() keeps this a true division on Python 2 as well.
    nontext_ratio = float(len(control_chars)) / float(len(bytes_to_check))
    return nontext_ratio > 0.3