import new p4a
This commit is contained in:
parent
d9d8195764
commit
a712121c3c
3036 changed files with 445600 additions and 3262 deletions
venv/lib/python3.8/site-packages/Cython/Compiler
|
@ -0,0 +1,335 @@
|
|||
#
|
||||
# Cython -- encoding related tools
|
||||
#
|
||||
|
||||
from __future__ import absolute_import
|
||||
|
||||
import re
|
||||
import sys
|
||||
|
||||
if sys.version_info[0] >= 3:
|
||||
_unicode, _str, _bytes, _unichr = str, str, bytes, chr
|
||||
IS_PYTHON3 = True
|
||||
else:
|
||||
_unicode, _str, _bytes, _unichr = unicode, str, str, unichr
|
||||
IS_PYTHON3 = False
|
||||
|
||||
empty_bytes = _bytes()
|
||||
empty_unicode = _unicode()
|
||||
|
||||
join_bytes = empty_bytes.join
|
||||
|
||||
|
||||
class UnicodeLiteralBuilder(object):
|
||||
"""Assemble a unicode string.
|
||||
"""
|
||||
def __init__(self):
|
||||
self.chars = []
|
||||
|
||||
def append(self, characters):
|
||||
if isinstance(characters, _bytes):
|
||||
# this came from a Py2 string literal in the parser code
|
||||
characters = characters.decode("ASCII")
|
||||
assert isinstance(characters, _unicode), str(type(characters))
|
||||
self.chars.append(characters)
|
||||
|
||||
if sys.maxunicode == 65535:
|
||||
def append_charval(self, char_number):
|
||||
if char_number > 65535:
|
||||
# wide Unicode character on narrow platform => replace
|
||||
# by surrogate pair
|
||||
char_number -= 0x10000
|
||||
self.chars.append( _unichr((char_number // 1024) + 0xD800) )
|
||||
self.chars.append( _unichr((char_number % 1024) + 0xDC00) )
|
||||
else:
|
||||
self.chars.append( _unichr(char_number) )
|
||||
else:
|
||||
def append_charval(self, char_number):
|
||||
self.chars.append( _unichr(char_number) )
|
||||
|
||||
def append_uescape(self, char_number, escape_string):
|
||||
self.append_charval(char_number)
|
||||
|
||||
def getstring(self):
|
||||
return EncodedString(u''.join(self.chars))
|
||||
|
||||
def getstrings(self):
|
||||
return (None, self.getstring())
|
||||
|
||||
|
||||
class BytesLiteralBuilder(object):
|
||||
"""Assemble a byte string or char value.
|
||||
"""
|
||||
def __init__(self, target_encoding):
|
||||
self.chars = []
|
||||
self.target_encoding = target_encoding
|
||||
|
||||
def append(self, characters):
|
||||
if isinstance(characters, _unicode):
|
||||
characters = characters.encode(self.target_encoding)
|
||||
assert isinstance(characters, _bytes), str(type(characters))
|
||||
self.chars.append(characters)
|
||||
|
||||
def append_charval(self, char_number):
|
||||
self.chars.append( _unichr(char_number).encode('ISO-8859-1') )
|
||||
|
||||
def append_uescape(self, char_number, escape_string):
|
||||
self.append(escape_string)
|
||||
|
||||
def getstring(self):
|
||||
# this *must* return a byte string!
|
||||
return bytes_literal(join_bytes(self.chars), self.target_encoding)
|
||||
|
||||
def getchar(self):
|
||||
# this *must* return a byte string!
|
||||
return self.getstring()
|
||||
|
||||
def getstrings(self):
|
||||
return (self.getstring(), None)
|
||||
|
||||
|
||||
class StrLiteralBuilder(object):
|
||||
"""Assemble both a bytes and a unicode representation of a string.
|
||||
"""
|
||||
def __init__(self, target_encoding):
|
||||
self._bytes = BytesLiteralBuilder(target_encoding)
|
||||
self._unicode = UnicodeLiteralBuilder()
|
||||
|
||||
def append(self, characters):
|
||||
self._bytes.append(characters)
|
||||
self._unicode.append(characters)
|
||||
|
||||
def append_charval(self, char_number):
|
||||
self._bytes.append_charval(char_number)
|
||||
self._unicode.append_charval(char_number)
|
||||
|
||||
def append_uescape(self, char_number, escape_string):
|
||||
self._bytes.append(escape_string)
|
||||
self._unicode.append_charval(char_number)
|
||||
|
||||
def getstrings(self):
|
||||
return (self._bytes.getstring(), self._unicode.getstring())
|
||||
|
||||
|
||||
class EncodedString(_unicode):
|
||||
# unicode string subclass to keep track of the original encoding.
|
||||
# 'encoding' is None for unicode strings and the source encoding
|
||||
# otherwise
|
||||
encoding = None
|
||||
|
||||
def __deepcopy__(self, memo):
|
||||
return self
|
||||
|
||||
def byteencode(self):
|
||||
assert self.encoding is not None
|
||||
return self.encode(self.encoding)
|
||||
|
||||
def utf8encode(self):
|
||||
assert self.encoding is None
|
||||
return self.encode("UTF-8")
|
||||
|
||||
@property
|
||||
def is_unicode(self):
|
||||
return self.encoding is None
|
||||
|
||||
def contains_surrogates(self):
|
||||
return string_contains_surrogates(self)
|
||||
|
||||
def as_utf8_string(self):
|
||||
return bytes_literal(self.utf8encode(), 'utf8')
|
||||
|
||||
|
||||
def string_contains_surrogates(ustring):
|
||||
"""
|
||||
Check if the unicode string contains surrogate code points
|
||||
on a CPython platform with wide (UCS-4) or narrow (UTF-16)
|
||||
Unicode, i.e. characters that would be spelled as two
|
||||
separate code units on a narrow platform.
|
||||
"""
|
||||
for c in map(ord, ustring):
|
||||
if c > 65535: # can only happen on wide platforms
|
||||
return True
|
||||
if 0xD800 <= c <= 0xDFFF:
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
class BytesLiteral(_bytes):
|
||||
# bytes subclass that is compatible with EncodedString
|
||||
encoding = None
|
||||
|
||||
def __deepcopy__(self, memo):
|
||||
return self
|
||||
|
||||
def byteencode(self):
|
||||
if IS_PYTHON3:
|
||||
return _bytes(self)
|
||||
else:
|
||||
# fake-recode the string to make it a plain bytes object
|
||||
return self.decode('ISO-8859-1').encode('ISO-8859-1')
|
||||
|
||||
def utf8encode(self):
|
||||
assert False, "this is not a unicode string: %r" % self
|
||||
|
||||
def __str__(self):
|
||||
"""Fake-decode the byte string to unicode to support %
|
||||
formatting of unicode strings.
|
||||
"""
|
||||
return self.decode('ISO-8859-1')
|
||||
|
||||
is_unicode = False
|
||||
|
||||
def as_c_string_literal(self):
|
||||
value = split_string_literal(escape_byte_string(self))
|
||||
return '"%s"' % value
|
||||
|
||||
|
||||
def bytes_literal(s, encoding):
|
||||
assert isinstance(s, bytes)
|
||||
s = BytesLiteral(s)
|
||||
s.encoding = encoding
|
||||
return s
|
||||
|
||||
|
||||
def encoded_string(s, encoding):
|
||||
assert isinstance(s, (_unicode, bytes))
|
||||
s = EncodedString(s)
|
||||
if encoding is not None:
|
||||
s.encoding = encoding
|
||||
return s
|
||||
|
||||
|
||||
char_from_escape_sequence = {
|
||||
r'\a' : u'\a',
|
||||
r'\b' : u'\b',
|
||||
r'\f' : u'\f',
|
||||
r'\n' : u'\n',
|
||||
r'\r' : u'\r',
|
||||
r'\t' : u'\t',
|
||||
r'\v' : u'\v',
|
||||
}.get
|
||||
|
||||
_c_special = ('\\', '??', '"') + tuple(map(chr, range(32)))
|
||||
|
||||
|
||||
def _to_escape_sequence(s):
|
||||
if s in '\n\r\t':
|
||||
return repr(s)[1:-1]
|
||||
elif s == '"':
|
||||
return r'\"'
|
||||
elif s == '\\':
|
||||
return r'\\'
|
||||
else:
|
||||
# within a character sequence, oct passes much better than hex
|
||||
return ''.join(['\\%03o' % ord(c) for c in s])
|
||||
|
||||
|
||||
def _build_specials_replacer():
|
||||
subexps = []
|
||||
replacements = {}
|
||||
for special in _c_special:
|
||||
regexp = ''.join(['[%s]' % c.replace('\\', '\\\\') for c in special])
|
||||
subexps.append(regexp)
|
||||
replacements[special.encode('ASCII')] = _to_escape_sequence(special).encode('ASCII')
|
||||
sub = re.compile(('(%s)' % '|'.join(subexps)).encode('ASCII')).sub
|
||||
def replace_specials(m):
|
||||
return replacements[m.group(1)]
|
||||
def replace(s):
|
||||
return sub(replace_specials, s)
|
||||
return replace
|
||||
|
||||
_replace_specials = _build_specials_replacer()
|
||||
|
||||
|
||||
def escape_char(c):
|
||||
if IS_PYTHON3:
|
||||
c = c.decode('ISO-8859-1')
|
||||
if c in '\n\r\t\\':
|
||||
return repr(c)[1:-1]
|
||||
elif c == "'":
|
||||
return "\\'"
|
||||
n = ord(c)
|
||||
if n < 32 or n > 127:
|
||||
# hex works well for characters
|
||||
return "\\x%02X" % n
|
||||
else:
|
||||
return c
|
||||
|
||||
def escape_byte_string(s):
|
||||
"""Escape a byte string so that it can be written into C code.
|
||||
Note that this returns a Unicode string instead which, when
|
||||
encoded as ISO-8859-1, will result in the correct byte sequence
|
||||
being written.
|
||||
"""
|
||||
s = _replace_specials(s)
|
||||
try:
|
||||
return s.decode("ASCII") # trial decoding: plain ASCII => done
|
||||
except UnicodeDecodeError:
|
||||
pass
|
||||
if IS_PYTHON3:
|
||||
s_new = bytearray()
|
||||
append, extend = s_new.append, s_new.extend
|
||||
for b in s:
|
||||
if b >= 128:
|
||||
extend(('\\%3o' % b).encode('ASCII'))
|
||||
else:
|
||||
append(b)
|
||||
return s_new.decode('ISO-8859-1')
|
||||
else:
|
||||
l = []
|
||||
append = l.append
|
||||
for c in s:
|
||||
o = ord(c)
|
||||
if o >= 128:
|
||||
append('\\%3o' % o)
|
||||
else:
|
||||
append(c)
|
||||
return join_bytes(l).decode('ISO-8859-1')
|
||||
|
||||
def split_string_literal(s, limit=2000):
|
||||
# MSVC can't handle long string literals.
|
||||
if len(s) < limit:
|
||||
return s
|
||||
else:
|
||||
start = 0
|
||||
chunks = []
|
||||
while start < len(s):
|
||||
end = start + limit
|
||||
if len(s) > end-4 and '\\' in s[end-4:end]:
|
||||
end -= 4 - s[end-4:end].find('\\') # just before the backslash
|
||||
while s[end-1] == '\\':
|
||||
end -= 1
|
||||
if end == start:
|
||||
# must have been a long line of backslashes
|
||||
end = start + limit - (limit % 2) - 4
|
||||
break
|
||||
chunks.append(s[start:end])
|
||||
start = end
|
||||
return '""'.join(chunks)
|
||||
|
||||
def encode_pyunicode_string(s):
|
||||
"""Create Py_UNICODE[] representation of a given unicode string.
|
||||
"""
|
||||
s = list(map(ord, s)) + [0]
|
||||
|
||||
if sys.maxunicode >= 0x10000: # Wide build or Py3.3
|
||||
utf16, utf32 = [], s
|
||||
for code_point in s:
|
||||
if code_point >= 0x10000: # outside of BMP
|
||||
high, low = divmod(code_point - 0x10000, 1024)
|
||||
utf16.append(high + 0xD800)
|
||||
utf16.append(low + 0xDC00)
|
||||
else:
|
||||
utf16.append(code_point)
|
||||
else:
|
||||
utf16, utf32 = s, []
|
||||
for code_unit in s:
|
||||
if 0xDC00 <= code_unit <= 0xDFFF and utf32 and 0xD800 <= utf32[-1] <= 0xDBFF:
|
||||
high, low = utf32[-1], code_unit
|
||||
utf32[-1] = ((high & 0x3FF) << 10) + (low & 0x3FF) + 0x10000
|
||||
else:
|
||||
utf32.append(code_unit)
|
||||
|
||||
if utf16 == utf32:
|
||||
utf16 = []
|
||||
return ",".join(map(_unicode, utf16)), ",".join(map(_unicode, utf32))
|
Loading…
Add table
Add a link
Reference in a new issue