from itertools import chain from binascii import hexlify from collections import namedtuple from typing import List from torba.bcd_data_stream import BCDataStream from torba.util import subclass_tuple # bitcoin opcodes OP_0 = 0x00 OP_1 = 0x51 OP_16 = 0x60 OP_DUP = 0x76 OP_HASH160 = 0xa9 OP_EQUALVERIFY = 0x88 OP_CHECKSIG = 0xac OP_CHECKMULTISIG = 0xae OP_EQUAL = 0x87 OP_PUSHDATA1 = 0x4c OP_PUSHDATA2 = 0x4d OP_PUSHDATA4 = 0x4e OP_RETURN = 0x6a OP_2DROP = 0x6d OP_DROP = 0x75 # template matching opcodes (not real opcodes) # base class for PUSH_DATA related opcodes # pylint: disable=invalid-name PUSH_DATA_OP = namedtuple('PUSH_DATA_OP', 'name') # opcode for variable length strings # pylint: disable=invalid-name PUSH_SINGLE = subclass_tuple('PUSH_SINGLE', PUSH_DATA_OP) # opcode for variable number of variable length strings # pylint: disable=invalid-name PUSH_MANY = subclass_tuple('PUSH_MANY', PUSH_DATA_OP) # opcode with embedded subscript parsing # pylint: disable=invalid-name PUSH_SUBSCRIPT = namedtuple('PUSH_SUBSCRIPT', 'name template') def is_push_data_opcode(opcode): return isinstance(opcode, (PUSH_DATA_OP, PUSH_SUBSCRIPT)) def is_push_data_token(token): return 1 <= token <= OP_PUSHDATA4 def push_data(data): size = len(data) if size < OP_PUSHDATA1: yield BCDataStream.uint8.pack(size) elif size <= 0xFF: yield BCDataStream.uint8.pack(OP_PUSHDATA1) yield BCDataStream.uint8.pack(size) elif size <= 0xFFFF: yield BCDataStream.uint8.pack(OP_PUSHDATA2) yield BCDataStream.uint16.pack(size) else: yield BCDataStream.uint8.pack(OP_PUSHDATA4) yield BCDataStream.uint32.pack(size) yield data def read_data(token, stream): if token < OP_PUSHDATA1: return stream.read(token) if token == OP_PUSHDATA1: return stream.read(stream.read_uint8()) if token == OP_PUSHDATA2: return stream.read(stream.read_uint16()) return stream.read(stream.read_uint32()) # opcode for OP_1 - OP_16 # pylint: disable=invalid-name SMALL_INTEGER = namedtuple('SMALL_INTEGER', 'name') def is_small_integer(token): return OP_1 <= token <= OP_16 def push_small_integer(num): assert 1 <= num <= 16 yield BCDataStream.uint8.pack(OP_1 + (num - 1)) def read_small_integer(token): return (token - OP_1) + 1 class Token(namedtuple('Token', 'value')): __slots__ = () def __repr__(self): name = None for var_name, var_value in globals().items(): if var_name.startswith('OP_') and var_value == self.value: name = var_name break return name or self.value class DataToken(Token): __slots__ = () def __repr__(self): return '"{}"'.format(hexlify(self.value)) class SmallIntegerToken(Token): __slots__ = () def __repr__(self): return 'SmallIntegerToken({})'.format(self.value) def token_producer(source): token = source.read_uint8() while token is not None: if is_push_data_token(token): yield DataToken(read_data(token, source)) elif is_small_integer(token): yield SmallIntegerToken(read_small_integer(token)) else: yield Token(token) token = source.read_uint8() def tokenize(source): return list(token_producer(source)) class ScriptError(Exception): """ General script handling error. """ class ParseError(ScriptError): """ Script parsing error. """ class Parser: def __init__(self, opcodes, tokens): self.opcodes = opcodes self.tokens = tokens self.values = {} self.token_index = 0 self.opcode_index = 0 def parse(self): while self.token_index < len(self.tokens) and self.opcode_index < len(self.opcodes): token = self.tokens[self.token_index] opcode = self.opcodes[self.opcode_index] if isinstance(token, DataToken): if isinstance(opcode, (PUSH_SINGLE, PUSH_SUBSCRIPT)): self.push_single(opcode, token.value) elif isinstance(opcode, PUSH_MANY): self.consume_many_non_greedy() else: raise ParseError("DataToken found but opcode was '{}'.".format(opcode)) elif isinstance(token, SmallIntegerToken): if isinstance(opcode, SMALL_INTEGER): self.values[opcode.name] = token.value else: raise ParseError("SmallIntegerToken found but opcode was '{}'.".format(opcode)) elif token.value == opcode: pass else: raise ParseError("Token is '{}' and opcode is '{}'.".format(token.value, opcode)) self.token_index += 1 self.opcode_index += 1 if self.token_index < len(self.tokens): raise ParseError("Parse completed without all tokens being consumed.") if self.opcode_index < len(self.opcodes): raise ParseError("Parse completed without all opcodes being consumed.") return self def consume_many_non_greedy(self): """ Allows PUSH_MANY to consume data without being greedy in cases when one or more PUSH_SINGLEs follow a PUSH_MANY. This will prioritize giving all PUSH_SINGLEs some data and only after that subsume the rest into PUSH_MANY. """ token_values = [] while self.token_index < len(self.tokens): token = self.tokens[self.token_index] if not isinstance(token, DataToken): self.token_index -= 1 break token_values.append(token.value) self.token_index += 1 push_opcodes = [] push_many_count = 0 while self.opcode_index < len(self.opcodes): opcode = self.opcodes[self.opcode_index] if not is_push_data_opcode(opcode): self.opcode_index -= 1 break if isinstance(opcode, PUSH_MANY): push_many_count += 1 push_opcodes.append(opcode) self.opcode_index += 1 if push_many_count > 1: raise ParseError( "Cannot have more than one consecutive PUSH_MANY, as there is no way to tell which" " token value should go into which PUSH_MANY." ) if len(push_opcodes) > len(token_values): raise ParseError( "Not enough token values to match all of the PUSH_MANY and PUSH_SINGLE opcodes." ) many_opcode = push_opcodes.pop(0) # consume data into PUSH_SINGLE opcodes, working backwards for opcode in reversed(push_opcodes): self.push_single(opcode, token_values.pop()) # finally PUSH_MANY gets everything that's left self.values[many_opcode.name] = token_values def push_single(self, opcode, value): if isinstance(opcode, PUSH_SINGLE): self.values[opcode.name] = value elif isinstance(opcode, PUSH_SUBSCRIPT): self.values[opcode.name] = Script.from_source_with_template(value, opcode.template) else: raise ParseError("Not a push single or subscript: {}".format(opcode)) class Template: __slots__ = 'name', 'opcodes' def __init__(self, name, opcodes): self.name = name self.opcodes = opcodes def parse(self, tokens): return Parser(self.opcodes, tokens).parse().values def generate(self, values): source = BCDataStream() for opcode in self.opcodes: if isinstance(opcode, PUSH_SINGLE): data = values[opcode.name] source.write_many(push_data(data)) elif isinstance(opcode, PUSH_SUBSCRIPT): data = values[opcode.name] source.write_many(push_data(data.source)) elif isinstance(opcode, PUSH_MANY): for data in values[opcode.name]: source.write_many(push_data(data)) elif isinstance(opcode, SMALL_INTEGER): data = values[opcode.name] source.write_many(push_small_integer(data)) else: source.write_uint8(opcode) return source.get_bytes() class Script: __slots__ = 'source', 'template', 'values' templates: List[Template] = [] def __init__(self, source=None, template=None, values=None, template_hint=None): self.source = source self.template = template self.values = values if source: self.parse(template_hint) elif template and values: self.generate() @property def tokens(self): return tokenize(BCDataStream(self.source)) @classmethod def from_source_with_template(cls, source, template): return cls(source, template_hint=template) def parse(self, template_hint=None): tokens = self.tokens for template in chain((template_hint,), self.templates): if not template: continue try: self.values = template.parse(tokens) self.template = template return except ParseError: continue raise ValueError('No matching templates for source: {}'.format(hexlify(self.source))) def generate(self): self.source = self.template.generate(self.values) class BaseInputScript(Script): """ Input / redeem script templates (aka scriptSig) """ __slots__ = () REDEEM_PUBKEY = Template('pubkey', ( PUSH_SINGLE('signature'), )) REDEEM_PUBKEY_HASH = Template('pubkey_hash', ( PUSH_SINGLE('signature'), PUSH_SINGLE('pubkey') )) REDEEM_SCRIPT = Template('script', ( SMALL_INTEGER('signatures_count'), PUSH_MANY('pubkeys'), SMALL_INTEGER('pubkeys_count'), OP_CHECKMULTISIG )) REDEEM_SCRIPT_HASH = Template('script_hash', ( OP_0, PUSH_MANY('signatures'), PUSH_SUBSCRIPT('script', REDEEM_SCRIPT) )) templates = [ REDEEM_PUBKEY, REDEEM_PUBKEY_HASH, REDEEM_SCRIPT_HASH, REDEEM_SCRIPT ] @classmethod def redeem_pubkey_hash(cls, signature, pubkey): return cls(template=cls.REDEEM_PUBKEY_HASH, values={ 'signature': signature, 'pubkey': pubkey }) @classmethod def redeem_script_hash(cls, signatures, pubkeys): return cls(template=cls.REDEEM_SCRIPT_HASH, values={ 'signatures': signatures, 'script': cls.redeem_script(signatures, pubkeys) }) @classmethod def redeem_script(cls, signatures, pubkeys): return cls(template=cls.REDEEM_SCRIPT, values={ 'signatures_count': len(signatures), 'pubkeys': pubkeys, 'pubkeys_count': len(pubkeys) }) class BaseOutputScript(Script): __slots__ = () # output / payment script templates (aka scriptPubKey) PAY_PUBKEY_FULL = Template('pay_pubkey_full', ( PUSH_SINGLE('pubkey'), OP_CHECKSIG )) PAY_PUBKEY_HASH = Template('pay_pubkey_hash', ( OP_DUP, OP_HASH160, PUSH_SINGLE('pubkey_hash'), OP_EQUALVERIFY, OP_CHECKSIG )) PAY_SCRIPT_HASH = Template('pay_script_hash', ( OP_HASH160, PUSH_SINGLE('script_hash'), OP_EQUAL )) RETURN_DATA = Template('return_data', ( OP_RETURN, PUSH_SINGLE('data') )) templates = [ PAY_PUBKEY_FULL, PAY_PUBKEY_HASH, PAY_SCRIPT_HASH, RETURN_DATA ] @classmethod def pay_pubkey_hash(cls, pubkey_hash): return cls(template=cls.PAY_PUBKEY_HASH, values={ 'pubkey_hash': pubkey_hash }) @classmethod def pay_script_hash(cls, script_hash): return cls(template=cls.PAY_SCRIPT_HASH, values={ 'script_hash': script_hash }) @property def is_pay_pubkey(self): return self.template.name.endswith('pay_pubkey_full') @property def is_pay_pubkey_hash(self): return self.template.name.endswith('pay_pubkey_hash') @property def is_pay_script_hash(self): return self.template.name.endswith('pay_script_hash') @property def is_return_data(self): return self.template.name.endswith('return_data')