diff --git a/txscript/README.md b/txscript/README.md index 004c586d..f0abb518 100644 --- a/txscript/README.md +++ b/txscript/README.md @@ -37,6 +37,10 @@ $ go get -u github.com/btcsuite/btcd/txscript * [Manually Signing a Transaction Output](https://pkg.go.dev/github.com/btcsuite/btcd/txscript#example-SignTxOutput) Demonstrates manually creating and signing a redeem transaction. +* [Counting Opcodes in Scripts](http://godoc.org/github.com/decred/dcrd/txscript#example-ScriptTokenizer) + Demonstrates creating a script tokenizer instance and using it to count the + number of opcodes a script contains. + ## GPG Verification Key All official release tags are signed by Conformal so users can ensure the code diff --git a/txscript/bench_test.go b/txscript/bench_test.go index 51f9aeab..673d1cc3 100644 --- a/txscript/bench_test.go +++ b/txscript/bench_test.go @@ -96,17 +96,18 @@ func BenchmarkScriptParsing(b *testing.B) { b.Fatalf("failed to create benchmark script: %v", err) } + const scriptVersion = 0 b.ResetTimer() b.ReportAllocs() for i := 0; i < b.N; i++ { - pops, err := parseScript(script) - if err != nil { - b.Fatalf("failed to parse script: %v", err) + tokenizer := MakeScriptTokenizer(scriptVersion, script) + for tokenizer.Next() { + _ = tokenizer.Opcode() + _ = tokenizer.Data() + _ = tokenizer.ByteIndex() } - - for _, pop := range pops { - _ = pop.opcode - _ = pop.data + if err := tokenizer.Err(); err != nil { + b.Fatalf("failed to parse script: %v", err) } } } diff --git a/txscript/error.go b/txscript/error.go index a61d0272..f42b893e 100644 --- a/txscript/error.go +++ b/txscript/error.go @@ -1,4 +1,5 @@ // Copyright (c) 2013-2017 The btcsuite developers +// Copyright (c) 2015-2019 The Decred developers // Use of this source code is governed by an ISC // license that can be found in the LICENSE file. @@ -47,6 +48,10 @@ const ( // the provided data exceeds MaxDataCarrierSize. ErrTooMuchNullData + // ErrUnsupportedScriptVersion is returned when an unsupported script + // version is passed to a function which deals with script analysis. + ErrUnsupportedScriptVersion + // ------------------------------------------ // Failures related to final execution state. // ------------------------------------------ @@ -352,6 +357,7 @@ var errorCodeStrings = map[ErrorCode]string{ ErrNotMultisigScript: "ErrNotMultisigScript", ErrTooManyRequiredSigs: "ErrTooManyRequiredSigs", ErrTooMuchNullData: "ErrTooMuchNullData", + ErrUnsupportedScriptVersion: "ErrUnsupportedScriptVersion", ErrEarlyReturn: "ErrEarlyReturn", ErrEmptyStack: "ErrEmptyStack", ErrEvalFalse: "ErrEvalFalse", diff --git a/txscript/error_test.go b/txscript/error_test.go index ebac85fd..abfa1565 100644 --- a/txscript/error_test.go +++ b/txscript/error_test.go @@ -1,4 +1,5 @@ // Copyright (c) 2017 The btcsuite developers +// Copyright (c) 2015-2019 The Decred developers // Use of this source code is governed by an ISC // license that can be found in the LICENSE file. @@ -22,6 +23,7 @@ func TestErrorCodeStringer(t *testing.T) { {ErrUnsupportedAddress, "ErrUnsupportedAddress"}, {ErrTooManyRequiredSigs, "ErrTooManyRequiredSigs"}, {ErrTooMuchNullData, "ErrTooMuchNullData"}, + {ErrUnsupportedScriptVersion, "ErrUnsupportedScriptVersion"}, {ErrNotMultisigScript, "ErrNotMultisigScript"}, {ErrEarlyReturn, "ErrEarlyReturn"}, {ErrEmptyStack, "ErrEmptyStack"}, diff --git a/txscript/example_test.go b/txscript/example_test.go index 7bf2b3f0..6e17341c 100644 --- a/txscript/example_test.go +++ b/txscript/example_test.go @@ -1,4 +1,5 @@ // Copyright (c) 2014-2016 The btcsuite developers +// Copyright (c) 2015-2019 The Decred developers // Use of this source code is governed by an ISC // license that can be found in the LICENSE file. @@ -180,3 +181,34 @@ func ExampleSignTxOutput() { // Output: // Transaction successfully signed } + +// This example demonstrates creating a script tokenizer instance and using it +// to count the number of opcodes a script contains. +func ExampleScriptTokenizer() { + // Create a script to use in the example. Ordinarily this would come from + // some other source. + hash160 := btcutil.Hash160([]byte("example")) + script, err := txscript.NewScriptBuilder().AddOp(txscript.OP_DUP). + AddOp(txscript.OP_HASH160).AddData(hash160). + AddOp(txscript.OP_EQUALVERIFY).AddOp(txscript.OP_CHECKSIG).Script() + if err != nil { + fmt.Printf("failed to build script: %v\n", err) + return + } + + // Create a tokenizer to iterate the script and count the number of opcodes. + const scriptVersion = 0 + var numOpcodes int + tokenizer := txscript.MakeScriptTokenizer(scriptVersion, script) + for tokenizer.Next() { + numOpcodes++ + } + if tokenizer.Err() != nil { + fmt.Printf("script failed to parse: %v\n", err) + } else { + fmt.Printf("script contains %d opcode(s)\n", numOpcodes) + } + + // Output: + // script contains 5 opcode(s) +} diff --git a/txscript/tokenizer.go b/txscript/tokenizer.go new file mode 100644 index 00000000..72e00f07 --- /dev/null +++ b/txscript/tokenizer.go @@ -0,0 +1,186 @@ +// Copyright (c) 2019 The Decred developers +// Use of this source code is governed by an ISC +// license that can be found in the LICENSE file. + +package txscript + +import ( + "encoding/binary" + "fmt" +) + +// opcodeArrayRef is used to break initialization cycles. +var opcodeArrayRef *[256]opcode + +func init() { + opcodeArrayRef = &opcodeArray +} + +// ScriptTokenizer provides a facility for easily and efficiently tokenizing +// transaction scripts without creating allocations. Each successive opcode is +// parsed with the Next function, which returns false when iteration is +// complete, either due to successfully tokenizing the entire script or +// encountering a parse error. In the case of failure, the Err function may be +// used to obtain the specific parse error. +// +// Upon successfully parsing an opcode, the opcode and data associated with it +// may be obtained via the Opcode and Data functions, respectively. +// +// The ByteIndex function may be used to obtain the tokenizer's current offset +// into the raw script. +type ScriptTokenizer struct { + script []byte + version uint16 + offset int32 + op *opcode + data []byte + err error +} + +// Done returns true when either all opcodes have been exhausted or a parse +// failure was encountered and therefore the state has an associated error. +func (t *ScriptTokenizer) Done() bool { + return t.err != nil || t.offset >= int32(len(t.script)) +} + +// Next attempts to parse the next opcode and returns whether or not it was +// successful. It will not be successful if invoked when already at the end of +// the script, a parse failure is encountered, or an associated error already +// exists due to a previous parse failure. +// +// In the case of a true return, the parsed opcode and data can be obtained with +// the associated functions and the offset into the script will either point to +// the next opcode or the end of the script if the final opcode was parsed. +// +// In the case of a false return, the parsed opcode and data will be the last +// successfully parsed values (if any) and the offset into the script will +// either point to the failing opcode or the end of the script if the function +// was invoked when already at the end of the script. +// +// Invoking this function when already at the end of the script is not +// considered an error and will simply return false. +func (t *ScriptTokenizer) Next() bool { + if t.Done() { + return false + } + + op := &opcodeArrayRef[t.script[t.offset]] + switch { + // No additional data. Note that some of the opcodes, notably OP_1NEGATE, + // OP_0, and OP_[1-16] represent the data themselves. + case op.length == 1: + t.offset++ + t.op = op + t.data = nil + return true + + // Data pushes of specific lengths -- OP_DATA_[1-75]. + case op.length > 1: + script := t.script[t.offset:] + if len(script) < op.length { + str := fmt.Sprintf("opcode %s requires %d bytes, but script only "+ + "has %d remaining", op.name, op.length, len(script)) + t.err = scriptError(ErrMalformedPush, str) + return false + } + + // Move the offset forward and set the opcode and data accordingly. + t.offset += int32(op.length) + t.op = op + t.data = script[1:op.length] + return true + + // Data pushes with parsed lengths -- OP_PUSHDATA{1,2,4}. + case op.length < 0: + script := t.script[t.offset+1:] + if len(script) < -op.length { + str := fmt.Sprintf("opcode %s requires %d bytes, but script only "+ + "has %d remaining", op.name, -op.length, len(script)) + t.err = scriptError(ErrMalformedPush, str) + return false + } + + // Next -length bytes are little endian length of data. + var dataLen int32 + switch op.length { + case -1: + dataLen = int32(script[0]) + case -2: + dataLen = int32(binary.LittleEndian.Uint16(script[:2])) + case -4: + dataLen = int32(binary.LittleEndian.Uint32(script[:4])) + default: + // In practice it should be impossible to hit this + // check as each op code is predefined, and only uses + // the specified lengths. + str := fmt.Sprintf("invalid opcode length %d", op.length) + t.err = scriptError(ErrMalformedPush, str) + return false + } + + // Move to the beginning of the data. + script = script[-op.length:] + + // Disallow entries that do not fit script or were sign extended. + if dataLen > int32(len(script)) || dataLen < 0 { + str := fmt.Sprintf("opcode %s pushes %d bytes, but script only "+ + "has %d remaining", op.name, dataLen, len(script)) + t.err = scriptError(ErrMalformedPush, str) + return false + } + + // Move the offset forward and set the opcode and data accordingly. + t.offset += 1 + int32(-op.length) + dataLen + t.op = op + t.data = script[:dataLen] + return true + } + + // The only remaining case is an opcode with length zero which is + // impossible. + panic("unreachable") +} + +// Script returns the full script associated with the tokenizer. +func (t *ScriptTokenizer) Script() []byte { + return t.script +} + +// ByteIndex returns the current offset into the full script that will be parsed +// next and therefore also implies everything before it has already been parsed. +func (t *ScriptTokenizer) ByteIndex() int32 { + return t.offset +} + +// Opcode returns the current opcode associated with the tokenizer. +func (t *ScriptTokenizer) Opcode() byte { + return t.op.value +} + +// Data returns the data associated with the most recently successfully parsed +// opcode. +func (t *ScriptTokenizer) Data() []byte { + return t.data +} + +// Err returns any errors currently associated with the tokenizer. This will +// only be non-nil in the case a parsing error was encountered. +func (t *ScriptTokenizer) Err() error { + return t.err +} + +// MakeScriptTokenizer returns a new instance of a script tokenizer. Passing +// an unsupported script version will result in the returned tokenizer +// immediately having an err set accordingly. +// +// See the docs for ScriptTokenizer for more details. +func MakeScriptTokenizer(scriptVersion uint16, script []byte) ScriptTokenizer { + // Only version 0 scripts are currently supported. + var err error + if scriptVersion != 0 { + str := fmt.Sprintf("script version %d is not supported", scriptVersion) + err = scriptError(ErrUnsupportedScriptVersion, str) + + } + return ScriptTokenizer{version: scriptVersion, script: script, err: err} +} diff --git a/txscript/tokenizer_test.go b/txscript/tokenizer_test.go new file mode 100644 index 00000000..fd008bfc --- /dev/null +++ b/txscript/tokenizer_test.go @@ -0,0 +1,259 @@ +// Copyright (c) 2019 The Decred developers +// Use of this source code is governed by an ISC +// license that can be found in the LICENSE file. + +package txscript + +import ( + "bytes" + "fmt" + "testing" +) + +// TestScriptTokenizer ensures a wide variety of behavior provided by the script +// tokenizer performs as expected. +func TestScriptTokenizer(t *testing.T) { + t.Skip() + + type expectedResult struct { + op byte // expected parsed opcode + data []byte // expected parsed data + index int32 // expected index into raw script after parsing token + } + + type tokenizerTest struct { + name string // test description + script []byte // the script to tokenize + expected []expectedResult // the expected info after parsing each token + finalIdx int32 // the expected final byte index + err error // expected error + } + + // Add both positive and negative tests for OP_DATA_1 through OP_DATA_75. + const numTestsHint = 100 // Make prealloc linter happy. + tests := make([]tokenizerTest, 0, numTestsHint) + for op := byte(OP_DATA_1); op < OP_DATA_75; op++ { + data := bytes.Repeat([]byte{0x01}, int(op)) + tests = append(tests, tokenizerTest{ + name: fmt.Sprintf("OP_DATA_%d", op), + script: append([]byte{op}, data...), + expected: []expectedResult{{op, data, 1 + int32(op)}}, + finalIdx: 1 + int32(op), + err: nil, + }) + + // Create test that provides one less byte than the data push requires. + tests = append(tests, tokenizerTest{ + name: fmt.Sprintf("short OP_DATA_%d", op), + script: append([]byte{op}, data[1:]...), + expected: nil, + finalIdx: 0, + err: scriptError(ErrMalformedPush, ""), + }) + } + + // Add both positive and negative tests for OP_PUSHDATA{1,2,4}. + data := mustParseShortForm("0x01{76}") + tests = append(tests, []tokenizerTest{{ + name: "OP_PUSHDATA1", + script: mustParseShortForm("OP_PUSHDATA1 0x4c 0x01{76}"), + expected: []expectedResult{{OP_PUSHDATA1, data, 2 + int32(len(data))}}, + finalIdx: 2 + int32(len(data)), + err: nil, + }, { + name: "OP_PUSHDATA1 no data length", + script: mustParseShortForm("OP_PUSHDATA1"), + expected: nil, + finalIdx: 0, + err: scriptError(ErrMalformedPush, ""), + }, { + name: "OP_PUSHDATA1 short data by 1 byte", + script: mustParseShortForm("OP_PUSHDATA1 0x4c 0x01{75}"), + expected: nil, + finalIdx: 0, + err: scriptError(ErrMalformedPush, ""), + }, { + name: "OP_PUSHDATA2", + script: mustParseShortForm("OP_PUSHDATA2 0x4c00 0x01{76}"), + expected: []expectedResult{{OP_PUSHDATA2, data, 3 + int32(len(data))}}, + finalIdx: 3 + int32(len(data)), + err: nil, + }, { + name: "OP_PUSHDATA2 no data length", + script: mustParseShortForm("OP_PUSHDATA2"), + expected: nil, + finalIdx: 0, + err: scriptError(ErrMalformedPush, ""), + }, { + name: "OP_PUSHDATA2 short data by 1 byte", + script: mustParseShortForm("OP_PUSHDATA2 0x4c00 0x01{75}"), + expected: nil, + finalIdx: 0, + err: scriptError(ErrMalformedPush, ""), + }, { + name: "OP_PUSHDATA4", + script: mustParseShortForm("OP_PUSHDATA4 0x4c000000 0x01{76}"), + expected: []expectedResult{{OP_PUSHDATA4, data, 5 + int32(len(data))}}, + finalIdx: 5 + int32(len(data)), + err: nil, + }, { + name: "OP_PUSHDATA4 no data length", + script: mustParseShortForm("OP_PUSHDATA4"), + expected: nil, + finalIdx: 0, + err: scriptError(ErrMalformedPush, ""), + }, { + name: "OP_PUSHDATA4 short data by 1 byte", + script: mustParseShortForm("OP_PUSHDATA4 0x4c000000 0x01{75}"), + expected: nil, + finalIdx: 0, + err: scriptError(ErrMalformedPush, ""), + }}...) + + // Add tests for OP_0, and OP_1 through OP_16 (small integers/true/false). + opcodes := []byte{OP_0} + for op := byte(OP_1); op < OP_16; op++ { + opcodes = append(opcodes, op) + } + for _, op := range opcodes { + tests = append(tests, tokenizerTest{ + name: fmt.Sprintf("OP_%d", op), + script: []byte{op}, + expected: []expectedResult{{op, nil, 1}}, + finalIdx: 1, + err: nil, + }) + } + + // Add various positive and negative tests for multi-opcode scripts. + tests = append(tests, []tokenizerTest{{ + name: "pay-to-pubkey-hash", + script: mustParseShortForm("DUP HASH160 DATA_20 0x01{20} EQUAL CHECKSIG"), + expected: []expectedResult{ + {OP_DUP, nil, 1}, {OP_HASH160, nil, 2}, + {OP_DATA_20, mustParseShortForm("0x01{20}"), 23}, + {OP_EQUAL, nil, 24}, {OP_CHECKSIG, nil, 25}, + }, + finalIdx: 25, + err: nil, + }, { + name: "almost pay-to-pubkey-hash (short data)", + script: mustParseShortForm("DUP HASH160 DATA_20 0x01{17} EQUAL CHECKSIG"), + expected: []expectedResult{ + {OP_DUP, nil, 1}, {OP_HASH160, nil, 2}, + }, + finalIdx: 2, + err: scriptError(ErrMalformedPush, ""), + }, { + name: "almost pay-to-pubkey-hash (overlapped data)", + script: mustParseShortForm("DUP HASH160 DATA_20 0x01{19} EQUAL CHECKSIG"), + expected: []expectedResult{ + {OP_DUP, nil, 1}, {OP_HASH160, nil, 2}, + {OP_DATA_20, mustParseShortForm("0x01{19} EQUAL"), 23}, + {OP_CHECKSIG, nil, 24}, + }, + finalIdx: 24, + err: nil, + }, { + name: "pay-to-script-hash", + script: mustParseShortForm("HASH160 DATA_20 0x01{20} EQUAL"), + expected: []expectedResult{ + {OP_HASH160, nil, 1}, + {OP_DATA_20, mustParseShortForm("0x01{20}"), 22}, + {OP_EQUAL, nil, 23}, + }, + finalIdx: 23, + err: nil, + }, { + name: "almost pay-to-script-hash (short data)", + script: mustParseShortForm("HASH160 DATA_20 0x01{18} EQUAL"), + expected: []expectedResult{ + {OP_HASH160, nil, 1}, + }, + finalIdx: 1, + err: scriptError(ErrMalformedPush, ""), + }, { + name: "almost pay-to-script-hash (overlapped data)", + script: mustParseShortForm("HASH160 DATA_20 0x01{19} EQUAL"), + expected: []expectedResult{ + {OP_HASH160, nil, 1}, + {OP_DATA_20, mustParseShortForm("0x01{19} EQUAL"), 22}, + }, + finalIdx: 22, + err: nil, + }}...) + + const scriptVersion = 0 + for _, test := range tests { + tokenizer := MakeScriptTokenizer(scriptVersion, test.script) + var opcodeNum int + for tokenizer.Next() { + // Ensure Next never returns true when there is an error set. + if err := tokenizer.Err(); err != nil { + t.Fatalf("%q: Next returned true when tokenizer has err: %v", + test.name, err) + } + + // Ensure the test data expects a token to be parsed. + op := tokenizer.Opcode() + data := tokenizer.Data() + if opcodeNum >= len(test.expected) { + t.Fatalf("%q: unexpected token '%d' (data: '%x')", test.name, + op, data) + } + expected := &test.expected[opcodeNum] + + // Ensure the opcode and data are the expected values. + if op != expected.op { + t.Fatalf("%q: unexpected opcode -- got %v, want %v", test.name, + op, expected.op) + } + if !bytes.Equal(data, expected.data) { + t.Fatalf("%q: unexpected data -- got %x, want %x", test.name, + data, expected.data) + } + + tokenizerIdx := tokenizer.ByteIndex() + if tokenizerIdx != expected.index { + t.Fatalf("%q: unexpected byte index -- got %d, want %d", + test.name, tokenizerIdx, expected.index) + } + + opcodeNum++ + } + + // Ensure the tokenizer claims it is done. This should be the case + // regardless of whether or not there was a parse error. + if !tokenizer.Done() { + t.Fatalf("%q: tokenizer claims it is not done", test.name) + } + + // Ensure the error is as expected. + if test.err == nil && tokenizer.Err() != nil { + t.Fatalf("%q: unexpected tokenizer err -- got %v, want nil", + test.name, tokenizer.Err()) + } else if test.err != nil { + if !IsErrorCode(tokenizer.Err(), test.err.(Error).ErrorCode) { + t.Fatalf("%q: unexpected tokenizer err -- got %v, want %v", + test.name, tokenizer.Err(), test.err.(Error).ErrorCode) + } + } + + // Ensure the final index is the expected value. + tokenizerIdx := tokenizer.ByteIndex() + if tokenizerIdx != test.finalIdx { + t.Fatalf("%q: unexpected final byte index -- got %d, want %d", + test.name, tokenizerIdx, test.finalIdx) + } + } +} + +// TestScriptTokenizerUnsupportedVersion ensures the tokenizer fails immediately +// with an unsupported script version. +func TestScriptTokenizerUnsupportedVersion(t *testing.T) { + const scriptVersion = 65535 + tokenizer := MakeScriptTokenizer(scriptVersion, nil) + if !IsErrorCode(tokenizer.Err(), ErrUnsupportedScriptVersion) { + t.Fatalf("script tokenizer did not error with unsupported version") + } +}