txscript: Optimize script disasm.

This converts the DisasmString function to make use of the new zero-allocation script tokenizer instead of the far less efficient parseScript thereby significantly optimizing the function. In order to facilitate this, the opcode disassembly functionality is split into a separate function called disasmOpcode that accepts the opcode struct and data independently as opposed to requiring a parsed opcode. The new function also accepts a pointer to a string builder so the disassembly can be built more efficiently. While here, the comment is modified to explicitly call out the script version semantics. The following is a before and after comparison of a large script:

benchmark                  old ns/op     new ns/op     delta
BenchmarkDisasmString-8    102902        40124         -61.01%

benchmark                  old allocs    new allocs    delta
BenchmarkDisasmString-8    46            51            +10.87%

benchmark                  old bytes     new bytes     delta
BenchmarkDisasmString-8    389324        130552        -66.47%
2019-03-13 01:11:05 -05:00 · 2019-03-13 01:11:05 -05:00 · f980c9a28d
commit f980c9a28d
parent 099784267e
2 changed files with 61 additions and 34 deletions
--- a/txscript/opcode.go
+++ b/txscript/opcode.go
@ -9,8 +9,10 @@ import (
 	"crypto/sha1"
 	"crypto/sha256"
 	"encoding/binary"
 	"encoding/hex"
 	"fmt"
 	"hash"
 	"strings"
 	"golang.org/x/crypto/ripemd160"
@ -815,45 +817,60 @@ func (pop *parsedOpcode) checkMinimalDataPush() error {
 	return nil
 }
-// print returns a human-readable string representation of the opcode for use
+// disasmOpcode writes a human-readable disassembly of the provided opcode and
-// in script disassembly.
+// data into the provided buffer.  The compact flag indicates the disassembly
-func (pop *parsedOpcode) print(oneline bool) string {
+// should print a more compact representation of data-carrying and small integer
-	// The reference implementation one-line disassembly replaces opcodes
+// opcodes.  For example, OP_0 through OP_16 are replaced with the numeric value
-	// which represent values (e.g. OP_0 through OP_16 and OP_1NEGATE)
+// and data pushes are printed as only the hex representation of the data as
-	// with the raw value.  However, when not doing a one-line dissassembly,
+// opposed to including the opcode that specifies the amount of data to push as
-	// we prefer to show the actual opcode names.  Thus, only replace the
+// well.
-	// opcodes in question when the oneline flag is set.
+func disasmOpcode(buf *strings.Builder, op *opcode, data []byte, compact bool) {
-	opcodeName := pop.opcode.name
+	// Replace opcodes which represent values (e.g. OP_0 through OP_16 and
-	if oneline {
+	// OP_1NEGATE) with the raw value when performing a compact disassembly.
 	opcodeName := op.name
 	if compact {
 		if replName, ok := opcodeOnelineRepls[opcodeName]; ok {
 			opcodeName = replName
 		}
-		// Nothing more to do for non-data push opcodes.
+		// Either write the human-readable opcode or the parsed data in hex for
-		if pop.opcode.length == 1 {
+		// data-carrying opcodes.
-			return opcodeName
+		switch {
 		case op.length == 1:
 			buf.WriteString(opcodeName)
 		default:
 			buf.WriteString(hex.EncodeToString(data))
 		}
-		return fmt.Sprintf("%x", pop.data)
+		return
 	}
-	// Nothing more to do for non-data push opcodes.
+	buf.WriteString(opcodeName)
-	if pop.opcode.length == 1 {
+
-		return opcodeName
+	switch op.length {
-	}
+	// Only write the opcode name for non-data push opcodes.
 	case 1:
 		return
 	// Add length for the OP_PUSHDATA# opcodes.
 	retString := opcodeName
 	switch pop.opcode.length {
 	case -1:
-		retString += fmt.Sprintf(" 0x%02x", len(pop.data))
+		buf.WriteString(fmt.Sprintf(" 0x%02x", len(data)))
 	case -2:
-		retString += fmt.Sprintf(" 0x%04x", len(pop.data))
+		buf.WriteString(fmt.Sprintf(" 0x%04x", len(data)))
 	case -4:
-		retString += fmt.Sprintf(" 0x%08x", len(pop.data))
+		buf.WriteString(fmt.Sprintf(" 0x%08x", len(data)))
 	}
-	return fmt.Sprintf("%s 0x%02x", retString, pop.data)
+	buf.WriteString(fmt.Sprintf(" 0x%02x", data))
 }
 // print renders the opcode, together with any data it carries, as
 // human-readable text suitable for script disassembly.  The compact flag is
 // handed to disasmOpcode unchanged and selects the compact representation.
 func (pop *parsedOpcode) print(compact bool) string {
 	out := new(strings.Builder)
 	disasmOpcode(out, pop.opcode, pop.data, compact)
 	return out.String()
 }
 // bytes returns any data associated with the opcode encoded as it would be in
--- a/txscript/script.go
+++ b/txscript/script.go
@ -8,6 +8,7 @@ import (
 	"bytes"
 	"encoding/binary"
 	"fmt"
 	"strings"
 	"time"
 	"github.com/btcsuite/btcd/chaincfg/chainhash"
@ -275,20 +276,29 @@ func unparseScript(pops []parsedOpcode) ([]byte, error) {
 // script up to the point the failure occurred along with the string '[error]'
 // appended.  In addition, the reason the script failed to parse is returned
 // if the caller wants more information about the failure.
-func DisasmString(buf []byte) (string, error) {
+//
-	var disbuf bytes.Buffer
+// NOTE: This function is only valid for version 0 scripts.  Since the function
-	opcodes, err := parseScript(buf)
+// does not accept a script version, the results are undefined for other script
-	for _, pop := range opcodes {
+// versions.
-		disbuf.WriteString(pop.print(true))
+func DisasmString(script []byte) (string, error) {
 	const scriptVersion = 0
 	var disbuf strings.Builder
 	tokenizer := MakeScriptTokenizer(scriptVersion, script)
 	if tokenizer.Next() {
 		disasmOpcode(&disbuf, tokenizer.op, tokenizer.Data(), true)
 	}
 	for tokenizer.Next() {
 		disbuf.WriteByte(' ')
 		disasmOpcode(&disbuf, tokenizer.op, tokenizer.Data(), true)
 	}
 	if tokenizer.Err() != nil {
 		if tokenizer.ByteIndex() != 0 {
 			disbuf.WriteByte(' ')
 		}
 	if disbuf.Len() > 0 {
 		disbuf.Truncate(disbuf.Len() - 1)
 	}
 	if err != nil {
 		disbuf.WriteString("[error]")
 	}
-	return disbuf.String(), err
+	return disbuf.String(), tokenizer.Err()
 }
 // removeOpcode will remove any opcode matching ``opcode'' from the opcode