txscript: Optimize script disasm.

This converts the DisasmString function to make use of the new zero-allocation script tokenizer instead of the far less efficient parseScript, thereby significantly optimizing the function.

In order to facilitate this, the opcode disassembly functionality is split into a separate function called disasmOpcode that accepts the opcode struct and data independently as opposed to requiring a parsed opcode. The new function also accepts a pointer to a string builder so the disassembly can be built more efficiently.

While here, the comment is modified to explicitly call out the script version semantics.

The following is a before and after comparison of a large script:

benchmark                   old ns/op    new ns/op    delta
BenchmarkDisasmString-8     102902       40124        -61.01%

benchmark                   old allocs   new allocs   delta
BenchmarkDisasmString-8     46           51           +10.87%

benchmark                   old bytes    new bytes    delta
BenchmarkDisasmString-8     389324       130552       -66.47%
2019-03-13 01:11:05 -05:00 · 2019-03-13 01:11:05 -05:00 · ce08988514
commit ce08988514
parent 94bb41664b
2 changed files with 61 additions and 34 deletions
--- a/txscript/opcode.go
+++ b/txscript/opcode.go
@@ -9,8 +9,10 @@ import (
 	"crypto/sha1"
 	"crypto/sha256"
 	"encoding/binary"
+	"encoding/hex"
 	"fmt"
 	"hash"
+	"strings"

 	"golang.org/x/crypto/ripemd160"

@@ -815,45 +817,60 @@ func (pop *parsedOpcode) checkMinimalDataPush() error {
 	return nil
 }

-// print returns a human-readable string representation of the opcode for use
-// in script disassembly.
-func (pop *parsedOpcode) print(oneline bool) string {
-	// The reference implementation one-line disassembly replaces opcodes
-	// which represent values (e.g. OP_0 through OP_16 and OP_1NEGATE)
-	// with the raw value.  However, when not doing a one-line dissassembly,
-	// we prefer to show the actual opcode names.  Thus, only replace the
-	// opcodes in question when the oneline flag is set.
-	opcodeName := pop.opcode.name
-	if oneline {
+// disasmOpcode writes a human-readable disassembly of the provided opcode and
+// data into the provided buffer.  The compact flag indicates the disassembly
+// should print a more compact representation of data-carrying and small integer
+// opcodes.  For example, OP_0 through OP_16 are replaced with the numeric value
+// and data pushes are printed as only the hex representation of the data as
+// opposed to including the opcode that specifies the amount of data to push as
+// well.
+func disasmOpcode(buf *strings.Builder, op *opcode, data []byte, compact bool) {
+	// Replace opcode which represent values (e.g. OP_0 through OP_16 and
+	// OP_1NEGATE) with the raw value when performing a compact disassembly.
+	opcodeName := op.name
+	if compact {
 		if replName, ok := opcodeOnelineRepls[opcodeName]; ok {
 			opcodeName = replName
 		}

-		// Nothing more to do for non-data push opcodes.
-		if pop.opcode.length == 1 {
-			return opcodeName
+		// Either write the human-readable opcode or the parsed data in hex for
+		// data-carrying opcodes.
+		switch {
+		case op.length == 1:
+			buf.WriteString(opcodeName)
+
+		default:
+			buf.WriteString(hex.EncodeToString(data))
 		}

-		return fmt.Sprintf("%x", pop.data)
+		return
 	}

-	// Nothing more to do for non-data push opcodes.
-	if pop.opcode.length == 1 {
-		return opcodeName
-	}
+	buf.WriteString(opcodeName)
+
+	switch op.length {
+	// Only write the opcode name for non-data push opcodes.
+	case 1:
+		return

 	// Add length for the OP_PUSHDATA# opcodes.
-	retString := opcodeName
-	switch pop.opcode.length {
 	case -1:
-		retString += fmt.Sprintf(" 0x%02x", len(pop.data))
+		buf.WriteString(fmt.Sprintf(" 0x%02x", len(data)))
 	case -2:
-		retString += fmt.Sprintf(" 0x%04x", len(pop.data))
+		buf.WriteString(fmt.Sprintf(" 0x%04x", len(data)))
 	case -4:
-		retString += fmt.Sprintf(" 0x%08x", len(pop.data))
+		buf.WriteString(fmt.Sprintf(" 0x%08x", len(data)))
 	}

-	return fmt.Sprintf("%s 0x%02x", retString, pop.data)
+	buf.WriteString(fmt.Sprintf(" 0x%02x", data))
+}
+
+// print returns a human-readable string representation of the opcode for use
+// in script disassembly.
+func (pop *parsedOpcode) print(compact bool) string {
+	var buf strings.Builder
+	disasmOpcode(&buf, pop.opcode, pop.data, compact)
+	return buf.String()
 }

 // bytes returns any data associated with the opcode encoded as it would be in
--- a/txscript/script.go
+++ b/txscript/script.go
@@ -8,6 +8,7 @@ import (
 	"bytes"
 	"encoding/binary"
 	"fmt"
+	"strings"
 	"time"

 	"github.com/btcsuite/btcd/chaincfg/chainhash"
@@ -275,20 +276,29 @@ func unparseScript(pops []parsedOpcode) ([]byte, error) {
 // script up to the point the failure occurred along with the string '[error]'
 // appended.  In addition, the reason the script failed to parse is returned
 // if the caller wants more information about the failure.
-func DisasmString(buf []byte) (string, error) {
-	var disbuf bytes.Buffer
-	opcodes, err := parseScript(buf)
-	for _, pop := range opcodes {
-		disbuf.WriteString(pop.print(true))
+//
+// NOTE: This function is only valid for version 0 scripts.  Since the function
+// does not accept a script version, the results are undefined for other script
+// versions.
+func DisasmString(script []byte) (string, error) {
+	const scriptVersion = 0
+
+	var disbuf strings.Builder
+	tokenizer := MakeScriptTokenizer(scriptVersion, script)
+	if tokenizer.Next() {
+		disasmOpcode(&disbuf, tokenizer.op, tokenizer.Data(), true)
+	}
+	for tokenizer.Next() {
 		disbuf.WriteByte(' ')
+		disasmOpcode(&disbuf, tokenizer.op, tokenizer.Data(), true)
 	}
-	if disbuf.Len() > 0 {
-		disbuf.Truncate(disbuf.Len() - 1)
-	}
-	if err != nil {
+	if tokenizer.Err() != nil {
+		if tokenizer.ByteIndex() != 0 {
+			disbuf.WriteByte(' ')
+		}
 		disbuf.WriteString("[error]")
 	}
-	return disbuf.String(), err
+	return disbuf.String(), tokenizer.Err()
 }

 // removeOpcode will remove any opcode matching ``opcode'' from the opcode