txscript: Optimize script disasm.

This converts the DisasmString function to make use of the new
zero-allocation script tokenizer instead of the far less efficient
parseScript, thereby significantly optimizing the function.

In order to facilitate this, the opcode disassembly functionality is
split into a separate function called disasmOpcode that accepts the
opcode struct and data independently as opposed to requiring a parsed
opcode.  The new function also accepts a pointer to a string builder so
the disassembly can be built more efficiently.

While here, the comment is modified to explicitly call out the script
version semantics.

The following is a before-and-after benchmark comparison of disassembling a large script:

benchmark                   old ns/op     new ns/op     delta
BenchmarkDisasmString-8     102902        40124         -61.01%

benchmark                   old allocs     new allocs     delta
BenchmarkDisasmString-8     46             51             +10.87%

benchmark                   old bytes     new bytes     delta
BenchmarkDisasmString-8     389324        130552        -66.47%
This commit is contained in:
Dave Collins 2019-03-13 01:11:05 -05:00 committed by Roy Lee
parent 94bb41664b
commit ce08988514
2 changed files with 61 additions and 34 deletions

View file

@ -9,8 +9,10 @@ import (
"crypto/sha1" "crypto/sha1"
"crypto/sha256" "crypto/sha256"
"encoding/binary" "encoding/binary"
"encoding/hex"
"fmt" "fmt"
"hash" "hash"
"strings"
"golang.org/x/crypto/ripemd160" "golang.org/x/crypto/ripemd160"
@ -815,45 +817,60 @@ func (pop *parsedOpcode) checkMinimalDataPush() error {
return nil return nil
} }
// print returns a human-readable string representation of the opcode for use // disasmOpcode writes a human-readable disassembly of the provided opcode and
// in script disassembly. // data into the provided buffer. The compact flag indicates the disassembly
func (pop *parsedOpcode) print(oneline bool) string { // should print a more compact representation of data-carrying and small integer
// The reference implementation one-line disassembly replaces opcodes // opcodes. For example, OP_0 through OP_16 are replaced with the numeric value
// which represent values (e.g. OP_0 through OP_16 and OP_1NEGATE) // and data pushes are printed as only the hex representation of the data as
// with the raw value. However, when not doing a one-line dissassembly, // opposed to including the opcode that specifies the amount of data to push as
// we prefer to show the actual opcode names. Thus, only replace the // well.
// opcodes in question when the oneline flag is set. func disasmOpcode(buf *strings.Builder, op *opcode, data []byte, compact bool) {
opcodeName := pop.opcode.name // Replace opcode which represent values (e.g. OP_0 through OP_16 and
if oneline { // OP_1NEGATE) with the raw value when performing a compact disassembly.
opcodeName := op.name
if compact {
if replName, ok := opcodeOnelineRepls[opcodeName]; ok { if replName, ok := opcodeOnelineRepls[opcodeName]; ok {
opcodeName = replName opcodeName = replName
} }
// Nothing more to do for non-data push opcodes. // Either write the human-readable opcode or the parsed data in hex for
if pop.opcode.length == 1 { // data-carrying opcodes.
return opcodeName switch {
case op.length == 1:
buf.WriteString(opcodeName)
default:
buf.WriteString(hex.EncodeToString(data))
} }
return fmt.Sprintf("%x", pop.data) return
} }
// Nothing more to do for non-data push opcodes. buf.WriteString(opcodeName)
if pop.opcode.length == 1 {
return opcodeName switch op.length {
} // Only write the opcode name for non-data push opcodes.
case 1:
return
// Add length for the OP_PUSHDATA# opcodes. // Add length for the OP_PUSHDATA# opcodes.
retString := opcodeName
switch pop.opcode.length {
case -1: case -1:
retString += fmt.Sprintf(" 0x%02x", len(pop.data)) buf.WriteString(fmt.Sprintf(" 0x%02x", len(data)))
case -2: case -2:
retString += fmt.Sprintf(" 0x%04x", len(pop.data)) buf.WriteString(fmt.Sprintf(" 0x%04x", len(data)))
case -4: case -4:
retString += fmt.Sprintf(" 0x%08x", len(pop.data)) buf.WriteString(fmt.Sprintf(" 0x%08x", len(data)))
} }
return fmt.Sprintf("%s 0x%02x", retString, pop.data) buf.WriteString(fmt.Sprintf(" 0x%02x", data))
}
// print returns a human-readable string representation of the opcode for use
// in script disassembly.
func (pop *parsedOpcode) print(compact bool) string {
var buf strings.Builder
disasmOpcode(&buf, pop.opcode, pop.data, compact)
return buf.String()
} }
// bytes returns any data associated with the opcode encoded as it would be in // bytes returns any data associated with the opcode encoded as it would be in

View file

@ -8,6 +8,7 @@ import (
"bytes" "bytes"
"encoding/binary" "encoding/binary"
"fmt" "fmt"
"strings"
"time" "time"
"github.com/btcsuite/btcd/chaincfg/chainhash" "github.com/btcsuite/btcd/chaincfg/chainhash"
@ -275,20 +276,29 @@ func unparseScript(pops []parsedOpcode) ([]byte, error) {
// script up to the point the failure occurred along with the string '[error]' // script up to the point the failure occurred along with the string '[error]'
// appended. In addition, the reason the script failed to parse is returned // appended. In addition, the reason the script failed to parse is returned
// if the caller wants more information about the failure. // if the caller wants more information about the failure.
func DisasmString(buf []byte) (string, error) { //
var disbuf bytes.Buffer // NOTE: This function is only valid for version 0 scripts. Since the function
opcodes, err := parseScript(buf) // does not accept a script version, the results are undefined for other script
for _, pop := range opcodes { // versions.
disbuf.WriteString(pop.print(true)) func DisasmString(script []byte) (string, error) {
const scriptVersion = 0
var disbuf strings.Builder
tokenizer := MakeScriptTokenizer(scriptVersion, script)
if tokenizer.Next() {
disasmOpcode(&disbuf, tokenizer.op, tokenizer.Data(), true)
}
for tokenizer.Next() {
disbuf.WriteByte(' ') disbuf.WriteByte(' ')
disasmOpcode(&disbuf, tokenizer.op, tokenizer.Data(), true)
} }
if disbuf.Len() > 0 { if tokenizer.Err() != nil {
disbuf.Truncate(disbuf.Len() - 1) if tokenizer.ByteIndex() != 0 {
} disbuf.WriteByte(' ')
if err != nil { }
disbuf.WriteString("[error]") disbuf.WriteString("[error]")
} }
return disbuf.String(), err return disbuf.String(), tokenizer.Err()
} }
// removeOpcode will remove any opcode matching ``opcode'' from the opcode // removeOpcode will remove any opcode matching ``opcode'' from the opcode