From f980c9a28d943e926e2a4241b4e89de3e0a6fca6 Mon Sep 17 00:00:00 2001 From: Dave Collins Date: Wed, 13 Mar 2019 01:11:05 -0500 Subject: [PATCH] txscript: Optimize script disasm. This converts the DisasmString function to make use of the new zero-allocation script tokenizer instead of the far less efficient parseScript thereby significantly optimizing the function. In order to facilitate this, the opcode disassembly functionality is split into a separate function called disasmOpcode that accepts the opcode struct and data independently as opposed to requiring a parsed opcode. The new function also accepts a pointer to a string builder so the disassembly can be more efficiently be built. While here, the comment is modified to explicitly call out the script version semantics. The following is a before and after comparison of a large script: benchmark old ns/op new ns/op delta BenchmarkDisasmString-8 102902 40124 -61.01% benchmark old allocs new allocs delta BenchmarkDisasmString-8 46 51 +10.87% benchmark old bytes new bytes delta BenchmarkDisasmString-8 389324 130552 -66.47% --- txscript/opcode.go | 65 +++++++++++++++++++++++++++++----------------- txscript/script.go | 30 ++++++++++++++------- 2 files changed, 61 insertions(+), 34 deletions(-) diff --git a/txscript/opcode.go b/txscript/opcode.go index a878a966..7383f881 100644 --- a/txscript/opcode.go +++ b/txscript/opcode.go @@ -9,8 +9,10 @@ import ( "crypto/sha1" "crypto/sha256" "encoding/binary" + "encoding/hex" "fmt" "hash" + "strings" "golang.org/x/crypto/ripemd160" @@ -815,45 +817,60 @@ func (pop *parsedOpcode) checkMinimalDataPush() error { return nil } -// print returns a human-readable string representation of the opcode for use -// in script disassembly. -func (pop *parsedOpcode) print(oneline bool) string { - // The reference implementation one-line disassembly replaces opcodes - // which represent values (e.g. OP_0 through OP_16 and OP_1NEGATE) - // with the raw value. However, when not doing a one-line dissassembly, - // we prefer to show the actual opcode names. Thus, only replace the - // opcodes in question when the oneline flag is set. - opcodeName := pop.opcode.name - if oneline { +// disasmOpcode writes a human-readable disassembly of the provided opcode and +// data into the provided buffer. The compact flag indicates the disassembly +// should print a more compact representation of data-carrying and small integer +// opcodes. For example, OP_0 through OP_16 are replaced with the numeric value +// and data pushes are printed as only the hex representation of the data as +// opposed to including the opcode that specifies the amount of data to push as +// well. +func disasmOpcode(buf *strings.Builder, op *opcode, data []byte, compact bool) { + // Replace opcode which represent values (e.g. OP_0 through OP_16 and + // OP_1NEGATE) with the raw value when performing a compact disassembly. + opcodeName := op.name + if compact { if replName, ok := opcodeOnelineRepls[opcodeName]; ok { opcodeName = replName } - // Nothing more to do for non-data push opcodes. - if pop.opcode.length == 1 { - return opcodeName + // Either write the human-readable opcode or the parsed data in hex for + // data-carrying opcodes. + switch { + case op.length == 1: + buf.WriteString(opcodeName) + + default: + buf.WriteString(hex.EncodeToString(data)) } - return fmt.Sprintf("%x", pop.data) + return } - // Nothing more to do for non-data push opcodes. - if pop.opcode.length == 1 { - return opcodeName - } + buf.WriteString(opcodeName) + + switch op.length { + // Only write the opcode name for non-data push opcodes. + case 1: + return // Add length for the OP_PUSHDATA# opcodes. - retString := opcodeName - switch pop.opcode.length { case -1: - retString += fmt.Sprintf(" 0x%02x", len(pop.data)) + buf.WriteString(fmt.Sprintf(" 0x%02x", len(data))) case -2: - retString += fmt.Sprintf(" 0x%04x", len(pop.data)) + buf.WriteString(fmt.Sprintf(" 0x%04x", len(data))) case -4: - retString += fmt.Sprintf(" 0x%08x", len(pop.data)) + buf.WriteString(fmt.Sprintf(" 0x%08x", len(data))) } - return fmt.Sprintf("%s 0x%02x", retString, pop.data) + buf.WriteString(fmt.Sprintf(" 0x%02x", data)) +} + +// print returns a human-readable string representation of the opcode for use +// in script disassembly. +func (pop *parsedOpcode) print(compact bool) string { + var buf strings.Builder + disasmOpcode(&buf, pop.opcode, pop.data, compact) + return buf.String() } // bytes returns any data associated with the opcode encoded as it would be in diff --git a/txscript/script.go b/txscript/script.go index 92a50e37..b7c2ef96 100644 --- a/txscript/script.go +++ b/txscript/script.go @@ -8,6 +8,7 @@ import ( "bytes" "encoding/binary" "fmt" + "strings" "time" "github.com/btcsuite/btcd/chaincfg/chainhash" @@ -275,20 +276,29 @@ func unparseScript(pops []parsedOpcode) ([]byte, error) { // script up to the point the failure occurred along with the string '[error]' // appended. In addition, the reason the script failed to parse is returned // if the caller wants more information about the failure. -func DisasmString(buf []byte) (string, error) { - var disbuf bytes.Buffer - opcodes, err := parseScript(buf) - for _, pop := range opcodes { - disbuf.WriteString(pop.print(true)) +// +// NOTE: This function is only valid for version 0 scripts. Since the function +// does not accept a script version, the results are undefined for other script +// versions. +func DisasmString(script []byte) (string, error) { + const scriptVersion = 0 + + var disbuf strings.Builder + tokenizer := MakeScriptTokenizer(scriptVersion, script) + if tokenizer.Next() { + disasmOpcode(&disbuf, tokenizer.op, tokenizer.Data(), true) + } + for tokenizer.Next() { disbuf.WriteByte(' ') + disasmOpcode(&disbuf, tokenizer.op, tokenizer.Data(), true) } - if disbuf.Len() > 0 { - disbuf.Truncate(disbuf.Len() - 1) - } - if err != nil { + if tokenizer.Err() != nil { + if tokenizer.ByteIndex() != 0 { + disbuf.WriteByte(' ') + } disbuf.WriteString("[error]") } - return disbuf.String(), err + return disbuf.String(), tokenizer.Err() } // removeOpcode will remove any opcode matching ``opcode'' from the opcode