txscript: Implement efficient opcode data removal.

This introduces a new function named removeOpcodeByDataRaw which accepts
the raw scripts and data to remove versus requiring the parsed opcodes
to both significantly optimize it as well as make it more flexible for
working with raw scripts.

There are several places in the rest of the code that currently only
have access to the parsed opcodes, so this only introduces the function
for use in the future and deprecates the existing one.

Note that, in practice, the script will never actually contain the data
that is intended to be removed since the function is only used during
signature verification to remove the signature itself which would
require some incredibly non-standard code to create.

Thus, as an optimization, it avoids allocating a new script unless there
is actually a match that needs to be removed.

Finally, it updates the tests to use the new function.
This commit is contained in:
Dave Collins 2019-03-13 01:12:51 -05:00 committed by Roy Lee
parent 5283e30bfc
commit f5d78e8b10
2 changed files with 61 additions and 7 deletions

View file

@ -310,6 +310,8 @@ func isCanonicalPush(opcode byte, data []byte) bool {
// removeOpcodeByData will return the script minus any opcodes that would push // removeOpcodeByData will return the script minus any opcodes that would push
// the passed data to the stack. // the passed data to the stack.
//
// DEPRECATED. Use removeOpcodeByDataRaw instead.
func removeOpcodeByData(pkscript []parsedOpcode, data []byte) []parsedOpcode { func removeOpcodeByData(pkscript []parsedOpcode, data []byte) []parsedOpcode {
retScript := make([]parsedOpcode, 0, len(pkscript)) retScript := make([]parsedOpcode, 0, len(pkscript))
for _, pop := range pkscript { for _, pop := range pkscript {
@ -323,6 +325,59 @@ func removeOpcodeByData(pkscript []parsedOpcode, data []byte) []parsedOpcode {
} }
// removeOpcodeByDataRaw will return the script minus any opcodes that perform a
// canonical push of data that contains the passed data to remove. This
// function assumes it is provided a version 0 script as any future version of
// script should avoid this functionality since it is unncessary due to the
// signature scripts not being part of the witness-free transaction hash.
//
// WARNING: This will return the passed script unmodified unless a modification
// is necessary in which case the modified script is returned. This implies
// callers may NOT rely on being able to safely mutate either the passed or
// returned script without potentially modifying the same data.
//
// NOTE: This function is only valid for version 0 scripts. Since the function
// does not accept a script version, the results are undefined for other script
// versions.
func removeOpcodeByDataRaw(script []byte, dataToRemove []byte) []byte {
// Avoid work when possible.
if len(script) == 0 || len(dataToRemove) == 0 {
return script
}
// Parse through the script looking for a canonical data push that contains
// the data to remove.
const scriptVersion = 0
var result []byte
var prevOffset int32
tokenizer := MakeScriptTokenizer(scriptVersion, script)
for tokenizer.Next() {
// In practice, the script will basically never actually contain the
// data since this function is only used during signature verification
// to remove the signature itself which would require some incredibly
// non-standard code to create.
//
// Thus, as an optimization, avoid allocating a new script unless there
// is actually a match that needs to be removed.
op, data := tokenizer.Opcode(), tokenizer.Data()
if isCanonicalPush(op, data) && bytes.Contains(data, dataToRemove) {
if result == nil {
fullPushLen := tokenizer.ByteIndex() - prevOffset
result = make([]byte, 0, int32(len(script))-fullPushLen)
result = append(result, script[0:prevOffset]...)
}
} else if result != nil {
result = append(result, script[prevOffset:tokenizer.ByteIndex()]...)
}
prevOffset = tokenizer.ByteIndex()
}
if result == nil {
result = script
}
return result
}
// calcHashPrevOuts calculates a single hash of all the previous outputs // calcHashPrevOuts calculates a single hash of all the previous outputs
// (txid:index) referenced within the passed transaction. This calculated hash // (txid:index) referenced within the passed transaction. This calculated hash
// can be re-used when validating all inputs spending segwit outputs, with a // can be re-used when validating all inputs spending segwit outputs, with a

View file

@ -4129,16 +4129,15 @@ func TestRemoveOpcodeByData(t *testing.T) {
}, },
} }
// tstRemoveOpcodeByData is a convenience function to parse the provided // tstRemoveOpcodeByData is a convenience function to ensure the provided
// raw script, remove the passed data, then unparse the result back // script parses before attempting to remove the passed data.
// into a raw script. const scriptVersion = 0
tstRemoveOpcodeByData := func(script []byte, data []byte) ([]byte, error) { tstRemoveOpcodeByData := func(script []byte, data []byte) ([]byte, error) {
pops, err := parseScript(script) if err := checkScriptParses(scriptVersion, script); err != nil {
if err != nil {
return nil, err return nil, err
} }
pops = removeOpcodeByData(pops, data)
return unparseScript(pops) return removeOpcodeByDataRaw(script, data), nil
} }
for _, test := range tests { for _, test := range tests {