// Copyright (c) 2015-2016 The btcsuite developers // Use of this source code is governed by an ISC // license that can be found in the LICENSE file. package blockchain import ( "github.com/btcsuite/btcd/btcec" "github.com/btcsuite/btcd/txscript" ) // ----------------------------------------------------------------------------- // A variable length quantity (VLQ) is an encoding that uses an arbitrary number // of binary octets to represent an arbitrarily large integer. The scheme // employs a most significant byte (MSB) base-128 encoding where the high bit in // each byte indicates whether or not the byte is the final one. In addition, // to ensure there are no redundant encodings, an offset is subtracted every // time a group of 7 bits is shifted out. Therefore each integer can be // represented in exactly one way, and each representation stands for exactly // one integer. // // Another nice property of this encoding is that it provides a compact // representation of values that are typically used to indicate sizes. For // example, the values 0 - 127 are represented with a single byte, 128 - 16511 // with two bytes, and 16512 - 2113663 with three bytes. // // While the encoding allows arbitrarily large integers, it is artificially // limited in this code to an unsigned 64-bit integer for efficiency purposes. // // Example encodings: // 0 -> [0x00] // 127 -> [0x7f] * Max 1-byte value // 128 -> [0x80 0x00] // 129 -> [0x80 0x01] // 255 -> [0x80 0x7f] // 256 -> [0x81 0x00] // 16511 -> [0xff 0x7f] * Max 2-byte value // 16512 -> [0x80 0x80 0x00] // 32895 -> [0x80 0xff 0x7f] // 2113663 -> [0xff 0xff 0x7f] * Max 3-byte value // 270549119 -> [0xff 0xff 0xff 0x7f] * Max 4-byte value // 2^64-1 -> [0x80 0xfe 0xfe 0xfe 0xfe 0xfe 0xfe 0xfe 0xfe 0x7f] // // References: // https://en.wikipedia.org/wiki/Variable-length_quantity // http://www.codecodex.com/wiki/Variable-Length_Integers // ----------------------------------------------------------------------------- // serializeSizeVLQ returns the number of bytes it would take to serialize the // passed number as a variable-length quantity according to the format described // above. func serializeSizeVLQ(n uint64) int { size := 1 for ; n > 0x7f; n = (n >> 7) - 1 { size++ } return size } // putVLQ serializes the provided number to a variable-length quantity according // to the format described above and returns the number of bytes of the encoded // value. The result is placed directly into the passed byte slice which must // be at least large enough to handle the number of bytes returned by the // serializeSizeVLQ function or it will panic. func putVLQ(target []byte, n uint64) int { offset := 0 for ; ; offset++ { // The high bit is set when another byte follows. highBitMask := byte(0x80) if offset == 0 { highBitMask = 0x00 } target[offset] = byte(n&0x7f) | highBitMask if n <= 0x7f { break } n = (n >> 7) - 1 } // Reverse the bytes so it is MSB-encoded. for i, j := 0, offset; i < j; i, j = i+1, j-1 { target[i], target[j] = target[j], target[i] } return offset + 1 } // deserializeVLQ deserializes the provided variable-length quantity according // to the format described above. It also returns the number of bytes // deserialized. func deserializeVLQ(serialized []byte) (uint64, int) { var n uint64 var size int for _, val := range serialized { size++ n = (n << 7) | uint64(val&0x7f) if val&0x80 != 0x80 { break } n++ } return n, size } // ----------------------------------------------------------------------------- // In order to reduce the size of stored scripts, a domain specific compression // algorithm is used which recognizes standard scripts and stores them using // less bytes than the original script. The compression algorithm used here was // obtained from Bitcoin Core, so all credits for the algorithm go to it. // // The general serialized format is: // // <script size or type><script data> // // Field Type Size // script size or type VLQ variable // script data []byte variable // // The specific serialized format for each recognized standard script is: // // - Pay-to-pubkey-hash: (21 bytes) - <0><20-byte pubkey hash> // - Pay-to-script-hash: (21 bytes) - <1><20-byte script hash> // - Pay-to-pubkey**: (33 bytes) - <2, 3, 4, or 5><32-byte pubkey X value> // 2, 3 = compressed pubkey with bit 0 specifying the y coordinate to use // 4, 5 = uncompressed pubkey with bit 0 specifying the y coordinate to use // ** Only valid public keys starting with 0x02, 0x03, and 0x04 are supported. // // Any scripts which are not recognized as one of the aforementioned standard // scripts are encoded using the general serialized format and encode the script // size as the sum of the actual size of the script and the number of special // cases. // ----------------------------------------------------------------------------- // The following constants specify the special constants used to identify a // special script type in the domain-specific compressed script encoding. // // NOTE: This section specifically does not use iota since these values are // serialized and must be stable for long-term storage. const ( // cstPayToPubKeyHash identifies a compressed pay-to-pubkey-hash script. cstPayToPubKeyHash = 0 // cstPayToScriptHash identifies a compressed pay-to-script-hash script. cstPayToScriptHash = 1 // cstPayToPubKeyComp2 identifies a compressed pay-to-pubkey script to // a compressed pubkey. Bit 0 specifies which y-coordinate to use // to reconstruct the full uncompressed pubkey. cstPayToPubKeyComp2 = 2 // cstPayToPubKeyComp3 identifies a compressed pay-to-pubkey script to // a compressed pubkey. Bit 0 specifies which y-coordinate to use // to reconstruct the full uncompressed pubkey. cstPayToPubKeyComp3 = 3 // cstPayToPubKeyUncomp4 identifies a compressed pay-to-pubkey script to // an uncompressed pubkey. Bit 0 specifies which y-coordinate to use // to reconstruct the full uncompressed pubkey. cstPayToPubKeyUncomp4 = 4 // cstPayToPubKeyUncomp5 identifies a compressed pay-to-pubkey script to // an uncompressed pubkey. Bit 0 specifies which y-coordinate to use // to reconstruct the full uncompressed pubkey. cstPayToPubKeyUncomp5 = 5 // numSpecialScripts is the number of special scripts recognized by the // domain-specific script compression algorithm. numSpecialScripts = 6 ) // isPubKeyHash returns whether or not the passed public key script is a // standard pay-to-pubkey-hash script along with the pubkey hash it is paying to // if it is. func isPubKeyHash(script []byte) (bool, []byte) { if len(script) == 25 && script[0] == txscript.OP_DUP && script[1] == txscript.OP_HASH160 && script[2] == txscript.OP_DATA_20 && script[23] == txscript.OP_EQUALVERIFY && script[24] == txscript.OP_CHECKSIG { return true, script[3:23] } return false, nil } // isScriptHash returns whether or not the passed public key script is a // standard pay-to-script-hash script along with the script hash it is paying to // if it is. func isScriptHash(script []byte) (bool, []byte) { if len(script) == 23 && script[0] == txscript.OP_HASH160 && script[1] == txscript.OP_DATA_20 && script[22] == txscript.OP_EQUAL { return true, script[2:22] } return false, nil } // isPubKey returns whether or not the passed public key script is a standard // pay-to-pubkey script that pays to a valid compressed or uncompressed public // key along with the serialized pubkey it is paying to if it is. // // NOTE: This function ensures the public key is actually valid since the // compression algorithm requires valid pubkeys. It does not support hybrid // pubkeys. This means that even if the script has the correct form for a // pay-to-pubkey script, this function will only return true when it is paying // to a valid compressed or uncompressed pubkey. func isPubKey(script []byte) (bool, []byte) { // Pay-to-compressed-pubkey script. if len(script) == 35 && script[0] == txscript.OP_DATA_33 && script[34] == txscript.OP_CHECKSIG && (script[1] == 0x02 || script[1] == 0x03) { // Ensure the public key is valid. serializedPubKey := script[1:34] _, err := btcec.ParsePubKey(serializedPubKey, btcec.S256()) if err == nil { return true, serializedPubKey } } // Pay-to-uncompressed-pubkey script. if len(script) == 67 && script[0] == txscript.OP_DATA_65 && script[66] == txscript.OP_CHECKSIG && script[1] == 0x04 { // Ensure the public key is valid. serializedPubKey := script[1:66] _, err := btcec.ParsePubKey(serializedPubKey, btcec.S256()) if err == nil { return true, serializedPubKey } } return false, nil } // compressedScriptSize returns the number of bytes the passed script would take // when encoded with the domain specific compression algorithm described above. func compressedScriptSize(pkScript []byte, version int32) int { // Pay-to-pubkey-hash script. if valid, _ := isPubKeyHash(pkScript); valid { return 21 } // Pay-to-script-hash script. if valid, _ := isScriptHash(pkScript); valid { return 21 } // Pay-to-pubkey (compressed or uncompressed) script. if valid, _ := isPubKey(pkScript); valid { return 33 } // When none of the above special cases apply, encode the script as is // preceded by the sum of its size and the number of special cases // encoded as a variable length quantity. return serializeSizeVLQ(uint64(len(pkScript)+numSpecialScripts)) + len(pkScript) } // decodeCompressedScriptSize treats the passed serialized bytes as a compressed // script, possibly followed by other data, and returns the number of bytes it // occupies taking into account the special encoding of the script size by the // domain specific compression algorithm described above. func decodeCompressedScriptSize(serialized []byte, version int32) int { scriptSize, bytesRead := deserializeVLQ(serialized) if bytesRead == 0 { return 0 } switch scriptSize { case cstPayToPubKeyHash: return 21 case cstPayToScriptHash: return 21 case cstPayToPubKeyComp2, cstPayToPubKeyComp3, cstPayToPubKeyUncomp4, cstPayToPubKeyUncomp5: return 33 } scriptSize -= numSpecialScripts scriptSize += uint64(bytesRead) return int(scriptSize) } // putCompressedScript compresses the passed script according to the domain // specific compression algorithm described above directly into the passed // target byte slice. The target byte slice must be at least large enough to // handle the number of bytes returned by the compressedScriptSize function or // it will panic. func putCompressedScript(target, pkScript []byte, version int32) int { // Pay-to-pubkey-hash script. if valid, hash := isPubKeyHash(pkScript); valid { target[0] = cstPayToPubKeyHash copy(target[1:21], hash) return 21 } // Pay-to-script-hash script. if valid, hash := isScriptHash(pkScript); valid { target[0] = cstPayToScriptHash copy(target[1:21], hash) return 21 } // Pay-to-pubkey (compressed or uncompressed) script. if valid, serializedPubKey := isPubKey(pkScript); valid { pubKeyFormat := serializedPubKey[0] switch pubKeyFormat { case 0x02, 0x03: target[0] = pubKeyFormat copy(target[1:33], serializedPubKey[1:33]) return 33 case 0x04: // Encode the oddness of the serialized pubkey into the // compressed script type. target[0] = pubKeyFormat | (serializedPubKey[64] & 0x01) copy(target[1:33], serializedPubKey[1:33]) return 33 } } // When none of the above special cases apply, encode the unmodified // script preceded by the sum of its size and the number of special // cases encoded as a variable length quantity. encodedSize := uint64(len(pkScript) + numSpecialScripts) vlqSizeLen := putVLQ(target, encodedSize) copy(target[vlqSizeLen:], pkScript) return vlqSizeLen + len(pkScript) } // decompressScript returns the original script obtained by decompressing the // passed compressed script according to the domain specific compression // algorithm described above. // // NOTE: The script parameter must already have been proven to be long enough // to contain the number of bytes returned by decodeCompressedScriptSize or it // will panic. This is acceptable since it is only an internal function. func decompressScript(compressedPkScript []byte, version int32) []byte { // In practice this function will not be called with a zero-length or // nil script since the nil script encoding includes the length, however // the code below assumes the length exists, so just return nil now if // the function ever ends up being called with a nil script in the // future. if len(compressedPkScript) == 0 { return nil } // Decode the script size and examine it for the special cases. encodedScriptSize, bytesRead := deserializeVLQ(compressedPkScript) switch encodedScriptSize { // Pay-to-pubkey-hash script. The resulting script is: // <OP_DUP><OP_HASH160><20 byte hash><OP_EQUALVERIFY><OP_CHECKSIG> case cstPayToPubKeyHash: pkScript := make([]byte, 25) pkScript[0] = txscript.OP_DUP pkScript[1] = txscript.OP_HASH160 pkScript[2] = txscript.OP_DATA_20 copy(pkScript[3:], compressedPkScript[bytesRead:bytesRead+20]) pkScript[23] = txscript.OP_EQUALVERIFY pkScript[24] = txscript.OP_CHECKSIG return pkScript // Pay-to-script-hash script. The resulting script is: // <OP_HASH160><20 byte script hash><OP_EQUAL> case cstPayToScriptHash: pkScript := make([]byte, 23) pkScript[0] = txscript.OP_HASH160 pkScript[1] = txscript.OP_DATA_20 copy(pkScript[2:], compressedPkScript[bytesRead:bytesRead+20]) pkScript[22] = txscript.OP_EQUAL return pkScript // Pay-to-compressed-pubkey script. The resulting script is: // <OP_DATA_33><33 byte compressed pubkey><OP_CHECKSIG> case cstPayToPubKeyComp2, cstPayToPubKeyComp3: pkScript := make([]byte, 35) pkScript[0] = txscript.OP_DATA_33 pkScript[1] = byte(encodedScriptSize) copy(pkScript[2:], compressedPkScript[bytesRead:bytesRead+32]) pkScript[34] = txscript.OP_CHECKSIG return pkScript // Pay-to-uncompressed-pubkey script. The resulting script is: // <OP_DATA_65><65 byte uncompressed pubkey><OP_CHECKSIG> case cstPayToPubKeyUncomp4, cstPayToPubKeyUncomp5: // Change the leading byte to the appropriate compressed pubkey // identifier (0x02 or 0x03) so it can be decoded as a // compressed pubkey. This really should never fail since the // encoding ensures it is valid before compressing to this type. compressedKey := make([]byte, 33) compressedKey[0] = byte(encodedScriptSize - 2) copy(compressedKey[1:], compressedPkScript[1:]) key, err := btcec.ParsePubKey(compressedKey, btcec.S256()) if err != nil { return nil } pkScript := make([]byte, 67) pkScript[0] = txscript.OP_DATA_65 copy(pkScript[1:], key.SerializeUncompressed()) pkScript[66] = txscript.OP_CHECKSIG return pkScript } // When none of the special cases apply, the script was encoded using // the general format, so reduce the script size by the number of // special cases and return the unmodified script. scriptSize := int(encodedScriptSize - numSpecialScripts) pkScript := make([]byte, scriptSize) copy(pkScript, compressedPkScript[bytesRead:bytesRead+scriptSize]) return pkScript } // ----------------------------------------------------------------------------- // In order to reduce the size of stored amounts, a domain specific compression // algorithm is used which relies on there typically being a lot of zeroes at // end of the amounts. The compression algorithm used here was obtained from // Bitcoin Core, so all credits for the algorithm go to it. // // While this is simply exchanging one uint64 for another, the resulting value // for typical amounts has a much smaller magnitude which results in fewer bytes // when encoded as variable length quantity. For example, consider the amount // of 0.1 BTC which is 10000000 satoshi. Encoding 10000000 as a VLQ would take // 4 bytes while encoding the compressed value of 8 as a VLQ only takes 1 byte. // // Essentially the compression is achieved by splitting the value into an // exponent in the range [0-9] and a digit in the range [1-9], when possible, // and encoding them in a way that can be decoded. More specifically, the // encoding is as follows: // - 0 is 0 // - Find the exponent, e, as the largest power of 10 that evenly divides the // value up to a maximum of 9 // - When e < 9, the final digit can't be 0 so store it as d and remove it by // dividing the value by 10 (call the result n). The encoded value is thus: // 1 + 10*(9*n + d-1) + e // - When e==9, the only thing known is the amount is not 0. The encoded value // is thus: // 1 + 10*(n-1) + e == 10 + 10*(n-1) // // Example encodings: // (The numbers in parenthesis are the number of bytes when serialized as a VLQ) // 0 (1) -> 0 (1) * 0.00000000 BTC // 1000 (2) -> 4 (1) * 0.00001000 BTC // 10000 (2) -> 5 (1) * 0.00010000 BTC // 12345678 (4) -> 111111101(4) * 0.12345678 BTC // 50000000 (4) -> 47 (1) * 0.50000000 BTC // 100000000 (4) -> 9 (1) * 1.00000000 BTC // 500000000 (5) -> 49 (1) * 5.00000000 BTC // 1000000000 (5) -> 10 (1) * 10.00000000 BTC // ----------------------------------------------------------------------------- // compressTxOutAmount compresses the passed amount according to the domain // specific compression algorithm described above. func compressTxOutAmount(amount uint64) uint64 { // No need to do any work if it's zero. if amount == 0 { return 0 } // Find the largest power of 10 (max of 9) that evenly divides the // value. exponent := uint64(0) for amount%10 == 0 && exponent < 9 { amount /= 10 exponent++ } // The compressed result for exponents less than 9 is: // 1 + 10*(9*n + d-1) + e if exponent < 9 { lastDigit := amount % 10 amount /= 10 return 1 + 10*(9*amount+lastDigit-1) + exponent } // The compressed result for an exponent of 9 is: // 1 + 10*(n-1) + e == 10 + 10*(n-1) return 10 + 10*(amount-1) } // decompressTxOutAmount returns the original amount the passed compressed // amount represents according to the domain specific compression algorithm // described above. func decompressTxOutAmount(amount uint64) uint64 { // No need to do any work if it's zero. if amount == 0 { return 0 } // The decompressed amount is either of the following two equations: // x = 1 + 10*(9*n + d - 1) + e // x = 1 + 10*(n - 1) + 9 amount-- // The decompressed amount is now one of the following two equations: // x = 10*(9*n + d - 1) + e // x = 10*(n - 1) + 9 exponent := amount % 10 amount /= 10 // The decompressed amount is now one of the following two equations: // x = 9*n + d - 1 | where e < 9 // x = n - 1 | where e = 9 n := uint64(0) if exponent < 9 { lastDigit := amount%9 + 1 amount /= 9 n = amount*10 + lastDigit } else { n = amount + 1 } // Apply the exponent. for ; exponent > 0; exponent-- { n *= 10 } return n } // ----------------------------------------------------------------------------- // Compressed transaction outputs consist of an amount and a public key script // both compressed using the domain specific compression algorithms previously // described. // // The serialized format is: // // <compressed amount><compressed script> // // Field Type Size // compressed amount VLQ variable // compressed script []byte variable // ----------------------------------------------------------------------------- // compressedTxOutSize returns the number of bytes the passed transaction output // fields would take when encoded with the format described above. The // preCompressed flag indicates the provided amount and script are already // compressed. This is useful since loaded utxo entries are not decompressed // until the output is accessed. func compressedTxOutSize(amount uint64, pkScript []byte, version int32, preCompressed bool) int { if preCompressed { return serializeSizeVLQ(amount) + len(pkScript) } return serializeSizeVLQ(compressTxOutAmount(amount)) + compressedScriptSize(pkScript, version) } // putCompressedTxOut potentially compresses the passed amount and script // according to their domain specific compression algorithms and encodes them // directly into the passed target byte slice with the format described above. // The preCompressed flag indicates the provided amount and script are already // compressed in which case the values are not modified. This is useful since // loaded utxo entries are not decompressed until the output is accessed. The // target byte slice must be at least large enough to handle the number of bytes // returned by the compressedTxOutSize function or it will panic. func putCompressedTxOut(target []byte, amount uint64, pkScript []byte, version int32, preCompressed bool) int { if preCompressed { offset := putVLQ(target, amount) copy(target[offset:], pkScript) return offset + len(pkScript) } offset := putVLQ(target, compressTxOutAmount(amount)) offset += putCompressedScript(target[offset:], pkScript, version) return offset } // decodeCompressedTxOut decodes the passed compressed txout, possibly followed // by other data, into its compressed amount and compressed script and returns // them along with the number of bytes they occupied. func decodeCompressedTxOut(serialized []byte, version int32) (uint64, []byte, int, error) { // Deserialize the compressed amount and ensure there are bytes // remaining for the compressed script. compressedAmount, bytesRead := deserializeVLQ(serialized) if bytesRead >= len(serialized) { return 0, nil, bytesRead, errDeserialize("unexpected end of " + "data after compressed amount") } // Decode the compressed script size and ensure there are enough bytes // left in the slice for it. scriptSize := decodeCompressedScriptSize(serialized[bytesRead:], version) if len(serialized[bytesRead:]) < scriptSize { return 0, nil, bytesRead, errDeserialize("unexpected end of " + "data after script size") } // Make a copy of the compressed script so the original serialized data // can be released as soon as possible. compressedScript := make([]byte, scriptSize) copy(compressedScript, serialized[bytesRead:bytesRead+scriptSize]) return compressedAmount, compressedScript, bytesRead + scriptSize, nil }