fully match python's stream creation and decoding

This commit is contained in:
Alex Grintsvayg 2018-10-23 16:41:19 -04:00
parent ad5abf26a8
commit 6356308048
No known key found for this signature in database
GPG key ID: AEB3F089F86A22B5
4 changed files with 272 additions and 202 deletions

View file

@ -4,10 +4,8 @@ import (
"bytes"
"crypto/aes"
"crypto/cipher"
"crypto/rand"
"crypto/sha512"
"encoding/hex"
"encoding/json"
"strconv"
"github.com/lbryio/lbry.go/errors"
@ -15,9 +13,6 @@ import (
const MaxBlobSize = 2097152 // 2mb, or 2 * 2^20
// -1 to leave room for padding, since there must be at least one byte of pkcs7 padding
const maxBlobDataSize = MaxBlobSize - 1
type Blob []byte
var ErrBlobTooBig = errors.Base("blob must be at most " + strconv.Itoa(MaxBlobSize) + " bytes")
@ -36,6 +31,11 @@ func (b Blob) Hash() []byte {
return hashBytes[:]
}
// HashHex returns the blob hash as a hex string
func (b Blob) HashHex() string {
return hex.EncodeToString(b.Hash())
}
// ValidForSend returns true if the blob size is within the limits
func (b Blob) ValidForSend() error {
if b.Size() > MaxBlobSize {
@ -101,8 +101,10 @@ func pkcs7Pad(data []byte, blockLen int) ([]byte, error) {
if padLen == 0 {
padLen = blockLen
}
pad := bytes.Repeat([]byte{byte(padLen)}, padLen)
return append(data, pad...), nil
padded := make([]byte, len(data)+padLen)
copy(padded, data)
copy(padded[len(padded)-padLen:], bytes.Repeat([]byte{byte(padLen)}, padLen))
return padded, nil
}
func pkcs7Unpad(data []byte, blockLen int) ([]byte, error) {
@ -126,154 +128,3 @@ func pkcs7Unpad(data []byte, blockLen int) ([]byte, error) {
return data[:len(data)-padLen], nil
}
// BlobInfo is the stream descriptor info for a single blob in a stream
// Encoding to and from JSON is customized to match existing behavior (see json.go in package)
type BlobInfo struct {
Length int `json:"length"`
BlobNum int `json:"blob_num"`
BlobHash []byte `json:"-"`
IV []byte `json:"-"`
}
// Hash returns the hash of the blob info for calculating the stream hash
func (bi BlobInfo) Hash() []byte {
sum := sha512.New384()
if bi.Length > 0 {
sum.Write([]byte(hex.EncodeToString(bi.BlobHash)))
}
sum.Write([]byte(strconv.Itoa(bi.BlobNum)))
sum.Write([]byte(hex.EncodeToString(bi.IV)))
sum.Write([]byte(strconv.Itoa(bi.Length)))
return sum.Sum(nil)
}
// SDBlob contains information about the rest of the blobs in the stream
// Encoding to and from JSON is customized to match existing behavior (see json.go in package)
type SDBlob struct {
StreamName string `json:"-"`
BlobInfos []BlobInfo `json:"blobs"`
StreamType string `json:"stream_type"`
Key []byte `json:"-"`
SuggestedFileName string `json:"-"`
StreamHash []byte `json:"-"`
ivFunc func() []byte
}
// ToBlob converts the SDBlob to a normal data Blob
func (s SDBlob) ToBlob() (Blob, error) {
b, err := json.Marshal(s)
return Blob(b), err
}
// FromBlob unmarshals a data Blob that should contain SDBlob data
func (s *SDBlob) FromBlob(b Blob) error {
return json.Unmarshal(b, s)
}
func NewSdBlob(blobs []Blob) *SDBlob {
return newSdBlob(blobs, nil, nil)
}
func newSdBlob(blobs []Blob, key []byte, ivs [][]byte) *SDBlob {
sd := &SDBlob{}
if key == nil {
key = randIV()
}
sd.Key = key
if ivs == nil {
ivs = make([][]byte, len(blobs))
for i := range ivs {
ivs[i] = randIV()
}
}
for i, b := range blobs {
sd.addBlob(b, ivs[i])
}
sd.updateStreamHash()
return sd
}
// addBlob adds the blob's info to stream
func (s *SDBlob) addBlob(b Blob, iv []byte) {
if iv == nil {
iv = s.nextIV()
}
s.BlobInfos = append(s.BlobInfos, BlobInfo{
BlobNum: len(s.BlobInfos),
Length: b.Size(),
BlobHash: b.Hash(),
IV: iv,
})
}
// nextIV returns the next IV using ivFunc, or a random IV if no ivFunc is set
func (s SDBlob) nextIV() []byte {
if s.ivFunc != nil {
return s.ivFunc()
}
return randIV()
}
// IsValid returns true if the set StreamHash matches the current hash of the stream data
func (s SDBlob) IsValid() bool {
return bytes.Equal(s.StreamHash, s.computeStreamHash())
}
// updateStreamHash sets the stream hash to the current hash of the stream data
func (s *SDBlob) updateStreamHash() {
s.StreamHash = s.computeStreamHash()
}
// computeStreamHash calculates the stream hash for the stream
func (s *SDBlob) computeStreamHash() []byte {
return streamHash(
hex.EncodeToString([]byte(s.StreamName)),
hex.EncodeToString(s.Key),
hex.EncodeToString([]byte(s.SuggestedFileName)),
s.BlobInfos,
)
}
func (s SDBlob) fileSize() int {
size := 0
for _, bi := range s.BlobInfos {
size += bi.Length
}
return size
}
// streamHash calculates the stream hash, given the stream's fields and blobs
func streamHash(hexStreamName, hexKey, hexSuggestedFileName string, blobInfos []BlobInfo) []byte {
blobSum := sha512.New384()
for _, b := range blobInfos {
blobSum.Write(b.Hash())
}
sum := sha512.New384()
sum.Write([]byte(hexStreamName))
sum.Write([]byte(hexKey))
sum.Write([]byte(hexSuggestedFileName))
sum.Write(blobSum.Sum(nil))
return sum.Sum(nil)
}
// randIV returns a random AES IV
func randIV() []byte {
iv := make([]byte, aes.BlockSize)
_, err := rand.Read(iv)
if err != nil {
panic("failed to make random iv")
}
return iv
}
// NullIV returns an IV of 0s
func NullIV() []byte {
return make([]byte, aes.BlockSize)
}

View file

@ -53,18 +53,18 @@ func TestSdBlob_UnmarshalJSON(t *testing.T) {
// can MAYBE use https://godoc.org/github.com/docker/go/canonical/json#Encoder.Canonical
rawBlob = strings.Replace(rawBlob, " ", "", -1)
b := SDBlob{}
err := json.Unmarshal([]byte(rawBlob), &b)
sdBlob := SDBlob{}
err := json.Unmarshal([]byte(rawBlob), &sdBlob)
if err != nil {
t.Fatal(err)
}
if !b.IsValid() {
if !sdBlob.IsValid() {
t.Fatalf("decoded blob is not valid. expected stream hash %s, got %s",
hex.EncodeToString(b.StreamHash), hex.EncodeToString(b.computeStreamHash()))
hex.EncodeToString(sdBlob.StreamHash), hex.EncodeToString(sdBlob.computeStreamHash()))
}
reEncoded, err := json.Marshal(b)
reEncoded, err := json.Marshal(sdBlob)
if err != nil {
t.Fatal(err)
}
@ -213,45 +213,59 @@ func TestBlob_Plaintext(t *testing.T) {
}
}
func TestBlob_DecryptStream(t *testing.T) {
sdHash := "1bf7d39c45d1a38ffa74bff179bf7f67d400ff57fa0b5a0308963f08d01712b3079530a8c188e8c89d9b390c6ee06f05"
sdBlob := &SDBlob{}
err := json.Unmarshal(testdata(t, sdHash), sdBlob)
func TestStreamToFile(t *testing.T) {
blobHashes := []string{
"1bf7d39c45d1a38ffa74bff179bf7f67d400ff57fa0b5a0308963f08d01712b3079530a8c188e8c89d9b390c6ee06f05", // sd hash
"a2f1841bb9c5f3b583ac3b8c07ee1a5bf9cc48923721c30d5ca6318615776c284e8936d72fa4db7fdda2e4e9598b1e6c",
"0c9675ad7f40f29dcd41883ed9cf7e145bbb13976d9b83ab9354f4f61a87f0f7771a56724c2aa7a5ab43c68d7942e5cb",
"a4d07d442b9907036c75b6c92db316a8b8428733bf5ec976627a48a7c862bf84db33075d54125a7c0b297bd2dc445f1c",
"dcd2093f4a3eca9f6dd59d785d0bef068fee788481986aa894cf72ed4d992c0ff9d19d1743525de2f5c3c62f5ede1c58",
}
stream := make(Stream, len(blobHashes))
for i, hash := range blobHashes {
stream[i] = testdata(t, hash)
}
data, err := stream.Data()
if err != nil {
t.Fatal(err)
}
if !sdBlob.IsValid() {
t.Fatal("sd blob does not appear to be valid")
}
var file []byte
for _, bi := range sdBlob.BlobInfos {
if bi.Length == 0 {
continue
}
blobHash := hex.EncodeToString(bi.BlobHash)
blob := Blob(testdata(t, blobHash))
plaintext, err := blob.Plaintext(sdBlob.Key, bi.IV)
if err != nil {
t.Fatal(err)
}
file = append(file, plaintext...)
}
expectedLen := 6990951
actualLen := len(file)
actualLen := len(data)
if actualLen != expectedLen {
t.Errorf("file length mismatch. got %d, expected %d", actualLen, expectedLen)
}
expectedSha256 := unhex(t, "51e4d03bd6d69ea17d1be3ce01fdffa44ffe053f2dbce8d42a50283b2890fea2")
actualSha256 := sha256.Sum256(file)
actualSha256 := sha256.Sum256(data)
if !bytes.Equal(actualSha256[:], expectedSha256) {
t.Errorf("file hash mismatch. got %s, expected %s", hex.EncodeToString(actualSha256[:]), hex.EncodeToString(expectedSha256))
}
sdBlob := &SDBlob{}
err = sdBlob.FromBlob(stream[0])
if err != nil {
t.Fatal(err)
}
newStream, err := Reconstruct(data, *sdBlob)
if err != nil {
t.Fatal(err)
}
if len(newStream) != len(blobHashes) {
t.Fatalf("stream length mismatch. got %d blobs, expected %d", len(newStream), len(blobHashes))
}
for i, hash := range blobHashes {
if newStream[i].HashHex() != hash {
t.Errorf("blob %d hash mismatch. got %s, expected %s", i, newStream[i].HashHex(), hash)
}
}
}
func TestNew(t *testing.T) {

151
stream/sdBlob.go Normal file
View file

@ -0,0 +1,151 @@
package stream
import (
"bytes"
"crypto/aes"
"crypto/rand"
"crypto/sha512"
"encoding/hex"
"encoding/json"
"strconv"
)
const streamTypeLBRYFile = "lbryfile"
// BlobInfo is the stream descriptor info for a single blob in a stream.
// Encoding to and from JSON is customized to match existing behavior (see json.go in package).
type BlobInfo struct {
	Length   int    `json:"length"`
	BlobNum  int    `json:"blob_num"`
	BlobHash []byte `json:"-"`
	IV       []byte `json:"-"`
}

// Hash returns the hash of the blob info for calculating the stream hash.
// All fields are hashed as their hex/decimal string forms, in a fixed order.
func (bi BlobInfo) Hash() []byte {
	digest := sha512.New384()
	// a 0-length blob (the stream terminator) contributes no blob hash
	if bi.Length > 0 {
		digest.Write([]byte(hex.EncodeToString(bi.BlobHash)))
	}
	digest.Write([]byte(strconv.Itoa(bi.BlobNum)))
	digest.Write([]byte(hex.EncodeToString(bi.IV)))
	digest.Write([]byte(strconv.Itoa(bi.Length)))
	return digest.Sum(nil)
}
// SDBlob contains information about the rest of the blobs in the stream
// Encoding to and from JSON is customized to match existing behavior (see json.go in package)
type SDBlob struct {
	StreamName        string     `json:"-"` // hex-encoded when hashed (see computeStreamHash); JSON handling is custom
	BlobInfos         []BlobInfo `json:"blobs"`
	StreamType        string     `json:"stream_type"` // set to streamTypeLBRYFile by newSdBlob
	Key               []byte     `json:"-"`           // key used to decrypt the stream's blobs (see Stream.Data)
	SuggestedFileName string     `json:"-"`
	StreamHash        []byte     `json:"-"` // set by updateStreamHash, verified by IsValid
}
// ToBlob converts the SDBlob to a normal data Blob by JSON-encoding it.
func (s SDBlob) ToBlob() (Blob, error) {
	encoded, err := json.Marshal(s)
	if err != nil {
		return nil, err
	}
	return Blob(encoded), nil
}
// FromBlob unmarshals a data Blob that should contain SDBlob data,
// overwriting the receiver's fields with the decoded values.
func (s *SDBlob) FromBlob(b Blob) error {
	err := json.Unmarshal(b, s)
	return err
}
// newSdBlob builds the SD blob describing the given content blobs.
// ivs must hold exactly one IV per content blob plus one for the
// terminating 0-length blob; anything else is a programmer error and panics.
func newSdBlob(blobs []Blob, key []byte, ivs [][]byte, streamName, suggestedFilename string) *SDBlob {
	// +1 for terminating 0-length blob
	if len(ivs) != len(blobs)+1 {
		panic("wrong number of IVs provided")
	}

	sd := &SDBlob{
		StreamType:        streamTypeLBRYFile,
		StreamName:        streamName,
		SuggestedFileName: suggestedFilename,
		Key:               key,
	}

	for i := range blobs {
		sd.addBlob(blobs[i], ivs[i])
	}

	// the stream ends with an empty blob so decoders know where it stops
	sd.addBlob(Blob{}, ivs[len(blobs)])

	sd.updateStreamHash()
	return sd
}
// addBlob appends the blob's info (length, hash, IV, and its position in the
// stream) to the SD blob's BlobInfos. An empty IV is a programmer error.
func (s *SDBlob) addBlob(b Blob, iv []byte) {
	if len(iv) == 0 {
		panic("empty IV")
	}
	info := BlobInfo{
		BlobNum:  len(s.BlobInfos), // position in the stream
		Length:   b.Size(),
		BlobHash: b.Hash(),
		IV:       iv,
	}
	s.BlobInfos = append(s.BlobInfos, info)
}
// IsValid returns true if the set StreamHash matches the current hash of the stream data.
func (s SDBlob) IsValid() bool {
	expected := s.computeStreamHash()
	return bytes.Equal(s.StreamHash, expected)
}
// updateStreamHash sets the stream hash to the current hash of the stream data.
// newSdBlob calls this after all blob infos have been added.
func (s *SDBlob) updateStreamHash() {
	s.StreamHash = s.computeStreamHash()
}
// computeStreamHash calculates the stream hash for the stream.
// The name, key, and suggested filename are hex-encoded before hashing
// to match the canonical stream-hash format.
func (s *SDBlob) computeStreamHash() []byte {
	hexName := hex.EncodeToString([]byte(s.StreamName))
	hexKey := hex.EncodeToString(s.Key)
	hexFilename := hex.EncodeToString([]byte(s.SuggestedFileName))
	return streamHash(hexName, hexKey, hexFilename, s.BlobInfos)
}
// fileSize returns the sum of the Length fields of all blob infos in the
// SD blob (the terminating 0-length blob contributes nothing).
func (s SDBlob) fileSize() int {
	total := 0
	for i := range s.BlobInfos {
		total += s.BlobInfos[i].Length
	}
	return total
}
// streamHash calculates the stream hash, given the stream's (hex-encoded)
// fields and its blob infos.
func streamHash(hexStreamName, hexKey, hexSuggestedFileName string, blobInfos []BlobInfo) []byte {
	// first combine the per-blob-info hashes into one digest
	blobDigest := sha512.New384()
	for i := range blobInfos {
		blobDigest.Write(blobInfos[i].Hash())
	}

	// then hash the stream fields followed by the combined blob digest
	streamDigest := sha512.New384()
	streamDigest.Write([]byte(hexStreamName))
	streamDigest.Write([]byte(hexKey))
	streamDigest.Write([]byte(hexSuggestedFileName))
	streamDigest.Write(blobDigest.Sum(nil))
	return streamDigest.Sum(nil)
}
// randIV returns a cryptographically random AES IV (aes.BlockSize bytes).
// It panics if the system's random source fails, since no IV can be produced.
func randIV() []byte {
	iv := make([]byte, aes.BlockSize)
	if _, err := rand.Read(iv); err != nil {
		panic("failed to make random iv")
	}
	return iv
}
// NullIV returns an all-zero AES IV (aes.BlockSize bytes of 0).
func NullIV() []byte {
	return bytes.Repeat([]byte{0}, aes.BlockSize)
}

View file

@ -2,29 +2,51 @@ package stream
import (
"bytes"
"math"
"strings"
"github.com/lbryio/lbry.go/errors"
)
type Stream []Blob
// -1 to leave room for padding, since there must be at least one byte of pkcs7 padding
const maxBlobDataSize = MaxBlobSize - 1
// New creates a new Stream from a byte slice
func New(data []byte) (Stream, error) {
var err error
numBlobs := len(data) / maxBlobDataSize
if len(data)%maxBlobDataSize != 0 {
numBlobs++ // ++ for unfinished blob at the end
}
key := randIV()
ivs := make([][]byte, numBlobs)
ivs := make([][]byte, numContentBlobs(data)+1) // +1 for terminating 0-length blob
for i := range ivs {
ivs[i] = randIV()
}
return makeStream(data, key, ivs, "", "")
}
// Reconstruct creates a stream from the given data using predetermined IVs and key from the SD blob
// NOTE: this will assume that all blobs except the last one are at max length. in theory this is not
// required, but in practice this is always true. if this is false, streams may not match exactly
// Reconstruct creates a stream from the given data using predetermined IVs and key from the SD blob
// NOTE: this will assume that all blobs except the last one are at max length. in theory this is not
// required, but in practice this is always true. if this is false, streams may not match exactly
func Reconstruct(data []byte, sdBlob SDBlob) (Stream, error) {
	ivs := make([][]byte, 0, len(sdBlob.BlobInfos))
	for _, bi := range sdBlob.BlobInfos {
		ivs = append(ivs, bi.IV)
	}
	return makeStream(data, sdBlob.Key, ivs, sdBlob.StreamName, sdBlob.SuggestedFileName)
}
func makeStream(data, key []byte, ivs [][]byte, streamName, suggestedFilename string) (Stream, error) {
var err error
numBlobs := numContentBlobs(data)
if len(ivs) != numBlobs+1 { // +1 for terminating 0-length blob
return nil, errors.Err("incorrect number of IVs provided")
}
s := make(Stream, numBlobs+1) // +1 for sd blob
for i := 0; i < numBlobs; i++ {
start := i - 1*maxBlobDataSize
start := i * maxBlobDataSize
end := start + maxBlobDataSize
if end > len(data) {
end = len(data)
@ -35,18 +57,24 @@ func New(data []byte) (Stream, error) {
}
}
sd := newSdBlob(s[1:], key, ivs)
s[0], err = sd.ToBlob()
sd := newSdBlob(s[1:], key, ivs, streamName, suggestedFilename)
jsonSD, err := sd.ToBlob()
if err != nil {
return nil, err
}
// COMPATIBILITY HACK to make json output match python's json. this can be
// removed when we implement canonical JSON encoding
jsonSD = []byte(strings.Replace(string(jsonSD), ",", ", ", -1))
jsonSD = []byte(strings.Replace(string(jsonSD), ":", ": ", -1))
s[0] = jsonSD
return s, nil
}
func (s Stream) Data() ([]byte, error) {
if len(s) < 2 {
return nil, errors.Err("stream must be at least 2 blobs long")
return nil, errors.Err("stream must be at least 2 blobs long") // sd blob and content blob
}
sdBlob := &SDBlob{}
@ -59,13 +87,34 @@ func (s Stream) Data() ([]byte, error) {
return nil, errors.Err("sd blob is not valid")
}
if sdBlob.BlobInfos[len(sdBlob.BlobInfos)-1].Length != 0 {
return nil, errors.Err("sd blob is missing the terminating 0-length blob")
}
if len(s[1:]) != len(sdBlob.BlobInfos)-1 { // -1 for terminating 0-length blob
return nil, errors.Err("number of blobs in stream does not match number of blobs in sd info")
}
var file []byte
for i, b := range s[1:] {
if !bytes.Equal(b.Hash(), sdBlob.BlobInfos[i].BlobHash) {
for i, blobInfo := range sdBlob.BlobInfos {
if blobInfo.Length == 0 {
if i != len(sdBlob.BlobInfos)-1 {
return nil, errors.Err("got 0-length blob before end of stream")
}
break
}
if blobInfo.BlobNum != i {
return nil, errors.Err("blobs are out of order in sd blob")
}
blob := s[i+1]
if !bytes.Equal(blob.Hash(), blobInfo.BlobHash) {
return nil, errors.Err("blob hash doesn't match hash in blobInfo")
}
data, err := b.Plaintext(sdBlob.Key, sdBlob.BlobInfos[i].IV)
data, err := blob.Plaintext(sdBlob.Key, blobInfo.IV)
if err != nil {
return nil, err
}
@ -74,3 +123,8 @@ func (s Stream) Data() ([]byte, error) {
return file, nil
}
// numContentBlobs returns the number of content blobs required to store the
// data (the ceiling of len(data) / maxBlobDataSize; 0 for empty data).
func numContentBlobs(data []byte) int {
	blobCount := float64(len(data)) / float64(maxBlobDataSize)
	return int(math.Ceil(blobCount))
}