switch to io.Reader interface for stream creation

2021-03-25 16:57:12 -04:00 · 2021-03-25 16:57:12 -04:00 · 419e7c88a3
commit 419e7c88a3
parent 988178df50
4 changed files with 292 additions and 106 deletions
--- a/stream/blob.go
+++ b/stream/blob.go
@ -35,7 +35,7 @@ func (b Blob) Hash() []byte {
 	return hashBytes[:]
 }
-// HashHex returns th blob hash as a hex string
+// HashHex returns the blob hash as a hex string
 func (b Blob) HashHex() string {
 	return hex.EncodeToString(b.Hash())
 }
--- a/stream/sdBlob.go
+++ b/stream/sdBlob.go
@ -8,6 +8,7 @@ import (
 	"encoding/hex"
 	"encoding/json"
 	"strconv"
 	"strings"
 )
 const streamTypeLBRYFile = "lbryfile"
@ -44,10 +45,30 @@ type SDBlob struct {
 	StreamHash        []byte     `json:"-"`
 }
 // Hash returns a hash of the SD blob data
 func (s SDBlob) Hash() []byte {
 	hashBytes := sha512.Sum384(s.ToBlob())
 	return hashBytes[:]
 }
 // HashHex returns the SD blob hash as a hex string
 func (s SDBlob) HashHex() string {
 	return hex.EncodeToString(s.Hash())
 }
 // ToBlob converts the SDBlob to a normal data Blob
-func (s SDBlob) ToBlob() (Blob, error) {
+func (s SDBlob) ToBlob() Blob {
-	b, err := json.Marshal(s)
+	jsonSD, err := json.Marshal(s)
-	return Blob(b), err
+	if err != nil {
 		panic(err)
 	}
 	// COMPATIBILITY HACK to make json output match python's json. this can be
 	// removed when we implement canonical JSON encoding
 	jsonSD = []byte(strings.Replace(string(jsonSD), ",", ", ", -1))
 	jsonSD = []byte(strings.Replace(string(jsonSD), ":", ": ", -1))
 	return jsonSD
 }
 // FromBlob unmarshals a data Blob that should contain SDBlob data
@ -55,30 +76,6 @@ func (s *SDBlob) FromBlob(b Blob) error {
 	return json.Unmarshal(b, s)
 }
 func newSdBlob(blobs []Blob, key []byte, ivs [][]byte, streamName, suggestedFilename string) *SDBlob {
 	if len(ivs) != len(blobs)+1 { // +1 for terminating 0-length blob
 		panic("wrong number of IVs provided")
 	}
 	sd := &SDBlob{
 		StreamType:        streamTypeLBRYFile,
 		StreamName:        streamName,
 		SuggestedFileName: suggestedFilename,
 		Key:               key,
 	}
 	for i, b := range blobs {
 		sd.addBlob(b, ivs[i])
 	}
 	// terminating blob
 	sd.addBlob(Blob{}, ivs[len(ivs)-1])
 	sd.updateStreamHash()
 	return sd
 }
 // addBlob adds the blob's info to stream
 func (s *SDBlob) addBlob(b Blob, iv []byte) {
 	if len(iv) == 0 {
--- a/stream/stream.go
+++ b/stream/stream.go
@ -2,8 +2,10 @@ package stream
 import (
 	"bytes"
 	"crypto/sha512"
 	"hash"
 	"io"
 	"math"
 	"strings"
 	"github.com/lbryio/lbry.go/v2/extras/errors"
 )
@ -13,66 +15,22 @@ type Stream []Blob
 // -1 to leave room for padding, since there must be at least one byte of pkcs7 padding
 const maxBlobDataSize = MaxBlobSize - 1
-// New creates a new Stream from a byte slice
+// New creates a new Stream from a stream of bytes.
-func New(data []byte) (Stream, error) {
+func New(src io.Reader) (Stream, error) {
-	key := randIV()
+	return NewEncoder(src).Stream()
 	ivs := make([][]byte, numContentBlobs(data)+1) // +1 for terminating 0-length blob
 	for i := range ivs {
 		ivs[i] = randIV()
 	}
 	return makeStream(data, key, ivs, "", "")
 }
 // Reconstruct creates a stream from the given data using predetermined IVs and key from the SD blob
 // NOTE: this will assume that all blobs except the last one are at max length. in theory this is not
 // required, but in practice this is always true. if this is false, streams may not match exactly
 func Reconstruct(data []byte, sdBlob SDBlob) (Stream, error) {
 	ivs := make([][]byte, len(sdBlob.BlobInfos))
 	for i := range ivs {
 		ivs[i] = sdBlob.BlobInfos[i].IV
 	}
 	return makeStream(data, sdBlob.Key, ivs, sdBlob.StreamName, sdBlob.SuggestedFileName)
 }
 func makeStream(data, key []byte, ivs [][]byte, streamName, suggestedFilename string) (Stream, error) {
 	var err error
 	numBlobs := numContentBlobs(data)
 	if len(ivs) != numBlobs+1 { // +1 for terminating 0-length blob
 		return nil, errors.Err("incorrect number of IVs provided")
 	}
 	s := make(Stream, numBlobs+1) // +1 for sd blob
 	for i := 0; i < numBlobs; i++ {
 		start := i * maxBlobDataSize
 		end := start + maxBlobDataSize
 		if end > len(data) {
 			end = len(data)
 		}
 		s[i+1], err = NewBlob(data[start:end], key, ivs[i])
 		if err != nil {
 			return nil, err
 		}
 	}
 	sd := newSdBlob(s[1:], key, ivs, streamName, suggestedFilename)
 	jsonSD, err := sd.ToBlob()
 	if err != nil {
 		return nil, err
 	}
 	// COMPATIBILITY HACK to make json output match python's json. this can be
 	// removed when we implement canonical JSON encoding
 	jsonSD = []byte(strings.Replace(string(jsonSD), ",", ", ", -1))
 	jsonSD = []byte(strings.Replace(string(jsonSD), ":", ": ", -1))
 	s[0] = jsonSD
 	return s, nil
 }
 // Data returns the file data that a stream encapsulates.
 //
 // Deprecated: use Decode() instead. It's a more accurate name. Data() will be removed in the future.
 func (s Stream) Data() ([]byte, error) {
 	return s.Decode()
 }
 // Decode returns the file data that a stream encapsulates
 //
 // TODO: this should use io.Writer instead of returning bytes
 func (s Stream) Decode() ([]byte, error) {
 	if len(s) < 2 {
 		return nil, errors.Err("stream must be at least 2 blobs long") // sd blob and content blob
 	}
@ -124,7 +82,152 @@ func (s Stream) Data() ([]byte, error) {
 	return file, nil
 }
-//numContentBlobs returns the number of content blobs required to store the data
+// Encoder reads bytes from a source and returns blobs of the stream
-func numContentBlobs(data []byte) int {
+type Encoder struct {
-	return int(math.Ceil(float64(len(data)) / float64(maxBlobDataSize)))
+	// source data to be encoded into a stream
 	src io.Reader
 	// preset IVs to use for encrypting blobs
 	ivs [][]byte
 	// an optionals hint about the total size of the source data
 	// encoder will use this to preallocate space for blobs
 	srcSizeHint int
 	// buffer for reading bytes from reader
 	buf []byte
 	// sd blob that gets built as stream is encoded
 	sd *SDBlob
 	// number of bytes read from src
 	srcLen int
 	// running hash bytes read from src
 	srcHash hash.Hash
 }
 // NewEncoder creates a new stream encoder
 func NewEncoder(src io.Reader) *Encoder {
 	return &Encoder{
 		src: src,
 		buf: make([]byte, maxBlobDataSize),
 		sd: &SDBlob{
 			StreamType: streamTypeLBRYFile,
 			Key:        randIV(),
 		},
 		srcHash: sha512.New384(),
 	}
 }
 // NewEncoderWithIVs creates a new encoder that uses preset cryptographic material
 func NewEncoderWithIVs(src io.Reader, key []byte, ivs [][]byte) *Encoder {
 	e := NewEncoder(src)
 	e.sd.Key = key
 	e.ivs = ivs
 	return e
 }
 // NewEncoderFromSD creates a new encoder that reuses cryptographic material from an sd blob
 // This can be used to reconstruct a stream exactly from a file
 // NOTE: this will assume that all blobs except the last one are at max length. in theory this is not
 // required, but in practice this is always true. if this is false, streams may not match exactly
 func NewEncoderFromSD(src io.Reader, sdBlob *SDBlob) *Encoder {
 	ivs := make([][]byte, len(sdBlob.BlobInfos))
 	for i := range ivs {
 		ivs[i] = sdBlob.BlobInfos[i].IV
 	}
 	e := NewEncoderWithIVs(src, sdBlob.Key, ivs)
 	e.sd.StreamName = sdBlob.StreamName
 	e.sd.SuggestedFileName = sdBlob.SuggestedFileName
 	return e
 }
 // TODO: consider making a NewPartialEncoder that also copies blobinfos from sdBlobs and seeks forward in the data
 // this would avoid re-creating blobs that were created in the past
 // Next returns the next blob in the stream
 func (e *Encoder) Next() (Blob, error) {
 	n, err := e.src.Read(e.buf)
 	if err != nil {
 		return nil, err
 	}
 	e.srcLen += n
 	e.srcHash.Write(e.buf[:n])
 	iv := e.nextIV()
 	blob, err := NewBlob(e.buf[:n], e.sd.Key, iv)
 	if err != nil {
 		return nil, err
 	}
 	e.sd.addBlob(blob, iv)
 	return blob, nil
 }
 // Stream creates the whole stream in one call
 func (e *Encoder) Stream() (Stream, error) {
 	s := make(Stream, 1, 1+int(math.Ceil(float64(e.srcSizeHint)/maxBlobDataSize))) // len starts at 1 and cap is +1 to leave room for sd blob
 	for {
 		blob, err := e.Next()
 		if err != nil {
 			if !errors.Is(err, io.EOF) {
 				return nil, err
 			}
 			// if stream is not terminated, terminate it
 			if e.sd.BlobInfos[len(e.sd.BlobInfos)-1].Length > 0 {
 				e.sd.addBlob(Blob{}, e.nextIV())
 			}
 			break
 		}
 		s = append(s, blob)
 	}
 	s[0] = e.SDBlob().ToBlob()
 	if cap(s) > len(s) {
 		// size hint was too big. copy stream to smaller underlying array to free memory
 		// this might be premature optimization...
 		s = append(Stream(nil), s[:]...)
 	}
 	return s, nil
 }
 // SDBlob returns the sd blob so far
 func (e *Encoder) SDBlob() *SDBlob {
 	e.sd.updateStreamHash()
 	return e.sd
 }
 // SourceLen returns the number of bytes read from source
 func (e *Encoder) SourceLen() int {
 	return e.srcLen
 }
 // SourceLen returns a hash of the bytes read from source
 func (e *Encoder) SourceHash() []byte {
 	return e.srcHash.Sum(nil)
 }
 // SourceSizeHint sets a hint about the total size of the source
 // This helps allocate RAM more efficiently.
 // If the hint is wrong, it still works fine but there will be a small performance penalty.
 func (e *Encoder) SourceSizeHint(size int) *Encoder {
 	e.srcSizeHint = size
 	return e
 }
 // nextIV returns the next preset IV if there is one
 func (e *Encoder) nextIV() []byte {
 	if len(e.ivs) == 0 {
 		return randIV()
 	}
 	iv := e.ivs[0]
 	e.ivs = e.ivs[1:]
 	return iv
 }
--- a/stream/stream_test.go
+++ b/stream/stream_test.go
@ -2,26 +2,28 @@ package stream
 import (
 	"bytes"
 	"crypto/rand"
 	"crypto/sha256"
 	"crypto/sha512"
 	"encoding/hex"
 	"testing"
 )
-func TestStreamToFile(t *testing.T) {
+var testdataBlobHashes = []string{
-	blobHashes := []string{
+	"1bf7d39c45d1a38ffa74bff179bf7f67d400ff57fa0b5a0308963f08d01712b3079530a8c188e8c89d9b390c6ee06f05", // sd hash
-		"1bf7d39c45d1a38ffa74bff179bf7f67d400ff57fa0b5a0308963f08d01712b3079530a8c188e8c89d9b390c6ee06f05", // sd hash
+	"a2f1841bb9c5f3b583ac3b8c07ee1a5bf9cc48923721c30d5ca6318615776c284e8936d72fa4db7fdda2e4e9598b1e6c",
-		"a2f1841bb9c5f3b583ac3b8c07ee1a5bf9cc48923721c30d5ca6318615776c284e8936d72fa4db7fdda2e4e9598b1e6c",
+	"0c9675ad7f40f29dcd41883ed9cf7e145bbb13976d9b83ab9354f4f61a87f0f7771a56724c2aa7a5ab43c68d7942e5cb",
-		"0c9675ad7f40f29dcd41883ed9cf7e145bbb13976d9b83ab9354f4f61a87f0f7771a56724c2aa7a5ab43c68d7942e5cb",
+	"a4d07d442b9907036c75b6c92db316a8b8428733bf5ec976627a48a7c862bf84db33075d54125a7c0b297bd2dc445f1c",
-		"a4d07d442b9907036c75b6c92db316a8b8428733bf5ec976627a48a7c862bf84db33075d54125a7c0b297bd2dc445f1c",
+	"dcd2093f4a3eca9f6dd59d785d0bef068fee788481986aa894cf72ed4d992c0ff9d19d1743525de2f5c3c62f5ede1c58",
-		"dcd2093f4a3eca9f6dd59d785d0bef068fee788481986aa894cf72ed4d992c0ff9d19d1743525de2f5c3c62f5ede1c58",
+}
 	}
-	stream := make(Stream, len(blobHashes))
+func TestStreamToFile(t *testing.T) {
-	for i, hash := range blobHashes {
+	stream := make(Stream, len(testdataBlobHashes))
 	for i, hash := range testdataBlobHashes {
 		stream[i] = testdata(t, hash)
 	}
-	data, err := stream.Data()
+	data, err := stream.Decode()
 	if err != nil {
 		t.Fatal(err)
 	}
@ -33,6 +35,8 @@ func TestStreamToFile(t *testing.T) {
 		t.Errorf("file length mismatch. got %d, expected %d", actualLen, expectedLen)
 	}
 	expectedFileHash := sha512.Sum384(data)
 	expectedSha256 := unhex(t, "51e4d03bd6d69ea17d1be3ce01fdffa44ffe053f2dbce8d42a50283b2890fea2")
 	actualSha256 := sha256.Sum256(data)
@ -46,22 +50,104 @@ func TestStreamToFile(t *testing.T) {
 		t.Fatal(err)
 	}
-	newStream, err := Reconstruct(data, *sdBlob)
+	enc := NewEncoderFromSD(bytes.NewBuffer(data), sdBlob)
-	if err != nil {
+	newStream, err := enc.Stream()
-		t.Fatal(err)
+
 	if len(newStream) != len(testdataBlobHashes) {
 		t.Fatalf("stream length mismatch. got %d blobs, expected %d", len(newStream), len(testdataBlobHashes))
 	}
-	if len(newStream) != len(blobHashes) {
+	if enc.SourceLen() != expectedLen {
-		t.Fatalf("stream length mismatch. got %d blobs, expected %d", len(newStream), len(blobHashes))
+		t.Errorf("reconstructed file length mismatch. got %d, expected %d", enc.SourceLen(), expectedLen)
 	}
-	for i, hash := range blobHashes {
+	if !bytes.Equal(enc.SourceHash(), expectedFileHash[:]) {
 		t.Errorf("reconstructed file hash mismatch. got %s, expected %s", hex.EncodeToString(enc.SourceHash()), hex.EncodeToString(expectedFileHash[:]))
 	}
 	for i, hash := range testdataBlobHashes {
 		if newStream[i].HashHex() != hash {
 			t.Errorf("blob %d hash mismatch. got %s, expected %s", i, newStream[i].HashHex(), hash)
 		}
 	}
 }
 func TestMakeStream(t *testing.T) {
 	blobsToRead := 3
 	totalBlobs := blobsToRead + 3
 	data := make([]byte, ((totalBlobs-1)*maxBlobDataSize)+1000) // last blob is partial
 	_, err := rand.Read(data)
 	if err != nil {
 		t.Fatal(err)
 	}
 	buf := bytes.NewBuffer(data)
 	enc := NewEncoder(buf)
 	stream := make(Stream, blobsToRead+1) // +1 for sd blob
 	for i := 1; i < blobsToRead+1; i++ {  // start at 1 to skip sd blob
 		stream[i], err = enc.Next()
 		if err != nil {
 			t.Fatal(err)
 		}
 	}
 	sdBlob := enc.SDBlob()
 	if len(sdBlob.BlobInfos) != blobsToRead {
 		t.Errorf("expected %d blobs in partial sdblob, got %d", blobsToRead, len(sdBlob.BlobInfos))
 	}
 	if enc.SourceLen() != maxBlobDataSize*blobsToRead {
 		t.Errorf("expected length of %d , got %d", maxBlobDataSize*blobsToRead, enc.SourceLen())
 	}
 	// now finish the stream, reusing key and IVs
 	buf = bytes.NewBuffer(data) // rewind to the beginning of the data
 	enc = NewEncoderFromSD(buf, sdBlob)
 	reconstructedStream, err := enc.Stream()
 	if err != nil {
 		t.Fatal(err)
 	}
 	if len(reconstructedStream) != totalBlobs+1 { // +1 for the terminating blob at the end
 		t.Errorf("expected %d blobs in stream, got %d", totalBlobs+1, len(reconstructedStream))
 	}
 	if enc.SourceLen() != len(data) {
 		t.Errorf("expected length of %d , got %d", len(data), enc.SourceLen())
 	}
 	reconstructedSDBlob := enc.SDBlob()
 	for i := 0; i < len(sdBlob.BlobInfos); i++ {
 		if !bytes.Equal(sdBlob.BlobInfos[i].IV, reconstructedSDBlob.BlobInfos[i].IV) {
 			t.Errorf("blob info %d of reconstructed sd blobd does not match original sd blob", i)
 		}
 	}
 	for i := 1; i < len(stream); i++ { // start at 1 to skip sd blob
 		if !bytes.Equal(stream[i], reconstructedStream[i]) {
 			t.Errorf("blob %d of reconstructed stream does not match original stream", i)
 		}
 	}
 }
 func TestSizeHint(t *testing.T) {
 	b := make([]byte, 12)
 	newStream, err := NewEncoder(bytes.NewBuffer(b)).SourceSizeHint(5 * maxBlobDataSize).Stream()
 	if err != nil {
 		t.Fatal(err)
 	}
 	if cap(newStream) != 2 { // 1 for sd blob, 1 for the 12 bytes of the actual stream
 		t.Fatalf("expected 2 blobs allocated, got %d", cap(newStream))
 	}
 }
 func TestNew(t *testing.T) {
 	t.Skip("TODO: test new stream creation and decryption")
 }