lbry.go/stream/stream.go

package stream

import (
	"bytes"
	"crypto/sha512"
	"hash"
	"io"
	"math"

	"github.com/lbryio/lbry.go/v2/extras/errors"
)

// Stream is an ordered list of blobs. The first element is the sd blob and
// the remaining elements are the content blobs it describes, in stream order.
type Stream []Blob

// maxBlobDataSize is the largest number of source bytes placed into a single blob.
// It is one less than MaxBlobSize to leave room for padding, since there must be
// at least one byte of pkcs7 padding.
const maxBlobDataSize = MaxBlobSize - 1

// New reads src to the end and encodes everything it yields into a Stream.
// It is a convenience wrapper around a fresh Encoder's Stream method.
func New(src io.Reader) (Stream, error) {
	enc := NewEncoder(src)
	return enc.Stream()
}

// Data returns the file data that a stream encapsulates.
// It simply forwards to Decode and returns whatever Decode returns.
//
// Deprecated: use Decode() instead. It's a more accurate name. Data() will be removed in the future.
func (s Stream) Data() ([]byte, error) {
	return s.Decode()
}

// Decode returns the file data that a stream encapsulates.
//
// s[0] is parsed as the sd blob; every following blob is matched against the
// sd blob's BlobInfos entry at the same position. Decode returns an error if:
//   - the stream has fewer than 2 blobs,
//   - the sd blob cannot be parsed or fails IsValid,
//   - the sd blob has no blob infos or does not end with a 0-length entry,
//   - the number of content blobs does not match the sd blob,
//   - a 0-length entry appears before the end, entries are out of order,
//     or a blob's hash differs from the hash recorded in the sd blob,
//   - a blob's plaintext cannot be recovered.
//
// TODO: this should use io.Writer instead of returning bytes
func (s Stream) Decode() ([]byte, error) {
	if len(s) < 2 {
		return nil, errors.Err("stream must be at least 2 blobs long") // sd blob and content blob
	}

	sdBlob := &SDBlob{}
	err := sdBlob.FromBlob(s[0])
	if err != nil {
		return nil, err
	}

	if !sdBlob.IsValid() {
		return nil, errors.Err("sd blob is not valid")
	}

	// Index of the terminating entry. An sd blob with no blob infos has no
	// terminator either, so check for that before indexing to avoid a panic.
	last := len(sdBlob.BlobInfos) - 1
	if last < 0 || sdBlob.BlobInfos[last].Length != 0 {
		return nil, errors.Err("sd blob is missing the terminating 0-length blob")
	}

	if len(s[1:]) != last { // the terminating 0-length blob has no matching content blob
		return nil, errors.Err("number of blobs in stream does not match number of blobs in sd info")
	}

	var file []byte
	for i, blobInfo := range sdBlob.BlobInfos {
		if blobInfo.Length == 0 {
			if i != last {
				return nil, errors.Err("got 0-length blob before end of stream")
			}
			break
		}

		if blobInfo.BlobNum != i {
			return nil, errors.Err("blobs are out of order in sd blob")
		}

		// content blobs are offset by one because s[0] is the sd blob
		blob := s[i+1]

		if !bytes.Equal(blob.Hash(), blobInfo.BlobHash) {
			return nil, errors.Err("blob hash doesn't match hash in blobInfo")
		}

		data, err := blob.Plaintext(sdBlob.Key, blobInfo.IV)
		if err != nil {
			return nil, err
		}
		file = append(file, data...)
	}

	return file, nil
}

// Encoder reads bytes from a source and returns blobs of the stream.
// Each call to Next consumes more of the source and records the resulting
// blob in the sd blob that the encoder builds up along the way.
type Encoder struct {
	// source data to be encoded into a stream
	src io.Reader
	// preset IVs to use for encrypting blobs, consumed front to back.
	// once they run out, random IVs are used instead
	ivs [][]byte
	// an optional hint about the total size of the source data.
	// the encoder uses this to preallocate space for blobs
	srcSizeHint int

	// buffer for reading bytes from src; reused for every blob
	buf []byte
	// sd blob that gets built as the stream is encoded
	sd *SDBlob
	// number of bytes read from src
	srcLen int
	// running hash of the bytes read from src
	srcHash hash.Hash
}

// NewEncoder builds an Encoder that reads from src.
// The encoder gets a read buffer of maxBlobDataSize bytes, a SHA-384 running
// hash for the source bytes, and a new sd blob whose key comes from randIV.
func NewEncoder(src io.Reader) *Encoder {
	enc := &Encoder{
		src:     src,
		buf:     make([]byte, maxBlobDataSize),
		srcHash: sha512.New384(),
	}
	enc.sd = &SDBlob{
		StreamType: streamTypeLBRYFile,
		Key:        randIV(),
	}
	return enc
}

// NewEncoderWithIVs builds an Encoder that uses the given key and IVs instead
// of freshly generated ones. The key replaces the one NewEncoder put into the
// sd blob, and the IVs are handed out in order as blobs are created.
func NewEncoderWithIVs(src io.Reader, key []byte, ivs [][]byte) *Encoder {
	enc := NewEncoder(src)
	enc.ivs = ivs
	enc.sd.Key = key
	return enc
}

// NewEncoderFromSD builds an Encoder that reuses the key, the per-blob IVs, the
// stream name and the suggested file name of an existing sd blob.
// This can be used to reconstruct a stream exactly from a file.
// NOTE: this will assume that all blobs except the last one are at max length. in theory this is not
// required, but in practice this is always true. if this is false, streams may not match exactly
func NewEncoderFromSD(src io.Reader, sdBlob *SDBlob) *Encoder {
	// collect one IV per blob info, in the same order as the sd blob lists them
	presetIVs := make([][]byte, 0, len(sdBlob.BlobInfos))
	for idx := range sdBlob.BlobInfos {
		presetIVs = append(presetIVs, sdBlob.BlobInfos[idx].IV)
	}

	enc := NewEncoderWithIVs(src, sdBlob.Key, presetIVs)
	enc.sd.SuggestedFileName = sdBlob.SuggestedFileName
	enc.sd.StreamName = sdBlob.StreamName
	return enc
}

// TODO: consider making a NewPartialEncoder that also copies blobinfos from sdBlobs and seeks forward in the data
// this would avoid re-creating blobs that were created in the past

// Next reads the next chunk of data, encodes it into a blob, and adds it to the stream.
//
// The read buffer is filled completely before a blob is created, so only the
// final blob of a stream can be shorter than maxBlobDataSize, even when the
// source returns data in small pieces.
//
// When the source is fully consumed, Next() makes sure the stream is terminated (i.e. the sd blob
// ends with an empty terminating blob) and returns io.EOF. Any other read error, and any
// error from NewBlob, is returned as-is with a nil blob.
func (e *Encoder) Next() (Blob, error) {
	// A single Read may return fewer bytes than requested, and may return data
	// together with an error. Keep reading until the buffer is full or the
	// source reports an error, so no blob is cut short and no bytes are lost.
	var (
		n   int
		err error
	)
	for n < len(e.buf) && err == nil {
		var read int
		read, err = e.src.Read(e.buf[n:])
		n += read
	}

	if err != nil {
		if !errors.Is(err, io.EOF) {
			return nil, err
		}
		if n == 0 {
			e.ensureTerminated()
			return nil, err
		}
		// EOF arrived together with data: encode that data now. The source
		// is expected to report EOF again on the following call.
	}

	e.srcLen += n
	e.srcHash.Write(e.buf[:n]) // hash.Hash documents that Write never returns an error
	iv := e.nextIV()

	blob, err := NewBlob(e.buf[:n], e.sd.Key, iv)
	if err != nil {
		return nil, err
	}

	e.sd.addBlob(blob, iv)

	return blob, nil
}

// Stream creates the whole stream in one call.
//
// It calls Next until the source reports io.EOF, collects the resulting blobs,
// and places the finished sd blob at index 0. Any error other than io.EOF
// aborts encoding and is returned with a nil Stream.
func (e *Encoder) Stream() (Stream, error) {
	// The hint is only advisory. A negative value must not be allowed to push
	// the capacity below the length, because make would panic.
	hint := e.srcSizeHint
	if hint < 0 {
		hint = 0
	}

	s := make(Stream, 1, 1+int(math.Ceil(float64(hint)/maxBlobDataSize))) // len starts at 1 and cap is +1 to leave room for sd blob

	for {
		blob, err := e.Next()
		if err != nil {
			if errors.Is(err, io.EOF) {
				break
			}
			return nil, err
		}

		s = append(s, blob)
	}

	// the sd blob can only be serialized once every content blob has been added
	s[0] = e.SDBlob().ToBlob()

	if cap(s) > len(s) {
		// size hint was too big. copy stream to smaller underlying array to free memory
		// this might be premature optimization...
		s = append(Stream(nil), s[:]...)
	}

	return s, nil
}

// SDBlob refreshes the stream hash of the sd blob built so far and returns it.
// The returned pointer is the encoder's own sd blob, not a copy.
func (e *Encoder) SDBlob() *SDBlob {
	sd := e.sd
	sd.updateStreamHash()
	return sd
}

// SourceLen returns the number of bytes read from source so far.
// The count grows with every blob produced by Next.
func (e *Encoder) SourceLen() int {
	return e.srcLen
}

// SourceHash returns a hash of the bytes read from source so far.
// The running hash is not reset, so it can be called again after more data is read.
func (e *Encoder) SourceHash() []byte {
	return e.srcHash.Sum(nil)
}

// SourceSizeHint sets a hint about the total size of the source, in bytes.
// This helps allocate RAM more efficiently.
// If the hint is wrong, it still works fine but there will be a small performance penalty.
// It returns the same encoder so that calls can be chained.
func (e *Encoder) SourceSizeHint(size int) *Encoder {
	e.srcSizeHint = size
	return e
}

// isTerminated reports whether the last blob info in the sd blob has length 0,
// i.e. whether the terminating entry has already been added.
func (e *Encoder) isTerminated() bool {
	infos := e.sd.BlobInfos
	if len(infos) < 1 {
		return false
	}
	return infos[len(infos)-1].Length == 0
}

// ensureTerminated adds the empty terminating blob to the sd blob unless the
// sd blob already ends with one. Calling it repeatedly is harmless.
func (e *Encoder) ensureTerminated() {
	if e.isTerminated() {
		return
	}
	e.sd.addBlob(Blob{}, e.nextIV())
}

// nextIV pops and returns the first preset IV. When no preset IVs remain,
// it returns a newly generated one from randIV instead.
func (e *Encoder) nextIV() []byte {
	if len(e.ivs) > 0 {
		head := e.ivs[0]
		e.ivs = e.ivs[1:]
		return head
	}

	return randIV()
}
tests pass on converting stream to file 2018-10-10 03:23:35 +02:00			`package stream`

			`import (`
			`"bytes"`
switch to io.Reader interface for stream creation 2021-03-25 21:57:12 +01:00			`"crypto/sha512"`
			`"hash"`
			`"io"`
fully match python's stream creation and decoding 2018-10-23 22:41:19 +02:00			`"math"`
tests pass on converting stream to file 2018-10-10 03:23:35 +02:00
update to v2 2019-10-10 05:07:33 +02:00			`"github.com/lbryio/lbry.go/v2/extras/errors"`
tests pass on converting stream to file 2018-10-10 03:23:35 +02:00			`)`

			`type Stream []Blob`

fully match python's stream creation and decoding 2018-10-23 22:41:19 +02:00			`// -1 to leave room for padding, since there must be at least one byte of pkcs7 padding`
			`const maxBlobDataSize = MaxBlobSize - 1`
tests pass on converting stream to file 2018-10-10 03:23:35 +02:00
switch to io.Reader interface for stream creation 2021-03-25 21:57:12 +01:00			`// New creates a new Stream from a stream of bytes.`
			`func New(src io.Reader) (Stream, error) {`
			`return NewEncoder(src).Stream()`
fully match python's stream creation and decoding 2018-10-23 22:41:19 +02:00			`}`

switch to io.Reader interface for stream creation 2021-03-25 21:57:12 +01:00			`// Data returns the file data that a stream encapsulates.`
			`//`
			`// Deprecated: use Decode() instead. It's a more accurate name. Data() will be removed in the future.`
			`func (s Stream) Data() ([]byte, error) {`
			`return s.Decode()`
tests pass on converting stream to file 2018-10-10 03:23:35 +02:00			`}`

switch to io.Reader interface for stream creation 2021-03-25 21:57:12 +01:00			`// Decode returns the file data that a stream encapsulates`
			`//`
			`// TODO: this should use io.Writer instead of returning bytes`
			`func (s Stream) Decode() ([]byte, error) {`
tests pass on converting stream to file 2018-10-10 03:23:35 +02:00			`if len(s) < 2 {`
fully match python's stream creation and decoding 2018-10-23 22:41:19 +02:00			`return nil, errors.Err("stream must be at least 2 blobs long") // sd blob and content blob`
tests pass on converting stream to file 2018-10-10 03:23:35 +02:00			`}`

			`sdBlob := &SDBlob{}`
			`err := sdBlob.FromBlob(s[0])`
			`if err != nil {`
			`return nil, err`
			`}`

			`if !sdBlob.IsValid() {`
			`return nil, errors.Err("sd blob is not valid")`
			`}`

fully match python's stream creation and decoding 2018-10-23 22:41:19 +02:00			`if sdBlob.BlobInfos[len(sdBlob.BlobInfos)-1].Length != 0 {`
			`return nil, errors.Err("sd blob is missing the terminating 0-length blob")`
			`}`

			`if len(s[1:]) != len(sdBlob.BlobInfos)-1 { // -1 for terminating 0-length blob`
			`return nil, errors.Err("number of blobs in stream does not match number of blobs in sd info")`
			`}`

tests pass on converting stream to file 2018-10-10 03:23:35 +02:00			`var file []byte`
fully match python's stream creation and decoding 2018-10-23 22:41:19 +02:00			`for i, blobInfo := range sdBlob.BlobInfos {`
			`if blobInfo.Length == 0 {`
			`if i != len(sdBlob.BlobInfos)-1 {`
			`return nil, errors.Err("got 0-length blob before end of stream")`
			`}`
			`break`
			`}`

			`if blobInfo.BlobNum != i {`
			`return nil, errors.Err("blobs are out of order in sd blob")`
			`}`

			`blob := s[i+1]`

			`if !bytes.Equal(blob.Hash(), blobInfo.BlobHash) {`
tests pass on converting stream to file 2018-10-10 03:23:35 +02:00			`return nil, errors.Err("blob hash doesn't match hash in blobInfo")`
			`}`

fully match python's stream creation and decoding 2018-10-23 22:41:19 +02:00			`data, err := blob.Plaintext(sdBlob.Key, blobInfo.IV)`
tests pass on converting stream to file 2018-10-10 03:23:35 +02:00			`if err != nil {`
			`return nil, err`
			`}`
			`file = append(file, data...)`
			`}`

			`return file, nil`
			`}`
fully match python's stream creation and decoding 2018-10-23 22:41:19 +02:00
switch to io.Reader interface for stream creation 2021-03-25 21:57:12 +01:00			`// Encoder reads bytes from a source and returns blobs of the stream`
			`type Encoder struct {`
			`// source data to be encoded into a stream`
			`src io.Reader`
			`// preset IVs to use for encrypting blobs`
			`ivs [][]byte`
			`// an optionals hint about the total size of the source data`
			`// encoder will use this to preallocate space for blobs`
			`srcSizeHint int`

			`// buffer for reading bytes from reader`
			`buf []byte`
			`// sd blob that gets built as stream is encoded`
			`sd *SDBlob`
			`// number of bytes read from src`
			`srcLen int`
			`// running hash bytes read from src`
			`srcHash hash.Hash`
			`}`

			`// NewEncoder creates a new stream encoder`
			`func NewEncoder(src io.Reader) *Encoder {`
			`return &Encoder{`
			`src: src,`

			`buf: make([]byte, maxBlobDataSize),`
			`sd: &SDBlob{`
			`StreamType: streamTypeLBRYFile,`
			`Key: randIV(),`
			`},`
			`srcHash: sha512.New384(),`
			`}`
			`}`

			`// NewEncoderWithIVs creates a new encoder that uses preset cryptographic material`
			`func NewEncoderWithIVs(src io.Reader, key []byte, ivs [][]byte) *Encoder {`
			`e := NewEncoder(src)`
			`e.sd.Key = key`
			`e.ivs = ivs`
			`return e`
			`}`

			`// NewEncoderFromSD creates a new encoder that reuses cryptographic material from an sd blob`
			`// This can be used to reconstruct a stream exactly from a file`
			`// NOTE: this will assume that all blobs except the last one are at max length. in theory this is not`
			`// required, but in practice this is always true. if this is false, streams may not match exactly`
			`func NewEncoderFromSD(src io.Reader, sdBlob SDBlob) Encoder {`
			`ivs := make([][]byte, len(sdBlob.BlobInfos))`
			`for i := range ivs {`
			`ivs[i] = sdBlob.BlobInfos[i].IV`
			`}`

			`e := NewEncoderWithIVs(src, sdBlob.Key, ivs)`
			`e.sd.StreamName = sdBlob.StreamName`
			`e.sd.SuggestedFileName = sdBlob.SuggestedFileName`
			`return e`
			`}`

			`// TODO: consider making a NewPartialEncoder that also copies blobinfos from sdBlobs and seeks forward in the data`
			`// this would avoid re-creating blobs that were created in the past`

terminate stream after consuming all the data 2021-04-02 20:16:46 +02:00			`// Next reads the next chunk of data, encodes it into a blob, and adds it to the stream`
			`// When the source is fully consumed, Next() makes sure the stream is terminated (i.e. the sd blob`
			`// ends with an empty terminating blob) and returns io.EOF`
switch to io.Reader interface for stream creation 2021-03-25 21:57:12 +01:00			`func (e *Encoder) Next() (Blob, error) {`
			`n, err := e.src.Read(e.buf)`
			`if err != nil {`
terminate stream after consuming all the data 2021-04-02 20:16:46 +02:00			`if errors.Is(err, io.EOF) {`
			`e.ensureTerminated()`
			`}`
switch to io.Reader interface for stream creation 2021-03-25 21:57:12 +01:00			`return nil, err`
			`}`

			`e.srcLen += n`
			`e.srcHash.Write(e.buf[:n])`
			`iv := e.nextIV()`

			`blob, err := NewBlob(e.buf[:n], e.sd.Key, iv)`
			`if err != nil {`
			`return nil, err`
			`}`

			`e.sd.addBlob(blob, iv)`

			`return blob, nil`
			`}`

			`// Stream creates the whole stream in one call`
			`func (e *Encoder) Stream() (Stream, error) {`
			`s := make(Stream, 1, 1+int(math.Ceil(float64(e.srcSizeHint)/maxBlobDataSize))) // len starts at 1 and cap is +1 to leave room for sd blob`

			`for {`
			`blob, err := e.Next()`
			`if err != nil {`
terminate stream after consuming all the data 2021-04-02 20:16:46 +02:00			`if errors.Is(err, io.EOF) {`
			`break`
switch to io.Reader interface for stream creation 2021-03-25 21:57:12 +01:00			`}`
terminate stream after consuming all the data 2021-04-02 20:16:46 +02:00			`return nil, err`
switch to io.Reader interface for stream creation 2021-03-25 21:57:12 +01:00			`}`

			`s = append(s, blob)`
			`}`

			`s[0] = e.SDBlob().ToBlob()`

			`if cap(s) > len(s) {`
			`// size hint was too big. copy stream to smaller underlying array to free memory`
			`// this might be premature optimization...`
			`s = append(Stream(nil), s[:]...)`
			`}`

			`return s, nil`
			`}`

			`// SDBlob returns the sd blob so far`
			`func (e Encoder) SDBlob() SDBlob {`
			`e.sd.updateStreamHash()`
			`return e.sd`
			`}`

			`// SourceLen returns the number of bytes read from source`
			`func (e *Encoder) SourceLen() int {`
			`return e.srcLen`
			`}`

			`// SourceLen returns a hash of the bytes read from source`
			`func (e *Encoder) SourceHash() []byte {`
			`return e.srcHash.Sum(nil)`
			`}`

			`// SourceSizeHint sets a hint about the total size of the source`
			`// This helps allocate RAM more efficiently.`
			`// If the hint is wrong, it still works fine but there will be a small performance penalty.`
			`func (e Encoder) SourceSizeHint(size int) Encoder {`
			`e.srcSizeHint = size`
			`return e`
			`}`

terminate stream after consuming all the data 2021-04-02 20:16:46 +02:00			`func (e *Encoder) isTerminated() bool {`
			`return len(e.sd.BlobInfos) >= 1 && e.sd.BlobInfos[len(e.sd.BlobInfos)-1].Length == 0`
			`}`

			`func (e *Encoder) ensureTerminated() {`
			`if !e.isTerminated() {`
			`e.sd.addBlob(Blob{}, e.nextIV())`
			`}`
			`}`

switch to io.Reader interface for stream creation 2021-03-25 21:57:12 +01:00			`// nextIV returns the next preset IV if there is one`
			`func (e *Encoder) nextIV() []byte {`
			`if len(e.ivs) == 0 {`
			`return randIV()`
			`}`

			`iv := e.ivs[0]`
			`e.ivs = e.ivs[1:]`
			`return iv`
fully match python's stream creation and decoding 2018-10-23 22:41:19 +02:00			`}`