add stream_name and suggested_file_name to publish

This commit is contained in:
Alex Grintsvayg 2022-10-14 12:48:05 -04:00
parent 37b396deb1
commit 4bb90dcb6e
No known key found for this signature in database
GPG key ID: AEB3F089F86A22B5
6 changed files with 130 additions and 11 deletions

View file

@ -197,7 +197,7 @@ func makeStream(path string) (stream.Stream, *pb.Stream, error) {
}
defer file.Close()
enc := stream.NewEncoder(file)
enc := stream.NewEncoderFromFile(file)
s, err := enc.Stream()
if err != nil {

View file

@ -7,11 +7,14 @@ import (
"crypto/sha512"
"encoding/hex"
"encoding/json"
"path"
"regexp"
"strconv"
"strings"
)
const (
	// streamTypeLBRYFile is the stream type identifier "lbryfile".
	streamTypeLBRYFile = "lbryfile"

	// defaultSanitizedFilename is the name sanitizeFilename falls back to
	// when nothing is left of the input after cleaning.
	defaultSanitizedFilename = "lbry_download"
)
// BlobInfo is the stream descriptor info for a single blob in a stream
// Encoding to and from JSON is customized to match existing behavior (see json.go in package)
@ -35,14 +38,14 @@ func (bi BlobInfo) Hash() []byte {
}
// SDBlob contains information about the rest of the blobs in the stream
// Encoding to and from JSON is customized to match existing behavior (see json.go in package)
// NOTE: Encoding to and from JSON is customized to match existing behavior (see json.go in package)
type SDBlob struct {
StreamName string `json:"-"`
StreamName string `json:"-"` // shadowed by JSONSDBlob in json.go
BlobInfos []BlobInfo `json:"blobs"` // one stream descriptor entry per blob in the stream
StreamType string `json:"stream_type"` // NOTE(review): presumably always streamTypeLBRYFile — confirm against callers
Key []byte `json:"-"`
SuggestedFileName string `json:"-"`
StreamHash []byte `json:"-"`
Key []byte `json:"-"` // shadowed by JSONSDBlob in json.go
SuggestedFileName string `json:"-"` // shadowed by JSONSDBlob in json.go
StreamHash []byte `json:"-"` // shadowed by JSONSDBlob in json.go
}
// Hash returns a hash of the SD blob data
@ -155,3 +158,40 @@ func randIV() []byte {
func NullIV() []byte {
	// A zero-valued array of exactly one AES block; slicing it yields a
	// fresh all-zero slice on every call.
	var zero [aes.BlockSize]byte
	return zero[:]
}
// illegalFilenameChars matches every fragment that sanitizeFilename removes
// from a file name. The reserved windows names are anchored with ^ and $, so
// they only match when they make up the entire input, and they are matched
// case-sensitively (the pattern sets no flags).
var illegalFilenameChars = regexp.MustCompile(`(` +
`[<>:"/\\|?*]+|` + // Illegal characters
`[\x00-\x1F]+|` + // All characters in range 0-31
`[ \t]*(\.)+[ \t]*$|` + // Dots at the end
`(^[ \t]+|[ \t]+$)|` + // Leading and trailing whitespace
`^CON$|^PRN$|^AUX$|` + // Illegal names on windows
`^NUL$|^COM[1-9]$|^LPT[1-9]$` + // Illegal names on windows
`)`)
// sanitizeFilename returns a cleaned copy of name that is fit to be stored in
// an sd blob. The stem and the extension are cleaned separately with
// illegalFilenameChars; an empty stem is replaced by defaultSanitizedFilename,
// and the extension is only kept when more than the dot survives.
// python implementation: https://github.com/lbryio/lbry-sdk/blob/e89acac235f497b0215991d5142aa678d525eb59/lbry/stream/descriptor.py#L69
func sanitizeFilename(name string) string {
	suffix := path.Ext(name)
	stem := name[:len(name)-len(suffix)]
	if stem == "" {
		// The whole name is "extension" (e.g. ".file"). The python
		// implementation treats that as the stem, so do the same here.
		stem, suffix = suffix, ""
	}

	clean := illegalFilenameChars.ReplaceAllString(stem, "")
	if clean == "" {
		clean = defaultSanitizedFilename
	}

	// a lone "." (or nothing at all) is not worth appending
	if cleanExt := illegalFilenameChars.ReplaceAllString(suffix, ""); len(cleanExt) > 1 {
		clean += cleanExt
	}
	return clean
}

View file

@ -73,3 +73,48 @@ func TestSdBlob_UnmarshalJSON(t *testing.T) {
t.Fatal("re-encoded string is not equal to original string")
}
}
// TestSdBlob_SanitizeFilename runs sanitizeFilename over a table of raw names
// and compares each result with the expected cleaned name.
func TestSdBlob_SanitizeFilename(t *testing.T) {
	// cases taken from https://github.com/lbryio/lbry-sdk/blob/ff303860513690c4b1c52a053aa75f28858002d3/tests/unit/stream/test_stream_descriptor.py#L81
	cases := []struct{ raw, want string }{
		{" t/-?t|.g.ext ", "t-t.g.ext"},
		{"end_dot .", "end_dot"},
		{".file\x00\x00", ".file"},
		{"test n\x16ame.ext", "test name.ext"},
		{"COM8.ext", "lbry_download.ext"},
		{"LPT2", "lbry_download"},
		{"", "lbry_download"},
	}

	for i := range cases {
		if got := sanitizeFilename(cases[i].raw); got != cases[i].want {
			t.Errorf("expected: '%s', actual: '%s'", cases[i].want, got)
		}
	}
}

View file

@ -6,6 +6,8 @@ import (
"hash"
"io"
"math"
"os"
"path"
"path/filepath"

"github.com/cockroachdb/errors"
)
@ -117,11 +119,10 @@ func NewEncoder(src io.Reader) *Encoder {
}
// NewEncoderWithIVs creates a new encoder that uses preset cryptographic material:
// key is stored as the sd blob key and ivs as the encoder's IV list.
//
// Deprecated: use NewEncoder().WithIVs() instead
func NewEncoderWithIVs(src io.Reader, key []byte, ivs [][]byte) *Encoder {
e := NewEncoder(src)
e.sd.Key = key
e.ivs = ivs
return e
return NewEncoder(src).WithIVs(key, ivs)
}
// NewEncoderFromSD creates a new encoder that reuses cryptographic material from an sd blob
@ -134,12 +135,29 @@ func NewEncoderFromSD(src io.Reader, sdBlob *SDBlob) *Encoder {
ivs[i] = sdBlob.BlobInfos[i].IV
}
e := NewEncoderWithIVs(src, sdBlob.Key, ivs)
e := NewEncoder(src).WithIVs(sdBlob.Key, ivs)
e.sd.StreamName = sdBlob.StreamName
e.sd.SuggestedFileName = sdBlob.SuggestedFileName
return e
}
// NewEncoderFromFile creates a new encoder that reads from file and records
// the file's base name in the sd blob: verbatim as StreamName and, after
// passing through sanitizeFilename, as SuggestedFileName.
func NewEncoderFromFile(file *os.File) *Encoder {
	e := NewEncoder(file)

	// file.Name() is the path as it was handed to os.Open, so it uses the
	// host's separator. path.Base only splits on "/", which on windows would
	// leave the whole path in place; normalize the separators first.
	filename := path.Base(filepath.ToSlash(file.Name()))

	e.sd.StreamName = filename
	e.sd.SuggestedFileName = sanitizeFilename(filename)
	return e
}
// WithIVs installs preset cryptographic material on the encoder: key is stored
// as the sd blob key and ivs as the IV list. The same encoder is returned so
// the call can be chained.
func (e *Encoder) WithIVs(key []byte, ivs [][]byte) *Encoder {
	e.sd.Key, e.ivs = key, ivs
	return e
}
// TODO: consider making a NewPartialEncoder that also copies blobinfos from sdBlobs and seeks forward in the data
// this would avoid re-creating blobs that were created in the past

View file

@ -7,6 +7,8 @@ import (
"crypto/sha512"
"encoding/hex"
"io"
"os"
"path/filepath"
"testing"
"github.com/cockroachdb/errors"
@ -200,3 +202,17 @@ func TestSizeHint(t *testing.T) {
func TestNew(t *testing.T) {
	// Nothing to run yet: the test is skipped unconditionally with a note
	// about the outstanding work.
	const pending = "TODO: test new stream creation and decryption"
	t.Skip(pending)
}
// TestNewEncoderFromFile opens a fixture whose name contains double quotes and
// checks that the encoder built from it carries the expected, quote-free
// suggested file name in its sd blob.
func TestNewEncoderFromFile(t *testing.T) {
	f, err := os.Open(filepath.Join("testdata", `new "encoder" from file.whatever`))
	if err != nil {
		t.Fatal(err)
	}
	// release the file handle when the test ends
	defer f.Close()

	e := NewEncoderFromFile(f)
	if e.sd.SuggestedFileName != "new encoder from file.whatever" {
		t.Error("wrong or missing suggested_file_name in sd blob")
	}
}

View file