From 423b6e19f7cfd90eb17edb23d93a66cd50f78458 Mon Sep 17 00:00:00 2001 From: ashmrtn Date: Mon, 19 Sep 2022 09:49:33 -0700 Subject: [PATCH] Encode paths given to kopia (#892) ## Description Encode all paths given to kopia with base64URL to ensure no special characters that kopia can't handle end up in there. The encoded paths are not stored in backup details nor are they ever surfaced to the user. This also works around the previous limitation where Corso was unable to properly backup or restore exchange email layouts that had folders containing `/` characters ## Type of change - [x] :sunflower: Feature - [x] :bug: Bugfix - [ ] :world_map: Documentation - [ ] :robot: Test - [ ] :computer: CI/Deployment - [ ] :hamster: Trivial/Minor ## Issue(s) * closes #865 ## Test Plan - [ ] :muscle: Manual - [x] :zap: Unit test - [x] :green_heart: E2E --- src/internal/kopia/path_encoder.go | 34 +++++++++ src/internal/kopia/path_encoder_test.go | 99 +++++++++++++++++++++++++ src/internal/kopia/wrapper.go | 17 +++-- src/internal/kopia/wrapper_test.go | 33 +++++---- 4 files changed, 161 insertions(+), 22 deletions(-) create mode 100644 src/internal/kopia/path_encoder.go create mode 100644 src/internal/kopia/path_encoder_test.go diff --git a/src/internal/kopia/path_encoder.go b/src/internal/kopia/path_encoder.go new file mode 100644 index 000000000..dc25f59c8 --- /dev/null +++ b/src/internal/kopia/path_encoder.go @@ -0,0 +1,34 @@ +package kopia + +import ( + "encoding/base64" + "path" +) + +var encoder = base64.URLEncoding + +// encodeElements takes a set of strings and returns a slice of the strings +// after encoding them to a file system-safe format. Elements are returned in +// the same order they were passed in. +func encodeElements(elements ...string) []string { + encoded := make([]string, 0, len(elements)) + + for _, e := range elements { + encoded = append(encoded, encoder.EncodeToString([]byte(e))) + } + + return encoded +} + +// encodeAsPath takes a set of elements and returns the concatenated elements as +// if they were a path. The elements are joined with the separator in the golang +// path package. +func encodeAsPath(elements ...string) string { + return path.Join(encodeElements(elements...)...) +} + +// decodeElement takes an encoded element and decodes it if possible. +func decodeElement(element string) (string, error) { + r, err := encoder.DecodeString(element) + return string(r), err +} diff --git a/src/internal/kopia/path_encoder_test.go b/src/internal/kopia/path_encoder_test.go new file mode 100644 index 000000000..1517dc35b --- /dev/null +++ b/src/internal/kopia/path_encoder_test.go @@ -0,0 +1,99 @@ +package kopia + +import ( + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "github.com/stretchr/testify/suite" +) + +type PathEncoderSuite struct { + suite.Suite +} + +func TestPathEncoderSuite(t *testing.T) { + suite.Run(t, new(PathEncoderSuite)) +} + +func (suite *PathEncoderSuite) TestEncodeDecode() { + t := suite.T() + elements := []string{"these", "are", "some", "path", "elements"} + + encoded := encodeElements(elements...) + + decoded := make([]string, 0, len(elements)) + + for _, e := range encoded { + dec, err := decodeElement(e) + require.NoError(t, err) + + decoded = append(decoded, dec) + } + + assert.Equal(t, elements, decoded) +} + +func (suite *PathEncoderSuite) TestEncodeAsPathDecode() { + table := []struct { + name string + elements []string + expected []string + }{ + { + name: "MultipleElements", + elements: []string{"these", "are", "some", "path", "elements"}, + expected: []string{"these", "are", "some", "path", "elements"}, + }, + { + name: "SingleElement", + elements: []string{"elements"}, + expected: []string{"elements"}, + }, + { + name: "EmptyPath", + elements: []string{""}, + expected: []string{""}, + }, + { + name: "NilPath", + elements: nil, + // Gets "" back because individual elements are decoded and "" is the 0 + // value for the decoder. + expected: []string{""}, + }, + } + + for _, test := range table { + suite.T().Run(test.name, func(t *testing.T) { + encoded := encodeAsPath(test.elements...) + + // Sanity check, first and last character should not be '/'. + assert.Equal(t, strings.Trim(encoded, "/"), encoded) + + decoded := make([]string, 0, len(test.elements)) + + for _, e := range strings.Split(encoded, "/") { + dec, err := decodeElement(e) + require.NoError(t, err) + + decoded = append(decoded, dec) + } + + assert.Equal(t, test.expected, decoded) + }) + } +} + +func FuzzEncodeDecodeSingleString(f *testing.F) { + f.Fuzz(func(t *testing.T, in string) { + encoded := encodeElements(in) + assert.Len(t, encoded, 1) + assert.False(t, strings.ContainsRune(encoded[0], '/')) + + decoded, err := decodeElement(encoded[0]) + require.NoError(t, err) + assert.Equal(t, in, decoded) + }) +} diff --git a/src/internal/kopia/wrapper.go b/src/internal/kopia/wrapper.go index 5e9265a66..621254ad5 100644 --- a/src/internal/kopia/wrapper.go +++ b/src/internal/kopia/wrapper.go @@ -219,12 +219,10 @@ func getStreamItemFunc( // Relative path given to us in the callback is missing the root // element. Add to pending set before calling the callback to avoid race // conditions when the item is completed. - p := itemPath.PopFront().String() d := &itemDetails{info: ei.Info(), repoPath: itemPath} + progress.put(encodeAsPath(itemPath.PopFront().Elements()...), d) - progress.put(p, d) - - entry := virtualfs.StreamingFileFromReader(e.UUID(), e.ToReader()) + entry := virtualfs.StreamingFileFromReader(encodeAsPath(e.UUID()), e.ToReader()) if err := cb(ctx, entry); err != nil { // Kopia's uploader swallows errors in most cases, so if we see // something here it's probably a big issue and we should return. @@ -253,7 +251,7 @@ func buildKopiaDirs(dirName string, dir *treeMap, progress *corsoProgress) (fs.D } return virtualfs.NewStreamingDirectory( - dirName, + encodeAsPath(dirName), getStreamItemFunc(childDirs, dir.collection, progress), ), nil } @@ -480,7 +478,7 @@ func getItemStream( e, err := snapshotfs.GetNestedEntry( ctx, snapshotRoot, - itemPath.PopFront().Elements(), + encodeElements(itemPath.PopFront().Elements()...), ) if err != nil { return nil, errors.Wrap(err, "getting nested object handle") @@ -496,8 +494,13 @@ func getItemStream( return nil, errors.Wrap(err, "opening file") } + decodedName, err := decodeElement(f.Name()) + if err != nil { + return nil, errors.Wrap(err, "decoding file name") + } + return &kopiaDataStream{ - uuid: f.Name(), + uuid: decodedName, reader: r, }, nil } diff --git a/src/internal/kopia/wrapper_test.go b/src/internal/kopia/wrapper_test.go index 31acb7830..8d584486b 100644 --- a/src/internal/kopia/wrapper_test.go +++ b/src/internal/kopia/wrapper_test.go @@ -316,7 +316,9 @@ func (suite *KopiaUnitSuite) TestBuildDirectoryTree() { ctx := context.Background() tenant := "a-tenant" user1 := testUser + user1Encoded := encodeAsPath(user1) user2 := "user2" + user2Encoded := encodeAsPath(user2) p2, err := path.FromDataLayerPath( stdpath.Join( @@ -330,9 +332,10 @@ func (suite *KopiaUnitSuite) TestBuildDirectoryTree() { ) require.NoError(t, err) + // Encode user names here so we don't have to decode things later. expectedFileCount := map[string]int{ - user1: 5, - user2: 42, + user1Encoded: 5, + user2Encoded: 42, } progress := &corsoProgress{pending: map[string]*itemDetails{}} @@ -340,11 +343,11 @@ func (suite *KopiaUnitSuite) TestBuildDirectoryTree() { collections := []data.Collection{ mockconnector.NewMockExchangeCollection( suite.testPath, - expectedFileCount[user1], + expectedFileCount[user1Encoded], ), mockconnector.NewMockExchangeCollection( p2, - expectedFileCount[user2], + expectedFileCount[user2Encoded], ), } @@ -361,24 +364,24 @@ func (suite *KopiaUnitSuite) TestBuildDirectoryTree() { // - 42 separate files dirTree, err := inflateDirTree(ctx, collections, progress) require.NoError(t, err) - assert.Equal(t, testTenant, dirTree.Name()) + assert.Equal(t, encodeAsPath(testTenant), dirTree.Name()) entries, err := fs.GetAllEntries(ctx, dirTree) require.NoError(t, err) - expectDirs(t, entries, []string{service}, true) + expectDirs(t, entries, encodeElements(service), true) entries = getDirEntriesForEntry(t, ctx, entries[0]) - expectDirs(t, entries, []string{user1, user2}, true) + expectDirs(t, entries, encodeElements(user1, user2), true) for _, entry := range entries { userName := entry.Name() entries = getDirEntriesForEntry(t, ctx, entry) - expectDirs(t, entries, []string{category}, true) + expectDirs(t, entries, encodeElements(category), true) entries = getDirEntriesForEntry(t, ctx, entries[0]) - expectDirs(t, entries, []string{testInboxDir}, true) + expectDirs(t, entries, encodeElements(testInboxDir), true) entries = getDirEntriesForEntry(t, ctx, entries[0]) assert.Len(t, entries, expectedFileCount[userName]) @@ -447,21 +450,21 @@ func (suite *KopiaUnitSuite) TestBuildDirectoryTree_MixedDirectory() { dirTree, err := inflateDirTree(ctx, test.layout, progress) require.NoError(t, err) - assert.Equal(t, testTenant, dirTree.Name()) + assert.Equal(t, encodeAsPath(testTenant), dirTree.Name()) entries, err := fs.GetAllEntries(ctx, dirTree) require.NoError(t, err) - expectDirs(t, entries, []string{service}, true) + expectDirs(t, entries, encodeElements(service), true) entries = getDirEntriesForEntry(t, ctx, entries[0]) - expectDirs(t, entries, []string{testUser}, true) + expectDirs(t, entries, encodeElements(testUser), true) entries = getDirEntriesForEntry(t, ctx, entries[0]) - expectDirs(t, entries, []string{category}, true) + expectDirs(t, entries, encodeElements(category), true) entries = getDirEntriesForEntry(t, ctx, entries[0]) - expectDirs(t, entries, []string{testInboxDir}, true) + expectDirs(t, entries, encodeElements(testInboxDir), true) entries = getDirEntriesForEntry(t, ctx, entries[0]) // 42 files and 1 subdirectory. @@ -476,7 +479,7 @@ func (suite *KopiaUnitSuite) TestBuildDirectoryTree_MixedDirectory() { } subDirs = append(subDirs, d) - assert.Equal(t, subdir, d.Name()) + assert.Equal(t, encodeAsPath(subdir), d.Name()) } require.Len(t, subDirs, 1)