use path struct in kopia DataCollection (#827)

* Use path struct in kopia DataCollection

Does not change the external API of the DataCollection at all; it only updates
the internals in preparation for switching data.Collection.FullPath over to
path.Path.

* Expand Path interface slightly

kopia.Wrapper needs some extra functionality from paths, mostly along
the lines of directly manipulating the elements in the path. This gives
access to those functions.

* Use path struct in kopia.Wrapper for restore

Pass path structs to the newly created collections during restore.

* Add tests for new path functionality
This commit is contained in:
ashmrtn 2022-09-13 14:18:00 -07:00 committed by GitHub
parent 573f55686f
commit 226489c58f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 343 additions and 41 deletions

View File

@ -2,8 +2,10 @@ package kopia
import (
"io"
"strings"
"github.com/alcionai/corso/src/internal/data"
"github.com/alcionai/corso/src/internal/path"
)
var (
@ -12,7 +14,7 @@ var (
)
type kopiaDataCollection struct {
path []string
path path.Path
streams []data.Stream
}
@ -31,7 +33,9 @@ func (kdc *kopiaDataCollection) Items() <-chan data.Stream {
}
func (kdc kopiaDataCollection) FullPath() []string {
return append([]string{}, kdc.path...)
// TODO(ashmrtn): Update this once data.Collection.FullPath supports
// path.Path. Assumes no adversarial users that use "/" in their folder names.
return strings.Split(kdc.path.String(), "/")
}
type kopiaDataStream struct {

View File

@ -11,6 +11,7 @@ import (
"github.com/stretchr/testify/suite"
"github.com/alcionai/corso/src/internal/data"
"github.com/alcionai/corso/src/internal/path"
)
// ---------------
@ -26,15 +27,33 @@ func TestKopiaDataCollectionUnitSuite(t *testing.T) {
func (suite *KopiaDataCollectionUnitSuite) TestReturnsPath() {
t := suite.T()
expected := []string{
"a-tenant",
path.ExchangeService.String(),
"a-user",
path.EmailCategory.String(),
"some",
"path",
"for",
"data",
}
path := []string{"some", "path", "for", "data"}
b := path.Builder{}.Append("some", "path", "for", "data")
pth, err := b.ToDataLayerExchangePathForCategory(
"a-tenant",
"a-user",
path.EmailCategory,
false,
)
require.NoError(t, err)
c := kopiaDataCollection{
streams: []data.Stream{},
path: path,
path: pth,
}
assert.Equal(t, c.FullPath(), path)
// TODO(ashmrtn): Update when data.Collection.FullPath supports path.Path
assert.Equal(t, expected, c.FullPath())
}
func (suite *KopiaDataCollectionUnitSuite) TestReturnsStreams() {
@ -80,7 +99,7 @@ func (suite *KopiaDataCollectionUnitSuite) TestReturnsStreams() {
suite.T().Run(test.name, func(t *testing.T) {
c := kopiaDataCollection{
streams: test.streams,
path: []string{},
path: nil,
}
count := 0

View File

@ -2,7 +2,7 @@ package kopia
import (
"context"
"path"
stdpath "path"
"sync"
"github.com/hashicorp/go-multierror"
@ -16,6 +16,7 @@ import (
"github.com/pkg/errors"
"github.com/alcionai/corso/src/internal/data"
"github.com/alcionai/corso/src/internal/path"
"github.com/alcionai/corso/src/pkg/backup/details"
"github.com/alcionai/corso/src/pkg/logger"
)
@ -163,7 +164,7 @@ func getStreamItemFunc(
return errs.ErrorOrNil()
}
itemPath := path.Join(append(streamedEnts.FullPath(), e.UUID())...)
itemPath := stdpath.Join(append(streamedEnts.FullPath(), e.UUID())...)
ei, ok := e.(data.StreamInfo)
if !ok {
@ -179,7 +180,7 @@ func getStreamItemFunc(
// Relative path given to us in the callback is missing the root
// element. Add to pending set before calling the callback to avoid race
// conditions when the item is completed.
p := path.Join(append(streamedEnts.FullPath()[1:], e.UUID())...)
p := stdpath.Join(append(streamedEnts.FullPath()[1:], e.UUID())...)
d := &itemDetails{info: ei.Info(), repoRef: itemPath}
progress.put(p, d)
@ -411,9 +412,9 @@ func (w Wrapper) makeSnapshotWithRoot(
func (w Wrapper) getEntry(
ctx context.Context,
snapshotID string,
itemPath []string,
itemPath path.Path,
) (fs.Entry, error) {
if len(itemPath) == 0 {
if itemPath == nil {
return nil, errors.New("no restore path given")
}
@ -428,7 +429,11 @@ func (w Wrapper) getEntry(
}
// GetNestedEntry handles nil properly.
e, err := snapshotfs.GetNestedEntry(ctx, rootDirEntry, itemPath[1:])
e, err := snapshotfs.GetNestedEntry(
ctx,
rootDirEntry,
itemPath.PopFront().Elements(),
)
if err != nil {
return nil, errors.Wrap(err, "getting nested object handle")
}
@ -447,7 +452,19 @@ func (w Wrapper) collectItems(
snapshotID string,
itemPath []string,
) ([]data.Collection, error) {
e, err := w.getEntry(ctx, snapshotID, itemPath)
// TODO(ashmrtn): Remove this extra parsing once selectors pass path.Path to
// this function.
pth, err := path.FromDataLayerPath(stdpath.Join(itemPath...), true)
if err != nil {
return nil, errors.Wrap(err, "parsing to path struct")
}
parentDir, err := pth.Dir()
if err != nil {
return nil, errors.Wrap(err, "getting parent directory from path")
}
e, err := w.getEntry(ctx, snapshotID, pth)
if err != nil {
return nil, err
}
@ -457,7 +474,7 @@ func (w Wrapper) collectItems(
return nil, errors.New("requested object is not a file")
}
c, err := restoreSingleItem(ctx, f, itemPath[:len(itemPath)-1])
c, err := restoreSingleItem(ctx, f, parentDir)
if err != nil {
return nil, err
}
@ -495,7 +512,7 @@ func (w Wrapper) RestoreSingleItem(
func restoreSingleItem(
ctx context.Context,
f fs.File,
itemPath []string,
itemDir path.Path,
) (data.Collection, error) {
r, err := f.Open(ctx)
if err != nil {
@ -509,7 +526,7 @@ func restoreSingleItem(
reader: r,
},
},
path: itemPath,
path: itemDir,
}, nil
}

View File

@ -5,7 +5,7 @@ import (
"context"
"io"
"io/ioutil"
"path"
stdpath "path"
"testing"
"github.com/google/uuid"
@ -19,6 +19,7 @@ import (
"github.com/alcionai/corso/src/internal/connector/mockconnector"
"github.com/alcionai/corso/src/internal/data"
"github.com/alcionai/corso/src/internal/kopia/mockkopia"
"github.com/alcionai/corso/src/internal/path"
"github.com/alcionai/corso/src/internal/tester"
"github.com/alcionai/corso/src/pkg/backup/details"
)
@ -26,7 +27,6 @@ import (
const (
testTenant = "a-tenant"
testUser = "user1"
testEmailDir = "email"
testInboxDir = "inbox"
testArchiveDir = "archive"
testFileName = "file1"
@ -38,8 +38,21 @@ const (
)
var (
testPath = []string{testTenant, testUser, testEmailDir, testInboxDir}
testPath2 = []string{testTenant, testUser, testEmailDir, testArchiveDir}
testEmailDir = path.EmailCategory.String()
testPath = []string{
testTenant,
path.ExchangeService.String(),
testUser,
path.EmailCategory.String(),
testInboxDir,
}
testPath2 = []string{
testTenant,
path.ExchangeService.String(),
testUser,
path.EmailCategory.String(),
testArchiveDir,
}
testFileData = []byte("abcdefghijklmnopqrstuvwxyz")
testFileData2 = []byte("zyxwvutsrqponmlkjihgfedcba")
testFileData3 = []byte("foo")
@ -71,7 +84,7 @@ func testForFiles(
for s := range c.Items() {
count++
fullPath := path.Join(append(c.FullPath(), s.UUID())...)
fullPath := stdpath.Join(append(c.FullPath(), s.UUID())...)
expected, ok := expected[fullPath]
require.True(t, ok, "unexpected file with path %q", fullPath)
@ -503,8 +516,20 @@ func (suite *KopiaIntegrationSuite) TestRestoreAfterCompressionChange() {
w := &Wrapper{k}
tid := uuid.NewString()
p1 := []string{tid, "uid", "emails", "fid"}
p2 := []string{tid, "uid2", "emails", "fid"}
p1 := []string{
tid,
path.ExchangeService.String(),
"uid",
path.EmailCategory.String(),
"fid",
}
p2 := []string{
tid,
path.ExchangeService.String(),
"uid2",
path.EmailCategory.String(),
"fid",
}
dc1 := mockconnector.NewMockExchangeCollection(p1, 1)
dc2 := mockconnector.NewMockExchangeCollection(p2, 1)
@ -517,8 +542,8 @@ func (suite *KopiaIntegrationSuite) TestRestoreAfterCompressionChange() {
require.NoError(t, k.Compression(ctx, "gzip"))
expected := map[string][]byte{
path.Join(fp1...): dc1.Data[0],
path.Join(fp2...): dc2.Data[0],
stdpath.Join(fp1...): dc1.Data[0],
stdpath.Join(fp2...): dc2.Data[0],
}
result, err := w.RestoreMultipleItems(
@ -534,10 +559,29 @@ func (suite *KopiaIntegrationSuite) TestRestoreAfterCompressionChange() {
func (suite *KopiaIntegrationSuite) TestBackupCollections_ReaderError() {
t := suite.T()
tmpBuilder := path.Builder{}.Append(testInboxDir)
p1, err := tmpBuilder.ToDataLayerExchangePathForCategory(
testTenant,
testUser,
path.EmailCategory,
false,
)
require.NoError(t, err)
tmpBuilder = path.Builder{}.Append(testArchiveDir)
p2, err := tmpBuilder.ToDataLayerExchangePathForCategory(
testTenant,
testUser,
path.EmailCategory,
false,
)
require.NoError(t, err)
collections := []data.Collection{
&kopiaDataCollection{
path: testPath,
path: p1,
streams: []data.Stream{
&mockconnector.MockExchangeData{
ID: testFileName,
@ -550,7 +594,7 @@ func (suite *KopiaIntegrationSuite) TestBackupCollections_ReaderError() {
},
},
&kopiaDataCollection{
path: testPath2,
path: p2,
streams: []data.Stream{
&mockconnector.MockExchangeData{
ID: testFileName3,
@ -577,7 +621,7 @@ func (suite *KopiaIntegrationSuite) TestBackupCollections_ReaderError() {
assert.Equal(t, 0, stats.ErrorCount)
assert.Equal(t, 5, stats.TotalFileCount)
assert.Equal(t, 5, stats.TotalDirectoryCount)
assert.Equal(t, 6, stats.TotalDirectoryCount)
assert.Equal(t, 1, stats.IgnoredErrorCount)
assert.False(t, stats.Incomplete)
assert.Len(t, rp.Entries, 5)
@ -616,10 +660,29 @@ func (suite *KopiaSimpleRepoIntegrationSuite) SetupTest() {
require.NoError(t, err)
suite.w = &Wrapper{c}
tmpBuilder := path.Builder{}.Append(testInboxDir)
p1, err := tmpBuilder.ToDataLayerExchangePathForCategory(
testTenant,
testUser,
path.EmailCategory,
false,
)
require.NoError(t, err)
tmpBuilder = path.Builder{}.Append(testArchiveDir)
p2, err := tmpBuilder.ToDataLayerExchangePathForCategory(
testTenant,
testUser,
path.EmailCategory,
false,
)
require.NoError(t, err)
collections := []data.Collection{
&kopiaDataCollection{
path: testPath,
path: p1,
streams: []data.Stream{
&mockconnector.MockExchangeData{
ID: testFileName,
@ -632,7 +695,7 @@ func (suite *KopiaSimpleRepoIntegrationSuite) SetupTest() {
},
},
&kopiaDataCollection{
path: testPath2,
path: p2,
streams: []data.Stream{
&mockconnector.MockExchangeData{
ID: testFileName3,
@ -658,7 +721,7 @@ func (suite *KopiaSimpleRepoIntegrationSuite) SetupTest() {
require.NoError(t, err)
require.Equal(t, stats.ErrorCount, 0)
require.Equal(t, stats.TotalFileCount, 6)
require.Equal(t, stats.TotalDirectoryCount, 5)
require.Equal(t, stats.TotalDirectoryCount, 6)
require.Equal(t, stats.IgnoredErrorCount, 0)
require.False(t, stats.Incomplete)
assert.Len(t, rp.Entries, 6)
@ -667,14 +730,14 @@ func (suite *KopiaSimpleRepoIntegrationSuite) SetupTest() {
// stdpath.Join doesn't like (testPath..., testFileName), hence the append.
suite.inboxExpectedFiles = map[string][]byte{
path.Join(append(testPath, testFileName)...): testFileData,
path.Join(append(testPath, testFileName2)...): testFileData2,
stdpath.Join(append(testPath, testFileName)...): testFileData,
stdpath.Join(append(testPath, testFileName2)...): testFileData2,
}
suite.archiveExpectedFiles = map[string][]byte{
path.Join(append(testPath2, testFileName3)...): testFileData3,
path.Join(append(testPath2, testFileName4)...): testFileData4,
path.Join(append(testPath2, testFileName5)...): testFileData5,
path.Join(append(testPath2, testFileName6)...): testFileData6,
stdpath.Join(append(testPath2, testFileName3)...): testFileData3,
stdpath.Join(append(testPath2, testFileName4)...): testFileData4,
stdpath.Join(append(testPath2, testFileName5)...): testFileData5,
stdpath.Join(append(testPath2, testFileName6)...): testFileData6,
}
suite.allExpectedFiles = map[string][]byte{}
@ -768,8 +831,20 @@ func (suite *KopiaSimpleRepoIntegrationSuite) TestRestoreMultipleItems() {
w := &Wrapper{k}
tid := uuid.NewString()
p1 := []string{tid, "uid", "emails", "fid"}
p2 := []string{tid, "uid2", "emails", "fid"}
p1 := []string{
tid,
path.ExchangeService.String(),
"uid",
path.EmailCategory.String(),
"fid",
}
p2 := []string{
tid,
path.ExchangeService.String(),
"uid2",
path.EmailCategory.String(),
"fid",
}
dc1 := mockconnector.NewMockExchangeCollection(p1, 1)
dc2 := mockconnector.NewMockExchangeCollection(p2, 1)
@ -780,8 +855,8 @@ func (suite *KopiaSimpleRepoIntegrationSuite) TestRestoreMultipleItems() {
require.NoError(t, err)
expected := map[string][]byte{
path.Join(fp1...): dc1.Data[0],
path.Join(fp2...): dc2.Data[0],
stdpath.Join(fp1...): dc1.Data[0],
stdpath.Join(fp2...): dc2.Data[0],
}
result, err := w.RestoreMultipleItems(

View File

@ -66,6 +66,14 @@ type Path interface {
ResourceOwner() string
Folder() string
Item() string
// PopFront returns a Builder object with the first element (left-side)
// removed. As the resulting set of elements is no longer a valid resource
// path a Builder is returned instead.
PopFront() *Builder
// Dir returns a Path object with the right-most element removed if possible.
// If removing the right-most element would discard one of the required prefix
// elements then an error is returned.
Dir() (Path, error)
}
// Builder is a simple path representation that only tracks path elements. It
@ -136,6 +144,30 @@ func (pb *Builder) appendElements(escaped bool, elements []string) error {
return nil
}
// PopFront returns a new Builder holding every element of pb except the first
// (left-most) one. A Builder with zero or one elements yields an empty
// Builder. The remaining elements are copied into fresh storage, so the result
// does not alias the receiver's elements.
func (pb Builder) PopFront() *Builder {
	popped := &Builder{}

	if n := len(pb.elements); n > 1 {
		// Allocate exactly n-1 slots and copy the tail into them.
		popped.elements = append(make([]string, 0, n-1), pb.elements[1:]...)
	}

	return popped
}
// dir returns a new Builder with the last (right-most) element of pb removed.
// A Builder with zero or one elements yields an empty Builder.
func (pb Builder) dir() *Builder {
	if len(pb.elements) <= 1 {
		return &Builder{}
	}

	end := len(pb.elements) - 1

	return &Builder{
		// Share the backing array instead of copying, but cap the slice at its
		// length. Without the cap, an append on the returned Builder's elements
		// would write into the spare slot and overwrite the receiver's final
		// element; with it, such an append is forced to reallocate.
		elements: pb.elements[:end:end],
	}
}
// String returns a string that contains all path elements joined together.
// Elements of the path that need escaping are escaped.
func (pb Builder) String() string {
@ -148,6 +180,13 @@ func (pb Builder) String() string {
return join(escaped)
}
// Elements returns a copy of every element in the path, so callers may modify
// the result without affecting the Builder. This is a temporary function and
// will likely be updated to handle encoded elements instead of clear-text
// elements in the future.
func (pb Builder) Elements() []string {
	out := make([]string, len(pb.elements))
	copy(out, pb.elements)

	return out
}
//nolint:unused
func (pb Builder) join(start, end int) string {
return join(pb.elements[start:end])

View File

@ -294,6 +294,77 @@ func (suite *PathUnitSuite) TestTrailingEscapeChar() {
}
}
// TestElements checks that Elements returns the clear-text (unescaped) path
// elements, both for Builders created from escaped input and for Builders
// created from raw input.
func (suite *PathUnitSuite) TestElements() {
	// fromEscaped treats its input as already-escaped elements.
	fromEscaped := func(elements []string) (*Builder, error) {
		return Builder{}.UnescapeAndAppend(elements...)
	}

	// fromRaw treats its input as clear-text elements; it cannot fail.
	fromRaw := func(elements []string) (*Builder, error) {
		return Builder{}.Append(elements...), nil
	}

	cases := []struct {
		name  string
		raw   []string
		want  []string
		build func(elements []string) (*Builder, error)
	}{
		{
			name:  "SimpleEscapedPath",
			raw:   []string{"this", "is", "a", "path"},
			want:  []string{"this", "is", "a", "path"},
			build: fromEscaped,
		},
		{
			name:  "SimpleUnescapedPath",
			raw:   []string{"this", "is", "a", "path"},
			want:  []string{"this", "is", "a", "path"},
			build: fromRaw,
		},
		{
			name:  "EscapedPath",
			raw:   []string{"this", `is\/`, "a", "path"},
			want:  []string{"this", "is/", "a", "path"},
			build: fromEscaped,
		},
	}

	for _, tc := range cases {
		suite.T().Run(tc.name, func(t *testing.T) {
			built, err := tc.build(tc.raw)
			require.NoError(t, err)

			assert.Equal(t, tc.want, built.Elements())
		})
	}
}
// TestPopFront checks the string form of the Builder returned by PopFront for
// Builders holding zero, one, and two elements.
func (suite *PathUnitSuite) TestPopFront() {
	type popFrontCase struct {
		name string
		in   *Builder
		want string
	}

	cases := []popFrontCase{
		{name: "Empty", in: &Builder{}, want: ""},
		{name: "OneElement", in: Builder{}.Append("something"), want: ""},
		{name: "TwoElements", in: Builder{}.Append("something", "else"), want: "else"},
	}

	for _, tc := range cases {
		suite.T().Run(tc.name, func(t *testing.T) {
			got := tc.in.PopFront().String()
			assert.Equal(t, tc.want, got)
		})
	}
}
func (suite *PathUnitSuite) TestFromStringErrors() {
table := []struct {
name string

View File

@ -135,6 +135,9 @@ func (rp dataLayerResourcePath) ResourceOwner() string {
// Folder returns the folder segment embedded in the dataLayerResourcePath.
func (rp dataLayerResourcePath) Folder() string {
endIdx := len(rp.Builder.elements)
if endIdx == 4 {
return ""
}
if rp.hasItem {
endIdx--
@ -152,3 +155,16 @@ func (rp dataLayerResourcePath) Item() string {
return ""
}
// Dir returns a Path equal to rp with the right-most element removed. The
// result never refers to an item. An error is returned when rp holds four or
// fewer elements, since removing another one would discard a required prefix
// element.
func (rp dataLayerResourcePath) Dir() (Path, error) {
	// Number of leading elements that must always remain in the path.
	const requiredPrefixLen = 4

	if len(rp.elements) <= requiredPrefixLen {
		return nil, errors.Errorf("unable to shorten path %q", rp)
	}

	parent := &dataLayerResourcePath{
		Builder:  *rp.dir(),
		service:  rp.service,
		category: rp.category,
	}
	// Whatever rp pointed at, its parent is not an item.
	parent.hasItem = false

	return parent, nil
}

View File

@ -1,6 +1,7 @@
package path_test
import (
"fmt"
"strings"
"testing"
@ -139,6 +140,66 @@ func (suite *DataLayerResourcePath) TestMailItemNoFolder() {
}
}
// TestPopFront checks that PopFront on a data layer exchange path drops the
// leading tenant element and keeps the service, user, category, and remaining
// elements, for every mode in modes.
func (suite *DataLayerResourcePath) TestPopFront() {
	// Everything the full path contains except the tenant.
	wantElems := []string{
		path.ExchangeService.String(),
		testUser,
		path.EmailCategory.String(),
	}
	wantElems = append(wantElems, rest...)
	want := path.Builder{}.Append(wantElems...)

	for _, mode := range modes {
		suite.T().Run(mode.name, func(t *testing.T) {
			full, err := path.Builder{}.
				Append(rest...).
				ToDataLayerExchangePathForCategory(
					testTenant,
					testUser,
					path.EmailCategory,
					mode.isItem,
				)
			require.NoError(t, err)

			popped := full.PopFront()
			assert.Equal(t, want.String(), popped.String())
		})
	}
}
// TestDir checks that each call to Dir removes exactly one trailing element
// from a data layer exchange path, and that Dir returns an error once only the
// tenant, service, user, and category elements remain. The numbered sub-tests
// build on one another: each starts from the path left by the previous one.
func (suite *DataLayerResourcePath) TestDir() {
	elements := []string{
		testTenant,
		path.ExchangeService.String(),
		testUser,
		path.EmailCategory.String(),
	}

	for _, m := range modes {
		suite.T().Run(m.name, func(t1 *testing.T) {
			pb := path.Builder{}.Append(rest...)
			p, err := pb.ToDataLayerExchangePathForCategory(
				testTenant,
				testUser,
				path.EmailCategory,
				m.isItem,
			)
			require.NoError(t1, err)

			for i := 1; i <= len(rest); i++ {
				t1.Run(fmt.Sprintf("%v", i), func(t *testing.T) {
					// Only advance the shared path after Dir succeeds. Assigning the
					// result directly would leave p nil on failure and make every
					// later sub-test panic instead of reporting the real error.
					parent, err := p.Dir()
					require.NoError(t, err)

					p = parent

					expected := path.Builder{}.Append(elements...).Append(rest[:len(rest)-i]...)
					assert.Equal(t, expected.String(), p.String())
				})
			}

			t1.Run("All", func(t *testing.T) {
				// Only the required prefix is left, so Dir must refuse.
				_, err := p.Dir()
				assert.Error(t, err)
			})
		})
	}
}
func (suite *DataLayerResourcePath) TestToExchangePathForCategory() {
b := path.Builder{}.Append(rest...)
table := []struct {