diff --git a/src/internal/kopia/upload.go b/src/internal/kopia/upload.go index d8d3d9d7f..de48d4de3 100644 --- a/src/internal/kopia/upload.go +++ b/src/internal/kopia/upload.go @@ -15,6 +15,7 @@ import ( "github.com/hashicorp/go-multierror" "github.com/kopia/kopia/fs" "github.com/kopia/kopia/fs/virtualfs" + "github.com/kopia/kopia/snapshot" "github.com/kopia/kopia/snapshot/snapshotfs" "github.com/pkg/errors" @@ -25,6 +26,8 @@ import ( "github.com/alcionai/corso/src/pkg/path" ) +const maxInflateTraversalDepth = 500 + var versionSize = int(unsafe.Sizeof(serializationVersion)) func newBackupStreamReader(version uint32, reader io.ReadCloser) *backupStreamReader { @@ -396,6 +399,23 @@ func getStreamItemFunc( // buildKopiaDirs recursively builds a directory hierarchy from the roots up. // Returned directories are virtualfs.StreamingDirectory. func buildKopiaDirs(dirName string, dir *treeMap, progress *corsoProgress) (fs.Directory, error) { + // Reuse kopia directories directly if the subtree rooted at them is + // unchanged. + // + // TODO(ashmrtn): This will need updated when we have OneDrive backups where + // items have been deleted because we can't determine which directory used to + // have the item. + // + // TODO(ashmrtn): We could possibly also use this optimization if we know that + // the collection has no items in it. In that case though, we may need to take + // extra care to ensure the name of the directory is properly represented. For + // example, a directory that has been renamed but with no additional items may + // not be able to directly use kopia's version of the directory due to the + // rename. + if dir.collection == nil && len(dir.childDirs) == 0 && dir.baseDir != nil { + return dir.baseDir, nil + } + // Need to build the directory tree from the leaves up because intermediate // directories need to have all their entries at creation time. var childDirs []fs.Entry @@ -518,6 +538,163 @@ func inflateCollectionTree( return roots, updatedPaths, nil } +// traverseBaseDir is an unoptimized function that reads items in a directory +// and traverses subdirectories in the given directory. oldDirPath is the path +// the directory would be at if the hierarchy was unchanged. newDirPath is the +// path the directory would be at if all changes from the root to this directory +// were taken into account. Both are needed to detect some changes like moving +// a parent directory and moving one of the child directories out of the parent. +// If a directory on the path was deleted, newDirPath is set to nil. +// +// TODO(ashmrtn): A potentially more memory efficient version of this would +// traverse only the directories that we know are present in the collections +// passed in. The other directories could be dynamically discovered when kopia +// was requesting items. +func traverseBaseDir( + ctx context.Context, + depth int, + updatedPaths map[string]path.Path, + oldDirPath *path.Builder, + newDirPath *path.Builder, + dir fs.Directory, + roots map[string]*treeMap, +) error { + if depth >= maxInflateTraversalDepth { + return errors.Errorf("base snapshot tree too tall %s", oldDirPath) + } + + // Wrapper base64 encodes all file and folder names to avoid issues with + // special characters. Since we're working directly with files and folders + // from kopia we need to do the decoding here. + dirName, err := decodeElement(dir.Name()) + if err != nil { + return errors.Wrapf(err, "decoding base directory name %s", dir.Name()) + } + + // Form the path this directory would be at if the hierarchy remained the same + // as well as where it would be at if we take into account ancestor + // directories that may have had changes. The former is used to check if this + // directory specifically has been moved. The latter is used to handle + // deletions and moving subtrees in the hierarchy. + // + // Explicit movement of directories should have the final say though so we + // override any subtree movement with what's in updatedPaths if an entry + // exists. + oldDirPath = oldDirPath.Append(dirName) + currentPath := newDirPath + + if currentPath != nil { + currentPath = currentPath.Append(dirName) + } + + if upb, ok := updatedPaths[oldDirPath.String()]; ok { + // This directory was deleted. + if upb == nil { + currentPath = nil + } else { + // This directory was moved/renamed and the new location is in upb. + currentPath = upb.ToBuilder() + } + } + + // TODO(ashmrtn): If we can do prefix matching on elements in updatedPaths and + // we know that the tree node for this directory has no collection reference + // and no child nodes then we can skip traversing this directory. This will + // only work if we know what directory deleted items used to belong in (e.x. + // it won't work for OneDrive because we only know the ID of the deleted + // item). + + var hasItems bool + + err = dir.IterateEntries(ctx, func(innerCtx context.Context, entry fs.Entry) error { + dEntry, ok := entry.(fs.Directory) + if !ok { + hasItems = true + return nil + } + + return traverseBaseDir( + innerCtx, + depth+1, + updatedPaths, + oldDirPath, + currentPath, + dEntry, + roots, + ) + }) + if err != nil { + return errors.Wrapf(err, "traversing base directory %s", oldDirPath) + } + + // We only need to add this base directory to the tree we're building if it + // has items in it. The traversal of the directory here just finds + // subdirectories. This optimization will not be valid if we dynamically + // determine the subdirectories this directory has when handing items to + // kopia. + if currentPath != nil && hasItems { + // Having this in the if-block has the effect of removing empty directories + // from backups that have a base snapshot. If we'd like to preserve empty + // directories across incremental backups, move getting the node outside of + // the if-block. That will be sufficient to create a StreamingDirectory that + // kopia will pick up on. Assigning the baseDir of the node should remain + // in the if-block though as that is an optimization. + node := getTreeNode(roots, currentPath.Elements()) + if node == nil { + return errors.Errorf("unable to get tree node for path %s", currentPath) + } + + node.baseDir = dir + } + + return nil +} + +func inflateBaseTree( + ctx context.Context, + loader snapshotLoader, + snap *snapshot.Manifest, + updatedPaths map[string]path.Path, + roots map[string]*treeMap, +) error { + // Only complete snapshots should be used to source base information. + // Snapshots for checkpoints will rely on kopia-assisted dedupe to efficiently + // handle items that were completely uploaded before Corso crashed. + if len(snap.IncompleteReason) > 0 { + return nil + } + + root, err := loader.SnapshotRoot(snap) + if err != nil { + return errors.Wrapf(err, "getting snapshot %s root directory", snap.ID) + } + + dir, ok := root.(fs.Directory) + if !ok { + return errors.Errorf("snapshot %s root is not a directory", snap.ID) + } + + // TODO(ashmrtn): We should actually only traverse a subtree of the snapshot + // where the subtree corresponds to the "reason" this snapshot was chosen. + // Doing so will avoid pulling in data for categories that should not be + // included in the current backup or overwriting some entries with out-dated + // information. + + if err = traverseBaseDir( + ctx, + 0, + updatedPaths, + &path.Builder{}, + &path.Builder{}, + dir, + roots, + ); err != nil { + return errors.Wrapf(err, "traversing base snapshot %s", snap.ID) + } + + return nil +} + // inflateDirTree returns a set of tags representing all the resource owners and // service/categories in the snapshot and a fs.Directory tree rooted at the // oldest common ancestor of the streams. All nodes are @@ -526,14 +703,22 @@ func inflateCollectionTree( // caching reasons. func inflateDirTree( ctx context.Context, + loader snapshotLoader, + baseSnaps []*snapshot.Manifest, collections []data.Collection, progress *corsoProgress, ) (fs.Directory, error) { - roots, _, err := inflateCollectionTree(ctx, collections) + roots, updatedPaths, err := inflateCollectionTree(ctx, collections) if err != nil { return nil, errors.Wrap(err, "inflating collection tree") } + for _, snap := range baseSnaps { + if err = inflateBaseTree(ctx, loader, snap, updatedPaths, roots); err != nil { + return nil, errors.Wrap(err, "inflating base snapshot tree(s)") + } + } + if len(roots) > 1 { return nil, errors.New("multiple root directories") } diff --git a/src/internal/kopia/upload_test.go b/src/internal/kopia/upload_test.go index 0a73c2662..30b02358b 100644 --- a/src/internal/kopia/upload_test.go +++ b/src/internal/kopia/upload_test.go @@ -6,8 +6,11 @@ import ( "io" stdpath "path" "testing" + "time" "github.com/kopia/kopia/fs" + "github.com/kopia/kopia/fs/virtualfs" + "github.com/kopia/kopia/snapshot" "github.com/kopia/kopia/snapshot/snapshotfs" "github.com/pkg/errors" "github.com/stretchr/testify/assert" @@ -21,6 +24,163 @@ import ( "github.com/alcionai/corso/src/pkg/path" ) +func makePath(t *testing.T, elements []string) path.Path { + p, err := path.FromDataLayerPath(stdpath.Join(elements...), false) + require.NoError(t, err) + + return p +} + +// baseWithChildren returns an fs.Entry hierarchy where the first four levels +// are the encoded values of tenant, service, user, and category respectively. +// All items in children are made a direct descendent of the category entry. +func baseWithChildren( + tenant, service, user, category string, + children []fs.Entry, +) fs.Entry { + return virtualfs.NewStaticDirectory( + encodeElements(tenant)[0], + []fs.Entry{ + virtualfs.NewStaticDirectory( + encodeElements(service)[0], + []fs.Entry{ + virtualfs.NewStaticDirectory( + encodeElements(user)[0], + []fs.Entry{ + virtualfs.NewStaticDirectory( + encodeElements(category)[0], + children, + ), + }, + ), + }, + ), + }, + ) +} + +type expectedNode struct { + name string + children []*expectedNode + data []byte +} + +// expectedTreeWithChildren returns an expectedNode hierarchy where the first +// four levels are the tenant, service, user, and category respectively. All +// items in children are made a direct descendent of the category node. +func expectedTreeWithChildren( + tenant, service, user, category string, + children []*expectedNode, +) *expectedNode { + return &expectedNode{ + name: tenant, + children: []*expectedNode{ + { + name: service, + children: []*expectedNode{ + { + name: user, + children: []*expectedNode{ + { + name: category, + children: children, + }, + }, + }, + }, + }, + }, + } +} + +// Currently only works for files that Corso has serialized as it expects a +// version specifier at the start of the file. +// +//revive:disable:context-as-argument +func expectFileData( + t *testing.T, + ctx context.Context, + expected []byte, + f fs.StreamingFile, +) { + //revive:enable:context-as-argument + t.Helper() + + if len(expected) == 0 { + return + } + + name, err := decodeElement(f.Name()) + if err != nil { + name = f.Name() + } + + r, err := f.GetReader(ctx) + if !assert.NoErrorf(t, err, "getting reader for file: %s", name) { + return + } + + // Need to wrap with a restore stream reader to remove the version. + r = &restoreStreamReader{ + ReadCloser: io.NopCloser(r), + expectedVersion: serializationVersion, + } + + got, err := io.ReadAll(r) + if !assert.NoErrorf(t, err, "reading data in file: %s", name) { + return + } + + assert.Equalf(t, expected, got, "data in file: %s", name) +} + +//revive:disable:context-as-argument +func expectTree( + t *testing.T, + ctx context.Context, + expected *expectedNode, + got fs.Entry, +) { + //revive:enable:context-as-argument + t.Helper() + + if expected == nil { + return + } + + names := make([]string, 0, len(expected.children)) + mapped := make(map[string]*expectedNode, len(expected.children)) + + for _, child := range expected.children { + encoded := encodeElements(child.name)[0] + + names = append(names, encoded) + mapped[encoded] = child + } + + entries := getDirEntriesForEntry(t, ctx, got) + expectDirs(t, entries, names, true) + + for _, e := range entries { + expectedSubtree := mapped[e.Name()] + if !assert.NotNil(t, expectedSubtree) { + continue + } + + if f, ok := e.(fs.StreamingFile); ok { + expectFileData(t, ctx, expectedSubtree.data, f) + continue + } + + dir, ok := e.(fs.Directory) + if !ok { + continue + } + + expectTree(t, ctx, expectedSubtree, dir) + } +} + func expectDirs( t *testing.T, entries []fs.Entry, @@ -49,7 +209,7 @@ func getDirEntriesForEntry( ) []fs.Entry { //revive:enable:context-as-argument d, ok := entry.(fs.Directory) - require.True(t, ok, "returned entry is not a directory") + require.True(t, ok, "entry is not a directory") entries, err := fs.GetAllEntries(ctx, d) require.NoError(t, err) @@ -390,19 +550,10 @@ type HierarchyBuilderUnitSuite struct { } func (suite *HierarchyBuilderUnitSuite) SetupSuite() { - tmp, err := path.FromDataLayerPath( - stdpath.Join( - testTenant, - path.ExchangeService.String(), - testUser, - path.EmailCategory.String(), - testInboxDir, - ), - false, + suite.testPath = makePath( + suite.T(), + []string{testTenant, service, testUser, category, testInboxDir}, ) - require.NoError(suite.T(), err) - - suite.testPath = tmp } func TestHierarchyBuilderUnitSuite(t *testing.T) { @@ -422,17 +573,7 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTree() { user2 := "user2" user2Encoded := encodeAsPath(user2) - p2, err := path.FromDataLayerPath( - stdpath.Join( - tenant, - service, - user2, - category, - testInboxDir, - ), - false, - ) - require.NoError(t, err) + p2 := makePath(t, []string{tenant, service, user2, category, testInboxDir}) // Encode user names here so we don't have to decode things later. expectedFileCount := map[string]int{ @@ -464,7 +605,7 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTree() { // - emails // - Inbox // - 42 separate files - dirTree, err := inflateDirTree(ctx, collections, progress) + dirTree, err := inflateDirTree(ctx, nil, nil, collections, progress) require.NoError(t, err) assert.Equal(t, encodeAsPath(testTenant), dirTree.Name()) @@ -504,8 +645,7 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTree_MixedDirectory() subdir := "subfolder" - p2, err := suite.testPath.Append(subdir, false) - require.NoError(suite.T(), err) + p2 := makePath(suite.T(), append(suite.testPath.Elements(), subdir)) // Test multiple orders of items because right now order can matter. Both // orders result in a directory structure like: @@ -553,7 +693,7 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTree_MixedDirectory() suite.T().Run(test.name, func(t *testing.T) { progress := &corsoProgress{pending: map[string]*itemDetails{}} - dirTree, err := inflateDirTree(ctx, test.layout, progress) + dirTree, err := inflateDirTree(ctx, nil, nil, test.layout, progress) require.NoError(t, err) assert.Equal(t, encodeAsPath(testTenant), dirTree.Name()) @@ -597,13 +737,10 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTree_MixedDirectory() } func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTree_Fails() { - p2, err := path.Builder{}.Append(testInboxDir).ToDataLayerExchangePathForCategory( - "tenant2", - "user2", - path.EmailCategory, - false, + p2 := makePath( + suite.T(), + []string{"tenant2", service, "user2", category, testInboxDir}, ) - require.NoError(suite.T(), err) table := []struct { name string @@ -651,8 +788,712 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTree_Fails() { defer flush() suite.T().Run(test.name, func(t *testing.T) { - _, err := inflateDirTree(ctx, test.layout, nil) + _, err := inflateDirTree(ctx, nil, nil, test.layout, nil) assert.Error(t, err) }) } } + +type mockSnapshotWalker struct { + snapshotRoot fs.Entry +} + +func (msw *mockSnapshotWalker) SnapshotRoot(*snapshot.Manifest) (fs.Entry, error) { + return msw.snapshotRoot, nil +} + +func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTreeSingleSubtree() { + dirPath := makePath( + suite.T(), + []string{testTenant, service, testUser, category, testInboxDir}, + ) + + // Must be a function that returns a new instance each time as StreamingFile + // can only return its Reader once. + getBaseSnapshot := func() fs.Entry { + return baseWithChildren( + testTenant, + service, + testUser, + category, + []fs.Entry{ + virtualfs.NewStaticDirectory( + encodeElements(testInboxDir)[0], + []fs.Entry{ + virtualfs.StreamingFileWithModTimeFromReader( + encodeElements(testFileName)[0], + time.Time{}, + bytes.NewReader(testFileData), + ), + }, + ), + }, + ) + } + + table := []struct { + name string + inputCollections func() []data.Collection + expected *expectedNode + }{ + { + name: "SkipsDeletedItems", + inputCollections: func() []data.Collection { + mc := mockconnector.NewMockExchangeCollection(dirPath, 1) + mc.Names[0] = testFileName + mc.DeletedItems[0] = true + + return []data.Collection{mc} + }, + expected: expectedTreeWithChildren( + testTenant, + service, + testUser, + category, + []*expectedNode{ + { + name: testInboxDir, + children: []*expectedNode{}, + }, + }, + ), + }, + { + name: "AddsNewItems", + inputCollections: func() []data.Collection { + mc := mockconnector.NewMockExchangeCollection(dirPath, 1) + mc.Names[0] = testFileName2 + mc.Data[0] = testFileData2 + mc.ColState = data.NotMovedState + + return []data.Collection{mc} + }, + expected: expectedTreeWithChildren( + testTenant, + service, + testUser, + category, + []*expectedNode{ + { + name: testInboxDir, + children: []*expectedNode{ + { + name: testFileName, + children: []*expectedNode{}, + }, + { + name: testFileName2, + children: []*expectedNode{}, + data: testFileData2, + }, + }, + }, + }, + ), + }, + { + name: "SkipsUpdatedItems", + inputCollections: func() []data.Collection { + mc := mockconnector.NewMockExchangeCollection(dirPath, 1) + mc.Names[0] = testFileName + mc.Data[0] = testFileData2 + mc.ColState = data.NotMovedState + + return []data.Collection{mc} + }, + expected: expectedTreeWithChildren( + testTenant, + service, + testUser, + category, + []*expectedNode{ + { + name: testInboxDir, + children: []*expectedNode{ + { + name: testFileName, + children: []*expectedNode{}, + data: testFileData2, + }, + }, + }, + }, + ), + }, + } + + for _, test := range table { + suite.T().Run(test.name, func(t *testing.T) { + tester.LogTimeOfTest(t) + + ctx, flush := tester.NewContext() + defer flush() + + progress := &corsoProgress{pending: map[string]*itemDetails{}} + msw := &mockSnapshotWalker{ + snapshotRoot: getBaseSnapshot(), + } + + dirTree, err := inflateDirTree( + ctx, + msw, + []*snapshot.Manifest{{}}, + test.inputCollections(), + progress, + ) + require.NoError(t, err) + + expectTree(t, ctx, test.expected, dirTree) + }) + } +} + +func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTreeMultipleSubdirectories() { + const ( + personalDir = "personal" + workDir = "work" + ) + + inboxPath := makePath( + suite.T(), + []string{testTenant, service, testUser, category, testInboxDir}, + ) + + personalPath := makePath( + suite.T(), + append(inboxPath.Elements(), personalDir), + ) + personalFileName1 := testFileName + personalFileName2 := testFileName2 + + workPath := makePath( + suite.T(), + append(inboxPath.Elements(), workDir), + ) + workFileName := testFileName3 + + // Must be a function that returns a new instance each time as StreamingFile + // can only return its Reader once. + // baseSnapshot with the following layout: + // - a-tenant + // - exchange + // - user1 + // - email + // - Inbox + // - personal + // - file1 + // - file2 + // - work + // - file3 + getBaseSnapshot := func() fs.Entry { + return baseWithChildren( + testTenant, + service, + testUser, + category, + []fs.Entry{ + virtualfs.NewStaticDirectory( + encodeElements(testInboxDir)[0], + []fs.Entry{ + virtualfs.NewStaticDirectory( + encodeElements(personalDir)[0], + []fs.Entry{ + virtualfs.StreamingFileWithModTimeFromReader( + encodeElements(personalFileName1)[0], + time.Time{}, + bytes.NewReader(testFileData), + ), + virtualfs.StreamingFileWithModTimeFromReader( + encodeElements(personalFileName2)[0], + time.Time{}, + bytes.NewReader(testFileData2), + ), + }, + ), + virtualfs.NewStaticDirectory( + encodeElements(workDir)[0], + []fs.Entry{ + virtualfs.StreamingFileWithModTimeFromReader( + encodeElements(workFileName)[0], + time.Time{}, + bytes.NewReader(testFileData3), + ), + }, + ), + }, + ), + }, + ) + } + + table := []struct { + name string + inputCollections func(t *testing.T) []data.Collection + expected *expectedNode + }{ + { + name: "MovesSubtree", + inputCollections: func(t *testing.T) []data.Collection { + newPath := makePath( + t, + []string{testTenant, service, testUser, category, testInboxDir + "2"}, + ) + + mc := mockconnector.NewMockExchangeCollection(newPath, 0) + mc.PrevPath = inboxPath + mc.ColState = data.MovedState + + return []data.Collection{mc} + }, + expected: expectedTreeWithChildren( + testTenant, + service, + testUser, + category, + []*expectedNode{ + { + name: testInboxDir + "2", + children: []*expectedNode{ + { + name: personalDir, + children: []*expectedNode{ + { + name: personalFileName1, + children: []*expectedNode{}, + }, + { + name: personalFileName2, + children: []*expectedNode{}, + }, + }, + }, + { + name: workDir, + children: []*expectedNode{ + { + name: workFileName, + children: []*expectedNode{}, + }, + }, + }, + }, + }, + }, + ), + }, + { + name: "MovesChildAfterAncestorMove", + inputCollections: func(t *testing.T) []data.Collection { + newInboxPath := makePath( + t, + []string{testTenant, service, testUser, category, testInboxDir + "2"}, + ) + newWorkPath := makePath( + t, + []string{testTenant, service, testUser, category, workDir}, + ) + + inbox := mockconnector.NewMockExchangeCollection(newInboxPath, 0) + inbox.PrevPath = inboxPath + inbox.ColState = data.MovedState + + work := mockconnector.NewMockExchangeCollection(newWorkPath, 0) + work.PrevPath = workPath + work.ColState = data.MovedState + + return []data.Collection{inbox, work} + }, + expected: expectedTreeWithChildren( + testTenant, + service, + testUser, + category, + []*expectedNode{ + { + name: testInboxDir + "2", + children: []*expectedNode{ + { + name: personalDir, + children: []*expectedNode{ + { + name: personalFileName1, + children: []*expectedNode{}, + }, + { + name: personalFileName2, + children: []*expectedNode{}, + }, + }, + }, + }, + }, + { + name: workDir, + children: []*expectedNode{ + { + name: workFileName, + children: []*expectedNode{}, + }, + }, + }, + }, + ), + }, + { + name: "MovesChildAfterAncestorDelete", + inputCollections: func(t *testing.T) []data.Collection { + newWorkPath := makePath( + t, + []string{testTenant, service, testUser, category, workDir}, + ) + + inbox := mockconnector.NewMockExchangeCollection(inboxPath, 0) + inbox.PrevPath = inboxPath + inbox.ColState = data.DeletedState + + work := mockconnector.NewMockExchangeCollection(newWorkPath, 0) + work.PrevPath = workPath + work.ColState = data.MovedState + + return []data.Collection{inbox, work} + }, + expected: expectedTreeWithChildren( + testTenant, + service, + testUser, + category, + []*expectedNode{ + { + name: workDir, + children: []*expectedNode{ + { + name: workFileName, + children: []*expectedNode{}, + }, + }, + }, + }, + ), + }, + { + name: "ReplaceDeletedDirectory", + inputCollections: func(t *testing.T) []data.Collection { + personal := mockconnector.NewMockExchangeCollection(personalPath, 0) + personal.PrevPath = personalPath + personal.ColState = data.DeletedState + + work := mockconnector.NewMockExchangeCollection(personalPath, 0) + work.PrevPath = workPath + work.ColState = data.MovedState + + return []data.Collection{personal, work} + }, + expected: expectedTreeWithChildren( + testTenant, + service, + testUser, + category, + []*expectedNode{ + { + name: testInboxDir, + children: []*expectedNode{ + { + name: personalDir, + children: []*expectedNode{ + { + name: workFileName, + }, + }, + }, + }, + }, + }, + ), + }, + { + name: "ReplaceMovedDirectory", + inputCollections: func(t *testing.T) []data.Collection { + newPersonalPath := makePath( + t, + []string{testTenant, service, testUser, category, personalDir}, + ) + + personal := mockconnector.NewMockExchangeCollection(newPersonalPath, 0) + personal.PrevPath = personalPath + personal.ColState = data.MovedState + + work := mockconnector.NewMockExchangeCollection(personalPath, 0) + work.PrevPath = workPath + work.ColState = data.MovedState + + return []data.Collection{personal, work} + }, + expected: expectedTreeWithChildren( + testTenant, + service, + testUser, + category, + []*expectedNode{ + { + name: testInboxDir, + children: []*expectedNode{ + { + name: personalDir, + children: []*expectedNode{ + { + name: workFileName, + }, + }, + }, + }, + }, + { + name: personalDir, + children: []*expectedNode{ + { + name: personalFileName1, + }, + { + name: personalFileName2, + }, + }, + }, + }, + ), + }, + { + name: "MoveDirectoryAndMergeItems", + inputCollections: func(t *testing.T) []data.Collection { + newPersonalPath := makePath( + t, + []string{testTenant, service, testUser, category, workDir}, + ) + + personal := mockconnector.NewMockExchangeCollection(newPersonalPath, 2) + personal.PrevPath = personalPath + personal.ColState = data.MovedState + personal.Names[0] = personalFileName2 + personal.Data[0] = testFileData5 + personal.Names[1] = testFileName4 + personal.Data[1] = testFileData4 + + return []data.Collection{personal} + }, + expected: expectedTreeWithChildren( + testTenant, + service, + testUser, + category, + []*expectedNode{ + { + name: testInboxDir, + children: []*expectedNode{ + { + name: workDir, + children: []*expectedNode{ + { + name: workFileName, + children: []*expectedNode{}, + }, + }, + }, + }, + }, + { + name: workDir, + children: []*expectedNode{ + { + name: personalFileName1, + }, + { + name: personalFileName2, + data: testFileData5, + }, + { + name: testFileName4, + data: testFileData4, + }, + }, + }, + }, + ), + }, + } + + for _, test := range table { + suite.T().Run(test.name, func(t *testing.T) { + tester.LogTimeOfTest(t) + + ctx, flush := tester.NewContext() + defer flush() + + progress := &corsoProgress{pending: map[string]*itemDetails{}} + msw := &mockSnapshotWalker{ + snapshotRoot: getBaseSnapshot(), + } + + dirTree, err := inflateDirTree( + ctx, + msw, + []*snapshot.Manifest{{}}, + test.inputCollections(t), + progress, + ) + require.NoError(t, err) + + expectTree(t, ctx, test.expected, dirTree) + }) + } +} + +func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTreeSkipsDeletedSubtree() { + tester.LogTimeOfTest(suite.T()) + t := suite.T() + + ctx, flush := tester.NewContext() + defer flush() + + const ( + personalDir = "personal" + workDir = "work" + ) + + // baseSnapshot with the following layout: + // - a-tenant + // - exchange + // - user1 + // - email + // - Inbox + // - personal + // - file1 + // - work + // - file2 + // - Archive + // - personal + // - file3 + // - work + // - file4 + getBaseSnapshot := func() fs.Entry { + return baseWithChildren( + testTenant, + service, + testUser, + category, + []fs.Entry{ + virtualfs.NewStaticDirectory( + encodeElements(testInboxDir)[0], + []fs.Entry{ + virtualfs.NewStaticDirectory( + encodeElements(personalDir)[0], + []fs.Entry{ + virtualfs.StreamingFileWithModTimeFromReader( + encodeElements(testFileName)[0], + time.Time{}, + bytes.NewReader(testFileData), + ), + }, + ), + virtualfs.NewStaticDirectory( + encodeElements(workDir)[0], + []fs.Entry{ + virtualfs.StreamingFileWithModTimeFromReader( + encodeElements(testFileName2)[0], + time.Time{}, + bytes.NewReader(testFileData2), + ), + }, + ), + }, + ), + virtualfs.NewStaticDirectory( + encodeElements(testArchiveDir)[0], + []fs.Entry{ + virtualfs.NewStaticDirectory( + encodeElements(personalDir)[0], + []fs.Entry{ + virtualfs.StreamingFileWithModTimeFromReader( + encodeElements(testFileName3)[0], + time.Time{}, + bytes.NewReader(testFileData3), + ), + }, + ), + virtualfs.NewStaticDirectory( + encodeElements(workDir)[0], + []fs.Entry{ + virtualfs.StreamingFileWithModTimeFromReader( + encodeElements(testFileName4)[0], + time.Time{}, + bytes.NewReader(testFileData4), + ), + }, + ), + }, + ), + }, + ) + } + + expected := expectedTreeWithChildren( + testTenant, + service, + testUser, + category, + []*expectedNode{ + { + name: testArchiveDir, + children: []*expectedNode{ + { + name: personalDir, + children: []*expectedNode{ + { + name: testFileName3, + children: []*expectedNode{}, + }, + }, + }, + { + name: workDir, + children: []*expectedNode{ + { + name: testFileName4, + children: []*expectedNode{}, + }, + }, + }, + }, + }, + }, + ) + + progress := &corsoProgress{pending: map[string]*itemDetails{}} + mc := mockconnector.NewMockExchangeCollection(suite.testPath, 1) + mc.PrevPath = mc.FullPath() + mc.ColState = data.DeletedState + msw := &mockSnapshotWalker{ + snapshotRoot: getBaseSnapshot(), + } + + collections := []data.Collection{mc} + + // Returned directory structure should look like: + // - a-tenant + // - exchange + // - user1 + // - emails + // - Archive + // - personal + // - file3 + // - work + // - file4 + dirTree, err := inflateDirTree( + ctx, + msw, + []*snapshot.Manifest{{}}, + collections, + progress, + ) + require.NoError(t, err) + + expectTree(t, ctx, expected, dirTree) +} diff --git a/src/internal/kopia/wrapper.go b/src/internal/kopia/wrapper.go index 8d28c90ea..68be46e57 100644 --- a/src/internal/kopia/wrapper.go +++ b/src/internal/kopia/wrapper.go @@ -136,7 +136,7 @@ func (w Wrapper) BackupCollections( deets: &details.Details{}, } - dirTree, err := inflateDirTree(ctx, collections, progress) + dirTree, err := inflateDirTree(ctx, w.c, nil, collections, progress) if err != nil { return nil, nil, errors.Wrap(err, "building kopia directories") }