From 46d61c724640ca8f80023bc832963cefc5876a51 Mon Sep 17 00:00:00 2001 From: ashmrtn Date: Wed, 25 Jan 2023 08:28:09 -0800 Subject: [PATCH] Add a set of items that will be excluded from base directories during backup (#2143) ## Description Some external services like OneDrive do not have the ability to determine the path a deleted item used to exist at, they only know the item's old ID. This patch allows Kopia Wrapper to handle those items by implementing a global exclude set. The patch assumes that items in base directories are unique as the items in every base directory are checked against the set. This is not wired to anything outside of Kopia Wrapper. Currently this feature is disabled as the passed value is always nil. ## Does this PR need a docs update or release note? - [ ] :white_check_mark: Yes, it's included - [ ] :clock1: Yes, but in a later PR - [x] :no_entry: No ## Type of change - [x] :sunflower: Feature - [ ] :bug: Bugfix - [ ] :world_map: Documentation - [ ] :robot: Test - [ ] :computer: CI/Deployment - [ ] :broom: Tech Debt/Cleanup ## Issue(s) * closes #2121 ## Test Plan - [ ] :muscle: Manual - [x] :zap: Unit test - [ ] :green_heart: E2E --- src/internal/kopia/upload.go | 35 ++++++++++++++---- src/internal/kopia/upload_test.go | 61 +++++++++++++++++++++++++++---- src/internal/kopia/wrapper.go | 15 +++++++- 3 files changed, 94 insertions(+), 17 deletions(-) diff --git a/src/internal/kopia/upload.go b/src/internal/kopia/upload.go index 52452ffa9..5301e6872 100644 --- a/src/internal/kopia/upload.go +++ b/src/internal/kopia/upload.go @@ -345,6 +345,7 @@ func streamBaseEntries( prevPath path.Path, dir fs.Directory, encodedSeen map[string]struct{}, + globalExcludeSet map[string]struct{}, progress *corsoProgress, ) error { if dir == nil { @@ -373,6 +374,12 @@ func streamBaseEntries( return errors.Wrapf(err, "unable to decode entry name %s", entry.Name()) } + // This entry was marked as deleted by a service that can't tell us the + // previous path of deleted items, only the item ID. + if _, ok := globalExcludeSet[entName]; ok { + return nil + } + // For now assuming that item IDs don't need escaping. itemPath, err := curPath.Append(entName, true) if err != nil { @@ -421,6 +428,7 @@ func getStreamItemFunc( staticEnts []fs.Entry, streamedEnts data.Collection, baseDir fs.Directory, + globalExcludeSet map[string]struct{}, progress *corsoProgress, ) func(context.Context, func(context.Context, fs.Entry) error) error { return func(ctx context.Context, cb func(context.Context, fs.Entry) error) error { @@ -443,6 +451,7 @@ func getStreamItemFunc( prevPath, baseDir, seen, + globalExcludeSet, progress, ); err != nil { errs = multierror.Append( @@ -457,21 +466,22 @@ func getStreamItemFunc( // buildKopiaDirs recursively builds a directory hierarchy from the roots up. // Returned directories are virtualfs.StreamingDirectory. -func buildKopiaDirs(dirName string, dir *treeMap, progress *corsoProgress) (fs.Directory, error) { +func buildKopiaDirs( + dirName string, + dir *treeMap, + globalExcludeSet map[string]struct{}, + progress *corsoProgress, +) (fs.Directory, error) { // Reuse kopia directories directly if the subtree rooted at them is // unchanged. // - // TODO(ashmrtn): This will need updated when we have OneDrive backups where - // items have been deleted because we can't determine which directory used to - // have the item. - // // TODO(ashmrtn): We could possibly also use this optimization if we know that // the collection has no items in it. In that case though, we may need to take // extra care to ensure the name of the directory is properly represented. For // example, a directory that has been renamed but with no additional items may // not be able to directly use kopia's version of the directory due to the // rename. - if dir.collection == nil && len(dir.childDirs) == 0 && dir.baseDir != nil { + if dir.collection == nil && len(dir.childDirs) == 0 && dir.baseDir != nil && len(globalExcludeSet) == 0 { return dir.baseDir, nil } @@ -480,7 +490,7 @@ func buildKopiaDirs(dirName string, dir *treeMap, progress *corsoProgress) (fs.D var childDirs []fs.Entry for childName, childDir := range dir.childDirs { - child, err := buildKopiaDirs(childName, childDir, progress) + child, err := buildKopiaDirs(childName, childDir, globalExcludeSet, progress) if err != nil { return nil, err } @@ -496,6 +506,7 @@ func buildKopiaDirs(dirName string, dir *treeMap, progress *corsoProgress) (fs.D childDirs, dir.collection, dir.baseDir, + globalExcludeSet, progress, ), ), nil @@ -879,11 +890,19 @@ func inflateBaseTree( // virtualfs.StreamingDirectory with the given DataCollections if there is one // for that node. Tags can be used in future backups to fetch old snapshots for // caching reasons. +// +// globalExcludeSet represents a set of items, represented with file names, to +// exclude from base directories when uploading the snapshot. As items in *all* +// base directories will be checked for in every base directory, this assumes +// that items in the bases are unique. Deletions of directories or subtrees +// should be represented as changes in the status of a Collection, not an entry +// in the globalExcludeSet. func inflateDirTree( ctx context.Context, loader snapshotLoader, baseSnaps []IncrementalBase, collections []data.Collection, + globalExcludeSet map[string]struct{}, progress *corsoProgress, ) (fs.Directory, error) { roots, updatedPaths, err := inflateCollectionTree(ctx, collections) @@ -915,7 +934,7 @@ func inflateDirTree( var res fs.Directory for dirName, dir := range roots { - tmp, err := buildKopiaDirs(dirName, dir, progress) + tmp, err := buildKopiaDirs(dirName, dir, globalExcludeSet, progress) if err != nil { return nil, err } diff --git a/src/internal/kopia/upload_test.go b/src/internal/kopia/upload_test.go index 57ce9fd56..1cc4daf47 100644 --- a/src/internal/kopia/upload_test.go +++ b/src/internal/kopia/upload_test.go @@ -705,7 +705,7 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTree() { // - emails // - Inbox // - 42 separate files - dirTree, err := inflateDirTree(ctx, nil, nil, collections, progress) + dirTree, err := inflateDirTree(ctx, nil, nil, collections, nil, progress) require.NoError(t, err) assert.Equal(t, encodeAsPath(testTenant), dirTree.Name()) @@ -793,7 +793,7 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTree_MixedDirectory() suite.T().Run(test.name, func(t *testing.T) { progress := &corsoProgress{pending: map[string]*itemDetails{}} - dirTree, err := inflateDirTree(ctx, nil, nil, test.layout, progress) + dirTree, err := inflateDirTree(ctx, nil, nil, test.layout, nil, progress) require.NoError(t, err) assert.Equal(t, encodeAsPath(testTenant), dirTree.Name()) @@ -889,7 +889,7 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTree_Fails() { defer flush() suite.T().Run(test.name, func(t *testing.T) { - _, err := inflateDirTree(ctx, nil, nil, test.layout, nil) + _, err := inflateDirTree(ctx, nil, nil, test.layout, nil, nil) assert.Error(t, err) }) } @@ -992,7 +992,7 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTreeErrors() { cols = append(cols, mc) } - _, err := inflateDirTree(ctx, nil, nil, cols, progress) + _, err := inflateDirTree(ctx, nil, nil, cols, nil, progress) require.Error(t, err) }) } @@ -1261,6 +1261,7 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTreeSingleSubtree() { mockIncrementalBase("", testTenant, testUser, path.ExchangeService, path.EmailCategory), }, test.inputCollections(), + nil, progress, ) require.NoError(t, err) @@ -1281,7 +1282,7 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTreeMultipleSubdirecto []string{testTenant, service, testUser, category, testInboxDir}, false, ) - inboxFileName1 := testFileName4 + inboxFileName1 := testFileName inboxFileData1 := testFileData4 inboxFileName2 := testFileName5 inboxFileData2 := testFileData5 @@ -1291,7 +1292,7 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTreeMultipleSubdirecto append(inboxPath.Elements(), personalDir), false, ) - personalFileName1 := testFileName + personalFileName1 := inboxFileName1 personalFileName2 := testFileName2 workPath := makePath( @@ -1312,7 +1313,7 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTreeMultipleSubdirecto // - user1 // - email // - Inbox - // - file4 + // - file1 // - personal // - file1 // - file2 @@ -1369,8 +1370,51 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTreeMultipleSubdirecto table := []struct { name string inputCollections func(t *testing.T) []data.Collection + inputExcludes map[string]struct{} expected *expectedNode }{ + { + name: "GlobalExcludeSet", + inputCollections: func(t *testing.T) []data.Collection { + return nil + }, + inputExcludes: map[string]struct{}{ + inboxFileName1: {}, + }, + expected: expectedTreeWithChildren( + []string{ + testTenant, + service, + testUser, + category, + }, + []*expectedNode{ + { + name: testInboxDir, + children: []*expectedNode{ + { + name: personalDir, + children: []*expectedNode{ + { + name: personalFileName2, + children: []*expectedNode{}, + }, + }, + }, + { + name: workDir, + children: []*expectedNode{ + { + name: workFileName1, + children: []*expectedNode{}, + }, + }, + }, + }, + }, + }, + ), + }, { name: "MovesSubtree", inputCollections: func(t *testing.T) []data.Collection { @@ -1919,6 +1963,7 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTreeMultipleSubdirecto mockIncrementalBase("", testTenant, testUser, path.ExchangeService, path.EmailCategory), }, test.inputCollections(t), + test.inputExcludes, progress, ) require.NoError(t, err) @@ -2079,6 +2124,7 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTreeSkipsDeletedSubtre mockIncrementalBase("", testTenant, testUser, path.ExchangeService, path.EmailCategory), }, collections, + nil, progress, ) require.NoError(t, err) @@ -2325,6 +2371,7 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTreeSelectsCorrectSubt mockIncrementalBase("id2", testTenant, testUser, path.ExchangeService, path.EmailCategory), }, collections, + nil, progress, ) require.NoError(t, err) diff --git a/src/internal/kopia/wrapper.go b/src/internal/kopia/wrapper.go index 8171c853a..13db75ac4 100644 --- a/src/internal/kopia/wrapper.go +++ b/src/internal/kopia/wrapper.go @@ -129,7 +129,11 @@ func (w Wrapper) BackupCollections( ctx, end := D.Span(ctx, "kopia:backupCollections") defer end() - if len(collections) == 0 { + // TODO(ashmrtn): Make this a parameter when actually enabling the global + // exclude set. + var globalExcludeSet map[string]struct{} + + if len(collections) == 0 && len(globalExcludeSet) == 0 { return &BackupStats{}, &details.Builder{}, nil, nil } @@ -147,7 +151,14 @@ func (w Wrapper) BackupCollections( base = previousSnapshots } - dirTree, err := inflateDirTree(ctx, w.c, base, collections, progress) + dirTree, err := inflateDirTree( + ctx, + w.c, + base, + collections, + globalExcludeSet, + progress, + ) if err != nil { return nil, nil, nil, errors.Wrap(err, "building kopia directories") }