diff --git a/src/internal/kopia/upload.go b/src/internal/kopia/upload.go index 52452ffa9..5301e6872 100644 --- a/src/internal/kopia/upload.go +++ b/src/internal/kopia/upload.go @@ -345,6 +345,7 @@ func streamBaseEntries( prevPath path.Path, dir fs.Directory, encodedSeen map[string]struct{}, + globalExcludeSet map[string]struct{}, progress *corsoProgress, ) error { if dir == nil { @@ -373,6 +374,12 @@ func streamBaseEntries( return errors.Wrapf(err, "unable to decode entry name %s", entry.Name()) } + // This entry was marked as deleted by a service that can't tell us the + // previous path of deleted items, only the item ID. + if _, ok := globalExcludeSet[entName]; ok { + return nil + } + // For now assuming that item IDs don't need escaping. itemPath, err := curPath.Append(entName, true) if err != nil { @@ -421,6 +428,7 @@ func getStreamItemFunc( staticEnts []fs.Entry, streamedEnts data.Collection, baseDir fs.Directory, + globalExcludeSet map[string]struct{}, progress *corsoProgress, ) func(context.Context, func(context.Context, fs.Entry) error) error { return func(ctx context.Context, cb func(context.Context, fs.Entry) error) error { @@ -443,6 +451,7 @@ func getStreamItemFunc( prevPath, baseDir, seen, + globalExcludeSet, progress, ); err != nil { errs = multierror.Append( @@ -457,21 +466,22 @@ func getStreamItemFunc( // buildKopiaDirs recursively builds a directory hierarchy from the roots up. // Returned directories are virtualfs.StreamingDirectory. -func buildKopiaDirs(dirName string, dir *treeMap, progress *corsoProgress) (fs.Directory, error) { +func buildKopiaDirs( + dirName string, + dir *treeMap, + globalExcludeSet map[string]struct{}, + progress *corsoProgress, +) (fs.Directory, error) { // Reuse kopia directories directly if the subtree rooted at them is // unchanged. // - // TODO(ashmrtn): This will need updated when we have OneDrive backups where - // items have been deleted because we can't determine which directory used to - // have the item. - // // TODO(ashmrtn): We could possibly also use this optimization if we know that // the collection has no items in it. In that case though, we may need to take // extra care to ensure the name of the directory is properly represented. For // example, a directory that has been renamed but with no additional items may // not be able to directly use kopia's version of the directory due to the // rename. - if dir.collection == nil && len(dir.childDirs) == 0 && dir.baseDir != nil { + if dir.collection == nil && len(dir.childDirs) == 0 && dir.baseDir != nil && len(globalExcludeSet) == 0 { return dir.baseDir, nil } @@ -480,7 +490,7 @@ func buildKopiaDirs(dirName string, dir *treeMap, progress *corsoProgress) (fs.D var childDirs []fs.Entry for childName, childDir := range dir.childDirs { - child, err := buildKopiaDirs(childName, childDir, progress) + child, err := buildKopiaDirs(childName, childDir, globalExcludeSet, progress) if err != nil { return nil, err } @@ -496,6 +506,7 @@ func buildKopiaDirs(dirName string, dir *treeMap, progress *corsoProgress) (fs.D childDirs, dir.collection, dir.baseDir, + globalExcludeSet, progress, ), ), nil @@ -879,11 +890,19 @@ func inflateBaseTree( // virtualfs.StreamingDirectory with the given DataCollections if there is one // for that node. Tags can be used in future backups to fetch old snapshots for // caching reasons. +// +// globalExcludeSet represents a set of items, represented with file names, to +// exclude from base directories when uploading the snapshot. As items in *all* +// base directories will be checked for in every base directory, this assumes +// that items in the bases are unique. Deletions of directories or subtrees +// should be represented as changes in the status of a Collection, not an entry +// in the globalExcludeSet. func inflateDirTree( ctx context.Context, loader snapshotLoader, baseSnaps []IncrementalBase, collections []data.Collection, + globalExcludeSet map[string]struct{}, progress *corsoProgress, ) (fs.Directory, error) { roots, updatedPaths, err := inflateCollectionTree(ctx, collections) @@ -915,7 +934,7 @@ func inflateDirTree( var res fs.Directory for dirName, dir := range roots { - tmp, err := buildKopiaDirs(dirName, dir, progress) + tmp, err := buildKopiaDirs(dirName, dir, globalExcludeSet, progress) if err != nil { return nil, err } diff --git a/src/internal/kopia/upload_test.go b/src/internal/kopia/upload_test.go index 57ce9fd56..1cc4daf47 100644 --- a/src/internal/kopia/upload_test.go +++ b/src/internal/kopia/upload_test.go @@ -705,7 +705,7 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTree() { // - emails // - Inbox // - 42 separate files - dirTree, err := inflateDirTree(ctx, nil, nil, collections, progress) + dirTree, err := inflateDirTree(ctx, nil, nil, collections, nil, progress) require.NoError(t, err) assert.Equal(t, encodeAsPath(testTenant), dirTree.Name()) @@ -793,7 +793,7 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTree_MixedDirectory() suite.T().Run(test.name, func(t *testing.T) { progress := &corsoProgress{pending: map[string]*itemDetails{}} - dirTree, err := inflateDirTree(ctx, nil, nil, test.layout, progress) + dirTree, err := inflateDirTree(ctx, nil, nil, test.layout, nil, progress) require.NoError(t, err) assert.Equal(t, encodeAsPath(testTenant), dirTree.Name()) @@ -889,7 +889,7 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTree_Fails() { defer flush() suite.T().Run(test.name, func(t *testing.T) { - _, err := inflateDirTree(ctx, nil, nil, test.layout, nil) + _, err := inflateDirTree(ctx, nil, nil, test.layout, nil, nil) assert.Error(t, err) }) } @@ -992,7 +992,7 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTreeErrors() { cols = append(cols, mc) } - _, err := inflateDirTree(ctx, nil, nil, cols, progress) + _, err := inflateDirTree(ctx, nil, nil, cols, nil, progress) require.Error(t, err) }) } @@ -1261,6 +1261,7 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTreeSingleSubtree() { mockIncrementalBase("", testTenant, testUser, path.ExchangeService, path.EmailCategory), }, test.inputCollections(), + nil, progress, ) require.NoError(t, err) @@ -1281,7 +1282,7 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTreeMultipleSubdirecto []string{testTenant, service, testUser, category, testInboxDir}, false, ) - inboxFileName1 := testFileName4 + inboxFileName1 := testFileName inboxFileData1 := testFileData4 inboxFileName2 := testFileName5 inboxFileData2 := testFileData5 @@ -1291,7 +1292,7 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTreeMultipleSubdirecto append(inboxPath.Elements(), personalDir), false, ) - personalFileName1 := testFileName + personalFileName1 := inboxFileName1 personalFileName2 := testFileName2 workPath := makePath( @@ -1312,7 +1313,7 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTreeMultipleSubdirecto // - user1 // - email // - Inbox - // - file4 + // - file1 // - personal // - file1 // - file2 @@ -1369,8 +1370,51 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTreeMultipleSubdirecto table := []struct { name string inputCollections func(t *testing.T) []data.Collection + inputExcludes map[string]struct{} expected *expectedNode }{ + { + name: "GlobalExcludeSet", + inputCollections: func(t *testing.T) []data.Collection { + return nil + }, + inputExcludes: map[string]struct{}{ + inboxFileName1: {}, + }, + expected: expectedTreeWithChildren( + []string{ + testTenant, + service, + testUser, + category, + }, + []*expectedNode{ + { + name: testInboxDir, + children: []*expectedNode{ + { + name: personalDir, + children: []*expectedNode{ + { + name: personalFileName2, + children: []*expectedNode{}, + }, + }, + }, + { + name: workDir, + children: []*expectedNode{ + { + name: workFileName1, + children: []*expectedNode{}, + }, + }, + }, + }, + }, + }, + ), + }, { name: "MovesSubtree", inputCollections: func(t *testing.T) []data.Collection { @@ -1919,6 +1963,7 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTreeMultipleSubdirecto mockIncrementalBase("", testTenant, testUser, path.ExchangeService, path.EmailCategory), }, test.inputCollections(t), + test.inputExcludes, progress, ) require.NoError(t, err) @@ -2079,6 +2124,7 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTreeSkipsDeletedSubtre mockIncrementalBase("", testTenant, testUser, path.ExchangeService, path.EmailCategory), }, collections, + nil, progress, ) require.NoError(t, err) @@ -2325,6 +2371,7 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTreeSelectsCorrectSubt mockIncrementalBase("id2", testTenant, testUser, path.ExchangeService, path.EmailCategory), }, collections, + nil, progress, ) require.NoError(t, err) diff --git a/src/internal/kopia/wrapper.go b/src/internal/kopia/wrapper.go index 8171c853a..13db75ac4 100644 --- a/src/internal/kopia/wrapper.go +++ b/src/internal/kopia/wrapper.go @@ -129,7 +129,11 @@ func (w Wrapper) BackupCollections( ctx, end := D.Span(ctx, "kopia:backupCollections") defer end() - if len(collections) == 0 { + // TODO(ashmrtn): Make this a parameter when actually enabling the global + // exclude set. + var globalExcludeSet map[string]struct{} + + if len(collections) == 0 && len(globalExcludeSet) == 0 { return &BackupStats{}, &details.Builder{}, nil, nil } @@ -147,7 +151,14 @@ func (w Wrapper) BackupCollections( base = previousSnapshots } - dirTree, err := inflateDirTree(ctx, w.c, base, collections, progress) + dirTree, err := inflateDirTree( + ctx, + w.c, + base, + collections, + globalExcludeSet, + progress, + ) if err != nil { return nil, nil, nil, errors.Wrap(err, "building kopia directories") }