diff --git a/src/internal/connector/data_collections.go b/src/internal/connector/data_collections.go index 5968bb23c..3a49a7ac3 100644 --- a/src/internal/connector/data_collections.go +++ b/src/internal/connector/data_collections.go @@ -38,7 +38,7 @@ func (gc *GraphConnector) DataCollections( metadata []data.RestoreCollection, ctrlOpts control.Options, errs *fault.Bus, -) ([]data.BackupCollection, map[string]struct{}, error) { +) ([]data.BackupCollection, map[string]map[string]struct{}, error) { ctx, end := D.Span(ctx, "gc:dataCollections", D.Index("service", sels.Service.String())) defer end() diff --git a/src/internal/connector/exchange/data_collections.go b/src/internal/connector/exchange/data_collections.go index bb84116af..1eb29fce6 100644 --- a/src/internal/connector/exchange/data_collections.go +++ b/src/internal/connector/exchange/data_collections.go @@ -169,7 +169,7 @@ func DataCollections( su support.StatusUpdater, ctrlOpts control.Options, errs *fault.Bus, -) ([]data.BackupCollection, map[string]struct{}, error) { +) ([]data.BackupCollection, map[string]map[string]struct{}, error) { eb, err := selector.ToExchangeBackup() if err != nil { return nil, nil, clues.Wrap(err, "exchange dataCollection selector").WithClues(ctx) diff --git a/src/internal/connector/onedrive/collections.go b/src/internal/connector/onedrive/collections.go index 7cae3a88c..62979a54e 100644 --- a/src/internal/connector/onedrive/collections.go +++ b/src/internal/connector/onedrive/collections.go @@ -258,7 +258,7 @@ func (c *Collections) Get( ctx context.Context, prevMetadata []data.RestoreCollection, errs *fault.Bus, -) ([]data.BackupCollection, map[string]struct{}, error) { +) ([]data.BackupCollection, map[string]map[string]struct{}, error) { prevDeltas, oldPathsByDriveID, err := deserializeMetadata(ctx, prevMetadata, errs) if err != nil { return nil, nil, err @@ -283,11 +283,7 @@ func (c *Collections) Get( // Drive ID -> folder ID -> folder path folderPaths = map[string]map[string]string{} // Items that should be excluded when sourcing data from the base backup. - // TODO(ashmrtn): This list contains the M365 IDs of deleted items so while - // it's technically safe to pass all the way through to kopia (files are - // unlikely to be named their M365 ID) we should wait to do that until we've - // switched to using those IDs for file names in kopia. - excludedItems = map[string]struct{}{} + excludedItems = map[string]map[string]struct{}{} ) for _, d := range drives { @@ -349,7 +345,18 @@ func (c *Collections) Get( numDeltas) if !delta.Reset { - maps.Copy(excludedItems, excluded) + p, err := GetCanonicalPath( + fmt.Sprintf(rootDrivePattern, driveID), + c.tenant, + c.resourceOwner, + c.source) + if err != nil { + return nil, nil, + clues.Wrap(err, "making exclude prefix for drive").WithClues(ctx).With("drive_id", driveID) + } + + excludedItems[p.String()] = excluded + continue } @@ -377,7 +384,7 @@ func (c *Collections) Get( prevPath, err := path.FromDataLayerPath(p, false) if err != nil { - return nil, map[string]struct{}{}, + return nil, nil, clues.Wrap(err, "invalid previous path").WithClues(ctx).With("deleted_path", p) } diff --git a/src/internal/connector/onedrive/collections_test.go b/src/internal/connector/onedrive/collections_test.go index 9dee2bd6f..56068f487 100644 --- a/src/internal/connector/onedrive/collections_test.go +++ b/src/internal/connector/onedrive/collections_test.go @@ -1269,7 +1269,7 @@ func (suite *OneDriveCollectionsSuite) TestGet() { expectedCollections map[string]map[data.CollectionState][]string expectedDeltaURLs map[string]string expectedFolderPaths map[string]map[string]string - expectedDelList map[string]struct{} + expectedDelList map[string]map[string]struct{} expectedSkippedCount int doNotMergeItems bool }{ @@ -1300,7 +1300,9 @@ func (suite *OneDriveCollectionsSuite) TestGet() { expectedFolderPaths: map[string]map[string]string{ driveID1: {"root": rootFolderPath1}, }, - expectedDelList: getDelList("file"), + expectedDelList: map[string]map[string]struct{}{ + rootFolderPath1: getDelList("file"), + }, }, { name: "OneDrive_OneItemPage_NoFolders_NoErrors", @@ -1329,7 +1331,9 @@ func (suite *OneDriveCollectionsSuite) TestGet() { expectedFolderPaths: map[string]map[string]string{ driveID1: {"root": rootFolderPath1}, }, - expectedDelList: getDelList("file"), + expectedDelList: map[string]map[string]struct{}{ + rootFolderPath1: getDelList("file"), + }, }, { name: "OneDrive_OneItemPage_NoErrors", @@ -1363,7 +1367,9 @@ func (suite *OneDriveCollectionsSuite) TestGet() { "folder": folderPath1, }, }, - expectedDelList: getDelList("file"), + expectedDelList: map[string]map[string]struct{}{ + rootFolderPath1: getDelList("file"), + }, }, { name: "OneDrive_OneItemPage_NoErrors_FileRenamedMultiple", @@ -1398,7 +1404,9 @@ func (suite *OneDriveCollectionsSuite) TestGet() { "folder": folderPath1, }, }, - expectedDelList: getDelList("file"), + expectedDelList: map[string]map[string]struct{}{ + rootFolderPath1: getDelList("file"), + }, }, { name: "OneDrive_OneItemPage_NoErrors_FileMovedMultiple", @@ -1433,7 +1441,9 @@ func (suite *OneDriveCollectionsSuite) TestGet() { "folder": folderPath1, }, }, - expectedDelList: getDelList("file"), + expectedDelList: map[string]map[string]struct{}{ + rootFolderPath1: getDelList("file"), + }, }, { name: "OneDrive_OneItemPage_EmptyDelta_NoErrors", @@ -1460,7 +1470,9 @@ func (suite *OneDriveCollectionsSuite) TestGet() { }, expectedDeltaURLs: map[string]string{}, expectedFolderPaths: map[string]map[string]string{}, - expectedDelList: getDelList("file"), + expectedDelList: map[string]map[string]struct{}{ + rootFolderPath1: getDelList("file"), + }, }, { name: "OneDrive_TwoItemPages_NoErrors", @@ -1502,7 +1514,9 @@ func (suite *OneDriveCollectionsSuite) TestGet() { "folder": folderPath1, }, }, - expectedDelList: getDelList("file", "file2"), + expectedDelList: map[string]map[string]struct{}{ + rootFolderPath1: getDelList("file", "file2"), + }, }, { name: "TwoDrives_OneItemPageEach_NoErrors", @@ -1557,7 +1571,10 @@ func (suite *OneDriveCollectionsSuite) TestGet() { "folder2": folderPath2, }, }, - expectedDelList: getDelList("file", "file2"), + expectedDelList: map[string]map[string]struct{}{ + rootFolderPath1: getDelList("file"), + rootFolderPath2: getDelList("file2"), + }, }, { name: "OneDrive_OneItemPage_Errors", @@ -1607,7 +1624,7 @@ func (suite *OneDriveCollectionsSuite) TestGet() { "root": rootFolderPath1, }, }, - expectedDelList: map[string]struct{}{}, + expectedDelList: map[string]map[string]struct{}{}, doNotMergeItems: true, }, { @@ -1649,7 +1666,7 @@ func (suite *OneDriveCollectionsSuite) TestGet() { "folder": folderPath1, }, }, - expectedDelList: map[string]struct{}{}, + expectedDelList: map[string]map[string]struct{}{}, doNotMergeItems: true, }, { @@ -1691,7 +1708,9 @@ func (suite *OneDriveCollectionsSuite) TestGet() { "folder": folderPath1, }, }, - expectedDelList: getDelList("file", "file2"), + expectedDelList: map[string]map[string]struct{}{ + rootFolderPath1: getDelList("file", "file2"), + }, doNotMergeItems: false, }, { @@ -1733,7 +1752,7 @@ func (suite *OneDriveCollectionsSuite) TestGet() { "folder2": expectedPath1("/folder2"), }, }, - expectedDelList: map[string]struct{}{}, + expectedDelList: map[string]map[string]struct{}{}, doNotMergeItems: true, }, { @@ -1774,7 +1793,7 @@ func (suite *OneDriveCollectionsSuite) TestGet() { "folder2": expectedPath1("/folder"), }, }, - expectedDelList: map[string]struct{}{}, + expectedDelList: map[string]map[string]struct{}{}, doNotMergeItems: true, }, { @@ -1819,7 +1838,9 @@ func (suite *OneDriveCollectionsSuite) TestGet() { "folder": folderPath1, }, }, - expectedDelList: getDelList("file", "file2"), + expectedDelList: map[string]map[string]struct{}{ + rootFolderPath1: getDelList("file", "file2"), + }, expectedSkippedCount: 2, }, } diff --git a/src/internal/connector/onedrive/data_collections.go b/src/internal/connector/onedrive/data_collections.go index 31ed31f8d..113892d34 100644 --- a/src/internal/connector/onedrive/data_collections.go +++ b/src/internal/connector/onedrive/data_collections.go @@ -39,7 +39,7 @@ func DataCollections( su support.StatusUpdater, ctrlOpts control.Options, errs *fault.Bus, -) ([]data.BackupCollection, map[string]struct{}, error) { +) ([]data.BackupCollection, map[string]map[string]struct{}, error) { odb, err := selector.ToOneDriveBackup() if err != nil { return nil, nil, clues.Wrap(err, "parsing selector").WithClues(ctx) @@ -49,7 +49,7 @@ func DataCollections( el = errs.Local() user = selector.DiscreteOwner collections = []data.BackupCollection{} - allExcludes = map[string]struct{}{} + allExcludes = map[string]map[string]struct{}{} ) // for each scope that includes oneDrive items, get all @@ -77,7 +77,13 @@ func DataCollections( collections = append(collections, odcs...) - maps.Copy(allExcludes, excludes) + for k, v := range excludes { + if _, ok := allExcludes[k]; !ok { + allExcludes[k] = map[string]struct{}{} + } + + maps.Copy(allExcludes[k], v) + } } return collections, allExcludes, el.Failure() diff --git a/src/internal/connector/sharepoint/data_collections.go b/src/internal/connector/sharepoint/data_collections.go index 6752fa71c..f0d16c764 100644 --- a/src/internal/connector/sharepoint/data_collections.go +++ b/src/internal/connector/sharepoint/data_collections.go @@ -37,7 +37,7 @@ func DataCollections( su statusUpdater, ctrlOpts control.Options, errs *fault.Bus, -) ([]data.BackupCollection, map[string]struct{}, error) { +) ([]data.BackupCollection, map[string]map[string]struct{}, error) { b, err := selector.ToSharePointBackup() if err != nil { return nil, nil, errors.Wrap(err, "sharePointDataCollection: parsing selector") @@ -171,7 +171,7 @@ func collectLibraries( updater statusUpdater, ctrlOpts control.Options, errs *fault.Bus, -) ([]data.BackupCollection, map[string]struct{}, error) { +) ([]data.BackupCollection, map[string]map[string]struct{}, error) { logger.Ctx(ctx).Debug("creating SharePoint Library collections") var ( diff --git a/src/internal/kopia/upload.go b/src/internal/kopia/upload.go index 52ba99419..2b82c804d 100644 --- a/src/internal/kopia/upload.go +++ b/src/internal/kopia/upload.go @@ -422,13 +422,27 @@ func streamBaseEntries( locationPath path.Path, dir fs.Directory, encodedSeen map[string]struct{}, - globalExcludeSet map[string]struct{}, + globalExcludeSet map[string]map[string]struct{}, progress *corsoProgress, ) error { if dir == nil { return nil } + var ( + excludeSet map[string]struct{} + curPrefix string + curPathString = curPath.String() + ) + + for prefix, excludes := range globalExcludeSet { + // Select the set with the longest prefix to be most precise. + if strings.HasPrefix(curPathString, prefix) && len(prefix) >= len(curPrefix) { + excludeSet = excludes + curPrefix = prefix + } + } + err := dir.IterateEntries(ctx, func(innerCtx context.Context, entry fs.Entry) error { if err := innerCtx.Err(); err != nil { return err @@ -453,7 +467,7 @@ func streamBaseEntries( // This entry was marked as deleted by a service that can't tell us the // previous path of deleted items, only the item ID. - if _, ok := globalExcludeSet[entName]; ok { + if _, ok := excludeSet[entName]; ok { return nil } @@ -519,7 +533,7 @@ func getStreamItemFunc( staticEnts []fs.Entry, streamedEnts data.BackupCollection, baseDir fs.Directory, - globalExcludeSet map[string]struct{}, + globalExcludeSet map[string]map[string]struct{}, progress *corsoProgress, ) func(context.Context, func(context.Context, fs.Entry) error) error { return func(ctx context.Context, cb func(context.Context, fs.Entry) error) error { @@ -567,7 +581,7 @@ func getStreamItemFunc( func buildKopiaDirs( dirName string, dir *treeMap, - globalExcludeSet map[string]struct{}, + globalExcludeSet map[string]map[string]struct{}, progress *corsoProgress, ) (fs.Directory, error) { // Need to build the directory tree from the leaves up because intermediate @@ -1002,7 +1016,7 @@ func inflateDirTree( loader snapshotLoader, baseSnaps []IncrementalBase, collections []data.BackupCollection, - globalExcludeSet map[string]struct{}, + globalExcludeSet map[string]map[string]struct{}, progress *corsoProgress, ) (fs.Directory, error) { roots, updatedPaths, err := inflateCollectionTree(ctx, collections) diff --git a/src/internal/kopia/upload_test.go b/src/internal/kopia/upload_test.go index e2a769908..0ed4ab7af 100644 --- a/src/internal/kopia/upload_test.go +++ b/src/internal/kopia/upload_test.go @@ -1435,7 +1435,7 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTreeMultipleSubdirecto table := []struct { name string inputCollections func(t *testing.T) []data.BackupCollection - inputExcludes map[string]struct{} + inputExcludes map[string]map[string]struct{} expected *expectedNode }{ { @@ -1443,8 +1443,10 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTreeMultipleSubdirecto inputCollections: func(t *testing.T) []data.BackupCollection { return nil }, - inputExcludes: map[string]struct{}{ - inboxFileName1: {}, + inputExcludes: map[string]map[string]struct{}{ + "": { + inboxFileName1: {}, + }, }, expected: expectedTreeWithChildren( []string{ diff --git a/src/internal/kopia/wrapper.go b/src/internal/kopia/wrapper.go index a82c4c273..bd4b5d4e0 100644 --- a/src/internal/kopia/wrapper.go +++ b/src/internal/kopia/wrapper.go @@ -135,7 +135,7 @@ func (w Wrapper) BackupCollections( ctx context.Context, previousSnapshots []IncrementalBase, collections []data.BackupCollection, - globalExcludeSet map[string]struct{}, + globalExcludeSet map[string]map[string]struct{}, tags map[string]string, buildTreeWithBase bool, errs *fault.Bus, diff --git a/src/internal/kopia/wrapper_test.go b/src/internal/kopia/wrapper_test.go index 8accefeff..5a49516fd 100644 --- a/src/internal/kopia/wrapper_test.go +++ b/src/internal/kopia/wrapper_test.go @@ -925,6 +925,7 @@ func (suite *KopiaSimpleRepoIntegrationSuite) TestBackupExcludeItem() { table := []struct { name string excludeItem bool + excludePrefix bool expectedCachedItems int expectedUncachedItems int cols func() []data.BackupCollection @@ -932,7 +933,7 @@ func (suite *KopiaSimpleRepoIntegrationSuite) TestBackupExcludeItem() { restoreCheck assert.ErrorAssertionFunc }{ { - name: "ExcludeItem", + name: "ExcludeItem_NoPrefix", excludeItem: true, expectedCachedItems: len(suite.filesByPath) - 1, expectedUncachedItems: 0, @@ -942,6 +943,18 @@ func (suite *KopiaSimpleRepoIntegrationSuite) TestBackupExcludeItem() { backupIDCheck: require.NotEmpty, restoreCheck: assert.Error, }, + { + name: "ExcludeItem_WithPrefix", + excludeItem: true, + excludePrefix: true, + expectedCachedItems: len(suite.filesByPath) - 1, + expectedUncachedItems: 0, + cols: func() []data.BackupCollection { + return nil + }, + backupIDCheck: require.NotEmpty, + restoreCheck: assert.Error, + }, { name: "NoExcludeItemNoChanges", // No snapshot should be made since there were no changes. @@ -975,10 +988,22 @@ func (suite *KopiaSimpleRepoIntegrationSuite) TestBackupExcludeItem() { suite.Run(test.name, func() { t := suite.T() - var excluded map[string]struct{} + var ( + prefix string + itemPath = suite.files[suite.testPath1.String()][0].itemPath + ) + + if !test.excludePrefix { + prefix = itemPath.ToBuilder().Dir().Dir().String() + } + + var excluded map[string]map[string]struct{} if test.excludeItem { - excluded = map[string]struct{}{ - suite.files[suite.testPath1.String()][0].itemPath.Item(): {}, + excluded = map[string]map[string]struct{}{ + // Add a prefix if needed. + prefix: { + itemPath.Item(): {}, + }, } } diff --git a/src/internal/operations/backup.go b/src/internal/operations/backup.go index 5925c521a..dc2ae0349 100644 --- a/src/internal/operations/backup.go +++ b/src/internal/operations/backup.go @@ -312,7 +312,7 @@ func produceBackupDataCollections( metadata []data.RestoreCollection, ctrlOpts control.Options, errs *fault.Bus, -) ([]data.BackupCollection, map[string]struct{}, error) { +) ([]data.BackupCollection, map[string]map[string]struct{}, error) { complete, closer := observe.MessageWithCompletion(ctx, observe.Safe("Discovering items to backup")) defer func() { complete <- struct{}{} @@ -332,7 +332,7 @@ type backuper interface { ctx context.Context, bases []kopia.IncrementalBase, cs []data.BackupCollection, - excluded map[string]struct{}, + excluded map[string]map[string]struct{}, tags map[string]string, buildTreeWithBase bool, errs *fault.Bus, @@ -391,7 +391,7 @@ func consumeBackupDataCollections( reasons []kopia.Reason, mans []*kopia.ManifestEntry, cs []data.BackupCollection, - excludes map[string]struct{}, + excludes map[string]map[string]struct{}, backupID model.StableID, isIncremental bool, errs *fault.Bus, diff --git a/src/internal/operations/backup_test.go b/src/internal/operations/backup_test.go index 2972706b6..4f9a2934a 100644 --- a/src/internal/operations/backup_test.go +++ b/src/internal/operations/backup_test.go @@ -97,7 +97,7 @@ func (mbu mockBackuper) BackupCollections( ctx context.Context, bases []kopia.IncrementalBase, cs []data.BackupCollection, - excluded map[string]struct{}, + excluded map[string]map[string]struct{}, tags map[string]string, buildTreeWithBase bool, errs *fault.Bus,