From ea308055eb1603576cb3fcd17bc7e436f098622a Mon Sep 17 00:00:00 2001 From: Abin Simon Date: Tue, 7 Nov 2023 04:43:17 +0530 Subject: [PATCH] Handle folder being deleted and recreated in between a backup (#4612) Previously if a folder with the same name was deleted and recreated, we would error out. This fixes that by ensuring that only the final collection makes it into the final set. --- #### Does this PR need a docs update or release note? - [x] :white_check_mark: Yes, it's included - [ ] :clock1: Yes, but in a later PR - [ ] :no_entry: No #### Type of change - [ ] :sunflower: Feature - [x] :bug: Bugfix - [ ] :world_map: Documentation - [ ] :robot: Supportability/Tests - [ ] :computer: CI/Deployment - [ ] :broom: Tech Debt/Cleanup #### Issue(s) * # #### Test Plan - [ ] :muscle: Manual - [x] :zap: Unit test - [ ] :green_heart: E2E --- CHANGELOG.md | 3 + .../common/prefixmatcher/mock/mock.go | 8 +- .../m365/collection/drive/collections.go | 33 +- .../m365/collection/drive/collections_test.go | 338 +++++++++++++++++- 4 files changed, 367 insertions(+), 15 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0a9719901..0ff1ca07a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed - Change file extension of messages export to json to match the content +### Fixed +- Handle OneDrive folders being deleted and recreated midway through a backup + ## [v0.15.0] (beta) - 2023-10-31 ### Added diff --git a/src/internal/common/prefixmatcher/mock/mock.go b/src/internal/common/prefixmatcher/mock/mock.go index 4516f8665..c00335003 100644 --- a/src/internal/common/prefixmatcher/mock/mock.go +++ b/src/internal/common/prefixmatcher/mock/mock.go @@ -25,20 +25,20 @@ func NewPrefixMap(m map[string]map[string]struct{}) *PrefixMap { return &r } -func (pm PrefixMap) AssertEqual(t *testing.T, r prefixmatcher.StringSetReader) { +func (pm PrefixMap) AssertEqual(t *testing.T, r prefixmatcher.StringSetReader, description string) { if pm.Empty() { - require.True(t, r.Empty(), "result prefixMap should be empty but contains keys: %+v", r.Keys()) + require.Truef(t, r.Empty(), "%s: result prefixMap should be empty but contains keys: %+v", description, r.Keys()) return } pks := pm.Keys() rks := r.Keys() - assert.ElementsMatch(t, pks, rks, "prefix keys match") + assert.ElementsMatchf(t, pks, rks, "%s: prefix keys match", description) for _, pk := range pks { p, _ := pm.Get(pk) r, _ := r.Get(pk) - assert.Equal(t, p, r, "values match") + assert.Equal(t, p, r, description) } } diff --git a/src/internal/m365/collection/drive/collections.go b/src/internal/m365/collection/drive/collections.go index efcead63b..2f72e6740 100644 --- a/src/internal/m365/collection/drive/collections.go +++ b/src/internal/m365/collection/drive/collections.go @@ -492,7 +492,7 @@ func updateCollectionPaths( var initialCurPath path.Path col, found := cmap[driveID][itemID] - if found { + if found && col.FullPath() != nil { initialCurPath = col.FullPath() if initialCurPath.String() == curPath.String() { return found, nil @@ -689,6 +689,11 @@ func (c *Collections) PopulateDriveCollections( // different collection within the same delta query // item ID -> item ID currPrevPaths = map[string]string{} + + // seenFolders is used to track the folders that we have + // already seen. This will help us track in case a folder was + // recreated multiple times in between a run. + seenFolders = map[string]string{} ) if !invalidPrevDelta { @@ -728,6 +733,7 @@ func (c *Collections) PopulateDriveCollections( oldPrevPaths, currPrevPaths, newPrevPaths, + seenFolders, excludedItemIDs, topLevelPackages, invalidPrevDelta, @@ -751,6 +757,7 @@ func (c *Collections) processItem( item models.DriveItemable, driveID, driveName string, oldPrevPaths, currPrevPaths, newPrevPaths map[string]string, + seenFolders map[string]string, excludedItemIDs map[string]struct{}, topLevelPackages map[string]struct{}, invalidPrevDelta bool, @@ -856,6 +863,28 @@ func (c *Collections) processItem( PathPrefix(maps.Keys(topLevelPackages)). Compare(collectionPath.String()) + // This check is to ensure that if a folder was deleted and + // recreated multiple times between a backup, we only use the + // final one. + alreadyHandledFolderID, collPathAlreadyExists := seenFolders[collectionPath.String()] + collPathAlreadyExists = collPathAlreadyExists && alreadyHandledFolderID != itemID + + if collPathAlreadyExists { + // we don't have a good way of juggling multiple previous paths + // at this time. If a path was declared twice, it's a bit ambiguous + // which prior data the current folder now contains. Safest thing to + // do is to call it a new folder and ingest items fresh. + prevPath = nil + + c.NumContainers-- + c.NumItems-- + + delete(c.CollectionMap[driveID], alreadyHandledFolderID) + delete(newPrevPaths, alreadyHandledFolderID) + } + + seenFolders[collectionPath.String()] = itemID + col, err := NewCollection( c.handler, c.protectedResource, @@ -865,7 +894,7 @@ func (c *Collections) processItem( c.statusUpdater, c.ctrl, isPackage || childOfPackage, - invalidPrevDelta, + invalidPrevDelta || collPathAlreadyExists, nil) if err != nil { return clues.Stack(err).WithClues(ictx) diff --git a/src/internal/m365/collection/drive/collections_test.go b/src/internal/m365/collection/drive/collections_test.go index 5eba2c657..6dc378727 100644 --- a/src/internal/m365/collection/drive/collections_test.go +++ b/src/internal/m365/collection/drive/collections_test.go @@ -220,6 +220,30 @@ func (suite *OneDriveCollectionsUnitSuite) TestPopulateDriveCollections() { expectedExcludes: map[string]struct{}{}, expectedTopLevelPackages: map[string]struct{}{}, }, + { + name: "Single Folder created twice", // deleted a created with same name in between a backup + items: []models.DriveItemable{ + driveRootItem("root"), + driveItem("id1", "folder", testBaseDrivePath, "root", false, true, false), + driveItem("id2", "folder", testBaseDrivePath, "root", false, true, false), + }, + inputFolderMap: map[string]string{}, + scope: anyFolder, + topLevelPackages: map[string]struct{}{}, + expect: assert.NoError, + expectedCollectionIDs: map[string]statePath{ + "root": expectedStatePath(data.NotMovedState, ""), + "id2": expectedStatePath(data.NewState, folder), + }, + expectedPrevPaths: map[string]string{ + "root": expectedPath(""), + "id2": expectedPath("/folder"), + }, + expectedItemCount: 1, + expectedContainerCount: 2, + expectedExcludes: map[string]struct{}{}, + expectedTopLevelPackages: map[string]struct{}{}, + }, { name: "Single Package", items: []models.DriveItemable{ @@ -483,6 +507,123 @@ func (suite *OneDriveCollectionsUnitSuite) TestPopulateDriveCollections() { expectedTopLevelPackages: map[string]struct{}{}, expectedExcludes: map[string]struct{}{}, }, + { + name: "moved folder tree twice within backup", + items: []models.DriveItemable{ + driveRootItem("root"), + driveItem("id1", "folder", testBaseDrivePath, "root", false, true, false), + driveItem("id2", "folder", testBaseDrivePath, "root", false, true, false), + }, + inputFolderMap: map[string]string{ + "id1": expectedPath("/a-folder"), + "subfolder": expectedPath("/a-folder/subfolder"), + }, + scope: anyFolder, + topLevelPackages: map[string]struct{}{}, + expect: assert.NoError, + expectedCollectionIDs: map[string]statePath{ + "root": expectedStatePath(data.NotMovedState, ""), + "id2": expectedStatePath(data.NewState, folder), + }, + expectedItemCount: 1, + expectedFileCount: 0, + expectedContainerCount: 2, + expectedPrevPaths: map[string]string{ + "root": expectedPath(""), + "id2": expectedPath(folder), + "subfolder": expectedPath(folder + subFolder), + }, + expectedTopLevelPackages: map[string]struct{}{}, + expectedExcludes: map[string]struct{}{}, + }, + { + name: "deleted folder tree twice within backup", + items: []models.DriveItemable{ + driveRootItem("root"), + delItem("id1", testBaseDrivePath, "root", false, true, false), + driveItem("id1", "folder", testBaseDrivePath, "root", false, true, false), + delItem("id1", testBaseDrivePath, "root", false, true, false), + }, + inputFolderMap: map[string]string{ + "id1": expectedPath(""), + "subfolder": expectedPath("/a-folder/subfolder"), + }, + scope: anyFolder, + topLevelPackages: map[string]struct{}{}, + expect: assert.NoError, + expectedCollectionIDs: map[string]statePath{ + "root": expectedStatePath(data.NotMovedState, ""), + "id1": expectedStatePath(data.DeletedState, ""), + }, + expectedItemCount: 0, + expectedFileCount: 0, + expectedContainerCount: 1, + expectedPrevPaths: map[string]string{ + "root": expectedPath(""), + "subfolder": expectedPath("/a-folder" + subFolder), + }, + expectedTopLevelPackages: map[string]struct{}{}, + expectedExcludes: map[string]struct{}{}, + }, + { + name: "moved folder tree twice within backup including delete", + items: []models.DriveItemable{ + driveRootItem("root"), + driveItem("id1", "folder", testBaseDrivePath, "root", false, true, false), + delItem("id1", testBaseDrivePath, "root", false, true, false), + driveItem("id2", "folder", testBaseDrivePath, "root", false, true, false), + }, + inputFolderMap: map[string]string{ + "id1": expectedPath("/a-folder"), + "subfolder": expectedPath("/a-folder/subfolder"), + }, + scope: anyFolder, + topLevelPackages: map[string]struct{}{}, + expect: assert.NoError, + expectedCollectionIDs: map[string]statePath{ + "root": expectedStatePath(data.NotMovedState, ""), + "id2": expectedStatePath(data.NewState, folder), + }, + expectedItemCount: 1, + expectedFileCount: 0, + expectedContainerCount: 2, + expectedPrevPaths: map[string]string{ + "root": expectedPath(""), + "id2": expectedPath(folder), + "subfolder": expectedPath(folder + subFolder), + }, + expectedTopLevelPackages: map[string]struct{}{}, + expectedExcludes: map[string]struct{}{}, + }, + { + name: "deleted folder tree twice within backup with addition", + items: []models.DriveItemable{ + driveRootItem("root"), + driveItem("id1", "folder", testBaseDrivePath, "root", false, true, false), + delItem("id1", testBaseDrivePath, "root", false, true, false), + driveItem("id2", "folder", testBaseDrivePath, "root", false, true, false), + delItem("id2", testBaseDrivePath, "root", false, true, false), + }, + inputFolderMap: map[string]string{ + "id1": expectedPath("/a-folder"), + "subfolder": expectedPath("/a-folder/subfolder"), + }, + scope: anyFolder, + topLevelPackages: map[string]struct{}{}, + expect: assert.NoError, + expectedCollectionIDs: map[string]statePath{ + "root": expectedStatePath(data.NotMovedState, ""), + }, + expectedItemCount: 1, + expectedFileCount: 0, + expectedContainerCount: 2, + expectedPrevPaths: map[string]string{ + "root": expectedPath(""), + "subfolder": expectedPath(folder + subFolder), + }, + expectedTopLevelPackages: map[string]struct{}{}, + expectedExcludes: map[string]struct{}{}, + }, { name: "moved folder tree with file no previous", items: []models.DriveItemable{ @@ -888,7 +1029,8 @@ func (suite *OneDriveCollectionsUnitSuite) TestPopulateDriveCollections() { assert.ElementsMatch( t, maps.Keys(test.expectedCollectionIDs), - maps.Keys(c.CollectionMap[driveID])) + maps.Keys(c.CollectionMap[driveID]), + "expected collection IDs") assert.Equal(t, test.expectedItemCount, c.NumItems, "item count") assert.Equal(t, test.expectedFileCount, c.NumFiles, "file count") assert.Equal(t, test.expectedContainerCount, c.NumContainers, "container count") @@ -2373,6 +2515,179 @@ func (suite *OneDriveCollectionsUnitSuite) TestGet() { rootFolderPath1: true, }, }, + { + name: "One Drive Folder Created -> Deleted -> Created", + drives: []models.Driveable{drive1}, + enumerator: mock.EnumerateItemsDeltaByDrive{ + DrivePagers: map[string]*mock.DriveItemsDeltaPager{ + driveID1: { + Pages: []mock.NextPage{ + { + Items: []models.DriveItemable{ + driveRootItem("root"), + driveItem("folder", "folder", driveBasePath1, "root", false, true, false), + driveItem("file", "file", driveBasePath1+"/folder", "folder", true, false, false), + }, + }, + { + Items: []models.DriveItemable{ + driveRootItem("root"), + delItem("folder", driveBasePath1, "root", false, true, false), + delItem("file", driveBasePath1, "root", true, false, false), + }, + }, + { + Items: []models.DriveItemable{ + driveRootItem("root"), + driveItem("folder1", "folder", driveBasePath1, "root", false, true, false), + driveItem("file1", "file", driveBasePath1+"/folder", "folder1", true, false, false), + }, + }, + }, + DeltaUpdate: pagers.DeltaUpdate{URL: delta2, Reset: true}, + }, + }, + }, + canUsePreviousBackup: true, + errCheck: assert.NoError, + prevFolderPaths: map[string]map[string]string{ + driveID1: {}, + }, + expectedCollections: map[string]map[data.CollectionState][]string{ + rootFolderPath1: {data.NewState: {}}, + folderPath1: {data.NewState: {"folder1", "file1"}}, + }, + expectedDeltaURLs: map[string]string{ + driveID1: delta2, + }, + expectedFolderPaths: map[string]map[string]string{ + driveID1: { + "root": rootFolderPath1, + "folder1": folderPath1, + }, + }, + expectedDelList: pmMock.NewPrefixMap(map[string]map[string]struct{}{}), + doNotMergeItems: map[string]bool{ + rootFolderPath1: true, + folderPath1: true, + }, + }, + { + name: "One Drive Folder Deleted -> Created -> Deleted", + drives: []models.Driveable{drive1}, + enumerator: mock.EnumerateItemsDeltaByDrive{ + DrivePagers: map[string]*mock.DriveItemsDeltaPager{ + driveID1: { + Pages: []mock.NextPage{ + { + Items: []models.DriveItemable{ + driveRootItem("root"), + delItem("folder", driveBasePath1, "root", false, true, false), + delItem("file", driveBasePath1+"/folder", "root", true, false, false), + }, + }, + { + Items: []models.DriveItemable{ + driveRootItem("root"), + driveItem("folder", "folder", driveBasePath1, "root", false, true, false), + driveItem("file", "file", driveBasePath1+"/folder", "folder", true, false, false), + }, + }, + { + Items: []models.DriveItemable{ + driveRootItem("root"), + delItem("folder", driveBasePath1, "root", false, true, false), + delItem("file", driveBasePath1+"/folder", "root", true, false, false), + }, + }, + }, + DeltaUpdate: pagers.DeltaUpdate{URL: delta2, Reset: true}, + }, + }, + }, + canUsePreviousBackup: true, + errCheck: assert.NoError, + prevFolderPaths: map[string]map[string]string{ + driveID1: { + "root": rootFolderPath1, + "folder": folderPath1, + }, + }, + expectedCollections: map[string]map[data.CollectionState][]string{ + rootFolderPath1: {data.NotMovedState: {}}, + folderPath1: {data.DeletedState: {}}, + }, + expectedDeltaURLs: map[string]string{ + driveID1: delta2, + }, + expectedFolderPaths: map[string]map[string]string{ + driveID1: { + "root": rootFolderPath1, + }, + }, + expectedDelList: pmMock.NewPrefixMap(map[string]map[string]struct{}{}), + doNotMergeItems: map[string]bool{}, + }, + { + name: "One Drive Folder Created -> Deleted -> Created with prev", + drives: []models.Driveable{drive1}, + enumerator: mock.EnumerateItemsDeltaByDrive{ + DrivePagers: map[string]*mock.DriveItemsDeltaPager{ + driveID1: { + Pages: []mock.NextPage{ + { + Items: []models.DriveItemable{ + driveRootItem("root"), + driveItem("folder", "folder", driveBasePath1, "root", false, true, false), + driveItem("file", "file", driveBasePath1+"/folder", "folder", true, false, false), + }, + }, + { + Items: []models.DriveItemable{ + driveRootItem("root"), + delItem("folder", driveBasePath1, "root", false, true, false), + delItem("file", driveBasePath1, "root", true, false, false), + }, + }, + { + Items: []models.DriveItemable{ + driveRootItem("root"), + driveItem("folder1", "folder", driveBasePath1, "root", false, true, false), + driveItem("file1", "file", driveBasePath1+"/folder", "folder1", true, false, false), + }, + }, + }, + DeltaUpdate: pagers.DeltaUpdate{URL: delta2, Reset: true}, + }, + }, + }, + canUsePreviousBackup: true, + errCheck: assert.NoError, + prevFolderPaths: map[string]map[string]string{ + driveID1: { + "root": rootFolderPath1, + "folder": folderPath1, + }, + }, + expectedCollections: map[string]map[data.CollectionState][]string{ + rootFolderPath1: {data.NewState: {}}, + folderPath1: {data.DeletedState: {}, data.NewState: {"folder1", "file1"}}, + }, + expectedDeltaURLs: map[string]string{ + driveID1: delta2, + }, + expectedFolderPaths: map[string]map[string]string{ + driveID1: { + "root": rootFolderPath1, + "folder1": folderPath1, + }, + }, + expectedDelList: pmMock.NewPrefixMap(map[string]map[string]struct{}{}), + doNotMergeItems: map[string]bool{ + rootFolderPath1: false, + folderPath1: true, + }, + }, { name: "One Drive Item Made And Deleted", drives: []models.Driveable{drive1}, @@ -2586,7 +2901,8 @@ func (suite *OneDriveCollectionsUnitSuite) TestGet() { return } - collectionCount := 0 + collPaths := []string{} + for _, baseCol := range cols { var folderPath string if baseCol.State() != data.DeletedState { @@ -2613,7 +2929,7 @@ func (suite *OneDriveCollectionsUnitSuite) TestGet() { continue } - collectionCount++ + collPaths = append(collPaths, folderPath) // TODO: We should really be getting items in the collection // via the Items() channel. The lack of that makes this check a bit more @@ -2632,7 +2948,7 @@ func (suite *OneDriveCollectionsUnitSuite) TestGet() { t, test.expectedCollections[folderPath][baseCol.State()], itemIDs, - "state: %d, path: %s", + "expected elements to match in collection with:\nstate '%d'\npath '%s'", baseCol.State(), folderPath) @@ -2648,14 +2964,18 @@ func (suite *OneDriveCollectionsUnitSuite) TestGet() { "DoNotMergeItems in collection: %s", p) } - expectedCollectionCount := 0 - for _, ec := range test.expectedCollections { - expectedCollectionCount += len(ec) + expectCollPaths := []string{} + + for cp, c := range test.expectedCollections { + // add one entry or each expected collection + for range c { + expectCollPaths = append(expectCollPaths, cp) + } } - assert.Equal(t, expectedCollectionCount, collectionCount, "number of collections") + assert.ElementsMatch(t, expectCollPaths, collPaths, "collection paths") - test.expectedDelList.AssertEqual(t, delList) + test.expectedDelList.AssertEqual(t, delList, "deleted items") }) } }