From 3b516e551fd5559fdf754b58b37e4a501d7979e5 Mon Sep 17 00:00:00 2001 From: Abin Simon Date: Tue, 21 Feb 2023 15:26:06 +0530 Subject: [PATCH] Handle files moved multiple times within a delta query (#2560) ## Description It is possible that within a single delta query that files could be moved multiple times, this fixes that. ## Does this PR need a docs update or release note? - [ ] :white_check_mark: Yes, it's included - [x] :clock1: Yes, but in a later PR - [ ] :no_entry: No ## Type of change - [x] :sunflower: Feature - [ ] :bug: Bugfix - [ ] :world_map: Documentation - [ ] :robot: Test - [ ] :computer: CI/Deployment - [ ] :broom: Tech Debt/Cleanup ## Issue(s) * fixes https://github.com/alcionai/corso/issues/2559 ## Test Plan - [ ] :muscle: Manual - [x] :zap: Unit test - [ ] :green_heart: E2E --- src/internal/connector/onedrive/collection.go | 28 +++- .../connector/onedrive/collections.go | 44 +++++- .../connector/onedrive/collections_test.go | 126 +++++++++++++++++- src/internal/connector/onedrive/drive.go | 20 ++- src/internal/connector/onedrive/item_test.go | 1 + .../sharepoint/data_collections_test.go | 12 +- 6 files changed, 214 insertions(+), 17 deletions(-) diff --git a/src/internal/connector/onedrive/collection.go b/src/internal/connector/onedrive/collection.go index 078d704fb..095c42941 100644 --- a/src/internal/connector/onedrive/collection.go +++ b/src/internal/connector/onedrive/collection.go @@ -133,10 +133,32 @@ func NewCollection( return c } -// Adds an itemID to the collection -// This will make it eligible to be populated -func (oc *Collection) Add(item models.DriveItemable) { +// Adds an itemID to the collection. This will make it eligible to be +// populated. The return values denotes if the item was previously +// present or is new one. +func (oc *Collection) Add(item models.DriveItemable) bool { + _, found := oc.driveItems[*item.GetId()] oc.driveItems[*item.GetId()] = item + + return !found // !found = new +} + +// Remove removes a item from the collection +func (oc *Collection) Remove(item models.DriveItemable) bool { + _, found := oc.driveItems[*item.GetId()] + if !found { + return false + } + + delete(oc.driveItems, *item.GetId()) + + return true +} + +// IsEmpty check if a collection does not contain any items +// TODO(meain): Should we just have function that returns driveItems? +func (oc *Collection) IsEmpty() bool { + return len(oc.driveItems) == 0 } // Items() returns the channel containing M365 Exchange objects diff --git a/src/internal/connector/onedrive/collections.go b/src/internal/connector/onedrive/collections.go index 70e3138fd..130339b9a 100644 --- a/src/internal/connector/onedrive/collections.go +++ b/src/internal/connector/onedrive/collections.go @@ -497,6 +497,7 @@ func (c *Collections) UpdateCollections( oldPaths map[string]string, newPaths map[string]string, excluded map[string]struct{}, + itemCollection map[string]string, invalidPrevDelta bool, ) error { for _, item := range items { @@ -713,14 +714,43 @@ func (c *Collections) UpdateCollections( // times within a single delta response, we might end up // storing the permissions multiple times. Switching the // files to IDs should fix this. - collection := col.(*Collection) - collection.Add(item) - c.NumItems++ - if item.GetFile() != nil { - // This is necessary as we have a fallthrough for - // folders and packages - c.NumFiles++ + // Delete the file from previous collection. This will + // only kick in if the file was moved multiple times + // within a single delta query + itemColID, found := itemCollection[*item.GetId()] + if found { + pcol, found := c.CollectionMap[itemColID] + if !found { + return clues.New("previous collection not found").With("item_id", *item.GetId()) + } + + pcollection := pcol.(*Collection) + + removed := pcollection.Remove(item) + if !removed { + return clues.New("removing from prev collection").With("item_id", *item.GetId()) + } + + // If that was the only item in that collection and is + // not getting added back, delete the collection + if itemColID != collectionID && + pcollection.IsEmpty() && + pcollection.State() == data.NewState { + delete(c.CollectionMap, itemColID) + } + } + + itemCollection[*item.GetId()] = collectionID + collection := col.(*Collection) + + if collection.Add(item) { + c.NumItems++ + if item.GetFile() != nil { + // This is necessary as we have a fallthrough for + // folders and packages + c.NumFiles++ + } } default: diff --git a/src/internal/connector/onedrive/collections_test.go b/src/internal/connector/onedrive/collections_test.go index a76542726..7e5ec8688 100644 --- a/src/internal/connector/onedrive/collections_test.go +++ b/src/internal/connector/onedrive/collections_test.go @@ -579,6 +579,36 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() { }, expectedExcludes: map[string]struct{}{"itemInSubfolder": {}, "itemInFolder2": {}}, }, + { + testCase: "moved folder tree multiple times", + items: []models.DriveItemable{ + driveRootItem("root"), + driveItem("folder", "folder", testBaseDrivePath, "root", false, true, false), + driveItem("file", "file", testBaseDrivePath+"/folder", "folder", true, false, false), + driveItem("folder", "folder2", testBaseDrivePath, "root", false, true, false), + }, + inputFolderMap: map[string]string{ + "folder": expectedPath("/a-folder"), + "subfolder": expectedPath("/a-folder/subfolder"), + }, + scope: anyFolder, + expect: assert.NoError, + expectedCollectionIDs: map[string]statePath{ + "root": expectedStatePath(data.NotMovedState, ""), + "folder": expectedStatePath(data.MovedState, "/folder2", "/a-folder"), + }, + expectedItemCount: 3, + expectedFileCount: 1, + expectedContainerCount: 2, + expectedMetadataPaths: map[string]string{ + "root": expectedPath(""), + "folder": expectedPath("/folder2"), + "subfolder": expectedPath("/folder2/subfolder"), + }, + expectedExcludes: map[string]struct{}{ + "file": {}, + }, + }, { testCase: "deleted folder and package", items: []models.DriveItemable{ @@ -693,6 +723,7 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() { nil, control.Options{ToggleFeatures: control.Toggles{EnablePermissionsBackup: true}}) + itemCollection := map[string]string{} err := c.UpdateCollections( ctx, "driveID1", @@ -701,6 +732,7 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() { tt.inputFolderMap, outputFolderMap, excludes, + itemCollection, false, ) tt.expect(t, err) @@ -1272,6 +1304,75 @@ func (suite *OneDriveCollectionsSuite) TestGet() { }, expectedDelList: map[string]struct{}{"file": {}}, }, + { + name: "OneDrive_OneItemPage_NoErrors_FileRenamedMultiple", + drives: []models.Driveable{drive1}, + items: map[string][]deltaPagerResult{ + driveID1: { + { + items: []models.DriveItemable{ + driveRootItem("root"), + driveItem("folder", "folder", driveBasePath1, "root", false, true, false), + driveItem("file", "file", driveBasePath1+"/folder", "folder", true, false, false), + driveItem("file", "file2", driveBasePath1+"/folder", "folder", true, false, false), + }, + deltaLink: &delta, + }, + }, + }, + errCheck: assert.NoError, + prevFolderPaths: map[string]map[string]string{ + driveID1: {}, + }, + expectedCollections: map[string]map[data.CollectionState][]string{ + folderPath1: {data.NewState: {"file"}}, + rootFolderPath1: {data.NotMovedState: {"folder"}}, + }, + expectedDeltaURLs: map[string]string{ + driveID1: delta, + }, + expectedFolderPaths: map[string]map[string]string{ + driveID1: { + "root": rootFolderPath1, + "folder": folderPath1, + }, + }, + expectedDelList: map[string]struct{}{"file": {}}, + }, + { + name: "OneDrive_OneItemPage_NoErrors_FileMovedMultiple", + drives: []models.Driveable{drive1}, + items: map[string][]deltaPagerResult{ + driveID1: { + { + items: []models.DriveItemable{ + driveRootItem("root"), + driveItem("folder", "folder", driveBasePath1, "root", false, true, false), + driveItem("file", "file", driveBasePath1+"/folder", "folder", true, false, false), + driveItem("file", "file2", driveBasePath1, "root", true, false, false), + }, + deltaLink: &delta, + }, + }, + }, + errCheck: assert.NoError, + prevFolderPaths: map[string]map[string]string{ + driveID1: {}, + }, + expectedCollections: map[string]map[data.CollectionState][]string{ + rootFolderPath1: {data.NotMovedState: {"folder", "file"}}, + }, + expectedDeltaURLs: map[string]string{ + driveID1: delta, + }, + expectedFolderPaths: map[string]map[string]string{ + driveID1: { + "root": rootFolderPath1, + "folder": folderPath1, + }, + }, + expectedDelList: map[string]struct{}{"file": {}}, + }, { name: "OneDrive_OneItemPage_EmptyDelta_NoErrors", drives: []models.Driveable{drive1}, @@ -1466,7 +1567,7 @@ func (suite *OneDriveCollectionsSuite) TestGet() { items: []models.DriveItemable{ driveRootItem("root"), driveItem("folder", "folder", driveBasePath1, "root", false, true, false), - driveItem("file", "file", driveBasePath1+"/folder", "folder", true, false, false), + driveItem("file2", "file", driveBasePath1+"/folder", "folder", true, false, false), }, deltaLink: &delta, }, @@ -1475,7 +1576,7 @@ func (suite *OneDriveCollectionsSuite) TestGet() { errCheck: assert.NoError, expectedCollections: map[string]map[data.CollectionState][]string{ expectedPath1(""): {data.NotMovedState: {"file", "folder"}}, - expectedPath1("/folder"): {data.NewState: {"file"}}, + expectedPath1("/folder"): {data.NewState: {"file2"}}, }, expectedDeltaURLs: map[string]string{ driveID1: delta, @@ -1505,7 +1606,7 @@ func (suite *OneDriveCollectionsSuite) TestGet() { items: []models.DriveItemable{ driveRootItem("root"), driveItem("folder", "folder", driveBasePath1, "root", false, true, false), - driveItem("file", "file", driveBasePath1+"/folder", "folder", true, false, false), + driveItem("file2", "file", driveBasePath1+"/folder", "folder", true, false, false), }, deltaLink: &delta, }, @@ -1517,7 +1618,7 @@ func (suite *OneDriveCollectionsSuite) TestGet() { }, expectedCollections: map[string]map[data.CollectionState][]string{ expectedPath1(""): {data.NotMovedState: {"file", "folder"}}, - expectedPath1("/folder"): {data.NewState: {"file"}}, + expectedPath1("/folder"): {data.NewState: {"file2"}}, }, expectedDeltaURLs: map[string]string{ driveID1: delta, @@ -1528,7 +1629,7 @@ func (suite *OneDriveCollectionsSuite) TestGet() { "folder": folderPath1, }, }, - expectedDelList: map[string]struct{}{"file": {}}, + expectedDelList: map[string]struct{}{"file": {}, "file2": {}}, doNotMergeItems: false, }, { @@ -1688,6 +1789,7 @@ func (suite *OneDriveCollectionsSuite) TestGet() { return } + collectionCount := 0 for _, baseCol := range cols { var folderPath string if baseCol.State() != data.DeletedState { @@ -1710,6 +1812,8 @@ func (suite *OneDriveCollectionsSuite) TestGet() { continue } + collectionCount++ + // TODO(ashmrtn): We should really be getting items in the collection // via the Items() channel, but we don't have a way to mock out the // actual item fetch yet (mostly wiring issues). The lack of that makes @@ -1733,6 +1837,17 @@ func (suite *OneDriveCollectionsSuite) TestGet() { assert.Equal(t, test.doNotMergeItems, baseCol.DoNotMergeItems(), "DoNotMergeItems") } + expectedCollectionCount := 0 + for c := range test.expectedCollections { + for range test.expectedCollections[c] { + expectedCollectionCount++ + } + } + + // This check is necessary to make sure we are all the + // collections we expect it to + assert.Equal(t, expectedCollectionCount, collectionCount, "number of collections") + assert.Equal(t, test.expectedDelList, delList, "del list") }) } @@ -1893,6 +2008,7 @@ func (suite *OneDriveCollectionsSuite) TestCollectItems() { oldPaths map[string]string, newPaths map[string]string, excluded map[string]struct{}, + itemCollection map[string]string, doNotMergeItems bool, ) error { return nil diff --git a/src/internal/connector/onedrive/drive.go b/src/internal/connector/onedrive/drive.go index 5965e99d1..f95b826e1 100644 --- a/src/internal/connector/onedrive/drive.go +++ b/src/internal/connector/onedrive/drive.go @@ -148,6 +148,7 @@ type itemCollector func( oldPaths map[string]string, newPaths map[string]string, excluded map[string]struct{}, + fileCollectionMap map[string]string, validPrevDelta bool, ) error @@ -199,6 +200,12 @@ func collectItems( newPaths = map[string]string{} excluded = map[string]struct{}{} invalidPrevDelta = len(prevDelta) == 0 + + // itemCollection is used to identify which collection a + // file belongs to. This is useful to delete a file from the + // collection it was previously in, in case it was moved to a + // different collection within the same delta query + itemCollection = map[string]string{} ) if !invalidPrevDelta { @@ -233,7 +240,17 @@ func collectItems( return DeltaUpdate{}, nil, nil, errors.Wrap(err, "extracting items from response") } - err = collector(ctx, driveID, driveName, vals, oldPaths, newPaths, excluded, invalidPrevDelta) + err = collector( + ctx, + driveID, + driveName, + vals, + oldPaths, + newPaths, + excluded, + itemCollection, + invalidPrevDelta, + ) if err != nil { return DeltaUpdate{}, nil, nil, err } @@ -382,6 +399,7 @@ func GetAllFolders( oldPaths map[string]string, newPaths map[string]string, excluded map[string]struct{}, + itemCollection map[string]string, doNotMergeItems bool, ) error { for _, item := range items { diff --git a/src/internal/connector/onedrive/item_test.go b/src/internal/connector/onedrive/item_test.go index 5a6a36598..4327376b4 100644 --- a/src/internal/connector/onedrive/item_test.go +++ b/src/internal/connector/onedrive/item_test.go @@ -106,6 +106,7 @@ func (suite *ItemIntegrationSuite) TestItemReader_oneDrive() { oldPaths map[string]string, newPaths map[string]string, excluded map[string]struct{}, + itemCollection map[string]string, doNotMergeItems bool, ) error { for _, item := range items { diff --git a/src/internal/connector/sharepoint/data_collections_test.go b/src/internal/connector/sharepoint/data_collections_test.go index 03bf9b1e2..47cc3c93e 100644 --- a/src/internal/connector/sharepoint/data_collections_test.go +++ b/src/internal/connector/sharepoint/data_collections_test.go @@ -105,7 +105,17 @@ func (suite *SharePointLibrariesSuite) TestUpdateCollections() { &MockGraphService{}, nil, control.Options{}) - err := c.UpdateCollections(ctx, "driveID1", "General", test.items, paths, newPaths, excluded, true) + err := c.UpdateCollections( + ctx, + "driveID1", + "General", + test.items, + paths, + newPaths, + excluded, + map[string]string{}, + true, + ) test.expect(t, err) assert.Equal(t, len(test.expectedCollectionIDs), len(c.CollectionMap), "collection paths") assert.Equal(t, test.expectedItemCount, c.NumItems, "item count")