From a4f2f0a43787f698883e4d3b6df6a455c61c7ccc Mon Sep 17 00:00:00 2001 From: ashmrtn Date: Wed, 25 Jan 2023 08:57:30 -0800 Subject: [PATCH] Add deleted files to OneDrive excluded file list (#2250) ## Description Begin populating a global exclude set with files deleted from OneDrive. The set contains the IDs of files that have been deleted. This is actually safe to return from GraphConnector now (though it's not returned) as it uses item IDs instead of item names. Future PRs will need to address handling of (potentially) moved files ## Does this PR need a docs update or release note? - [ ] :white_check_mark: Yes, it's included - [ ] :clock1: Yes, but in a later PR - [x] :no_entry: No ## Type of change - [x] :sunflower: Feature - [ ] :bug: Bugfix - [ ] :world_map: Documentation - [ ] :robot: Test - [ ] :computer: CI/Deployment - [ ] :broom: Tech Debt/Cleanup ## Issue(s) * #2242 ## Test Plan - [ ] :muscle: Manual - [x] :zap: Unit test - [ ] :green_heart: E2E --- .../connector/onedrive/collections.go | 31 ++++++++++++- .../connector/onedrive/collections_test.go | 44 ++++++++++++++++++- src/internal/connector/onedrive/drive.go | 15 ++++--- src/internal/connector/onedrive/item_test.go | 3 +- .../sharepoint/data_collections_test.go | 3 +- 5 files changed, 86 insertions(+), 10 deletions(-) diff --git a/src/internal/connector/onedrive/collections.go b/src/internal/connector/onedrive/collections.go index 90734c43a..8f18aa780 100644 --- a/src/internal/connector/onedrive/collections.go +++ b/src/internal/connector/onedrive/collections.go @@ -7,6 +7,7 @@ import ( "github.com/microsoftgraph/msgraph-sdk-go/models" "github.com/pkg/errors" + "golang.org/x/exp/maps" "github.com/alcionai/corso/src/internal/connector/graph" "github.com/alcionai/corso/src/internal/connector/support" @@ -98,6 +99,12 @@ func (c *Collections) Get(ctx context.Context) ([]data.Collection, error) { deltaURLs = map[string]string{} // Drive ID -> folder ID -> folder path folderPaths = map[string]map[string]string{} + // Items that should be excluded when sourcing data from the base backup. + // TODO(ashmrtn): This list contains the M365 IDs of deleted items so while + // it's technically safe to pass all the way through to kopia (files are + // unlikely to be named their M365 ID) we should wait to do that until we've + // switched to using those IDs for file names in kopia. + excludedItems = map[string]struct{}{} ) // Update the collection map with items from each drive @@ -105,7 +112,13 @@ func (c *Collections) Get(ctx context.Context) ([]data.Collection, error) { driveID := *d.GetId() driveName := *d.GetName() - delta, paths, err := collectItems(ctx, c.service, driveID, driveName, c.UpdateCollections) + delta, paths, excluded, err := collectItems( + ctx, + c.service, + driveID, + driveName, + c.UpdateCollections, + ) if err != nil { return nil, err } @@ -121,6 +134,8 @@ func (c *Collections) Get(ctx context.Context) ([]data.Collection, error) { folderPaths[driveID][id] = p } } + + maps.Copy(excludedItems, excluded) } observe.Message(ctx, fmt.Sprintf("Discovered %d items to backup", c.NumItems)) @@ -171,6 +186,7 @@ func (c *Collections) UpdateCollections( items []models.DriveItemable, oldPaths map[string]string, newPaths map[string]string, + excluded map[string]struct{}, ) error { for _, item := range items { if item.GetRoot() != nil { @@ -231,6 +247,19 @@ func (c *Collections) UpdateCollections( updatePath(newPaths, *item.GetId(), folderPath.String()) case item.GetFile() != nil: + if item.GetDeleted() != nil { + excluded[*item.GetId()] = struct{}{} + // Exchange counts items streamed through it which includes deletions so + // add that here too. + c.NumFiles++ + c.NumItems++ + + continue + } + + // TODO(ashmrtn): Figure what when an item was moved (maybe) and add it to + // the exclude list. + col, found := c.CollectionMap[collectionPath.String()] if !found { // TODO(ashmrtn): Compare old and new path and set collection state diff --git a/src/internal/connector/onedrive/collections_test.go b/src/internal/connector/onedrive/collections_test.go index b9c313883..485b9ed86 100644 --- a/src/internal/connector/onedrive/collections_test.go +++ b/src/internal/connector/onedrive/collections_test.go @@ -105,6 +105,7 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() { expectedContainerCount int expectedFileCount int expectedMetadataPaths map[string]string + expectedExcludes map[string]struct{} }{ { testCase: "Invalid item", @@ -115,6 +116,7 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() { scope: anyFolder, expect: assert.Error, expectedMetadataPaths: map[string]string{}, + expectedExcludes: map[string]struct{}{}, }, { testCase: "Single File", @@ -135,6 +137,7 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() { expectedContainerCount: 1, // Root folder is skipped since it's always present. expectedMetadataPaths: map[string]string{}, + expectedExcludes: map[string]struct{}{}, }, { testCase: "Single Folder", @@ -153,6 +156,7 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() { testBaseDrivePath+"/folder", )[0], }, + expectedExcludes: map[string]struct{}{}, }, { testCase: "Single Package", @@ -171,6 +175,7 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() { testBaseDrivePath+"/package", )[0], }, + expectedExcludes: map[string]struct{}{}, }, { testCase: "1 root file, 1 folder, 1 package, 2 files, 3 collections", @@ -209,6 +214,7 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() { testBaseDrivePath+"/package", )[0], }, + expectedExcludes: map[string]struct{}{}, }, { testCase: "contains folder selector", @@ -258,6 +264,7 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() { testBaseDrivePath+"/folder/subfolder/folder", )[0], }, + expectedExcludes: map[string]struct{}{}, }, { testCase: "prefix subfolder selector", @@ -292,6 +299,7 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() { testBaseDrivePath+"/folder/subfolder/folder", )[0], }, + expectedExcludes: map[string]struct{}{}, }, { testCase: "match subfolder selector", @@ -318,6 +326,7 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() { expectedContainerCount: 1, // No child folders for subfolder so nothing here. expectedMetadataPaths: map[string]string{}, + expectedExcludes: map[string]struct{}{}, }, { testCase: "not moved folder tree", @@ -358,6 +367,7 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() { testBaseDrivePath+"/folder/subfolder", )[0], }, + expectedExcludes: map[string]struct{}{}, }, { testCase: "moved folder tree", @@ -398,6 +408,7 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() { testBaseDrivePath+"/folder/subfolder", )[0], }, + expectedExcludes: map[string]struct{}{}, }, { testCase: "moved folder tree and subfolder 1", @@ -439,6 +450,7 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() { testBaseDrivePath+"/subfolder", )[0], }, + expectedExcludes: map[string]struct{}{}, }, { testCase: "moved folder tree and subfolder 2", @@ -480,6 +492,7 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() { testBaseDrivePath+"/subfolder", )[0], }, + expectedExcludes: map[string]struct{}{}, }, { testCase: "deleted folder and package", @@ -508,6 +521,7 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() { expectedFileCount: 0, expectedContainerCount: 0, expectedMetadataPaths: map[string]string{}, + expectedExcludes: map[string]struct{}{}, }, { testCase: "delete folder tree move subfolder", @@ -543,6 +557,24 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() { testBaseDrivePath+"/subfolder", )[0], }, + expectedExcludes: map[string]struct{}{}, + }, + { + testCase: "delete file", + items: []models.DriveItemable{ + delItem("item", testBaseDrivePath, true, false, false), + }, + inputFolderMap: map[string]string{}, + scope: anyFolder, + expect: assert.NoError, + expectedCollectionPaths: []string{}, + expectedItemCount: 1, + expectedFileCount: 1, + expectedContainerCount: 0, + expectedMetadataPaths: map[string]string{}, + expectedExcludes: map[string]struct{}{ + "item": {}, + }, }, } @@ -551,6 +583,7 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() { ctx, flush := tester.NewContext() defer flush() + excludes := map[string]struct{}{} outputFolderMap := map[string]string{} maps.Copy(outputFolderMap, tt.inputFolderMap) c := NewCollections( @@ -562,7 +595,15 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() { nil, control.Options{}) - err := c.UpdateCollections(ctx, "driveID", "General", tt.items, tt.inputFolderMap, outputFolderMap) + err := c.UpdateCollections( + ctx, + "driveID", + "General", + tt.items, + tt.inputFolderMap, + outputFolderMap, + excludes, + ) tt.expect(t, err) assert.Equal(t, len(tt.expectedCollectionPaths), len(c.CollectionMap), "collection paths") assert.Equal(t, tt.expectedItemCount, c.NumItems, "item count") @@ -573,6 +614,7 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() { } assert.Equal(t, tt.expectedMetadataPaths, outputFolderMap) + assert.Equal(t, tt.expectedExcludes, excludes) }) } } diff --git a/src/internal/connector/onedrive/drive.go b/src/internal/connector/onedrive/drive.go index 17ee6f96d..315eaee5c 100644 --- a/src/internal/connector/onedrive/drive.go +++ b/src/internal/connector/onedrive/drive.go @@ -170,6 +170,7 @@ type itemCollector func( driveItems []models.DriveItemable, oldPaths map[string]string, newPaths map[string]string, + excluded map[string]struct{}, ) error // collectItems will enumerate all items in the specified drive and hand them to the @@ -179,13 +180,14 @@ func collectItems( service graph.Servicer, driveID, driveName string, collector itemCollector, -) (string, map[string]string, error) { +) (string, map[string]string, map[string]struct{}, error) { var ( newDeltaURL = "" // TODO(ashmrtn): Eventually this should probably be a parameter so we can // take in previous paths. oldPaths = map[string]string{} newPaths = map[string]string{} + excluded = map[string]struct{}{} ) maps.Copy(newPaths, oldPaths) @@ -219,16 +221,16 @@ func collectItems( for { r, err := builder.Get(ctx, requestConfig) if err != nil { - return "", nil, errors.Wrapf( + return "", nil, nil, errors.Wrapf( err, "failed to query drive items. details: %s", support.ConnectorStackErrorTrace(err), ) } - err = collector(ctx, driveID, driveName, r.GetValue(), oldPaths, newPaths) + err = collector(ctx, driveID, driveName, r.GetValue(), oldPaths, newPaths, excluded) if err != nil { - return "", nil, err + return "", nil, nil, err } if r.GetOdataDeltaLink() != nil && len(*r.GetOdataDeltaLink()) > 0 { @@ -245,7 +247,7 @@ func collectItems( builder = msdrives.NewItemRootDeltaRequestBuilder(*nextLink, service.Adapter()) } - return newDeltaURL, newPaths, nil + return newDeltaURL, newPaths, excluded, nil } // getFolder will lookup the specified folder name under `parentFolderID` @@ -352,7 +354,7 @@ func GetAllFolders( folders := map[string]*Displayable{} for _, d := range drives { - _, _, err = collectItems( + _, _, _, err = collectItems( ctx, gs, *d.GetId(), @@ -363,6 +365,7 @@ func GetAllFolders( items []models.DriveItemable, oldPaths map[string]string, newPaths map[string]string, + excluded map[string]struct{}, ) error { for _, item := range items { // Skip the root item. diff --git a/src/internal/connector/onedrive/item_test.go b/src/internal/connector/onedrive/item_test.go index f53607328..b3f45ca56 100644 --- a/src/internal/connector/onedrive/item_test.go +++ b/src/internal/connector/onedrive/item_test.go @@ -101,6 +101,7 @@ func (suite *ItemIntegrationSuite) TestItemReader_oneDrive() { items []models.DriveItemable, oldPaths map[string]string, newPaths map[string]string, + excluded map[string]struct{}, ) error { for _, item := range items { if item.GetFile() != nil { @@ -111,7 +112,7 @@ func (suite *ItemIntegrationSuite) TestItemReader_oneDrive() { return nil } - _, _, err := collectItems(ctx, suite, suite.userDriveID, "General", itemCollector) + _, _, _, err := collectItems(ctx, suite, suite.userDriveID, "General", itemCollector) require.NoError(suite.T(), err) // Test Requirement 2: Need a file diff --git a/src/internal/connector/sharepoint/data_collections_test.go b/src/internal/connector/sharepoint/data_collections_test.go index 9fc538e2c..423336253 100644 --- a/src/internal/connector/sharepoint/data_collections_test.go +++ b/src/internal/connector/sharepoint/data_collections_test.go @@ -89,6 +89,7 @@ func (suite *SharePointLibrariesSuite) TestUpdateCollections() { paths := map[string]string{} newPaths := map[string]string{} + excluded := map[string]struct{}{} c := onedrive.NewCollections( tenant, site, @@ -97,7 +98,7 @@ func (suite *SharePointLibrariesSuite) TestUpdateCollections() { &MockGraphService{}, nil, control.Options{}) - err := c.UpdateCollections(ctx, "driveID", "General", test.items, paths, newPaths) + err := c.UpdateCollections(ctx, "driveID", "General", test.items, paths, newPaths, excluded) test.expect(t, err) assert.Equal(t, len(test.expectedCollectionPaths), len(c.CollectionMap), "collection paths") assert.Equal(t, test.expectedItemCount, c.NumItems, "item count")