Add deleted files to OneDrive excluded file list (#2250)

## Description

Begin populating a global exclude set with files deleted from OneDrive. The set contains the IDs of files that have been deleted.

This is actually safe to return from GraphConnector now (though it's not returned) as it uses item IDs instead of item names.

Future PRs will need to address handling of (potentially) moved files

## Does this PR need a docs update or release note?

- [ ]  Yes, it's included
- [ ] 🕐 Yes, but in a later PR
- [x]  No 

## Type of change

- [x] 🌻 Feature
- [ ] 🐛 Bugfix
- [ ] 🗺️ Documentation
- [ ] 🤖 Test
- [ ] 💻 CI/Deployment
- [ ] 🧹 Tech Debt/Cleanup

## Issue(s)

* #2242 

## Test Plan

- [ ] 💪 Manual
- [x]  Unit test
- [ ] 💚 E2E
This commit is contained in:
ashmrtn 2023-01-25 08:57:30 -08:00 committed by GitHub
parent 46d61c7246
commit a4f2f0a437
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 86 additions and 10 deletions

View File

@ -7,6 +7,7 @@ import (
"github.com/microsoftgraph/msgraph-sdk-go/models" "github.com/microsoftgraph/msgraph-sdk-go/models"
"github.com/pkg/errors" "github.com/pkg/errors"
"golang.org/x/exp/maps"
"github.com/alcionai/corso/src/internal/connector/graph" "github.com/alcionai/corso/src/internal/connector/graph"
"github.com/alcionai/corso/src/internal/connector/support" "github.com/alcionai/corso/src/internal/connector/support"
@ -98,6 +99,12 @@ func (c *Collections) Get(ctx context.Context) ([]data.Collection, error) {
deltaURLs = map[string]string{} deltaURLs = map[string]string{}
// Drive ID -> folder ID -> folder path // Drive ID -> folder ID -> folder path
folderPaths = map[string]map[string]string{} folderPaths = map[string]map[string]string{}
// Items that should be excluded when sourcing data from the base backup.
// TODO(ashmrtn): This list contains the M365 IDs of deleted items so while
// it's technically safe to pass all the way through to kopia (files are
// unlikely to be named their M365 ID) we should wait to do that until we've
// switched to using those IDs for file names in kopia.
excludedItems = map[string]struct{}{}
) )
// Update the collection map with items from each drive // Update the collection map with items from each drive
@ -105,7 +112,13 @@ func (c *Collections) Get(ctx context.Context) ([]data.Collection, error) {
driveID := *d.GetId() driveID := *d.GetId()
driveName := *d.GetName() driveName := *d.GetName()
delta, paths, err := collectItems(ctx, c.service, driveID, driveName, c.UpdateCollections) delta, paths, excluded, err := collectItems(
ctx,
c.service,
driveID,
driveName,
c.UpdateCollections,
)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -121,6 +134,8 @@ func (c *Collections) Get(ctx context.Context) ([]data.Collection, error) {
folderPaths[driveID][id] = p folderPaths[driveID][id] = p
} }
} }
maps.Copy(excludedItems, excluded)
} }
observe.Message(ctx, fmt.Sprintf("Discovered %d items to backup", c.NumItems)) observe.Message(ctx, fmt.Sprintf("Discovered %d items to backup", c.NumItems))
@ -171,6 +186,7 @@ func (c *Collections) UpdateCollections(
items []models.DriveItemable, items []models.DriveItemable,
oldPaths map[string]string, oldPaths map[string]string,
newPaths map[string]string, newPaths map[string]string,
excluded map[string]struct{},
) error { ) error {
for _, item := range items { for _, item := range items {
if item.GetRoot() != nil { if item.GetRoot() != nil {
@ -231,6 +247,19 @@ func (c *Collections) UpdateCollections(
updatePath(newPaths, *item.GetId(), folderPath.String()) updatePath(newPaths, *item.GetId(), folderPath.String())
case item.GetFile() != nil: case item.GetFile() != nil:
if item.GetDeleted() != nil {
excluded[*item.GetId()] = struct{}{}
// Exchange counts items streamed through it which includes deletions so
// add that here too.
c.NumFiles++
c.NumItems++
continue
}
// TODO(ashmrtn): Figure what when an item was moved (maybe) and add it to
// the exclude list.
col, found := c.CollectionMap[collectionPath.String()] col, found := c.CollectionMap[collectionPath.String()]
if !found { if !found {
// TODO(ashmrtn): Compare old and new path and set collection state // TODO(ashmrtn): Compare old and new path and set collection state

View File

@ -105,6 +105,7 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() {
expectedContainerCount int expectedContainerCount int
expectedFileCount int expectedFileCount int
expectedMetadataPaths map[string]string expectedMetadataPaths map[string]string
expectedExcludes map[string]struct{}
}{ }{
{ {
testCase: "Invalid item", testCase: "Invalid item",
@ -115,6 +116,7 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() {
scope: anyFolder, scope: anyFolder,
expect: assert.Error, expect: assert.Error,
expectedMetadataPaths: map[string]string{}, expectedMetadataPaths: map[string]string{},
expectedExcludes: map[string]struct{}{},
}, },
{ {
testCase: "Single File", testCase: "Single File",
@ -135,6 +137,7 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() {
expectedContainerCount: 1, expectedContainerCount: 1,
// Root folder is skipped since it's always present. // Root folder is skipped since it's always present.
expectedMetadataPaths: map[string]string{}, expectedMetadataPaths: map[string]string{},
expectedExcludes: map[string]struct{}{},
}, },
{ {
testCase: "Single Folder", testCase: "Single Folder",
@ -153,6 +156,7 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() {
testBaseDrivePath+"/folder", testBaseDrivePath+"/folder",
)[0], )[0],
}, },
expectedExcludes: map[string]struct{}{},
}, },
{ {
testCase: "Single Package", testCase: "Single Package",
@ -171,6 +175,7 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() {
testBaseDrivePath+"/package", testBaseDrivePath+"/package",
)[0], )[0],
}, },
expectedExcludes: map[string]struct{}{},
}, },
{ {
testCase: "1 root file, 1 folder, 1 package, 2 files, 3 collections", testCase: "1 root file, 1 folder, 1 package, 2 files, 3 collections",
@ -209,6 +214,7 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() {
testBaseDrivePath+"/package", testBaseDrivePath+"/package",
)[0], )[0],
}, },
expectedExcludes: map[string]struct{}{},
}, },
{ {
testCase: "contains folder selector", testCase: "contains folder selector",
@ -258,6 +264,7 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() {
testBaseDrivePath+"/folder/subfolder/folder", testBaseDrivePath+"/folder/subfolder/folder",
)[0], )[0],
}, },
expectedExcludes: map[string]struct{}{},
}, },
{ {
testCase: "prefix subfolder selector", testCase: "prefix subfolder selector",
@ -292,6 +299,7 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() {
testBaseDrivePath+"/folder/subfolder/folder", testBaseDrivePath+"/folder/subfolder/folder",
)[0], )[0],
}, },
expectedExcludes: map[string]struct{}{},
}, },
{ {
testCase: "match subfolder selector", testCase: "match subfolder selector",
@ -318,6 +326,7 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() {
expectedContainerCount: 1, expectedContainerCount: 1,
// No child folders for subfolder so nothing here. // No child folders for subfolder so nothing here.
expectedMetadataPaths: map[string]string{}, expectedMetadataPaths: map[string]string{},
expectedExcludes: map[string]struct{}{},
}, },
{ {
testCase: "not moved folder tree", testCase: "not moved folder tree",
@ -358,6 +367,7 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() {
testBaseDrivePath+"/folder/subfolder", testBaseDrivePath+"/folder/subfolder",
)[0], )[0],
}, },
expectedExcludes: map[string]struct{}{},
}, },
{ {
testCase: "moved folder tree", testCase: "moved folder tree",
@ -398,6 +408,7 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() {
testBaseDrivePath+"/folder/subfolder", testBaseDrivePath+"/folder/subfolder",
)[0], )[0],
}, },
expectedExcludes: map[string]struct{}{},
}, },
{ {
testCase: "moved folder tree and subfolder 1", testCase: "moved folder tree and subfolder 1",
@ -439,6 +450,7 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() {
testBaseDrivePath+"/subfolder", testBaseDrivePath+"/subfolder",
)[0], )[0],
}, },
expectedExcludes: map[string]struct{}{},
}, },
{ {
testCase: "moved folder tree and subfolder 2", testCase: "moved folder tree and subfolder 2",
@ -480,6 +492,7 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() {
testBaseDrivePath+"/subfolder", testBaseDrivePath+"/subfolder",
)[0], )[0],
}, },
expectedExcludes: map[string]struct{}{},
}, },
{ {
testCase: "deleted folder and package", testCase: "deleted folder and package",
@ -508,6 +521,7 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() {
expectedFileCount: 0, expectedFileCount: 0,
expectedContainerCount: 0, expectedContainerCount: 0,
expectedMetadataPaths: map[string]string{}, expectedMetadataPaths: map[string]string{},
expectedExcludes: map[string]struct{}{},
}, },
{ {
testCase: "delete folder tree move subfolder", testCase: "delete folder tree move subfolder",
@ -543,6 +557,24 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() {
testBaseDrivePath+"/subfolder", testBaseDrivePath+"/subfolder",
)[0], )[0],
}, },
expectedExcludes: map[string]struct{}{},
},
{
testCase: "delete file",
items: []models.DriveItemable{
delItem("item", testBaseDrivePath, true, false, false),
},
inputFolderMap: map[string]string{},
scope: anyFolder,
expect: assert.NoError,
expectedCollectionPaths: []string{},
expectedItemCount: 1,
expectedFileCount: 1,
expectedContainerCount: 0,
expectedMetadataPaths: map[string]string{},
expectedExcludes: map[string]struct{}{
"item": {},
},
}, },
} }
@ -551,6 +583,7 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() {
ctx, flush := tester.NewContext() ctx, flush := tester.NewContext()
defer flush() defer flush()
excludes := map[string]struct{}{}
outputFolderMap := map[string]string{} outputFolderMap := map[string]string{}
maps.Copy(outputFolderMap, tt.inputFolderMap) maps.Copy(outputFolderMap, tt.inputFolderMap)
c := NewCollections( c := NewCollections(
@ -562,7 +595,15 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() {
nil, nil,
control.Options{}) control.Options{})
err := c.UpdateCollections(ctx, "driveID", "General", tt.items, tt.inputFolderMap, outputFolderMap) err := c.UpdateCollections(
ctx,
"driveID",
"General",
tt.items,
tt.inputFolderMap,
outputFolderMap,
excludes,
)
tt.expect(t, err) tt.expect(t, err)
assert.Equal(t, len(tt.expectedCollectionPaths), len(c.CollectionMap), "collection paths") assert.Equal(t, len(tt.expectedCollectionPaths), len(c.CollectionMap), "collection paths")
assert.Equal(t, tt.expectedItemCount, c.NumItems, "item count") assert.Equal(t, tt.expectedItemCount, c.NumItems, "item count")
@ -573,6 +614,7 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() {
} }
assert.Equal(t, tt.expectedMetadataPaths, outputFolderMap) assert.Equal(t, tt.expectedMetadataPaths, outputFolderMap)
assert.Equal(t, tt.expectedExcludes, excludes)
}) })
} }
} }

View File

@ -170,6 +170,7 @@ type itemCollector func(
driveItems []models.DriveItemable, driveItems []models.DriveItemable,
oldPaths map[string]string, oldPaths map[string]string,
newPaths map[string]string, newPaths map[string]string,
excluded map[string]struct{},
) error ) error
// collectItems will enumerate all items in the specified drive and hand them to the // collectItems will enumerate all items in the specified drive and hand them to the
@ -179,13 +180,14 @@ func collectItems(
service graph.Servicer, service graph.Servicer,
driveID, driveName string, driveID, driveName string,
collector itemCollector, collector itemCollector,
) (string, map[string]string, error) { ) (string, map[string]string, map[string]struct{}, error) {
var ( var (
newDeltaURL = "" newDeltaURL = ""
// TODO(ashmrtn): Eventually this should probably be a parameter so we can // TODO(ashmrtn): Eventually this should probably be a parameter so we can
// take in previous paths. // take in previous paths.
oldPaths = map[string]string{} oldPaths = map[string]string{}
newPaths = map[string]string{} newPaths = map[string]string{}
excluded = map[string]struct{}{}
) )
maps.Copy(newPaths, oldPaths) maps.Copy(newPaths, oldPaths)
@ -219,16 +221,16 @@ func collectItems(
for { for {
r, err := builder.Get(ctx, requestConfig) r, err := builder.Get(ctx, requestConfig)
if err != nil { if err != nil {
return "", nil, errors.Wrapf( return "", nil, nil, errors.Wrapf(
err, err,
"failed to query drive items. details: %s", "failed to query drive items. details: %s",
support.ConnectorStackErrorTrace(err), support.ConnectorStackErrorTrace(err),
) )
} }
err = collector(ctx, driveID, driveName, r.GetValue(), oldPaths, newPaths) err = collector(ctx, driveID, driveName, r.GetValue(), oldPaths, newPaths, excluded)
if err != nil { if err != nil {
return "", nil, err return "", nil, nil, err
} }
if r.GetOdataDeltaLink() != nil && len(*r.GetOdataDeltaLink()) > 0 { if r.GetOdataDeltaLink() != nil && len(*r.GetOdataDeltaLink()) > 0 {
@ -245,7 +247,7 @@ func collectItems(
builder = msdrives.NewItemRootDeltaRequestBuilder(*nextLink, service.Adapter()) builder = msdrives.NewItemRootDeltaRequestBuilder(*nextLink, service.Adapter())
} }
return newDeltaURL, newPaths, nil return newDeltaURL, newPaths, excluded, nil
} }
// getFolder will lookup the specified folder name under `parentFolderID` // getFolder will lookup the specified folder name under `parentFolderID`
@ -352,7 +354,7 @@ func GetAllFolders(
folders := map[string]*Displayable{} folders := map[string]*Displayable{}
for _, d := range drives { for _, d := range drives {
_, _, err = collectItems( _, _, _, err = collectItems(
ctx, ctx,
gs, gs,
*d.GetId(), *d.GetId(),
@ -363,6 +365,7 @@ func GetAllFolders(
items []models.DriveItemable, items []models.DriveItemable,
oldPaths map[string]string, oldPaths map[string]string,
newPaths map[string]string, newPaths map[string]string,
excluded map[string]struct{},
) error { ) error {
for _, item := range items { for _, item := range items {
// Skip the root item. // Skip the root item.

View File

@ -101,6 +101,7 @@ func (suite *ItemIntegrationSuite) TestItemReader_oneDrive() {
items []models.DriveItemable, items []models.DriveItemable,
oldPaths map[string]string, oldPaths map[string]string,
newPaths map[string]string, newPaths map[string]string,
excluded map[string]struct{},
) error { ) error {
for _, item := range items { for _, item := range items {
if item.GetFile() != nil { if item.GetFile() != nil {
@ -111,7 +112,7 @@ func (suite *ItemIntegrationSuite) TestItemReader_oneDrive() {
return nil return nil
} }
_, _, err := collectItems(ctx, suite, suite.userDriveID, "General", itemCollector) _, _, _, err := collectItems(ctx, suite, suite.userDriveID, "General", itemCollector)
require.NoError(suite.T(), err) require.NoError(suite.T(), err)
// Test Requirement 2: Need a file // Test Requirement 2: Need a file

View File

@ -89,6 +89,7 @@ func (suite *SharePointLibrariesSuite) TestUpdateCollections() {
paths := map[string]string{} paths := map[string]string{}
newPaths := map[string]string{} newPaths := map[string]string{}
excluded := map[string]struct{}{}
c := onedrive.NewCollections( c := onedrive.NewCollections(
tenant, tenant,
site, site,
@ -97,7 +98,7 @@ func (suite *SharePointLibrariesSuite) TestUpdateCollections() {
&MockGraphService{}, &MockGraphService{},
nil, nil,
control.Options{}) control.Options{})
err := c.UpdateCollections(ctx, "driveID", "General", test.items, paths, newPaths) err := c.UpdateCollections(ctx, "driveID", "General", test.items, paths, newPaths, excluded)
test.expect(t, err) test.expect(t, err)
assert.Equal(t, len(test.expectedCollectionPaths), len(c.CollectionMap), "collection paths") assert.Equal(t, len(test.expectedCollectionPaths), len(c.CollectionMap), "collection paths")
assert.Equal(t, test.expectedItemCount, c.NumItems, "item count") assert.Equal(t, test.expectedItemCount, c.NumItems, "item count")