Add deleted files to OneDrive excluded file list (#2250)

## Description

Begin populating a global exclude set with files deleted from OneDrive. The set contains the IDs of files that have been deleted.

This is actually safe to return from GraphConnector now (though it's not returned) as it uses item IDs instead of item names.

Future PRs will need to address handling of (potentially) moved files

## Does this PR need a docs update or release note?

- [ ]  Yes, it's included
- [ ] 🕐 Yes, but in a later PR
- [x]  No 

## Type of change

- [x] 🌻 Feature
- [ ] 🐛 Bugfix
- [ ] 🗺️ Documentation
- [ ] 🤖 Test
- [ ] 💻 CI/Deployment
- [ ] 🧹 Tech Debt/Cleanup

## Issue(s)

* #2242 

## Test Plan

- [ ] 💪 Manual
- [x]  Unit test
- [ ] 💚 E2E
This commit is contained in:
ashmrtn 2023-01-25 08:57:30 -08:00 committed by GitHub
parent 46d61c7246
commit a4f2f0a437
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 86 additions and 10 deletions

View File

@ -7,6 +7,7 @@ import (
"github.com/microsoftgraph/msgraph-sdk-go/models"
"github.com/pkg/errors"
"golang.org/x/exp/maps"
"github.com/alcionai/corso/src/internal/connector/graph"
"github.com/alcionai/corso/src/internal/connector/support"
@ -98,6 +99,12 @@ func (c *Collections) Get(ctx context.Context) ([]data.Collection, error) {
deltaURLs = map[string]string{}
// Drive ID -> folder ID -> folder path
folderPaths = map[string]map[string]string{}
// Items that should be excluded when sourcing data from the base backup.
// TODO(ashmrtn): This list contains the M365 IDs of deleted items so while
// it's technically safe to pass all the way through to kopia (files are
// unlikely to be named their M365 ID) we should wait to do that until we've
// switched to using those IDs for file names in kopia.
excludedItems = map[string]struct{}{}
)
// Update the collection map with items from each drive
@ -105,7 +112,13 @@ func (c *Collections) Get(ctx context.Context) ([]data.Collection, error) {
driveID := *d.GetId()
driveName := *d.GetName()
delta, paths, err := collectItems(ctx, c.service, driveID, driveName, c.UpdateCollections)
delta, paths, excluded, err := collectItems(
ctx,
c.service,
driveID,
driveName,
c.UpdateCollections,
)
if err != nil {
return nil, err
}
@ -121,6 +134,8 @@ func (c *Collections) Get(ctx context.Context) ([]data.Collection, error) {
folderPaths[driveID][id] = p
}
}
maps.Copy(excludedItems, excluded)
}
observe.Message(ctx, fmt.Sprintf("Discovered %d items to backup", c.NumItems))
@ -171,6 +186,7 @@ func (c *Collections) UpdateCollections(
items []models.DriveItemable,
oldPaths map[string]string,
newPaths map[string]string,
excluded map[string]struct{},
) error {
for _, item := range items {
if item.GetRoot() != nil {
@ -231,6 +247,19 @@ func (c *Collections) UpdateCollections(
updatePath(newPaths, *item.GetId(), folderPath.String())
case item.GetFile() != nil:
if item.GetDeleted() != nil {
excluded[*item.GetId()] = struct{}{}
// Exchange counts items streamed through it which includes deletions so
// add that here too.
c.NumFiles++
c.NumItems++
continue
}
// TODO(ashmrtn): Figure what when an item was moved (maybe) and add it to
// the exclude list.
col, found := c.CollectionMap[collectionPath.String()]
if !found {
// TODO(ashmrtn): Compare old and new path and set collection state

View File

@ -105,6 +105,7 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() {
expectedContainerCount int
expectedFileCount int
expectedMetadataPaths map[string]string
expectedExcludes map[string]struct{}
}{
{
testCase: "Invalid item",
@ -115,6 +116,7 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() {
scope: anyFolder,
expect: assert.Error,
expectedMetadataPaths: map[string]string{},
expectedExcludes: map[string]struct{}{},
},
{
testCase: "Single File",
@ -135,6 +137,7 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() {
expectedContainerCount: 1,
// Root folder is skipped since it's always present.
expectedMetadataPaths: map[string]string{},
expectedExcludes: map[string]struct{}{},
},
{
testCase: "Single Folder",
@ -153,6 +156,7 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() {
testBaseDrivePath+"/folder",
)[0],
},
expectedExcludes: map[string]struct{}{},
},
{
testCase: "Single Package",
@ -171,6 +175,7 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() {
testBaseDrivePath+"/package",
)[0],
},
expectedExcludes: map[string]struct{}{},
},
{
testCase: "1 root file, 1 folder, 1 package, 2 files, 3 collections",
@ -209,6 +214,7 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() {
testBaseDrivePath+"/package",
)[0],
},
expectedExcludes: map[string]struct{}{},
},
{
testCase: "contains folder selector",
@ -258,6 +264,7 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() {
testBaseDrivePath+"/folder/subfolder/folder",
)[0],
},
expectedExcludes: map[string]struct{}{},
},
{
testCase: "prefix subfolder selector",
@ -292,6 +299,7 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() {
testBaseDrivePath+"/folder/subfolder/folder",
)[0],
},
expectedExcludes: map[string]struct{}{},
},
{
testCase: "match subfolder selector",
@ -318,6 +326,7 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() {
expectedContainerCount: 1,
// No child folders for subfolder so nothing here.
expectedMetadataPaths: map[string]string{},
expectedExcludes: map[string]struct{}{},
},
{
testCase: "not moved folder tree",
@ -358,6 +367,7 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() {
testBaseDrivePath+"/folder/subfolder",
)[0],
},
expectedExcludes: map[string]struct{}{},
},
{
testCase: "moved folder tree",
@ -398,6 +408,7 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() {
testBaseDrivePath+"/folder/subfolder",
)[0],
},
expectedExcludes: map[string]struct{}{},
},
{
testCase: "moved folder tree and subfolder 1",
@ -439,6 +450,7 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() {
testBaseDrivePath+"/subfolder",
)[0],
},
expectedExcludes: map[string]struct{}{},
},
{
testCase: "moved folder tree and subfolder 2",
@ -480,6 +492,7 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() {
testBaseDrivePath+"/subfolder",
)[0],
},
expectedExcludes: map[string]struct{}{},
},
{
testCase: "deleted folder and package",
@ -508,6 +521,7 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() {
expectedFileCount: 0,
expectedContainerCount: 0,
expectedMetadataPaths: map[string]string{},
expectedExcludes: map[string]struct{}{},
},
{
testCase: "delete folder tree move subfolder",
@ -543,6 +557,24 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() {
testBaseDrivePath+"/subfolder",
)[0],
},
expectedExcludes: map[string]struct{}{},
},
{
testCase: "delete file",
items: []models.DriveItemable{
delItem("item", testBaseDrivePath, true, false, false),
},
inputFolderMap: map[string]string{},
scope: anyFolder,
expect: assert.NoError,
expectedCollectionPaths: []string{},
expectedItemCount: 1,
expectedFileCount: 1,
expectedContainerCount: 0,
expectedMetadataPaths: map[string]string{},
expectedExcludes: map[string]struct{}{
"item": {},
},
},
}
@ -551,6 +583,7 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() {
ctx, flush := tester.NewContext()
defer flush()
excludes := map[string]struct{}{}
outputFolderMap := map[string]string{}
maps.Copy(outputFolderMap, tt.inputFolderMap)
c := NewCollections(
@ -562,7 +595,15 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() {
nil,
control.Options{})
err := c.UpdateCollections(ctx, "driveID", "General", tt.items, tt.inputFolderMap, outputFolderMap)
err := c.UpdateCollections(
ctx,
"driveID",
"General",
tt.items,
tt.inputFolderMap,
outputFolderMap,
excludes,
)
tt.expect(t, err)
assert.Equal(t, len(tt.expectedCollectionPaths), len(c.CollectionMap), "collection paths")
assert.Equal(t, tt.expectedItemCount, c.NumItems, "item count")
@ -573,6 +614,7 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() {
}
assert.Equal(t, tt.expectedMetadataPaths, outputFolderMap)
assert.Equal(t, tt.expectedExcludes, excludes)
})
}
}

View File

@ -170,6 +170,7 @@ type itemCollector func(
driveItems []models.DriveItemable,
oldPaths map[string]string,
newPaths map[string]string,
excluded map[string]struct{},
) error
// collectItems will enumerate all items in the specified drive and hand them to the
@ -179,13 +180,14 @@ func collectItems(
service graph.Servicer,
driveID, driveName string,
collector itemCollector,
) (string, map[string]string, error) {
) (string, map[string]string, map[string]struct{}, error) {
var (
newDeltaURL = ""
// TODO(ashmrtn): Eventually this should probably be a parameter so we can
// take in previous paths.
oldPaths = map[string]string{}
newPaths = map[string]string{}
excluded = map[string]struct{}{}
)
maps.Copy(newPaths, oldPaths)
@ -219,16 +221,16 @@ func collectItems(
for {
r, err := builder.Get(ctx, requestConfig)
if err != nil {
return "", nil, errors.Wrapf(
return "", nil, nil, errors.Wrapf(
err,
"failed to query drive items. details: %s",
support.ConnectorStackErrorTrace(err),
)
}
err = collector(ctx, driveID, driveName, r.GetValue(), oldPaths, newPaths)
err = collector(ctx, driveID, driveName, r.GetValue(), oldPaths, newPaths, excluded)
if err != nil {
return "", nil, err
return "", nil, nil, err
}
if r.GetOdataDeltaLink() != nil && len(*r.GetOdataDeltaLink()) > 0 {
@ -245,7 +247,7 @@ func collectItems(
builder = msdrives.NewItemRootDeltaRequestBuilder(*nextLink, service.Adapter())
}
return newDeltaURL, newPaths, nil
return newDeltaURL, newPaths, excluded, nil
}
// getFolder will lookup the specified folder name under `parentFolderID`
@ -352,7 +354,7 @@ func GetAllFolders(
folders := map[string]*Displayable{}
for _, d := range drives {
_, _, err = collectItems(
_, _, _, err = collectItems(
ctx,
gs,
*d.GetId(),
@ -363,6 +365,7 @@ func GetAllFolders(
items []models.DriveItemable,
oldPaths map[string]string,
newPaths map[string]string,
excluded map[string]struct{},
) error {
for _, item := range items {
// Skip the root item.

View File

@ -101,6 +101,7 @@ func (suite *ItemIntegrationSuite) TestItemReader_oneDrive() {
items []models.DriveItemable,
oldPaths map[string]string,
newPaths map[string]string,
excluded map[string]struct{},
) error {
for _, item := range items {
if item.GetFile() != nil {
@ -111,7 +112,7 @@ func (suite *ItemIntegrationSuite) TestItemReader_oneDrive() {
return nil
}
_, _, err := collectItems(ctx, suite, suite.userDriveID, "General", itemCollector)
_, _, _, err := collectItems(ctx, suite, suite.userDriveID, "General", itemCollector)
require.NoError(suite.T(), err)
// Test Requirement 2: Need a file

View File

@ -89,6 +89,7 @@ func (suite *SharePointLibrariesSuite) TestUpdateCollections() {
paths := map[string]string{}
newPaths := map[string]string{}
excluded := map[string]struct{}{}
c := onedrive.NewCollections(
tenant,
site,
@ -97,7 +98,7 @@ func (suite *SharePointLibrariesSuite) TestUpdateCollections() {
&MockGraphService{},
nil,
control.Options{})
err := c.UpdateCollections(ctx, "driveID", "General", test.items, paths, newPaths)
err := c.UpdateCollections(ctx, "driveID", "General", test.items, paths, newPaths, excluded)
test.expect(t, err)
assert.Equal(t, len(test.expectedCollectionPaths), len(c.CollectionMap), "collection paths")
assert.Equal(t, test.expectedItemCount, c.NumItems, "item count")