Update OneDrive metadata with folder moves (#2193)

## Description

Track folder moves and update the persisted metadata accordingly. Moves need to take into account subtree changes because OneDrive will not notify us of subfolders moving if nothing else was updated on the subfolder.

Handle updates by making a copy of the folder map for ease of updating and later on ease of finding out which collection paths have changed

## Does this PR need a docs update or release note?

- [ ]  Yes, it's included
- [ ] 🕐 Yes, but in a later PR
- [x]  No 

## Type of change

- [x] 🌻 Feature
- [ ] 🐛 Bugfix
- [ ] 🗺️ Documentation
- [ ] 🤖 Test
- [ ] 💻 CI/Deployment
- [ ] 🧹 Tech Debt/Cleanup

## Issue(s)

* #2120

## Test Plan

- [ ] 💪 Manual
- [x]  Unit test
- [ ] 💚 E2E
This commit is contained in:
ashmrtn 2023-01-20 11:18:59 -08:00 committed by GitHub
parent 7f44db6c7c
commit bb7f54b049
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 254 additions and 14 deletions

View File

@ -161,12 +161,16 @@ func (c *Collections) Get(ctx context.Context) ([]data.Collection, error) {
}
// UpdateCollections initializes and adds the provided drive items to Collections
// A new collection is created for every drive folder (or package)
// A new collection is created for every drive folder (or package).
// oldPaths is the unchanged data that was loaded from the metadata file.
// newPaths starts as a copy of oldPaths and is updated as changes are found in
// the returned results.
func (c *Collections) UpdateCollections(
ctx context.Context,
driveID, driveName string,
items []models.DriveItemable,
paths map[string]string,
oldPaths map[string]string,
newPaths map[string]string,
) error {
for _, item := range items {
if item.GetRoot() != nil {
@ -201,7 +205,7 @@ func (c *Collections) UpdateCollections(
// Nested folders also return deleted delta results so we don't have to
// worry about doing a prefix search in the map to remove the subtree of
// the deleted folder/package.
delete(paths, *item.GetId())
delete(newPaths, *item.GetId())
// TODO(ashmrtn): Create a collection with state Deleted.
@ -217,13 +221,20 @@ func (c *Collections) UpdateCollections(
return err
}
// TODO(ashmrtn): Handle moves by setting the collection state if the
// collection doesn't already exist/have that state.
paths[*item.GetId()] = folderPath.String()
// Moved folders don't cause delta results for any subfolders nested in
// them. We need to go through and update paths to handle that. We only
// update newPaths so we don't accidentally clobber previous deletes.
//
// TODO(ashmrtn): Since we're also getting notifications about folder
// moves we may need to handle updates to a path of a collection we've
// already created and partially populated.
updatePath(newPaths, *item.GetId(), folderPath.String())
case item.GetFile() != nil:
col, found := c.CollectionMap[collectionPath.String()]
if !found {
// TODO(ashmrtn): Compare old and new path and set collection state
// accordingly.
col = NewCollection(
collectionPath,
driveID,
@ -296,3 +307,27 @@ func includePath(ctx context.Context, m folderMatcher, folderPath path.Path) boo
return m.Matches(folderPathString)
}
func updatePath(paths map[string]string, id, newPath string) {
oldPath := paths[id]
if len(oldPath) == 0 {
paths[id] = newPath
return
}
if oldPath == newPath {
return
}
// We need to do a prefix search on the rest of the map to update the subtree.
// We don't need to make collections for all of these, as hierarchy merging in
// other components should take care of that. We do need to ensure that the
// resulting map contains all folders though so we know the next time around.
for folderID, p := range paths {
if !strings.HasPrefix(p, oldPath) {
continue
}
paths[folderID] = strings.Replace(p, oldPath, newPath, 1)
}
}

View File

@ -319,6 +319,168 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() {
// No child folders for subfolder so nothing here.
expectedMetadataPaths: map[string]string{},
},
{
testCase: "not moved folder tree",
items: []models.DriveItemable{
driveItem("folder", "folder", testBaseDrivePath, false, true, false),
},
inputFolderMap: map[string]string{
"folder": expectedPathAsSlice(
suite.T(),
tenant,
user,
testBaseDrivePath+"/folder",
)[0],
"subfolder": expectedPathAsSlice(
suite.T(),
tenant,
user,
testBaseDrivePath+"/folder/subfolder",
)[0],
},
scope: anyFolder,
expect: assert.NoError,
expectedCollectionPaths: []string{},
expectedItemCount: 0,
expectedFileCount: 0,
expectedContainerCount: 0,
expectedMetadataPaths: map[string]string{
"folder": expectedPathAsSlice(
suite.T(),
tenant,
user,
testBaseDrivePath+"/folder",
)[0],
"subfolder": expectedPathAsSlice(
suite.T(),
tenant,
user,
testBaseDrivePath+"/folder/subfolder",
)[0],
},
},
{
testCase: "moved folder tree",
items: []models.DriveItemable{
driveItem("folder", "folder", testBaseDrivePath, false, true, false),
},
inputFolderMap: map[string]string{
"folder": expectedPathAsSlice(
suite.T(),
tenant,
user,
testBaseDrivePath+"/a-folder",
)[0],
"subfolder": expectedPathAsSlice(
suite.T(),
tenant,
user,
testBaseDrivePath+"/a-folder/subfolder",
)[0],
},
scope: anyFolder,
expect: assert.NoError,
expectedCollectionPaths: []string{},
expectedItemCount: 0,
expectedFileCount: 0,
expectedContainerCount: 0,
expectedMetadataPaths: map[string]string{
"folder": expectedPathAsSlice(
suite.T(),
tenant,
user,
testBaseDrivePath+"/folder",
)[0],
"subfolder": expectedPathAsSlice(
suite.T(),
tenant,
user,
testBaseDrivePath+"/folder/subfolder",
)[0],
},
},
{
testCase: "moved folder tree and subfolder 1",
items: []models.DriveItemable{
driveItem("folder", "folder", testBaseDrivePath, false, true, false),
driveItem("subfolder", "subfolder", testBaseDrivePath, false, true, false),
},
inputFolderMap: map[string]string{
"folder": expectedPathAsSlice(
suite.T(),
tenant,
user,
testBaseDrivePath+"/a-folder",
)[0],
"subfolder": expectedPathAsSlice(
suite.T(),
tenant,
user,
testBaseDrivePath+"/a-folder/subfolder",
)[0],
},
scope: anyFolder,
expect: assert.NoError,
expectedCollectionPaths: []string{},
expectedItemCount: 0,
expectedFileCount: 0,
expectedContainerCount: 0,
expectedMetadataPaths: map[string]string{
"folder": expectedPathAsSlice(
suite.T(),
tenant,
user,
testBaseDrivePath+"/folder",
)[0],
"subfolder": expectedPathAsSlice(
suite.T(),
tenant,
user,
testBaseDrivePath+"/subfolder",
)[0],
},
},
{
testCase: "moved folder tree and subfolder 2",
items: []models.DriveItemable{
driveItem("subfolder", "subfolder", testBaseDrivePath, false, true, false),
driveItem("folder", "folder", testBaseDrivePath, false, true, false),
},
inputFolderMap: map[string]string{
"folder": expectedPathAsSlice(
suite.T(),
tenant,
user,
testBaseDrivePath+"/a-folder",
)[0],
"subfolder": expectedPathAsSlice(
suite.T(),
tenant,
user,
testBaseDrivePath+"/a-folder/subfolder",
)[0],
},
scope: anyFolder,
expect: assert.NoError,
expectedCollectionPaths: []string{},
expectedItemCount: 0,
expectedFileCount: 0,
expectedContainerCount: 0,
expectedMetadataPaths: map[string]string{
"folder": expectedPathAsSlice(
suite.T(),
tenant,
user,
testBaseDrivePath+"/folder",
)[0],
"subfolder": expectedPathAsSlice(
suite.T(),
tenant,
user,
testBaseDrivePath+"/subfolder",
)[0],
},
},
{
testCase: "deleted folder and package",
items: []models.DriveItemable{
@ -347,6 +509,41 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() {
expectedContainerCount: 0,
expectedMetadataPaths: map[string]string{},
},
{
testCase: "delete folder tree move subfolder",
items: []models.DriveItemable{
delItem("folder", testBaseDrivePath, false, true, false),
driveItem("subfolder", "subfolder", testBaseDrivePath, false, true, false),
},
inputFolderMap: map[string]string{
"folder": expectedPathAsSlice(
suite.T(),
tenant,
user,
testBaseDrivePath+"/folder",
)[0],
"subfolder": expectedPathAsSlice(
suite.T(),
tenant,
user,
testBaseDrivePath+"/folder/subfolder",
)[0],
},
scope: anyFolder,
expect: assert.NoError,
expectedCollectionPaths: []string{},
expectedItemCount: 0,
expectedFileCount: 0,
expectedContainerCount: 0,
expectedMetadataPaths: map[string]string{
"subfolder": expectedPathAsSlice(
suite.T(),
tenant,
user,
testBaseDrivePath+"/subfolder",
)[0],
},
},
}
for _, tt := range tests {
@ -365,7 +562,7 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() {
nil,
control.Options{})
err := c.UpdateCollections(ctx, "driveID", "General", tt.items, outputFolderMap)
err := c.UpdateCollections(ctx, "driveID", "General", tt.items, tt.inputFolderMap, outputFolderMap)
tt.expect(t, err)
assert.Equal(t, len(tt.expectedCollectionPaths), len(c.CollectionMap), "collection paths")
assert.Equal(t, tt.expectedItemCount, c.NumItems, "item count")

View File

@ -13,6 +13,7 @@ import (
"github.com/microsoftgraph/msgraph-sdk-go/models/odataerrors"
"github.com/microsoftgraph/msgraph-sdk-go/sites"
"github.com/pkg/errors"
"golang.org/x/exp/maps"
"github.com/alcionai/corso/src/internal/connector/graph"
"github.com/alcionai/corso/src/internal/connector/support"
@ -167,7 +168,8 @@ type itemCollector func(
ctx context.Context,
driveID, driveName string,
driveItems []models.DriveItemable,
paths map[string]string,
oldPaths map[string]string,
newPaths map[string]string,
) error
// collectItems will enumerate all items in the specified drive and hand them to the
@ -182,9 +184,12 @@ func collectItems(
newDeltaURL = ""
// TODO(ashmrtn): Eventually this should probably be a parameter so we can
// take in previous paths.
paths = map[string]string{}
oldPaths = map[string]string{}
newPaths = map[string]string{}
)
maps.Copy(newPaths, oldPaths)
// TODO: Specify a timestamp in the delta query
// https://docs.microsoft.com/en-us/graph/api/driveitem-delta?
// view=graph-rest-1.0&tabs=http#example-4-retrieving-delta-results-using-a-timestamp
@ -221,7 +226,7 @@ func collectItems(
)
}
err = collector(ctx, driveID, driveName, r.GetValue(), paths)
err = collector(ctx, driveID, driveName, r.GetValue(), oldPaths, newPaths)
if err != nil {
return "", nil, err
}
@ -240,7 +245,7 @@ func collectItems(
builder = msdrives.NewItemRootDeltaRequestBuilder(*nextLink, service.Adapter())
}
return newDeltaURL, paths, nil
return newDeltaURL, newPaths, nil
}
// getFolder will lookup the specified folder name under `parentFolderID`
@ -356,7 +361,8 @@ func GetAllFolders(
innerCtx context.Context,
driveID, driveName string,
items []models.DriveItemable,
paths map[string]string,
oldPaths map[string]string,
newPaths map[string]string,
) error {
for _, item := range items {
// Skip the root item.

View File

@ -99,7 +99,8 @@ func (suite *ItemIntegrationSuite) TestItemReader_oneDrive() {
ctx context.Context,
driveID, driveName string,
items []models.DriveItemable,
paths map[string]string,
oldPaths map[string]string,
newPaths map[string]string,
) error {
for _, item := range items {
if item.GetFile() != nil {

View File

@ -88,6 +88,7 @@ func (suite *SharePointLibrariesSuite) TestUpdateCollections() {
defer flush()
paths := map[string]string{}
newPaths := map[string]string{}
c := onedrive.NewCollections(
tenant,
site,
@ -96,7 +97,7 @@ func (suite *SharePointLibrariesSuite) TestUpdateCollections() {
&MockGraphService{},
nil,
control.Options{})
err := c.UpdateCollections(ctx, "driveID", "General", test.items, paths)
err := c.UpdateCollections(ctx, "driveID", "General", test.items, paths, newPaths)
test.expect(t, err)
assert.Equal(t, len(test.expectedCollectionPaths), len(c.CollectionMap), "collection paths")
assert.Equal(t, test.expectedItemCount, c.NumItems, "item count")