Begin persisting OneDrive/SharePoint library metadata (#2144)

## Description

Start persisting the folder path maps and delta URLs for backed-up OneDrive/SharePoint drives. Delta URLs are saved in a map[drive ID]delta URL, while folder paths are saved in a map[drive ID]map[folder ID]folder path.

Needs another patch to properly save the path for folders that match the selector; currently, the selector comparison is only done on the parent of an item.

Later PRs can get the new folder map by taking the map from the previous backup and making changes to it when folder deletions/moves are encountered

## Does this PR need a docs update or release note?

- [ ]  Yes, it's included
- [ ] 🕐 Yes, but in a later PR
- [x]  No 

## Type of change

- [x] 🌻 Feature
- [ ] 🐛 Bugfix
- [ ] 🗺️ Documentation
- [ ] 🤖 Test
- [ ] 💻 CI/Deployment
- [ ] 🧹 Tech Debt/Cleanup

## Issue(s)

* #2120 

## Test Plan

- [x] 💪 Manual
- [x]  Unit test
- [ ] 💚 E2E
This commit is contained in:
ashmrtn 2023-01-17 12:50:24 -08:00 committed by GitHub
parent 1ef300d6c2
commit 45874abf7e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 259 additions and 101 deletions

View File

@ -13,6 +13,7 @@ import (
"github.com/alcionai/corso/src/internal/connector/sharepoint"
"github.com/alcionai/corso/src/internal/tester"
"github.com/alcionai/corso/src/pkg/control"
"github.com/alcionai/corso/src/pkg/path"
"github.com/alcionai/corso/src/pkg/selectors"
)
@ -303,9 +304,7 @@ func (suite *ConnectorCreateSharePointCollectionIntegrationSuite) SetupSuite() {
tester.LogTimeOfTest(suite.T())
}
// TestCreateSharePointCollection. Ensures the proper amount of collections are created based
// on the selector.
func (suite *ConnectorCreateSharePointCollectionIntegrationSuite) TestCreateSharePointCollection() {
func (suite *ConnectorCreateSharePointCollectionIntegrationSuite) TestCreateSharePointCollection_Libraries() {
ctx, flush := tester.NewContext()
defer flush()
@ -316,51 +315,46 @@ func (suite *ConnectorCreateSharePointCollectionIntegrationSuite) TestCreateShar
siteIDs = []string{siteID}
)
tables := []struct {
name string
sel func() selectors.Selector
comparator assert.ComparisonAssertionFunc
}{
{
name: "SharePoint.Libraries",
comparator: assert.Equal,
sel: func() selectors.Selector {
sel := selectors.NewSharePointBackup(siteIDs)
sel.Include(sel.Libraries([]string{"foo"}, selectors.PrefixMatch()))
return sel.Selector
},
},
{
name: "SharePoint.Lists",
comparator: assert.Less,
sel: func() selectors.Selector {
sel := selectors.NewSharePointBackup(siteIDs)
sel.Include(sel.Lists(selectors.Any(), selectors.PrefixMatch()))
sel := selectors.NewSharePointBackup(siteIDs)
sel.Include(sel.Libraries([]string{"foo"}, selectors.PrefixMatch()))
return sel.Selector
},
},
}
cols, err := gc.DataCollections(ctx, sel.Selector, nil, control.Options{})
require.NoError(t, err)
assert.Len(t, cols, 1)
for _, test := range tables {
t.Run(test.name, func(t *testing.T) {
cols, err := gc.DataCollections(ctx, test.sel(), nil, control.Options{})
require.NoError(t, err)
test.comparator(t, 0, len(cols))
if test.name == "SharePoint.Lists" {
for _, collection := range cols {
t.Logf("Path: %s\n", collection.FullPath().String())
for item := range collection.Items() {
t.Log("File: " + item.UUID())
bytes, err := io.ReadAll(item.ToReader())
require.NoError(t, err)
t.Log(string(bytes))
}
}
}
})
for _, collection := range cols {
t.Logf("Path: %s\n", collection.FullPath().String())
assert.Equal(t, path.SharePointMetadataService, collection.FullPath().Service())
}
}
// TestCreateSharePointCollection_Lists requests data collections for the
// configured test site using a selector that includes lists matching
// selectors.Any(). It requires the call to succeed, asserts that at least one
// collection comes back, and logs every collection path together with the ID
// and raw contents of each item.
func (suite *ConnectorCreateSharePointCollectionIntegrationSuite) TestCreateSharePointCollection_Lists() {
	ctx, flush := tester.NewContext()
	defer flush()

	t := suite.T()
	siteID := tester.M365SiteID(t)
	connector := loadConnector(ctx, t, Sites)

	backupSel := selectors.NewSharePointBackup([]string{siteID})
	backupSel.Include(backupSel.Lists(selectors.Any(), selectors.PrefixMatch()))

	collections, err := connector.DataCollections(ctx, backupSel.Selector, nil, control.Options{})
	require.NoError(t, err)
	assert.Less(t, 0, len(collections))

	for _, coll := range collections {
		t.Logf("Path: %s\n", coll.FullPath().String())

		// Drain every item so a read failure on any of them fails the test.
		for entry := range coll.Items() {
			t.Log("File: " + entry.UUID())

			contents, readErr := io.ReadAll(entry.ToReader())
			require.NoError(t, readErr)
			t.Log(string(contents))
		}
	}
}

View File

@ -25,6 +25,17 @@ const (
SharePointSource
)
// toPathServiceCat translates a driveSource into the path service and category
// pair associated with it. Any source other than OneDriveSource or
// SharePointSource yields path.UnknownService and path.UnknownCategory.
func (ds driveSource) toPathServiceCat() (path.ServiceType, path.CategoryType) {
	// Start from the "unknown" pair so unrecognized sources fall through
	// unchanged.
	var (
		service  path.ServiceType  = path.UnknownService
		category path.CategoryType = path.UnknownCategory
	)

	switch ds {
	case OneDriveSource:
		service, category = path.OneDriveService, path.FilesCategory
	case SharePointSource:
		service, category = path.SharePointService, path.LibrariesCategory
	}

	return service, category
}
type folderMatcher interface {
IsAny() bool
Matches(string) bool
@ -81,27 +92,80 @@ func (c *Collections) Get(ctx context.Context) ([]data.Collection, error) {
return nil, err
}
var (
// Drive ID -> delta URL for drive
deltaURLs = map[string]string{}
// Drive ID -> folder ID -> folder path
folderPaths = map[string]map[string]string{}
)
// Update the collection map with items from each drive
for _, d := range drives {
err = collectItems(ctx, c.service, *d.GetId(), c.UpdateCollections)
driveID := *d.GetId()
delta, paths, err := collectItems(ctx, c.service, driveID, c.UpdateCollections)
if err != nil {
return nil, err
}
if len(delta) > 0 {
deltaURLs[driveID] = delta
}
if len(paths) > 0 {
folderPaths[driveID] = map[string]string{}
for id, p := range paths {
folderPaths[driveID][id] = p
}
}
}
observe.Message(ctx, fmt.Sprintf("Discovered %d items to backup", c.NumItems))
collections := make([]data.Collection, 0, len(c.CollectionMap))
// Add an extra for the metadata collection.
collections := make([]data.Collection, 0, len(c.CollectionMap)+1)
for _, coll := range c.CollectionMap {
collections = append(collections, coll)
}
service, category := c.source.toPathServiceCat()
metadata, err := graph.MakeMetadataCollection(
c.tenant,
c.resourceOwner,
service,
category,
[]graph.MetadataCollectionEntry{
graph.NewMetadataEntry(graph.PreviousPathFileName, folderPaths),
graph.NewMetadataEntry(graph.DeltaURLsFileName, deltaURLs),
},
c.statusUpdater,
)
if err != nil {
// Technically it's safe to continue here because the logic for starting an
// incremental backup should eventually find that the metadata files are
// empty/missing and default to a full backup.
logger.Ctx(ctx).Warnw(
"making metadata collection for future incremental backups",
"error",
err,
)
} else {
collections = append(collections, metadata)
}
return collections, nil
}
// UpdateCollections initializes and adds the provided drive items to Collections
// A new collection is created for every drive folder (or package)
func (c *Collections) UpdateCollections(ctx context.Context, driveID string, items []models.DriveItemable) error {
func (c *Collections) UpdateCollections(
ctx context.Context,
driveID string,
items []models.DriveItemable,
paths map[string]string,
) error {
for _, item := range items {
if item.GetRoot() != nil {
// Skip the root item
@ -131,9 +195,19 @@ func (c *Collections) UpdateCollections(ctx context.Context, driveID string, ite
switch {
case item.GetFolder() != nil, item.GetPackage() != nil:
// Leave this here so we don't fall into the default case.
// TODO: This is where we might create a "special file" to represent these in the backup repository
// e.g. a ".folderMetadataFile"
// Eventually, deletions of folders will be handled here so we may as well
// start off by saving the path.Path of the item instead of just the
// OneDrive parentRef or such.
folderPath, err := collectionPath.Append(*item.GetName(), false)
if err != nil {
logger.Ctx(ctx).Errorw("failed building collection path", "error", err)
return err
}
// TODO(ashmrtn): Handle deletions by removing this entry from the map.
// TODO(ashmrtn): Handle moves by setting the collection state if the
// collection doesn't already exist/have that state.
paths[*item.GetId()] = folderPath.String()
case item.GetFile() != nil:
col, found := c.CollectionMap[collectionPath.String()]

View File

@ -102,19 +102,21 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() {
expectedItemCount int
expectedContainerCount int
expectedFileCount int
expectedMetadataPaths map[string]string
}{
{
testCase: "Invalid item",
items: []models.DriveItemable{
driveItem("item", testBaseDrivePath, false, false, false),
driveItem("item", "item", testBaseDrivePath, false, false, false),
},
scope: anyFolder,
expect: assert.Error,
scope: anyFolder,
expect: assert.Error,
expectedMetadataPaths: map[string]string{},
},
{
testCase: "Single File",
items: []models.DriveItemable{
driveItem("file", testBaseDrivePath, true, false, false),
driveItem("file", "file", testBaseDrivePath, true, false, false),
},
scope: anyFolder,
expect: assert.NoError,
@ -127,33 +129,51 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() {
expectedItemCount: 2,
expectedFileCount: 1,
expectedContainerCount: 1,
// Root folder is skipped since it's always present.
expectedMetadataPaths: map[string]string{},
},
{
testCase: "Single Folder",
items: []models.DriveItemable{
driveItem("folder", testBaseDrivePath, false, true, false),
driveItem("folder", "folder", testBaseDrivePath, false, true, false),
},
scope: anyFolder,
expect: assert.NoError,
expectedCollectionPaths: []string{},
expectedMetadataPaths: map[string]string{
"folder": expectedPathAsSlice(
suite.T(),
tenant,
user,
testBaseDrivePath+"/folder",
)[0],
},
},
{
testCase: "Single Package",
items: []models.DriveItemable{
driveItem("package", testBaseDrivePath, false, false, true),
driveItem("package", "package", testBaseDrivePath, false, false, true),
},
scope: anyFolder,
expect: assert.NoError,
expectedCollectionPaths: []string{},
expectedMetadataPaths: map[string]string{
"package": expectedPathAsSlice(
suite.T(),
tenant,
user,
testBaseDrivePath+"/package",
)[0],
},
},
{
testCase: "1 root file, 1 folder, 1 package, 2 files, 3 collections",
items: []models.DriveItemable{
driveItem("fileInRoot", testBaseDrivePath, true, false, false),
driveItem("folder", testBaseDrivePath, false, true, false),
driveItem("package", testBaseDrivePath, false, false, true),
driveItem("fileInFolder", testBaseDrivePath+folder, true, false, false),
driveItem("fileInPackage", testBaseDrivePath+pkg, true, false, false),
driveItem("fileInRoot", "fileInRoot", testBaseDrivePath, true, false, false),
driveItem("folder", "folder", testBaseDrivePath, false, true, false),
driveItem("package", "package", testBaseDrivePath, false, false, true),
driveItem("fileInFolder", "fileInFolder", testBaseDrivePath+folder, true, false, false),
driveItem("fileInPackage", "fileInPackage", testBaseDrivePath+pkg, true, false, false),
},
scope: anyFolder,
expect: assert.NoError,
@ -168,18 +188,32 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() {
expectedItemCount: 6,
expectedFileCount: 3,
expectedContainerCount: 3,
expectedMetadataPaths: map[string]string{
"folder": expectedPathAsSlice(
suite.T(),
tenant,
user,
testBaseDrivePath+"/folder",
)[0],
"package": expectedPathAsSlice(
suite.T(),
tenant,
user,
testBaseDrivePath+"/package",
)[0],
},
},
{
testCase: "contains folder selector",
items: []models.DriveItemable{
driveItem("fileInRoot", testBaseDrivePath, true, false, false),
driveItem("folder", testBaseDrivePath, false, true, false),
driveItem("subfolder", testBaseDrivePath+folder, false, true, false),
driveItem("folder", testBaseDrivePath+folderSub, false, true, false),
driveItem("package", testBaseDrivePath, false, false, true),
driveItem("fileInFolder", testBaseDrivePath+folder, true, false, false),
driveItem("fileInFolder2", testBaseDrivePath+folderSub+folder, true, false, false),
driveItem("fileInPackage", testBaseDrivePath+pkg, true, false, false),
driveItem("fileInRoot", "fileInRoot", testBaseDrivePath, true, false, false),
driveItem("folder", "folder", testBaseDrivePath, false, true, false),
driveItem("subfolder", "subfolder", testBaseDrivePath+folder, false, true, false),
driveItem("folder2", "folder", testBaseDrivePath+folderSub, false, true, false),
driveItem("package", "package", testBaseDrivePath, false, false, true),
driveItem("fileInFolder", "fileInFolder", testBaseDrivePath+folder, true, false, false),
driveItem("fileInFolder2", "fileInFolder2", testBaseDrivePath+folderSub+folder, true, false, false),
driveItem("fileInFolderPackage", "fileInPackage", testBaseDrivePath+pkg, true, false, false),
},
scope: (&selectors.OneDriveBackup{}).Folders([]string{"folder"})[0],
expect: assert.NoError,
@ -200,18 +234,34 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() {
expectedItemCount: 4,
expectedFileCount: 2,
expectedContainerCount: 2,
// just "folder" isn't added here because the include check is done on the
// parent path since we only check later if something is a folder or not.
expectedMetadataPaths: map[string]string{
"subfolder": expectedPathAsSlice(
suite.T(),
tenant,
user,
testBaseDrivePath+"/folder/subfolder",
)[0],
"folder2": expectedPathAsSlice(
suite.T(),
tenant,
user,
testBaseDrivePath+"/folder/subfolder/folder",
)[0],
},
},
{
testCase: "prefix subfolder selector",
items: []models.DriveItemable{
driveItem("fileInRoot", testBaseDrivePath, true, false, false),
driveItem("folder", testBaseDrivePath, false, true, false),
driveItem("subfolder", testBaseDrivePath+folder, false, true, false),
driveItem("folder", testBaseDrivePath+folderSub, false, true, false),
driveItem("package", testBaseDrivePath, false, false, true),
driveItem("fileInFolder", testBaseDrivePath+folder, true, false, false),
driveItem("fileInFolder2", testBaseDrivePath+folderSub+folder, true, false, false),
driveItem("fileInPackage", testBaseDrivePath+pkg, true, false, false),
driveItem("fileInRoot", "fileInRoot", testBaseDrivePath, true, false, false),
driveItem("folder", "folder", testBaseDrivePath, false, true, false),
driveItem("subfolder", "subfolder", testBaseDrivePath+folder, false, true, false),
driveItem("folder", "folder", testBaseDrivePath+folderSub, false, true, false),
driveItem("package", "package", testBaseDrivePath, false, false, true),
driveItem("fileInFolder", "fileInFolder", testBaseDrivePath+folder, true, false, false),
driveItem("fileInFolder2", "fileInFolder2", testBaseDrivePath+folderSub+folder, true, false, false),
driveItem("fileInPackage", "fileInPackage", testBaseDrivePath+pkg, true, false, false),
},
scope: (&selectors.OneDriveBackup{}).
Folders([]string{"/folder/subfolder"}, selectors.PrefixMatch())[0],
@ -225,17 +275,25 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() {
expectedItemCount: 2,
expectedFileCount: 1,
expectedContainerCount: 1,
expectedMetadataPaths: map[string]string{
"folder": expectedPathAsSlice(
suite.T(),
tenant,
user,
testBaseDrivePath+"/folder/subfolder/folder",
)[0],
},
},
{
testCase: "match subfolder selector",
items: []models.DriveItemable{
driveItem("fileInRoot", testBaseDrivePath, true, false, false),
driveItem("folder", testBaseDrivePath, false, true, false),
driveItem("subfolder", testBaseDrivePath+folder, false, true, false),
driveItem("package", testBaseDrivePath, false, false, true),
driveItem("fileInFolder", testBaseDrivePath+folder, true, false, false),
driveItem("fileInSubfolder", testBaseDrivePath+folderSub, true, false, false),
driveItem("fileInPackage", testBaseDrivePath+pkg, true, false, false),
driveItem("fileInRoot", "fileInRoot", testBaseDrivePath, true, false, false),
driveItem("folder", "folder", testBaseDrivePath, false, true, false),
driveItem("subfolder", "subfolder", testBaseDrivePath+folder, false, true, false),
driveItem("package", "package", testBaseDrivePath, false, false, true),
driveItem("fileInFolder", "fileInFolder", testBaseDrivePath+folder, true, false, false),
driveItem("fileInSubfolder", "fileInSubfolder", testBaseDrivePath+folderSub, true, false, false),
driveItem("fileInPackage", "fileInPackage", testBaseDrivePath+pkg, true, false, false),
},
scope: (&selectors.OneDriveBackup{}).Folders([]string{"folder/subfolder"})[0],
expect: assert.NoError,
@ -248,6 +306,8 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() {
expectedItemCount: 2,
expectedFileCount: 1,
expectedContainerCount: 1,
// No child folders for subfolder so nothing here.
expectedMetadataPaths: map[string]string{},
},
}
@ -256,6 +316,7 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() {
ctx, flush := tester.NewContext()
defer flush()
paths := map[string]string{}
c := NewCollections(
tenant,
user,
@ -265,7 +326,7 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() {
nil,
control.Options{})
err := c.UpdateCollections(ctx, "driveID", tt.items)
err := c.UpdateCollections(ctx, "driveID", tt.items, paths)
tt.expect(t, err)
assert.Equal(t, len(tt.expectedCollectionPaths), len(c.CollectionMap), "collection paths")
assert.Equal(t, tt.expectedItemCount, c.NumItems, "item count")
@ -274,14 +335,16 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() {
for _, collPath := range tt.expectedCollectionPaths {
assert.Contains(t, c.CollectionMap, collPath)
}
assert.Equal(t, tt.expectedMetadataPaths, paths)
})
}
}
func driveItem(name string, path string, isFile, isFolder, isPackage bool) models.DriveItemable {
func driveItem(id string, name string, path string, isFile, isFolder, isPackage bool) models.DriveItemable {
item := models.NewDriveItem()
item.SetName(&name)
item.SetId(&name)
item.SetId(&id)
parentReference := models.NewItemReference()
parentReference.SetPath(&path)

View File

@ -161,7 +161,12 @@ func userDrives(ctx context.Context, service graph.Servicer, user string) ([]mod
}
// itemCollector functions collect the items found in a drive
type itemCollector func(ctx context.Context, driveID string, driveItems []models.DriveItemable) error
type itemCollector func(
ctx context.Context,
driveID string,
driveItems []models.DriveItemable,
// paths is the map that collectItems allocates once per call and hands to
// every collector invocation. Collectors may record folder ID -> folder path
// entries in it (Collections.UpdateCollections does), and collectItems
// returns the same map to its caller when enumeration succeeds.
paths map[string]string,
) error
// collectItems will enumerate all items in the specified drive and hand them to the
// provided `collector` method
@ -170,7 +175,14 @@ func collectItems(
service graph.Servicer,
driveID string,
collector itemCollector,
) error {
) (string, map[string]string, error) {
var (
newDeltaURL = ""
// TODO(ashmrtn): Eventually this should probably be a parameter so we can
// take in previous paths.
paths = map[string]string{}
)
// TODO: Specify a timestamp in the delta query
// https://docs.microsoft.com/en-us/graph/api/driveitem-delta?
// view=graph-rest-1.0&tabs=http#example-4-retrieving-delta-results-using-a-timestamp
@ -200,16 +212,20 @@ func collectItems(
for {
r, err := builder.Get(ctx, requestConfig)
if err != nil {
return errors.Wrapf(
return "", nil, errors.Wrapf(
err,
"failed to query drive items. details: %s",
support.ConnectorStackErrorTrace(err),
)
}
err = collector(ctx, driveID, r.GetValue())
err = collector(ctx, driveID, r.GetValue(), paths)
if err != nil {
return err
return "", nil, err
}
if r.GetOdataDeltaLink() != nil && len(*r.GetOdataDeltaLink()) > 0 {
newDeltaURL = *r.GetOdataDeltaLink()
}
// Check if there are more items
@ -222,7 +238,7 @@ func collectItems(
builder = msdrives.NewItemRootDeltaRequestBuilder(*nextLink, service.Adapter())
}
return nil
return newDeltaURL, paths, nil
}
// getFolder will lookup the specified folder name under `parentFolderID`
@ -329,11 +345,16 @@ func GetAllFolders(
folders := map[string]*Displayable{}
for _, d := range drives {
err = collectItems(
_, _, err = collectItems(
ctx,
gs,
*d.GetId(),
func(innerCtx context.Context, driveID string, items []models.DriveItemable) error {
func(
innerCtx context.Context,
driveID string,
items []models.DriveItemable,
paths map[string]string,
) error {
for _, item := range items {
// Skip the root item.
if item.GetRoot() != nil {

View File

@ -95,7 +95,12 @@ func (suite *ItemIntegrationSuite) TestItemReader_oneDrive() {
var driveItem models.DriveItemable
// This item collector tries to find "a" drive item that is a file to test the reader function
itemCollector := func(ctx context.Context, driveID string, items []models.DriveItemable) error {
itemCollector := func(
ctx context.Context,
driveID string,
items []models.DriveItemable,
paths map[string]string,
) error {
for _, item := range items {
if item.GetFile() != nil {
driveItem = item
@ -105,7 +110,7 @@ func (suite *ItemIntegrationSuite) TestItemReader_oneDrive() {
return nil
}
err := collectItems(ctx, suite, suite.userDriveID, itemCollector)
_, _, err := collectItems(ctx, suite, suite.userDriveID, itemCollector)
require.NoError(suite.T(), err)
// Test Requirement 2: Need a file

View File

@ -87,6 +87,7 @@ func (suite *SharePointLibrariesSuite) TestUpdateCollections() {
ctx, flush := tester.NewContext()
defer flush()
paths := map[string]string{}
c := onedrive.NewCollections(
tenant,
site,
@ -95,7 +96,7 @@ func (suite *SharePointLibrariesSuite) TestUpdateCollections() {
&MockGraphService{},
nil,
control.Options{})
err := c.UpdateCollections(ctx, "driveID", test.items)
err := c.UpdateCollections(ctx, "driveID", test.items, paths)
test.expect(t, err)
assert.Equal(t, len(test.expectedCollectionPaths), len(c.CollectionMap), "collection paths")
assert.Equal(t, test.expectedItemCount, c.NumItems, "item count")