diff --git a/src/internal/connector/data_collections_test.go b/src/internal/connector/data_collections_test.go index cfec30173..9ee113ed2 100644 --- a/src/internal/connector/data_collections_test.go +++ b/src/internal/connector/data_collections_test.go @@ -13,6 +13,7 @@ import ( "github.com/alcionai/corso/src/internal/connector/sharepoint" "github.com/alcionai/corso/src/internal/tester" "github.com/alcionai/corso/src/pkg/control" + "github.com/alcionai/corso/src/pkg/path" "github.com/alcionai/corso/src/pkg/selectors" ) @@ -303,9 +304,7 @@ func (suite *ConnectorCreateSharePointCollectionIntegrationSuite) SetupSuite() { tester.LogTimeOfTest(suite.T()) } -// TestCreateSharePointCollection. Ensures the proper amount of collections are created based -// on the selector. -func (suite *ConnectorCreateSharePointCollectionIntegrationSuite) TestCreateSharePointCollection() { +func (suite *ConnectorCreateSharePointCollectionIntegrationSuite) TestCreateSharePointCollection_Libraries() { ctx, flush := tester.NewContext() defer flush() @@ -316,51 +315,46 @@ func (suite *ConnectorCreateSharePointCollectionIntegrationSuite) TestCreateShar siteIDs = []string{siteID} ) - tables := []struct { - name string - sel func() selectors.Selector - comparator assert.ComparisonAssertionFunc - }{ - { - name: "SharePoint.Libraries", - comparator: assert.Equal, - sel: func() selectors.Selector { - sel := selectors.NewSharePointBackup(siteIDs) - sel.Include(sel.Libraries([]string{"foo"}, selectors.PrefixMatch())) - return sel.Selector - }, - }, - { - name: "SharePoint.Lists", - comparator: assert.Less, - sel: func() selectors.Selector { - sel := selectors.NewSharePointBackup(siteIDs) - sel.Include(sel.Lists(selectors.Any(), selectors.PrefixMatch())) + sel := selectors.NewSharePointBackup(siteIDs) + sel.Include(sel.Libraries([]string{"foo"}, selectors.PrefixMatch())) - return sel.Selector - }, - }, - } + cols, err := gc.DataCollections(ctx, sel.Selector, nil, control.Options{}) + require.NoError(t, err) + assert.Len(t, cols, 1) - for _, test := range tables { - t.Run(test.name, func(t *testing.T) { - cols, err := gc.DataCollections(ctx, test.sel(), nil, control.Options{}) - require.NoError(t, err) - test.comparator(t, 0, len(cols)) - - if test.name == "SharePoint.Lists" { - for _, collection := range cols { - t.Logf("Path: %s\n", collection.FullPath().String()) - for item := range collection.Items() { - t.Log("File: " + item.UUID()) - - bytes, err := io.ReadAll(item.ToReader()) - require.NoError(t, err) - t.Log(string(bytes)) - - } - } - } - }) + for _, collection := range cols { + t.Logf("Path: %s\n", collection.FullPath().String()) + assert.Equal(t, path.SharePointMetadataService, collection.FullPath().Service()) + } +} + +func (suite *ConnectorCreateSharePointCollectionIntegrationSuite) TestCreateSharePointCollection_Lists() { + ctx, flush := tester.NewContext() + defer flush() + + var ( + t = suite.T() + siteID = tester.M365SiteID(t) + gc = loadConnector(ctx, t, Sites) + siteIDs = []string{siteID} + ) + + sel := selectors.NewSharePointBackup(siteIDs) + sel.Include(sel.Lists(selectors.Any(), selectors.PrefixMatch())) + + cols, err := gc.DataCollections(ctx, sel.Selector, nil, control.Options{}) + require.NoError(t, err) + assert.Less(t, 0, len(cols)) + + for _, collection := range cols { + t.Logf("Path: %s\n", collection.FullPath().String()) + + for item := range collection.Items() { + t.Log("File: " + item.UUID()) + + bs, err := io.ReadAll(item.ToReader()) + require.NoError(t, err) + t.Log(string(bs)) + } } } diff --git a/src/internal/connector/onedrive/collections.go b/src/internal/connector/onedrive/collections.go index d3528cb02..7bdbfc8f5 100644 --- a/src/internal/connector/onedrive/collections.go +++ b/src/internal/connector/onedrive/collections.go @@ -25,6 +25,17 @@ const ( SharePointSource ) +func (ds driveSource) toPathServiceCat() (path.ServiceType, path.CategoryType) { + switch ds { + case OneDriveSource: + return path.OneDriveService, path.FilesCategory + case SharePointSource: + return path.SharePointService, path.LibrariesCategory + default: + return path.UnknownService, path.UnknownCategory + } +} + type folderMatcher interface { IsAny() bool Matches(string) bool @@ -81,27 +92,80 @@ func (c *Collections) Get(ctx context.Context) ([]data.Collection, error) { return nil, err } + var ( + // Drive ID -> delta URL for drive + deltaURLs = map[string]string{} + // Drive ID -> folder ID -> folder path + folderPaths = map[string]map[string]string{} + ) + // Update the collection map with items from each drive for _, d := range drives { - err = collectItems(ctx, c.service, *d.GetId(), c.UpdateCollections) + driveID := *d.GetId() + + delta, paths, err := collectItems(ctx, c.service, driveID, c.UpdateCollections) if err != nil { return nil, err } + + if len(delta) > 0 { + deltaURLs[driveID] = delta + } + + if len(paths) > 0 { + folderPaths[driveID] = map[string]string{} + + for id, p := range paths { + folderPaths[driveID][id] = p + } + } } observe.Message(ctx, fmt.Sprintf("Discovered %d items to backup", c.NumItems)) - collections := make([]data.Collection, 0, len(c.CollectionMap)) + // Add an extra for the metadata collection. + collections := make([]data.Collection, 0, len(c.CollectionMap)+1) for _, coll := range c.CollectionMap { collections = append(collections, coll) } + service, category := c.source.toPathServiceCat() + metadata, err := graph.MakeMetadataCollection( + c.tenant, + c.resourceOwner, + service, + category, + []graph.MetadataCollectionEntry{ + graph.NewMetadataEntry(graph.PreviousPathFileName, folderPaths), + graph.NewMetadataEntry(graph.DeltaURLsFileName, deltaURLs), + }, + c.statusUpdater, + ) + + if err != nil { + // Technically it's safe to continue here because the logic for starting an + // incremental backup should eventually find that the metadata files are + // empty/missing and default to a full backup. + logger.Ctx(ctx).Warnw( + "making metadata collection for future incremental backups", + "error", + err, + ) + } else { + collections = append(collections, metadata) + } + return collections, nil } // UpdateCollections initializes and adds the provided drive items to Collections // A new collection is created for every drive folder (or package) -func (c *Collections) UpdateCollections(ctx context.Context, driveID string, items []models.DriveItemable) error { +func (c *Collections) UpdateCollections( + ctx context.Context, + driveID string, + items []models.DriveItemable, + paths map[string]string, +) error { for _, item := range items { if item.GetRoot() != nil { // Skip the root item @@ -131,9 +195,19 @@ func (c *Collections) UpdateCollections(ctx context.Context, driveID string, ite switch { case item.GetFolder() != nil, item.GetPackage() != nil: - // Leave this here so we don't fall into the default case. - // TODO: This is where we might create a "special file" to represent these in the backup repository - // e.g. a ".folderMetadataFile" + // Eventually, deletions of folders will be handled here so we may as well + // start off by saving the path.Path of the item instead of just the + // OneDrive parentRef or such. + folderPath, err := collectionPath.Append(*item.GetName(), false) + if err != nil { + logger.Ctx(ctx).Errorw("failed building collection path", "error", err) + return err + } + + // TODO(ashmrtn): Handle deletions by removing this entry from the map. + // TODO(ashmrtn): Handle moves by setting the collection state if the + // collection doesn't already exist/have that state. + paths[*item.GetId()] = folderPath.String() case item.GetFile() != nil: col, found := c.CollectionMap[collectionPath.String()] diff --git a/src/internal/connector/onedrive/collections_test.go b/src/internal/connector/onedrive/collections_test.go index 923c5512d..f31ae8bab 100644 --- a/src/internal/connector/onedrive/collections_test.go +++ b/src/internal/connector/onedrive/collections_test.go @@ -102,19 +102,21 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() { expectedItemCount int expectedContainerCount int expectedFileCount int + expectedMetadataPaths map[string]string }{ { testCase: "Invalid item", items: []models.DriveItemable{ - driveItem("item", testBaseDrivePath, false, false, false), + driveItem("item", "item", testBaseDrivePath, false, false, false), }, - scope: anyFolder, - expect: assert.Error, + scope: anyFolder, + expect: assert.Error, + expectedMetadataPaths: map[string]string{}, }, { testCase: "Single File", items: []models.DriveItemable{ - driveItem("file", testBaseDrivePath, true, false, false), + driveItem("file", "file", testBaseDrivePath, true, false, false), }, scope: anyFolder, expect: assert.NoError, @@ -127,33 +129,51 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() { expectedItemCount: 2, expectedFileCount: 1, expectedContainerCount: 1, + // Root folder is skipped since it's always present. + expectedMetadataPaths: map[string]string{}, }, { testCase: "Single Folder", items: []models.DriveItemable{ - driveItem("folder", testBaseDrivePath, false, true, false), + driveItem("folder", "folder", testBaseDrivePath, false, true, false), }, scope: anyFolder, expect: assert.NoError, expectedCollectionPaths: []string{}, + expectedMetadataPaths: map[string]string{ + "folder": expectedPathAsSlice( + suite.T(), + tenant, + user, + testBaseDrivePath+"/folder", + )[0], + }, }, { testCase: "Single Package", items: []models.DriveItemable{ - driveItem("package", testBaseDrivePath, false, false, true), + driveItem("package", "package", testBaseDrivePath, false, false, true), }, scope: anyFolder, expect: assert.NoError, expectedCollectionPaths: []string{}, + expectedMetadataPaths: map[string]string{ + "package": expectedPathAsSlice( + suite.T(), + tenant, + user, + testBaseDrivePath+"/package", + )[0], + }, }, { testCase: "1 root file, 1 folder, 1 package, 2 files, 3 collections", items: []models.DriveItemable{ - driveItem("fileInRoot", testBaseDrivePath, true, false, false), - driveItem("folder", testBaseDrivePath, false, true, false), - driveItem("package", testBaseDrivePath, false, false, true), - driveItem("fileInFolder", testBaseDrivePath+folder, true, false, false), - driveItem("fileInPackage", testBaseDrivePath+pkg, true, false, false), + driveItem("fileInRoot", "fileInRoot", testBaseDrivePath, true, false, false), + driveItem("folder", "folder", testBaseDrivePath, false, true, false), + driveItem("package", "package", testBaseDrivePath, false, false, true), + driveItem("fileInFolder", "fileInFolder", testBaseDrivePath+folder, true, false, false), + driveItem("fileInPackage", "fileInPackage", testBaseDrivePath+pkg, true, false, false), }, scope: anyFolder, expect: assert.NoError, @@ -168,18 +188,32 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() { expectedItemCount: 6, expectedFileCount: 3, expectedContainerCount: 3, + expectedMetadataPaths: map[string]string{ + "folder": expectedPathAsSlice( + suite.T(), + tenant, + user, + testBaseDrivePath+"/folder", + )[0], + "package": expectedPathAsSlice( + suite.T(), + tenant, + user, + testBaseDrivePath+"/package", + )[0], + }, }, { testCase: "contains folder selector", items: []models.DriveItemable{ - driveItem("fileInRoot", testBaseDrivePath, true, false, false), - driveItem("folder", testBaseDrivePath, false, true, false), - driveItem("subfolder", testBaseDrivePath+folder, false, true, false), - driveItem("folder", testBaseDrivePath+folderSub, false, true, false), - driveItem("package", testBaseDrivePath, false, false, true), - driveItem("fileInFolder", testBaseDrivePath+folder, true, false, false), - driveItem("fileInFolder2", testBaseDrivePath+folderSub+folder, true, false, false), - driveItem("fileInPackage", testBaseDrivePath+pkg, true, false, false), + driveItem("fileInRoot", "fileInRoot", testBaseDrivePath, true, false, false), + driveItem("folder", "folder", testBaseDrivePath, false, true, false), + driveItem("subfolder", "subfolder", testBaseDrivePath+folder, false, true, false), + driveItem("folder2", "folder", testBaseDrivePath+folderSub, false, true, false), + driveItem("package", "package", testBaseDrivePath, false, false, true), + driveItem("fileInFolder", "fileInFolder", testBaseDrivePath+folder, true, false, false), + driveItem("fileInFolder2", "fileInFolder2", testBaseDrivePath+folderSub+folder, true, false, false), + driveItem("fileInFolderPackage", "fileInPackage", testBaseDrivePath+pkg, true, false, false), }, scope: (&selectors.OneDriveBackup{}).Folders([]string{"folder"})[0], expect: assert.NoError, @@ -200,18 +234,34 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() { expectedItemCount: 4, expectedFileCount: 2, expectedContainerCount: 2, + // just "folder" isn't added here because the include check is done on the + // parent path since we only check later if something is a folder or not. + expectedMetadataPaths: map[string]string{ + "subfolder": expectedPathAsSlice( + suite.T(), + tenant, + user, + testBaseDrivePath+"/folder/subfolder", + )[0], + "folder2": expectedPathAsSlice( + suite.T(), + tenant, + user, + testBaseDrivePath+"/folder/subfolder/folder", + )[0], + }, }, { testCase: "prefix subfolder selector", items: []models.DriveItemable{ - driveItem("fileInRoot", testBaseDrivePath, true, false, false), - driveItem("folder", testBaseDrivePath, false, true, false), - driveItem("subfolder", testBaseDrivePath+folder, false, true, false), - driveItem("folder", testBaseDrivePath+folderSub, false, true, false), - driveItem("package", testBaseDrivePath, false, false, true), - driveItem("fileInFolder", testBaseDrivePath+folder, true, false, false), - driveItem("fileInFolder2", testBaseDrivePath+folderSub+folder, true, false, false), - driveItem("fileInPackage", testBaseDrivePath+pkg, true, false, false), + driveItem("fileInRoot", "fileInRoot", testBaseDrivePath, true, false, false), + driveItem("folder", "folder", testBaseDrivePath, false, true, false), + driveItem("subfolder", "subfolder", testBaseDrivePath+folder, false, true, false), + driveItem("folder", "folder", testBaseDrivePath+folderSub, false, true, false), + driveItem("package", "package", testBaseDrivePath, false, false, true), + driveItem("fileInFolder", "fileInFolder", testBaseDrivePath+folder, true, false, false), + driveItem("fileInFolder2", "fileInFolder2", testBaseDrivePath+folderSub+folder, true, false, false), + driveItem("fileInPackage", "fileInPackage", testBaseDrivePath+pkg, true, false, false), }, scope: (&selectors.OneDriveBackup{}). Folders([]string{"/folder/subfolder"}, selectors.PrefixMatch())[0], @@ -225,17 +275,25 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() { expectedItemCount: 2, expectedFileCount: 1, expectedContainerCount: 1, + expectedMetadataPaths: map[string]string{ + "folder": expectedPathAsSlice( + suite.T(), + tenant, + user, + testBaseDrivePath+"/folder/subfolder/folder", + )[0], + }, }, { testCase: "match subfolder selector", items: []models.DriveItemable{ - driveItem("fileInRoot", testBaseDrivePath, true, false, false), - driveItem("folder", testBaseDrivePath, false, true, false), - driveItem("subfolder", testBaseDrivePath+folder, false, true, false), - driveItem("package", testBaseDrivePath, false, false, true), - driveItem("fileInFolder", testBaseDrivePath+folder, true, false, false), - driveItem("fileInSubfolder", testBaseDrivePath+folderSub, true, false, false), - driveItem("fileInPackage", testBaseDrivePath+pkg, true, false, false), + driveItem("fileInRoot", "fileInRoot", testBaseDrivePath, true, false, false), + driveItem("folder", "folder", testBaseDrivePath, false, true, false), + driveItem("subfolder", "subfolder", testBaseDrivePath+folder, false, true, false), + driveItem("package", "package", testBaseDrivePath, false, false, true), + driveItem("fileInFolder", "fileInFolder", testBaseDrivePath+folder, true, false, false), + driveItem("fileInSubfolder", "fileInSubfolder", testBaseDrivePath+folderSub, true, false, false), + driveItem("fileInPackage", "fileInPackage", testBaseDrivePath+pkg, true, false, false), }, scope: (&selectors.OneDriveBackup{}).Folders([]string{"folder/subfolder"})[0], expect: assert.NoError, @@ -248,6 +306,8 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() { expectedItemCount: 2, expectedFileCount: 1, expectedContainerCount: 1, + // No child folders for subfolder so nothing here. + expectedMetadataPaths: map[string]string{}, }, } @@ -256,6 +316,7 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() { ctx, flush := tester.NewContext() defer flush() + paths := map[string]string{} c := NewCollections( tenant, user, @@ -265,7 +326,7 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() { nil, control.Options{}) - err := c.UpdateCollections(ctx, "driveID", tt.items) + err := c.UpdateCollections(ctx, "driveID", tt.items, paths) tt.expect(t, err) assert.Equal(t, len(tt.expectedCollectionPaths), len(c.CollectionMap), "collection paths") assert.Equal(t, tt.expectedItemCount, c.NumItems, "item count") @@ -274,14 +335,16 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() { for _, collPath := range tt.expectedCollectionPaths { assert.Contains(t, c.CollectionMap, collPath) } + + assert.Equal(t, tt.expectedMetadataPaths, paths) }) } } -func driveItem(name string, path string, isFile, isFolder, isPackage bool) models.DriveItemable { +func driveItem(id string, name string, path string, isFile, isFolder, isPackage bool) models.DriveItemable { item := models.NewDriveItem() item.SetName(&name) - item.SetId(&name) + item.SetId(&id) parentReference := models.NewItemReference() parentReference.SetPath(&path) diff --git a/src/internal/connector/onedrive/drive.go b/src/internal/connector/onedrive/drive.go index 36a79dca1..f063eec53 100644 --- a/src/internal/connector/onedrive/drive.go +++ b/src/internal/connector/onedrive/drive.go @@ -161,7 +161,12 @@ func userDrives(ctx context.Context, service graph.Servicer, user string) ([]mod } // itemCollector functions collect the items found in a drive -type itemCollector func(ctx context.Context, driveID string, driveItems []models.DriveItemable) error +type itemCollector func( + ctx context.Context, + driveID string, + driveItems []models.DriveItemable, + paths map[string]string, +) error // collectItems will enumerate all items in the specified drive and hand them to the // provided `collector` method @@ -170,7 +175,14 @@ func collectItems( service graph.Servicer, driveID string, collector itemCollector, -) error { +) (string, map[string]string, error) { + var ( + newDeltaURL = "" + // TODO(ashmrtn): Eventually this should probably be a parameter so we can + // take in previous paths. + paths = map[string]string{} + ) + // TODO: Specify a timestamp in the delta query // https://docs.microsoft.com/en-us/graph/api/driveitem-delta? // view=graph-rest-1.0&tabs=http#example-4-retrieving-delta-results-using-a-timestamp @@ -200,16 +212,20 @@ func collectItems( for { r, err := builder.Get(ctx, requestConfig) if err != nil { - return errors.Wrapf( + return "", nil, errors.Wrapf( err, "failed to query drive items. details: %s", support.ConnectorStackErrorTrace(err), ) } - err = collector(ctx, driveID, r.GetValue()) + err = collector(ctx, driveID, r.GetValue(), paths) if err != nil { - return err + return "", nil, err + } + + if r.GetOdataDeltaLink() != nil && len(*r.GetOdataDeltaLink()) > 0 { + newDeltaURL = *r.GetOdataDeltaLink() } // Check if there are more items @@ -222,7 +238,7 @@ func collectItems( builder = msdrives.NewItemRootDeltaRequestBuilder(*nextLink, service.Adapter()) } - return nil + return newDeltaURL, paths, nil } // getFolder will lookup the specified folder name under `parentFolderID` @@ -329,11 +345,16 @@ func GetAllFolders( folders := map[string]*Displayable{} for _, d := range drives { - err = collectItems( + _, _, err = collectItems( ctx, gs, *d.GetId(), - func(innerCtx context.Context, driveID string, items []models.DriveItemable) error { + func( + innerCtx context.Context, + driveID string, + items []models.DriveItemable, + paths map[string]string, + ) error { for _, item := range items { // Skip the root item. if item.GetRoot() != nil { diff --git a/src/internal/connector/onedrive/item_test.go b/src/internal/connector/onedrive/item_test.go index e423e65d9..d87878fc4 100644 --- a/src/internal/connector/onedrive/item_test.go +++ b/src/internal/connector/onedrive/item_test.go @@ -95,7 +95,12 @@ func (suite *ItemIntegrationSuite) TestItemReader_oneDrive() { var driveItem models.DriveItemable // This item collector tries to find "a" drive item that is a file to test the reader function - itemCollector := func(ctx context.Context, driveID string, items []models.DriveItemable) error { + itemCollector := func( + ctx context.Context, + driveID string, + items []models.DriveItemable, + paths map[string]string, + ) error { for _, item := range items { if item.GetFile() != nil { driveItem = item @@ -105,7 +110,7 @@ func (suite *ItemIntegrationSuite) TestItemReader_oneDrive() { return nil } - err := collectItems(ctx, suite, suite.userDriveID, itemCollector) + _, _, err := collectItems(ctx, suite, suite.userDriveID, itemCollector) require.NoError(suite.T(), err) // Test Requirement 2: Need a file diff --git a/src/internal/connector/sharepoint/data_collections_test.go b/src/internal/connector/sharepoint/data_collections_test.go index 9b391d1e8..f52a642ba 100644 --- a/src/internal/connector/sharepoint/data_collections_test.go +++ b/src/internal/connector/sharepoint/data_collections_test.go @@ -87,6 +87,7 @@ func (suite *SharePointLibrariesSuite) TestUpdateCollections() { ctx, flush := tester.NewContext() defer flush() + paths := map[string]string{} c := onedrive.NewCollections( tenant, site, @@ -95,7 +96,7 @@ func (suite *SharePointLibrariesSuite) TestUpdateCollections() { &MockGraphService{}, nil, control.Options{}) - err := c.UpdateCollections(ctx, "driveID", test.items) + err := c.UpdateCollections(ctx, "driveID", test.items, paths) test.expect(t, err) assert.Equal(t, len(test.expectedCollectionPaths), len(c.CollectionMap), "collection paths") assert.Equal(t, test.expectedItemCount, c.NumItems, "item count")