diff --git a/src/internal/connector/data_collections.go b/src/internal/connector/data_collections.go
index 7d187a854..410a05462 100644
--- a/src/internal/connector/data_collections.go
+++ b/src/internal/connector/data_collections.go
@@ -83,7 +83,7 @@ func (gc *GraphConnector) DataCollections(
 		return colls, excludes, nil
 
 	case selectors.ServiceOneDrive:
-		return gc.OneDriveDataCollections(ctx, sels, ctrlOpts)
+		return gc.OneDriveDataCollections(ctx, sels, metadata, ctrlOpts)
 
 	case selectors.ServiceSharePoint:
 		colls, excludes, err := sharepoint.DataCollections(
@@ -182,6 +182,7 @@ func (fm odFolderMatcher) Matches(dir string) bool {
 func (gc *GraphConnector) OneDriveDataCollections(
 	ctx context.Context,
 	selector selectors.Selector,
+	metadata []data.Collection,
 	ctrlOpts control.Options,
 ) ([]data.Collection, map[string]struct{}, error) {
 	odb, err := selector.ToOneDriveBackup()
@@ -209,7 +210,7 @@ func (gc *GraphConnector) OneDriveDataCollections(
 		gc.Service,
 		gc.UpdateStatus,
 		ctrlOpts,
-	).Get(ctx)
+	).Get(ctx, metadata)
 	if err != nil {
 		return nil, nil, support.WrapAndAppend(user, err, errs)
 	}
diff --git a/src/internal/connector/onedrive/collections.go b/src/internal/connector/onedrive/collections.go
index f83ce342a..35b81f7f2 100644
--- a/src/internal/connector/onedrive/collections.go
+++ b/src/internal/connector/onedrive/collections.go
@@ -2,7 +2,9 @@ package onedrive
 
 import (
 	"context"
+	"encoding/json"
 	"fmt"
+	"io"
 	"net/http"
 	"strings"
 
@@ -92,9 +94,145 @@ func NewCollections(
 	}
 }
 
+func deserializeMetadata(
+	ctx context.Context,
+	cols []data.Collection,
+) (map[string]string, map[string]map[string]string, error) {
+	logger.Ctx(ctx).Infow(
+		"deserializing previous backup metadata",
+		"num_collections",
+		len(cols),
+	)
+
+	prevDeltas := map[string]string{}
+	prevFolders := map[string]map[string]string{}
+
+	for _, col := range cols {
+		items := col.Items()
+
+		for breakLoop := false; !breakLoop; {
+			select {
+			case <-ctx.Done():
+				return nil, nil, errors.Wrap(ctx.Err(), "deserializing previous backup metadata")
+
+			case item, ok := <-items:
+				if !ok {
+					// End of collection items.
+					breakLoop = true
+					break
+				}
+
+				var err error
+
+				switch item.UUID() {
+				case graph.PreviousPathFileName:
+					err = deserializeMap(item.ToReader(), prevFolders)
+
+				case graph.DeltaURLsFileName:
+					err = deserializeMap(item.ToReader(), prevDeltas)
+
+				default:
+					logger.Ctx(ctx).Infow(
+						"skipping unknown metadata file",
+						"file_name",
+						item.UUID(),
+					)
+
+					continue
+				}
+
+				if err == nil {
+					// Successful decode.
+					continue
+				}
+
+				// This is conservative, but report an error if any of the items for
+				// any of the deserialized maps have duplicate drive IDs. This will
+				// cause the entire backup to fail, but it's not clear if higher
+				// layers would have caught this. Worst case, if we don't handle this
+				// we end up in a situation where we're sourcing items from the wrong
+				// base in the kopia wrapper.
+				if errors.Is(err, errExistingMapping) {
+					return nil, nil, errors.Wrapf(
+						err,
+						"deserializing metadata file %s",
+						item.UUID(),
+					)
+				}
+
+				logger.Ctx(ctx).Errorw(
+					"deserializing base backup metadata. Falling back to full backup for selected drives",
+					"error",
+					err,
+					"file_name",
+					item.UUID(),
+				)
+			}
+		}
+
+		// Go through and remove partial results (i.e. a path mapping but no delta
+		// URL, or vice-versa).
+		for k := range prevDeltas {
+			if _, ok := prevFolders[k]; !ok {
+				delete(prevDeltas, k)
+			}
+		}
+
+		for k := range prevFolders {
+			if _, ok := prevDeltas[k]; !ok {
+				delete(prevFolders, k)
+			}
+		}
+	}
+
+	return prevDeltas, prevFolders, nil
+}
+
+var errExistingMapping = errors.New("mapping already exists for same drive ID")
+
+// deserializeMap takes a reader and a map of already deserialized items and
+// adds the newly deserialized items to alreadyFound. Items are only added to
+// alreadyFound if none of the keys in the freshly deserialized map already
+// exist in alreadyFound. reader is closed at the end of this function.
+func deserializeMap[T any](reader io.ReadCloser, alreadyFound map[string]T) error {
+	defer reader.Close()
+
+	tmp := map[string]T{}
+
+	err := json.NewDecoder(reader).Decode(&tmp)
+	if err != nil {
+		return errors.Wrap(err, "deserializing file contents")
+	}
+
+	var duplicate bool
+
+	for k := range tmp {
+		if _, ok := alreadyFound[k]; ok {
+			duplicate = true
+			break
+		}
+	}
+
+	if duplicate {
+		return errors.WithStack(errExistingMapping)
+	}
+
+	maps.Copy(alreadyFound, tmp)
+
+	return nil
+}
+
 // Retrieves drive data as set of `data.Collections` and a set of item names to
 // be excluded from the upcoming backup.
-func (c *Collections) Get(ctx context.Context) ([]data.Collection, map[string]struct{}, error) {
+func (c *Collections) Get(
+	ctx context.Context,
+	prevMetadata []data.Collection,
+) ([]data.Collection, map[string]struct{}, error) {
+	_, _, err := deserializeMetadata(ctx, prevMetadata)
+	if err != nil {
+		return nil, nil, err
+	}
+
 	// Enumerate drives for the specified resourceOwner
 	pager, err := PagerForSource(c.source, c.service, c.resourceOwner, nil)
 	if err != nil {
diff --git a/src/internal/connector/onedrive/collections_test.go b/src/internal/connector/onedrive/collections_test.go
index b69253918..c250afe2a 100644
--- a/src/internal/connector/onedrive/collections_test.go
+++ b/src/internal/connector/onedrive/collections_test.go
@@ -11,8 +11,11 @@ import (
 	"golang.org/x/exp/maps"
 
 	"github.com/alcionai/corso/src/internal/connector/graph"
+	"github.com/alcionai/corso/src/internal/connector/support"
+	"github.com/alcionai/corso/src/internal/data"
 	"github.com/alcionai/corso/src/internal/tester"
 	"github.com/alcionai/corso/src/pkg/control"
+	"github.com/alcionai/corso/src/pkg/path"
 	"github.com/alcionai/corso/src/pkg/selectors"
 )
 
@@ -621,13 +624,304 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() {
 	}
 }
 
-func driveItem(id string, name string, path string, isFile, isFolder, isPackage bool) models.DriveItemable {
+func (suite *OneDriveCollectionsSuite) TestDeserializeMetadata() {
+	tenant := "a-tenant"
+	user := "a-user"
+	driveID1 := "1"
+	driveID2 := "2"
+	deltaURL1 := "url/1"
+	deltaURL2 := "url/2"
+
+	folderID1 := "folder1"
+	folderID2 := "folder2"
+	path1 := "folder1/path"
+	path2 := "folder2/path"
+
+	table := []struct {
+		name string
+		// Each function returns the set of files for a single data.Collection.
+		cols           []func() []graph.MetadataCollectionEntry
+		expectedDeltas map[string]string
+		expectedPaths  map[string]map[string]string
+		errCheck       assert.ErrorAssertionFunc
+	}{
+		{
+			name: "SuccessOneDriveAllOneCollection",
+			cols: []func() []graph.MetadataCollectionEntry{
+				func() []graph.MetadataCollectionEntry {
+					return []graph.MetadataCollectionEntry{
+						graph.NewMetadataEntry(
+							graph.DeltaURLsFileName,
+							map[string]string{driveID1: deltaURL1},
+						),
+						graph.NewMetadataEntry(
+							graph.PreviousPathFileName,
+							map[string]map[string]string{
+								driveID1: {
+									folderID1: path1,
+								},
+							},
+						),
+					}
+				},
+			},
+			expectedDeltas: map[string]string{
+				driveID1: deltaURL1,
+			},
+			expectedPaths: map[string]map[string]string{
+				driveID1: {
+					folderID1: path1,
+				},
+			},
+			errCheck: assert.NoError,
+		},
+		{
+			name: "MissingPaths",
+			cols: []func() []graph.MetadataCollectionEntry{
+				func() []graph.MetadataCollectionEntry {
+					return []graph.MetadataCollectionEntry{
+						graph.NewMetadataEntry(
+							graph.DeltaURLsFileName,
+							map[string]string{driveID1: deltaURL1},
+						),
+					}
+				},
+			},
+			expectedDeltas: map[string]string{},
+			expectedPaths:  map[string]map[string]string{},
+			errCheck:       assert.NoError,
+		},
+		{
+			name: "MissingDeltas",
+			cols: []func() []graph.MetadataCollectionEntry{
+				func() []graph.MetadataCollectionEntry {
+					return []graph.MetadataCollectionEntry{
+						graph.NewMetadataEntry(
+							graph.PreviousPathFileName,
+							map[string]map[string]string{
+								driveID1: {
+									folderID1: path1,
+								},
+							},
+						),
+					}
+				},
+			},
+			expectedDeltas: map[string]string{},
+			expectedPaths:  map[string]map[string]string{},
+			errCheck:       assert.NoError,
+		},
+		{
+			name: "SuccessTwoDrivesTwoCollections",
+			cols: []func() []graph.MetadataCollectionEntry{
+				func() []graph.MetadataCollectionEntry {
+					return []graph.MetadataCollectionEntry{
+						graph.NewMetadataEntry(
+							graph.DeltaURLsFileName,
+							map[string]string{driveID1: deltaURL1},
+						),
+						graph.NewMetadataEntry(
+							graph.PreviousPathFileName,
+							map[string]map[string]string{
+								driveID1: {
+									folderID1: path1,
+								},
+							},
+						),
+					}
+				},
+				func() []graph.MetadataCollectionEntry {
+					return []graph.MetadataCollectionEntry{
+						graph.NewMetadataEntry(
+							graph.DeltaURLsFileName,
+							map[string]string{driveID2: deltaURL2},
+						),
+						graph.NewMetadataEntry(
+							graph.PreviousPathFileName,
+							map[string]map[string]string{
+								driveID2: {
+									folderID2: path2,
+								},
+							},
+						),
+					}
+				},
+			},
+			expectedDeltas: map[string]string{
+				driveID1: deltaURL1,
+				driveID2: deltaURL2,
+			},
+			expectedPaths: map[string]map[string]string{
+				driveID1: {
+					folderID1: path1,
+				},
+				driveID2: {
+					folderID2: path2,
+				},
+			},
+			errCheck: assert.NoError,
+		},
+		{
+			// Bad formats are logged but skip adding entries to the maps and don't
+			// return an error.
+			name: "BadFormat",
+			cols: []func() []graph.MetadataCollectionEntry{
+				func() []graph.MetadataCollectionEntry {
+					return []graph.MetadataCollectionEntry{
+						graph.NewMetadataEntry(
+							graph.PreviousPathFileName,
+							map[string]string{driveID1: deltaURL1},
+						),
+					}
+				},
+			},
+			expectedDeltas: map[string]string{},
+			expectedPaths:  map[string]map[string]string{},
+			errCheck:       assert.NoError,
+		},
+		{
+			// Unexpected files are logged and skipped. They don't cause an error to
+			// be returned.
+ name: "BadFileName", + cols: []func() []graph.MetadataCollectionEntry{ + func() []graph.MetadataCollectionEntry { + return []graph.MetadataCollectionEntry{ + graph.NewMetadataEntry( + graph.DeltaURLsFileName, + map[string]string{driveID1: deltaURL1}, + ), + graph.NewMetadataEntry( + graph.PreviousPathFileName, + map[string]map[string]string{ + driveID1: { + folderID1: path1, + }, + }, + ), + graph.NewMetadataEntry( + "foo", + map[string]string{driveID1: deltaURL1}, + ), + } + }, + }, + expectedDeltas: map[string]string{ + driveID1: deltaURL1, + }, + expectedPaths: map[string]map[string]string{ + driveID1: { + folderID1: path1, + }, + }, + errCheck: assert.NoError, + }, + { + name: "DriveAlreadyFound_Paths", + cols: []func() []graph.MetadataCollectionEntry{ + func() []graph.MetadataCollectionEntry { + return []graph.MetadataCollectionEntry{ + graph.NewMetadataEntry( + graph.DeltaURLsFileName, + map[string]string{driveID1: deltaURL1}, + ), + graph.NewMetadataEntry( + graph.PreviousPathFileName, + map[string]map[string]string{ + driveID1: { + folderID1: path1, + }, + }, + ), + } + }, + func() []graph.MetadataCollectionEntry { + return []graph.MetadataCollectionEntry{ + graph.NewMetadataEntry( + graph.PreviousPathFileName, + map[string]map[string]string{ + driveID1: { + folderID2: path2, + }, + }, + ), + } + }, + }, + expectedDeltas: nil, + expectedPaths: nil, + errCheck: assert.Error, + }, + { + name: "DriveAlreadyFound_Deltas", + cols: []func() []graph.MetadataCollectionEntry{ + func() []graph.MetadataCollectionEntry { + return []graph.MetadataCollectionEntry{ + graph.NewMetadataEntry( + graph.DeltaURLsFileName, + map[string]string{driveID1: deltaURL1}, + ), + graph.NewMetadataEntry( + graph.PreviousPathFileName, + map[string]map[string]string{ + driveID1: { + folderID1: path1, + }, + }, + ), + } + }, + func() []graph.MetadataCollectionEntry { + return []graph.MetadataCollectionEntry{ + graph.NewMetadataEntry( + graph.DeltaURLsFileName, + map[string]string{driveID1: deltaURL2}, + ), + } + }, + }, + expectedDeltas: nil, + expectedPaths: nil, + errCheck: assert.Error, + }, + } + + for _, test := range table { + suite.T().Run(test.name, func(t *testing.T) { + ctx, flush := tester.NewContext() + defer flush() + + cols := []data.Collection{} + + for _, c := range test.cols { + mc, err := graph.MakeMetadataCollection( + tenant, + user, + path.OneDriveService, + path.FilesCategory, + c(), + func(*support.ConnectorOperationStatus) {}, + ) + require.NoError(t, err) + + cols = append(cols, mc) + } + + deltas, paths, err := deserializeMetadata(ctx, cols) + test.errCheck(t, err) + + assert.Equal(t, test.expectedDeltas, deltas) + assert.Equal(t, test.expectedPaths, paths) + }) + } +} + +func driveItem(id string, name string, parentPath string, isFile, isFolder, isPackage bool) models.DriveItemable { item := models.NewDriveItem() item.SetName(&name) item.SetId(&id) parentReference := models.NewItemReference() - parentReference.SetPath(&path) + parentReference.SetPath(&parentPath) item.SetParentReference(parentReference) switch { @@ -644,13 +938,13 @@ func driveItem(id string, name string, path string, isFile, isFolder, isPackage // delItem creates a DriveItemable that is marked as deleted. path must be set // to the base drive path. 
-func delItem(id string, path string, isFile, isFolder, isPackage bool) models.DriveItemable {
+// delItem creates a DriveItemable that is marked as deleted. parentPath must
+// be set to the base drive path.
+func delItem(id string, parentPath string, isFile, isFolder, isPackage bool) models.DriveItemable {
 	item := models.NewDriveItem()
 	item.SetId(&id)
 	item.SetDeleted(models.NewDeleted())
 
 	parentReference := models.NewItemReference()
-	parentReference.SetPath(&path)
+	parentReference.SetPath(&parentPath)
 	item.SetParentReference(parentReference)
 
 	switch {
diff --git a/src/internal/connector/onedrive/drive_test.go b/src/internal/connector/onedrive/drive_test.go
index 36fef30ab..0ba3ec1c2 100644
--- a/src/internal/connector/onedrive/drive_test.go
+++ b/src/internal/connector/onedrive/drive_test.go
@@ -463,7 +463,7 @@ func (suite *OneDriveSuite) TestOneDriveNewCollections() {
 				service,
 				service.updateStatus,
 				control.Options{},
-			).Get(ctx)
+			).Get(ctx, nil)
 			assert.NoError(t, err)
 			// Don't expect excludes as this isn't an incremental backup.
 			assert.Empty(t, excludes)
diff --git a/src/internal/connector/sharepoint/data_collections.go b/src/internal/connector/sharepoint/data_collections.go
index 6011c32a0..1fd2f786d 100644
--- a/src/internal/connector/sharepoint/data_collections.go
+++ b/src/internal/connector/sharepoint/data_collections.go
@@ -152,7 +152,9 @@ func collectLibraries(
 		updater.UpdateStatus,
 		ctrlOpts)
 
-	odcs, excludes, err := colls.Get(ctx)
+	// TODO(ashmrtn): Pass previous backup metadata when SharePoint supports delta
+	// token-based incrementals.
+	odcs, excludes, err := colls.Get(ctx, nil)
 	if err != nil {
 		return nil, nil, support.WrapAndAppend(siteID, err, errs)
 	}
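A few self-contained sketches of the patterns this patch leans on follow. None of them use Corso's internal packages; every name in them is an illustrative stand-in rather than an API from this repo.

First, the channel drain in deserializeMetadata uses a breakLoop flag because a bare break inside a select only exits the select, not the enclosing for loop. A minimal sketch of the same control flow:

```go
package main

import (
	"context"
	"fmt"
)

// drain consumes items until the channel closes or ctx is canceled. The
// breakLoop flag mirrors deserializeMetadata: "break" alone would only exit
// the select, so a loop condition carries the exit out of the for.
func drain(ctx context.Context, items <-chan string) error {
	for breakLoop := false; !breakLoop; {
		select {
		case <-ctx.Done():
			return ctx.Err()

		case item, ok := <-items:
			if !ok {
				// Channel closed; end the outer loop.
				breakLoop = true
				break
			}

			fmt.Println("got", item)
		}
	}

	return nil
}

func main() {
	ch := make(chan string, 2)
	ch <- "delta-tokens-file"
	ch <- "previous-paths-file"
	close(ch)

	fmt.Println(drain(context.Background(), ch)) // got both items, then <nil>
}
```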
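Second, the duplicate-drive-ID guard in deserializeMap: decode into a scratch map first, and merge into the accumulator only when no key collides, so a rejected file never mutates accumulated state. This sketch uses stdlib error wrapping rather than pkg/errors for brevity; decodeInto and its sentinel are hypothetical names:

```go
package main

import (
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"strings"

	"golang.org/x/exp/maps"
)

// errExistingMapping lets callers distinguish "duplicate drive ID" from plain
// decode failures via errors.Is, as the patch does before failing the backup.
var errExistingMapping = errors.New("mapping already exists for same drive ID")

// decodeInto decodes JSON from r into a temporary map and merges it into
// alreadyFound only if none of the new keys are already present.
func decodeInto[T any](r io.ReadCloser, alreadyFound map[string]T) error {
	defer r.Close()

	tmp := map[string]T{}
	if err := json.NewDecoder(r).Decode(&tmp); err != nil {
		return fmt.Errorf("deserializing file contents: %w", err)
	}

	for k := range tmp {
		if _, ok := alreadyFound[k]; ok {
			return errExistingMapping
		}
	}

	maps.Copy(alreadyFound, tmp)

	return nil
}

func main() {
	deltas := map[string]string{}

	// The first file contributes drive "1"; the second repeats it, so the
	// merge is rejected and deltas is left untouched.
	first := io.NopCloser(strings.NewReader(`{"1": "url/1"}`))
	second := io.NopCloser(strings.NewReader(`{"1": "url/2"}`))

	fmt.Println(decodeInto(first, deltas))                                 // <nil>
	fmt.Println(errors.Is(decodeInto(second, deltas), errExistingMapping)) // true
	fmt.Println(deltas)                                                    // map[1:url/1]
}
```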
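Third, the partial-result cleanup: a drive can only be enumerated incrementally when both its delta token and its folder-path map survived deserialization, so one half without the other is dropped. The helper name here is hypothetical; the patch inlines this logic in deserializeMetadata:

```go
package main

import "fmt"

// pruneIncomplete drops drives that have a delta token but no folder-path map,
// or vice-versa. Deleting the current key while ranging over a Go map is safe.
func pruneIncomplete(
	deltas map[string]string,
	folders map[string]map[string]string,
) {
	for driveID := range deltas {
		if _, ok := folders[driveID]; !ok {
			delete(deltas, driveID)
		}
	}

	for driveID := range folders {
		if _, ok := deltas[driveID]; !ok {
			delete(folders, driveID)
		}
	}
}

func main() {
	// Drive "1" has both halves; drive "2" only has a delta token.
	deltas := map[string]string{"1": "url/1", "2": "url/2"}
	folders := map[string]map[string]string{"1": {"folder1": "folder1/path"}}

	pruneIncomplete(deltas, folders)

	fmt.Println(deltas)  // map[1:url/1]
	fmt.Println(folders) // map[1:map[folder1:folder1/path]]
}
```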