diff --git a/CHANGELOG.md b/CHANGELOG.md index 33485d926..09d3845da 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Increase page size preference for delta requests for Exchange to reduce number of roundtrips - OneDrive file/folder permissions can now be backed up and restored - Add `--restore-permissions` flag to toggle restoration of OneDrive permissions +- Add versions to backups so that we can understand/handle older backup formats ### Known Issues diff --git a/src/cmd/factory/impl/common.go b/src/cmd/factory/impl/common.go index 3ed3831fc..78a5dca0e 100644 --- a/src/cmd/factory/impl/common.go +++ b/src/cmd/factory/impl/common.go @@ -16,6 +16,7 @@ import ( "github.com/alcionai/corso/src/internal/connector/mockconnector" "github.com/alcionai/corso/src/internal/data" "github.com/alcionai/corso/src/pkg/account" + "github.com/alcionai/corso/src/pkg/backup" "github.com/alcionai/corso/src/pkg/backup/details" "github.com/alcionai/corso/src/pkg/control" "github.com/alcionai/corso/src/pkg/credentials" @@ -91,7 +92,7 @@ func generateAndRestoreItems( Infof(ctx, "Generating %d %s items in %s\n", howMany, cat, Destination) - return gc.RestoreDataCollections(ctx, acct, sel, dest, opts, dataColls) + return gc.RestoreDataCollections(ctx, backup.Version, acct, sel, dest, opts, dataColls) } // ------------------------------------------------------------------------------------------ diff --git a/src/internal/connector/graph_connector.go b/src/internal/connector/graph_connector.go index 370948639..def430f14 100644 --- a/src/internal/connector/graph_connector.go +++ b/src/internal/connector/graph_connector.go @@ -266,6 +266,7 @@ func (gc *GraphConnector) UnionSiteIDsAndWebURLs(ctx context.Context, ids, urls // SideEffect: gc.status is updated at the completion of operation func (gc *GraphConnector) RestoreDataCollections( ctx context.Context, + backupVersion int, acct account.Account, 
selector selectors.Selector, dest control.RestoreDestination, @@ -290,9 +291,9 @@ func (gc *GraphConnector) RestoreDataCollections( case selectors.ServiceExchange: status, err = exchange.RestoreExchangeDataCollections(ctx, creds, gc.Service, dest, dcs, deets) case selectors.ServiceOneDrive: - status, err = onedrive.RestoreCollections(ctx, gc.Service, dest, opts, dcs, deets) + status, err = onedrive.RestoreCollections(ctx, backupVersion, gc.Service, dest, opts, dcs, deets) case selectors.ServiceSharePoint: - status, err = sharepoint.RestoreCollections(ctx, gc.Service, dest, dcs, deets) + status, err = sharepoint.RestoreCollections(ctx, backupVersion, gc.Service, dest, dcs, deets) default: err = errors.Errorf("restore data from service %s not supported", selector.Service.String()) } diff --git a/src/internal/connector/graph_connector_helper_test.go b/src/internal/connector/graph_connector_helper_test.go index 8a0e22a26..539cbf501 100644 --- a/src/internal/connector/graph_connector_helper_test.go +++ b/src/internal/connector/graph_connector_helper_test.go @@ -172,6 +172,14 @@ type restoreBackupInfo struct { resource resource } +type restoreBackupInfoMultiVersion struct { + name string + service path.ServiceType + collectionsLatest []colInfo + collectionsPrevious []colInfo + resource resource +} + func attachmentEqual( expected models.Attachmentable, got models.Attachmentable, @@ -653,7 +661,7 @@ func compareOneDriveItem( name := item.UUID() expectedData := expected[item.UUID()] - if !assert.NotNil(t, expectedData, "unexpected file with name %s", item.UUID) { + if !assert.NotNil(t, expectedData, "unexpected file with name %s", item.UUID()) { return } @@ -988,6 +996,52 @@ func collectionsForInfo( return totalItems, kopiaEntries, collections, expectedData } +func collectionsForInfoVersion0( + t *testing.T, + service path.ServiceType, + tenant, user string, + dest control.RestoreDestination, + allInfo []colInfo, +) (int, int, []data.Collection, 
map[string]map[string][]byte) { + collections := make([]data.Collection, 0, len(allInfo)) + expectedData := make(map[string]map[string][]byte, len(allInfo)) + totalItems := 0 + kopiaEntries := 0 + + for _, info := range allInfo { + pth := mustToDataLayerPath( + t, + service, + tenant, + user, + info.category, + info.pathElements, + false, + ) + c := mockconnector.NewMockExchangeCollection(pth, len(info.items)) + baseDestPath := backupOutputPathFromRestore(t, dest, pth) + + baseExpected := expectedData[baseDestPath.String()] + if baseExpected == nil { + expectedData[baseDestPath.String()] = make(map[string][]byte, len(info.items)) + baseExpected = expectedData[baseDestPath.String()] + } + + for i := 0; i < len(info.items); i++ { + c.Names[i] = info.items[i].name + c.Data[i] = info.items[i].data + + baseExpected[info.items[i].lookupKey] = info.items[i].data + } + + collections = append(collections, c) + totalItems += len(info.items) + kopiaEntries += len(info.items) + } + + return totalItems, kopiaEntries, collections, expectedData +} + //nolint:deadcode func getSelectorWith( t *testing.T, diff --git a/src/internal/connector/graph_connector_test.go b/src/internal/connector/graph_connector_test.go index 2c2280e37..1cdb0b59e 100644 --- a/src/internal/connector/graph_connector_test.go +++ b/src/internal/connector/graph_connector_test.go @@ -22,6 +22,7 @@ import ( "github.com/alcionai/corso/src/internal/data" "github.com/alcionai/corso/src/internal/tester" "github.com/alcionai/corso/src/pkg/account" + "github.com/alcionai/corso/src/pkg/backup" "github.com/alcionai/corso/src/pkg/control" "github.com/alcionai/corso/src/pkg/path" "github.com/alcionai/corso/src/pkg/selectors" @@ -231,7 +232,15 @@ func (suite *GraphConnectorIntegrationSuite) TestRestoreFailsBadService() { } ) - deets, err := suite.connector.RestoreDataCollections(ctx, acct, sel, dest, control.Options{}, nil) + deets, err := suite.connector.RestoreDataCollections( + ctx, + backup.Version, + acct, + sel, + 
dest, + control.Options{}, + nil, + ) assert.Error(t, err) assert.NotNil(t, deets) @@ -299,6 +308,7 @@ func (suite *GraphConnectorIntegrationSuite) TestEmptyCollections() { deets, err := suite.connector.RestoreDataCollections( ctx, + backup.Version, suite.acct, test.sel, dest, @@ -393,6 +403,7 @@ func runRestoreBackupTest( restoreSel := getSelectorWith(t, test.service, resourceOwners, true) deets, err := restoreGC.RestoreDataCollections( ctx, + backup.Version, acct, restoreSel, dest, @@ -458,6 +469,121 @@ func runRestoreBackupTest( "backup status.Successful; wanted %d items + %d skipped", totalItems, skipped) } +// runRestoreBackupTestVersion0 restores data laid out in the older +// (version 0) backup format and checks the restored data against +// what a backup in the newer format is expected to contain. +func runRestoreBackupTestVersion0( + t *testing.T, + acct account.Account, + test restoreBackupInfoMultiVersion, + tenant string, + resourceOwners []string, + opts control.Options, +) { + var ( + collections []data.Collection + expectedData = map[string]map[string][]byte{} + totalItems = 0 + totalKopiaItems = 0 + // Get a dest per test so they're independent. + dest = tester.DefaultTestRestoreDestination() + ) + + ctx, flush := tester.NewContext() + defer flush() + + for _, owner := range resourceOwners { + _, _, ownerCollections, _ := collectionsForInfoVersion0( + t, + test.service, + tenant, + owner, + dest, + test.collectionsPrevious, + ) + + collections = append(collections, ownerCollections...) 
+ } + + t.Logf( + "Restoring collections to %s for resourceOwners(s) %v\n", + dest.ContainerName, + resourceOwners, + ) + + start := time.Now() + + restoreGC := loadConnector(ctx, t, graph.HTTPClient(graph.NoTimeout()), test.resource) + restoreSel := getSelectorWith(t, test.service, resourceOwners, true) + deets, err := restoreGC.RestoreDataCollections( + ctx, + 0, // backup format version 0: predates the data/metadata file split + acct, + restoreSel, + dest, + opts, + collections, + ) + require.NoError(t, err) + assert.NotNil(t, deets) + + assert.NotNil(t, restoreGC.AwaitStatus()) + + runTime := time.Since(start) + + t.Logf("Restore complete in %v\n", runTime) + + // Run a backup and compare its output with the latest-format expectations. + for _, owner := range resourceOwners { + numItems, kopiaItems, _, userExpectedData := collectionsForInfo( + t, + test.service, + tenant, + owner, + dest, + test.collectionsLatest, + ) + + totalItems += numItems + totalKopiaItems += kopiaItems + + maps.Copy(expectedData, userExpectedData) + } + + cats := make(map[path.CategoryType]struct{}, len(test.collectionsLatest)) + for _, c := range test.collectionsLatest { + cats[c.category] = struct{}{} + } + + expectedDests := make([]destAndCats, 0, len(resourceOwners)) + for _, ro := range resourceOwners { + expectedDests = append(expectedDests, destAndCats{ + resourceOwner: ro, + dest: dest.ContainerName, + cats: cats, + }) + } + + backupGC := loadConnector(ctx, t, graph.HTTPClient(graph.NoTimeout()), test.resource) + backupSel := backupSelectorForExpected(t, test.service, expectedDests) + + start = time.Now() + dcs, excludes, err := backupGC.DataCollections(ctx, backupSel, nil, control.Options{RestorePermissions: true}) + require.NoError(t, err) + // No excludes yet because this isn't an incremental backup. + assert.Empty(t, excludes) + + t.Logf("Backup enumeration complete in %v\n", time.Since(start)) + + // Pull the data prior to waiting for the status as otherwise it will + // deadlock. 
+ skipped := checkCollections(t, totalKopiaItems, expectedData, dcs, opts.RestorePermissions) + + status := backupGC.AwaitStatus() + assert.Equal(t, totalItems+skipped, status.ObjectCount, "status.ObjectCount") + assert.Equal(t, totalItems+skipped, status.Successful, "status.Successful") +} + func getTestMetaJSON(t *testing.T, user string, roles []string) []byte { id := base64.StdEncoding.EncodeToString([]byte(user + strings.Join(roles, "+"))) testMeta := onedrive.Metadata{Permissions: []onedrive.UserPermission{ @@ -906,6 +1032,255 @@ func (suite *GraphConnectorIntegrationSuite) TestRestoreAndBackup() { } } +func (suite *GraphConnectorIntegrationSuite) TestRestoreAndBackupVersion0() { + ctx, flush := tester.NewContext() + defer flush() + + // Get the default drive ID for the test user. + driveID := mustGetDefaultDriveID( + suite.T(), + ctx, + suite.connector.Service, + suite.user, + ) + + table := []restoreBackupInfoMultiVersion{ + { + name: "OneDriveMultipleFoldersAndFiles", + service: path.OneDriveService, + resource: Users, + + collectionsPrevious: []colInfo{ + { + pathElements: []string{ + "drives", + driveID, + "root:", + }, + category: path.FilesCategory, + items: []itemInfo{ + { + name: "test-file.txt", + data: []byte(strings.Repeat("a", 33)), + lookupKey: "test-file.txt", + }, + }, + }, + { + pathElements: []string{ + "drives", + driveID, + "root:", + "folder-a", + }, + category: path.FilesCategory, + items: []itemInfo{ + { + name: "test-file.txt", + data: []byte(strings.Repeat("b", 65)), + lookupKey: "test-file.txt", + }, + }, + }, + { + pathElements: []string{ + "drives", + driveID, + "root:", + "folder-a", + "b", + }, + category: path.FilesCategory, + items: []itemInfo{ + { + name: "test-file.txt", + data: []byte(strings.Repeat("c", 129)), + lookupKey: "test-file.txt", + }, + }, + }, + { + pathElements: []string{ + "drives", + driveID, + "root:", + "folder-a", + "b", + "folder-a", + }, + category: path.FilesCategory, + items: []itemInfo{ + { + name: 
"test-file.txt", + data: []byte(strings.Repeat("d", 257)), + lookupKey: "test-file.txt", + }, + }, + }, + { + pathElements: []string{ + "drives", + driveID, + "root:", + "b", + }, + category: path.FilesCategory, + items: []itemInfo{ + { + name: "test-file.txt", + data: []byte(strings.Repeat("e", 257)), + lookupKey: "test-file.txt", + }, + }, + }, + }, + + collectionsLatest: []colInfo{ + { + pathElements: []string{ + "drives", + driveID, + "root:", + }, + category: path.FilesCategory, + items: []itemInfo{ + { + name: "test-file.txt" + onedrive.DataFileSuffix, + data: []byte(strings.Repeat("a", 33)), + lookupKey: "test-file.txt" + onedrive.DataFileSuffix, + }, + { + name: "test-file.txt" + onedrive.MetaFileSuffix, + data: []byte("{}"), + lookupKey: "test-file.txt" + onedrive.MetaFileSuffix, + }, + { + name: "folder-a" + onedrive.DirMetaFileSuffix, + data: []byte("{}"), + lookupKey: "folder-a" + onedrive.DirMetaFileSuffix, + }, + { + name: "b" + onedrive.DirMetaFileSuffix, + data: []byte("{}"), + lookupKey: "b" + onedrive.DirMetaFileSuffix, + }, + }, + }, + { + pathElements: []string{ + "drives", + driveID, + "root:", + "folder-a", + }, + category: path.FilesCategory, + items: []itemInfo{ + { + name: "test-file.txt" + onedrive.DataFileSuffix, + data: []byte(strings.Repeat("b", 65)), + lookupKey: "test-file.txt" + onedrive.DataFileSuffix, + }, + { + name: "test-file.txt" + onedrive.MetaFileSuffix, + data: []byte("{}"), + lookupKey: "test-file.txt" + onedrive.MetaFileSuffix, + }, + { + name: "b" + onedrive.DirMetaFileSuffix, + data: []byte("{}"), + lookupKey: "b" + onedrive.DirMetaFileSuffix, + }, + }, + }, + { + pathElements: []string{ + "drives", + driveID, + "root:", + "folder-a", + "b", + }, + category: path.FilesCategory, + items: []itemInfo{ + { + name: "test-file.txt" + onedrive.DataFileSuffix, + data: []byte(strings.Repeat("c", 129)), + lookupKey: "test-file.txt" + onedrive.DataFileSuffix, + }, + { + name: "test-file.txt" + onedrive.MetaFileSuffix, + data: 
[]byte("{}"), + lookupKey: "test-file.txt" + onedrive.MetaFileSuffix, + }, + { + name: "folder-a" + onedrive.DirMetaFileSuffix, + data: []byte("{}"), + lookupKey: "folder-a" + onedrive.DirMetaFileSuffix, + }, + }, + }, + { + pathElements: []string{ + "drives", + driveID, + "root:", + "folder-a", + "b", + "folder-a", + }, + category: path.FilesCategory, + items: []itemInfo{ + { + name: "test-file.txt" + onedrive.DataFileSuffix, + data: []byte(strings.Repeat("d", 257)), + lookupKey: "test-file.txt" + onedrive.DataFileSuffix, + }, + { + name: "test-file.txt" + onedrive.MetaFileSuffix, + data: []byte("{}"), + lookupKey: "test-file.txt" + onedrive.MetaFileSuffix, + }, + }, + }, + { + pathElements: []string{ + "drives", + driveID, + "root:", + "b", + }, + category: path.FilesCategory, + items: []itemInfo{ + { + name: "test-file.txt" + onedrive.DataFileSuffix, + data: []byte(strings.Repeat("e", 257)), + lookupKey: "test-file.txt" + onedrive.DataFileSuffix, + }, + { + name: "test-file.txt" + onedrive.MetaFileSuffix, + data: []byte("{}"), + lookupKey: "test-file.txt" + onedrive.MetaFileSuffix, + }, + }, + }, + }, + }, + } + + for _, test := range table { + suite.T().Run(test.name, func(t *testing.T) { + runRestoreBackupTestVersion0( + t, + suite.acct, + test, + suite.connector.tenant, + []string{suite.user}, + control.Options{RestorePermissions: true}, + ) + }) + } +} + func (suite *GraphConnectorIntegrationSuite) TestMultiFolderBackupDifferentNames() { table := []restoreBackupInfo{ { @@ -1012,6 +1387,7 @@ func (suite *GraphConnectorIntegrationSuite) TestMultiFolderBackupDifferentNames restoreGC := loadConnector(ctx, t, graph.HTTPClient(graph.NoTimeout()), test.resource) deets, err := restoreGC.RestoreDataCollections( ctx, + backup.Version, suite.acct, restoreSel, dest, diff --git a/src/internal/connector/onedrive/restore.go b/src/internal/connector/onedrive/restore.go index af591cd86..0014457c4 100644 --- a/src/internal/connector/onedrive/restore.go +++ 
b/src/internal/connector/onedrive/restore.go @@ -29,6 +29,11 @@ const ( // Microsoft recommends 5-10MB buffers // https://docs.microsoft.com/en-us/graph/api/driveitem-createuploadsession?view=graph-rest-1.0#best-practices copyBufferSize = 5 * 1024 * 1024 + + // versionWithDataAndMetaFiles is the corso backup format version + // in which we split from storing just the data to storing both + // the data and metadata in two files. + versionWithDataAndMetaFiles = 1 ) func getParentPermissions( @@ -55,6 +60,7 @@ func getParentPermissions( // RestoreCollections will restore the specified data collections into OneDrive func RestoreCollections( ctx context.Context, + backupVersion int, service graph.Servicer, dest control.RestoreDestination, opts control.Options, @@ -101,6 +107,7 @@ func RestoreCollections( metrics, folderPerms, permissionIDMappings, canceled = RestoreCollection( ctx, + backupVersion, service, dc, parentPerms, @@ -139,6 +146,7 @@ func RestoreCollections( // - the context cancellation state (true if the context is canceled) func RestoreCollection( ctx context.Context, + backupVersion int, service graph.Servicer, dc data.Collection, parentPerms []UserPermission, @@ -211,7 +219,7 @@ func RestoreCollection( continue } - if source == OneDriveSource { + if source == OneDriveSource && backupVersion >= versionWithDataAndMetaFiles { name := itemData.UUID() if strings.HasSuffix(name, DataFileSuffix) { metrics.Objects++ diff --git a/src/internal/connector/sharepoint/restore.go b/src/internal/connector/sharepoint/restore.go index 4784ed209..3cf35d287 100644 --- a/src/internal/connector/sharepoint/restore.go +++ b/src/internal/connector/sharepoint/restore.go @@ -36,6 +36,7 @@ import ( // RestoreCollections will restore the specified data collections into OneDrive func RestoreCollections( ctx context.Context, + backupVersion int, service graph.Servicer, dest control.RestoreDestination, dcs []data.Collection, @@ -61,6 +62,7 @@ func RestoreCollections( case 
path.LibrariesCategory: metrics, _, _, canceled = onedrive.RestoreCollection( ctx, + backupVersion, service, dc, []onedrive.UserPermission{}, // Currently permission data is not stored for sharepoint diff --git a/src/internal/operations/backup_integration_test.go b/src/internal/operations/backup_integration_test.go index a57a9d2be..5e9af1c46 100644 --- a/src/internal/operations/backup_integration_test.go +++ b/src/internal/operations/backup_integration_test.go @@ -339,7 +339,15 @@ func generateContainerOfItems( dest, collections) - deets, err := gc.RestoreDataCollections(ctx, acct, sel, dest, control.Options{RestorePermissions: true}, dataColls) + deets, err := gc.RestoreDataCollections( + ctx, + backup.Version, + acct, + sel, + dest, + control.Options{RestorePermissions: true}, + dataColls, + ) require.NoError(t, err) return deets diff --git a/src/internal/operations/restore.go b/src/internal/operations/restore.go index 206eb8026..cd52e5be3 100644 --- a/src/internal/operations/restore.go +++ b/src/internal/operations/restore.go @@ -219,6 +219,7 @@ func (op *RestoreOperation) do(ctx context.Context) (restoreDetails *details.Det restoreDetails, err = gc.RestoreDataCollections( ctx, + bup.Version, op.account, op.Selectors, op.Destination, diff --git a/src/pkg/backup/backup.go b/src/pkg/backup/backup.go index d0d9ddffd..4422b3a47 100644 --- a/src/pkg/backup/backup.go +++ b/src/pkg/backup/backup.go @@ -14,6 +14,8 @@ import ( "github.com/alcionai/corso/src/pkg/selectors" ) +const Version = 1 + // Backup represents the result of a backup operation type Backup struct { model.BaseModel @@ -32,6 +34,9 @@ type Backup struct { // Selector used in this operation Selector selectors.Selector `json:"selectors"` + // Version represents the version of the backup format + Version int `json:"version"` + // Errors contains all errors aggregated during a backup operation. 
Errors fault.ErrorsData `json:"errors"` @@ -67,6 +72,7 @@ func New( Errors: errs.Data(), ReadWrites: rw, StartAndEndTime: se, + Version: Version, } }