Backup versioning (#2324)

## Description

Add backup format version information to backups so that we can distinguish between backups that store a single file per item and ones that store both `.data` and `.meta` files.

Overrides https://github.com/alcionai/corso/pull/2297. I've also set it against `main` so that the diff shows up properly.
Ref: https://github.com/alcionai/corso/pull/2324#issuecomment-1409709118
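
For context only (not code from this PR): a minimal sketch of the behavior the version gate enables. A restore path that knows the backup's format version can decide whether an item name is plain content (version 0, single-file backups) or one half of a `.data`/`.meta` pair (version 1 and later). The lower-cased constants and the `classifyItem` helper below are illustrative stand-ins for the identifiers added in the diff (`backup.Version`, `versionWithDataAndMetaFiles`, `DataFileSuffix`, `MetaFileSuffix`).

```go
package main

import (
	"fmt"
	"strings"
)

const (
	// Format version written by current builds (stand-in for backup.Version).
	currentVersion = 1
	// First version that splits each item into a .data and a .meta file.
	versionWithDataAndMetaFiles = 1

	dataFileSuffix = ".data"
	metaFileSuffix = ".meta"
)

// classifyItem decides how a restored item name should be interpreted,
// given the format version recorded in the backup model.
func classifyItem(backupVersion int, name string) string {
	if backupVersion < versionWithDataAndMetaFiles {
		// Old backups store a single file per item; the name is the item itself.
		return "content: " + name
	}

	switch {
	case strings.HasSuffix(name, dataFileSuffix):
		return "content: " + strings.TrimSuffix(name, dataFileSuffix)
	case strings.HasSuffix(name, metaFileSuffix):
		return "permissions metadata for: " + strings.TrimSuffix(name, metaFileSuffix)
	default:
		return "unexpected item: " + name
	}
}

func main() {
	fmt.Println(classifyItem(0, "report.docx"))                    // single-file backup
	fmt.Println(classifyItem(currentVersion, "report.docx.data"))  // data half
	fmt.Println(classifyItem(currentVersion, "report.docx.meta"))  // metadata half
}
```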

## Does this PR need a docs update or release note?

- [x]  Yes, it's included
- [ ] 🕐 Yes, but in a later PR
- [ ]  No 

## Type of change

<!--- Please check the type of change your PR introduces: --->
- [x] 🌻 Feature
- [ ] 🐛 Bugfix
- [ ] 🗺️ Documentation
- [ ] 🤖 Test
- [ ] 💻 CI/Deployment
- [ ] 🧹 Tech Debt/Cleanup

## Issue(s)

<!-- Can reference multiple issues. Use one of the following "magic words" - "closes, fixes" to auto-close the Github issue. -->
* fixes https://github.com/alcionai/corso/issues/2230
* https://github.com/alcionai/corso/issues/2253

## Test Plan

<!-- How will this be tested prior to merging.-->
- [x] 💪 Manual (Tested manually, will add an e2e test in a followup)
- [ ]  Unit test
- [ ] 💚 E2E
Abin Simon 2023-02-03 09:33:14 +05:30 committed by GitHub
parent b8bc85deba
commit 9da0a7878b
10 changed files with 465 additions and 7 deletions

View File

@ -14,6 +14,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Increase page size preference for delta requests for Exchange to reduce number of roundtrips
- OneDrive file/folder permissions can now be backed up and restored
- Add `--restore-permissions` flag to toggle restoration of OneDrive permissions
- Add versions to backups so that we can understand/handle older backup formats

### Known Issues

View File

@ -16,6 +16,7 @@ import (
"github.com/alcionai/corso/src/internal/connector/mockconnector" "github.com/alcionai/corso/src/internal/connector/mockconnector"
"github.com/alcionai/corso/src/internal/data" "github.com/alcionai/corso/src/internal/data"
"github.com/alcionai/corso/src/pkg/account" "github.com/alcionai/corso/src/pkg/account"
"github.com/alcionai/corso/src/pkg/backup"
"github.com/alcionai/corso/src/pkg/backup/details" "github.com/alcionai/corso/src/pkg/backup/details"
"github.com/alcionai/corso/src/pkg/control" "github.com/alcionai/corso/src/pkg/control"
"github.com/alcionai/corso/src/pkg/credentials" "github.com/alcionai/corso/src/pkg/credentials"
@ -91,7 +92,7 @@ func generateAndRestoreItems(
Infof(ctx, "Generating %d %s items in %s\n", howMany, cat, Destination) Infof(ctx, "Generating %d %s items in %s\n", howMany, cat, Destination)
return gc.RestoreDataCollections(ctx, acct, sel, dest, opts, dataColls) return gc.RestoreDataCollections(ctx, backup.Version, acct, sel, dest, opts, dataColls)
} }
// ------------------------------------------------------------------------------------------ // ------------------------------------------------------------------------------------------

View File

@ -266,6 +266,7 @@ func (gc *GraphConnector) UnionSiteIDsAndWebURLs(ctx context.Context, ids, urls
// SideEffect: gc.status is updated at the completion of operation
func (gc *GraphConnector) RestoreDataCollections(
ctx context.Context,
backupVersion int,
acct account.Account,
selector selectors.Selector,
dest control.RestoreDestination,
@ -290,9 +291,9 @@ func (gc *GraphConnector) RestoreDataCollections(
case selectors.ServiceExchange:
status, err = exchange.RestoreExchangeDataCollections(ctx, creds, gc.Service, dest, dcs, deets)
case selectors.ServiceOneDrive:
- status, err = onedrive.RestoreCollections(ctx, gc.Service, dest, opts, dcs, deets)
+ status, err = onedrive.RestoreCollections(ctx, backupVersion, gc.Service, dest, opts, dcs, deets)
case selectors.ServiceSharePoint:
- status, err = sharepoint.RestoreCollections(ctx, gc.Service, dest, dcs, deets)
+ status, err = sharepoint.RestoreCollections(ctx, backupVersion, gc.Service, dest, dcs, deets)
default:
err = errors.Errorf("restore data from service %s not supported", selector.Service.String())
}

View File

@ -172,6 +172,14 @@ type restoreBackupInfo struct {
resource resource
}
type restoreBackupInfoMultiVersion struct {
name string
service path.ServiceType
collectionsLatest []colInfo
collectionsPrevious []colInfo
resource resource
}
func attachmentEqual(
expected models.Attachmentable,
got models.Attachmentable,
@ -653,7 +661,7 @@ func compareOneDriveItem(
name := item.UUID()
expectedData := expected[item.UUID()]
- if !assert.NotNil(t, expectedData, "unexpected file with name %s", item.UUID) {
+ if !assert.NotNil(t, expectedData, "unexpected file with name %s", item.UUID()) {
return
}
@ -988,6 +996,52 @@ func collectionsForInfo(
return totalItems, kopiaEntries, collections, expectedData
}
func collectionsForInfoVersion0(
t *testing.T,
service path.ServiceType,
tenant, user string,
dest control.RestoreDestination,
allInfo []colInfo,
) (int, int, []data.Collection, map[string]map[string][]byte) {
collections := make([]data.Collection, 0, len(allInfo))
expectedData := make(map[string]map[string][]byte, len(allInfo))
totalItems := 0
kopiaEntries := 0
for _, info := range allInfo {
pth := mustToDataLayerPath(
t,
service,
tenant,
user,
info.category,
info.pathElements,
false,
)
c := mockconnector.NewMockExchangeCollection(pth, len(info.items))
baseDestPath := backupOutputPathFromRestore(t, dest, pth)
baseExpected := expectedData[baseDestPath.String()]
if baseExpected == nil {
expectedData[baseDestPath.String()] = make(map[string][]byte, len(info.items))
baseExpected = expectedData[baseDestPath.String()]
}
for i := 0; i < len(info.items); i++ {
c.Names[i] = info.items[i].name
c.Data[i] = info.items[i].data
baseExpected[info.items[i].lookupKey] = info.items[i].data
}
collections = append(collections, c)
totalItems += len(info.items)
kopiaEntries += len(info.items)
}
return totalItems, kopiaEntries, collections, expectedData
}
//nolint:deadcode
func getSelectorWith(
t *testing.T,

View File

@ -22,6 +22,7 @@ import (
"github.com/alcionai/corso/src/internal/data" "github.com/alcionai/corso/src/internal/data"
"github.com/alcionai/corso/src/internal/tester" "github.com/alcionai/corso/src/internal/tester"
"github.com/alcionai/corso/src/pkg/account" "github.com/alcionai/corso/src/pkg/account"
"github.com/alcionai/corso/src/pkg/backup"
"github.com/alcionai/corso/src/pkg/control" "github.com/alcionai/corso/src/pkg/control"
"github.com/alcionai/corso/src/pkg/path" "github.com/alcionai/corso/src/pkg/path"
"github.com/alcionai/corso/src/pkg/selectors" "github.com/alcionai/corso/src/pkg/selectors"
@ -231,7 +232,15 @@ func (suite *GraphConnectorIntegrationSuite) TestRestoreFailsBadService() {
} }
) )
deets, err := suite.connector.RestoreDataCollections(ctx, acct, sel, dest, control.Options{}, nil) deets, err := suite.connector.RestoreDataCollections(
ctx,
backup.Version,
acct,
sel,
dest,
control.Options{},
nil,
)
assert.Error(t, err)
assert.NotNil(t, deets)
@ -299,6 +308,7 @@ func (suite *GraphConnectorIntegrationSuite) TestEmptyCollections() {
deets, err := suite.connector.RestoreDataCollections(
ctx,
backup.Version,
suite.acct,
test.sel,
dest,
@ -393,6 +403,7 @@ func runRestoreBackupTest(
restoreSel := getSelectorWith(t, test.service, resourceOwners, true)
deets, err := restoreGC.RestoreDataCollections(
ctx,
backup.Version,
acct,
restoreSel,
dest,
@ -458,6 +469,121 @@ func runRestoreBackupTest(
"backup status.Successful; wanted %d items + %d skipped", totalItems, skipped)
}
// runRestoreBackupTestVersion0 restores data in the layout of an older
// backup format version and then checks the restored data against what
// a backup in the newer format would contain.
func runRestoreBackupTestVersion0(
t *testing.T,
acct account.Account,
test restoreBackupInfoMultiVersion,
tenant string,
resourceOwners []string,
opts control.Options,
) {
var (
collections []data.Collection
expectedData = map[string]map[string][]byte{}
totalItems = 0
totalKopiaItems = 0
// Get a dest per test so they're independent.
dest = tester.DefaultTestRestoreDestination()
)
ctx, flush := tester.NewContext()
defer flush()
for _, owner := range resourceOwners {
_, _, ownerCollections, _ := collectionsForInfoVersion0(
t,
test.service,
tenant,
owner,
dest,
test.collectionsPrevious,
)
collections = append(collections, ownerCollections...)
}
t.Logf(
"Restoring collections to %s for resourceOwners(s) %v\n",
dest.ContainerName,
resourceOwners,
)
start := time.Now()
restoreGC := loadConnector(ctx, t, graph.HTTPClient(graph.NoTimeout()), test.resource)
restoreSel := getSelectorWith(t, test.service, resourceOwners, true)
deets, err := restoreGC.RestoreDataCollections(
ctx,
0, // The OG version ;)
acct,
restoreSel,
dest,
opts,
collections,
)
require.NoError(t, err)
assert.NotNil(t, deets)
assert.NotNil(t, restoreGC.AwaitStatus())
runTime := time.Since(start)
t.Logf("Restore complete in %v\n", runTime)
// Run a backup and compare its output with what we put in.
for _, owner := range resourceOwners {
numItems, kopiaItems, _, userExpectedData := collectionsForInfo(
t,
test.service,
tenant,
owner,
dest,
test.collectionsLatest,
)
totalItems += numItems
totalKopiaItems += kopiaItems
maps.Copy(expectedData, userExpectedData)
}
cats := make(map[path.CategoryType]struct{}, len(test.collectionsLatest))
for _, c := range test.collectionsLatest {
cats[c.category] = struct{}{}
}
expectedDests := make([]destAndCats, 0, len(resourceOwners))
for _, ro := range resourceOwners {
expectedDests = append(expectedDests, destAndCats{
resourceOwner: ro,
dest: dest.ContainerName,
cats: cats,
})
}
backupGC := loadConnector(ctx, t, graph.HTTPClient(graph.NoTimeout()), test.resource)
backupSel := backupSelectorForExpected(t, test.service, expectedDests)
start = time.Now()
dcs, excludes, err := backupGC.DataCollections(ctx, backupSel, nil, control.Options{RestorePermissions: true})
require.NoError(t, err)
// No excludes yet because this isn't an incremental backup.
assert.Empty(t, excludes)
t.Logf("Backup enumeration complete in %v\n", time.Since(start))
// Pull the data prior to waiting for the status as otherwise it will
// deadlock.
skipped := checkCollections(t, totalKopiaItems, expectedData, dcs, opts.RestorePermissions)
status := backupGC.AwaitStatus()
assert.Equal(t, totalItems+skipped, status.ObjectCount, "status.ObjectCount")
assert.Equal(t, totalItems+skipped, status.Successful, "status.Successful")
}
func getTestMetaJSON(t *testing.T, user string, roles []string) []byte {
id := base64.StdEncoding.EncodeToString([]byte(user + strings.Join(roles, "+")))
testMeta := onedrive.Metadata{Permissions: []onedrive.UserPermission{
@ -906,6 +1032,255 @@ func (suite *GraphConnectorIntegrationSuite) TestRestoreAndBackup() {
}
}
func (suite *GraphConnectorIntegrationSuite) TestRestoreAndBackupVersion0() {
ctx, flush := tester.NewContext()
defer flush()
// Get the default drive ID for the test user.
driveID := mustGetDefaultDriveID(
suite.T(),
ctx,
suite.connector.Service,
suite.user,
)
table := []restoreBackupInfoMultiVersion{
{
name: "OneDriveMultipleFoldersAndFiles",
service: path.OneDriveService,
resource: Users,
collectionsPrevious: []colInfo{
{
pathElements: []string{
"drives",
driveID,
"root:",
},
category: path.FilesCategory,
items: []itemInfo{
{
name: "test-file.txt",
data: []byte(strings.Repeat("a", 33)),
lookupKey: "test-file.txt",
},
},
},
{
pathElements: []string{
"drives",
driveID,
"root:",
"folder-a",
},
category: path.FilesCategory,
items: []itemInfo{
{
name: "test-file.txt",
data: []byte(strings.Repeat("b", 65)),
lookupKey: "test-file.txt",
},
},
},
{
pathElements: []string{
"drives",
driveID,
"root:",
"folder-a",
"b",
},
category: path.FilesCategory,
items: []itemInfo{
{
name: "test-file.txt",
data: []byte(strings.Repeat("c", 129)),
lookupKey: "test-file.txt",
},
},
},
{
pathElements: []string{
"drives",
driveID,
"root:",
"folder-a",
"b",
"folder-a",
},
category: path.FilesCategory,
items: []itemInfo{
{
name: "test-file.txt",
data: []byte(strings.Repeat("d", 257)),
lookupKey: "test-file.txt",
},
},
},
{
pathElements: []string{
"drives",
driveID,
"root:",
"b",
},
category: path.FilesCategory,
items: []itemInfo{
{
name: "test-file.txt",
data: []byte(strings.Repeat("e", 257)),
lookupKey: "test-file.txt",
},
},
},
},
collectionsLatest: []colInfo{
{
pathElements: []string{
"drives",
driveID,
"root:",
},
category: path.FilesCategory,
items: []itemInfo{
{
name: "test-file.txt" + onedrive.DataFileSuffix,
data: []byte(strings.Repeat("a", 33)),
lookupKey: "test-file.txt" + onedrive.DataFileSuffix,
},
{
name: "test-file.txt" + onedrive.MetaFileSuffix,
data: []byte("{}"),
lookupKey: "test-file.txt" + onedrive.MetaFileSuffix,
},
{
name: "folder-a" + onedrive.DirMetaFileSuffix,
data: []byte("{}"),
lookupKey: "folder-a" + onedrive.DirMetaFileSuffix,
},
{
name: "b" + onedrive.DirMetaFileSuffix,
data: []byte("{}"),
lookupKey: "b" + onedrive.DirMetaFileSuffix,
},
},
},
{
pathElements: []string{
"drives",
driveID,
"root:",
"folder-a",
},
category: path.FilesCategory,
items: []itemInfo{
{
name: "test-file.txt" + onedrive.DataFileSuffix,
data: []byte(strings.Repeat("b", 65)),
lookupKey: "test-file.txt" + onedrive.DataFileSuffix,
},
{
name: "test-file.txt" + onedrive.MetaFileSuffix,
data: []byte("{}"),
lookupKey: "test-file.txt" + onedrive.MetaFileSuffix,
},
{
name: "b" + onedrive.DirMetaFileSuffix,
data: []byte("{}"),
lookupKey: "b" + onedrive.DirMetaFileSuffix,
},
},
},
{
pathElements: []string{
"drives",
driveID,
"root:",
"folder-a",
"b",
},
category: path.FilesCategory,
items: []itemInfo{
{
name: "test-file.txt" + onedrive.DataFileSuffix,
data: []byte(strings.Repeat("c", 129)),
lookupKey: "test-file.txt" + onedrive.DataFileSuffix,
},
{
name: "test-file.txt" + onedrive.MetaFileSuffix,
data: []byte("{}"),
lookupKey: "test-file.txt" + onedrive.MetaFileSuffix,
},
{
name: "folder-a" + onedrive.DirMetaFileSuffix,
data: []byte("{}"),
lookupKey: "folder-a" + onedrive.DirMetaFileSuffix,
},
},
},
{
pathElements: []string{
"drives",
driveID,
"root:",
"folder-a",
"b",
"folder-a",
},
category: path.FilesCategory,
items: []itemInfo{
{
name: "test-file.txt" + onedrive.DataFileSuffix,
data: []byte(strings.Repeat("d", 257)),
lookupKey: "test-file.txt" + onedrive.DataFileSuffix,
},
{
name: "test-file.txt" + onedrive.MetaFileSuffix,
data: []byte("{}"),
lookupKey: "test-file.txt" + onedrive.MetaFileSuffix,
},
},
},
{
pathElements: []string{
"drives",
driveID,
"root:",
"b",
},
category: path.FilesCategory,
items: []itemInfo{
{
name: "test-file.txt" + onedrive.DataFileSuffix,
data: []byte(strings.Repeat("e", 257)),
lookupKey: "test-file.txt" + onedrive.DataFileSuffix,
},
{
name: "test-file.txt" + onedrive.MetaFileSuffix,
data: []byte("{}"),
lookupKey: "test-file.txt" + onedrive.MetaFileSuffix,
},
},
},
},
},
}
for _, test := range table {
suite.T().Run(test.name, func(t *testing.T) {
runRestoreBackupTestVersion0(
t,
suite.acct,
test,
suite.connector.tenant,
[]string{suite.user},
control.Options{RestorePermissions: true},
)
})
}
}
func (suite *GraphConnectorIntegrationSuite) TestMultiFolderBackupDifferentNames() {
table := []restoreBackupInfo{
{
@ -1012,6 +1387,7 @@ func (suite *GraphConnectorIntegrationSuite) TestMultiFolderBackupDifferentNames
restoreGC := loadConnector(ctx, t, graph.HTTPClient(graph.NoTimeout()), test.resource)
deets, err := restoreGC.RestoreDataCollections(
ctx,
backup.Version,
suite.acct,
restoreSel,
dest,

View File

@ -29,6 +29,11 @@ const (
// Microsoft recommends 5-10MB buffers
// https://docs.microsoft.com/en-us/graph/api/driveitem-createuploadsession?view=graph-rest-1.0#best-practices
copyBufferSize = 5 * 1024 * 1024
// versionWithDataAndMetaFiles is the corso backup format version
// in which we moved from storing only the item data to storing the
// data and metadata in two separate files.
versionWithDataAndMetaFiles = 1
)
func getParentPermissions(
@ -55,6 +60,7 @@ func getParentPermissions(
// RestoreCollections will restore the specified data collections into OneDrive
func RestoreCollections(
ctx context.Context,
backupVersion int,
service graph.Servicer,
dest control.RestoreDestination,
opts control.Options,
@ -101,6 +107,7 @@ func RestoreCollections(
metrics, folderPerms, permissionIDMappings, canceled = RestoreCollection(
ctx,
backupVersion,
service,
dc,
parentPerms,
@ -139,6 +146,7 @@ func RestoreCollections(
// - the context cancellation state (true if the context is canceled)
func RestoreCollection(
ctx context.Context,
backupVersion int,
service graph.Servicer,
dc data.Collection,
parentPerms []UserPermission,
@ -211,7 +219,7 @@ func RestoreCollection(
continue
}
- if source == OneDriveSource {
+ if source == OneDriveSource && backupVersion >= versionWithDataAndMetaFiles {
name := itemData.UUID()
if strings.HasSuffix(name, DataFileSuffix) {
metrics.Objects++

View File

@ -36,6 +36,7 @@ import (
// RestoreCollections will restore the specified data collections into OneDrive
func RestoreCollections(
ctx context.Context,
backupVersion int,
service graph.Servicer,
dest control.RestoreDestination,
dcs []data.Collection,
@ -61,6 +62,7 @@ func RestoreCollections(
case path.LibrariesCategory:
metrics, _, _, canceled = onedrive.RestoreCollection(
ctx,
backupVersion,
service,
dc,
[]onedrive.UserPermission{}, // Currently permission data is not stored for sharepoint

View File

@ -339,7 +339,15 @@ func generateContainerOfItems(
dest,
collections)
- deets, err := gc.RestoreDataCollections(ctx, acct, sel, dest, control.Options{RestorePermissions: true}, dataColls)
+ deets, err := gc.RestoreDataCollections(
ctx,
backup.Version,
acct,
sel,
dest,
control.Options{RestorePermissions: true},
dataColls,
)
require.NoError(t, err)
return deets

View File

@ -219,6 +219,7 @@ func (op *RestoreOperation) do(ctx context.Context) (restoreDetails *details.Det
restoreDetails, err = gc.RestoreDataCollections(
ctx,
bup.Version,
op.account,
op.Selectors,
op.Destination,

View File

@ -14,6 +14,8 @@ import (
"github.com/alcionai/corso/src/pkg/selectors" "github.com/alcionai/corso/src/pkg/selectors"
) )
const Version = 1
// Backup represents the result of a backup operation // Backup represents the result of a backup operation
type Backup struct { type Backup struct {
model.BaseModel model.BaseModel
@ -32,6 +34,9 @@ type Backup struct {
// Selector used in this operation // Selector used in this operation
Selector selectors.Selector `json:"selectors"` Selector selectors.Selector `json:"selectors"`
// Version represents the version of the backup format
Version int `json:"version"`
// Errors contains all errors aggregated during a backup operation. // Errors contains all errors aggregated during a backup operation.
Errors fault.ErrorsData `json:"errors"` Errors fault.ErrorsData `json:"errors"`
@ -67,6 +72,7 @@ func New(
Errors: errs.Data(), Errors: errs.Data(),
ReadWrites: rw, ReadWrites: rw,
StartAndEndTime: se, StartAndEndTime: se,
Version: Version,
} }
} }
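
As an aside (illustrative only, not part of the diff): assuming the backup model round-trips through JSON as the struct tags above suggest, older records written without a `version` key unmarshal with the new `Version` field left at its zero value. That is why version `0` can stand for the pre-`.data`/`.meta` format while current backups record `Version = 1`. A minimal sketch with a stand-in struct:

```go
package main

import (
	"encoding/json"
	"fmt"
)

// backupModel stands in for pkg/backup.Backup with only the fields
// relevant to format versioning.
type backupModel struct {
	ID      string `json:"id"`
	Version int    `json:"version"`
}

func main() {
	// An older record has no "version" key, so Version stays 0, which the
	// restore path can treat as the single-file (pre-.data/.meta) format.
	oldRaw := []byte(`{"id":"abc123"}`)

	var old backupModel
	_ = json.Unmarshal(oldRaw, &old)
	fmt.Println("old backup version:", old.Version) // 0

	// New backups record the current format version (1 in this PR).
	newBup := backupModel{ID: "def456", Version: 1}
	out, _ := json.Marshal(newBup)
	fmt.Println(string(out)) // {"id":"def456","version":1}
}
```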