Backup versioning (#2324)

## Description

Add backup format version information to backups so that we can distinguish backups that store each item as a single file from those that store both `.data` and `.meta` files.

Overrides https://github.com/alcionai/corso/pull/2297. I've also set it against `main` so that the diff shows up properly.
Ref: https://github.com/alcionai/corso/pull/2324#issuecomment-1409709118

## Does this PR need a docs update or release note?

- [x]  Yes, it's included
- [ ] 🕐 Yes, but in a later PR
- [ ]  No 

## Type of change

<!--- Please check the type of change your PR introduces: --->
- [x] 🌻 Feature
- [ ] 🐛 Bugfix
- [ ] 🗺️ Documentation
- [ ] 🤖 Test
- [ ] 💻 CI/Deployment
- [ ] 🧹 Tech Debt/Cleanup

## Issue(s)

<!-- Can reference multiple issues. Use one of the following "magic words" - "closes, fixes" to auto-close the Github issue. -->
* fixes https://github.com/alcionai/corso/issues/2230
* https://github.com/alcionai/corso/issues/2253

## Test Plan

<!-- How will this be tested prior to merging.-->
- [x] 💪 Manual (Tested manually, will add an e2e test in a followup)
- [ ]  Unit test
- [ ] 💚 E2E
This commit is contained in:
Abin Simon 2023-02-03 09:33:14 +05:30 committed by GitHub
parent b8bc85deba
commit 9da0a7878b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 465 additions and 7 deletions

View File

@ -14,6 +14,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Increase page size preference for delta requests for Exchange to reduce number of roundtrips
- OneDrive file/folder permissions can now be backed up and restored
- Add `--restore-permissions` flag to toggle restoration of OneDrive permissions
- Add versions to backups so that we can understand/handle older backup formats
### Known Issues

View File

@ -16,6 +16,7 @@ import (
"github.com/alcionai/corso/src/internal/connector/mockconnector"
"github.com/alcionai/corso/src/internal/data"
"github.com/alcionai/corso/src/pkg/account"
"github.com/alcionai/corso/src/pkg/backup"
"github.com/alcionai/corso/src/pkg/backup/details"
"github.com/alcionai/corso/src/pkg/control"
"github.com/alcionai/corso/src/pkg/credentials"
@ -91,7 +92,7 @@ func generateAndRestoreItems(
Infof(ctx, "Generating %d %s items in %s\n", howMany, cat, Destination)
return gc.RestoreDataCollections(ctx, acct, sel, dest, opts, dataColls)
return gc.RestoreDataCollections(ctx, backup.Version, acct, sel, dest, opts, dataColls)
}
// ------------------------------------------------------------------------------------------

View File

@ -266,6 +266,7 @@ func (gc *GraphConnector) UnionSiteIDsAndWebURLs(ctx context.Context, ids, urls
// SideEffect: gc.status is updated at the completion of operation
func (gc *GraphConnector) RestoreDataCollections(
ctx context.Context,
backupVersion int,
acct account.Account,
selector selectors.Selector,
dest control.RestoreDestination,
@ -290,9 +291,9 @@ func (gc *GraphConnector) RestoreDataCollections(
case selectors.ServiceExchange:
status, err = exchange.RestoreExchangeDataCollections(ctx, creds, gc.Service, dest, dcs, deets)
case selectors.ServiceOneDrive:
status, err = onedrive.RestoreCollections(ctx, gc.Service, dest, opts, dcs, deets)
status, err = onedrive.RestoreCollections(ctx, backupVersion, gc.Service, dest, opts, dcs, deets)
case selectors.ServiceSharePoint:
status, err = sharepoint.RestoreCollections(ctx, gc.Service, dest, dcs, deets)
status, err = sharepoint.RestoreCollections(ctx, backupVersion, gc.Service, dest, dcs, deets)
default:
err = errors.Errorf("restore data from service %s not supported", selector.Service.String())
}

View File

@ -172,6 +172,14 @@ type restoreBackupInfo struct {
resource resource
}
// restoreBackupInfoMultiVersion describes a restore-then-backup test case in
// which the data handed to restore and the data expected back from backup
// are laid out according to different backup format versions.
type restoreBackupInfoMultiVersion struct {
// name is used as the subtest name.
name string
// service selects the service the collections and selectors are built for.
service path.ServiceType
// collectionsLatest is the layout expected from the backup that runs after
// the restore.
collectionsLatest []colInfo
// collectionsPrevious is the older-format layout that is fed to restore.
collectionsPrevious []colInfo
// resource is passed to loadConnector when creating the connectors.
resource resource
}
func attachmentEqual(
expected models.Attachmentable,
got models.Attachmentable,
@ -653,7 +661,7 @@ func compareOneDriveItem(
name := item.UUID()
expectedData := expected[item.UUID()]
if !assert.NotNil(t, expectedData, "unexpected file with name %s", item.UUID) {
if !assert.NotNil(t, expectedData, "unexpected file with name %s", item.UUID()) {
return
}
@ -988,6 +996,52 @@ func collectionsForInfo(
return totalItems, kopiaEntries, collections, expectedData
}
// collectionsForInfoVersion0 turns each colInfo into a mock collection whose
// items carry the given names and data as-is, and records the data expected
// at the corresponding restore destination path.
//
// It returns the total item count, the number of kopia entries (equal to the
// item count here), the mock collections, and the expected data keyed first
// by destination path and then by item lookup key.
func collectionsForInfoVersion0(
	t *testing.T,
	service path.ServiceType,
	tenant, user string,
	dest control.RestoreDestination,
	allInfo []colInfo,
) (int, int, []data.Collection, map[string]map[string][]byte) {
	var (
		itemCount  int
		entryCount int
		colls      = make([]data.Collection, 0, len(allInfo))
		expected   = make(map[string]map[string][]byte, len(allInfo))
	)

	for _, info := range allInfo {
		n := len(info.items)

		pth := mustToDataLayerPath(
			t,
			service,
			tenant,
			user,
			info.category,
			info.pathElements,
			false,
		)
		mc := mockconnector.NewMockExchangeCollection(pth, n)

		// Several collections may map to the same destination; share one
		// expectation map per destination path.
		destKey := backupOutputPathFromRestore(t, dest, pth).String()

		perDest := expected[destKey]
		if perDest == nil {
			perDest = make(map[string][]byte, n)
			expected[destKey] = perDest
		}

		for idx, it := range info.items {
			mc.Names[idx] = it.name
			mc.Data[idx] = it.data
			perDest[it.lookupKey] = it.data
		}

		colls = append(colls, mc)
		itemCount += n
		entryCount += n
	}

	return itemCount, entryCount, colls, expected
}
//nolint:deadcode
func getSelectorWith(
t *testing.T,

View File

@ -22,6 +22,7 @@ import (
"github.com/alcionai/corso/src/internal/data"
"github.com/alcionai/corso/src/internal/tester"
"github.com/alcionai/corso/src/pkg/account"
"github.com/alcionai/corso/src/pkg/backup"
"github.com/alcionai/corso/src/pkg/control"
"github.com/alcionai/corso/src/pkg/path"
"github.com/alcionai/corso/src/pkg/selectors"
@ -231,7 +232,15 @@ func (suite *GraphConnectorIntegrationSuite) TestRestoreFailsBadService() {
}
)
deets, err := suite.connector.RestoreDataCollections(ctx, acct, sel, dest, control.Options{}, nil)
deets, err := suite.connector.RestoreDataCollections(
ctx,
backup.Version,
acct,
sel,
dest,
control.Options{},
nil,
)
assert.Error(t, err)
assert.NotNil(t, deets)
@ -299,6 +308,7 @@ func (suite *GraphConnectorIntegrationSuite) TestEmptyCollections() {
deets, err := suite.connector.RestoreDataCollections(
ctx,
backup.Version,
suite.acct,
test.sel,
dest,
@ -393,6 +403,7 @@ func runRestoreBackupTest(
restoreSel := getSelectorWith(t, test.service, resourceOwners, true)
deets, err := restoreGC.RestoreDataCollections(
ctx,
backup.Version,
acct,
restoreSel,
dest,
@ -458,6 +469,121 @@ func runRestoreBackupTest(
"backup status.Successful; wanted %d items + %d skipped", totalItems, skipped)
}
// runRestoreBackupTestVersion0 restores collections laid out in the older
// (version 0) backup format, then runs a backup of the restored data and
// checks the result against the collections expected in the latest format.
//
// test.collectionsPrevious supplies the restore input and
// test.collectionsLatest supplies the expected backup output. opts is passed
// to the restore call, and opts.RestorePermissions is passed to the
// comparison.
func runRestoreBackupTestVersion0(
t *testing.T,
acct account.Account,
test restoreBackupInfoMultiVersion,
tenant string,
resourceOwners []string,
opts control.Options,
) {
var (
collections []data.Collection
expectedData = map[string]map[string][]byte{}
totalItems = 0
totalKopiaItems = 0
// Get a dest per test so they're independent.
dest = tester.DefaultTestRestoreDestination()
)
ctx, flush := tester.NewContext()
defer flush()
// Build the restore input from the previous-format collections. Only the
// collections are kept; counts and expected data come from the
// latest-format collections further down.
for _, owner := range resourceOwners {
_, _, ownerCollections, _ := collectionsForInfoVersion0(
t,
test.service,
tenant,
owner,
dest,
test.collectionsPrevious,
)
collections = append(collections, ownerCollections...)
}
t.Logf(
"Restoring collections to %s for resourceOwners(s) %v\n",
dest.ContainerName,
resourceOwners,
)
start := time.Now()
restoreGC := loadConnector(ctx, t, graph.HTTPClient(graph.NoTimeout()), test.resource)
restoreSel := getSelectorWith(t, test.service, resourceOwners, true)
deets, err := restoreGC.RestoreDataCollections(
ctx,
0, // Backup format version 0, the original format.
acct,
restoreSel,
dest,
opts,
collections,
)
require.NoError(t, err)
assert.NotNil(t, deets)
assert.NotNil(t, restoreGC.AwaitStatus())
runTime := time.Since(start)
t.Logf("Restore complete in %v\n", runTime)
// Run a backup and compare its output with what the latest format would
// contain for the data we restored.
for _, owner := range resourceOwners {
numItems, kopiaItems, _, userExpectedData := collectionsForInfo(
t,
test.service,
tenant,
owner,
dest,
test.collectionsLatest,
)
totalItems += numItems
totalKopiaItems += kopiaItems
maps.Copy(expectedData, userExpectedData)
}
// Collect the set of categories present in the expected collections.
cats := make(map[path.CategoryType]struct{}, len(test.collectionsLatest))
for _, c := range test.collectionsLatest {
cats[c.category] = struct{}{}
}
// One expected destination per resource owner, all sharing the same
// restore container and category set.
expectedDests := make([]destAndCats, 0, len(resourceOwners))
for _, ro := range resourceOwners {
expectedDests = append(expectedDests, destAndCats{
resourceOwner: ro,
dest: dest.ContainerName,
cats: cats,
})
}
backupGC := loadConnector(ctx, t, graph.HTTPClient(graph.NoTimeout()), test.resource)
backupSel := backupSelectorForExpected(t, test.service, expectedDests)
start = time.Now()
// NOTE(review): the backup options are hard-coded here rather than taken
// from opts, while the comparison below uses opts.RestorePermissions.
// Confirm this is intentional.
dcs, excludes, err := backupGC.DataCollections(ctx, backupSel, nil, control.Options{RestorePermissions: true})
require.NoError(t, err)
// No excludes yet because this isn't an incremental backup.
assert.Empty(t, excludes)
t.Logf("Backup enumeration complete in %v\n", time.Since(start))
// Pull the data prior to waiting for the status as otherwise it will
// deadlock.
skipped := checkCollections(t, totalKopiaItems, expectedData, dcs, opts.RestorePermissions)
status := backupGC.AwaitStatus()
assert.Equal(t, totalItems+skipped, status.ObjectCount, "status.ObjectCount")
assert.Equal(t, totalItems+skipped, status.Successful, "status.Successful")
}
func getTestMetaJSON(t *testing.T, user string, roles []string) []byte {
id := base64.StdEncoding.EncodeToString([]byte(user + strings.Join(roles, "+")))
testMeta := onedrive.Metadata{Permissions: []onedrive.UserPermission{
@ -906,6 +1032,255 @@ func (suite *GraphConnectorIntegrationSuite) TestRestoreAndBackup() {
}
}
// TestRestoreAndBackupVersion0 restores OneDrive collections laid out in the
// version 0 backup format (one entry per file) and checks that backing the
// restored data up again yields the layout that uses separate data, meta and
// directory-meta entries.
func (suite *GraphConnectorIntegrationSuite) TestRestoreAndBackupVersion0() {
	ctx, flush := tester.NewContext()
	defer flush()

	// Get the default drive ID for the test user.
	driveID := mustGetDefaultDriveID(
		suite.T(),
		ctx,
		suite.connector.Service,
		suite.user,
	)

	const fileName = "test-file.txt"

	// filesAt builds a Files-category collection located at the drive root
	// followed by the given sub-folders.
	filesAt := func(folders []string, items ...itemInfo) colInfo {
		return colInfo{
			pathElements: append([]string{"drives", driveID, "root:"}, folders...),
			category:     path.FilesCategory,
			items:        items,
		}
	}

	// named builds an item whose name and lookup key are identical.
	named := func(itemName string, content []byte) itemInfo {
		return itemInfo{name: itemName, data: content, lookupKey: itemName}
	}

	// oldFile is a version 0 item: the file content stored under the plain
	// file name.
	oldFile := func(fill string, size int) itemInfo {
		return named(fileName, []byte(strings.Repeat(fill, size)))
	}

	// newFile is the data half of a file in the latest layout.
	newFile := func(fill string, size int) itemInfo {
		return named(fileName+onedrive.DataFileSuffix, []byte(strings.Repeat(fill, size)))
	}

	// fileMeta is the (empty) metadata half of a file in the latest layout.
	fileMeta := func() itemInfo {
		return named(fileName+onedrive.MetaFileSuffix, []byte("{}"))
	}

	// dirMeta is the (empty) metadata entry for a child folder.
	dirMeta := func(dir string) itemInfo {
		return named(dir+onedrive.DirMetaFileSuffix, []byte("{}"))
	}

	var (
		folderA   = []string{"folder-a"}
		folderAB  = []string{"folder-a", "b"}
		folderABA = []string{"folder-a", "b", "folder-a"}
		folderB   = []string{"b"}
	)

	table := []restoreBackupInfoMultiVersion{
		{
			name:     "OneDriveMultipleFoldersAndFiles",
			service:  path.OneDriveService,
			resource: Users,
			collectionsPrevious: []colInfo{
				filesAt(nil, oldFile("a", 33)),
				filesAt(folderA, oldFile("b", 65)),
				filesAt(folderAB, oldFile("c", 129)),
				filesAt(folderABA, oldFile("d", 257)),
				filesAt(folderB, oldFile("e", 257)),
			},
			collectionsLatest: []colInfo{
				filesAt(nil, newFile("a", 33), fileMeta(), dirMeta("folder-a"), dirMeta("b")),
				filesAt(folderA, newFile("b", 65), fileMeta(), dirMeta("b")),
				filesAt(folderAB, newFile("c", 129), fileMeta(), dirMeta("folder-a")),
				filesAt(folderABA, newFile("d", 257), fileMeta()),
				filesAt(folderB, newFile("e", 257), fileMeta()),
			},
		},
	}

	for _, tc := range table {
		suite.T().Run(tc.name, func(t *testing.T) {
			runRestoreBackupTestVersion0(
				t,
				suite.acct,
				tc,
				suite.connector.tenant,
				[]string{suite.user},
				control.Options{RestorePermissions: true},
			)
		})
	}
}
func (suite *GraphConnectorIntegrationSuite) TestMultiFolderBackupDifferentNames() {
table := []restoreBackupInfo{
{
@ -1012,6 +1387,7 @@ func (suite *GraphConnectorIntegrationSuite) TestMultiFolderBackupDifferentNames
restoreGC := loadConnector(ctx, t, graph.HTTPClient(graph.NoTimeout()), test.resource)
deets, err := restoreGC.RestoreDataCollections(
ctx,
backup.Version,
suite.acct,
restoreSel,
dest,

View File

@ -29,6 +29,11 @@ const (
// Microsoft recommends 5-10MB buffers
// https://docs.microsoft.com/en-us/graph/api/driveitem-createuploadsession?view=graph-rest-1.0#best-practices
copyBufferSize = 5 * 1024 * 1024
// versionWithDataAndMetaFiles is the corso backup format version
// in which we split from storing just the data to storing both
// the data and metadata in two files.
versionWithDataAndMetaFiles = 1
)
func getParentPermissions(
@ -55,6 +60,7 @@ func getParentPermissions(
// RestoreCollections will restore the specified data collections into OneDrive
func RestoreCollections(
ctx context.Context,
backupVersion int,
service graph.Servicer,
dest control.RestoreDestination,
opts control.Options,
@ -101,6 +107,7 @@ func RestoreCollections(
metrics, folderPerms, permissionIDMappings, canceled = RestoreCollection(
ctx,
backupVersion,
service,
dc,
parentPerms,
@ -139,6 +146,7 @@ func RestoreCollections(
// - the context cancellation state (true if the context is canceled)
func RestoreCollection(
ctx context.Context,
backupVersion int,
service graph.Servicer,
dc data.Collection,
parentPerms []UserPermission,
@ -211,7 +219,7 @@ func RestoreCollection(
continue
}
if source == OneDriveSource {
if source == OneDriveSource && backupVersion >= versionWithDataAndMetaFiles {
name := itemData.UUID()
if strings.HasSuffix(name, DataFileSuffix) {
metrics.Objects++

View File

@ -36,6 +36,7 @@ import (
// RestoreCollections will restore the specified data collections into OneDrive
func RestoreCollections(
ctx context.Context,
backupVersion int,
service graph.Servicer,
dest control.RestoreDestination,
dcs []data.Collection,
@ -61,6 +62,7 @@ func RestoreCollections(
case path.LibrariesCategory:
metrics, _, _, canceled = onedrive.RestoreCollection(
ctx,
backupVersion,
service,
dc,
[]onedrive.UserPermission{}, // Currently permission data is not stored for sharepoint

View File

@ -339,7 +339,15 @@ func generateContainerOfItems(
dest,
collections)
deets, err := gc.RestoreDataCollections(ctx, acct, sel, dest, control.Options{RestorePermissions: true}, dataColls)
deets, err := gc.RestoreDataCollections(
ctx,
backup.Version,
acct,
sel,
dest,
control.Options{RestorePermissions: true},
dataColls,
)
require.NoError(t, err)
return deets

View File

@ -219,6 +219,7 @@ func (op *RestoreOperation) do(ctx context.Context) (restoreDetails *details.Det
restoreDetails, err = gc.RestoreDataCollections(
ctx,
bup.Version,
op.account,
op.Selectors,
op.Destination,

View File

@ -14,6 +14,8 @@ import (
"github.com/alcionai/corso/src/pkg/selectors"
)
// Version is the current backup format version. New records it on every
// Backup it creates.
const Version = 1
// Backup represents the result of a backup operation
type Backup struct {
model.BaseModel
@ -32,6 +34,9 @@ type Backup struct {
// Selector used in this operation
Selector selectors.Selector `json:"selectors"`
// Version represents the version of the backup format
Version int `json:"version"`
// Errors contains all errors aggregated during a backup operation.
Errors fault.ErrorsData `json:"errors"`
@ -67,6 +72,7 @@ func New(
Errors: errs.Data(),
ReadWrites: rw,
StartAndEndTime: se,
Version: Version,
}
}