From 12544d88d3a827492a121ef599ca3ea259a794ea Mon Sep 17 00:00:00 2001 From: ashmrtn Date: Wed, 14 Dec 2022 08:36:39 -0800 Subject: [PATCH] Have BackupOp pass in OwnersCats to kopia.BackupCollections (#1805) ## Description Instead of relying on KopiaWrapper to create the OwnersCats for a backup, have BackupOp create them from the selector and pass them in. This is necessary as incremental backups will no longer see all the data in the backup, meaning it cannot accurately create the OwnersCats because some data categories or owners in the backup may not have had changes. OwnersCats are eventually converted to tags on a kopia snapshot and used to lookup snapshots when trying to find base snapshots for incrementals. Additional minor changes: * use pointers instead of values when passing parameters * set backup details OwnersCats to nil ## Type of change - [x] :sunflower: Feature - [ ] :bug: Bugfix - [ ] :world_map: Documentation - [ ] :robot: Test - [ ] :computer: CI/Deployment - [ ] :hamster: Trivial/Minor ## Issue(s) * closes #1781 ## Test Plan - [x] :muscle: Manual - [x] :zap: Unit test - [x] :green_heart: E2E --- src/internal/kopia/snapshot_manager.go | 8 +++++ src/internal/kopia/upload.go | 20 +++++------ src/internal/kopia/upload_test.go | 29 ++-------------- src/internal/kopia/wrapper.go | 7 ++-- src/internal/kopia/wrapper_test.go | 45 +++++++++++++++++++++++++ src/internal/operations/backup.go | 21 +++++++----- src/internal/operations/backup_test.go | 2 +- src/internal/streamstore/streamstore.go | 2 +- 8 files changed, 84 insertions(+), 50 deletions(-) diff --git a/src/internal/kopia/snapshot_manager.go b/src/internal/kopia/snapshot_manager.go index 5eeb23feb..f4579a2bb 100644 --- a/src/internal/kopia/snapshot_manager.go +++ b/src/internal/kopia/snapshot_manager.go @@ -71,6 +71,10 @@ func MakeTagKV(k string) (string, string) { // passed in. Currently uses placeholder values for each tag because there can // be multiple instances of resource owners and categories in a single snapshot. func tagsFromStrings(oc *OwnersCats) map[string]string { + if oc == nil { + return map[string]string{} + } + res := make(map[string]string, len(oc.ServiceCats)+len(oc.ResourceOwners)) for k := range oc.ServiceCats { @@ -218,6 +222,10 @@ func fetchPrevSnapshotManifests( oc *OwnersCats, tags map[string]string, ) []*snapshot.Manifest { + if oc == nil { + return nil + } + mans := map[manifest.ID]*snapshot.Manifest{} tags = normalizeTagKVs(tags) diff --git a/src/internal/kopia/upload.go b/src/internal/kopia/upload.go index 378be6e8a..52ce0cd94 100644 --- a/src/internal/kopia/upload.go +++ b/src/internal/kopia/upload.go @@ -401,7 +401,7 @@ func getTreeNode(roots map[string]*treeMap, pathElements []string) *treeMap { func inflateCollectionTree( ctx context.Context, collections []data.Collection, -) (map[string]*treeMap, map[string]path.Path, *OwnersCats, error) { +) (map[string]*treeMap, map[string]path.Path, error) { roots := make(map[string]*treeMap) // Contains the old path for collections that have been moved or renamed. // Allows resolving what the new path should be when walking the base @@ -423,12 +423,12 @@ func inflateCollectionTree( } if s.FullPath() == nil || len(s.FullPath().Elements()) == 0 { - return nil, nil, nil, errors.New("no identifier for collection") + return nil, nil, errors.New("no identifier for collection") } node := getTreeNode(roots, s.FullPath().Elements()) if node == nil { - return nil, nil, nil, errors.Errorf( + return nil, nil, errors.Errorf( "unable to get tree node for path %s", s.FullPath(), ) @@ -441,7 +441,7 @@ func inflateCollectionTree( node.collection = s } - return roots, updatedPaths, ownerCats, nil + return roots, updatedPaths, nil } // inflateDirTree returns a set of tags representing all the resource owners and @@ -454,14 +454,14 @@ func inflateDirTree( ctx context.Context, collections []data.Collection, progress *corsoProgress, -) (fs.Directory, *OwnersCats, error) { - roots, _, ownerCats, err := inflateCollectionTree(ctx, collections) +) (fs.Directory, error) { + roots, _, err := inflateCollectionTree(ctx, collections) if err != nil { - return nil, nil, errors.Wrap(err, "inflating collection tree") + return nil, errors.Wrap(err, "inflating collection tree") } if len(roots) > 1 { - return nil, nil, errors.New("multiple root directories") + return nil, errors.New("multiple root directories") } var res fs.Directory @@ -469,11 +469,11 @@ func inflateDirTree( for dirName, dir := range roots { tmp, err := buildKopiaDirs(dirName, dir, progress) if err != nil { - return nil, nil, err + return nil, err } res = tmp } - return res, ownerCats, nil + return res, nil } diff --git a/src/internal/kopia/upload_test.go b/src/internal/kopia/upload_test.go index 9b7efaced..0a73c2662 100644 --- a/src/internal/kopia/upload_test.go +++ b/src/internal/kopia/upload_test.go @@ -439,14 +439,6 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTree() { user1Encoded: 5, user2Encoded: 42, } - expectedServiceCats := map[string]ServiceCat{ - serviceCatTag(suite.testPath): {}, - serviceCatTag(p2): {}, - } - expectedResourceOwners := map[string]struct{}{ - suite.testPath.ResourceOwner(): {}, - p2.ResourceOwner(): {}, - } progress := &corsoProgress{pending: map[string]*itemDetails{}} @@ -472,12 +464,9 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTree() { // - emails // - Inbox // - 42 separate files - dirTree, oc, err := inflateDirTree(ctx, collections, progress) + dirTree, err := inflateDirTree(ctx, collections, progress) require.NoError(t, err) - assert.Equal(t, expectedServiceCats, oc.ServiceCats) - assert.Equal(t, expectedResourceOwners, oc.ResourceOwners) - assert.Equal(t, encodeAsPath(testTenant), dirTree.Name()) entries, err := fs.GetAllEntries(ctx, dirTree) @@ -518,15 +507,6 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTree_MixedDirectory() p2, err := suite.testPath.Append(subdir, false) require.NoError(suite.T(), err) - expectedServiceCats := map[string]ServiceCat{ - serviceCatTag(suite.testPath): {}, - serviceCatTag(p2): {}, - } - expectedResourceOwners := map[string]struct{}{ - suite.testPath.ResourceOwner(): {}, - p2.ResourceOwner(): {}, - } - // Test multiple orders of items because right now order can matter. Both // orders result in a directory structure like: // - a-tenant @@ -573,12 +553,9 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTree_MixedDirectory() suite.T().Run(test.name, func(t *testing.T) { progress := &corsoProgress{pending: map[string]*itemDetails{}} - dirTree, oc, err := inflateDirTree(ctx, test.layout, progress) + dirTree, err := inflateDirTree(ctx, test.layout, progress) require.NoError(t, err) - assert.Equal(t, expectedServiceCats, oc.ServiceCats) - assert.Equal(t, expectedResourceOwners, oc.ResourceOwners) - assert.Equal(t, encodeAsPath(testTenant), dirTree.Name()) entries, err := fs.GetAllEntries(ctx, dirTree) @@ -674,7 +651,7 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTree_Fails() { defer flush() suite.T().Run(test.name, func(t *testing.T) { - _, _, err := inflateDirTree(ctx, test.layout, nil) + _, err := inflateDirTree(ctx, test.layout, nil) assert.Error(t, err) }) } diff --git a/src/internal/kopia/wrapper.go b/src/internal/kopia/wrapper.go index d4d1ce2b4..6c249d094 100644 --- a/src/internal/kopia/wrapper.go +++ b/src/internal/kopia/wrapper.go @@ -116,6 +116,7 @@ func (w Wrapper) BackupCollections( previousSnapshots []*snapshot.Manifest, collections []data.Collection, service path.ServiceType, + oc *OwnersCats, tags map[string]string, ) (*BackupStats, *details.Details, error) { if w.c == nil { @@ -134,7 +135,7 @@ func (w Wrapper) BackupCollections( deets: &details.Details{}, } - dirTree, oc, err := inflateDirTree(ctx, collections, progress) + dirTree, err := inflateDirTree(ctx, collections, progress) if err != nil { return nil, nil, errors.Wrap(err, "building kopia directories") } @@ -411,12 +412,12 @@ func (w Wrapper) DeleteSnapshot( // normalized inside the func using MakeTagKV. func (w Wrapper) FetchPrevSnapshotManifests( ctx context.Context, - oc OwnersCats, + oc *OwnersCats, tags map[string]string, ) ([]*snapshot.Manifest, error) { if w.c == nil { return nil, errors.WithStack(errNotConnected) } - return fetchPrevSnapshotManifests(ctx, w.c, &oc, tags), nil + return fetchPrevSnapshotManifests(ctx, w.c, oc, tags), nil } diff --git a/src/internal/kopia/wrapper_test.go b/src/internal/kopia/wrapper_test.go index afe418748..83651f392 100644 --- a/src/internal/kopia/wrapper_test.go +++ b/src/internal/kopia/wrapper_test.go @@ -209,6 +209,16 @@ func (suite *KopiaIntegrationSuite) TestBackupCollections() { ), } + k, v := MakeServiceCat(path.ExchangeService, path.EmailCategory) + oc := &OwnersCats{ + ResourceOwners: map[string]struct{}{ + testUser: {}, + }, + ServiceCats: map[string]ServiceCat{ + k: v, + }, + } + // tags that are expected to populate as a side effect // of the backup process. baseTagKeys := []string{ @@ -259,6 +269,7 @@ func (suite *KopiaIntegrationSuite) TestBackupCollections() { nil, collections, path.ExchangeService, + oc, customTags, ) assert.NoError(t, err) @@ -301,6 +312,16 @@ func (suite *KopiaIntegrationSuite) TestRestoreAfterCompressionChange() { w := &Wrapper{k} + mapK, mapV := MakeServiceCat(path.ExchangeService, path.EmailCategory) + oc := &OwnersCats{ + ResourceOwners: map[string]struct{}{ + testUser: {}, + }, + ServiceCats: map[string]ServiceCat{ + mapK: mapV, + }, + } + dc1 := mockconnector.NewMockExchangeCollection(suite.testPath1, 1) dc2 := mockconnector.NewMockExchangeCollection(suite.testPath2, 1) @@ -315,6 +336,7 @@ func (suite *KopiaIntegrationSuite) TestRestoreAfterCompressionChange() { nil, []data.Collection{dc1, dc2}, path.ExchangeService, + oc, nil, ) require.NoError(t, err) @@ -344,6 +366,16 @@ func (suite *KopiaIntegrationSuite) TestRestoreAfterCompressionChange() { func (suite *KopiaIntegrationSuite) TestBackupCollections_ReaderError() { t := suite.T() + k, v := MakeServiceCat(path.ExchangeService, path.EmailCategory) + oc := &OwnersCats{ + ResourceOwners: map[string]struct{}{ + testUser: {}, + }, + ServiceCats: map[string]ServiceCat{ + k: v, + }, + } + collections := []data.Collection{ &kopiaDataCollection{ path: suite.testPath1, @@ -386,6 +418,7 @@ func (suite *KopiaIntegrationSuite) TestBackupCollections_ReaderError() { nil, collections, path.ExchangeService, + oc, nil, ) require.NoError(t, err) @@ -430,6 +463,7 @@ func (suite *KopiaIntegrationSuite) TestBackupCollectionsHandlesNoCollections() nil, test.collections, path.UnknownService, + &OwnersCats{}, nil, ) require.NoError(t, err) @@ -575,11 +609,22 @@ func (suite *KopiaSimpleRepoIntegrationSuite) SetupTest() { collections = append(collections, collection) } + k, v := MakeServiceCat(path.ExchangeService, path.EmailCategory) + oc := &OwnersCats{ + ResourceOwners: map[string]struct{}{ + testUser: {}, + }, + ServiceCats: map[string]ServiceCat{ + k: v, + }, + } + stats, deets, err := suite.w.BackupCollections( suite.ctx, nil, collections, path.ExchangeService, + oc, nil, ) require.NoError(t, err) diff --git a/src/internal/operations/backup.go b/src/internal/operations/backup.go index c36762aaf..0d41bbd06 100644 --- a/src/internal/operations/backup.go +++ b/src/internal/operations/backup.go @@ -127,7 +127,9 @@ func (op *BackupOperation) Run(ctx context.Context) (err error) { } }() - mans, mdColls, err := produceManifestsAndMetadata(ctx, op.kopia, op.store, op.Selectors, op.account) + oc := selectorToOwnersCats(op.Selectors) + + mans, mdColls, err := produceManifestsAndMetadata(ctx, op.kopia, op.store, oc, op.account) if err != nil { opStats.readErr = errors.Wrap(err, "connecting to M365") return opStats.readErr @@ -149,6 +151,7 @@ func (op *BackupOperation) Run(ctx context.Context) (err error) { ctx, op.kopia, op.Selectors, + oc, mans, cs, op.Results.BackupID) @@ -175,7 +178,7 @@ func produceManifestsAndMetadata( ctx context.Context, kw *kopia.Wrapper, sw *store.Wrapper, - sel selectors.Selector, + oc *kopia.OwnersCats, acct account.Account, ) ([]*snapshot.Manifest, []data.Collection, error) { complete, closer := observe.MessageWithCompletion("Fetching backup heuristics:") @@ -192,7 +195,6 @@ func produceManifestsAndMetadata( var ( tid = m365.AzureTenantID - oc = selectorToOwnersCats(sel) collections []data.Collection ) @@ -244,7 +246,7 @@ func collectMetadata( ctx context.Context, kw *kopia.Wrapper, fileNames []string, - oc kopia.OwnersCats, + oc *kopia.OwnersCats, tenantID, snapshotID string, ) ([]data.Collection, error) { paths := []path.Path{} @@ -277,16 +279,16 @@ func collectMetadata( return dcs, nil } -func selectorToOwnersCats(sel selectors.Selector) kopia.OwnersCats { +func selectorToOwnersCats(sel selectors.Selector) *kopia.OwnersCats { service := sel.PathService() - oc := kopia.OwnersCats{ + oc := &kopia.OwnersCats{ ResourceOwners: map[string]struct{}{}, ServiceCats: map[string]kopia.ServiceCat{}, } ros, err := sel.ResourceOwners() if err != nil { - return kopia.OwnersCats{} + return &kopia.OwnersCats{} } for _, sl := range [][]string{ros.Includes, ros.Filters} { @@ -297,7 +299,7 @@ func selectorToOwnersCats(sel selectors.Selector) kopia.OwnersCats { pcs, err := sel.PathCategories() if err != nil { - return kopia.OwnersCats{} + return &kopia.OwnersCats{} } for _, sl := range [][]path.CategoryType{pcs.Includes, pcs.Filters} { @@ -333,6 +335,7 @@ func consumeBackupDataCollections( ctx context.Context, kw *kopia.Wrapper, sel selectors.Selector, + oc *kopia.OwnersCats, mans []*snapshot.Manifest, cs []data.Collection, backupID model.StableID, @@ -349,7 +352,7 @@ func consumeBackupDataCollections( kopia.TagBackupCategory: "", } - return kw.BackupCollections(ctx, mans, cs, sel.PathService(), tags) + return kw.BackupCollections(ctx, mans, cs, sel.PathService(), oc, tags) } // writes the results metrics to the operation results. diff --git a/src/internal/operations/backup_test.go b/src/internal/operations/backup_test.go index 6d6e55929..aacac38ea 100644 --- a/src/internal/operations/backup_test.go +++ b/src/internal/operations/backup_test.go @@ -405,7 +405,7 @@ func (suite *BackupOpIntegrationSuite) TestBackup_Run_exchange() { // verify that we can find the new backup id in the manifests var ( sck, scv = kopia.MakeServiceCat(sel.PathService(), test.category) - oc = kopia.OwnersCats{ + oc = &kopia.OwnersCats{ ResourceOwners: map[string]struct{}{test.resourceOwner: {}}, ServiceCats: map[string]kopia.ServiceCat{sck: scv}, } diff --git a/src/internal/streamstore/streamstore.go b/src/internal/streamstore/streamstore.go index f81cc8f30..287a23413 100644 --- a/src/internal/streamstore/streamstore.go +++ b/src/internal/streamstore/streamstore.go @@ -73,7 +73,7 @@ func (ss *streamStore) WriteBackupDetails( }, } - backupStats, _, err := ss.kw.BackupCollections(ctx, nil, []data.Collection{dc}, ss.service, nil) + backupStats, _, err := ss.kw.BackupCollections(ctx, nil, []data.Collection{dc}, ss.service, nil, nil) if err != nil { return "", nil }