From 43c2017492936b05af4f5bba50bce01897925510 Mon Sep 17 00:00:00 2001 From: ashmrtn Date: Mon, 9 Jan 2023 17:10:19 -0800 Subject: [PATCH] Don't use OwnersCats struct (#2055) ## Description Remove OwnersCats so only the Reason struct or tags pass information between BackupOp and kopia Instead of having a separate struct (OwnersCats) to fetch previous snapshots, generate and use reasons. While this results in some repeated data, it cuts down on the number of distinct structs and simplifies some of the code for getting previous manifests. A future PR should create a shared function to create a service/cat tag given a reason. Only pass in a set of tags to BackupCollections. This pushes the onus of generating the tags for later snapshot lookups to BackupOp and creates a somewhat asymmetric interface as Reason is used for the lookup but tags is used for the backup. This will be updated later so that both paths use a common function to convert from Reason->tags. Despite that, it may result in a cleaner interface with kopia (depending on how far we want to push it) where tags become the main mean of communication. ## Does this PR need a docs update or release note? - [ ] :white_check_mark: Yes, it's included - [ ] :clock1: Yes, but in a later PR - [x] :no_entry: No ## Type of change - [ ] :sunflower: Feature - [ ] :bug: Bugfix - [ ] :world_map: Documentation - [ ] :robot: Test - [ ] :computer: CI/Deployment - [x] :broom: Tech Debt/Cleanup ## Issue(s) * #1916 ## Test Plan - [x] :muscle: Manual - [x] :zap: Unit test - [ ] :green_heart: E2E --- src/internal/kopia/snapshot_manager.go | 90 +++++++++---------- src/internal/kopia/snapshot_manager_test.go | 62 +++++++++---- src/internal/kopia/wrapper.go | 11 +-- src/internal/kopia/wrapper_test.go | 87 +++++------------- src/internal/operations/backup.go | 56 +++++++----- .../operations/backup_integration_test.go | 12 +-- src/internal/operations/backup_test.go | 11 +-- src/internal/streamstore/streamstore.go | 2 - 8 files changed, 155 insertions(+), 176 deletions(-) diff --git a/src/internal/kopia/snapshot_manager.go b/src/internal/kopia/snapshot_manager.go index f82d742ed..fa2cb78b1 100644 --- a/src/internal/kopia/snapshot_manager.go +++ b/src/internal/kopia/snapshot_manager.go @@ -89,6 +89,10 @@ func MakeTagKV(k string) (string, string) { // tagsFromStrings returns a map[string]string with tags for all ownersCats // passed in. Currently uses placeholder values for each tag because there can // be multiple instances of resource owners and categories in a single snapshot. +// TODO(ashmrtn): Remove in future PR. +// +//nolint:unused +//lint:ignore U1000 will be removed in future PR. func tagsFromStrings(oc *OwnersCats) map[string]string { if oc == nil { return map[string]string{} @@ -188,25 +192,18 @@ func fetchPrevManifests( ctx context.Context, sm snapshotManager, foundMans map[manifest.ID]*ManifestEntry, - serviceCat ServiceCat, - resourceOwner string, + reason Reason, tags map[string]string, ) ([]*ManifestEntry, error) { tags = normalizeTagKVs(tags) - serviceCatKey, _ := MakeServiceCat(serviceCat.Service, serviceCat.Category) + serviceCatKey, _ := MakeServiceCat(reason.Service, reason.Category) allTags := normalizeTagKVs(map[string]string{ - serviceCatKey: "", - resourceOwner: "", + serviceCatKey: "", + reason.ResourceOwner: "", }) maps.Copy(allTags, tags) - reason := Reason{ - ResourceOwner: resourceOwner, - Service: serviceCat.Service, - Category: serviceCat.Category, - } - metas, err := sm.FindManifests(ctx, allTags) if err != nil { return nil, errors.Wrap(err, "fetching manifest metas by tag") @@ -275,57 +272,54 @@ func fetchPrevManifests( func fetchPrevSnapshotManifests( ctx context.Context, sm snapshotManager, - oc *OwnersCats, + reasons []Reason, tags map[string]string, ) []*ManifestEntry { - if oc == nil { - return nil - } - mans := map[manifest.ID]*ManifestEntry{} // For each serviceCat/resource owner pair that we will be backing up, see if // there's a previous incomplete snapshot and/or a previous complete snapshot // we can pass in. Can be expanded to return more than the most recent // snapshots, but may require more memory at runtime. - for _, serviceCat := range oc.ServiceCats { - for resourceOwner := range oc.ResourceOwners { - found, err := fetchPrevManifests( - ctx, - sm, - mans, - serviceCat, - resourceOwner, - tags, + for _, reason := range reasons { + found, err := fetchPrevManifests( + ctx, + sm, + mans, + reason, + tags, + ) + if err != nil { + logger.Ctx(ctx).Warnw( + "fetching previous snapshot manifests for service/category/resource owner", + "error", + err, + "service", + reason.Service.String(), + "category", + reason.Category.String(), ) - if err != nil { - logger.Ctx(ctx).Warnw( - "fetching previous snapshot manifests for service/category/resource owner", - "error", - err, - "service/category", - serviceCat, - ) - // Snapshot can still complete fine, just not as efficient. + // Snapshot can still complete fine, just not as efficient. + continue + } + + // If we found more recent snapshots then add them. + for _, m := range found { + man := mans[m.ID] + if man == nil { + mans[m.ID] = m continue } - // If we found more recent snapshots then add them. - for _, m := range found { - found := mans[m.ID] - if found == nil { - mans[m.ID] = m - continue - } - - // If the manifest already exists and it's incomplete then we should - // merge the reasons for consistency. This will become easier to handle - // once we update how checkpoint manifests are tagged. - if len(found.IncompleteReason) > 0 { - found.Reasons = append(found.Reasons, m.Reasons...) - } + // If the manifest already exists and it's incomplete then we should + // merge the reasons for consistency. This will become easier to handle + // once we update how checkpoint manifests are tagged. + if len(man.IncompleteReason) == 0 { + continue } + + man.Reasons = append(man.Reasons, m.Reasons...) } } diff --git a/src/internal/kopia/snapshot_manager_test.go b/src/internal/kopia/snapshot_manager_test.go index 9cf2246b3..324f471ad 100644 --- a/src/internal/kopia/snapshot_manager_test.go +++ b/src/internal/kopia/snapshot_manager_test.go @@ -43,25 +43,53 @@ var ( testUser2 = "user2" testUser3 = "user3" - testAllUsersAllCats = &OwnersCats{ - ResourceOwners: map[string]struct{}{ - testUser1: {}, - testUser2: {}, - testUser3: {}, + testAllUsersAllCats = []Reason{ + { + ResourceOwner: testUser1, + Service: path.ExchangeService, + Category: path.EmailCategory, }, - ServiceCats: map[string]ServiceCat{ - testMail: testMailServiceCat, - testEvents: testEventsServiceCat, + { + ResourceOwner: testUser1, + Service: path.ExchangeService, + Category: path.EventsCategory, + }, + { + ResourceOwner: testUser2, + Service: path.ExchangeService, + Category: path.EmailCategory, + }, + { + ResourceOwner: testUser2, + Service: path.ExchangeService, + Category: path.EventsCategory, + }, + { + ResourceOwner: testUser3, + Service: path.ExchangeService, + Category: path.EmailCategory, + }, + { + ResourceOwner: testUser3, + Service: path.ExchangeService, + Category: path.EventsCategory, }, } - testAllUsersMail = &OwnersCats{ - ResourceOwners: map[string]struct{}{ - testUser1: {}, - testUser2: {}, - testUser3: {}, + testAllUsersMail = []Reason{ + { + ResourceOwner: testUser1, + Service: path.ExchangeService, + Category: path.EmailCategory, }, - ServiceCats: map[string]ServiceCat{ - testMail: testMailServiceCat, + { + ResourceOwner: testUser2, + Service: path.ExchangeService, + Category: path.EmailCategory, + }, + { + ResourceOwner: testUser3, + Service: path.ExchangeService, + Category: path.EmailCategory, }, } ) @@ -176,7 +204,7 @@ func TestSnapshotFetchUnitSuite(t *testing.T) { func (suite *SnapshotFetchUnitSuite) TestFetchPrevSnapshots() { table := []struct { name string - input *OwnersCats + input []Reason data []manifestInfo // Use this to denote which manifests in data should be expected. Allows // defining data in a table while not repeating things between data and @@ -813,7 +841,7 @@ func (suite *SnapshotFetchUnitSuite) TestFetchPrevSnapshots_customTags() { table := []struct { name string - input *OwnersCats + input []Reason tags map[string]string // Use this to denote which manifests in data should be expected. Allows // defining data in a table while not repeating things between data and diff --git a/src/internal/kopia/wrapper.go b/src/internal/kopia/wrapper.go index 859892a1c..ab0331ff8 100644 --- a/src/internal/kopia/wrapper.go +++ b/src/internal/kopia/wrapper.go @@ -119,8 +119,6 @@ func (w Wrapper) BackupCollections( ctx context.Context, previousSnapshots []IncrementalBase, collections []data.Collection, - service path.ServiceType, - oc *OwnersCats, tags map[string]string, buildTreeWithBase bool, ) (*BackupStats, *details.Builder, map[string]path.Path, error) { @@ -158,7 +156,6 @@ func (w Wrapper) BackupCollections( ctx, previousSnapshots, dirTree, - oc, tags, progress, ) @@ -173,7 +170,6 @@ func (w Wrapper) makeSnapshotWithRoot( ctx context.Context, prevSnapEntries []IncrementalBase, root fs.Directory, - oc *OwnersCats, addlTags map[string]string, progress *corsoProgress, ) (*BackupStats, error) { @@ -231,7 +227,8 @@ func (w Wrapper) makeSnapshotWithRoot( return err } - man.Tags = tagsFromStrings(oc) + man.Tags = map[string]string{} + for k, v := range addlTags { mk, mv := MakeTagKV(k) @@ -442,12 +439,12 @@ func (w Wrapper) DeleteSnapshot( // normalized inside the func using MakeTagKV. func (w Wrapper) FetchPrevSnapshotManifests( ctx context.Context, - oc *OwnersCats, + reasons []Reason, tags map[string]string, ) ([]*ManifestEntry, error) { if w.c == nil { return nil, errors.WithStack(errNotConnected) } - return fetchPrevSnapshotManifests(ctx, w.c, oc, tags), nil + return fetchPrevSnapshotManifests(ctx, w.c, reasons, tags), nil } diff --git a/src/internal/kopia/wrapper_test.go b/src/internal/kopia/wrapper_test.go index e650723b6..947b4439e 100644 --- a/src/internal/kopia/wrapper_test.go +++ b/src/internal/kopia/wrapper_test.go @@ -207,39 +207,21 @@ func (suite *KopiaIntegrationSuite) TestBackupCollections() { ), } - k, v := MakeServiceCat(path.ExchangeService, path.EmailCategory) - oc := &OwnersCats{ - ResourceOwners: map[string]struct{}{ - testUser: {}, - }, - ServiceCats: map[string]ServiceCat{ - k: v, - }, - } - - // tags that are expected to populate as a side effect - // of the backup process. - baseTagKeys := []string{ - serviceCatTag(suite.testPath1), - suite.testPath1.ResourceOwner(), - serviceCatTag(suite.testPath2), - suite.testPath2.ResourceOwner(), - } - - // tags that are supplied by the caller. - customTags := map[string]string{ + // tags that are supplied by the caller. This includes basic tags to support + // lookups and extra tags the caller may want to apply. + tags := map[string]string{ "fnords": "smarf", "brunhilda": "", + + suite.testPath1.ResourceOwner(): "", + suite.testPath2.ResourceOwner(): "", + serviceCatTag(suite.testPath1): "", + serviceCatTag(suite.testPath2): "", } expectedTags := map[string]string{} - for _, k := range baseTagKeys { - tk, tv := MakeTagKV(k) - expectedTags[tk] = tv - } - - maps.Copy(expectedTags, normalizeTagKVs(customTags)) + maps.Copy(expectedTags, normalizeTagKVs(tags)) table := []struct { name string @@ -266,9 +248,7 @@ func (suite *KopiaIntegrationSuite) TestBackupCollections() { suite.ctx, prevSnaps, collections, - path.ExchangeService, - oc, - customTags, + tags, true, ) assert.NoError(t, err) @@ -325,14 +305,9 @@ func (suite *KopiaIntegrationSuite) TestRestoreAfterCompressionChange() { w := &Wrapper{k} - mapK, mapV := MakeServiceCat(path.ExchangeService, path.EmailCategory) - oc := &OwnersCats{ - ResourceOwners: map[string]struct{}{ - testUser: {}, - }, - ServiceCats: map[string]ServiceCat{ - mapK: mapV, - }, + tags := map[string]string{ + testUser: "", + serviceCatString(path.ExchangeService, path.EmailCategory): "", } dc1 := mockconnector.NewMockExchangeCollection(suite.testPath1, 1) @@ -348,9 +323,7 @@ func (suite *KopiaIntegrationSuite) TestRestoreAfterCompressionChange() { ctx, nil, []data.Collection{dc1, dc2}, - path.ExchangeService, - oc, - nil, + tags, true, ) require.NoError(t, err) @@ -380,14 +353,9 @@ func (suite *KopiaIntegrationSuite) TestRestoreAfterCompressionChange() { func (suite *KopiaIntegrationSuite) TestBackupCollections_ReaderError() { t := suite.T() - k, v := MakeServiceCat(path.ExchangeService, path.EmailCategory) - oc := &OwnersCats{ - ResourceOwners: map[string]struct{}{ - testUser: {}, - }, - ServiceCats: map[string]ServiceCat{ - k: v, - }, + tags := map[string]string{ + testUser: "", + serviceCatString(path.ExchangeService, path.EmailCategory): "", } collections := []data.Collection{ @@ -431,9 +399,7 @@ func (suite *KopiaIntegrationSuite) TestBackupCollections_ReaderError() { suite.ctx, nil, collections, - path.ExchangeService, - oc, - nil, + tags, true, ) require.NoError(t, err) @@ -477,8 +443,6 @@ func (suite *KopiaIntegrationSuite) TestBackupCollectionsHandlesNoCollections() ctx, nil, test.collections, - path.UnknownService, - &OwnersCats{}, nil, true, ) @@ -622,23 +586,16 @@ func (suite *KopiaSimpleRepoIntegrationSuite) SetupTest() { collections = append(collections, collection) } - k, v := MakeServiceCat(path.ExchangeService, path.EmailCategory) - oc := &OwnersCats{ - ResourceOwners: map[string]struct{}{ - testUser: {}, - }, - ServiceCats: map[string]ServiceCat{ - k: v, - }, + tags := map[string]string{ + testUser: "", + serviceCatString(path.ExchangeService, path.EmailCategory): "", } stats, deets, _, err := suite.w.BackupCollections( suite.ctx, nil, collections, - path.ExchangeService, - oc, - nil, + tags, false, ) require.NoError(t, err) diff --git a/src/internal/operations/backup.go b/src/internal/operations/backup.go index 718b964bc..1c6f790e3 100644 --- a/src/internal/operations/backup.go +++ b/src/internal/operations/backup.go @@ -115,7 +115,7 @@ func (op *BackupOperation) Run(ctx context.Context) (err error) { tenantID = op.account.ID() startTime = time.Now() detailsStore = streamstore.New(op.kopia, tenantID, op.Selectors.PathService()) - oc = selectorToOwnersCats(op.Selectors) + reasons = selectorToReasons(op.Selectors) uib = useIncrementalBackup(op.Selectors, op.Options) ) @@ -151,7 +151,14 @@ func (op *BackupOperation) Run(ctx context.Context) (err error) { } }() - mans, mdColls, canUseMetaData, err := produceManifestsAndMetadata(ctx, op.kopia, op.store, oc, tenantID, uib) + mans, mdColls, canUseMetaData, err := produceManifestsAndMetadata( + ctx, + op.kopia, + op.store, + reasons, + tenantID, + uib, + ) if err != nil { opStats.readErr = errors.Wrap(err, "connecting to M365") return opStats.readErr @@ -173,8 +180,7 @@ func (op *BackupOperation) Run(ctx context.Context) (err error) { ctx, op.kopia, tenantID, - op.Selectors, - oc, + reasons, mans, cs, op.Results.BackupID, @@ -246,8 +252,6 @@ type backuper interface { ctx context.Context, bases []kopia.IncrementalBase, cs []data.Collection, - service path.ServiceType, - oc *kopia.OwnersCats, tags map[string]string, buildTreeWithBase bool, ) (*kopia.BackupStats, *details.Builder, map[string]path.Path, error) @@ -298,7 +302,7 @@ func produceManifestsAndMetadata( ctx context.Context, kw *kopia.Wrapper, sw *store.Wrapper, - oc *kopia.OwnersCats, + reasons []kopia.Reason, tenantID string, getMetadata bool, ) ([]*kopia.ManifestEntry, []data.Collection, bool, error) { @@ -309,7 +313,7 @@ func produceManifestsAndMetadata( ms, err := kw.FetchPrevSnapshotManifests( ctx, - oc, + reasons, map[string]string{kopia.TagBackupCategory: ""}) if err != nil { return nil, nil, false, err @@ -427,28 +431,28 @@ func collectMetadata( return dcs, nil } -func selectorToOwnersCats(sel selectors.Selector) *kopia.OwnersCats { +func selectorToReasons(sel selectors.Selector) []kopia.Reason { service := sel.PathService() - oc := &kopia.OwnersCats{ - ResourceOwners: map[string]struct{}{}, - ServiceCats: map[string]kopia.ServiceCat{}, - } - - oc.ResourceOwners[sel.DiscreteOwner] = struct{}{} + reasons := []kopia.Reason{} pcs, err := sel.PathCategories() if err != nil { - return &kopia.OwnersCats{} + // This is technically safe, it's just that the resulting backup won't be + // usable as a base for future incremental backups. + return nil } for _, sl := range [][]path.CategoryType{pcs.Includes, pcs.Filters} { for _, cat := range sl { - k, v := kopia.MakeServiceCat(service, cat) - oc.ServiceCats[k] = v + reasons = append(reasons, kopia.Reason{ + ResourceOwner: sel.DiscreteOwner, + Service: service, + Category: cat, + }) } } - return oc + return reasons } func builderFromReason(tenant string, r kopia.Reason) (*path.Builder, error) { @@ -479,8 +483,7 @@ func consumeBackupDataCollections( ctx context.Context, bu backuper, tenantID string, - sel selectors.Selector, - oc *kopia.OwnersCats, + reasons []kopia.Reason, mans []*kopia.ManifestEntry, cs []data.Collection, backupID model.StableID, @@ -498,6 +501,15 @@ func consumeBackupDataCollections( kopia.TagBackupCategory: "", } + for _, reason := range reasons { + tags[reason.ResourceOwner] = "" + + // TODO(ashmrtn): Create a separate helper function to go from service/cat + // to a tag. + serviceCat, _ := kopia.MakeServiceCat(reason.Service, reason.Category) + tags[serviceCat] = "" + } + bases := make([]kopia.IncrementalBase, 0, len(mans)) for _, m := range mans { @@ -518,7 +530,7 @@ func consumeBackupDataCollections( }) } - return bu.BackupCollections(ctx, bases, cs, sel.PathService(), oc, tags, isIncremental) + return bu.BackupCollections(ctx, bases, cs, tags, isIncremental) } func matchesReason(reasons []kopia.Reason, p path.Path) bool { diff --git a/src/internal/operations/backup_integration_test.go b/src/internal/operations/backup_integration_test.go index 1b9e50c9d..6df6da829 100644 --- a/src/internal/operations/backup_integration_test.go +++ b/src/internal/operations/backup_integration_test.go @@ -176,16 +176,18 @@ func checkBackupIsInManifests( for _, category := range categories { t.Run(category.String(), func(t *testing.T) { var ( - sck, scv = kopia.MakeServiceCat(sel.PathService(), category) - oc = &kopia.OwnersCats{ - ResourceOwners: map[string]struct{}{resourceOwner: {}}, - ServiceCats: map[string]kopia.ServiceCat{sck: scv}, + reasons = []kopia.Reason{ + { + ResourceOwner: resourceOwner, + Service: sel.PathService(), + Category: category, + }, } tags = map[string]string{kopia.TagBackupCategory: ""} found bool ) - mans, err := kw.FetchPrevSnapshotManifests(ctx, oc, tags) + mans, err := kw.FetchPrevSnapshotManifests(ctx, reasons, tags) require.NoError(t, err) for _, man := range mans { diff --git a/src/internal/operations/backup_test.go b/src/internal/operations/backup_test.go index 382c55515..e858a319e 100644 --- a/src/internal/operations/backup_test.go +++ b/src/internal/operations/backup_test.go @@ -61,8 +61,6 @@ type mockBackuper struct { checkFunc func( bases []kopia.IncrementalBase, cs []data.Collection, - service path.ServiceType, - oc *kopia.OwnersCats, tags map[string]string, buildTreeWithBase bool, ) @@ -72,13 +70,11 @@ func (mbu mockBackuper) BackupCollections( ctx context.Context, bases []kopia.IncrementalBase, cs []data.Collection, - service path.ServiceType, - oc *kopia.OwnersCats, tags map[string]string, buildTreeWithBase bool, ) (*kopia.BackupStats, *details.Builder, map[string]path.Path, error) { if mbu.checkFunc != nil { - mbu.checkFunc(bases, cs, service, oc, tags, buildTreeWithBase) + mbu.checkFunc(bases, cs, tags, buildTreeWithBase) } return &kopia.BackupStats{}, &details.Builder{}, nil, nil @@ -673,8 +669,6 @@ func (suite *BackupOpSuite) TestBackupOperation_ConsumeBackupDataCollections_Pat manifest2 = &snapshot.Manifest{ ID: "id2", } - - sel = selectors.NewExchangeBackup([]string{resourceOwner}).Selector ) table := []struct { @@ -768,8 +762,6 @@ func (suite *BackupOpSuite) TestBackupOperation_ConsumeBackupDataCollections_Pat checkFunc: func( bases []kopia.IncrementalBase, cs []data.Collection, - service path.ServiceType, - oc *kopia.OwnersCats, tags map[string]string, buildTreeWithBase bool, ) { @@ -782,7 +774,6 @@ func (suite *BackupOpSuite) TestBackupOperation_ConsumeBackupDataCollections_Pat ctx, mbu, tenant, - sel, nil, test.inputMan, nil, diff --git a/src/internal/streamstore/streamstore.go b/src/internal/streamstore/streamstore.go index 285bff78f..92123194e 100644 --- a/src/internal/streamstore/streamstore.go +++ b/src/internal/streamstore/streamstore.go @@ -77,8 +77,6 @@ func (ss *streamStore) WriteBackupDetails( ctx, nil, []data.Collection{dc}, - ss.service, - nil, nil, false, )