From f71de7a021a17f0f23d8bf28806249f8306f77b9 Mon Sep 17 00:00:00 2001 From: ashmrtn Date: Fri, 16 Dec 2022 17:12:42 -0800 Subject: [PATCH] Select specific metadata from base snapshot by Reasons base snapshot was picked (#1836) ## Description Use the Reasons a snapshot was selected to retrieve only the metadata corresponding to those reasons. This will avoid having multiple versions of metadata for the same (resource owner, service, category) tuple as well as pulling in more metadata than required for some backups. ## Does this PR need a docs update or release note? - [ ] :white_check_mark: Yes, it's included - [ ] :clock1: Yes, but in a later PR - [x] :no_entry: No ## Type of change - [x] :sunflower: Feature - [ ] :bug: Bugfix - [ ] :world_map: Documentation - [ ] :robot: Test - [ ] :computer: CI/Deployment - [ ] :hamster: Trivial/Minor ## Issue(s) * closes #1829 ## Test Plan - [x] :muscle: Manual - [ ] :zap: Unit test - [ ] :green_heart: E2E --- src/internal/kopia/wrapper.go | 6 +- src/internal/operations/backup.go | 66 +++++----- src/internal/operations/backup_test.go | 163 +++++++++++++++++++++++++ 3 files changed, 204 insertions(+), 31 deletions(-) diff --git a/src/internal/kopia/wrapper.go b/src/internal/kopia/wrapper.go index 68be46e57..de32336d6 100644 --- a/src/internal/kopia/wrapper.go +++ b/src/internal/kopia/wrapper.go @@ -265,7 +265,7 @@ func getItemStream( ctx context.Context, itemPath path.Path, snapshotRoot fs.Entry, - bcounter byteCounter, + bcounter ByteCounter, ) (data.Stream, error) { if itemPath == nil { return nil, errors.WithStack(errNoRestorePath) @@ -314,7 +314,7 @@ func getItemStream( }, nil } -type byteCounter interface { +type ByteCounter interface { Count(numBytes int64) } @@ -329,7 +329,7 @@ func (w Wrapper) RestoreMultipleItems( ctx context.Context, snapshotID string, paths []path.Path, - bcounter byteCounter, + bcounter ByteCounter, ) ([]data.Collection, error) { ctx, end := D.Span(ctx, "kopia:restoreMultipleItems") defer end() diff --git a/src/internal/operations/backup.go b/src/internal/operations/backup.go index 6d732e789..ddb809f28 100644 --- a/src/internal/operations/backup.go +++ b/src/internal/operations/backup.go @@ -194,8 +194,9 @@ func produceManifestsAndMetadata( } var ( - tid = m365.AzureTenantID - collections []data.Collection + tid = m365.AzureTenantID + metadataFiles = graph.AllMetadataFileNames() + collections []data.Collection ) ms, err := kw.FetchPrevSnapshotManifests( @@ -211,15 +212,17 @@ func produceManifestsAndMetadata( continue } - k, _ := kopia.MakeTagKV(kopia.TagBackupID) - bupID := man.Tags[k] + // TODO(ashmrtn): Uncomment this again when we need to fetch and merge + // backup details from previous snapshots. + // k, _ := kopia.MakeTagKV(kopia.TagBackupID) + // bupID := man.Tags[k] - bup, err := sw.GetBackup(ctx, model.StableID(bupID)) - if err != nil { - return nil, nil, err - } + // bup, err := sw.GetBackup(ctx, model.StableID(bupID)) + // if err != nil { + // return nil, nil, err + // } - colls, err := collectMetadata(ctx, kw, graph.AllMetadataFileNames(), oc, tid, bup.SnapshotID) + colls, err := collectMetadata(ctx, kw, man, metadataFiles, tid) if err != nil && !errors.Is(err, kopia.ErrNotFound) { // prior metadata isn't guaranteed to exist. // if it doesn't, we'll just have to do a @@ -233,36 +236,43 @@ func produceManifestsAndMetadata( return ms, collections, err } +type restorer interface { + RestoreMultipleItems( + ctx context.Context, + snapshotID string, + paths []path.Path, + bc kopia.ByteCounter, + ) ([]data.Collection, error) +} + func collectMetadata( ctx context.Context, - kw *kopia.Wrapper, + r restorer, + man *kopia.ManifestEntry, fileNames []string, - oc *kopia.OwnersCats, - tenantID, snapshotID string, + tenantID string, ) ([]data.Collection, error) { paths := []path.Path{} for _, fn := range fileNames { - for ro := range oc.ResourceOwners { - for _, sc := range oc.ServiceCats { - p, err := path.Builder{}. - Append(fn). - ToServiceCategoryMetadataPath( - tenantID, - ro, - sc.Service, - sc.Category, - true) - if err != nil { - return nil, errors.Wrapf(err, "building metadata path") - } - - paths = append(paths, p) + for _, reason := range man.Reasons { + p, err := path.Builder{}. + Append(fn). + ToServiceCategoryMetadataPath( + tenantID, + reason.ResourceOwner, + reason.Service, + reason.Category, + true) + if err != nil { + return nil, errors.Wrapf(err, "building metadata path") } + + paths = append(paths, p) } } - dcs, err := kw.RestoreMultipleItems(ctx, snapshotID, paths, nil) + dcs, err := r.RestoreMultipleItems(ctx, string(man.ID), paths, nil) if err != nil { return nil, errors.Wrap(err, "collecting prior metadata") } diff --git a/src/internal/operations/backup_test.go b/src/internal/operations/backup_test.go index b7b3fa45f..8a5ebd27f 100644 --- a/src/internal/operations/backup_test.go +++ b/src/internal/operations/backup_test.go @@ -5,6 +5,7 @@ import ( "testing" "time" + "github.com/kopia/kopia/snapshot" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" @@ -115,6 +116,168 @@ func (suite *BackupOpSuite) TestBackupOperation_PersistResults() { } } +type mockRestorer struct { + gotPaths []path.Path +} + +func (mr *mockRestorer) RestoreMultipleItems( + ctx context.Context, + snapshotID string, + paths []path.Path, + bc kopia.ByteCounter, +) ([]data.Collection, error) { + mr.gotPaths = append(mr.gotPaths, paths...) + + return nil, nil +} + +func (mr mockRestorer) checkPaths(t *testing.T, expected []path.Path) { + t.Helper() + + assert.ElementsMatch(t, expected, mr.gotPaths) +} + +func makeMetadataPath( + t *testing.T, + tenant string, + service path.ServiceType, + resourceOwner string, + category path.CategoryType, + fileName string, +) path.Path { + p, err := path.Builder{}.Append(fileName).ToServiceCategoryMetadataPath( + tenant, + resourceOwner, + service, + category, + true, + ) + require.NoError(t, err) + + return p +} + +func (suite *BackupOpSuite) TestBackupOperation_CollectMetadata() { + var ( + tenant = "a-tenant" + resourceOwner = "a-user" + fileNames = []string{ + "delta", + "paths", + } + + emailDeltaPath = makeMetadataPath( + suite.T(), + tenant, + path.ExchangeService, + resourceOwner, + path.EmailCategory, + fileNames[0], + ) + emailPathsPath = makeMetadataPath( + suite.T(), + tenant, + path.ExchangeService, + resourceOwner, + path.EmailCategory, + fileNames[1], + ) + contactsDeltaPath = makeMetadataPath( + suite.T(), + tenant, + path.ExchangeService, + resourceOwner, + path.ContactsCategory, + fileNames[0], + ) + contactsPathsPath = makeMetadataPath( + suite.T(), + tenant, + path.ExchangeService, + resourceOwner, + path.ContactsCategory, + fileNames[1], + ) + ) + + table := []struct { + name string + inputMan *kopia.ManifestEntry + inputFiles []string + expected []path.Path + }{ + { + name: "SingleReasonSingleFile", + inputMan: &kopia.ManifestEntry{ + Manifest: &snapshot.Manifest{}, + Reasons: []kopia.Reason{ + { + ResourceOwner: resourceOwner, + Service: path.ExchangeService, + Category: path.EmailCategory, + }, + }, + }, + inputFiles: []string{fileNames[0]}, + expected: []path.Path{emailDeltaPath}, + }, + { + name: "SingleReasonMultipleFiles", + inputMan: &kopia.ManifestEntry{ + Manifest: &snapshot.Manifest{}, + Reasons: []kopia.Reason{ + { + ResourceOwner: resourceOwner, + Service: path.ExchangeService, + Category: path.EmailCategory, + }, + }, + }, + inputFiles: fileNames, + expected: []path.Path{emailDeltaPath, emailPathsPath}, + }, + { + name: "MultipleReasonsMultipleFiles", + inputMan: &kopia.ManifestEntry{ + Manifest: &snapshot.Manifest{}, + Reasons: []kopia.Reason{ + { + ResourceOwner: resourceOwner, + Service: path.ExchangeService, + Category: path.EmailCategory, + }, + { + ResourceOwner: resourceOwner, + Service: path.ExchangeService, + Category: path.ContactsCategory, + }, + }, + }, + inputFiles: fileNames, + expected: []path.Path{ + emailDeltaPath, + emailPathsPath, + contactsDeltaPath, + contactsPathsPath, + }, + }, + } + + for _, test := range table { + suite.T().Run(test.name, func(t *testing.T) { + ctx, flush := tester.NewContext() + defer flush() + + mr := &mockRestorer{} + + _, err := collectMetadata(ctx, mr, test.inputMan, test.inputFiles, tenant) + assert.NoError(t, err) + + mr.checkPaths(t, test.expected) + }) + } +} + // --------------------------------------------------------------------------- // integration // ---------------------------------------------------------------------------