Select metadata from the base snapshot based on the Reasons it was picked (#1836)

## Description

Use the Reasons a snapshot was selected as a base to retrieve only the
metadata corresponding to those Reasons. This avoids having multiple
versions of metadata for the same (resource owner, service, category)
tuple, and avoids pulling in more metadata than some backups require.

## Does this PR need a docs update or release note?

- [ ] ✅ Yes, it's included
- [ ] 🕐 Yes, but in a later PR
- [x] ⛔ No

## Type of change

- [x] 🌻 Feature
- [ ] 🐛 Bugfix
- [ ] 🗺️ Documentation
- [ ] 🤖 Test
- [ ] 💻 CI/Deployment
- [ ] 🐹 Trivial/Minor

## Issue(s)

* closes #1829 

## Test Plan

<!-- How will this be tested prior to merging? -->
- [x] 💪 Manual
- [ ] ⚡ Unit test
- [ ] 💚 E2E
This commit is contained in:
ashmrtn 2022-12-16 17:12:42 -08:00 committed by GitHub
parent 715e436dd9
commit f71de7a021
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 204 additions and 31 deletions

View File

@ -265,7 +265,7 @@ func getItemStream(
ctx context.Context, ctx context.Context,
itemPath path.Path, itemPath path.Path,
snapshotRoot fs.Entry, snapshotRoot fs.Entry,
bcounter byteCounter, bcounter ByteCounter,
) (data.Stream, error) { ) (data.Stream, error) {
if itemPath == nil { if itemPath == nil {
return nil, errors.WithStack(errNoRestorePath) return nil, errors.WithStack(errNoRestorePath)
@ -314,7 +314,7 @@ func getItemStream(
}, nil }, nil
} }
type byteCounter interface { type ByteCounter interface {
Count(numBytes int64) Count(numBytes int64)
} }
@ -329,7 +329,7 @@ func (w Wrapper) RestoreMultipleItems(
ctx context.Context, ctx context.Context,
snapshotID string, snapshotID string,
paths []path.Path, paths []path.Path,
bcounter byteCounter, bcounter ByteCounter,
) ([]data.Collection, error) { ) ([]data.Collection, error) {
ctx, end := D.Span(ctx, "kopia:restoreMultipleItems") ctx, end := D.Span(ctx, "kopia:restoreMultipleItems")
defer end() defer end()

View File

@ -195,6 +195,7 @@ func produceManifestsAndMetadata(
var ( var (
tid = m365.AzureTenantID tid = m365.AzureTenantID
metadataFiles = graph.AllMetadataFileNames()
collections []data.Collection collections []data.Collection
) )
@ -211,15 +212,17 @@ func produceManifestsAndMetadata(
continue continue
} }
k, _ := kopia.MakeTagKV(kopia.TagBackupID) // TODO(ashmrtn): Uncomment this again when we need to fetch and merge
bupID := man.Tags[k] // backup details from previous snapshots.
// k, _ := kopia.MakeTagKV(kopia.TagBackupID)
// bupID := man.Tags[k]
bup, err := sw.GetBackup(ctx, model.StableID(bupID)) // bup, err := sw.GetBackup(ctx, model.StableID(bupID))
if err != nil { // if err != nil {
return nil, nil, err // return nil, nil, err
} // }
colls, err := collectMetadata(ctx, kw, graph.AllMetadataFileNames(), oc, tid, bup.SnapshotID) colls, err := collectMetadata(ctx, kw, man, metadataFiles, tid)
if err != nil && !errors.Is(err, kopia.ErrNotFound) { if err != nil && !errors.Is(err, kopia.ErrNotFound) {
// prior metadata isn't guaranteed to exist. // prior metadata isn't guaranteed to exist.
// if it doesn't, we'll just have to do a // if it doesn't, we'll just have to do a
@ -233,25 +236,33 @@ func produceManifestsAndMetadata(
return ms, collections, err return ms, collections, err
} }
// restorer is the subset of snapshot-restore behavior that collectMetadata
// depends on. Declaring it here, at the consumer, lets tests pass a
// lightweight stand-in in place of a full kopia wrapper.
type restorer interface {
	// RestoreMultipleItems fetches the items at the given paths from the
	// snapshot identified by snapshotID and returns them as collections.
	// bc, when supplied, is handed the restore's byte counts; collectMetadata
	// passes nil.
	RestoreMultipleItems(
		ctx context.Context,
		snapshotID string,
		paths []path.Path,
		bc kopia.ByteCounter,
	) ([]data.Collection, error)
}
func collectMetadata( func collectMetadata(
ctx context.Context, ctx context.Context,
kw *kopia.Wrapper, r restorer,
man *kopia.ManifestEntry,
fileNames []string, fileNames []string,
oc *kopia.OwnersCats, tenantID string,
tenantID, snapshotID string,
) ([]data.Collection, error) { ) ([]data.Collection, error) {
paths := []path.Path{} paths := []path.Path{}
for _, fn := range fileNames { for _, fn := range fileNames {
for ro := range oc.ResourceOwners { for _, reason := range man.Reasons {
for _, sc := range oc.ServiceCats {
p, err := path.Builder{}. p, err := path.Builder{}.
Append(fn). Append(fn).
ToServiceCategoryMetadataPath( ToServiceCategoryMetadataPath(
tenantID, tenantID,
ro, reason.ResourceOwner,
sc.Service, reason.Service,
sc.Category, reason.Category,
true) true)
if err != nil { if err != nil {
return nil, errors.Wrapf(err, "building metadata path") return nil, errors.Wrapf(err, "building metadata path")
@ -260,9 +271,8 @@ func collectMetadata(
paths = append(paths, p) paths = append(paths, p)
} }
} }
}
dcs, err := kw.RestoreMultipleItems(ctx, snapshotID, paths, nil) dcs, err := r.RestoreMultipleItems(ctx, string(man.ID), paths, nil)
if err != nil { if err != nil {
return nil, errors.Wrap(err, "collecting prior metadata") return nil, errors.Wrap(err, "collecting prior metadata")
} }

View File

@ -5,6 +5,7 @@ import (
"testing" "testing"
"time" "time"
"github.com/kopia/kopia/snapshot"
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require" "github.com/stretchr/testify/require"
"github.com/stretchr/testify/suite" "github.com/stretchr/testify/suite"
@ -115,6 +116,168 @@ func (suite *BackupOpSuite) TestBackupOperation_PersistResults() {
} }
} }
// mockRestorer is a test double that can be handed to collectMetadata in
// place of a real restorer. It restores nothing; it only records every path
// it was asked to restore so tests can assert on the request.
type mockRestorer struct {
	// gotPaths accumulates the paths from every RestoreMultipleItems call.
	gotPaths []path.Path
}

// RestoreMultipleItems appends the requested paths to gotPaths and reports
// success with no collections. The context, snapshot ID, and byte counter
// are ignored.
func (mr *mockRestorer) RestoreMultipleItems(
	ctx context.Context,
	snapshotID string,
	paths []path.Path,
	bc kopia.ByteCounter,
) ([]data.Collection, error) {
	mr.gotPaths = append(mr.gotPaths, paths...)

	return nil, nil
}

// checkPaths asserts that the recorded paths contain exactly the expected
// elements, ignoring order.
//
// It uses a pointer receiver so that every method on mockRestorer shares the
// same receiver kind as RestoreMultipleItems, which must mutate the mock.
func (mr *mockRestorer) checkPaths(t *testing.T, expected []path.Path) {
	t.Helper()

	assert.ElementsMatch(t, expected, mr.gotPaths)
}
// makeMetadataPath builds the service/category metadata path for fileName
// under the given tenant, resource owner, service, and category. It fails
// the test immediately if the path cannot be constructed.
func makeMetadataPath(
	t *testing.T,
	tenant string,
	service path.ServiceType,
	resourceOwner string,
	category path.CategoryType,
	fileName string,
) path.Path {
	// Mark this function as a helper so a failed require below is reported
	// at the calling test's line rather than inside this function.
	t.Helper()

	p, err := path.Builder{}.Append(fileName).ToServiceCategoryMetadataPath(
		tenant,
		resourceOwner,
		service,
		category,
		true,
	)
	require.NoError(t, err)

	return p
}
// TestBackupOperation_CollectMetadata checks that collectMetadata asks the
// restorer for one metadata path per (file name, Reason) combination found
// on the input manifest.
func (suite *BackupOpSuite) TestBackupOperation_CollectMetadata() {
	t := suite.T()

	tenant := "a-tenant"
	resourceOwner := "a-user"
	fileNames := []string{
		"delta",
		"paths",
	}

	// exchangePath builds the expected Exchange metadata path for one
	// category and file name.
	exchangePath := func(category path.CategoryType, fileName string) path.Path {
		return makeMetadataPath(
			t,
			tenant,
			path.ExchangeService,
			resourceOwner,
			category,
			fileName,
		)
	}

	emailDeltaPath := exchangePath(path.EmailCategory, fileNames[0])
	emailPathsPath := exchangePath(path.EmailCategory, fileNames[1])
	contactsDeltaPath := exchangePath(path.ContactsCategory, fileNames[0])
	contactsPathsPath := exchangePath(path.ContactsCategory, fileNames[1])

	emailReason := kopia.Reason{
		ResourceOwner: resourceOwner,
		Service:       path.ExchangeService,
		Category:      path.EmailCategory,
	}
	contactsReason := kopia.Reason{
		ResourceOwner: resourceOwner,
		Service:       path.ExchangeService,
		Category:      path.ContactsCategory,
	}

	// manifestWith returns a manifest entry backed by a fresh, empty
	// snapshot manifest and carrying the given Reasons.
	manifestWith := func(reasons ...kopia.Reason) *kopia.ManifestEntry {
		return &kopia.ManifestEntry{
			Manifest: &snapshot.Manifest{},
			Reasons:  reasons,
		}
	}

	table := []struct {
		name       string
		inputMan   *kopia.ManifestEntry
		inputFiles []string
		expected   []path.Path
	}{
		{
			name:       "SingleReasonSingleFile",
			inputMan:   manifestWith(emailReason),
			inputFiles: []string{fileNames[0]},
			expected:   []path.Path{emailDeltaPath},
		},
		{
			name:       "SingleReasonMultipleFiles",
			inputMan:   manifestWith(emailReason),
			inputFiles: fileNames,
			expected:   []path.Path{emailDeltaPath, emailPathsPath},
		},
		{
			name:       "MultipleReasonsMultipleFiles",
			inputMan:   manifestWith(emailReason, contactsReason),
			inputFiles: fileNames,
			expected: []path.Path{
				emailDeltaPath,
				emailPathsPath,
				contactsDeltaPath,
				contactsPathsPath,
			},
		},
	}

	for _, tc := range table {
		t.Run(tc.name, func(st *testing.T) {
			ctx, flush := tester.NewContext()
			defer flush()

			recorder := &mockRestorer{}

			_, err := collectMetadata(ctx, recorder, tc.inputMan, tc.inputFiles, tenant)
			assert.NoError(st, err)

			recorder.checkPaths(st, tc.expected)
		})
	}
}
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------
// integration // integration
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------