Select specific metadata from the base snapshot using the Reasons the snapshot was picked (#1836)

## Description

Use the Reasons a snapshot was selected to retrieve only the metadata
corresponding to those reasons. This will avoid having multiple versions
of metadata for the same (resource owner, service, category) tuple as
well as pulling in more metadata than required for some backups.

## Does this PR need a docs update or release note?

- [ ]  Yes, it's included
- [ ] 🕐 Yes, but in a later PR
- [x]  No 

## Type of change

- [x] 🌻 Feature
- [ ] 🐛 Bugfix
- [ ] 🗺️ Documentation
- [ ] 🤖 Test
- [ ] 💻 CI/Deployment
- [ ] 🐹 Trivial/Minor

## Issue(s)

* closes #1829 

## Test Plan

<!-- How will this be tested prior to merging? -->
- [x] 💪 Manual
- [ ]  Unit test
- [ ] 💚 E2E
This commit is contained in:
ashmrtn 2022-12-16 17:12:42 -08:00 committed by GitHub
parent 715e436dd9
commit f71de7a021
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 204 additions and 31 deletions

View File

@ -265,7 +265,7 @@ func getItemStream(
ctx context.Context,
itemPath path.Path,
snapshotRoot fs.Entry,
bcounter byteCounter,
bcounter ByteCounter,
) (data.Stream, error) {
if itemPath == nil {
return nil, errors.WithStack(errNoRestorePath)
@ -314,7 +314,7 @@ func getItemStream(
}, nil
}
type byteCounter interface {
type ByteCounter interface {
Count(numBytes int64)
}
@ -329,7 +329,7 @@ func (w Wrapper) RestoreMultipleItems(
ctx context.Context,
snapshotID string,
paths []path.Path,
bcounter byteCounter,
bcounter ByteCounter,
) ([]data.Collection, error) {
ctx, end := D.Span(ctx, "kopia:restoreMultipleItems")
defer end()

View File

@ -195,6 +195,7 @@ func produceManifestsAndMetadata(
var (
tid = m365.AzureTenantID
metadataFiles = graph.AllMetadataFileNames()
collections []data.Collection
)
@ -211,15 +212,17 @@ func produceManifestsAndMetadata(
continue
}
k, _ := kopia.MakeTagKV(kopia.TagBackupID)
bupID := man.Tags[k]
// TODO(ashmrtn): Uncomment this again when we need to fetch and merge
// backup details from previous snapshots.
// k, _ := kopia.MakeTagKV(kopia.TagBackupID)
// bupID := man.Tags[k]
bup, err := sw.GetBackup(ctx, model.StableID(bupID))
if err != nil {
return nil, nil, err
}
// bup, err := sw.GetBackup(ctx, model.StableID(bupID))
// if err != nil {
// return nil, nil, err
// }
colls, err := collectMetadata(ctx, kw, graph.AllMetadataFileNames(), oc, tid, bup.SnapshotID)
colls, err := collectMetadata(ctx, kw, man, metadataFiles, tid)
if err != nil && !errors.Is(err, kopia.ErrNotFound) {
// prior metadata isn't guaranteed to exist.
// if it doesn't, we'll just have to do a
@ -233,25 +236,33 @@ func produceManifestsAndMetadata(
return ms, collections, err
}
// restorer is the consumer-side interface that collectMetadata uses to pull
// items out of a snapshot. Accepting this interface rather than a concrete
// type lets tests pass in a mock implementation.
type restorer interface {
// RestoreMultipleItems fetches the items at paths from the snapshot
// identified by snapshotID and returns them as collections.
// NOTE(review): bc is presumably an optional progress counter, since
// collectMetadata passes nil for it; confirm against the implementation.
RestoreMultipleItems(
ctx context.Context,
snapshotID string,
paths []path.Path,
bc kopia.ByteCounter,
) ([]data.Collection, error)
}
func collectMetadata(
ctx context.Context,
kw *kopia.Wrapper,
r restorer,
man *kopia.ManifestEntry,
fileNames []string,
oc *kopia.OwnersCats,
tenantID, snapshotID string,
tenantID string,
) ([]data.Collection, error) {
paths := []path.Path{}
for _, fn := range fileNames {
for ro := range oc.ResourceOwners {
for _, sc := range oc.ServiceCats {
for _, reason := range man.Reasons {
p, err := path.Builder{}.
Append(fn).
ToServiceCategoryMetadataPath(
tenantID,
ro,
sc.Service,
sc.Category,
reason.ResourceOwner,
reason.Service,
reason.Category,
true)
if err != nil {
return nil, errors.Wrapf(err, "building metadata path")
@ -260,9 +271,8 @@ func collectMetadata(
paths = append(paths, p)
}
}
}
dcs, err := kw.RestoreMultipleItems(ctx, snapshotID, paths, nil)
dcs, err := r.RestoreMultipleItems(ctx, string(man.ID), paths, nil)
if err != nil {
return nil, errors.Wrap(err, "collecting prior metadata")
}

View File

@ -5,6 +5,7 @@ import (
"testing"
"time"
"github.com/kopia/kopia/snapshot"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/stretchr/testify/suite"
@ -115,6 +116,168 @@ func (suite *BackupOpSuite) TestBackupOperation_PersistResults() {
}
}
// mockRestorer is a test double that records the paths it is asked to
// restore so that tests can assert on them afterwards.
type mockRestorer struct {
// gotPaths accumulates every path passed to RestoreMultipleItems, across
// all calls made on this mock.
gotPaths []path.Path
}
// RestoreMultipleItems records the requested paths on the mock and restores
// nothing: it always returns a nil collection slice and a nil error. The
// context, snapshot ID, and byte counter arguments are ignored.
func (mr *mockRestorer) RestoreMultipleItems(
	ctx context.Context,
	snapshotID string,
	paths []path.Path,
	bc kopia.ByteCounter,
) ([]data.Collection, error) {
	// The mock never produces any data, so the result stays a nil slice.
	var restored []data.Collection

	mr.gotPaths = append(mr.gotPaths, paths...)

	return restored, nil
}
// checkPaths asserts that the paths recorded by RestoreMultipleItems contain
// the same elements as expected, regardless of order.
//
// It takes a pointer receiver to match RestoreMultipleItems, so every method
// on mockRestorer uses the same receiver kind and the mock is not copied on
// each call.
func (mr *mockRestorer) checkPaths(t *testing.T, expected []path.Path) {
	// Attribute assertion failures to the calling test line.
	t.Helper()

	assert.ElementsMatch(t, expected, mr.gotPaths)
}
// makeMetadataPath builds the service/category metadata path for fileName
// under the given tenant, resource owner, service, and category. It fails the
// test immediately if the path cannot be constructed.
//
// Note the parameter order (service before resourceOwner) differs from the
// argument order of ToServiceCategoryMetadataPath, which takes the resource
// owner first.
func makeMetadataPath(
	t *testing.T,
	tenant string,
	service path.ServiceType,
	resourceOwner string,
	category path.CategoryType,
	fileName string,
) path.Path {
	// Mark this function as a helper so a failed require below is reported at
	// the caller's line instead of inside this function.
	t.Helper()

	p, err := path.Builder{}.Append(fileName).ToServiceCategoryMetadataPath(
		tenant,
		resourceOwner,
		service,
		category,
		true,
	)
	require.NoError(t, err)

	return p
}
// TestBackupOperation_CollectMetadata checks that collectMetadata asks the
// restorer for exactly one metadata path per (reason, file name) pair of the
// supplied manifest entry and file list.
func (suite *BackupOpSuite) TestBackupOperation_CollectMetadata() {
	const (
		tenant        = "a-tenant"
		resourceOwner = "a-user"
	)

	fileNames := []string{
		"delta",
		"paths",
	}

	// exchangePath builds the expected Exchange metadata path for one
	// category/file combination.
	exchangePath := func(category path.CategoryType, fileName string) path.Path {
		return makeMetadataPath(
			suite.T(),
			tenant,
			path.ExchangeService,
			resourceOwner,
			category,
			fileName,
		)
	}

	// exchangeReason builds the Reason selecting one Exchange category for the
	// test resource owner.
	exchangeReason := func(category path.CategoryType) kopia.Reason {
		return kopia.Reason{
			ResourceOwner: resourceOwner,
			Service:       path.ExchangeService,
			Category:      category,
		}
	}

	// manifestWith wraps the given reasons in an otherwise empty manifest entry.
	manifestWith := func(reasons ...kopia.Reason) *kopia.ManifestEntry {
		return &kopia.ManifestEntry{
			Manifest: &snapshot.Manifest{},
			Reasons:  reasons,
		}
	}

	// Expected paths are built up front, before any subtest runs.
	emailDeltaPath := exchangePath(path.EmailCategory, fileNames[0])
	emailPathsPath := exchangePath(path.EmailCategory, fileNames[1])
	contactsDeltaPath := exchangePath(path.ContactsCategory, fileNames[0])
	contactsPathsPath := exchangePath(path.ContactsCategory, fileNames[1])

	cases := []struct {
		name       string
		inputMan   *kopia.ManifestEntry
		inputFiles []string
		expected   []path.Path
	}{
		{
			name:       "SingleReasonSingleFile",
			inputMan:   manifestWith(exchangeReason(path.EmailCategory)),
			inputFiles: []string{fileNames[0]},
			expected:   []path.Path{emailDeltaPath},
		},
		{
			name:       "SingleReasonMultipleFiles",
			inputMan:   manifestWith(exchangeReason(path.EmailCategory)),
			inputFiles: fileNames,
			expected:   []path.Path{emailDeltaPath, emailPathsPath},
		},
		{
			name: "MultipleReasonsMultipleFiles",
			inputMan: manifestWith(
				exchangeReason(path.EmailCategory),
				exchangeReason(path.ContactsCategory),
			),
			inputFiles: fileNames,
			expected: []path.Path{
				emailDeltaPath,
				emailPathsPath,
				contactsDeltaPath,
				contactsPathsPath,
			},
		},
	}

	for _, tc := range cases {
		suite.T().Run(tc.name, func(t *testing.T) {
			ctx, flush := tester.NewContext()
			defer flush()

			recorder := &mockRestorer{}

			_, err := collectMetadata(ctx, recorder, tc.inputMan, tc.inputFiles, tenant)
			assert.NoError(t, err)

			// Only the set of requested paths matters, not their order.
			recorder.checkPaths(t, tc.expected)
		})
	}
}
// ---------------------------------------------------------------------------
// integration
// ---------------------------------------------------------------------------