Have BackupOp pass subtree paths to BackupCollections (#1833)

## Description

Have BackupOp produce the paths for relevant subtrees in each snapshot and pass those to BackupCollections. This removes the need for code in the kopia package to call into more service/category-specific path package code, thus keeping the kopia package more generic.

As in #1828, prefix info for each subtree path is pulled from the Reason a snapshot was selected.

## Does this PR need a docs update or release note?

- [ ]  Yes, it's included
- [ ] 🕐 Yes, but in a later PR
- [x]  No 

## Type of change

- [x] 🌻 Feature
- [ ] 🐛 Bugfix
- [ ] 🗺️ Documentation
- [ ] 🤖 Test
- [ ] 💻 CI/Deployment
- [ ] 🐹 Trivial/Minor

## Issue(s)

* closes #1832 

## Test Plan

- [ ] 💪 Manual
- [x]  Unit test
- [ ] 💚 E2E
This commit is contained in:
ashmrtn 2022-12-19 13:48:11 -08:00 committed by GitHub
parent c94fbc21f6
commit 5b568a4b1a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 274 additions and 75 deletions

View File

@ -649,33 +649,10 @@ func traverseBaseDir(
return nil
}
// TODO(ashmrtn): We may want to move this to BackupOp and pass in
// (Manifest, path) to kopia.BackupCollections() instead of passing in
// ManifestEntry. That would keep kopia from having to know anything about how
// paths are formed. It would just encode/decode them and do basic manipulations
// like pushing/popping elements on a path based on location in the hierarchy.
func encodedElementsForPath(tenant string, r Reason) (*path.Builder, error) {
// This is hacky, but we want the path package to format the path the right
// way (e.x. proper order for service, category, etc), but we don't care about
// the folders after the prefix.
p, err := path.Builder{}.Append("tmp").ToDataLayerPath(
tenant,
r.ResourceOwner,
r.Service,
r.Category,
false,
)
if err != nil {
return nil, errors.Wrap(err, "building path")
}
return p.ToBuilder().Dir(), nil
}
func inflateBaseTree(
ctx context.Context,
loader snapshotLoader,
snap *ManifestEntry,
snap IncrementalBase,
updatedPaths map[string]path.Path,
roots map[string]*treeMap,
) error {
@ -696,22 +673,12 @@ func inflateBaseTree(
return errors.Errorf("snapshot %s root is not a directory", snap.ID)
}
rootName, err := decodeElement(dir.Name())
if err != nil {
return errors.Wrapf(err, "snapshot %s root has undecode-able name", snap.ID)
}
// For each subtree corresponding to the tuple
// (resource owner, service, category) merge the directories in the base with
// what has been reported in the collections we got.
for _, reason := range snap.Reasons {
pb, err := encodedElementsForPath(rootName, reason)
if err != nil {
return errors.Wrapf(err, "snapshot %s getting path elements", snap.ID)
}
for _, subtreePath := range snap.SubtreePaths {
// We're starting from the root directory so don't need it in the path.
pathElems := encodeElements(pb.PopFront().Elements()...)
pathElems := encodeElements(subtreePath.PopFront().Elements()...)
ent, err := snapshotfs.GetNestedEntry(ctx, dir, pathElems)
if err != nil {
@ -730,8 +697,8 @@ func inflateBaseTree(
ctx,
0,
updatedPaths,
pb.Dir(),
pb.Dir(),
subtreePath.Dir(),
subtreePath.Dir(),
subtreeDir,
roots,
); err != nil {
@ -751,7 +718,7 @@ func inflateBaseTree(
func inflateDirTree(
ctx context.Context,
loader snapshotLoader,
baseSnaps []*ManifestEntry,
baseSnaps []IncrementalBase,
collections []data.Collection,
progress *corsoProgress,
) (fs.Directory, error) {

View File

@ -799,21 +799,17 @@ func (msw *mockSnapshotWalker) SnapshotRoot(*snapshot.Manifest) (fs.Entry, error
return msw.snapshotRoot, nil
}
func mockSnapshotEntry(
id, resourceOwner string,
func mockIncrementalBase(
id, tenant, resourceOwner string,
service path.ServiceType,
category path.CategoryType,
) *ManifestEntry {
return &ManifestEntry{
) IncrementalBase {
return IncrementalBase{
Manifest: &snapshot.Manifest{
ID: manifest.ID(id),
},
Reasons: []Reason{
{
ResourceOwner: resourceOwner,
Service: service,
Category: category,
},
SubtreePaths: []*path.Builder{
path.Builder{}.Append(tenant, service.String(), resourceOwner, category.String()),
},
}
}
@ -961,8 +957,8 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTreeSingleSubtree() {
dirTree, err := inflateDirTree(
ctx,
msw,
[]*ManifestEntry{
mockSnapshotEntry("", testUser, path.ExchangeService, path.EmailCategory),
[]IncrementalBase{
mockIncrementalBase("", testTenant, testUser, path.ExchangeService, path.EmailCategory),
},
test.inputCollections(),
progress,
@ -1375,8 +1371,8 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTreeMultipleSubdirecto
dirTree, err := inflateDirTree(
ctx,
msw,
[]*ManifestEntry{
mockSnapshotEntry("", testUser, path.ExchangeService, path.EmailCategory),
[]IncrementalBase{
mockIncrementalBase("", testTenant, testUser, path.ExchangeService, path.EmailCategory),
},
test.inputCollections(t),
progress,
@ -1535,8 +1531,8 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTreeSkipsDeletedSubtre
dirTree, err := inflateDirTree(
ctx,
msw,
[]*ManifestEntry{
mockSnapshotEntry("", testUser, path.ExchangeService, path.EmailCategory),
[]IncrementalBase{
mockIncrementalBase("", testTenant, testUser, path.ExchangeService, path.EmailCategory),
},
collections,
progress,
@ -1779,9 +1775,9 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTreeSelectsCorrectSubt
dirTree, err := inflateDirTree(
ctx,
msw,
[]*ManifestEntry{
mockSnapshotEntry("id1", testUser, path.ExchangeService, path.ContactsCategory),
mockSnapshotEntry("id2", testUser, path.ExchangeService, path.EmailCategory),
[]IncrementalBase{
mockIncrementalBase("id1", testTenant, testUser, path.ExchangeService, path.ContactsCategory),
mockIncrementalBase("id2", testTenant, testUser, path.ExchangeService, path.EmailCategory),
},
collections,
progress,

View File

@ -104,6 +104,11 @@ func (w *Wrapper) Close(ctx context.Context) error {
return errors.Wrap(err, "closing Wrapper")
}
type IncrementalBase struct {
*snapshot.Manifest
SubtreePaths []*path.Builder
}
// BackupCollections takes a set of collections and creates a kopia snapshot
// with the data that they contain. previousSnapshots is used for incremental
// backups and should represent the base snapshot from which metadata is sourced
@ -112,7 +117,7 @@ func (w *Wrapper) Close(ctx context.Context) error {
// complete backup of all data.
func (w Wrapper) BackupCollections(
ctx context.Context,
previousSnapshots []*ManifestEntry,
previousSnapshots []IncrementalBase,
collections []data.Collection,
service path.ServiceType,
oc *OwnersCats,
@ -158,7 +163,7 @@ func (w Wrapper) BackupCollections(
func (w Wrapper) makeSnapshotWithRoot(
ctx context.Context,
prevSnapEntries []*ManifestEntry,
prevSnapEntries []IncrementalBase,
root fs.Directory,
oc *OwnersCats,
addlTags map[string]string,

View File

@ -262,7 +262,7 @@ func (suite *KopiaIntegrationSuite) TestBackupCollections() {
},
}
prevSnaps := []*ManifestEntry{}
prevSnaps := []IncrementalBase{}
for _, test := range table {
suite.T().Run(test.name, func(t *testing.T) {
@ -305,10 +305,11 @@ func (suite *KopiaIntegrationSuite) TestBackupCollections() {
)
require.NoError(t, err)
prevSnaps = append(prevSnaps, &ManifestEntry{
// Will need to fill out reason if/when we use this test with
// incrementals.
prevSnaps = append(prevSnaps, IncrementalBase{
Manifest: snap,
SubtreePaths: []*path.Builder{
suite.testPath1.ToBuilder().Dir(),
},
})
})
}

View File

@ -96,6 +96,7 @@ func (op *BackupOperation) Run(ctx context.Context) (err error) {
var (
opStats backupStats
backupDetails *details.Details
tenantID = op.account.ID()
startTime = time.Now()
)
@ -129,7 +130,7 @@ func (op *BackupOperation) Run(ctx context.Context) (err error) {
oc := selectorToOwnersCats(op.Selectors)
mans, mdColls, err := produceManifestsAndMetadata(ctx, op.kopia, op.store, oc, op.account)
mans, mdColls, err := produceManifestsAndMetadata(ctx, op.kopia, op.store, oc, tenantID)
if err != nil {
opStats.readErr = errors.Wrap(err, "connecting to M365")
return opStats.readErr
@ -150,6 +151,7 @@ func (op *BackupOperation) Run(ctx context.Context) (err error) {
opStats.k, backupDetails, err = consumeBackupDataCollections(
ctx,
op.kopia,
tenantID,
op.Selectors,
oc,
mans,
@ -179,7 +181,7 @@ func produceManifestsAndMetadata(
kw *kopia.Wrapper,
sw *store.Wrapper,
oc *kopia.OwnersCats,
acct account.Account,
tenantID string,
) ([]*kopia.ManifestEntry, []data.Collection, error) {
complete, closer := observe.MessageWithCompletion("Fetching backup heuristics:")
defer func() {
@ -188,13 +190,7 @@ func produceManifestsAndMetadata(
closer()
}()
m365, err := acct.M365Config()
if err != nil {
return nil, nil, err
}
var (
tid = m365.AzureTenantID
metadataFiles = graph.AllMetadataFileNames()
collections []data.Collection
)
@ -222,7 +218,7 @@ func produceManifestsAndMetadata(
// return nil, nil, err
// }
colls, err := collectMetadata(ctx, kw, man, metadataFiles, tid)
colls, err := collectMetadata(ctx, kw, man, metadataFiles, tenantID)
if err != nil && !errors.Is(err, kopia.ErrNotFound) {
// prior metadata isn't guaranteed to exist.
// if it doesn't, we'll just have to do a
@ -331,10 +327,45 @@ func produceBackupDataCollections(
return gc.DataCollections(ctx, sel, metadata, ctrlOpts)
}
func builderFromReason(tenant string, r kopia.Reason) (*path.Builder, error) {
// This is hacky, but we want the path package to format the path the right
// way (e.x. proper order for service, category, etc), but we don't care about
// the folders after the prefix.
p, err := path.Builder{}.Append("tmp").ToDataLayerPath(
tenant,
r.ResourceOwner,
r.Service,
r.Category,
false,
)
if err != nil {
return nil, errors.Wrapf(
err,
"building path for service %s category %s",
r.Service.String(),
r.Category.String(),
)
}
return p.ToBuilder().Dir(), nil
}
type backuper interface {
BackupCollections(
ctx context.Context,
bases []kopia.IncrementalBase,
cs []data.Collection,
service path.ServiceType,
oc *kopia.OwnersCats,
tags map[string]string,
) (*kopia.BackupStats, *details.Details, error)
}
// calls kopia to backup the collections of data
func consumeBackupDataCollections(
ctx context.Context,
kw *kopia.Wrapper,
bu backuper,
tenantID string,
sel selectors.Selector,
oc *kopia.OwnersCats,
mans []*kopia.ManifestEntry,
@ -353,7 +384,27 @@ func consumeBackupDataCollections(
kopia.TagBackupCategory: "",
}
return kw.BackupCollections(ctx, mans, cs, sel.PathService(), oc, tags)
bases := make([]kopia.IncrementalBase, 0, len(mans))
for _, m := range mans {
paths := make([]*path.Builder, 0, len(m.Reasons))
for _, reason := range m.Reasons {
pb, err := builderFromReason(tenantID, reason)
if err != nil {
return nil, nil, errors.Wrap(err, "getting subtree paths for bases")
}
paths = append(paths, pb)
}
bases = append(bases, kopia.IncrementalBase{
Manifest: m.Manifest,
SubtreePaths: paths,
})
}
return bu.BackupCollections(ctx, bases, cs, sel.PathService(), oc, tags)
}
// writes the results metrics to the operation results.

View File

@ -20,6 +20,7 @@ import (
"github.com/alcionai/corso/src/internal/tester"
"github.com/alcionai/corso/src/pkg/account"
"github.com/alcionai/corso/src/pkg/backup"
"github.com/alcionai/corso/src/pkg/backup/details"
"github.com/alcionai/corso/src/pkg/control"
"github.com/alcionai/corso/src/pkg/path"
"github.com/alcionai/corso/src/pkg/selectors"
@ -278,6 +279,184 @@ func (suite *BackupOpSuite) TestBackupOperation_CollectMetadata() {
}
}
type mockBackuper struct {
checkFunc func(
bases []kopia.IncrementalBase,
cs []data.Collection,
service path.ServiceType,
oc *kopia.OwnersCats,
tags map[string]string,
)
}
func (mbu mockBackuper) BackupCollections(
ctx context.Context,
bases []kopia.IncrementalBase,
cs []data.Collection,
service path.ServiceType,
oc *kopia.OwnersCats,
tags map[string]string,
) (*kopia.BackupStats, *details.Details, error) {
if mbu.checkFunc != nil {
mbu.checkFunc(bases, cs, service, oc, tags)
}
return &kopia.BackupStats{}, &details.Details{}, nil
}
func (suite *BackupOpSuite) TestBackupOperation_ConsumeBackupDataCollections_Paths() {
var (
tenant = "a-tenant"
resourceOwner = "a-user"
emailBuilder = path.Builder{}.Append(
tenant,
path.ExchangeService.String(),
resourceOwner,
path.EmailCategory.String(),
)
contactsBuilder = path.Builder{}.Append(
tenant,
path.ExchangeService.String(),
resourceOwner,
path.ContactsCategory.String(),
)
emailReason = kopia.Reason{
ResourceOwner: resourceOwner,
Service: path.ExchangeService,
Category: path.EmailCategory,
}
contactsReason = kopia.Reason{
ResourceOwner: resourceOwner,
Service: path.ExchangeService,
Category: path.ContactsCategory,
}
manifest1 = &snapshot.Manifest{
ID: "id1",
}
manifest2 = &snapshot.Manifest{
ID: "id2",
}
sel = selectors.NewExchangeBackup().Selector
)
table := []struct {
name string
inputMan []*kopia.ManifestEntry
expected []kopia.IncrementalBase
}{
{
name: "SingleManifestSingleReason",
inputMan: []*kopia.ManifestEntry{
{
Manifest: manifest1,
Reasons: []kopia.Reason{
emailReason,
},
},
},
expected: []kopia.IncrementalBase{
{
Manifest: manifest1,
SubtreePaths: []*path.Builder{
emailBuilder,
},
},
},
},
{
name: "SingleManifestMultipleReasons",
inputMan: []*kopia.ManifestEntry{
{
Manifest: manifest1,
Reasons: []kopia.Reason{
emailReason,
contactsReason,
},
},
},
expected: []kopia.IncrementalBase{
{
Manifest: manifest1,
SubtreePaths: []*path.Builder{
emailBuilder,
contactsBuilder,
},
},
},
},
{
name: "MultipleManifestsMultipleReasons",
inputMan: []*kopia.ManifestEntry{
{
Manifest: manifest1,
Reasons: []kopia.Reason{
emailReason,
contactsReason,
},
},
{
Manifest: manifest2,
Reasons: []kopia.Reason{
emailReason,
contactsReason,
},
},
},
expected: []kopia.IncrementalBase{
{
Manifest: manifest1,
SubtreePaths: []*path.Builder{
emailBuilder,
contactsBuilder,
},
},
{
Manifest: manifest2,
SubtreePaths: []*path.Builder{
emailBuilder,
contactsBuilder,
},
},
},
},
}
for _, test := range table {
suite.T().Run(test.name, func(t *testing.T) {
ctx, flush := tester.NewContext()
defer flush()
mbu := &mockBackuper{
checkFunc: func(
bases []kopia.IncrementalBase,
cs []data.Collection,
service path.ServiceType,
oc *kopia.OwnersCats,
tags map[string]string,
) {
assert.ElementsMatch(t, test.expected, bases)
},
}
//nolint:errcheck
consumeBackupDataCollections(
ctx,
mbu,
tenant,
sel,
nil,
test.inputMan,
nil,
model.StableID(""),
)
})
}
}
// ---------------------------------------------------------------------------
// integration
// ---------------------------------------------------------------------------