From a64abcb702971a634c6241bd34a6228935d65d29 Mon Sep 17 00:00:00 2001 From: ashmrtn Date: Tue, 1 Nov 2022 11:03:58 -0700 Subject: [PATCH] Tag kopia snapshots for later lookups (#1408) ## Description Add tags to each kopia snapshot that include all service/category pairs in the snapshot and all resource owners in the snapshot. This allows future snapshots to look up existing snapshots by those tags so they can be fed into the snapshot function. Feeding previous snapshots into the snapshot function enables kopia to detect previously uploaded files and skip uploading the data again. ## Type of change - [x] :sunflower: Feature - [ ] :bug: Bugfix - [ ] :world_map: Documentation - [ ] :robot: Test - [ ] :computer: CI/Deployment - [ ] :hamster: Trivial/Minor ## Issue(s) * #1404 ## Test Plan - [ ] :muscle: Manual - [x] :zap: Unit test - [ ] :green_heart: E2E --- src/internal/kopia/wrapper.go | 63 ++++++++++++++++++++++++------ src/internal/kopia/wrapper_test.go | 61 +++++++++++++++++++++++++++-- 2 files changed, 109 insertions(+), 15 deletions(-) diff --git a/src/internal/kopia/wrapper.go b/src/internal/kopia/wrapper.go index 885e7974c..9184b3147 100644 --- a/src/internal/kopia/wrapper.go +++ b/src/internal/kopia/wrapper.go @@ -389,26 +389,36 @@ func newTreeMap() *treeMap { } } -// inflateDirTree returns an fs.Directory tree rooted at the oldest common -// ancestor of the streams and uses virtualfs.StaticDirectory for internal nodes -// in the hierarchy. Leaf nodes are virtualfs.StreamingDirectory with the given -// DataCollections. +// inflateDirTree returns a set of tags representing all the resource owners and +// service/categories in the snapshot and a fs.Directory tree rooted at the +// oldest common ancestor of the streams. All nodes are +// virtualfs.StreamingDirectory with the given DataCollections if there is one +// for that node. Tags can be used in future backups to fetch old snapshots for +// caching reasons. 
func inflateDirTree( ctx context.Context, collections []data.Collection, progress *corsoProgress, -) (fs.Directory, error) { +) (fs.Directory, *ownersCats, error) { roots := make(map[string]*treeMap) + ownerCats := &ownersCats{ + resourceOwners: make(map[string]struct{}), + serviceCats: make(map[string]struct{}), + } for _, s := range collections { if s.FullPath() == nil { - return nil, errors.New("no identifier for collection") + return nil, nil, errors.New("no identifier for collection") } + serviceCat := serviceCatTag(s.FullPath()) + ownerCats.serviceCats[serviceCat] = struct{}{} + ownerCats.resourceOwners[s.FullPath().ResourceOwner()] = struct{}{} + itemPath := s.FullPath().Elements() if len(itemPath) == 0 { - return nil, errors.New("no identifier for collection") + return nil, nil, errors.New("no identifier for collection") } dir, ok := roots[itemPath[0]] @@ -455,7 +465,7 @@ func inflateDirTree( } if len(roots) > 1 { - return nil, errors.New("multiple root directories") + return nil, nil, errors.New("multiple root directories") } var res fs.Directory @@ -463,13 +473,13 @@ func inflateDirTree( for dirName, dir := range roots { tmp, err := buildKopiaDirs(dirName, dir, progress) if err != nil { - return nil, err + return nil, nil, err } res = tmp } - return res, nil + return res, ownerCats, nil } func (w Wrapper) BackupCollections( @@ -497,12 +507,12 @@ func (w Wrapper) BackupCollections( model.ServiceTag: service.String(), } - dirTree, err := inflateDirTree(ctx, collections, progress) + dirTree, oc, err := inflateDirTree(ctx, collections, progress) if err != nil { return nil, nil, errors.Wrap(err, "building kopia directories") } - s, err := w.makeSnapshotWithRoot(ctx, dirTree, progress) + s, err := w.makeSnapshotWithRoot(ctx, dirTree, oc, progress) if err != nil { return nil, nil, err } @@ -510,9 +520,36 @@ func (w Wrapper) BackupCollections( return s, progress.deets, nil } +type ownersCats struct { + resourceOwners map[string]struct{} + serviceCats 
map[string]struct{} +} + +func serviceCatTag(p path.Path) string { + return p.Service().String() + p.Category().String() +} + +// tagsFromStrings returns a map[string]string with the union of both maps +// passed in. Currently uses empty values for each tag because there can be +// multiple instances of resource owners and categories in a single snapshot. +func tagsFromStrings(oc *ownersCats) map[string]string { + res := make(map[string]string, len(oc.serviceCats)+len(oc.resourceOwners)) + + for k := range oc.serviceCats { + res[k] = "" + } + + for k := range oc.resourceOwners { + res[k] = "" + } + + return res +} + func (w Wrapper) makeSnapshotWithRoot( ctx context.Context, root fs.Directory, + oc *ownersCats, progress *corsoProgress, ) (*BackupStats, error) { var man *snapshot.Manifest @@ -563,6 +600,8 @@ func (w Wrapper) makeSnapshotWithRoot( return err } + man.Tags = tagsFromStrings(oc) + if _, err := snapshot.SaveSnapshot(innerCtx, rw, man); err != nil { err = errors.Wrap(err, "saving snapshot") logger.Ctx(innerCtx).Errorw("kopia backup", err) diff --git a/src/internal/kopia/wrapper_test.go b/src/internal/kopia/wrapper_test.go index 7e213e2b1..8bf2b155f 100644 --- a/src/internal/kopia/wrapper_test.go +++ b/src/internal/kopia/wrapper_test.go @@ -12,7 +12,9 @@ import ( "github.com/google/uuid" "github.com/kopia/kopia/fs" + "github.com/kopia/kopia/repo" "github.com/kopia/kopia/repo/manifest" + "github.com/kopia/kopia/snapshot" "github.com/kopia/kopia/snapshot/snapshotfs" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -122,6 +124,20 @@ func getDirEntriesForEntry( return entries } +//revive:disable:context-as-argument +func checkSnapshotTags( + t *testing.T, + ctx context.Context, + rep repo.Repository, + expectedTags map[string]string, + snapshotID string, +) { + //revive:enable:context-as-argument + man, err := snapshot.LoadSnapshot(ctx, rep, manifest.ID(snapshotID)) + require.NoError(t, err) + assert.Equal(t, expectedTags, 
man.Tags) +} + // --------------- // unit tests // --------------- @@ -517,6 +533,14 @@ func (suite *KopiaUnitSuite) TestBuildDirectoryTree() { user1Encoded: 5, user2Encoded: 42, } + expectedServiceCats := map[string]struct{}{ + serviceCatTag(suite.testPath): {}, + serviceCatTag(p2): {}, + } + expectedResourceOwners := map[string]struct{}{ + suite.testPath.ResourceOwner(): {}, + p2.ResourceOwner(): {}, + } progress := &corsoProgress{pending: map[string]*itemDetails{}} @@ -542,8 +566,12 @@ func (suite *KopiaUnitSuite) TestBuildDirectoryTree() { // - emails // - Inbox // - 42 separate files - dirTree, err := inflateDirTree(ctx, collections, progress) + dirTree, oc, err := inflateDirTree(ctx, collections, progress) require.NoError(t, err) + + assert.Equal(t, expectedServiceCats, oc.serviceCats) + assert.Equal(t, expectedResourceOwners, oc.resourceOwners) + assert.Equal(t, encodeAsPath(testTenant), dirTree.Name()) entries, err := fs.GetAllEntries(ctx, dirTree) @@ -584,6 +612,15 @@ func (suite *KopiaUnitSuite) TestBuildDirectoryTree_MixedDirectory() { p2, err := suite.testPath.Append(subdir, false) require.NoError(suite.T(), err) + expectedServiceCats := map[string]struct{}{ + serviceCatTag(suite.testPath): {}, + serviceCatTag(p2): {}, + } + expectedResourceOwners := map[string]struct{}{ + suite.testPath.ResourceOwner(): {}, + p2.ResourceOwner(): {}, + } + // Test multiple orders of items because right now order can matter. 
Both // orders result in a directory structure like: // - a-tenant @@ -630,8 +667,12 @@ func (suite *KopiaUnitSuite) TestBuildDirectoryTree_MixedDirectory() { suite.T().Run(test.name, func(t *testing.T) { progress := &corsoProgress{pending: map[string]*itemDetails{}} - dirTree, err := inflateDirTree(ctx, test.layout, progress) + dirTree, oc, err := inflateDirTree(ctx, test.layout, progress) require.NoError(t, err) + + assert.Equal(t, expectedServiceCats, oc.serviceCats) + assert.Equal(t, expectedResourceOwners, oc.resourceOwners) + assert.Equal(t, encodeAsPath(testTenant), dirTree.Name()) entries, err := fs.GetAllEntries(ctx, dirTree) @@ -727,7 +768,7 @@ func (suite *KopiaUnitSuite) TestBuildDirectoryTree_Fails() { defer flush() suite.T().Run(test.name, func(t *testing.T) { - _, err := inflateDirTree(ctx, test.layout, nil) + _, _, err := inflateDirTree(ctx, test.layout, nil) assert.Error(t, err) }) } @@ -810,6 +851,12 @@ func (suite *KopiaIntegrationSuite) TestBackupCollections() { 42, ), } + expectedTags := map[string]string{ + serviceCatTag(suite.testPath1): "", + suite.testPath1.ResourceOwner(): "", + serviceCatTag(suite.testPath2): "", + suite.testPath2.ResourceOwner(): "", + } stats, deets, err := suite.w.BackupCollections(suite.ctx, collections, path.ExchangeService) assert.NoError(t, err) @@ -821,6 +868,14 @@ func (suite *KopiaIntegrationSuite) TestBackupCollections() { assert.Equal(t, path.ExchangeService.String(), deets.Tags[model.ServiceTag]) // 47 file and 6 folder entries. assert.Len(t, deets.Entries, 47+6) + + checkSnapshotTags( + t, + suite.ctx, + suite.w.c, + expectedTags, + stats.SnapshotID, + ) } func (suite *KopiaIntegrationSuite) TestRestoreAfterCompressionChange() {