Add a set of items that will be excluded from base directories during backup (#2143)
## Description Some external services like OneDrive do not have the ability to determine the path a deleted item used to exist at, they only know the item's old ID. This patch allows Kopia Wrapper to handle those items by implementing a global exclude set. The patch assumes that items in base directories are unique as the items in every base directory are checked against the set. This is not wired to anything outside of Kopia Wrapper. Currently this feature is disabled as the passed value is always nil. ## Does this PR need a docs update or release note? - [ ] ✅ Yes, it's included - [ ] 🕐 Yes, but in a later PR - [x] ⛔ No ## Type of change - [x] 🌻 Feature - [ ] 🐛 Bugfix - [ ] 🗺️ Documentation - [ ] 🤖 Test - [ ] 💻 CI/Deployment - [ ] 🧹 Tech Debt/Cleanup ## Issue(s) * closes #2121 ## Test Plan - [ ] 💪 Manual - [x] ⚡ Unit test - [ ] 💚 E2E
This commit is contained in:
parent
91a9077d90
commit
46d61c7246
@ -345,6 +345,7 @@ func streamBaseEntries(
|
|||||||
prevPath path.Path,
|
prevPath path.Path,
|
||||||
dir fs.Directory,
|
dir fs.Directory,
|
||||||
encodedSeen map[string]struct{},
|
encodedSeen map[string]struct{},
|
||||||
|
globalExcludeSet map[string]struct{},
|
||||||
progress *corsoProgress,
|
progress *corsoProgress,
|
||||||
) error {
|
) error {
|
||||||
if dir == nil {
|
if dir == nil {
|
||||||
@ -373,6 +374,12 @@ func streamBaseEntries(
|
|||||||
return errors.Wrapf(err, "unable to decode entry name %s", entry.Name())
|
return errors.Wrapf(err, "unable to decode entry name %s", entry.Name())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// This entry was marked as deleted by a service that can't tell us the
|
||||||
|
// previous path of deleted items, only the item ID.
|
||||||
|
if _, ok := globalExcludeSet[entName]; ok {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
// For now assuming that item IDs don't need escaping.
|
// For now assuming that item IDs don't need escaping.
|
||||||
itemPath, err := curPath.Append(entName, true)
|
itemPath, err := curPath.Append(entName, true)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@ -421,6 +428,7 @@ func getStreamItemFunc(
|
|||||||
staticEnts []fs.Entry,
|
staticEnts []fs.Entry,
|
||||||
streamedEnts data.Collection,
|
streamedEnts data.Collection,
|
||||||
baseDir fs.Directory,
|
baseDir fs.Directory,
|
||||||
|
globalExcludeSet map[string]struct{},
|
||||||
progress *corsoProgress,
|
progress *corsoProgress,
|
||||||
) func(context.Context, func(context.Context, fs.Entry) error) error {
|
) func(context.Context, func(context.Context, fs.Entry) error) error {
|
||||||
return func(ctx context.Context, cb func(context.Context, fs.Entry) error) error {
|
return func(ctx context.Context, cb func(context.Context, fs.Entry) error) error {
|
||||||
@ -443,6 +451,7 @@ func getStreamItemFunc(
|
|||||||
prevPath,
|
prevPath,
|
||||||
baseDir,
|
baseDir,
|
||||||
seen,
|
seen,
|
||||||
|
globalExcludeSet,
|
||||||
progress,
|
progress,
|
||||||
); err != nil {
|
); err != nil {
|
||||||
errs = multierror.Append(
|
errs = multierror.Append(
|
||||||
@ -457,21 +466,22 @@ func getStreamItemFunc(
|
|||||||
|
|
||||||
// buildKopiaDirs recursively builds a directory hierarchy from the roots up.
|
// buildKopiaDirs recursively builds a directory hierarchy from the roots up.
|
||||||
// Returned directories are virtualfs.StreamingDirectory.
|
// Returned directories are virtualfs.StreamingDirectory.
|
||||||
func buildKopiaDirs(dirName string, dir *treeMap, progress *corsoProgress) (fs.Directory, error) {
|
func buildKopiaDirs(
|
||||||
|
dirName string,
|
||||||
|
dir *treeMap,
|
||||||
|
globalExcludeSet map[string]struct{},
|
||||||
|
progress *corsoProgress,
|
||||||
|
) (fs.Directory, error) {
|
||||||
// Reuse kopia directories directly if the subtree rooted at them is
|
// Reuse kopia directories directly if the subtree rooted at them is
|
||||||
// unchanged.
|
// unchanged.
|
||||||
//
|
//
|
||||||
// TODO(ashmrtn): This will need updated when we have OneDrive backups where
|
|
||||||
// items have been deleted because we can't determine which directory used to
|
|
||||||
// have the item.
|
|
||||||
//
|
|
||||||
// TODO(ashmrtn): We could possibly also use this optimization if we know that
|
// TODO(ashmrtn): We could possibly also use this optimization if we know that
|
||||||
// the collection has no items in it. In that case though, we may need to take
|
// the collection has no items in it. In that case though, we may need to take
|
||||||
// extra care to ensure the name of the directory is properly represented. For
|
// extra care to ensure the name of the directory is properly represented. For
|
||||||
// example, a directory that has been renamed but with no additional items may
|
// example, a directory that has been renamed but with no additional items may
|
||||||
// not be able to directly use kopia's version of the directory due to the
|
// not be able to directly use kopia's version of the directory due to the
|
||||||
// rename.
|
// rename.
|
||||||
if dir.collection == nil && len(dir.childDirs) == 0 && dir.baseDir != nil {
|
if dir.collection == nil && len(dir.childDirs) == 0 && dir.baseDir != nil && len(globalExcludeSet) == 0 {
|
||||||
return dir.baseDir, nil
|
return dir.baseDir, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -480,7 +490,7 @@ func buildKopiaDirs(dirName string, dir *treeMap, progress *corsoProgress) (fs.D
|
|||||||
var childDirs []fs.Entry
|
var childDirs []fs.Entry
|
||||||
|
|
||||||
for childName, childDir := range dir.childDirs {
|
for childName, childDir := range dir.childDirs {
|
||||||
child, err := buildKopiaDirs(childName, childDir, progress)
|
child, err := buildKopiaDirs(childName, childDir, globalExcludeSet, progress)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
@ -496,6 +506,7 @@ func buildKopiaDirs(dirName string, dir *treeMap, progress *corsoProgress) (fs.D
|
|||||||
childDirs,
|
childDirs,
|
||||||
dir.collection,
|
dir.collection,
|
||||||
dir.baseDir,
|
dir.baseDir,
|
||||||
|
globalExcludeSet,
|
||||||
progress,
|
progress,
|
||||||
),
|
),
|
||||||
), nil
|
), nil
|
||||||
@ -879,11 +890,19 @@ func inflateBaseTree(
|
|||||||
// virtualfs.StreamingDirectory with the given DataCollections if there is one
|
// virtualfs.StreamingDirectory with the given DataCollections if there is one
|
||||||
// for that node. Tags can be used in future backups to fetch old snapshots for
|
// for that node. Tags can be used in future backups to fetch old snapshots for
|
||||||
// caching reasons.
|
// caching reasons.
|
||||||
|
//
|
||||||
|
// globalExcludeSet represents a set of items, represented with file names, to
|
||||||
|
// exclude from base directories when uploading the snapshot. As items in *all*
|
||||||
|
// base directories will be checked for in every base directory, this assumes
|
||||||
|
// that items in the bases are unique. Deletions of directories or subtrees
|
||||||
|
// should be represented as changes in the status of a Collection, not an entry
|
||||||
|
// in the globalExcludeSet.
|
||||||
func inflateDirTree(
|
func inflateDirTree(
|
||||||
ctx context.Context,
|
ctx context.Context,
|
||||||
loader snapshotLoader,
|
loader snapshotLoader,
|
||||||
baseSnaps []IncrementalBase,
|
baseSnaps []IncrementalBase,
|
||||||
collections []data.Collection,
|
collections []data.Collection,
|
||||||
|
globalExcludeSet map[string]struct{},
|
||||||
progress *corsoProgress,
|
progress *corsoProgress,
|
||||||
) (fs.Directory, error) {
|
) (fs.Directory, error) {
|
||||||
roots, updatedPaths, err := inflateCollectionTree(ctx, collections)
|
roots, updatedPaths, err := inflateCollectionTree(ctx, collections)
|
||||||
@ -915,7 +934,7 @@ func inflateDirTree(
|
|||||||
var res fs.Directory
|
var res fs.Directory
|
||||||
|
|
||||||
for dirName, dir := range roots {
|
for dirName, dir := range roots {
|
||||||
tmp, err := buildKopiaDirs(dirName, dir, progress)
|
tmp, err := buildKopiaDirs(dirName, dir, globalExcludeSet, progress)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|||||||
@ -705,7 +705,7 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTree() {
|
|||||||
// - emails
|
// - emails
|
||||||
// - Inbox
|
// - Inbox
|
||||||
// - 42 separate files
|
// - 42 separate files
|
||||||
dirTree, err := inflateDirTree(ctx, nil, nil, collections, progress)
|
dirTree, err := inflateDirTree(ctx, nil, nil, collections, nil, progress)
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
|
|
||||||
assert.Equal(t, encodeAsPath(testTenant), dirTree.Name())
|
assert.Equal(t, encodeAsPath(testTenant), dirTree.Name())
|
||||||
@ -793,7 +793,7 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTree_MixedDirectory()
|
|||||||
suite.T().Run(test.name, func(t *testing.T) {
|
suite.T().Run(test.name, func(t *testing.T) {
|
||||||
progress := &corsoProgress{pending: map[string]*itemDetails{}}
|
progress := &corsoProgress{pending: map[string]*itemDetails{}}
|
||||||
|
|
||||||
dirTree, err := inflateDirTree(ctx, nil, nil, test.layout, progress)
|
dirTree, err := inflateDirTree(ctx, nil, nil, test.layout, nil, progress)
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
|
|
||||||
assert.Equal(t, encodeAsPath(testTenant), dirTree.Name())
|
assert.Equal(t, encodeAsPath(testTenant), dirTree.Name())
|
||||||
@ -889,7 +889,7 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTree_Fails() {
|
|||||||
defer flush()
|
defer flush()
|
||||||
|
|
||||||
suite.T().Run(test.name, func(t *testing.T) {
|
suite.T().Run(test.name, func(t *testing.T) {
|
||||||
_, err := inflateDirTree(ctx, nil, nil, test.layout, nil)
|
_, err := inflateDirTree(ctx, nil, nil, test.layout, nil, nil)
|
||||||
assert.Error(t, err)
|
assert.Error(t, err)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
@ -992,7 +992,7 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTreeErrors() {
|
|||||||
cols = append(cols, mc)
|
cols = append(cols, mc)
|
||||||
}
|
}
|
||||||
|
|
||||||
_, err := inflateDirTree(ctx, nil, nil, cols, progress)
|
_, err := inflateDirTree(ctx, nil, nil, cols, nil, progress)
|
||||||
require.Error(t, err)
|
require.Error(t, err)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
@ -1261,6 +1261,7 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTreeSingleSubtree() {
|
|||||||
mockIncrementalBase("", testTenant, testUser, path.ExchangeService, path.EmailCategory),
|
mockIncrementalBase("", testTenant, testUser, path.ExchangeService, path.EmailCategory),
|
||||||
},
|
},
|
||||||
test.inputCollections(),
|
test.inputCollections(),
|
||||||
|
nil,
|
||||||
progress,
|
progress,
|
||||||
)
|
)
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
@ -1281,7 +1282,7 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTreeMultipleSubdirecto
|
|||||||
[]string{testTenant, service, testUser, category, testInboxDir},
|
[]string{testTenant, service, testUser, category, testInboxDir},
|
||||||
false,
|
false,
|
||||||
)
|
)
|
||||||
inboxFileName1 := testFileName4
|
inboxFileName1 := testFileName
|
||||||
inboxFileData1 := testFileData4
|
inboxFileData1 := testFileData4
|
||||||
inboxFileName2 := testFileName5
|
inboxFileName2 := testFileName5
|
||||||
inboxFileData2 := testFileData5
|
inboxFileData2 := testFileData5
|
||||||
@ -1291,7 +1292,7 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTreeMultipleSubdirecto
|
|||||||
append(inboxPath.Elements(), personalDir),
|
append(inboxPath.Elements(), personalDir),
|
||||||
false,
|
false,
|
||||||
)
|
)
|
||||||
personalFileName1 := testFileName
|
personalFileName1 := inboxFileName1
|
||||||
personalFileName2 := testFileName2
|
personalFileName2 := testFileName2
|
||||||
|
|
||||||
workPath := makePath(
|
workPath := makePath(
|
||||||
@ -1312,7 +1313,7 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTreeMultipleSubdirecto
|
|||||||
// - user1
|
// - user1
|
||||||
// - email
|
// - email
|
||||||
// - Inbox
|
// - Inbox
|
||||||
// - file4
|
// - file1
|
||||||
// - personal
|
// - personal
|
||||||
// - file1
|
// - file1
|
||||||
// - file2
|
// - file2
|
||||||
@ -1369,8 +1370,51 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTreeMultipleSubdirecto
|
|||||||
table := []struct {
|
table := []struct {
|
||||||
name string
|
name string
|
||||||
inputCollections func(t *testing.T) []data.Collection
|
inputCollections func(t *testing.T) []data.Collection
|
||||||
|
inputExcludes map[string]struct{}
|
||||||
expected *expectedNode
|
expected *expectedNode
|
||||||
}{
|
}{
|
||||||
|
{
|
||||||
|
name: "GlobalExcludeSet",
|
||||||
|
inputCollections: func(t *testing.T) []data.Collection {
|
||||||
|
return nil
|
||||||
|
},
|
||||||
|
inputExcludes: map[string]struct{}{
|
||||||
|
inboxFileName1: {},
|
||||||
|
},
|
||||||
|
expected: expectedTreeWithChildren(
|
||||||
|
[]string{
|
||||||
|
testTenant,
|
||||||
|
service,
|
||||||
|
testUser,
|
||||||
|
category,
|
||||||
|
},
|
||||||
|
[]*expectedNode{
|
||||||
|
{
|
||||||
|
name: testInboxDir,
|
||||||
|
children: []*expectedNode{
|
||||||
|
{
|
||||||
|
name: personalDir,
|
||||||
|
children: []*expectedNode{
|
||||||
|
{
|
||||||
|
name: personalFileName2,
|
||||||
|
children: []*expectedNode{},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: workDir,
|
||||||
|
children: []*expectedNode{
|
||||||
|
{
|
||||||
|
name: workFileName1,
|
||||||
|
children: []*expectedNode{},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
),
|
||||||
|
},
|
||||||
{
|
{
|
||||||
name: "MovesSubtree",
|
name: "MovesSubtree",
|
||||||
inputCollections: func(t *testing.T) []data.Collection {
|
inputCollections: func(t *testing.T) []data.Collection {
|
||||||
@ -1919,6 +1963,7 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTreeMultipleSubdirecto
|
|||||||
mockIncrementalBase("", testTenant, testUser, path.ExchangeService, path.EmailCategory),
|
mockIncrementalBase("", testTenant, testUser, path.ExchangeService, path.EmailCategory),
|
||||||
},
|
},
|
||||||
test.inputCollections(t),
|
test.inputCollections(t),
|
||||||
|
test.inputExcludes,
|
||||||
progress,
|
progress,
|
||||||
)
|
)
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
@ -2079,6 +2124,7 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTreeSkipsDeletedSubtre
|
|||||||
mockIncrementalBase("", testTenant, testUser, path.ExchangeService, path.EmailCategory),
|
mockIncrementalBase("", testTenant, testUser, path.ExchangeService, path.EmailCategory),
|
||||||
},
|
},
|
||||||
collections,
|
collections,
|
||||||
|
nil,
|
||||||
progress,
|
progress,
|
||||||
)
|
)
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
@ -2325,6 +2371,7 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTreeSelectsCorrectSubt
|
|||||||
mockIncrementalBase("id2", testTenant, testUser, path.ExchangeService, path.EmailCategory),
|
mockIncrementalBase("id2", testTenant, testUser, path.ExchangeService, path.EmailCategory),
|
||||||
},
|
},
|
||||||
collections,
|
collections,
|
||||||
|
nil,
|
||||||
progress,
|
progress,
|
||||||
)
|
)
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
|
|||||||
@ -129,7 +129,11 @@ func (w Wrapper) BackupCollections(
|
|||||||
ctx, end := D.Span(ctx, "kopia:backupCollections")
|
ctx, end := D.Span(ctx, "kopia:backupCollections")
|
||||||
defer end()
|
defer end()
|
||||||
|
|
||||||
if len(collections) == 0 {
|
// TODO(ashmrtn): Make this a parameter when actually enabling the global
|
||||||
|
// exclude set.
|
||||||
|
var globalExcludeSet map[string]struct{}
|
||||||
|
|
||||||
|
if len(collections) == 0 && len(globalExcludeSet) == 0 {
|
||||||
return &BackupStats{}, &details.Builder{}, nil, nil
|
return &BackupStats{}, &details.Builder{}, nil, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -147,7 +151,14 @@ func (w Wrapper) BackupCollections(
|
|||||||
base = previousSnapshots
|
base = previousSnapshots
|
||||||
}
|
}
|
||||||
|
|
||||||
dirTree, err := inflateDirTree(ctx, w.c, base, collections, progress)
|
dirTree, err := inflateDirTree(
|
||||||
|
ctx,
|
||||||
|
w.c,
|
||||||
|
base,
|
||||||
|
collections,
|
||||||
|
globalExcludeSet,
|
||||||
|
progress,
|
||||||
|
)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, nil, nil, errors.Wrap(err, "building kopia directories")
|
return nil, nil, nil, errors.Wrap(err, "building kopia directories")
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user