Merge directory layout for kopia incremental backups (#1801)

## Description

Merge directory layouts between the passed-in collections and the base snapshot(s). Also add unit tests to ensure the output kopia hierarchy looks as expected. User-observable (CLI) behavior is not affected by this PR.

This PR does not address:
* selecting subtrees for specific data categories in base snapshots
* not clobbering more recent info if multiple snapshots have subtrees for the same data category (ties into above)
* file deletions for services that can only report item deletions at a global level (e.g. OneDrive file deletions)

Viewing individual commits in the PR may make the changes easier to review.

## Type of change

- [x] 🌻 Feature
- [ ] 🐛 Bugfix
- [ ] 🗺️ Documentation
- [ ] 🤖 Test
- [ ] 💻 CI/Deployment
- [ ] 🐹 Trivial/Minor

## Issue(s)

* #1740

## Test Plan

- [ ] 💪 Manual
- [x] ⚡ Unit test
- [ ] 💚 E2E
This commit is contained in:
ashmrtn 2022-12-16 14:05:21 -08:00 committed by GitHub
parent 2c6fe7d2ea
commit da929d8448
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 1063 additions and 37 deletions

View File

@ -15,6 +15,7 @@ import (
"github.com/hashicorp/go-multierror"
"github.com/kopia/kopia/fs"
"github.com/kopia/kopia/fs/virtualfs"
"github.com/kopia/kopia/snapshot"
"github.com/kopia/kopia/snapshot/snapshotfs"
"github.com/pkg/errors"
@ -25,6 +26,8 @@ import (
"github.com/alcionai/corso/src/pkg/path"
)
const maxInflateTraversalDepth = 500
var versionSize = int(unsafe.Sizeof(serializationVersion))
func newBackupStreamReader(version uint32, reader io.ReadCloser) *backupStreamReader {
@ -396,6 +399,23 @@ func getStreamItemFunc(
// buildKopiaDirs recursively builds a directory hierarchy from the roots up.
// Returned directories are virtualfs.StreamingDirectory.
func buildKopiaDirs(dirName string, dir *treeMap, progress *corsoProgress) (fs.Directory, error) {
// Reuse kopia directories directly if the subtree rooted at them is
// unchanged.
//
// TODO(ashmrtn): This will need to be updated when we have OneDrive backups
// where items have been deleted because we can't determine which directory
// used to have the item.
//
// TODO(ashmrtn): We could possibly also use this optimization if we know that
// the collection has no items in it. In that case though, we may need to take
// extra care to ensure the name of the directory is properly represented. For
// example, a directory that has been renamed but with no additional items may
// not be able to directly use kopia's version of the directory due to the
// rename.
if dir.collection == nil && len(dir.childDirs) == 0 && dir.baseDir != nil {
return dir.baseDir, nil
}
// Need to build the directory tree from the leaves up because intermediate
// directories need to have all their entries at creation time.
var childDirs []fs.Entry
@ -518,6 +538,163 @@ func inflateCollectionTree(
return roots, updatedPaths, nil
}
// traverseBaseDir is an unoptimized walk over dir, a directory taken from a
// base snapshot, that recurses into every subdirectory it finds. A directory
// that directly holds at least one non-directory entry, and that still has a
// location in the new hierarchy, gets dir recorded as the baseDir of its node
// in roots.
//
// oldDirPath is the path of dir's parent as laid out in the base snapshot.
// newDirPath is the path of dir's parent after applying every change made
// between the root and that parent; it is nil if a directory on that path was
// deleted. Both are required to detect changes such as a parent directory
// being moved while one of its children is moved out of it.
//
// updatedPaths is looked up with the old path of each directory. A nil value
// means the directory was deleted; a non-nil value is its new location.
//
// An error is returned when depth reaches maxInflateTraversalDepth, when the
// directory name cannot be decoded, when iterating the directory fails, or
// when no tree node can be obtained for the directory's new path.
//
// TODO(ashmrtn): A potentially more memory efficient version of this would
// traverse only the directories that we know are present in the collections
// passed in. The other directories could be dynamically discovered when kopia
// was requesting items.
func traverseBaseDir(
	ctx context.Context,
	depth int,
	updatedPaths map[string]path.Path,
	oldDirPath *path.Builder,
	newDirPath *path.Builder,
	dir fs.Directory,
	roots map[string]*treeMap,
) error {
	if depth >= maxInflateTraversalDepth {
		return errors.Errorf("base snapshot tree too tall %s", oldDirPath)
	}

	// Wrapper base64 encodes every file and folder name to sidestep special
	// characters. Entries read straight from kopia are still encoded, so the
	// name has to be decoded here.
	name, err := decodeElement(dir.Name())
	if err != nil {
		return errors.Wrapf(err, "decoding base directory name %s", dir.Name())
	}

	// oldDirPath becomes where this directory sat in the base snapshot and is
	// used to check whether this specific directory was moved or deleted.
	oldDirPath = oldDirPath.Append(name)

	// mergedPath is where this directory lands once ancestor changes are
	// applied. It stays nil when an ancestor was deleted.
	var mergedPath *path.Builder
	if newDirPath != nil {
		mergedPath = newDirPath.Append(name)
	}

	// An explicit entry for this directory has the final say over whatever was
	// inherited from its ancestors.
	if override, found := updatedPaths[oldDirPath.String()]; found {
		switch {
		case override == nil:
			// This directory was deleted.
			mergedPath = nil
		default:
			// This directory was moved/renamed to the location in override.
			mergedPath = override.ToBuilder()
		}
	}

	// TODO(ashmrtn): If we can do prefix matching on elements in updatedPaths and
	// we know that the tree node for this directory has no collection reference
	// and no child nodes then we can skip traversing this directory. This will
	// only work if we know what directory deleted items used to belong in (e.g.
	// it won't work for OneDrive because we only know the ID of the deleted
	// item).
	sawItem := false

	visit := func(innerCtx context.Context, entry fs.Entry) error {
		switch child := entry.(type) {
		case fs.Directory:
			return traverseBaseDir(
				innerCtx,
				depth+1,
				updatedPaths,
				oldDirPath,
				mergedPath,
				child,
				roots,
			)
		default:
			// Anything that is not a directory counts as an item.
			sawItem = true
			return nil
		}
	}

	if err = dir.IterateEntries(ctx, visit); err != nil {
		return errors.Wrapf(err, "traversing base directory %s", oldDirPath)
	}

	// Only directories that hold items need a node in the tree being built; the
	// walk above merely discovers subdirectories. This optimization stops being
	// valid if subdirectories are determined dynamically while handing items to
	// kopia.
	//
	// Returning here also drops empty directories from backups that have a base
	// snapshot. To preserve them across incremental backups, fetch the node
	// before this check (that alone yields a StreamingDirectory kopia picks up)
	// but keep the baseDir assignment behind it, since that part is an
	// optimization.
	if mergedPath == nil || !sawItem {
		return nil
	}

	node := getTreeNode(roots, mergedPath.Elements())
	if node == nil {
		return errors.Errorf("unable to get tree node for path %s", mergedPath)
	}

	node.baseDir = dir

	return nil
}
// inflateBaseTree loads the root of the base snapshot described by snap via
// loader and merges its directory layout into roots with traverseBaseDir,
// using updatedPaths to account for moved and deleted directories. Snapshots
// with a non-empty IncompleteReason are skipped without error.
func inflateBaseTree(
	ctx context.Context,
	loader snapshotLoader,
	snap *snapshot.Manifest,
	updatedPaths map[string]path.Path,
	roots map[string]*treeMap,
) error {
	// Base information is only sourced from complete snapshots. Checkpoint
	// snapshots instead lean on kopia-assisted dedupe to efficiently handle
	// items that finished uploading before Corso crashed.
	if len(snap.IncompleteReason) > 0 {
		return nil
	}

	rootEntry, err := loader.SnapshotRoot(snap)
	if err != nil {
		return errors.Wrapf(err, "getting snapshot %s root directory", snap.ID)
	}

	rootDir, isDir := rootEntry.(fs.Directory)
	if !isDir {
		return errors.Errorf("snapshot %s root is not a directory", snap.ID)
	}

	// TODO(ashmrtn): We should actually only traverse a subtree of the snapshot
	// where the subtree corresponds to the "reason" this snapshot was chosen.
	// Doing so will avoid pulling in data for categories that should not be
	// included in the current backup or overwriting some entries with out-dated
	// information.
	err = traverseBaseDir(
		ctx,
		0,
		updatedPaths,
		&path.Builder{},
		&path.Builder{},
		rootDir,
		roots,
	)
	if err != nil {
		return errors.Wrapf(err, "traversing base snapshot %s", snap.ID)
	}

	return nil
}
// inflateDirTree returns a set of tags representing all the resource owners and
// service/categories in the snapshot and a fs.Directory tree rooted at the
// oldest common ancestor of the streams. All nodes are
@ -526,14 +703,22 @@ func inflateCollectionTree(
// caching reasons.
func inflateDirTree(
ctx context.Context,
loader snapshotLoader,
baseSnaps []*snapshot.Manifest,
collections []data.Collection,
progress *corsoProgress,
) (fs.Directory, error) {
roots, _, err := inflateCollectionTree(ctx, collections)
roots, updatedPaths, err := inflateCollectionTree(ctx, collections)
if err != nil {
return nil, errors.Wrap(err, "inflating collection tree")
}
for _, snap := range baseSnaps {
if err = inflateBaseTree(ctx, loader, snap, updatedPaths, roots); err != nil {
return nil, errors.Wrap(err, "inflating base snapshot tree(s)")
}
}
if len(roots) > 1 {
return nil, errors.New("multiple root directories")
}

View File

@ -6,8 +6,11 @@ import (
"io"
stdpath "path"
"testing"
"time"
"github.com/kopia/kopia/fs"
"github.com/kopia/kopia/fs/virtualfs"
"github.com/kopia/kopia/snapshot"
"github.com/kopia/kopia/snapshot/snapshotfs"
"github.com/pkg/errors"
"github.com/stretchr/testify/assert"
@ -21,6 +24,163 @@ import (
"github.com/alcionai/corso/src/pkg/path"
)
// makePath joins elements into a single data layer path string and parses it
// with path.FromDataLayerPath (passing false as the second argument). The test
// fails immediately if the path cannot be parsed.
func makePath(t *testing.T, elements []string) path.Path {
	// Mark as a helper so a parse failure is reported at the caller's line
	// instead of inside this function.
	t.Helper()

	p, err := path.FromDataLayerPath(stdpath.Join(elements...), false)
	require.NoError(t, err)

	return p
}
// baseWithChildren builds a chain of four nested static directories named
// after the encoded tenant, service, user, and category, in that order from
// the top down. Every entry in children becomes a direct descendant of the
// category directory. The tenant directory is returned.
func baseWithChildren(
	tenant, service, user, category string,
	children []fs.Entry,
) fs.Entry {
	levels := []string{tenant, service, user, category}

	encoded := make([]string, 0, len(levels))
	for _, level := range levels {
		encoded = append(encoded, encodeElements(level)[0])
	}

	// Assemble from the innermost directory outwards so each directory already
	// has its full set of entries when it is created.
	contents := children
	for i := len(encoded) - 1; i > 0; i-- {
		contents = []fs.Entry{
			virtualfs.NewStaticDirectory(encoded[i], contents),
		}
	}

	return virtualfs.NewStaticDirectory(encoded[0], contents)
}
// expectedNode describes one entry (file or directory) that a test expects to
// find in a kopia directory hierarchy. expectTree compares a tree of these
// against the entries kopia returns.
type expectedNode struct {
// name is the unencoded entry name; expectTree encodes it before matching it
// against the names of the actual entries.
name string
// children are the entries expected directly below this one.
children []*expectedNode
// data is the expected file content. expectFileData skips the content check
// when it is empty.
data []byte
}
// expectedTreeWithChildren builds a chain of four nested expectedNodes named
// tenant, service, user, and category, in that order from the top down. Every
// node in children becomes a direct descendant of the category node. The
// tenant node is returned.
func expectedTreeWithChildren(
	tenant, service, user, category string,
	children []*expectedNode,
) *expectedNode {
	// Start with the innermost level and wrap it one ancestor at a time.
	node := &expectedNode{
		name:     category,
		children: children,
	}

	for _, ancestor := range []string{user, service, tenant} {
		node = &expectedNode{
			name:     ancestor,
			children: []*expectedNode{node},
		}
	}

	return node
}
// expectFileData asserts that the content of f, read through a
// restoreStreamReader that expects serializationVersion, equals expected. The
// check is skipped when expected is empty. It currently only works for files
// that Corso has serialized, as a version specifier is expected at the start
// of the file.
//
//revive:disable:context-as-argument
func expectFileData(
	t *testing.T,
	ctx context.Context,
	expected []byte,
	f fs.StreamingFile,
) {
	//revive:enable:context-as-argument
	t.Helper()

	if len(expected) == 0 {
		return
	}

	// Use the decoded name in failure messages when possible; otherwise fall
	// back to the name exactly as kopia reports it.
	displayName := f.Name()
	if decoded, decodeErr := decodeElement(displayName); decodeErr == nil {
		displayName = decoded
	}

	raw, err := f.GetReader(ctx)
	if !assert.NoErrorf(t, err, "getting reader for file: %s", displayName) {
		return
	}

	// Wrap with a restore stream reader so the version is removed from the
	// data that gets compared.
	versioned := &restoreStreamReader{
		ReadCloser:      io.NopCloser(raw),
		expectedVersion: serializationVersion,
	}

	content, err := io.ReadAll(versioned)
	if !assert.NoErrorf(t, err, "reading data in file: %s", displayName) {
		return
	}

	assert.Equalf(t, expected, content, "data in file: %s", displayName)
}
// expectTree recursively checks that the entries below got match the children
// of expected. Child names are encoded before being compared. Streaming files
// have their content checked with expectFileData and directories are descended
// into. A nil expected skips the check.
//
//revive:disable:context-as-argument
func expectTree(
	t *testing.T,
	ctx context.Context,
	expected *expectedNode,
	got fs.Entry,
) {
	//revive:enable:context-as-argument
	t.Helper()

	if expected == nil {
		return
	}

	wantNames := make([]string, 0, len(expected.children))
	wantByName := make(map[string]*expectedNode, len(expected.children))

	for i := range expected.children {
		child := expected.children[i]
		key := encodeElements(child.name)[0]

		wantNames = append(wantNames, key)
		wantByName[key] = child
	}

	gotEntries := getDirEntriesForEntry(t, ctx, got)
	expectDirs(t, gotEntries, wantNames, true)

	for _, entry := range gotEntries {
		want := wantByName[entry.Name()]
		if !assert.NotNil(t, want) {
			continue
		}

		// Files are checked before directories; anything else is ignored.
		switch typed := entry.(type) {
		case fs.StreamingFile:
			expectFileData(t, ctx, want.data, typed)
		case fs.Directory:
			expectTree(t, ctx, want, typed)
		}
	}
}
func expectDirs(
t *testing.T,
entries []fs.Entry,
@ -49,7 +209,7 @@ func getDirEntriesForEntry(
) []fs.Entry {
//revive:enable:context-as-argument
d, ok := entry.(fs.Directory)
require.True(t, ok, "returned entry is not a directory")
require.True(t, ok, "entry is not a directory")
entries, err := fs.GetAllEntries(ctx, d)
require.NoError(t, err)
@ -390,19 +550,10 @@ type HierarchyBuilderUnitSuite struct {
}
func (suite *HierarchyBuilderUnitSuite) SetupSuite() {
tmp, err := path.FromDataLayerPath(
stdpath.Join(
testTenant,
path.ExchangeService.String(),
testUser,
path.EmailCategory.String(),
testInboxDir,
),
false,
suite.testPath = makePath(
suite.T(),
[]string{testTenant, service, testUser, category, testInboxDir},
)
require.NoError(suite.T(), err)
suite.testPath = tmp
}
func TestHierarchyBuilderUnitSuite(t *testing.T) {
@ -422,17 +573,7 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTree() {
user2 := "user2"
user2Encoded := encodeAsPath(user2)
p2, err := path.FromDataLayerPath(
stdpath.Join(
tenant,
service,
user2,
category,
testInboxDir,
),
false,
)
require.NoError(t, err)
p2 := makePath(t, []string{tenant, service, user2, category, testInboxDir})
// Encode user names here so we don't have to decode things later.
expectedFileCount := map[string]int{
@ -464,7 +605,7 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTree() {
// - emails
// - Inbox
// - 42 separate files
dirTree, err := inflateDirTree(ctx, collections, progress)
dirTree, err := inflateDirTree(ctx, nil, nil, collections, progress)
require.NoError(t, err)
assert.Equal(t, encodeAsPath(testTenant), dirTree.Name())
@ -504,8 +645,7 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTree_MixedDirectory()
subdir := "subfolder"
p2, err := suite.testPath.Append(subdir, false)
require.NoError(suite.T(), err)
p2 := makePath(suite.T(), append(suite.testPath.Elements(), subdir))
// Test multiple orders of items because right now order can matter. Both
// orders result in a directory structure like:
@ -553,7 +693,7 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTree_MixedDirectory()
suite.T().Run(test.name, func(t *testing.T) {
progress := &corsoProgress{pending: map[string]*itemDetails{}}
dirTree, err := inflateDirTree(ctx, test.layout, progress)
dirTree, err := inflateDirTree(ctx, nil, nil, test.layout, progress)
require.NoError(t, err)
assert.Equal(t, encodeAsPath(testTenant), dirTree.Name())
@ -597,13 +737,10 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTree_MixedDirectory()
}
func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTree_Fails() {
p2, err := path.Builder{}.Append(testInboxDir).ToDataLayerExchangePathForCategory(
"tenant2",
"user2",
path.EmailCategory,
false,
p2 := makePath(
suite.T(),
[]string{"tenant2", service, "user2", category, testInboxDir},
)
require.NoError(suite.T(), err)
table := []struct {
name string
@ -651,8 +788,712 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTree_Fails() {
defer flush()
suite.T().Run(test.name, func(t *testing.T) {
_, err := inflateDirTree(ctx, test.layout, nil)
_, err := inflateDirTree(ctx, nil, nil, test.layout, nil)
assert.Error(t, err)
})
}
}
// mockSnapshotWalker is a test double that hands back a fixed snapshot root.
type mockSnapshotWalker struct {
	// snapshotRoot is the entry returned by every SnapshotRoot call.
	snapshotRoot fs.Entry
}

// SnapshotRoot ignores the manifest it is given and returns the configured
// snapshotRoot with a nil error.
func (w *mockSnapshotWalker) SnapshotRoot(_ *snapshot.Manifest) (fs.Entry, error) {
	root := w.snapshotRoot

	return root, nil
}
// TestBuildDirectoryTreeSingleSubtree runs inflateDirTree with a base snapshot
// that holds a single Inbox directory containing one file (testFileName) and
// one input collection for that same directory. Each case checks how the
// collection's items are merged with the file from the base snapshot.
func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTreeSingleSubtree() {
dirPath := makePath(
suite.T(),
[]string{testTenant, service, testUser, category, testInboxDir},
)
// Must be a function that returns a new instance each time as StreamingFile
// can only return its Reader once.
getBaseSnapshot := func() fs.Entry {
return baseWithChildren(
testTenant,
service,
testUser,
category,
[]fs.Entry{
virtualfs.NewStaticDirectory(
encodeElements(testInboxDir)[0],
[]fs.Entry{
virtualfs.StreamingFileWithModTimeFromReader(
encodeElements(testFileName)[0],
time.Time{},
bytes.NewReader(testFileData),
),
},
),
},
)
}
// Each case builds its collections lazily through inputCollections and
// states the full hierarchy expected back from inflateDirTree. Expected file
// nodes without data only have their presence checked, not their content.
table := []struct {
name string
inputCollections func() []data.Collection
expected *expectedNode
}{
// The collection marks the file from the base snapshot as deleted, so the
// Inbox directory is expected to end up with no entries.
{
name: "SkipsDeletedItems",
inputCollections: func() []data.Collection {
mc := mockconnector.NewMockExchangeCollection(dirPath, 1)
mc.Names[0] = testFileName
mc.DeletedItems[0] = true
return []data.Collection{mc}
},
expected: expectedTreeWithChildren(
testTenant,
service,
testUser,
category,
[]*expectedNode{
{
name: testInboxDir,
children: []*expectedNode{},
},
},
),
},
// The collection adds a second file; both the base file and the new file
// (with its data) are expected in Inbox.
{
name: "AddsNewItems",
inputCollections: func() []data.Collection {
mc := mockconnector.NewMockExchangeCollection(dirPath, 1)
mc.Names[0] = testFileName2
mc.Data[0] = testFileData2
mc.ColState = data.NotMovedState
return []data.Collection{mc}
},
expected: expectedTreeWithChildren(
testTenant,
service,
testUser,
category,
[]*expectedNode{
{
name: testInboxDir,
children: []*expectedNode{
{
name: testFileName,
children: []*expectedNode{},
},
{
name: testFileName2,
children: []*expectedNode{},
data: testFileData2,
},
},
},
},
),
},
// The collection supplies new data under the same name as the base file;
// a single file carrying the collection's data is expected.
{
name: "SkipsUpdatedItems",
inputCollections: func() []data.Collection {
mc := mockconnector.NewMockExchangeCollection(dirPath, 1)
mc.Names[0] = testFileName
mc.Data[0] = testFileData2
mc.ColState = data.NotMovedState
return []data.Collection{mc}
},
expected: expectedTreeWithChildren(
testTenant,
service,
testUser,
category,
[]*expectedNode{
{
name: testInboxDir,
children: []*expectedNode{
{
name: testFileName,
children: []*expectedNode{},
data: testFileData2,
},
},
},
},
),
},
}
for _, test := range table {
suite.T().Run(test.name, func(t *testing.T) {
tester.LogTimeOfTest(t)
ctx, flush := tester.NewContext()
defer flush()
progress := &corsoProgress{pending: map[string]*itemDetails{}}
// A fresh base snapshot per case; see the note on getBaseSnapshot.
msw := &mockSnapshotWalker{
snapshotRoot: getBaseSnapshot(),
}
dirTree, err := inflateDirTree(
ctx,
msw,
[]*snapshot.Manifest{{}},
test.inputCollections(),
progress,
)
require.NoError(t, err)
expectTree(t, ctx, test.expected, dirTree)
})
}
}
// TestBuildDirectoryTreeMultipleSubdirectories runs inflateDirTree with a base
// snapshot whose Inbox directory has two subdirectories ("personal" and
// "work"). Each case passes collections that move and/or delete directories
// and checks the resulting hierarchy.
func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTreeMultipleSubdirectories() {
const (
personalDir = "personal"
workDir = "work"
)
inboxPath := makePath(
suite.T(),
[]string{testTenant, service, testUser, category, testInboxDir},
)
personalPath := makePath(
suite.T(),
append(inboxPath.Elements(), personalDir),
)
personalFileName1 := testFileName
personalFileName2 := testFileName2
workPath := makePath(
suite.T(),
append(inboxPath.Elements(), workDir),
)
workFileName := testFileName3
// Must be a function that returns a new instance each time as StreamingFile
// can only return its Reader once.
// baseSnapshot with the following layout:
// - a-tenant
// - exchange
// - user1
// - email
// - Inbox
// - personal
// - file1
// - file2
// - work
// - file3
getBaseSnapshot := func() fs.Entry {
return baseWithChildren(
testTenant,
service,
testUser,
category,
[]fs.Entry{
virtualfs.NewStaticDirectory(
encodeElements(testInboxDir)[0],
[]fs.Entry{
virtualfs.NewStaticDirectory(
encodeElements(personalDir)[0],
[]fs.Entry{
virtualfs.StreamingFileWithModTimeFromReader(
encodeElements(personalFileName1)[0],
time.Time{},
bytes.NewReader(testFileData),
),
virtualfs.StreamingFileWithModTimeFromReader(
encodeElements(personalFileName2)[0],
time.Time{},
bytes.NewReader(testFileData2),
),
},
),
virtualfs.NewStaticDirectory(
encodeElements(workDir)[0],
[]fs.Entry{
virtualfs.StreamingFileWithModTimeFromReader(
encodeElements(workFileName)[0],
time.Time{},
bytes.NewReader(testFileData3),
),
},
),
},
),
},
)
}
// Each case builds its collections lazily through inputCollections and
// states the full hierarchy expected back from inflateDirTree. Expected file
// nodes without data only have their presence checked, not their content.
table := []struct {
name string
inputCollections func(t *testing.T) []data.Collection
expected *expectedNode
}{
// Inbox is moved to "Inbox2" with no items of its own; the whole subtree
// is expected under the new name.
{
name: "MovesSubtree",
inputCollections: func(t *testing.T) []data.Collection {
newPath := makePath(
t,
[]string{testTenant, service, testUser, category, testInboxDir + "2"},
)
mc := mockconnector.NewMockExchangeCollection(newPath, 0)
mc.PrevPath = inboxPath
mc.ColState = data.MovedState
return []data.Collection{mc}
},
expected: expectedTreeWithChildren(
testTenant,
service,
testUser,
category,
[]*expectedNode{
{
name: testInboxDir + "2",
children: []*expectedNode{
{
name: personalDir,
children: []*expectedNode{
{
name: personalFileName1,
children: []*expectedNode{},
},
{
name: personalFileName2,
children: []*expectedNode{},
},
},
},
{
name: workDir,
children: []*expectedNode{
{
name: workFileName,
children: []*expectedNode{},
},
},
},
},
},
},
),
},
// Inbox is moved to "Inbox2" and Inbox/work is separately moved up to sit
// directly below the category; "personal" follows Inbox while "work" ends
// up at its own explicit location.
{
name: "MovesChildAfterAncestorMove",
inputCollections: func(t *testing.T) []data.Collection {
newInboxPath := makePath(
t,
[]string{testTenant, service, testUser, category, testInboxDir + "2"},
)
newWorkPath := makePath(
t,
[]string{testTenant, service, testUser, category, workDir},
)
inbox := mockconnector.NewMockExchangeCollection(newInboxPath, 0)
inbox.PrevPath = inboxPath
inbox.ColState = data.MovedState
work := mockconnector.NewMockExchangeCollection(newWorkPath, 0)
work.PrevPath = workPath
work.ColState = data.MovedState
return []data.Collection{inbox, work}
},
expected: expectedTreeWithChildren(
testTenant,
service,
testUser,
category,
[]*expectedNode{
{
name: testInboxDir + "2",
children: []*expectedNode{
{
name: personalDir,
children: []*expectedNode{
{
name: personalFileName1,
children: []*expectedNode{},
},
{
name: personalFileName2,
children: []*expectedNode{},
},
},
},
},
},
{
name: workDir,
children: []*expectedNode{
{
name: workFileName,
children: []*expectedNode{},
},
},
},
},
),
},
// Inbox is deleted while Inbox/work is moved up to sit directly below the
// category; only the moved "work" directory is expected to remain.
{
name: "MovesChildAfterAncestorDelete",
inputCollections: func(t *testing.T) []data.Collection {
newWorkPath := makePath(
t,
[]string{testTenant, service, testUser, category, workDir},
)
inbox := mockconnector.NewMockExchangeCollection(inboxPath, 0)
inbox.PrevPath = inboxPath
inbox.ColState = data.DeletedState
work := mockconnector.NewMockExchangeCollection(newWorkPath, 0)
work.PrevPath = workPath
work.ColState = data.MovedState
return []data.Collection{inbox, work}
},
expected: expectedTreeWithChildren(
testTenant,
service,
testUser,
category,
[]*expectedNode{
{
name: workDir,
children: []*expectedNode{
{
name: workFileName,
children: []*expectedNode{},
},
},
},
},
),
},
// Inbox/personal is deleted and Inbox/work is moved to the path personal
// used to occupy; Inbox/personal is expected to hold work's file.
{
name: "ReplaceDeletedDirectory",
inputCollections: func(t *testing.T) []data.Collection {
personal := mockconnector.NewMockExchangeCollection(personalPath, 0)
personal.PrevPath = personalPath
personal.ColState = data.DeletedState
work := mockconnector.NewMockExchangeCollection(personalPath, 0)
work.PrevPath = workPath
work.ColState = data.MovedState
return []data.Collection{personal, work}
},
expected: expectedTreeWithChildren(
testTenant,
service,
testUser,
category,
[]*expectedNode{
{
name: testInboxDir,
children: []*expectedNode{
{
name: personalDir,
children: []*expectedNode{
{
name: workFileName,
},
},
},
},
},
},
),
},
// Inbox/personal is moved up to sit directly below the category and
// Inbox/work is moved to the path personal used to occupy.
{
name: "ReplaceMovedDirectory",
inputCollections: func(t *testing.T) []data.Collection {
newPersonalPath := makePath(
t,
[]string{testTenant, service, testUser, category, personalDir},
)
personal := mockconnector.NewMockExchangeCollection(newPersonalPath, 0)
personal.PrevPath = personalPath
personal.ColState = data.MovedState
work := mockconnector.NewMockExchangeCollection(personalPath, 0)
work.PrevPath = workPath
work.ColState = data.MovedState
return []data.Collection{personal, work}
},
expected: expectedTreeWithChildren(
testTenant,
service,
testUser,
category,
[]*expectedNode{
{
name: testInboxDir,
children: []*expectedNode{
{
name: personalDir,
children: []*expectedNode{
{
name: workFileName,
},
},
},
},
},
{
name: personalDir,
children: []*expectedNode{
{
name: personalFileName1,
},
{
name: personalFileName2,
},
},
},
},
),
},
// Inbox/personal is moved to a directory named "work" directly below the
// category (note newPersonalPath ends in workDir) and carries two items:
// new data for personalFileName2 and a brand new testFileName4. Inbox/work
// is expected to be untouched and the moved directory to hold the base
// files merged with the collection's items.
{
name: "MoveDirectoryAndMergeItems",
inputCollections: func(t *testing.T) []data.Collection {
newPersonalPath := makePath(
t,
[]string{testTenant, service, testUser, category, workDir},
)
personal := mockconnector.NewMockExchangeCollection(newPersonalPath, 2)
personal.PrevPath = personalPath
personal.ColState = data.MovedState
personal.Names[0] = personalFileName2
personal.Data[0] = testFileData5
personal.Names[1] = testFileName4
personal.Data[1] = testFileData4
return []data.Collection{personal}
},
expected: expectedTreeWithChildren(
testTenant,
service,
testUser,
category,
[]*expectedNode{
{
name: testInboxDir,
children: []*expectedNode{
{
name: workDir,
children: []*expectedNode{
{
name: workFileName,
children: []*expectedNode{},
},
},
},
},
},
{
name: workDir,
children: []*expectedNode{
{
name: personalFileName1,
},
{
name: personalFileName2,
data: testFileData5,
},
{
name: testFileName4,
data: testFileData4,
},
},
},
},
),
},
}
for _, test := range table {
suite.T().Run(test.name, func(t *testing.T) {
tester.LogTimeOfTest(t)
ctx, flush := tester.NewContext()
defer flush()
progress := &corsoProgress{pending: map[string]*itemDetails{}}
// A fresh base snapshot per case; see the note on getBaseSnapshot.
msw := &mockSnapshotWalker{
snapshotRoot: getBaseSnapshot(),
}
dirTree, err := inflateDirTree(
ctx,
msw,
[]*snapshot.Manifest{{}},
test.inputCollections(t),
progress,
)
require.NoError(t, err)
expectTree(t, ctx, test.expected, dirTree)
})
}
}
// TestBuildDirectoryTreeSkipsDeletedSubtree runs inflateDirTree with a base
// snapshot holding two sibling subtrees (Inbox and Archive) and a single
// collection that marks the directory at suite.testPath as deleted. Only the
// Archive subtree is expected in the result.
func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTreeSkipsDeletedSubtree() {
tester.LogTimeOfTest(suite.T())
t := suite.T()
ctx, flush := tester.NewContext()
defer flush()
const (
personalDir = "personal"
workDir = "work"
)
// baseSnapshot with the following layout:
// - a-tenant
// - exchange
// - user1
// - email
// - Inbox
// - personal
// - file1
// - work
// - file2
// - Archive
// - personal
// - file3
// - work
// - file4
getBaseSnapshot := func() fs.Entry {
return baseWithChildren(
testTenant,
service,
testUser,
category,
[]fs.Entry{
virtualfs.NewStaticDirectory(
encodeElements(testInboxDir)[0],
[]fs.Entry{
virtualfs.NewStaticDirectory(
encodeElements(personalDir)[0],
[]fs.Entry{
virtualfs.StreamingFileWithModTimeFromReader(
encodeElements(testFileName)[0],
time.Time{},
bytes.NewReader(testFileData),
),
},
),
virtualfs.NewStaticDirectory(
encodeElements(workDir)[0],
[]fs.Entry{
virtualfs.StreamingFileWithModTimeFromReader(
encodeElements(testFileName2)[0],
time.Time{},
bytes.NewReader(testFileData2),
),
},
),
},
),
virtualfs.NewStaticDirectory(
encodeElements(testArchiveDir)[0],
[]fs.Entry{
virtualfs.NewStaticDirectory(
encodeElements(personalDir)[0],
[]fs.Entry{
virtualfs.StreamingFileWithModTimeFromReader(
encodeElements(testFileName3)[0],
time.Time{},
bytes.NewReader(testFileData3),
),
},
),
virtualfs.NewStaticDirectory(
encodeElements(workDir)[0],
[]fs.Entry{
virtualfs.StreamingFileWithModTimeFromReader(
encodeElements(testFileName4)[0],
time.Time{},
bytes.NewReader(testFileData4),
),
},
),
},
),
},
)
}
// Only the Archive subtree is expected; nothing below Inbox should remain.
// File nodes carry no data, so only their presence is checked.
expected := expectedTreeWithChildren(
testTenant,
service,
testUser,
category,
[]*expectedNode{
{
name: testArchiveDir,
children: []*expectedNode{
{
name: personalDir,
children: []*expectedNode{
{
name: testFileName3,
children: []*expectedNode{},
},
},
},
{
name: workDir,
children: []*expectedNode{
{
name: testFileName4,
children: []*expectedNode{},
},
},
},
},
},
},
)
progress := &corsoProgress{pending: map[string]*itemDetails{}}
// The collection's previous path equals its current path and its state is
// DeletedState, i.e. the directory at suite.testPath was deleted.
// NOTE(review): suite.testPath appears to be the Inbox directory (see
// SetupSuite) -- confirm.
mc := mockconnector.NewMockExchangeCollection(suite.testPath, 1)
mc.PrevPath = mc.FullPath()
mc.ColState = data.DeletedState
msw := &mockSnapshotWalker{
snapshotRoot: getBaseSnapshot(),
}
collections := []data.Collection{mc}
// Returned directory structure should look like:
// - a-tenant
// - exchange
// - user1
// - emails
// - Archive
// - personal
// - file3
// - work
// - file4
dirTree, err := inflateDirTree(
ctx,
msw,
[]*snapshot.Manifest{{}},
collections,
progress,
)
require.NoError(t, err)
expectTree(t, ctx, expected, dirTree)
}

View File

@ -136,7 +136,7 @@ func (w Wrapper) BackupCollections(
deets: &details.Details{},
}
dirTree, err := inflateDirTree(ctx, collections, progress)
dirTree, err := inflateDirTree(ctx, w.c, nil, collections, progress)
if err != nil {
return nil, nil, errors.Wrap(err, "building kopia directories")
}