Scope excluded items by prefix (#2760)

In kopia select the longest prefix's exclude set. Has undeterministic
behavior if there are somehow prefixes of the same length.

In OneDrive, add a prefix that contains the drive ID to all excludes.
This makes incrementals safe even if two items in different drives
somehow have the same ID.

---

#### Does this PR need a docs update or release note?

- [ ]  Yes, it's included
- [ ] 🕐 Yes, but in a later PR
- [x]  No

#### Type of change

- [ ] 🌻 Feature
- [x] 🐛 Bugfix
- [ ] 🗺️ Documentation
- [ ] 🤖 Test
- [ ] 💻 CI/Deployment
- [ ] 🧹 Tech Debt/Cleanup

#### Issue(s)

* closes #2759

#### Test Plan

- [ ] 💪 Manual
- [x]  Unit test
- [ ] 💚 E2E
This commit is contained in:
ashmrtn 2023-03-10 16:28:13 -08:00 committed by GitHub
parent 871f0b406d
commit fd661d216a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 122 additions and 47 deletions

View File

@ -38,7 +38,7 @@ func (gc *GraphConnector) DataCollections(
metadata []data.RestoreCollection,
ctrlOpts control.Options,
errs *fault.Bus,
) ([]data.BackupCollection, map[string]struct{}, error) {
) ([]data.BackupCollection, map[string]map[string]struct{}, error) {
ctx, end := D.Span(ctx, "gc:dataCollections", D.Index("service", sels.Service.String()))
defer end()

View File

@ -169,7 +169,7 @@ func DataCollections(
su support.StatusUpdater,
ctrlOpts control.Options,
errs *fault.Bus,
) ([]data.BackupCollection, map[string]struct{}, error) {
) ([]data.BackupCollection, map[string]map[string]struct{}, error) {
eb, err := selector.ToExchangeBackup()
if err != nil {
return nil, nil, clues.Wrap(err, "exchange dataCollection selector").WithClues(ctx)

View File

@ -258,7 +258,7 @@ func (c *Collections) Get(
ctx context.Context,
prevMetadata []data.RestoreCollection,
errs *fault.Bus,
) ([]data.BackupCollection, map[string]struct{}, error) {
) ([]data.BackupCollection, map[string]map[string]struct{}, error) {
prevDeltas, oldPathsByDriveID, err := deserializeMetadata(ctx, prevMetadata, errs)
if err != nil {
return nil, nil, err
@ -283,11 +283,7 @@ func (c *Collections) Get(
// Drive ID -> folder ID -> folder path
folderPaths = map[string]map[string]string{}
// Items that should be excluded when sourcing data from the base backup.
// TODO(ashmrtn): This list contains the M365 IDs of deleted items so while
// it's technically safe to pass all the way through to kopia (files are
// unlikely to be named their M365 ID) we should wait to do that until we've
// switched to using those IDs for file names in kopia.
excludedItems = map[string]struct{}{}
excludedItems = map[string]map[string]struct{}{}
)
for _, d := range drives {
@ -349,7 +345,18 @@ func (c *Collections) Get(
numDeltas)
if !delta.Reset {
maps.Copy(excludedItems, excluded)
p, err := GetCanonicalPath(
fmt.Sprintf(rootDrivePattern, driveID),
c.tenant,
c.resourceOwner,
c.source)
if err != nil {
return nil, nil,
clues.Wrap(err, "making exclude prefix for drive").WithClues(ctx).With("drive_id", driveID)
}
excludedItems[p.String()] = excluded
continue
}
@ -377,7 +384,7 @@ func (c *Collections) Get(
prevPath, err := path.FromDataLayerPath(p, false)
if err != nil {
return nil, map[string]struct{}{},
return nil, nil,
clues.Wrap(err, "invalid previous path").WithClues(ctx).With("deleted_path", p)
}

View File

@ -1269,7 +1269,7 @@ func (suite *OneDriveCollectionsSuite) TestGet() {
expectedCollections map[string]map[data.CollectionState][]string
expectedDeltaURLs map[string]string
expectedFolderPaths map[string]map[string]string
expectedDelList map[string]struct{}
expectedDelList map[string]map[string]struct{}
expectedSkippedCount int
doNotMergeItems bool
}{
@ -1300,7 +1300,9 @@ func (suite *OneDriveCollectionsSuite) TestGet() {
expectedFolderPaths: map[string]map[string]string{
driveID1: {"root": rootFolderPath1},
},
expectedDelList: getDelList("file"),
expectedDelList: map[string]map[string]struct{}{
rootFolderPath1: getDelList("file"),
},
},
{
name: "OneDrive_OneItemPage_NoFolders_NoErrors",
@ -1329,7 +1331,9 @@ func (suite *OneDriveCollectionsSuite) TestGet() {
expectedFolderPaths: map[string]map[string]string{
driveID1: {"root": rootFolderPath1},
},
expectedDelList: getDelList("file"),
expectedDelList: map[string]map[string]struct{}{
rootFolderPath1: getDelList("file"),
},
},
{
name: "OneDrive_OneItemPage_NoErrors",
@ -1363,7 +1367,9 @@ func (suite *OneDriveCollectionsSuite) TestGet() {
"folder": folderPath1,
},
},
expectedDelList: getDelList("file"),
expectedDelList: map[string]map[string]struct{}{
rootFolderPath1: getDelList("file"),
},
},
{
name: "OneDrive_OneItemPage_NoErrors_FileRenamedMultiple",
@ -1398,7 +1404,9 @@ func (suite *OneDriveCollectionsSuite) TestGet() {
"folder": folderPath1,
},
},
expectedDelList: getDelList("file"),
expectedDelList: map[string]map[string]struct{}{
rootFolderPath1: getDelList("file"),
},
},
{
name: "OneDrive_OneItemPage_NoErrors_FileMovedMultiple",
@ -1433,7 +1441,9 @@ func (suite *OneDriveCollectionsSuite) TestGet() {
"folder": folderPath1,
},
},
expectedDelList: getDelList("file"),
expectedDelList: map[string]map[string]struct{}{
rootFolderPath1: getDelList("file"),
},
},
{
name: "OneDrive_OneItemPage_EmptyDelta_NoErrors",
@ -1460,7 +1470,9 @@ func (suite *OneDriveCollectionsSuite) TestGet() {
},
expectedDeltaURLs: map[string]string{},
expectedFolderPaths: map[string]map[string]string{},
expectedDelList: getDelList("file"),
expectedDelList: map[string]map[string]struct{}{
rootFolderPath1: getDelList("file"),
},
},
{
name: "OneDrive_TwoItemPages_NoErrors",
@ -1502,7 +1514,9 @@ func (suite *OneDriveCollectionsSuite) TestGet() {
"folder": folderPath1,
},
},
expectedDelList: getDelList("file", "file2"),
expectedDelList: map[string]map[string]struct{}{
rootFolderPath1: getDelList("file", "file2"),
},
},
{
name: "TwoDrives_OneItemPageEach_NoErrors",
@ -1557,7 +1571,10 @@ func (suite *OneDriveCollectionsSuite) TestGet() {
"folder2": folderPath2,
},
},
expectedDelList: getDelList("file", "file2"),
expectedDelList: map[string]map[string]struct{}{
rootFolderPath1: getDelList("file"),
rootFolderPath2: getDelList("file2"),
},
},
{
name: "OneDrive_OneItemPage_Errors",
@ -1607,7 +1624,7 @@ func (suite *OneDriveCollectionsSuite) TestGet() {
"root": rootFolderPath1,
},
},
expectedDelList: map[string]struct{}{},
expectedDelList: map[string]map[string]struct{}{},
doNotMergeItems: true,
},
{
@ -1649,7 +1666,7 @@ func (suite *OneDriveCollectionsSuite) TestGet() {
"folder": folderPath1,
},
},
expectedDelList: map[string]struct{}{},
expectedDelList: map[string]map[string]struct{}{},
doNotMergeItems: true,
},
{
@ -1691,7 +1708,9 @@ func (suite *OneDriveCollectionsSuite) TestGet() {
"folder": folderPath1,
},
},
expectedDelList: getDelList("file", "file2"),
expectedDelList: map[string]map[string]struct{}{
rootFolderPath1: getDelList("file", "file2"),
},
doNotMergeItems: false,
},
{
@ -1733,7 +1752,7 @@ func (suite *OneDriveCollectionsSuite) TestGet() {
"folder2": expectedPath1("/folder2"),
},
},
expectedDelList: map[string]struct{}{},
expectedDelList: map[string]map[string]struct{}{},
doNotMergeItems: true,
},
{
@ -1774,7 +1793,7 @@ func (suite *OneDriveCollectionsSuite) TestGet() {
"folder2": expectedPath1("/folder"),
},
},
expectedDelList: map[string]struct{}{},
expectedDelList: map[string]map[string]struct{}{},
doNotMergeItems: true,
},
{
@ -1819,7 +1838,9 @@ func (suite *OneDriveCollectionsSuite) TestGet() {
"folder": folderPath1,
},
},
expectedDelList: getDelList("file", "file2"),
expectedDelList: map[string]map[string]struct{}{
rootFolderPath1: getDelList("file", "file2"),
},
expectedSkippedCount: 2,
},
}

View File

@ -39,7 +39,7 @@ func DataCollections(
su support.StatusUpdater,
ctrlOpts control.Options,
errs *fault.Bus,
) ([]data.BackupCollection, map[string]struct{}, error) {
) ([]data.BackupCollection, map[string]map[string]struct{}, error) {
odb, err := selector.ToOneDriveBackup()
if err != nil {
return nil, nil, clues.Wrap(err, "parsing selector").WithClues(ctx)
@ -49,7 +49,7 @@ func DataCollections(
el = errs.Local()
user = selector.DiscreteOwner
collections = []data.BackupCollection{}
allExcludes = map[string]struct{}{}
allExcludes = map[string]map[string]struct{}{}
)
// for each scope that includes oneDrive items, get all
@ -77,7 +77,13 @@ func DataCollections(
collections = append(collections, odcs...)
maps.Copy(allExcludes, excludes)
for k, v := range excludes {
if _, ok := allExcludes[k]; !ok {
allExcludes[k] = map[string]struct{}{}
}
maps.Copy(allExcludes[k], v)
}
}
return collections, allExcludes, el.Failure()

View File

@ -37,7 +37,7 @@ func DataCollections(
su statusUpdater,
ctrlOpts control.Options,
errs *fault.Bus,
) ([]data.BackupCollection, map[string]struct{}, error) {
) ([]data.BackupCollection, map[string]map[string]struct{}, error) {
b, err := selector.ToSharePointBackup()
if err != nil {
return nil, nil, errors.Wrap(err, "sharePointDataCollection: parsing selector")
@ -171,7 +171,7 @@ func collectLibraries(
updater statusUpdater,
ctrlOpts control.Options,
errs *fault.Bus,
) ([]data.BackupCollection, map[string]struct{}, error) {
) ([]data.BackupCollection, map[string]map[string]struct{}, error) {
logger.Ctx(ctx).Debug("creating SharePoint Library collections")
var (

View File

@ -422,13 +422,27 @@ func streamBaseEntries(
locationPath path.Path,
dir fs.Directory,
encodedSeen map[string]struct{},
globalExcludeSet map[string]struct{},
globalExcludeSet map[string]map[string]struct{},
progress *corsoProgress,
) error {
if dir == nil {
return nil
}
var (
excludeSet map[string]struct{}
curPrefix string
curPathString = curPath.String()
)
for prefix, excludes := range globalExcludeSet {
// Select the set with the longest prefix to be most precise.
if strings.HasPrefix(curPathString, prefix) && len(prefix) >= len(curPrefix) {
excludeSet = excludes
curPrefix = prefix
}
}
err := dir.IterateEntries(ctx, func(innerCtx context.Context, entry fs.Entry) error {
if err := innerCtx.Err(); err != nil {
return err
@ -453,7 +467,7 @@ func streamBaseEntries(
// This entry was marked as deleted by a service that can't tell us the
// previous path of deleted items, only the item ID.
if _, ok := globalExcludeSet[entName]; ok {
if _, ok := excludeSet[entName]; ok {
return nil
}
@ -519,7 +533,7 @@ func getStreamItemFunc(
staticEnts []fs.Entry,
streamedEnts data.BackupCollection,
baseDir fs.Directory,
globalExcludeSet map[string]struct{},
globalExcludeSet map[string]map[string]struct{},
progress *corsoProgress,
) func(context.Context, func(context.Context, fs.Entry) error) error {
return func(ctx context.Context, cb func(context.Context, fs.Entry) error) error {
@ -567,7 +581,7 @@ func getStreamItemFunc(
func buildKopiaDirs(
dirName string,
dir *treeMap,
globalExcludeSet map[string]struct{},
globalExcludeSet map[string]map[string]struct{},
progress *corsoProgress,
) (fs.Directory, error) {
// Need to build the directory tree from the leaves up because intermediate
@ -1002,7 +1016,7 @@ func inflateDirTree(
loader snapshotLoader,
baseSnaps []IncrementalBase,
collections []data.BackupCollection,
globalExcludeSet map[string]struct{},
globalExcludeSet map[string]map[string]struct{},
progress *corsoProgress,
) (fs.Directory, error) {
roots, updatedPaths, err := inflateCollectionTree(ctx, collections)

View File

@ -1435,7 +1435,7 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTreeMultipleSubdirecto
table := []struct {
name string
inputCollections func(t *testing.T) []data.BackupCollection
inputExcludes map[string]struct{}
inputExcludes map[string]map[string]struct{}
expected *expectedNode
}{
{
@ -1443,9 +1443,11 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTreeMultipleSubdirecto
inputCollections: func(t *testing.T) []data.BackupCollection {
return nil
},
inputExcludes: map[string]struct{}{
inputExcludes: map[string]map[string]struct{}{
"": {
inboxFileName1: {},
},
},
expected: expectedTreeWithChildren(
[]string{
testTenant,

View File

@ -135,7 +135,7 @@ func (w Wrapper) BackupCollections(
ctx context.Context,
previousSnapshots []IncrementalBase,
collections []data.BackupCollection,
globalExcludeSet map[string]struct{},
globalExcludeSet map[string]map[string]struct{},
tags map[string]string,
buildTreeWithBase bool,
errs *fault.Bus,

View File

@ -925,6 +925,7 @@ func (suite *KopiaSimpleRepoIntegrationSuite) TestBackupExcludeItem() {
table := []struct {
name string
excludeItem bool
excludePrefix bool
expectedCachedItems int
expectedUncachedItems int
cols func() []data.BackupCollection
@ -932,7 +933,7 @@ func (suite *KopiaSimpleRepoIntegrationSuite) TestBackupExcludeItem() {
restoreCheck assert.ErrorAssertionFunc
}{
{
name: "ExcludeItem",
name: "ExcludeItem_NoPrefix",
excludeItem: true,
expectedCachedItems: len(suite.filesByPath) - 1,
expectedUncachedItems: 0,
@ -942,6 +943,18 @@ func (suite *KopiaSimpleRepoIntegrationSuite) TestBackupExcludeItem() {
backupIDCheck: require.NotEmpty,
restoreCheck: assert.Error,
},
{
name: "ExcludeItem_WithPrefix",
excludeItem: true,
excludePrefix: true,
expectedCachedItems: len(suite.filesByPath) - 1,
expectedUncachedItems: 0,
cols: func() []data.BackupCollection {
return nil
},
backupIDCheck: require.NotEmpty,
restoreCheck: assert.Error,
},
{
name: "NoExcludeItemNoChanges",
// No snapshot should be made since there were no changes.
@ -975,10 +988,22 @@ func (suite *KopiaSimpleRepoIntegrationSuite) TestBackupExcludeItem() {
suite.Run(test.name, func() {
t := suite.T()
var excluded map[string]struct{}
var (
prefix string
itemPath = suite.files[suite.testPath1.String()][0].itemPath
)
if !test.excludePrefix {
prefix = itemPath.ToBuilder().Dir().Dir().String()
}
var excluded map[string]map[string]struct{}
if test.excludeItem {
excluded = map[string]struct{}{
suite.files[suite.testPath1.String()][0].itemPath.Item(): {},
excluded = map[string]map[string]struct{}{
// Add a prefix if needed.
prefix: {
itemPath.Item(): {},
},
}
}

View File

@ -312,7 +312,7 @@ func produceBackupDataCollections(
metadata []data.RestoreCollection,
ctrlOpts control.Options,
errs *fault.Bus,
) ([]data.BackupCollection, map[string]struct{}, error) {
) ([]data.BackupCollection, map[string]map[string]struct{}, error) {
complete, closer := observe.MessageWithCompletion(ctx, observe.Safe("Discovering items to backup"))
defer func() {
complete <- struct{}{}
@ -332,7 +332,7 @@ type backuper interface {
ctx context.Context,
bases []kopia.IncrementalBase,
cs []data.BackupCollection,
excluded map[string]struct{},
excluded map[string]map[string]struct{},
tags map[string]string,
buildTreeWithBase bool,
errs *fault.Bus,
@ -391,7 +391,7 @@ func consumeBackupDataCollections(
reasons []kopia.Reason,
mans []*kopia.ManifestEntry,
cs []data.BackupCollection,
excludes map[string]struct{},
excludes map[string]map[string]struct{},
backupID model.StableID,
isIncremental bool,
errs *fault.Bus,

View File

@ -97,7 +97,7 @@ func (mbu mockBackuper) BackupCollections(
ctx context.Context,
bases []kopia.IncrementalBase,
cs []data.BackupCollection,
excluded map[string]struct{},
excluded map[string]map[string]struct{},
tags map[string]string,
buildTreeWithBase bool,
errs *fault.Bus,