Scope excluded items by prefix (#2760)
In kopia select the longest prefix's exclude set. Has undeterministic behavior if there are somehow prefixes of the same length. In OneDrive, add a prefix that contains the drive ID to all excludes. This makes incrementals safe even if two items in different drives somehow have the same ID. --- #### Does this PR need a docs update or release note? - [ ] ✅ Yes, it's included - [ ] 🕐 Yes, but in a later PR - [x] ⛔ No #### Type of change - [ ] 🌻 Feature - [x] 🐛 Bugfix - [ ] 🗺️ Documentation - [ ] 🤖 Test - [ ] 💻 CI/Deployment - [ ] 🧹 Tech Debt/Cleanup #### Issue(s) * closes #2759 #### Test Plan - [ ] 💪 Manual - [x] ⚡ Unit test - [ ] 💚 E2E
This commit is contained in:
parent
871f0b406d
commit
fd661d216a
@ -38,7 +38,7 @@ func (gc *GraphConnector) DataCollections(
|
||||
metadata []data.RestoreCollection,
|
||||
ctrlOpts control.Options,
|
||||
errs *fault.Bus,
|
||||
) ([]data.BackupCollection, map[string]struct{}, error) {
|
||||
) ([]data.BackupCollection, map[string]map[string]struct{}, error) {
|
||||
ctx, end := D.Span(ctx, "gc:dataCollections", D.Index("service", sels.Service.String()))
|
||||
defer end()
|
||||
|
||||
|
||||
@ -169,7 +169,7 @@ func DataCollections(
|
||||
su support.StatusUpdater,
|
||||
ctrlOpts control.Options,
|
||||
errs *fault.Bus,
|
||||
) ([]data.BackupCollection, map[string]struct{}, error) {
|
||||
) ([]data.BackupCollection, map[string]map[string]struct{}, error) {
|
||||
eb, err := selector.ToExchangeBackup()
|
||||
if err != nil {
|
||||
return nil, nil, clues.Wrap(err, "exchange dataCollection selector").WithClues(ctx)
|
||||
|
||||
@ -258,7 +258,7 @@ func (c *Collections) Get(
|
||||
ctx context.Context,
|
||||
prevMetadata []data.RestoreCollection,
|
||||
errs *fault.Bus,
|
||||
) ([]data.BackupCollection, map[string]struct{}, error) {
|
||||
) ([]data.BackupCollection, map[string]map[string]struct{}, error) {
|
||||
prevDeltas, oldPathsByDriveID, err := deserializeMetadata(ctx, prevMetadata, errs)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
@ -283,11 +283,7 @@ func (c *Collections) Get(
|
||||
// Drive ID -> folder ID -> folder path
|
||||
folderPaths = map[string]map[string]string{}
|
||||
// Items that should be excluded when sourcing data from the base backup.
|
||||
// TODO(ashmrtn): This list contains the M365 IDs of deleted items so while
|
||||
// it's technically safe to pass all the way through to kopia (files are
|
||||
// unlikely to be named their M365 ID) we should wait to do that until we've
|
||||
// switched to using those IDs for file names in kopia.
|
||||
excludedItems = map[string]struct{}{}
|
||||
excludedItems = map[string]map[string]struct{}{}
|
||||
)
|
||||
|
||||
for _, d := range drives {
|
||||
@ -349,7 +345,18 @@ func (c *Collections) Get(
|
||||
numDeltas)
|
||||
|
||||
if !delta.Reset {
|
||||
maps.Copy(excludedItems, excluded)
|
||||
p, err := GetCanonicalPath(
|
||||
fmt.Sprintf(rootDrivePattern, driveID),
|
||||
c.tenant,
|
||||
c.resourceOwner,
|
||||
c.source)
|
||||
if err != nil {
|
||||
return nil, nil,
|
||||
clues.Wrap(err, "making exclude prefix for drive").WithClues(ctx).With("drive_id", driveID)
|
||||
}
|
||||
|
||||
excludedItems[p.String()] = excluded
|
||||
|
||||
continue
|
||||
}
|
||||
|
||||
@ -377,7 +384,7 @@ func (c *Collections) Get(
|
||||
|
||||
prevPath, err := path.FromDataLayerPath(p, false)
|
||||
if err != nil {
|
||||
return nil, map[string]struct{}{},
|
||||
return nil, nil,
|
||||
clues.Wrap(err, "invalid previous path").WithClues(ctx).With("deleted_path", p)
|
||||
}
|
||||
|
||||
|
||||
@ -1269,7 +1269,7 @@ func (suite *OneDriveCollectionsSuite) TestGet() {
|
||||
expectedCollections map[string]map[data.CollectionState][]string
|
||||
expectedDeltaURLs map[string]string
|
||||
expectedFolderPaths map[string]map[string]string
|
||||
expectedDelList map[string]struct{}
|
||||
expectedDelList map[string]map[string]struct{}
|
||||
expectedSkippedCount int
|
||||
doNotMergeItems bool
|
||||
}{
|
||||
@ -1300,7 +1300,9 @@ func (suite *OneDriveCollectionsSuite) TestGet() {
|
||||
expectedFolderPaths: map[string]map[string]string{
|
||||
driveID1: {"root": rootFolderPath1},
|
||||
},
|
||||
expectedDelList: getDelList("file"),
|
||||
expectedDelList: map[string]map[string]struct{}{
|
||||
rootFolderPath1: getDelList("file"),
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "OneDrive_OneItemPage_NoFolders_NoErrors",
|
||||
@ -1329,7 +1331,9 @@ func (suite *OneDriveCollectionsSuite) TestGet() {
|
||||
expectedFolderPaths: map[string]map[string]string{
|
||||
driveID1: {"root": rootFolderPath1},
|
||||
},
|
||||
expectedDelList: getDelList("file"),
|
||||
expectedDelList: map[string]map[string]struct{}{
|
||||
rootFolderPath1: getDelList("file"),
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "OneDrive_OneItemPage_NoErrors",
|
||||
@ -1363,7 +1367,9 @@ func (suite *OneDriveCollectionsSuite) TestGet() {
|
||||
"folder": folderPath1,
|
||||
},
|
||||
},
|
||||
expectedDelList: getDelList("file"),
|
||||
expectedDelList: map[string]map[string]struct{}{
|
||||
rootFolderPath1: getDelList("file"),
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "OneDrive_OneItemPage_NoErrors_FileRenamedMultiple",
|
||||
@ -1398,7 +1404,9 @@ func (suite *OneDriveCollectionsSuite) TestGet() {
|
||||
"folder": folderPath1,
|
||||
},
|
||||
},
|
||||
expectedDelList: getDelList("file"),
|
||||
expectedDelList: map[string]map[string]struct{}{
|
||||
rootFolderPath1: getDelList("file"),
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "OneDrive_OneItemPage_NoErrors_FileMovedMultiple",
|
||||
@ -1433,7 +1441,9 @@ func (suite *OneDriveCollectionsSuite) TestGet() {
|
||||
"folder": folderPath1,
|
||||
},
|
||||
},
|
||||
expectedDelList: getDelList("file"),
|
||||
expectedDelList: map[string]map[string]struct{}{
|
||||
rootFolderPath1: getDelList("file"),
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "OneDrive_OneItemPage_EmptyDelta_NoErrors",
|
||||
@ -1460,7 +1470,9 @@ func (suite *OneDriveCollectionsSuite) TestGet() {
|
||||
},
|
||||
expectedDeltaURLs: map[string]string{},
|
||||
expectedFolderPaths: map[string]map[string]string{},
|
||||
expectedDelList: getDelList("file"),
|
||||
expectedDelList: map[string]map[string]struct{}{
|
||||
rootFolderPath1: getDelList("file"),
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "OneDrive_TwoItemPages_NoErrors",
|
||||
@ -1502,7 +1514,9 @@ func (suite *OneDriveCollectionsSuite) TestGet() {
|
||||
"folder": folderPath1,
|
||||
},
|
||||
},
|
||||
expectedDelList: getDelList("file", "file2"),
|
||||
expectedDelList: map[string]map[string]struct{}{
|
||||
rootFolderPath1: getDelList("file", "file2"),
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "TwoDrives_OneItemPageEach_NoErrors",
|
||||
@ -1557,7 +1571,10 @@ func (suite *OneDriveCollectionsSuite) TestGet() {
|
||||
"folder2": folderPath2,
|
||||
},
|
||||
},
|
||||
expectedDelList: getDelList("file", "file2"),
|
||||
expectedDelList: map[string]map[string]struct{}{
|
||||
rootFolderPath1: getDelList("file"),
|
||||
rootFolderPath2: getDelList("file2"),
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "OneDrive_OneItemPage_Errors",
|
||||
@ -1607,7 +1624,7 @@ func (suite *OneDriveCollectionsSuite) TestGet() {
|
||||
"root": rootFolderPath1,
|
||||
},
|
||||
},
|
||||
expectedDelList: map[string]struct{}{},
|
||||
expectedDelList: map[string]map[string]struct{}{},
|
||||
doNotMergeItems: true,
|
||||
},
|
||||
{
|
||||
@ -1649,7 +1666,7 @@ func (suite *OneDriveCollectionsSuite) TestGet() {
|
||||
"folder": folderPath1,
|
||||
},
|
||||
},
|
||||
expectedDelList: map[string]struct{}{},
|
||||
expectedDelList: map[string]map[string]struct{}{},
|
||||
doNotMergeItems: true,
|
||||
},
|
||||
{
|
||||
@ -1691,7 +1708,9 @@ func (suite *OneDriveCollectionsSuite) TestGet() {
|
||||
"folder": folderPath1,
|
||||
},
|
||||
},
|
||||
expectedDelList: getDelList("file", "file2"),
|
||||
expectedDelList: map[string]map[string]struct{}{
|
||||
rootFolderPath1: getDelList("file", "file2"),
|
||||
},
|
||||
doNotMergeItems: false,
|
||||
},
|
||||
{
|
||||
@ -1733,7 +1752,7 @@ func (suite *OneDriveCollectionsSuite) TestGet() {
|
||||
"folder2": expectedPath1("/folder2"),
|
||||
},
|
||||
},
|
||||
expectedDelList: map[string]struct{}{},
|
||||
expectedDelList: map[string]map[string]struct{}{},
|
||||
doNotMergeItems: true,
|
||||
},
|
||||
{
|
||||
@ -1774,7 +1793,7 @@ func (suite *OneDriveCollectionsSuite) TestGet() {
|
||||
"folder2": expectedPath1("/folder"),
|
||||
},
|
||||
},
|
||||
expectedDelList: map[string]struct{}{},
|
||||
expectedDelList: map[string]map[string]struct{}{},
|
||||
doNotMergeItems: true,
|
||||
},
|
||||
{
|
||||
@ -1819,7 +1838,9 @@ func (suite *OneDriveCollectionsSuite) TestGet() {
|
||||
"folder": folderPath1,
|
||||
},
|
||||
},
|
||||
expectedDelList: getDelList("file", "file2"),
|
||||
expectedDelList: map[string]map[string]struct{}{
|
||||
rootFolderPath1: getDelList("file", "file2"),
|
||||
},
|
||||
expectedSkippedCount: 2,
|
||||
},
|
||||
}
|
||||
|
||||
@ -39,7 +39,7 @@ func DataCollections(
|
||||
su support.StatusUpdater,
|
||||
ctrlOpts control.Options,
|
||||
errs *fault.Bus,
|
||||
) ([]data.BackupCollection, map[string]struct{}, error) {
|
||||
) ([]data.BackupCollection, map[string]map[string]struct{}, error) {
|
||||
odb, err := selector.ToOneDriveBackup()
|
||||
if err != nil {
|
||||
return nil, nil, clues.Wrap(err, "parsing selector").WithClues(ctx)
|
||||
@ -49,7 +49,7 @@ func DataCollections(
|
||||
el = errs.Local()
|
||||
user = selector.DiscreteOwner
|
||||
collections = []data.BackupCollection{}
|
||||
allExcludes = map[string]struct{}{}
|
||||
allExcludes = map[string]map[string]struct{}{}
|
||||
)
|
||||
|
||||
// for each scope that includes oneDrive items, get all
|
||||
@ -77,7 +77,13 @@ func DataCollections(
|
||||
|
||||
collections = append(collections, odcs...)
|
||||
|
||||
maps.Copy(allExcludes, excludes)
|
||||
for k, v := range excludes {
|
||||
if _, ok := allExcludes[k]; !ok {
|
||||
allExcludes[k] = map[string]struct{}{}
|
||||
}
|
||||
|
||||
maps.Copy(allExcludes[k], v)
|
||||
}
|
||||
}
|
||||
|
||||
return collections, allExcludes, el.Failure()
|
||||
|
||||
@ -37,7 +37,7 @@ func DataCollections(
|
||||
su statusUpdater,
|
||||
ctrlOpts control.Options,
|
||||
errs *fault.Bus,
|
||||
) ([]data.BackupCollection, map[string]struct{}, error) {
|
||||
) ([]data.BackupCollection, map[string]map[string]struct{}, error) {
|
||||
b, err := selector.ToSharePointBackup()
|
||||
if err != nil {
|
||||
return nil, nil, errors.Wrap(err, "sharePointDataCollection: parsing selector")
|
||||
@ -171,7 +171,7 @@ func collectLibraries(
|
||||
updater statusUpdater,
|
||||
ctrlOpts control.Options,
|
||||
errs *fault.Bus,
|
||||
) ([]data.BackupCollection, map[string]struct{}, error) {
|
||||
) ([]data.BackupCollection, map[string]map[string]struct{}, error) {
|
||||
logger.Ctx(ctx).Debug("creating SharePoint Library collections")
|
||||
|
||||
var (
|
||||
|
||||
@ -422,13 +422,27 @@ func streamBaseEntries(
|
||||
locationPath path.Path,
|
||||
dir fs.Directory,
|
||||
encodedSeen map[string]struct{},
|
||||
globalExcludeSet map[string]struct{},
|
||||
globalExcludeSet map[string]map[string]struct{},
|
||||
progress *corsoProgress,
|
||||
) error {
|
||||
if dir == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
var (
|
||||
excludeSet map[string]struct{}
|
||||
curPrefix string
|
||||
curPathString = curPath.String()
|
||||
)
|
||||
|
||||
for prefix, excludes := range globalExcludeSet {
|
||||
// Select the set with the longest prefix to be most precise.
|
||||
if strings.HasPrefix(curPathString, prefix) && len(prefix) >= len(curPrefix) {
|
||||
excludeSet = excludes
|
||||
curPrefix = prefix
|
||||
}
|
||||
}
|
||||
|
||||
err := dir.IterateEntries(ctx, func(innerCtx context.Context, entry fs.Entry) error {
|
||||
if err := innerCtx.Err(); err != nil {
|
||||
return err
|
||||
@ -453,7 +467,7 @@ func streamBaseEntries(
|
||||
|
||||
// This entry was marked as deleted by a service that can't tell us the
|
||||
// previous path of deleted items, only the item ID.
|
||||
if _, ok := globalExcludeSet[entName]; ok {
|
||||
if _, ok := excludeSet[entName]; ok {
|
||||
return nil
|
||||
}
|
||||
|
||||
@ -519,7 +533,7 @@ func getStreamItemFunc(
|
||||
staticEnts []fs.Entry,
|
||||
streamedEnts data.BackupCollection,
|
||||
baseDir fs.Directory,
|
||||
globalExcludeSet map[string]struct{},
|
||||
globalExcludeSet map[string]map[string]struct{},
|
||||
progress *corsoProgress,
|
||||
) func(context.Context, func(context.Context, fs.Entry) error) error {
|
||||
return func(ctx context.Context, cb func(context.Context, fs.Entry) error) error {
|
||||
@ -567,7 +581,7 @@ func getStreamItemFunc(
|
||||
func buildKopiaDirs(
|
||||
dirName string,
|
||||
dir *treeMap,
|
||||
globalExcludeSet map[string]struct{},
|
||||
globalExcludeSet map[string]map[string]struct{},
|
||||
progress *corsoProgress,
|
||||
) (fs.Directory, error) {
|
||||
// Need to build the directory tree from the leaves up because intermediate
|
||||
@ -1002,7 +1016,7 @@ func inflateDirTree(
|
||||
loader snapshotLoader,
|
||||
baseSnaps []IncrementalBase,
|
||||
collections []data.BackupCollection,
|
||||
globalExcludeSet map[string]struct{},
|
||||
globalExcludeSet map[string]map[string]struct{},
|
||||
progress *corsoProgress,
|
||||
) (fs.Directory, error) {
|
||||
roots, updatedPaths, err := inflateCollectionTree(ctx, collections)
|
||||
|
||||
@ -1435,7 +1435,7 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTreeMultipleSubdirecto
|
||||
table := []struct {
|
||||
name string
|
||||
inputCollections func(t *testing.T) []data.BackupCollection
|
||||
inputExcludes map[string]struct{}
|
||||
inputExcludes map[string]map[string]struct{}
|
||||
expected *expectedNode
|
||||
}{
|
||||
{
|
||||
@ -1443,9 +1443,11 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTreeMultipleSubdirecto
|
||||
inputCollections: func(t *testing.T) []data.BackupCollection {
|
||||
return nil
|
||||
},
|
||||
inputExcludes: map[string]struct{}{
|
||||
inputExcludes: map[string]map[string]struct{}{
|
||||
"": {
|
||||
inboxFileName1: {},
|
||||
},
|
||||
},
|
||||
expected: expectedTreeWithChildren(
|
||||
[]string{
|
||||
testTenant,
|
||||
|
||||
@ -135,7 +135,7 @@ func (w Wrapper) BackupCollections(
|
||||
ctx context.Context,
|
||||
previousSnapshots []IncrementalBase,
|
||||
collections []data.BackupCollection,
|
||||
globalExcludeSet map[string]struct{},
|
||||
globalExcludeSet map[string]map[string]struct{},
|
||||
tags map[string]string,
|
||||
buildTreeWithBase bool,
|
||||
errs *fault.Bus,
|
||||
|
||||
@ -925,6 +925,7 @@ func (suite *KopiaSimpleRepoIntegrationSuite) TestBackupExcludeItem() {
|
||||
table := []struct {
|
||||
name string
|
||||
excludeItem bool
|
||||
excludePrefix bool
|
||||
expectedCachedItems int
|
||||
expectedUncachedItems int
|
||||
cols func() []data.BackupCollection
|
||||
@ -932,7 +933,7 @@ func (suite *KopiaSimpleRepoIntegrationSuite) TestBackupExcludeItem() {
|
||||
restoreCheck assert.ErrorAssertionFunc
|
||||
}{
|
||||
{
|
||||
name: "ExcludeItem",
|
||||
name: "ExcludeItem_NoPrefix",
|
||||
excludeItem: true,
|
||||
expectedCachedItems: len(suite.filesByPath) - 1,
|
||||
expectedUncachedItems: 0,
|
||||
@ -942,6 +943,18 @@ func (suite *KopiaSimpleRepoIntegrationSuite) TestBackupExcludeItem() {
|
||||
backupIDCheck: require.NotEmpty,
|
||||
restoreCheck: assert.Error,
|
||||
},
|
||||
{
|
||||
name: "ExcludeItem_WithPrefix",
|
||||
excludeItem: true,
|
||||
excludePrefix: true,
|
||||
expectedCachedItems: len(suite.filesByPath) - 1,
|
||||
expectedUncachedItems: 0,
|
||||
cols: func() []data.BackupCollection {
|
||||
return nil
|
||||
},
|
||||
backupIDCheck: require.NotEmpty,
|
||||
restoreCheck: assert.Error,
|
||||
},
|
||||
{
|
||||
name: "NoExcludeItemNoChanges",
|
||||
// No snapshot should be made since there were no changes.
|
||||
@ -975,10 +988,22 @@ func (suite *KopiaSimpleRepoIntegrationSuite) TestBackupExcludeItem() {
|
||||
suite.Run(test.name, func() {
|
||||
t := suite.T()
|
||||
|
||||
var excluded map[string]struct{}
|
||||
var (
|
||||
prefix string
|
||||
itemPath = suite.files[suite.testPath1.String()][0].itemPath
|
||||
)
|
||||
|
||||
if !test.excludePrefix {
|
||||
prefix = itemPath.ToBuilder().Dir().Dir().String()
|
||||
}
|
||||
|
||||
var excluded map[string]map[string]struct{}
|
||||
if test.excludeItem {
|
||||
excluded = map[string]struct{}{
|
||||
suite.files[suite.testPath1.String()][0].itemPath.Item(): {},
|
||||
excluded = map[string]map[string]struct{}{
|
||||
// Add a prefix if needed.
|
||||
prefix: {
|
||||
itemPath.Item(): {},
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -312,7 +312,7 @@ func produceBackupDataCollections(
|
||||
metadata []data.RestoreCollection,
|
||||
ctrlOpts control.Options,
|
||||
errs *fault.Bus,
|
||||
) ([]data.BackupCollection, map[string]struct{}, error) {
|
||||
) ([]data.BackupCollection, map[string]map[string]struct{}, error) {
|
||||
complete, closer := observe.MessageWithCompletion(ctx, observe.Safe("Discovering items to backup"))
|
||||
defer func() {
|
||||
complete <- struct{}{}
|
||||
@ -332,7 +332,7 @@ type backuper interface {
|
||||
ctx context.Context,
|
||||
bases []kopia.IncrementalBase,
|
||||
cs []data.BackupCollection,
|
||||
excluded map[string]struct{},
|
||||
excluded map[string]map[string]struct{},
|
||||
tags map[string]string,
|
||||
buildTreeWithBase bool,
|
||||
errs *fault.Bus,
|
||||
@ -391,7 +391,7 @@ func consumeBackupDataCollections(
|
||||
reasons []kopia.Reason,
|
||||
mans []*kopia.ManifestEntry,
|
||||
cs []data.BackupCollection,
|
||||
excludes map[string]struct{},
|
||||
excludes map[string]map[string]struct{},
|
||||
backupID model.StableID,
|
||||
isIncremental bool,
|
||||
errs *fault.Bus,
|
||||
|
||||
@ -97,7 +97,7 @@ func (mbu mockBackuper) BackupCollections(
|
||||
ctx context.Context,
|
||||
bases []kopia.IncrementalBase,
|
||||
cs []data.BackupCollection,
|
||||
excluded map[string]struct{},
|
||||
excluded map[string]map[string]struct{},
|
||||
tags map[string]string,
|
||||
buildTreeWithBase bool,
|
||||
errs *fault.Bus,
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user