From 3a2d0876dd842ebdb815226a96292e57b698cb76 Mon Sep 17 00:00:00 2001 From: Keepers Date: Thu, 4 May 2023 13:32:47 -0600 Subject: [PATCH] consolidate aggregation of parent-item exclude map (#3258) introduces a new type wrapping a nested map so that aggregation of globally excluded items in driveish services don't need to manage the map updates themselves. --- #### Does this PR need a docs update or release note? - [x] :no_entry: No #### Type of change - [x] :broom: Tech Debt/Cleanup #### Issue(s) * #2340 #### Test Plan - [x] :zap: Unit test - [x] :green_heart: E2E --- .../common/prefixmatcher/mock/mock.go | 44 +++++ .../common/prefixmatcher/prefix_matcher.go | 38 ++-- .../prefixmatcher/prefix_matcher_test.go | 3 + .../prefixmatcher/string_set_matcher.go | 122 +++++++++++++ .../prefixmatcher/string_set_matcher_test.go | 166 ++++++++++++++++++ src/internal/connector/data_collections.go | 15 +- .../connector/data_collections_test.go | 10 +- .../connector/exchange/data_collections.go | 3 +- .../connector/graph_connector_test.go | 6 +- src/internal/connector/mock/connector.go | 8 +- .../connector/onedrive/collections.go | 34 ++-- .../connector/onedrive/collections_test.go | 82 +++++---- .../connector/onedrive/data_collections.go | 18 +- src/internal/connector/onedrive/drive_test.go | 7 +- .../connector/sharepoint/data_collections.go | 30 ++-- src/internal/kopia/merge_details.go | 2 +- src/internal/kopia/upload.go | 26 +-- src/internal/kopia/upload_test.go | 38 ++-- src/internal/kopia/wrapper.go | 5 +- src/internal/kopia/wrapper_test.go | 7 +- src/internal/operations/backup.go | 11 +- src/internal/operations/backup_test.go | 3 +- src/internal/operations/inject/inject.go | 5 +- src/internal/streamstore/streamstore.go | 3 +- 24 files changed, 515 insertions(+), 171 deletions(-) create mode 100644 src/internal/common/prefixmatcher/mock/mock.go create mode 100644 src/internal/common/prefixmatcher/string_set_matcher.go create mode 100644 
src/internal/common/prefixmatcher/string_set_matcher_test.go diff --git a/src/internal/common/prefixmatcher/mock/mock.go b/src/internal/common/prefixmatcher/mock/mock.go new file mode 100644 index 000000000..ad4568114 --- /dev/null +++ b/src/internal/common/prefixmatcher/mock/mock.go @@ -0,0 +1,44 @@ +package mock + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/alcionai/corso/src/internal/common/prefixmatcher" +) + +var _ prefixmatcher.StringSetReader = &PrefixMap{} + +type PrefixMap struct { + prefixmatcher.StringSetBuilder +} + +func NewPrefixMap(m map[string]map[string]struct{}) *PrefixMap { + r := PrefixMap{StringSetBuilder: prefixmatcher.NewMatcher[map[string]struct{}]()} + + for k, v := range m { + r.Add(k, v) + } + + return &r +} + +func (pm PrefixMap) AssertEqual(t *testing.T, r prefixmatcher.StringSetReader) { + if pm.Empty() { + require.True(t, r.Empty(), "both prefix maps are empty") + return + } + + pks := pm.Keys() + rks := r.Keys() + + assert.ElementsMatch(t, pks, rks, "prefix keys match") + + for _, pk := range pks { + p, _ := pm.Get(pk) + r, _ := r.Get(pk) + assert.Equal(t, p, r, "values match") + } +} diff --git a/src/internal/common/prefixmatcher/prefix_matcher.go b/src/internal/common/prefixmatcher/prefix_matcher.go index cb244cf26..cc7403612 100644 --- a/src/internal/common/prefixmatcher/prefix_matcher.go +++ b/src/internal/common/prefixmatcher/prefix_matcher.go @@ -2,28 +2,48 @@ package prefixmatcher import ( "strings" + + "golang.org/x/exp/maps" ) -type View[T any] interface { +type Reader[T any] interface { Get(key string) (T, bool) LongestPrefix(key string) (string, T, bool) Empty() bool + Keys() []string } -type Matcher[T any] interface { +type Builder[T any] interface { // Add adds or updates the item with key to have value value. 
Add(key string, value T) - View[T] + Reader[T] } +// --------------------------------------------------------------------------- +// Implementation +// --------------------------------------------------------------------------- + +// prefixMatcher implements Builder type prefixMatcher[T any] struct { data map[string]T } -func (m *prefixMatcher[T]) Add(key string, value T) { - m.data[key] = value +func NewMatcher[T any]() Builder[T] { + return &prefixMatcher[T]{ + data: map[string]T{}, + } } +func NopReader[T any]() *prefixMatcher[T] { + return &prefixMatcher[T]{ + data: make(map[string]T), + } +} + +func (m *prefixMatcher[T]) Add(key string, value T) { m.data[key] = value } +func (m prefixMatcher[T]) Empty() bool { return len(m.data) == 0 } +func (m prefixMatcher[T]) Keys() []string { return maps.Keys(m.data) } + func (m *prefixMatcher[T]) Get(key string) (T, bool) { if m == nil { return *new(T), false @@ -58,11 +78,3 @@ func (m *prefixMatcher[T]) LongestPrefix(key string) (string, T, bool) { return rk, rv, found } - -func (m prefixMatcher[T]) Empty() bool { - return len(m.data) == 0 -} - -func NewMatcher[T any]() Matcher[T] { - return &prefixMatcher[T]{data: map[string]T{}} -} diff --git a/src/internal/common/prefixmatcher/prefix_matcher_test.go b/src/internal/common/prefixmatcher/prefix_matcher_test.go index 998b0184e..815e0fd49 100644 --- a/src/internal/common/prefixmatcher/prefix_matcher_test.go +++ b/src/internal/common/prefixmatcher/prefix_matcher_test.go @@ -5,6 +5,7 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/suite" + "golang.org/x/exp/maps" "github.com/alcionai/corso/src/internal/common/prefixmatcher" "github.com/alcionai/corso/src/internal/tester" @@ -41,6 +42,8 @@ func (suite *PrefixMatcherUnitSuite) TestAdd_Get() { assert.True(t, ok, "searching for key", k) assert.Equal(t, v, val, "returned value") } + + assert.ElementsMatch(t, maps.Keys(kvs), pm.Keys()) } func (suite *PrefixMatcherUnitSuite) TestLongestPrefix() { diff 
--git a/src/internal/common/prefixmatcher/string_set_matcher.go b/src/internal/common/prefixmatcher/string_set_matcher.go new file mode 100644 index 000000000..2de4396a2 --- /dev/null +++ b/src/internal/common/prefixmatcher/string_set_matcher.go @@ -0,0 +1,122 @@ +package prefixmatcher + +import "golang.org/x/exp/maps" + +// StringSetReader is a reader designed specifically to contain a set +// of string values (ie: Reader[map[string]struct{}]). +// This is a quality-of-life typecast for the generic Reader. +type StringSetReader interface { + Reader[map[string]struct{}] +} + +// StringSetBuilder is a builder designed specifically to contain a set +// of string values (ie: Builder[map[string]struct{}]). +// This is a quality-of-life typecast for the generic Builder. +type StringSetBuilder interface { + Builder[map[string]struct{}] +} + +// --------------------------------------------------------------------------- +// Implementation +// --------------------------------------------------------------------------- + +var ( + _ StringSetReader = &StringSetMatcher{} + _ StringSetBuilder = &StringSetMatchBuilder{} +) + +// Items that should be excluded when sourcing data from the base backup. 
+// Parent Path -> item ID -> {} +type StringSetMatcher struct { + ssb StringSetBuilder +} + +func (m *StringSetMatcher) LongestPrefix(parent string) (string, map[string]struct{}, bool) { + if m == nil { + return "", nil, false + } + + return m.ssb.LongestPrefix(parent) +} + +func (m *StringSetMatcher) Empty() bool { + return m == nil || m.ssb.Empty() +} + +func (m *StringSetMatcher) Get(parent string) (map[string]struct{}, bool) { + if m == nil { + return nil, false + } + + return m.ssb.Get(parent) +} + +func (m *StringSetMatcher) Keys() []string { + if m == nil { + return []string{} + } + + return m.ssb.Keys() +} + +func (m *StringSetMatchBuilder) ToReader() *StringSetMatcher { + if m == nil { + return nil + } + + return m.ssm +} + +// Items that should be excluded when sourcing data from the base backup. +// Parent Path -> item ID -> {} +type StringSetMatchBuilder struct { + ssm *StringSetMatcher +} + +func NewStringSetBuilder() *StringSetMatchBuilder { + return &StringSetMatchBuilder{ + ssm: &StringSetMatcher{ + ssb: NewMatcher[map[string]struct{}](), + }, + } +} + +// copies all items into the key's bucket. 
+func (m *StringSetMatchBuilder) Add(key string, items map[string]struct{}) { + if m == nil { + return + } + + vs, ok := m.ssm.Get(key) + if !ok { + m.ssm.ssb.Add(key, items) + return + } + + maps.Copy(vs, items) + m.ssm.ssb.Add(key, vs) +} + +func (m *StringSetMatchBuilder) LongestPrefix(parent string) (string, map[string]struct{}, bool) { + return m.ssm.LongestPrefix(parent) +} + +func (m *StringSetMatchBuilder) Empty() bool { + return m == nil || m.ssm.Empty() +} + +func (m *StringSetMatchBuilder) Get(parent string) (map[string]struct{}, bool) { + if m == nil { + return nil, false + } + + return m.ssm.Get(parent) +} + +func (m *StringSetMatchBuilder) Keys() []string { + if m == nil { + return []string{} + } + + return m.ssm.Keys() +} diff --git a/src/internal/common/prefixmatcher/string_set_matcher_test.go b/src/internal/common/prefixmatcher/string_set_matcher_test.go new file mode 100644 index 000000000..d9a18bc98 --- /dev/null +++ b/src/internal/common/prefixmatcher/string_set_matcher_test.go @@ -0,0 +1,166 @@ +package prefixmatcher_test + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/suite" + "golang.org/x/exp/maps" + + "github.com/alcionai/corso/src/internal/common/prefixmatcher" + "github.com/alcionai/corso/src/internal/tester" +) + +type StringSetUnitSuite struct { + tester.Suite +} + +func TestSTringSetUnitSuite(t *testing.T) { + suite.Run(t, &StringSetUnitSuite{Suite: tester.NewUnitSuite(t)}) +} + +func (suite *StringSetUnitSuite) TestEmpty() { + pm := prefixmatcher.NewStringSetBuilder() + assert.True(suite.T(), pm.Empty()) +} + +func (suite *StringSetUnitSuite) TestToReader() { + var ( + pr prefixmatcher.StringSetReader + t = suite.T() + pm = prefixmatcher.NewStringSetBuilder() + ) + + pr = pm.ToReader() + _, ok := pr.(prefixmatcher.StringSetBuilder) + assert.False(t, ok, "cannot cast to builder") +} + +func (suite *StringSetUnitSuite) TestAdd_Get() { + t := suite.T() + pm := 
prefixmatcher.NewStringSetBuilder() + kvs := map[string]map[string]struct{}{ + "hello": {"world": {}}, + "hola": {"mundo": {}}, + "foo": {"bar": {}}, + } + + for k, v := range kvs { + pm.Add(k, v) + } + + for k, v := range kvs { + val, ok := pm.Get(k) + assert.True(t, ok, "searching for key", k) + assert.Equal(t, v, val, "returned value") + } + + assert.ElementsMatch(t, maps.Keys(kvs), pm.Keys()) +} + +func (suite *StringSetUnitSuite) TestAdd_Union() { + t := suite.T() + pm := prefixmatcher.NewStringSetBuilder() + pm.Add("hello", map[string]struct{}{ + "world": {}, + "mundo": {}, + }) + pm.Add("hello", map[string]struct{}{ + "goodbye": {}, + "aideu": {}, + }) + + expect := map[string]struct{}{ + "world": {}, + "mundo": {}, + "goodbye": {}, + "aideu": {}, + } + + result, _ := pm.Get("hello") + assert.Equal(t, expect, result) + assert.ElementsMatch(t, []string{"hello"}, pm.Keys()) +} + +func (suite *StringSetUnitSuite) TestLongestPrefix() { + key := "hello" + value := "world" + + table := []struct { + name string + inputKVs map[string]map[string]struct{} + searchKey string + expectedKey string + expectedValue map[string]struct{} + expectedFound assert.BoolAssertionFunc + }{ + { + name: "Empty Prefix", + inputKVs: map[string]map[string]struct{}{ + "": {value: {}}, + }, + searchKey: key, + expectedKey: "", + expectedValue: map[string]struct{}{value: {}}, + expectedFound: assert.True, + }, + { + name: "Exact Match", + inputKVs: map[string]map[string]struct{}{ + key: {value: {}}, + }, + searchKey: key, + expectedKey: key, + expectedValue: map[string]struct{}{value: {}}, + expectedFound: assert.True, + }, + { + name: "Prefix Match", + inputKVs: map[string]map[string]struct{}{ + key[:len(key)-2]: {value: {}}, + }, + searchKey: key, + expectedKey: key[:len(key)-2], + expectedValue: map[string]struct{}{value: {}}, + expectedFound: assert.True, + }, + { + name: "Longest Prefix Match", + inputKVs: map[string]map[string]struct{}{ + key[:len(key)-2]: {value: {}}, + "": {value + 
"2": {}}, + key[:len(key)-4]: {value + "3": {}}, + }, + searchKey: key, + expectedKey: key[:len(key)-2], + expectedValue: map[string]struct{}{value: {}}, + expectedFound: assert.True, + }, + { + name: "No Match", + inputKVs: map[string]map[string]struct{}{ + "foo": {value: {}}, + }, + searchKey: key, + expectedKey: "", + expectedValue: nil, + expectedFound: assert.False, + }, + } + + for _, test := range table { + suite.Run(test.name, func() { + t := suite.T() + pm := prefixmatcher.NewStringSetBuilder() + + for k, v := range test.inputKVs { + pm.Add(k, v) + } + + k, v, ok := pm.LongestPrefix(test.searchKey) + assert.Equal(t, test.expectedKey, k, "key") + assert.Equal(t, test.expectedValue, v, "value") + test.expectedFound(t, ok, "found") + }) + } +} diff --git a/src/internal/connector/data_collections.go b/src/internal/connector/data_collections.go index 77b5ba7ca..9f0f738e5 100644 --- a/src/internal/connector/data_collections.go +++ b/src/internal/connector/data_collections.go @@ -7,6 +7,7 @@ import ( "github.com/alcionai/clues" "github.com/alcionai/corso/src/internal/common/idname" + "github.com/alcionai/corso/src/internal/common/prefixmatcher" "github.com/alcionai/corso/src/internal/connector/discovery" "github.com/alcionai/corso/src/internal/connector/exchange" "github.com/alcionai/corso/src/internal/connector/graph" @@ -41,7 +42,7 @@ func (gc *GraphConnector) ProduceBackupCollections( lastBackupVersion int, ctrlOpts control.Options, errs *fault.Bus, -) ([]data.BackupCollection, map[string]map[string]struct{}, error) { +) ([]data.BackupCollection, prefixmatcher.StringSetReader, error) { ctx, end := diagnostics.Span( ctx, "gc:produceBackupCollections", @@ -71,13 +72,13 @@ func (gc *GraphConnector) ProduceBackupCollections( } var ( - colls []data.BackupCollection - excludes map[string]map[string]struct{} + colls []data.BackupCollection + ssmb *prefixmatcher.StringSetMatcher ) switch sels.Service { case selectors.ServiceExchange: - colls, excludes, err = 
exchange.DataCollections( + colls, ssmb, err = exchange.DataCollections( ctx, sels, owner, @@ -91,7 +92,7 @@ func (gc *GraphConnector) ProduceBackupCollections( } case selectors.ServiceOneDrive: - colls, excludes, err = onedrive.DataCollections( + colls, ssmb, err = onedrive.DataCollections( ctx, sels, owner, @@ -108,7 +109,7 @@ func (gc *GraphConnector) ProduceBackupCollections( } case selectors.ServiceSharePoint: - colls, excludes, err = sharepoint.DataCollections( + colls, ssmb, err = sharepoint.DataCollections( ctx, gc.itemClient, sels, @@ -139,7 +140,7 @@ func (gc *GraphConnector) ProduceBackupCollections( } } - return colls, excludes, nil + return colls, ssmb, nil } func verifyBackupInputs(sels selectors.Selector, siteIDs []string) error { diff --git a/src/internal/connector/data_collections_test.go b/src/internal/connector/data_collections_test.go index 97618df13..fedc85106 100644 --- a/src/internal/connector/data_collections_test.go +++ b/src/internal/connector/data_collections_test.go @@ -110,7 +110,7 @@ func (suite *DataCollectionIntgSuite) TestExchangeDataCollection() { control.Defaults(), fault.New(true)) require.NoError(t, err, clues.ToCore(err)) - assert.Empty(t, excludes) + assert.True(t, excludes.Empty()) for range collections { connector.incrementAwaitingMessages() @@ -215,7 +215,7 @@ func (suite *DataCollectionIntgSuite) TestDataCollections_invalidResourceOwner() fault.New(true)) assert.Error(t, err, clues.ToCore(err)) assert.Empty(t, collections) - assert.Empty(t, excludes) + assert.Nil(t, excludes) }) } } @@ -272,7 +272,7 @@ func (suite *DataCollectionIntgSuite) TestSharePointDataCollection() { fault.New(true)) require.NoError(t, err, clues.ToCore(err)) // Not expecting excludes as this isn't an incremental backup. 
- assert.Empty(t, excludes) + assert.True(t, excludes.Empty()) for range collections { connector.incrementAwaitingMessages() @@ -356,7 +356,7 @@ func (suite *SPCollectionIntgSuite) TestCreateSharePointCollection_Libraries() { require.NoError(t, err, clues.ToCore(err)) require.Len(t, cols, 2) // 1 collection, 1 path prefix directory to ensure the root path exists. // No excludes yet as this isn't an incremental backup. - assert.Empty(t, excludes) + assert.True(t, excludes.Empty()) t.Logf("cols[0] Path: %s\n", cols[0].FullPath().String()) assert.Equal( @@ -401,7 +401,7 @@ func (suite *SPCollectionIntgSuite) TestCreateSharePointCollection_Lists() { require.NoError(t, err, clues.ToCore(err)) assert.Less(t, 0, len(cols)) // No excludes yet as this isn't an incremental backup. - assert.Empty(t, excludes) + assert.True(t, excludes.Empty()) for _, collection := range cols { t.Logf("Path: %s\n", collection.FullPath().String()) diff --git a/src/internal/connector/exchange/data_collections.go b/src/internal/connector/exchange/data_collections.go index 6cf9a749d..1ff1d47c1 100644 --- a/src/internal/connector/exchange/data_collections.go +++ b/src/internal/connector/exchange/data_collections.go @@ -7,6 +7,7 @@ import ( "github.com/alcionai/clues" "github.com/alcionai/corso/src/internal/common/idname" + "github.com/alcionai/corso/src/internal/common/prefixmatcher" "github.com/alcionai/corso/src/internal/connector/exchange/api" "github.com/alcionai/corso/src/internal/connector/graph" "github.com/alcionai/corso/src/internal/connector/support" @@ -170,7 +171,7 @@ func DataCollections( su support.StatusUpdater, ctrlOpts control.Options, errs *fault.Bus, -) ([]data.BackupCollection, map[string]map[string]struct{}, error) { +) ([]data.BackupCollection, *prefixmatcher.StringSetMatcher, error) { eb, err := selector.ToExchangeBackup() if err != nil { return nil, nil, clues.Wrap(err, "exchange dataCollection selector").WithClues(ctx) diff --git 
a/src/internal/connector/graph_connector_test.go b/src/internal/connector/graph_connector_test.go index 92d4dccb6..00731b93e 100644 --- a/src/internal/connector/graph_connector_test.go +++ b/src/internal/connector/graph_connector_test.go @@ -538,7 +538,7 @@ func runBackupAndCompare( fault.New(true)) require.NoError(t, err, clues.ToCore(err)) // No excludes yet because this isn't an incremental backup. - assert.Empty(t, excludes) + assert.True(t, excludes.Empty()) t.Logf("Backup enumeration complete in %v\n", time.Since(start)) @@ -1121,7 +1121,7 @@ func (suite *GraphConnectorIntegrationSuite) TestMultiFolderBackupDifferentNames fault.New(true)) require.NoError(t, err, clues.ToCore(err)) // No excludes yet because this isn't an incremental backup. - assert.Empty(t, excludes) + assert.True(t, excludes.Empty()) t.Log("Backup enumeration complete") @@ -1280,7 +1280,7 @@ func (suite *GraphConnectorIntegrationSuite) TestBackup_CreatesPrefixCollections fault.New(true)) require.NoError(t, err) // No excludes yet because this isn't an incremental backup. 
- assert.Empty(t, excludes) + assert.True(t, excludes.Empty()) t.Logf("Backup enumeration complete in %v\n", time.Since(start)) diff --git a/src/internal/connector/mock/connector.go b/src/internal/connector/mock/connector.go index b9f712225..d8cce9781 100644 --- a/src/internal/connector/mock/connector.go +++ b/src/internal/connector/mock/connector.go @@ -4,7 +4,9 @@ import ( "context" "github.com/alcionai/corso/src/internal/common/idname" + "github.com/alcionai/corso/src/internal/common/prefixmatcher" "github.com/alcionai/corso/src/internal/data" + "github.com/alcionai/corso/src/internal/operations/inject" "github.com/alcionai/corso/src/pkg/account" "github.com/alcionai/corso/src/pkg/backup/details" "github.com/alcionai/corso/src/pkg/control" @@ -12,9 +14,11 @@ import ( "github.com/alcionai/corso/src/pkg/selectors" ) +var _ inject.BackupProducer = &GraphConnector{} + type GraphConnector struct { Collections []data.BackupCollection - Exclude map[string]map[string]struct{} + Exclude *prefixmatcher.StringSetMatcher Deets *details.Details @@ -33,7 +37,7 @@ func (gc GraphConnector) ProduceBackupCollections( _ *fault.Bus, ) ( []data.BackupCollection, - map[string]map[string]struct{}, + prefixmatcher.StringSetReader, error, ) { return gc.Collections, gc.Exclude, gc.Err diff --git a/src/internal/connector/onedrive/collections.go b/src/internal/connector/onedrive/collections.go index aca636b94..8594e4a6f 100644 --- a/src/internal/connector/onedrive/collections.go +++ b/src/internal/connector/onedrive/collections.go @@ -12,6 +12,7 @@ import ( "github.com/pkg/errors" "golang.org/x/exp/maps" + "github.com/alcionai/corso/src/internal/common/prefixmatcher" "github.com/alcionai/corso/src/internal/common/ptr" "github.com/alcionai/corso/src/internal/connector/graph" "github.com/alcionai/corso/src/internal/connector/onedrive/api" @@ -271,11 +272,12 @@ func deserializeMap[T any](reader io.ReadCloser, alreadyFound map[string]T) erro func (c *Collections) Get( ctx context.Context, 
prevMetadata []data.RestoreCollection, + ssmb *prefixmatcher.StringSetMatchBuilder, errs *fault.Bus, -) ([]data.BackupCollection, map[string]map[string]struct{}, error) { +) ([]data.BackupCollection, error) { prevDeltas, oldPathsByDriveID, err := deserializeMetadata(ctx, prevMetadata, errs) if err != nil { - return nil, nil, err + return nil, err } driveComplete, closer := observe.MessageWithCompletion(ctx, observe.Bulletf("files")) @@ -285,12 +287,12 @@ func (c *Collections) Get( // Enumerate drives for the specified resourceOwner pager, err := c.drivePagerFunc(c.source, c.service, c.resourceOwner, nil) if err != nil { - return nil, nil, graph.Stack(ctx, err) + return nil, graph.Stack(ctx, err) } drives, err := api.GetAllDrives(ctx, pager, true, maxDrivesRetries) if err != nil { - return nil, nil, err + return nil, err } var ( @@ -298,9 +300,6 @@ func (c *Collections) Get( deltaURLs = map[string]string{} // Drive ID -> folder ID -> folder path folderPaths = map[string]map[string]string{} - // Items that should be excluded when sourcing data from the base backup. - // Parent Path -> item ID -> {} - excludedItems = map[string]map[string]struct{}{} ) for _, d := range drives { @@ -336,7 +335,7 @@ func (c *Collections) Get( prevDelta, errs) if err != nil { - return nil, nil, err + return nil, err } // Used for logging below. 
@@ -376,19 +375,10 @@ func (c *Collections) Get( c.resourceOwner, c.source) if err != nil { - return nil, nil, - clues.Wrap(err, "making exclude prefix").WithClues(ictx) + return nil, clues.Wrap(err, "making exclude prefix").WithClues(ictx) } - pstr := p.String() - - eidi, ok := excludedItems[pstr] - if !ok { - eidi = map[string]struct{}{} - } - - maps.Copy(eidi, excluded) - excludedItems[pstr] = eidi + ssmb.Add(p.String(), excluded) continue } @@ -413,7 +403,7 @@ func (c *Collections) Get( prevPath, err := path.FromDataLayerPath(p, false) if err != nil { err = clues.Wrap(err, "invalid previous path").WithClues(ictx).With("deleted_path", p) - return nil, map[string]map[string]struct{}{}, err + return nil, err } col, err := NewCollection( @@ -428,7 +418,7 @@ func (c *Collections) Get( CollectionScopeUnknown, true) if err != nil { - return nil, map[string]map[string]struct{}{}, clues.Wrap(err, "making collection").WithClues(ictx) + return nil, clues.Wrap(err, "making collection").WithClues(ictx) } c.CollectionMap[driveID][fldID] = col @@ -468,7 +458,7 @@ func (c *Collections) Get( } // TODO(ashmrtn): Track and return the set of items to exclude. 
- return collections, excludedItems, nil + return collections, nil } func updateCollectionPaths( diff --git a/src/internal/connector/onedrive/collections_test.go b/src/internal/connector/onedrive/collections_test.go index d9e6fde6c..1baaed521 100644 --- a/src/internal/connector/onedrive/collections_test.go +++ b/src/internal/connector/onedrive/collections_test.go @@ -15,6 +15,8 @@ import ( "github.com/stretchr/testify/suite" "golang.org/x/exp/maps" + "github.com/alcionai/corso/src/internal/common/prefixmatcher" + pmMock "github.com/alcionai/corso/src/internal/common/prefixmatcher/mock" "github.com/alcionai/corso/src/internal/connector/graph" gapi "github.com/alcionai/corso/src/internal/connector/graph/api" "github.com/alcionai/corso/src/internal/connector/onedrive/api" @@ -1283,7 +1285,7 @@ func (suite *OneDriveCollectionsUnitSuite) TestGet() { expectedCollections map[string]map[data.CollectionState][]string expectedDeltaURLs map[string]string expectedFolderPaths map[string]map[string]string - expectedDelList map[string]map[string]struct{} + expectedDelList *pmMock.PrefixMap expectedSkippedCount int doNotMergeItems bool }{ @@ -1314,9 +1316,9 @@ func (suite *OneDriveCollectionsUnitSuite) TestGet() { expectedFolderPaths: map[string]map[string]string{ driveID1: {"root": rootFolderPath1}, }, - expectedDelList: map[string]map[string]struct{}{ + expectedDelList: pmMock.NewPrefixMap(map[string]map[string]struct{}{ rootFolderPath1: getDelList("file"), - }, + }), }, { name: "OneDrive_OneItemPage_NoFolders_NoErrors", @@ -1345,9 +1347,9 @@ func (suite *OneDriveCollectionsUnitSuite) TestGet() { expectedFolderPaths: map[string]map[string]string{ driveID1: {"root": rootFolderPath1}, }, - expectedDelList: map[string]map[string]struct{}{ + expectedDelList: pmMock.NewPrefixMap(map[string]map[string]struct{}{ rootFolderPath1: getDelList("file"), - }, + }), }, { name: "OneDrive_OneItemPage_NoErrors", @@ -1381,9 +1383,9 @@ func (suite *OneDriveCollectionsUnitSuite) TestGet() { 
"folder": folderPath1, }, }, - expectedDelList: map[string]map[string]struct{}{ + expectedDelList: pmMock.NewPrefixMap(map[string]map[string]struct{}{ rootFolderPath1: getDelList("file"), - }, + }), }, { name: "OneDrive_OneItemPage_NoErrors_FileRenamedMultiple", @@ -1418,9 +1420,9 @@ func (suite *OneDriveCollectionsUnitSuite) TestGet() { "folder": folderPath1, }, }, - expectedDelList: map[string]map[string]struct{}{ + expectedDelList: pmMock.NewPrefixMap(map[string]map[string]struct{}{ rootFolderPath1: getDelList("file"), - }, + }), }, { name: "OneDrive_OneItemPage_NoErrors_FileMovedMultiple", @@ -1455,9 +1457,9 @@ func (suite *OneDriveCollectionsUnitSuite) TestGet() { "folder": folderPath1, }, }, - expectedDelList: map[string]map[string]struct{}{ + expectedDelList: pmMock.NewPrefixMap(map[string]map[string]struct{}{ rootFolderPath1: getDelList("file"), - }, + }), }, { name: "OneDrive_OneItemPage_EmptyDelta_NoErrors", @@ -1484,9 +1486,9 @@ func (suite *OneDriveCollectionsUnitSuite) TestGet() { }, expectedDeltaURLs: map[string]string{}, expectedFolderPaths: map[string]map[string]string{}, - expectedDelList: map[string]map[string]struct{}{ + expectedDelList: pmMock.NewPrefixMap(map[string]map[string]struct{}{ rootFolderPath1: getDelList("file"), - }, + }), }, { name: "OneDrive_TwoItemPages_NoErrors", @@ -1528,9 +1530,9 @@ func (suite *OneDriveCollectionsUnitSuite) TestGet() { "folder": folderPath1, }, }, - expectedDelList: map[string]map[string]struct{}{ + expectedDelList: pmMock.NewPrefixMap(map[string]map[string]struct{}{ rootFolderPath1: getDelList("file", "file2"), - }, + }), }, { name: "TwoDrives_OneItemPageEach_NoErrors", @@ -1585,10 +1587,10 @@ func (suite *OneDriveCollectionsUnitSuite) TestGet() { "folder2": folderPath2, }, }, - expectedDelList: map[string]map[string]struct{}{ + expectedDelList: pmMock.NewPrefixMap(map[string]map[string]struct{}{ rootFolderPath1: getDelList("file"), rootFolderPath2: getDelList("file2"), - }, + }), }, { name: 
"TwoDrives_DuplicateIDs_OneItemPageEach_NoErrors", @@ -1643,10 +1645,10 @@ func (suite *OneDriveCollectionsUnitSuite) TestGet() { "folder": folderPath2, }, }, - expectedDelList: map[string]map[string]struct{}{ + expectedDelList: pmMock.NewPrefixMap(map[string]map[string]struct{}{ rootFolderPath1: getDelList("file"), rootFolderPath2: getDelList("file2"), - }, + }), }, { name: "OneDrive_OneItemPage_Errors", @@ -1696,7 +1698,7 @@ func (suite *OneDriveCollectionsUnitSuite) TestGet() { "root": rootFolderPath1, }, }, - expectedDelList: map[string]map[string]struct{}{}, + expectedDelList: pmMock.NewPrefixMap(map[string]map[string]struct{}{}), doNotMergeItems: true, }, { @@ -1738,7 +1740,7 @@ func (suite *OneDriveCollectionsUnitSuite) TestGet() { "folder": folderPath1, }, }, - expectedDelList: map[string]map[string]struct{}{}, + expectedDelList: pmMock.NewPrefixMap(map[string]map[string]struct{}{}), doNotMergeItems: true, }, { @@ -1780,9 +1782,9 @@ func (suite *OneDriveCollectionsUnitSuite) TestGet() { "folder": folderPath1, }, }, - expectedDelList: map[string]map[string]struct{}{ + expectedDelList: pmMock.NewPrefixMap(map[string]map[string]struct{}{ rootFolderPath1: getDelList("file", "file2"), - }, + }), doNotMergeItems: false, }, { @@ -1824,7 +1826,7 @@ func (suite *OneDriveCollectionsUnitSuite) TestGet() { "folder2": expectedPath1("/folder2"), }, }, - expectedDelList: map[string]map[string]struct{}{}, + expectedDelList: pmMock.NewPrefixMap(map[string]map[string]struct{}{}), doNotMergeItems: true, }, { @@ -1870,7 +1872,7 @@ func (suite *OneDriveCollectionsUnitSuite) TestGet() { "folder2": expectedPath1("/folder"), }, }, - expectedDelList: map[string]map[string]struct{}{}, + expectedDelList: pmMock.NewPrefixMap(map[string]map[string]struct{}{}), doNotMergeItems: true, }, { @@ -1915,9 +1917,9 @@ func (suite *OneDriveCollectionsUnitSuite) TestGet() { "folder": folderPath1, }, }, - expectedDelList: map[string]map[string]struct{}{ + expectedDelList: 
pmMock.NewPrefixMap(map[string]map[string]struct{}{ rootFolderPath1: getDelList("file", "file2"), - }, + }), expectedSkippedCount: 2, }, { @@ -1970,7 +1972,7 @@ func (suite *OneDriveCollectionsUnitSuite) TestGet() { "folder": folderPath1, }, }, - expectedDelList: map[string]map[string]struct{}{}, + expectedDelList: pmMock.NewPrefixMap(map[string]map[string]struct{}{}), doNotMergeItems: true, }, { @@ -2009,7 +2011,7 @@ func (suite *OneDriveCollectionsUnitSuite) TestGet() { "root": rootFolderPath1, }, }, - expectedDelList: map[string]map[string]struct{}{}, + expectedDelList: pmMock.NewPrefixMap(map[string]map[string]struct{}{}), doNotMergeItems: true, }, { @@ -2046,7 +2048,7 @@ func (suite *OneDriveCollectionsUnitSuite) TestGet() { "root": rootFolderPath1, }, }, - expectedDelList: map[string]map[string]struct{}{}, + expectedDelList: pmMock.NewPrefixMap(map[string]map[string]struct{}{}), doNotMergeItems: true, }, { @@ -2087,9 +2089,9 @@ func (suite *OneDriveCollectionsUnitSuite) TestGet() { "root": rootFolderPath1, }, }, - expectedDelList: map[string]map[string]struct{}{ + expectedDelList: pmMock.NewPrefixMap(map[string]map[string]struct{}{ rootFolderPath1: getDelList("file"), - }, + }), }, { name: "One Drive Item Made And Deleted", @@ -2130,9 +2132,9 @@ func (suite *OneDriveCollectionsUnitSuite) TestGet() { "folder": folderPath1, }, }, - expectedDelList: map[string]map[string]struct{}{ + expectedDelList: pmMock.NewPrefixMap(map[string]map[string]struct{}{ rootFolderPath1: getDelList("file"), - }, + }), }, { name: "One Drive Random Folder Delete", @@ -2163,7 +2165,7 @@ func (suite *OneDriveCollectionsUnitSuite) TestGet() { "root": rootFolderPath1, }, }, - expectedDelList: map[string]map[string]struct{}{}, + expectedDelList: pmMock.NewPrefixMap(map[string]map[string]struct{}{}), }, { name: "One Drive Random Item Delete", @@ -2194,9 +2196,9 @@ func (suite *OneDriveCollectionsUnitSuite) TestGet() { "root": rootFolderPath1, }, }, - expectedDelList: 
map[string]map[string]struct{}{ + expectedDelList: pmMock.NewPrefixMap(map[string]map[string]struct{}{ rootFolderPath1: getDelList("file"), - }, + }), }, } for _, test := range table { @@ -2269,7 +2271,9 @@ func (suite *OneDriveCollectionsUnitSuite) TestGet() { prevMetadata := []data.RestoreCollection{data.NotFoundRestoreCollection{Collection: mc}} errs := fault.New(true) - cols, delList, err := c.Get(ctx, prevMetadata, errs) + delList := prefixmatcher.NewStringSetBuilder() + + cols, err := c.Get(ctx, prevMetadata, delList, errs) test.errCheck(t, err) assert.Equal(t, test.expectedSkippedCount, len(errs.Skipped())) @@ -2339,7 +2343,7 @@ func (suite *OneDriveCollectionsUnitSuite) TestGet() { // collections we expect it to assert.Equal(t, expectedCollectionCount, collectionCount, "number of collections") - assert.Equal(t, test.expectedDelList, delList, "del list") + test.expectedDelList.AssertEqual(t, delList) }) } } diff --git a/src/internal/connector/onedrive/data_collections.go b/src/internal/connector/onedrive/data_collections.go index 721cc5e85..e89753dae 100644 --- a/src/internal/connector/onedrive/data_collections.go +++ b/src/internal/connector/onedrive/data_collections.go @@ -4,9 +4,9 @@ import ( "context" "github.com/alcionai/clues" - "golang.org/x/exp/maps" "github.com/alcionai/corso/src/internal/common/idname" + "github.com/alcionai/corso/src/internal/common/prefixmatcher" "github.com/alcionai/corso/src/internal/connector/graph" "github.com/alcionai/corso/src/internal/connector/support" "github.com/alcionai/corso/src/internal/data" @@ -44,7 +44,7 @@ func DataCollections( su support.StatusUpdater, ctrlOpts control.Options, errs *fault.Bus, -) ([]data.BackupCollection, map[string]map[string]struct{}, error) { +) ([]data.BackupCollection, *prefixmatcher.StringSetMatcher, error) { odb, err := selector.ToOneDriveBackup() if err != nil { return nil, nil, clues.Wrap(err, "parsing selector").WithClues(ctx) @@ -54,7 +54,7 @@ func DataCollections( el = errs.Local() 
categories = map[path.CategoryType]struct{}{} collections = []data.BackupCollection{} - allExcludes = map[string]map[string]struct{}{} + ssmb = prefixmatcher.NewStringSetBuilder() ) // for each scope that includes oneDrive items, get all @@ -75,7 +75,7 @@ func DataCollections( su, ctrlOpts) - odcs, excludes, err := nc.Get(ctx, metadata, errs) + odcs, err := nc.Get(ctx, metadata, ssmb, errs) if err != nil { el.AddRecoverable(clues.Stack(err).Label(fault.LabelForceNoBackupCreation)) } @@ -83,14 +83,6 @@ func DataCollections( categories[scope.Category().PathType()] = struct{}{} collections = append(collections, odcs...) - - for k, ex := range excludes { - if _, ok := allExcludes[k]; !ok { - allExcludes[k] = map[string]struct{}{} - } - - maps.Copy(allExcludes[k], ex) - } } mcs, err := migrationCollections( @@ -123,7 +115,7 @@ func DataCollections( collections = append(collections, baseCols...) } - return collections, allExcludes, el.Failure() + return collections, ssmb.ToReader(), el.Failure() } // adds data migrations to the collection set. 
diff --git a/src/internal/connector/onedrive/drive_test.go b/src/internal/connector/onedrive/drive_test.go index 7d5bd9f4c..d2f1a68b6 100644 --- a/src/internal/connector/onedrive/drive_test.go +++ b/src/internal/connector/onedrive/drive_test.go @@ -14,6 +14,7 @@ import ( "github.com/stretchr/testify/suite" "github.com/alcionai/corso/src/internal/common/dttm" + "github.com/alcionai/corso/src/internal/common/prefixmatcher" "github.com/alcionai/corso/src/internal/common/ptr" "github.com/alcionai/corso/src/internal/connector/graph" "github.com/alcionai/corso/src/internal/connector/onedrive/api" @@ -442,10 +443,12 @@ func (suite *OneDriveSuite) TestOneDriveNewCollections() { ToggleFeatures: control.Toggles{}, }) - odcs, excludes, err := colls.Get(ctx, nil, fault.New(true)) + ssmb := prefixmatcher.NewStringSetBuilder() + + odcs, err := colls.Get(ctx, nil, ssmb, fault.New(true)) assert.NoError(t, err, clues.ToCore(err)) // Don't expect excludes as this isn't an incremental backup. - assert.Empty(t, excludes) + assert.True(t, ssmb.Empty()) for _, entry := range odcs { assert.NotEmpty(t, entry.FullPath()) diff --git a/src/internal/connector/sharepoint/data_collections.go b/src/internal/connector/sharepoint/data_collections.go index a759f27da..d2a626e49 100644 --- a/src/internal/connector/sharepoint/data_collections.go +++ b/src/internal/connector/sharepoint/data_collections.go @@ -4,9 +4,9 @@ import ( "context" "github.com/alcionai/clues" - "golang.org/x/exp/maps" "github.com/alcionai/corso/src/internal/common/idname" + "github.com/alcionai/corso/src/internal/common/prefixmatcher" "github.com/alcionai/corso/src/internal/connector/graph" "github.com/alcionai/corso/src/internal/connector/onedrive" "github.com/alcionai/corso/src/internal/connector/sharepoint/api" @@ -39,7 +39,7 @@ func DataCollections( su statusUpdater, ctrlOpts control.Options, errs *fault.Bus, -) ([]data.BackupCollection, map[string]map[string]struct{}, error) { +) ([]data.BackupCollection, 
*prefixmatcher.StringSetMatcher, error) { b, err := selector.ToSharePointBackup() if err != nil { return nil, nil, clues.Wrap(err, "sharePointDataCollection: parsing selector") @@ -54,7 +54,7 @@ func DataCollections( el = errs.Local() collections = []data.BackupCollection{} categories = map[path.CategoryType]struct{}{} - excluded = map[string]map[string]struct{}{} + ssmb = prefixmatcher.NewStringSetBuilder() ) for _, scope := range b.Scopes() { @@ -86,15 +86,14 @@ func DataCollections( } case path.LibrariesCategory: - var excludes map[string]map[string]struct{} - - spcs, excludes, err = collectLibraries( + spcs, err = collectLibraries( ctx, itemClient, serv, creds.AzureTenantID, site, metadata, + ssmb, scope, su, ctrlOpts, @@ -104,14 +103,6 @@ func DataCollections( continue } - for prefix, excludes := range excludes { - if _, ok := excluded[prefix]; !ok { - excluded[prefix] = map[string]struct{}{} - } - - maps.Copy(excluded[prefix], excludes) - } - case path.PagesCategory: spcs, err = collectPages( ctx, @@ -150,7 +141,7 @@ func DataCollections( collections = append(collections, baseCols...) 
} - return collections, excluded, el.Failure() + return collections, ssmb.ToReader(), el.Failure() } func collectLists( @@ -208,11 +199,12 @@ func collectLibraries( tenantID string, site idname.Provider, metadata []data.RestoreCollection, + ssmb *prefixmatcher.StringSetMatchBuilder, scope selectors.SharePointScope, updater statusUpdater, ctrlOpts control.Options, errs *fault.Bus, -) ([]data.BackupCollection, map[string]map[string]struct{}, error) { +) ([]data.BackupCollection, error) { logger.Ctx(ctx).Debug("creating SharePoint Library collections") var ( @@ -228,12 +220,12 @@ func collectLibraries( ctrlOpts) ) - odcs, excludes, err := colls.Get(ctx, metadata, errs) + odcs, err := colls.Get(ctx, metadata, ssmb, errs) if err != nil { - return nil, nil, graph.Wrap(ctx, err, "getting library") + return nil, graph.Wrap(ctx, err, "getting library") } - return append(collections, odcs...), excludes, nil + return append(collections, odcs...), nil } // collectPages constructs a sharepoint Collections struct and Get()s the associated diff --git a/src/internal/kopia/merge_details.go b/src/internal/kopia/merge_details.go index 5917892a7..2ec6cc4bb 100644 --- a/src/internal/kopia/merge_details.go +++ b/src/internal/kopia/merge_details.go @@ -114,7 +114,7 @@ type locRefs struct { } type locationPrefixMatcher struct { - m prefixmatcher.Matcher[locRefs] + m prefixmatcher.Builder[locRefs] } func (m *locationPrefixMatcher) add( diff --git a/src/internal/kopia/upload.go b/src/internal/kopia/upload.go index ccf8a86ec..6f7f5388c 100644 --- a/src/internal/kopia/upload.go +++ b/src/internal/kopia/upload.go @@ -21,6 +21,7 @@ import ( "github.com/kopia/kopia/repo/manifest" "github.com/kopia/kopia/snapshot/snapshotfs" + "github.com/alcionai/corso/src/internal/common/prefixmatcher" "github.com/alcionai/corso/src/internal/connector/graph" "github.com/alcionai/corso/src/internal/connector/graph/metadata" "github.com/alcionai/corso/src/internal/data" @@ -413,7 +414,7 @@ func streamBaseEntries( 
locationPath *path.Builder, dir fs.Directory, encodedSeen map[string]struct{}, - globalExcludeSet map[string]map[string]struct{}, + globalExcludeSet prefixmatcher.StringSetReader, progress *corsoProgress, ) error { if dir == nil { @@ -421,20 +422,19 @@ func streamBaseEntries( } var ( + longest string excludeSet map[string]struct{} - curPrefix string ) - ctx = clues.Add(ctx, "current_item_path", curPath) - - for prefix, excludes := range globalExcludeSet { - // Select the set with the longest prefix to be most precise. - if strings.HasPrefix(curPath.String(), prefix) && len(prefix) >= len(curPrefix) { - excludeSet = excludes - curPrefix = prefix - } + if globalExcludeSet != nil { + longest, excludeSet, _ = globalExcludeSet.LongestPrefix(curPath.String()) } + ctx = clues.Add( + ctx, + "current_item_path", curPath, + "longest_prefix", longest) + err := dir.IterateEntries(ctx, func(innerCtx context.Context, entry fs.Entry) error { if err := innerCtx.Err(); err != nil { return err @@ -521,7 +521,7 @@ func getStreamItemFunc( staticEnts []fs.Entry, streamedEnts data.BackupCollection, baseDir fs.Directory, - globalExcludeSet map[string]map[string]struct{}, + globalExcludeSet prefixmatcher.StringSetReader, progress *corsoProgress, ) func(context.Context, func(context.Context, fs.Entry) error) error { return func(ctx context.Context, cb func(context.Context, fs.Entry) error) error { @@ -569,7 +569,7 @@ func getStreamItemFunc( func buildKopiaDirs( dirName string, dir *treeMap, - globalExcludeSet map[string]map[string]struct{}, + globalExcludeSet prefixmatcher.StringSetReader, progress *corsoProgress, ) (fs.Directory, error) { // Need to build the directory tree from the leaves up because intermediate @@ -1053,7 +1053,7 @@ func inflateDirTree( loader snapshotLoader, baseSnaps []IncrementalBase, collections []data.BackupCollection, - globalExcludeSet map[string]map[string]struct{}, + globalExcludeSet prefixmatcher.StringSetReader, progress *corsoProgress, ) (fs.Directory, 
error) { roots, updatedPaths, err := inflateCollectionTree(ctx, collections, progress.toMerge) diff --git a/src/internal/kopia/upload_test.go b/src/internal/kopia/upload_test.go index 0bd168368..e86826f27 100644 --- a/src/internal/kopia/upload_test.go +++ b/src/internal/kopia/upload_test.go @@ -19,6 +19,7 @@ import ( "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" + pmMock "github.com/alcionai/corso/src/internal/common/prefixmatcher/mock" exchMock "github.com/alcionai/corso/src/internal/connector/exchange/mock" "github.com/alcionai/corso/src/internal/data" "github.com/alcionai/corso/src/internal/tester" @@ -708,7 +709,7 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTree() { // - emails // - Inbox // - 42 separate files - dirTree, err := inflateDirTree(ctx, nil, nil, collections, nil, progress) + dirTree, err := inflateDirTree(ctx, nil, nil, collections, pmMock.NewPrefixMap(nil), progress) require.NoError(t, err, clues.ToCore(err)) assert.Equal(t, encodeAsPath(testTenant), dirTree.Name()) @@ -805,7 +806,7 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTree_MixedDirectory() errs: fault.New(true), } - dirTree, err := inflateDirTree(ctx, nil, nil, test.layout, nil, progress) + dirTree, err := inflateDirTree(ctx, nil, nil, test.layout, pmMock.NewPrefixMap(nil), progress) require.NoError(t, err, clues.ToCore(err)) assert.Equal(t, encodeAsPath(testTenant), dirTree.Name()) @@ -911,7 +912,7 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTree_Fails() { errs: fault.New(true), } - _, err := inflateDirTree(ctx, nil, nil, test.layout, nil, progress) + _, err := inflateDirTree(ctx, nil, nil, test.layout, pmMock.NewPrefixMap(nil), progress) assert.Error(t, err, clues.ToCore(err)) }) } @@ -1027,7 +1028,7 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTreeErrors() { cols = append(cols, mc) } - _, err := inflateDirTree(ctx, nil, nil, cols, nil, progress) + _, err := inflateDirTree(ctx, nil, nil, cols, 
pmMock.NewPrefixMap(nil), progress) require.Error(t, err, clues.ToCore(err)) }) } @@ -1312,9 +1313,8 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTreeSingleSubtree() { mockIncrementalBase("", testTenant, testUser, path.ExchangeService, path.EmailCategory), }, test.inputCollections(), - nil, - progress, - ) + pmMock.NewPrefixMap(nil), + progress) require.NoError(t, err, clues.ToCore(err)) expectTree(t, ctx, test.expected, dirTree) @@ -1433,7 +1433,7 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTreeMultipleSubdirecto table := []struct { name string inputCollections func(t *testing.T) []data.BackupCollection - inputExcludes map[string]map[string]struct{} + inputExcludes *pmMock.PrefixMap expected *expectedNode }{ { @@ -1441,11 +1441,11 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTreeMultipleSubdirecto inputCollections: func(t *testing.T) []data.BackupCollection { return nil }, - inputExcludes: map[string]map[string]struct{}{ + inputExcludes: pmMock.NewPrefixMap(map[string]map[string]struct{}{ "": { inboxFileName1: {}, }, - }, + }), expected: expectedTreeWithChildren( []string{ testTenant, @@ -2229,6 +2229,11 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTreeMultipleSubdirecto snapshotRoot: getBaseSnapshot(), } + ie := pmMock.NewPrefixMap(nil) + if test.inputExcludes != nil { + ie = test.inputExcludes + } + dirTree, err := inflateDirTree( ctx, msw, @@ -2236,7 +2241,7 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTreeMultipleSubdirecto mockIncrementalBase("", testTenant, testUser, path.ExchangeService, path.EmailCategory), }, test.inputCollections(t), - test.inputExcludes, + ie, progress) require.NoError(t, err, clues.ToCore(err)) @@ -2400,7 +2405,7 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTreeSkipsDeletedSubtre mockIncrementalBase("", testTenant, testUser, path.ExchangeService, path.EmailCategory), }, collections, - nil, + pmMock.NewPrefixMap(nil), progress) 
require.NoError(t, err, clues.ToCore(err)) @@ -2505,7 +2510,7 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTree_HandleEmptyBase() mockIncrementalBase("", testTenant, testUser, path.ExchangeService, path.EmailCategory), }, collections, - nil, + pmMock.NewPrefixMap(nil), progress) require.NoError(t, err, clues.ToCore(err)) @@ -2756,9 +2761,8 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTreeSelectsCorrectSubt mockIncrementalBase("id2", testTenant, testUser, path.ExchangeService, path.EmailCategory), }, collections, - nil, - progress, - ) + pmMock.NewPrefixMap(nil), + progress) require.NoError(t, err, clues.ToCore(err)) expectTree(t, ctx, expected, dirTree) @@ -2921,7 +2925,7 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTreeSelectsMigrateSubt mockIncrementalBase("id1", testTenant, testUser, path.ExchangeService, path.EmailCategory, path.ContactsCategory), }, []data.BackupCollection{mce, mcc}, - nil, + pmMock.NewPrefixMap(nil), progress) require.NoError(t, err, clues.ToCore(err)) diff --git a/src/internal/kopia/wrapper.go b/src/internal/kopia/wrapper.go index 9b20f5151..4e21a2347 100644 --- a/src/internal/kopia/wrapper.go +++ b/src/internal/kopia/wrapper.go @@ -14,6 +14,7 @@ import ( "github.com/kopia/kopia/snapshot/snapshotfs" "github.com/kopia/kopia/snapshot/snapshotmaintenance" + "github.com/alcionai/corso/src/internal/common/prefixmatcher" "github.com/alcionai/corso/src/internal/data" "github.com/alcionai/corso/src/internal/diagnostics" "github.com/alcionai/corso/src/internal/observe" @@ -138,7 +139,7 @@ func (w Wrapper) ConsumeBackupCollections( ctx context.Context, previousSnapshots []IncrementalBase, collections []data.BackupCollection, - globalExcludeSet map[string]map[string]struct{}, + globalExcludeSet prefixmatcher.StringSetReader, tags map[string]string, buildTreeWithBase bool, errs *fault.Bus, @@ -150,7 +151,7 @@ func (w Wrapper) ConsumeBackupCollections( ctx, end := diagnostics.Span(ctx, 
"kopia:consumeBackupCollections") defer end() - if len(collections) == 0 && len(globalExcludeSet) == 0 { + if len(collections) == 0 && (globalExcludeSet == nil || globalExcludeSet.Empty()) { return &BackupStats{}, &details.Builder{}, nil, nil } diff --git a/src/internal/kopia/wrapper_test.go b/src/internal/kopia/wrapper_test.go index 7fdcd2907..67540aec7 100644 --- a/src/internal/kopia/wrapper_test.go +++ b/src/internal/kopia/wrapper_test.go @@ -18,6 +18,7 @@ import ( "github.com/stretchr/testify/suite" "golang.org/x/exp/maps" + pmMock "github.com/alcionai/corso/src/internal/common/prefixmatcher/mock" exchMock "github.com/alcionai/corso/src/internal/connector/exchange/mock" "github.com/alcionai/corso/src/internal/connector/onedrive/metadata" "github.com/alcionai/corso/src/internal/data" @@ -1178,14 +1179,14 @@ func (suite *KopiaSimpleRepoIntegrationSuite) TestBackupExcludeItem() { prefix = itemPath.ToBuilder().Dir().Dir().String() } - var excluded map[string]map[string]struct{} + excluded := pmMock.NewPrefixMap(nil) if test.excludeItem { - excluded = map[string]map[string]struct{}{ + excluded = pmMock.NewPrefixMap(map[string]map[string]struct{}{ // Add a prefix if needed. 
prefix: { itemPath.Item(): {}, }, - } + }) } stats, _, _, err := suite.w.ConsumeBackupCollections( diff --git a/src/internal/operations/backup.go b/src/internal/operations/backup.go index 2f180d506..2d926b692 100644 --- a/src/internal/operations/backup.go +++ b/src/internal/operations/backup.go @@ -10,6 +10,7 @@ import ( "github.com/alcionai/corso/src/internal/common/crash" "github.com/alcionai/corso/src/internal/common/dttm" "github.com/alcionai/corso/src/internal/common/idname" + "github.com/alcionai/corso/src/internal/common/prefixmatcher" "github.com/alcionai/corso/src/internal/data" "github.com/alcionai/corso/src/internal/diagnostics" "github.com/alcionai/corso/src/internal/events" @@ -272,7 +273,7 @@ func (op *BackupOperation) do( } } - cs, excludes, err := produceBackupDataCollections( + cs, ssmb, err := produceBackupDataCollections( ctx, op.bp, op.ResourceOwner, @@ -294,7 +295,7 @@ func (op *BackupOperation) do( reasons, mans, cs, - excludes, + ssmb, backupID, op.incremental && canUseMetaData, op.Errors) @@ -352,7 +353,7 @@ func produceBackupDataCollections( lastBackupVersion int, ctrlOpts control.Options, errs *fault.Bus, -) ([]data.BackupCollection, map[string]map[string]struct{}, error) { +) ([]data.BackupCollection, prefixmatcher.StringSetReader, error) { complete, closer := observe.MessageWithCompletion(ctx, "Discovering items to backup") defer func() { complete <- struct{}{} @@ -424,7 +425,7 @@ func consumeBackupCollections( reasons []kopia.Reason, mans []*kopia.ManifestEntry, cs []data.BackupCollection, - excludes map[string]map[string]struct{}, + pmr prefixmatcher.StringSetReader, backupID model.StableID, isIncremental bool, errs *fault.Bus, @@ -497,7 +498,7 @@ func consumeBackupCollections( ctx, bases, cs, - excludes, + pmr, tags, isIncremental, errs) diff --git a/src/internal/operations/backup_test.go b/src/internal/operations/backup_test.go index 6be46243c..ea710fcf3 100644 --- a/src/internal/operations/backup_test.go +++ 
b/src/internal/operations/backup_test.go @@ -14,6 +14,7 @@ import ( "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" + "github.com/alcionai/corso/src/internal/common/prefixmatcher" "github.com/alcionai/corso/src/internal/connector/mock" "github.com/alcionai/corso/src/internal/data" evmock "github.com/alcionai/corso/src/internal/events/mock" @@ -98,7 +99,7 @@ func (mbu mockBackupConsumer) ConsumeBackupCollections( ctx context.Context, bases []kopia.IncrementalBase, cs []data.BackupCollection, - excluded map[string]map[string]struct{}, + excluded prefixmatcher.StringSetReader, tags map[string]string, buildTreeWithBase bool, errs *fault.Bus, diff --git a/src/internal/operations/inject/inject.go b/src/internal/operations/inject/inject.go index a85bf08ca..41f934692 100644 --- a/src/internal/operations/inject/inject.go +++ b/src/internal/operations/inject/inject.go @@ -4,6 +4,7 @@ import ( "context" "github.com/alcionai/corso/src/internal/common/idname" + "github.com/alcionai/corso/src/internal/common/prefixmatcher" "github.com/alcionai/corso/src/internal/data" "github.com/alcionai/corso/src/internal/kopia" "github.com/alcionai/corso/src/pkg/account" @@ -25,7 +26,7 @@ type ( lastBackupVersion int, ctrlOpts control.Options, errs *fault.Bus, - ) ([]data.BackupCollection, map[string]map[string]struct{}, error) + ) ([]data.BackupCollection, prefixmatcher.StringSetReader, error) Wait() *data.CollectionStats } @@ -35,7 +36,7 @@ type ( ctx context.Context, bases []kopia.IncrementalBase, cs []data.BackupCollection, - excluded map[string]map[string]struct{}, + pmr prefixmatcher.StringSetReader, tags map[string]string, buildTreeWithBase bool, errs *fault.Bus, diff --git a/src/internal/streamstore/streamstore.go b/src/internal/streamstore/streamstore.go index 57fe5b8f1..bc86687ef 100644 --- a/src/internal/streamstore/streamstore.go +++ b/src/internal/streamstore/streamstore.go @@ -9,6 +9,7 @@ import ( "github.com/alcionai/clues" + 
"github.com/alcionai/corso/src/internal/common/prefixmatcher" "github.com/alcionai/corso/src/internal/data" "github.com/alcionai/corso/src/internal/kopia" "github.com/alcionai/corso/src/internal/operations/inject" @@ -232,7 +233,7 @@ func write( ctx, nil, dbcs, - nil, + prefixmatcher.NopReader[map[string]struct{}](), nil, false, errs)