Refactor exchange collection (#4283)

No real changes to the API or internal structure, but this pulls
more generic functionality — such as getting the path/location
info for an exchange collection — out into a separate struct,
and factors out some soon-to-be-common functions.

---

#### Does this PR need a docs update or release note?

- [ ]  Yes, it's included
- [ ] 🕐 Yes, but in a later PR
- [x]  No

#### Type of change

- [ ] 🌻 Feature
- [ ] 🐛 Bugfix
- [ ] 🗺️ Documentation
- [ ] 🤖 Supportability/Tests
- [ ] 💻 CI/Deployment
- [x] 🧹 Tech Debt/Cleanup

#### Issue(s)

* #2023

#### Test Plan

- [ ] 💪 Manual
- [x]  Unit test
- [ ] 💚 E2E
This commit is contained in:
ashmrtn 2023-09-18 14:49:53 -07:00 committed by GitHub
parent 8f7070ffac
commit 647f7326d7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 197 additions and 147 deletions

View File

@ -189,15 +189,15 @@ func populateCollections(
} }
edc := NewCollection( edc := NewCollection(
qp.ProtectedResource.ID(), NewBaseCollection(
currPath, currPath,
prevPath, prevPath,
locPath, locPath,
category,
bh.itemHandler(),
statusUpdater,
ctrlOpts, ctrlOpts,
newDelta.Reset) newDelta.Reset),
qp.ProtectedResource.ID(),
bh.itemHandler(),
statusUpdater)
collections[cID] = &edc collections[cID] = &edc
@ -251,15 +251,15 @@ func populateCollections(
} }
edc := NewCollection( edc := NewCollection(
qp.ProtectedResource.ID(), NewBaseCollection(
nil, // marks the collection as deleted nil, // marks the collection as deleted
prevPath, prevPath,
nil, // tombstones don't need a location nil, // tombstones don't need a location
category,
bh.itemHandler(),
statusUpdater,
ctrlOpts, ctrlOpts,
false) false),
qp.ProtectedResource.ID(),
bh.itemHandler(),
statusUpdater)
collections[id] = &edc collections[id] = &edc
} }

View File

@ -36,21 +36,28 @@ const (
numberOfRetries = 4 numberOfRetries = 4
) )
// Collection implements the interface from data.Collection func NewBaseCollection(
// Structure holds data for an Exchange application for a single user curr, prev path.Path,
type Collection struct { location *path.Builder,
user string ctrlOpts control.Options,
stream chan data.Item doNotMergeItems bool,
) baseCollection {
return baseCollection{
ctrl: ctrlOpts,
doNotMergeItems: doNotMergeItems,
fullPath: curr,
locationPath: location,
prevPath: prev,
state: data.StateOf(prev, curr),
}
}
// added is a list of existing item IDs that were added to a container // baseCollection contains basic functionality like returning path, location,
added map[string]struct{} // and state information. It can be embedded in other implementations to provide
// removed is a list of item IDs that were deleted from, or moved out, of a container // this functionality.
removed map[string]struct{} //
// Functionality like how items are fetched is left to the embedding struct.
getter itemGetterSerializer type baseCollection struct {
category path.CategoryType
statusUpdater support.StatusUpdater
ctrl control.Options ctrl control.Options
// FullPath is the current hierarchical path used by this collection. // FullPath is the current hierarchical path used by this collection.
@ -71,6 +78,92 @@ type Collection struct {
doNotMergeItems bool doNotMergeItems bool
} }
// FullPath returns the baseCollection's fullPath []string
func (col *baseCollection) FullPath() path.Path {
return col.fullPath
}
// LocationPath produces the baseCollection's full path, but with display names
// instead of IDs in the folders. Only populated for Calendars.
func (col *baseCollection) LocationPath() *path.Builder {
return col.locationPath
}
func (col baseCollection) PreviousPath() path.Path {
return col.prevPath
}
func (col baseCollection) State() data.CollectionState {
return col.state
}
func (col baseCollection) DoNotMergeItems() bool {
return col.doNotMergeItems
}
// updateStatus is a utility function used to send the status update through
// the channel.
func updateStatus(
ctx context.Context,
statusUpdater support.StatusUpdater,
attempted int,
success int,
totalBytes int64,
folderPath string,
err error,
) {
status := support.CreateStatus(
ctx,
support.Backup,
1,
support.CollectionMetrics{
Objects: attempted,
Successes: success,
Bytes: totalBytes,
},
folderPath)
logger.Ctx(ctx).Debugw("done streaming items", "status", status.String())
statusUpdater(status)
}
func getItemAndInfo(
ctx context.Context,
getter itemGetterSerializer,
userID string,
id string,
useImmutableIDs bool,
parentPath string,
) ([]byte, *details.ExchangeInfo, error) {
item, info, err := getter.GetItem(
ctx,
userID,
id,
useImmutableIDs,
fault.New(true)) // temporary way to force a failFast error
if err != nil {
return nil, nil, clues.Wrap(err, "fetching item").
WithClues(ctx).
Label(fault.LabelForceNoBackupCreation)
}
itemData, err := getter.Serialize(ctx, item, userID, id)
if err != nil {
return nil, nil, clues.Wrap(err, "serializing item").WithClues(ctx)
}
// In case of mail the size of itemData is calc as- size of body content+size of attachment
// in all other case the size is - total item's serialized size
if info.Size <= 0 {
info.Size = int64(len(itemData))
}
info.ParentPath = parentPath
return itemData, info, nil
}
// NewExchangeDataCollection creates an ExchangeDataCollection. // NewExchangeDataCollection creates an ExchangeDataCollection.
// State of the collection is set as an observation of the current // State of the collection is set as an observation of the current
// and previous paths. If the curr path is nil, the state is assumed // and previous paths. If the curr path is nil, the state is assumed
@ -78,73 +171,56 @@ type Collection struct {
// If both are populated, then state is either moved (if they differ), // If both are populated, then state is either moved (if they differ),
// or notMoved (if they match). // or notMoved (if they match).
func NewCollection( func NewCollection(
bc baseCollection,
user string, user string,
curr, prev path.Path,
location *path.Builder,
category path.CategoryType,
items itemGetterSerializer, items itemGetterSerializer,
statusUpdater support.StatusUpdater, statusUpdater support.StatusUpdater,
ctrlOpts control.Options,
doNotMergeItems bool,
) Collection { ) Collection {
collection := Collection{ collection := Collection{
added: make(map[string]struct{}, 0), baseCollection: bc,
category: category,
ctrl: ctrlOpts,
stream: make(chan data.Item, collectionChannelBufferSize),
doNotMergeItems: doNotMergeItems,
fullPath: curr,
getter: items,
locationPath: location,
prevPath: prev,
removed: make(map[string]struct{}, 0),
state: data.StateOf(prev, curr),
statusUpdater: statusUpdater,
user: user, user: user,
added: map[string]struct{}{},
removed: map[string]struct{}{},
getter: items,
statusUpdater: statusUpdater,
} }
return collection return collection
} }
// Collection implements the interface from data.Collection
// Structure holds data for an Exchange application for a single user
type Collection struct {
baseCollection
user string
// added is a list of existing item IDs that were added to a container
added map[string]struct{}
// removed is a list of item IDs that were deleted from, or moved out, of a container
removed map[string]struct{}
getter itemGetterSerializer
statusUpdater support.StatusUpdater
}
// Items utility function to asynchronously execute process to fill data channel with // Items utility function to asynchronously execute process to fill data channel with
// M365 exchange objects and returns the data channel // M365 exchange objects and returns the data channel
func (col *Collection) Items(ctx context.Context, errs *fault.Bus) <-chan data.Item { func (col *Collection) Items(ctx context.Context, errs *fault.Bus) <-chan data.Item {
go col.streamItems(ctx, errs) stream := make(chan data.Item, collectionChannelBufferSize)
return col.stream go col.streamItems(ctx, stream, errs)
}
// FullPath returns the Collection's fullPath []string return stream
func (col *Collection) FullPath() path.Path {
return col.fullPath
} }
// LocationPath produces the Collection's full path, but with display names
// instead of IDs in the folders. Only populated for Calendars.
func (col *Collection) LocationPath() *path.Builder {
return col.locationPath
}
// TODO(ashmrtn): Fill in with previous path once the Controller compares old
// and new folder hierarchies.
func (col Collection) PreviousPath() path.Path {
return col.prevPath
}
func (col Collection) State() data.CollectionState {
return col.state
}
func (col Collection) DoNotMergeItems() bool {
return col.doNotMergeItems
}
// ---------------------------------------------------------------------------
// Items() channel controller
// ---------------------------------------------------------------------------
// streamItems is a utility function that uses col.collectionType to be able to serialize // streamItems is a utility function that uses col.collectionType to be able to serialize
// all the M365IDs defined in the added field. data channel is closed by this function // all the M365IDs defined in the added field. data channel is closed by this function
func (col *Collection) streamItems(ctx context.Context, errs *fault.Bus) { func (col *Collection) streamItems(
ctx context.Context,
stream chan<- data.Item,
errs *fault.Bus,
) {
var ( var (
success int64 success int64
totalBytes int64 totalBytes int64
@ -154,11 +230,19 @@ func (col *Collection) streamItems(ctx context.Context, errs *fault.Bus) {
user = col.user user = col.user
log = logger.Ctx(ctx).With( log = logger.Ctx(ctx).With(
"service", path.ExchangeService.String(), "service", path.ExchangeService.String(),
"category", col.category.String()) "category", col.FullPath().Category().String())
) )
defer func() { defer func() {
col.finishPopulation(ctx, int(success), totalBytes, errs.Failure()) close(stream)
updateStatus(
ctx,
col.statusUpdater,
len(col.added)+len(col.removed),
int(success),
totalBytes,
col.FullPath().Folder(false),
errs.Failure())
}() }()
if len(col.added)+len(col.removed) > 0 { if len(col.added)+len(col.removed) > 0 {
@ -182,14 +266,13 @@ func (col *Collection) streamItems(ctx context.Context, errs *fault.Bus) {
defer wg.Done() defer wg.Done()
defer func() { <-semaphoreCh }() defer func() { <-semaphoreCh }()
col.stream <- &Item{ stream <- &Item{
id: id, id: id,
modTime: time.Now().UTC(), // removed items have no modTime entry. modTime: time.Now().UTC(), // removed items have no modTime entry.
deleted: true, deleted: true,
} }
atomic.AddInt64(&success, 1) atomic.AddInt64(&success, 1)
atomic.AddInt64(&totalBytes, 0)
if colProgress != nil { if colProgress != nil {
colProgress <- struct{}{} colProgress <- struct{}{}
@ -197,6 +280,8 @@ func (col *Collection) streamItems(ctx context.Context, errs *fault.Bus) {
}(id) }(id)
} }
parentPath := col.LocationPath().String()
// add any new items // add any new items
for id := range col.added { for id := range col.added {
if errs.Failure() != nil { if errs.Failure() != nil {
@ -211,12 +296,13 @@ func (col *Collection) streamItems(ctx context.Context, errs *fault.Bus) {
defer wg.Done() defer wg.Done()
defer func() { <-semaphoreCh }() defer func() { <-semaphoreCh }()
item, info, err := col.getter.GetItem( itemData, info, err := getItemAndInfo(
ctx, ctx,
col.getter,
user, user,
id, id,
col.ctrl.ToggleFeatures.ExchangeImmutableIDs, col.ctrl.ToggleFeatures.ExchangeImmutableIDs,
fault.New(true)) // temporary way to force a failFast error parentPath)
if err != nil { if err != nil {
// Don't report errors for deleted items as there's no way for us to // Don't report errors for deleted items as there's no way for us to
// back up data that is gone. Record it as a "success", since there's // back up data that is gone. Record it as a "success", since there's
@ -232,23 +318,9 @@ func (col *Collection) streamItems(ctx context.Context, errs *fault.Bus) {
return return
} }
data, err := col.getter.Serialize(ctx, item, user, id) stream <- &Item{
if err != nil {
errs.AddRecoverable(ctx, clues.Wrap(err, "serializing item").Label(fault.LabelForceNoBackupCreation))
return
}
// In case of mail the size of data is calc as- size of body content+size of attachment
// in all other case the size is - total item's serialized size
if info.Size <= 0 {
info.Size = int64(len(data))
}
info.ParentPath = col.LocationPath().String()
col.stream <- &Item{
id: id, id: id,
message: data, message: itemData,
info: info, info: info,
modTime: info.Modified, modTime: info.Modified,
} }
@ -265,33 +337,6 @@ func (col *Collection) streamItems(ctx context.Context, errs *fault.Bus) {
wg.Wait() wg.Wait()
} }
// finishPopulation is a utility function used to close a Collection's data channel
// and to send the status update through the channel.
func (col *Collection) finishPopulation(
ctx context.Context,
success int,
totalBytes int64,
err error,
) {
close(col.stream)
attempted := len(col.added) + len(col.removed)
status := support.CreateStatus(
ctx,
support.Backup,
1,
support.CollectionMetrics{
Objects: attempted,
Successes: success,
Bytes: totalBytes,
},
col.FullPath().Folder(false))
logger.Ctx(ctx).Debugw("done streaming items", "status", status.String())
col.statusUpdater(status)
}
// Item represents a single item retrieved from exchange // Item represents a single item retrieved from exchange
type Item struct { type Item struct {
id string id string

View File

@ -72,8 +72,10 @@ func (suite *CollectionUnitSuite) TestCollection_NewCollection() {
require.NoError(t, err, clues.ToCore(err)) require.NoError(t, err, clues.ToCore(err))
edc := Collection{ edc := Collection{
user: name, baseCollection: baseCollection{
fullPath: fullPath, fullPath: fullPath,
},
user: name,
} }
assert.Equal(t, name, edc.user) assert.Equal(t, name, edc.user)
assert.Equal(t, fullPath, edc.FullPath()) assert.Equal(t, fullPath, edc.FullPath())
@ -125,13 +127,15 @@ func (suite *CollectionUnitSuite) TestNewCollection_state() {
t := suite.T() t := suite.T()
c := NewCollection( c := NewCollection(
"u", NewBaseCollection(
test.curr, test.prev, test.loc, test.curr,
0, test.prev,
mock.DefaultItemGetSerialize(), test.loc,
nil,
control.DefaultOptions(), control.DefaultOptions(),
false) false),
"u",
mock.DefaultItemGetSerialize(),
nil)
assert.Equal(t, test.expect, c.State(), "collection state") assert.Equal(t, test.expect, c.State(), "collection state")
assert.Equal(t, test.curr, c.fullPath, "full path") assert.Equal(t, test.curr, c.fullPath, "full path")
assert.Equal(t, test.prev, c.prevPath, "prev path") assert.Equal(t, test.prev, c.prevPath, "prev path")
@ -191,7 +195,7 @@ func (suite *CollectionUnitSuite) TestGetItemWithRetries() {
func (suite *CollectionUnitSuite) TestCollection_streamItems() { func (suite *CollectionUnitSuite) TestCollection_streamItems() {
var ( var (
t = suite.T() t = suite.T()
start = time.Now().Add(-1 * time.Second) start = time.Now().Add(-time.Second)
statusUpdater = func(*support.ControllerOperationStatus) {} statusUpdater = func(*support.ControllerOperationStatus) {}
) )
@ -250,20 +254,21 @@ func (suite *CollectionUnitSuite) TestCollection_streamItems() {
ctx, flush := tester.NewContext(t) ctx, flush := tester.NewContext(t)
defer flush() defer flush()
col := &Collection{ col := NewCollection(
added: test.added, NewBaseCollection(
removed: test.removed, fullPath,
ctrl: control.DefaultOptions(), nil,
getter: &mock.ItemGetSerialize{}, locPath.ToBuilder(),
stream: make(chan data.Item), control.DefaultOptions(),
fullPath: fullPath, false),
locationPath: locPath.ToBuilder(), "",
statusUpdater: statusUpdater, &mock.ItemGetSerialize{},
} statusUpdater)
go col.streamItems(ctx, errs) col.added = test.added
col.removed = test.removed
for item := range col.stream { for item := range col.Items(ctx, errs) {
itemCount++ itemCount++
_, aok := test.added[item.ID()] _, aok := test.added[item.ID()]