Lazily fetch Exchange item data when possible (#4300)
Implement lazy data fetch for Exchange items. Use a new collection type to clearly denote when items can be lazily fetched vs. requiring eager fetch. **This PR changes how the read bytes stat is updated.** Lazily fetched items will not update the read bytes stat. This stat doesn't appear to be used anywhere at the moment. For items that are deleted between the time enumeration takes place and the time the data for them needs to be fetched, Corso will: * return an empty reader for the item * not add the item to backup details * delete the (empty) item from kopia on the next backup Manually tested deleting an item between enumeration and data fetch --- #### Does this PR need a docs update or release note? - [x] ✅ Yes, it's included - [ ] 🕐 Yes, but in a later PR - [ ] ⛔ No #### Type of change - [x] 🌻 Feature - [ ] 🐛 Bugfix - [ ] 🗺️ Documentation - [ ] 🤖 Supportability/Tests - [ ] 💻 CI/Deployment - [ ] 🧹 Tech Debt/Cleanup #### Issue(s) * closes #2023 #### Test Plan - [x] 💪 Manual - [x] ⚡ Unit test - [x] 💚 E2E
This commit is contained in:
parent
7f2200195c
commit
b212c37fd3
@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||
### Added
|
||||
- Enables local or network-attached storage for Corso repositories.
|
||||
- Reduce backup runtime for OneDrive and SharePoint incremental backups that have no file changes.
|
||||
- Increase Exchange backup performance by lazily fetching data only for items whose content changed.
|
||||
|
||||
## [v0.13.0] (beta) - 2023-09-18
|
||||
|
||||
|
||||
@ -5,6 +5,7 @@ import (
|
||||
"context"
|
||||
"encoding/base64"
|
||||
"encoding/binary"
|
||||
"errors"
|
||||
"io"
|
||||
"os"
|
||||
"runtime/trace"
|
||||
@ -231,7 +232,11 @@ func (cp *corsoProgress) FinishedFile(relativePath string, err error) {
|
||||
}
|
||||
|
||||
info, err := d.infoer.Info()
|
||||
if err != nil {
|
||||
if errors.Is(err, data.ErrNotFound) {
|
||||
// The item was deleted between enumeration and trying to get data. Skip
|
||||
// adding it to details since there's no data for it.
|
||||
return
|
||||
} else if err != nil {
|
||||
cp.errs.AddRecoverable(ctx, clues.Wrap(err, "getting ItemInfo").
|
||||
WithClues(ctx).
|
||||
Label(fault.LabelForceNoBackupCreation))
|
||||
|
||||
@ -160,7 +160,7 @@ func populateCollections(
|
||||
|
||||
ictx = clues.Add(ictx, "previous_path", prevPath)
|
||||
|
||||
added, _, removed, newDelta, err := bh.itemEnumerator().
|
||||
added, validModTimes, removed, newDelta, err := bh.itemEnumerator().
|
||||
GetAddedAndRemovedItemIDs(
|
||||
ictx,
|
||||
qp.ProtectedResource.ID(),
|
||||
@ -197,21 +197,12 @@ func populateCollections(
|
||||
newDelta.Reset),
|
||||
qp.ProtectedResource.ID(),
|
||||
bh.itemHandler(),
|
||||
added,
|
||||
removed,
|
||||
validModTimes,
|
||||
statusUpdater)
|
||||
|
||||
collections[cID] = &edc
|
||||
|
||||
for add := range added {
|
||||
edc.added[add] = struct{}{}
|
||||
}
|
||||
|
||||
// Remove any deleted IDs from the set of added IDs because items that are
|
||||
// deleted and then restored will have a different ID than they did
|
||||
// originally.
|
||||
for _, remove := range removed {
|
||||
delete(edc.added, remove)
|
||||
edc.removed[remove] = struct{}{}
|
||||
}
|
||||
collections[cID] = edc
|
||||
|
||||
// add the current path for the container ID to be used in the next backup
|
||||
// as the "previous path", for reference in case of a rename or relocation.
|
||||
@ -259,8 +250,11 @@ func populateCollections(
|
||||
false),
|
||||
qp.ProtectedResource.ID(),
|
||||
bh.itemHandler(),
|
||||
nil,
|
||||
nil,
|
||||
false,
|
||||
statusUpdater)
|
||||
collections[id] = &edc
|
||||
collections[id] = edc
|
||||
}
|
||||
|
||||
logger.Ctx(ctx).Infow(
|
||||
|
||||
@ -11,6 +11,7 @@ import (
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
"github.com/stretchr/testify/suite"
|
||||
"golang.org/x/exp/maps"
|
||||
|
||||
inMock "github.com/alcionai/corso/src/internal/common/idname/mock"
|
||||
"github.com/alcionai/corso/src/internal/common/ptr"
|
||||
@ -599,7 +600,7 @@ func (suite *BackupIntgSuite) TestDelta() {
|
||||
|
||||
// now do another backup with the previous delta tokens,
|
||||
// which should only contain the difference.
|
||||
collections, err = CreateCollections(
|
||||
_, err = CreateCollections(
|
||||
ctx,
|
||||
bpc,
|
||||
handlers,
|
||||
@ -609,19 +610,6 @@ func (suite *BackupIntgSuite) TestDelta() {
|
||||
func(status *support.ControllerOperationStatus) {},
|
||||
fault.New(true))
|
||||
require.NoError(t, err, clues.ToCore(err))
|
||||
|
||||
// TODO(keepers): this isn't a very useful test at the moment. It needs to
|
||||
// investigate the items in the original and delta collections to at least
|
||||
// assert some minimum assumptions, such as "deltas should retrieve fewer items".
|
||||
// Delta usage is commented out at the moment, anyway. So this is currently
|
||||
// a sanity check that the minimum behavior won't break.
|
||||
for _, coll := range collections {
|
||||
if coll.FullPath().Service() != path.ExchangeMetadataService {
|
||||
ec, ok := coll.(*prefetchCollection)
|
||||
require.True(t, ok, "collection is *prefetchCollection")
|
||||
assert.NotNil(t, ec)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
@ -1184,10 +1172,12 @@ func (suite *CollectionPopulationSuite) TestPopulateCollections() {
|
||||
make([]string, 0, len(exColl.removed)),
|
||||
}
|
||||
|
||||
for i, cIDs := range []map[string]struct{}{exColl.added, exColl.removed} {
|
||||
for id := range cIDs {
|
||||
ids[i] = append(ids[i], id)
|
||||
for id := range exColl.added {
|
||||
ids[0] = append(ids[0], id)
|
||||
}
|
||||
|
||||
for id := range exColl.removed {
|
||||
ids[1] = append(ids[1], id)
|
||||
}
|
||||
|
||||
assert.ElementsMatch(t, expect.added, ids[0], "added items")
|
||||
@ -1519,10 +1509,12 @@ func (suite *CollectionPopulationSuite) TestFilterContainersAndFillCollections_D
|
||||
make([]string, 0, len(exColl.removed)),
|
||||
}
|
||||
|
||||
for i, cIDs := range []map[string]struct{}{exColl.added, exColl.removed} {
|
||||
for id := range cIDs {
|
||||
ids[i] = append(ids[i], id)
|
||||
for id := range exColl.added {
|
||||
ids[0] = append(ids[0], id)
|
||||
}
|
||||
|
||||
for id := range exColl.removed {
|
||||
ids[1] = append(ids[1], id)
|
||||
}
|
||||
|
||||
assert.ElementsMatch(t, expect.added, ids[0], "added items")
|
||||
@ -1680,7 +1672,11 @@ func (suite *CollectionPopulationSuite) TestFilterContainersAndFillCollections_r
|
||||
exColl, ok := coll.(*prefetchCollection)
|
||||
require.True(t, ok, "collection is an *exchange.prefetchCollection")
|
||||
|
||||
assert.Equal(t, test.expectAdded, exColl.added, "added items")
|
||||
assert.ElementsMatch(
|
||||
t,
|
||||
maps.Keys(test.expectAdded),
|
||||
maps.Keys(exColl.added),
|
||||
"added items")
|
||||
assert.Equal(t, test.expectRemoved, exColl.removed, "removed items")
|
||||
}
|
||||
})
|
||||
|
||||
@ -12,6 +12,8 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/alcionai/clues"
|
||||
"github.com/spatialcurrent/go-lazy/pkg/lazy"
|
||||
"golang.org/x/exp/maps"
|
||||
|
||||
"github.com/alcionai/corso/src/internal/data"
|
||||
"github.com/alcionai/corso/src/internal/m365/graph"
|
||||
@ -174,18 +176,46 @@ func NewCollection(
|
||||
bc baseCollection,
|
||||
user string,
|
||||
items itemGetterSerializer,
|
||||
origAdded map[string]time.Time,
|
||||
origRemoved []string,
|
||||
validModTimes bool,
|
||||
statusUpdater support.StatusUpdater,
|
||||
) prefetchCollection {
|
||||
collection := prefetchCollection{
|
||||
) data.BackupCollection {
|
||||
added := maps.Clone(origAdded)
|
||||
removed := make(map[string]struct{}, len(origRemoved))
|
||||
|
||||
// Remove any deleted IDs from the set of added IDs because items that are
|
||||
// deleted and then restored will have a different ID than they did
|
||||
// originally.
|
||||
//
|
||||
// TODO(ashmrtn): If we switch to immutable IDs then we'll need to handle this
|
||||
// sort of operation in the pager since this would become order-dependent
|
||||
// unless Graph started consolidating the changes into a single delta result.
|
||||
for _, r := range origRemoved {
|
||||
delete(added, r)
|
||||
|
||||
removed[r] = struct{}{}
|
||||
}
|
||||
|
||||
if !validModTimes {
|
||||
return &prefetchCollection{
|
||||
baseCollection: bc,
|
||||
user: user,
|
||||
added: map[string]struct{}{},
|
||||
removed: map[string]struct{}{},
|
||||
added: added,
|
||||
removed: removed,
|
||||
getter: items,
|
||||
statusUpdater: statusUpdater,
|
||||
}
|
||||
}
|
||||
|
||||
return collection
|
||||
return &lazyFetchCollection{
|
||||
baseCollection: bc,
|
||||
user: user,
|
||||
added: added,
|
||||
removed: removed,
|
||||
getter: items,
|
||||
statusUpdater: statusUpdater,
|
||||
}
|
||||
}
|
||||
|
||||
// prefetchCollection implements the interface from data.BackupCollection
|
||||
@ -196,7 +226,7 @@ type prefetchCollection struct {
|
||||
user string
|
||||
|
||||
// added is a list of existing item IDs that were added to a container
|
||||
added map[string]struct{}
|
||||
added map[string]time.Time
|
||||
// removed is a list of item IDs that were deleted from, or moved out, of a container
|
||||
removed map[string]struct{}
|
||||
|
||||
@ -337,6 +367,126 @@ func (col *prefetchCollection) streamItems(
|
||||
wg.Wait()
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// lazyFetchCollection
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
// lazyFetchCollection implements the interface from data.BackupCollection.
// Structure holds data for an Exchange application for a single user. It lazily
// fetches the data associated with each item when kopia requests it during
// upload.
//
// When accounting for stats, items are marked as successful when the basic
// information (path and mod time) is handed to kopia. Total bytes across all
// items is not tracked.
type lazyFetchCollection struct {
	baseCollection

	// user is the ID of the resource owner whose items this collection holds.
	user string

	// added maps the IDs of existing items that were added to a container to
	// their modification times. The mod time is handed to kopia before the
	// item's data is fetched.
	added map[string]time.Time
	// removed is a set of item IDs that were deleted from, or moved out, of a
	// container.
	removed map[string]struct{}

	// getter fetches and serializes an item's data on demand.
	getter itemGetterSerializer

	// statusUpdater receives the final collection stats once streaming ends.
	statusUpdater support.StatusUpdater
}
|
||||
|
||||
// Items utility function to asynchronously execute process to fill data channel with
|
||||
// M365 exchange objects and returns the data channel
|
||||
func (col *lazyFetchCollection) Items(ctx context.Context, errs *fault.Bus) <-chan data.Item {
|
||||
stream := make(chan data.Item, collectionChannelBufferSize)
|
||||
go col.streamItems(ctx, stream, errs)
|
||||
|
||||
return stream
|
||||
}
|
||||
|
||||
// streamItems is a utility function that uses col.collectionType to be able to
|
||||
// serialize all the M365IDs defined in the added field. data channel is closed
|
||||
// by this function.
|
||||
func (col *lazyFetchCollection) streamItems(
|
||||
ctx context.Context,
|
||||
stream chan<- data.Item,
|
||||
errs *fault.Bus,
|
||||
) {
|
||||
var (
|
||||
success int64
|
||||
colProgress chan<- struct{}
|
||||
|
||||
user = col.user
|
||||
)
|
||||
|
||||
defer func() {
|
||||
close(stream)
|
||||
updateStatus(
|
||||
ctx,
|
||||
col.statusUpdater,
|
||||
len(col.added)+len(col.removed),
|
||||
int(success),
|
||||
0,
|
||||
col.FullPath().Folder(false),
|
||||
errs.Failure())
|
||||
}()
|
||||
|
||||
if len(col.added)+len(col.removed) > 0 {
|
||||
colProgress = observe.CollectionProgress(
|
||||
ctx,
|
||||
col.FullPath().Category().String(),
|
||||
col.LocationPath().Elements())
|
||||
defer close(colProgress)
|
||||
}
|
||||
|
||||
// delete all removed items
|
||||
for id := range col.removed {
|
||||
stream <- &Item{
|
||||
id: id,
|
||||
modTime: time.Now().UTC(), // removed items have no modTime entry.
|
||||
deleted: true,
|
||||
}
|
||||
|
||||
atomic.AddInt64(&success, 1)
|
||||
|
||||
if colProgress != nil {
|
||||
colProgress <- struct{}{}
|
||||
}
|
||||
}
|
||||
|
||||
parentPath := col.LocationPath().String()
|
||||
|
||||
// add any new items
|
||||
for id, modTime := range col.added {
|
||||
if errs.Failure() != nil {
|
||||
break
|
||||
}
|
||||
|
||||
ictx := clues.Add(
|
||||
ctx,
|
||||
"item_id", id,
|
||||
"parent_path", path.LoggableDir(parentPath),
|
||||
"service", path.ExchangeService.String(),
|
||||
"category", col.FullPath().Category().String())
|
||||
|
||||
stream <- &lazyItem{
|
||||
ctx: ictx,
|
||||
userID: user,
|
||||
id: id,
|
||||
getter: col.getter,
|
||||
modTime: modTime,
|
||||
immutableIDs: col.ctrl.ToggleFeatures.ExchangeImmutableIDs,
|
||||
parentPath: parentPath,
|
||||
errs: errs,
|
||||
}
|
||||
|
||||
atomic.AddInt64(&success, 1)
|
||||
|
||||
if colProgress != nil {
|
||||
colProgress <- struct{}{}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Item represents a single item retrieved from exchange
|
||||
type Item struct {
|
||||
id string
|
||||
@ -386,3 +536,88 @@ func NewItem(
|
||||
modTime: modTime,
|
||||
}
|
||||
}
|
||||
|
||||
// lazyItem represents a single item retrieved from exchange that lazily fetches
// the item's data when the first call to ToReader().Read() is made.
type lazyItem struct {
	ctx        context.Context
	userID     string
	id         string
	parentPath string
	getter     itemGetterSerializer
	errs       *fault.Bus

	// modTime is the modification time reported during enumeration; it's
	// handed to kopia before the item's data is fetched.
	modTime time.Time
	// info holds the Exchange-specific details information for this item. Store
	// a pointer in this struct so the golang garbage collector can collect the
	// Item struct once kopia is done with it. The ExchangeInfo struct needs to
	// stick around until the end of the backup though as backup details is
	// written last.
	info *details.ExchangeInfo

	immutableIDs bool

	// delInFlight is set when the item was deleted between enumeration and the
	// lazy data fetch. Info() then returns data.ErrNotFound so the item is
	// excluded from backup details.
	delInFlight bool
}
|
||||
|
||||
// ID returns the M365 ID of the item.
func (i lazyItem) ID() string {
	return i.id
}
|
||||
|
||||
// ToReader returns a ReadCloser that fetches and serializes the item's data on
// the first Read call. If the item was deleted between enumeration and the
// fetch, an empty reader is returned (and delInFlight is set) so the backup
// doesn't fail; a later Info() call then reports data.ErrNotFound.
func (i *lazyItem) ToReader() io.ReadCloser {
	return lazy.NewLazyReadCloser(func() (io.ReadCloser, error) {
		itemData, info, err := getItemAndInfo(
			i.ctx,
			i.getter,
			i.userID,
			i.ID(),
			i.immutableIDs,
			i.parentPath)
		if err != nil {
			// If an item was deleted then return an empty file so we don't fail
			// the backup and return a sentinel error when asked for ItemInfo so
			// we don't display the item in the backup.
			//
			// The item will be deleted from kopia on the next backup when the
			// delta token shows it's removed.
			if graph.IsErrDeletedInFlight(err) {
				logger.CtxErr(i.ctx, err).Info("item not found")

				i.delInFlight = true

				return io.NopCloser(bytes.NewReader([]byte{})), nil
			}

			err = clues.Stack(err)
			i.errs.AddRecoverable(i.ctx, err)

			return nil, err
		}

		i.info = info
		// Update the mod time to what we already told kopia about. This is required
		// for proper details merging.
		i.info.Modified = i.modTime

		return io.NopCloser(bytes.NewReader(itemData)), nil
	})
}
|
||||
|
||||
// Deleted always reports false: removed items are streamed by the collection
// as regular Items with the deleted flag set, never as lazyItems.
func (i lazyItem) Deleted() bool {
	return false
}
|
||||
|
||||
func (i lazyItem) Info() (details.ItemInfo, error) {
|
||||
if i.delInFlight {
|
||||
return details.ItemInfo{}, clues.Stack(data.ErrNotFound).WithClues(i.ctx)
|
||||
} else if i.info == nil {
|
||||
return details.ItemInfo{}, clues.New("requesting ItemInfo before data retrieval").
|
||||
WithClues(i.ctx)
|
||||
}
|
||||
|
||||
return details.ItemInfo{Exchange: i.info}, nil
|
||||
}
|
||||
|
||||
// ModTime returns the modification time recorded during enumeration. It is
// what kopia is told before the item's data is fetched.
func (i lazyItem) ModTime() time.Time {
	return i.modTime
}
|
||||
|
||||
@ -2,19 +2,27 @@ package exchange
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"io"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/alcionai/clues"
|
||||
"github.com/microsoft/kiota-abstractions-go/serialization"
|
||||
"github.com/microsoftgraph/msgraph-sdk-go/models"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
"github.com/stretchr/testify/suite"
|
||||
"golang.org/x/exp/maps"
|
||||
"golang.org/x/exp/slices"
|
||||
|
||||
"github.com/alcionai/corso/src/internal/common/ptr"
|
||||
"github.com/alcionai/corso/src/internal/data"
|
||||
"github.com/alcionai/corso/src/internal/m365/collection/exchange/mock"
|
||||
"github.com/alcionai/corso/src/internal/m365/graph"
|
||||
"github.com/alcionai/corso/src/internal/m365/support"
|
||||
"github.com/alcionai/corso/src/internal/tester"
|
||||
"github.com/alcionai/corso/src/pkg/backup/details"
|
||||
"github.com/alcionai/corso/src/pkg/control"
|
||||
"github.com/alcionai/corso/src/pkg/fault"
|
||||
"github.com/alcionai/corso/src/pkg/path"
|
||||
@ -82,6 +90,21 @@ func (suite *CollectionUnitSuite) TestCollection_NewCollection() {
|
||||
}
|
||||
|
||||
func (suite *CollectionUnitSuite) TestNewCollection_state() {
|
||||
type collectionTypes struct {
|
||||
name string
|
||||
validModTimes bool
|
||||
}
|
||||
|
||||
colTypes := []collectionTypes{
|
||||
{
|
||||
name: "prefetchCollection",
|
||||
},
|
||||
{
|
||||
name: "lazyFetchCollection",
|
||||
validModTimes: true,
|
||||
},
|
||||
}
|
||||
|
||||
fooP, err := path.Build("t", "u", path.ExchangeService, path.EmailCategory, false, "foo")
|
||||
require.NoError(suite.T(), err, clues.ToCore(err))
|
||||
barP, err := path.Build("t", "u", path.ExchangeService, path.EmailCategory, false, "bar")
|
||||
@ -122,6 +145,9 @@ func (suite *CollectionUnitSuite) TestNewCollection_state() {
|
||||
expect: data.DeletedState,
|
||||
},
|
||||
}
|
||||
|
||||
for _, colType := range colTypes {
|
||||
suite.Run(colType.name, func() {
|
||||
for _, test := range table {
|
||||
suite.Run(test.name, func() {
|
||||
t := suite.T()
|
||||
@ -135,11 +161,23 @@ func (suite *CollectionUnitSuite) TestNewCollection_state() {
|
||||
false),
|
||||
"u",
|
||||
mock.DefaultItemGetSerialize(),
|
||||
nil,
|
||||
nil,
|
||||
colType.validModTimes,
|
||||
nil)
|
||||
assert.Equal(t, test.expect, c.State(), "collection state")
|
||||
assert.Equal(t, test.curr, c.fullPath, "full path")
|
||||
assert.Equal(t, test.prev, c.prevPath, "prev path")
|
||||
assert.Equal(t, test.loc, c.locationPath, "location path")
|
||||
assert.Equal(t, test.curr, c.FullPath(), "full path")
|
||||
assert.Equal(t, test.prev, c.PreviousPath(), "prev path")
|
||||
|
||||
// TODO(ashmrtn): Add LocationPather as part of BackupCollection.
|
||||
require.Implements(t, (*data.LocationPather)(nil), c)
|
||||
assert.Equal(
|
||||
t,
|
||||
test.loc,
|
||||
c.(data.LocationPather).LocationPath(),
|
||||
"location path")
|
||||
})
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
@ -192,7 +230,7 @@ func (suite *CollectionUnitSuite) TestGetItemWithRetries() {
|
||||
}
|
||||
}
|
||||
|
||||
func (suite *CollectionUnitSuite) TestCollection_streamItems() {
|
||||
func (suite *CollectionUnitSuite) TestPrefetchCollection_Items() {
|
||||
var (
|
||||
t = suite.T()
|
||||
start = time.Now().Add(-time.Second)
|
||||
@ -207,42 +245,45 @@ func (suite *CollectionUnitSuite) TestCollection_streamItems() {
|
||||
|
||||
table := []struct {
|
||||
name string
|
||||
added map[string]struct{}
|
||||
added map[string]time.Time
|
||||
removed map[string]struct{}
|
||||
expectItemCount int
|
||||
}{
|
||||
{
|
||||
name: "no items",
|
||||
added: map[string]struct{}{},
|
||||
removed: map[string]struct{}{},
|
||||
},
|
||||
{
|
||||
name: "only added items",
|
||||
added: map[string]struct{}{
|
||||
added: map[string]time.Time{
|
||||
"fisher": {},
|
||||
"flannigan": {},
|
||||
"fitzbog": {},
|
||||
},
|
||||
removed: map[string]struct{}{},
|
||||
expectItemCount: 3,
|
||||
},
|
||||
{
|
||||
name: "only removed items",
|
||||
added: map[string]struct{}{},
|
||||
removed: map[string]struct{}{
|
||||
"princess": {},
|
||||
"poppy": {},
|
||||
"petunia": {},
|
||||
},
|
||||
expectItemCount: 3,
|
||||
},
|
||||
{
|
||||
name: "added and removed items",
|
||||
added: map[string]struct{}{},
|
||||
added: map[string]time.Time{
|
||||
"general": {},
|
||||
},
|
||||
removed: map[string]struct{}{
|
||||
"general": {},
|
||||
"goose": {},
|
||||
"grumbles": {},
|
||||
},
|
||||
expectItemCount: 3,
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range table {
|
||||
suite.Run(test.name, func() {
|
||||
var (
|
||||
@ -263,19 +304,14 @@ func (suite *CollectionUnitSuite) TestCollection_streamItems() {
|
||||
false),
|
||||
"",
|
||||
&mock.ItemGetSerialize{},
|
||||
test.added,
|
||||
maps.Keys(test.removed),
|
||||
false,
|
||||
statusUpdater)
|
||||
|
||||
col.added = test.added
|
||||
col.removed = test.removed
|
||||
|
||||
for item := range col.Items(ctx, errs) {
|
||||
itemCount++
|
||||
|
||||
_, aok := test.added[item.ID()]
|
||||
if aok {
|
||||
assert.False(t, item.Deleted(), "additions should not be marked as deleted")
|
||||
}
|
||||
|
||||
_, rok := test.removed[item.ID()]
|
||||
if rok {
|
||||
assert.True(t, item.Deleted(), "removals should be marked as deleted")
|
||||
@ -284,15 +320,310 @@ func (suite *CollectionUnitSuite) TestCollection_streamItems() {
|
||||
assert.True(t, dimt.ModTime().After(start), "deleted items should set mod time to now()")
|
||||
}
|
||||
|
||||
_, aok := test.added[item.ID()]
|
||||
if !rok && aok {
|
||||
assert.False(t, item.Deleted(), "additions should not be marked as deleted")
|
||||
}
|
||||
|
||||
assert.True(t, aok || rok, "item must be either added or removed: %q", item.ID())
|
||||
}
|
||||
|
||||
assert.NoError(t, errs.Failure())
|
||||
assert.Equal(
|
||||
t,
|
||||
len(test.added)+len(test.removed),
|
||||
test.expectItemCount,
|
||||
itemCount,
|
||||
"should see all expected items")
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// mockLazyItemGetterSerializer wraps mock.ItemGetSerialize and records the ID
// of every item whose data is fetched, so tests can assert that lazy fetches
// happen only for the items that were actually read.
type mockLazyItemGetterSerializer struct {
	*mock.ItemGetSerialize

	// callIDs accumulates the itemID argument of each GetItem call.
	callIDs []string
}

// GetItem records the requested item ID and delegates to the embedded mock.
func (mlg *mockLazyItemGetterSerializer) GetItem(
	ctx context.Context,
	user string,
	itemID string,
	immutableIDs bool,
	errs *fault.Bus,
) (serialization.Parsable, *details.ExchangeInfo, error) {
	mlg.callIDs = append(mlg.callIDs, itemID)
	return mlg.ItemGetSerialize.GetItem(ctx, user, itemID, immutableIDs, errs)
}

// check asserts that exactly the expected set of item IDs had data fetched.
func (mlg *mockLazyItemGetterSerializer) check(t *testing.T, expectIDs []string) {
	assert.ElementsMatch(t, expectIDs, mlg.callIDs)
}
|
||||
|
||||
// TestLazyFetchCollection_Items_LazyFetch verifies that a lazyFetchCollection
// streams all added and removed items, preserves the enumeration mod times,
// and only fetches item data for items whose readers are actually read
// (checked via mockLazyItemGetterSerializer).
func (suite *CollectionUnitSuite) TestLazyFetchCollection_Items_LazyFetch() {
	var (
		t             = suite.T()
		start         = time.Now().Add(-time.Second)
		statusUpdater = func(*support.ControllerOperationStatus) {}
	)

	fullPath, err := path.Build("t", "pr", path.ExchangeService, path.EmailCategory, false, "fnords", "smarf")
	require.NoError(t, err, clues.ToCore(err))

	locPath, err := path.Build("t", "pr", path.ExchangeService, path.EmailCategory, false, "fnords", "smarf")
	require.NoError(t, err, clues.ToCore(err))

	table := []struct {
		name    string
		added   map[string]time.Time
		removed map[string]struct{}
		// expectItemCount is the total number of items (added + removed) the
		// collection should stream.
		expectItemCount int
		// expectReads lists the added items whose data the test reads, and
		// therefore the only IDs the getter should see.
		expectReads []string
	}{
		{
			name: "no items",
		},
		{
			name: "only added items",
			added: map[string]time.Time{
				"fisher":    start.Add(time.Minute),
				"flannigan": start.Add(2 * time.Minute),
				"fitzbog":   start.Add(3 * time.Minute),
			},
			expectItemCount: 3,
			expectReads: []string{
				"fisher",
				"fitzbog",
			},
		},
		{
			name: "only removed items",
			removed: map[string]struct{}{
				"princess": {},
				"poppy":    {},
				"petunia":  {},
			},
			expectItemCount: 3,
		},
		{
			// "general" appears in both sets; removal wins, so only 3 items
			// are streamed.
			name: "added and removed items",
			added: map[string]time.Time{
				"general": {},
			},
			removed: map[string]struct{}{
				"general":  {},
				"goose":    {},
				"grumbles": {},
			},
			expectItemCount: 3,
		},
	}

	for _, test := range table {
		suite.Run(test.name, func() {
			var (
				t         = suite.T()
				errs      = fault.New(true)
				itemCount int
			)

			ctx, flush := tester.NewContext(t)
			defer flush()

			mlg := &mockLazyItemGetterSerializer{
				ItemGetSerialize: &mock.ItemGetSerialize{},
			}
			defer mlg.check(t, test.expectReads)

			col := NewCollection(
				NewBaseCollection(
					fullPath,
					nil,
					locPath.ToBuilder(),
					control.DefaultOptions(),
					false),
				"",
				mlg,
				test.added,
				maps.Keys(test.removed),
				true,
				statusUpdater)

			for item := range col.Items(ctx, errs) {
				itemCount++

				_, rok := test.removed[item.ID()]
				if rok {
					assert.True(t, item.Deleted(), "removals should be marked as deleted")
					dimt, ok := item.(data.ItemModTime)
					require.True(t, ok, "item implements data.ItemModTime")
					assert.True(t, dimt.ModTime().After(start), "deleted items should set mod time to now()")
				}

				modTime, aok := test.added[item.ID()]
				if !rok && aok {
					// Item's mod time should be what's passed into the collection
					// initializer.
					assert.Implements(t, (*data.ItemModTime)(nil), item)
					assert.Equal(t, modTime, item.(data.ItemModTime).ModTime(), "item mod time")

					assert.False(t, item.Deleted(), "additions should not be marked as deleted")

					// Check if the test wants us to read the item's data so the lazy
					// data fetch is executed.
					if slices.Contains(test.expectReads, item.ID()) {
						r := item.ToReader()

						_, err := io.ReadAll(r)
						assert.NoError(t, err, clues.ToCore(err))

						r.Close()

						assert.Implements(t, (*data.ItemInfo)(nil), item)
						info, err := item.(data.ItemInfo).Info()

						// ItemInfo's mod time should match what was passed into the
						// collection initializer.
						assert.NoError(t, err, clues.ToCore(err))
						assert.Equal(t, modTime, info.Modified(), "ItemInfo mod time")
					}
				}

				assert.True(t, aok || rok, "item must be either added or removed: %q", item.ID())
			}

			assert.NoError(t, errs.Failure())
			assert.Equal(
				t,
				test.expectItemCount,
				itemCount,
				"should see all expected items")
		})
	}
}
|
||||
|
||||
func (suite *CollectionUnitSuite) TestLazyItem_NoRead_GetInfo_Errors() {
|
||||
t := suite.T()
|
||||
|
||||
ctx, flush := tester.NewContext(t)
|
||||
defer flush()
|
||||
|
||||
li := lazyItem{ctx: ctx}
|
||||
|
||||
_, err := li.Info()
|
||||
assert.Error(suite.T(), err, "Info without reading data should error")
|
||||
}
|
||||
|
||||
// TestLazyItem exercises lazyItem's reader and Info behavior across the
// success path, the deleted-in-flight path (empty reader, ErrNotFound from
// Info), and generic get/serialize failures.
func (suite *CollectionUnitSuite) TestLazyItem() {
	var (
		parentPath = "inbox/private/silly cats"
		now        = time.Now()
	)

	table := []struct {
		name string
		// modTime is handed to the lazyItem as the enumeration-time mod time.
		modTime time.Time
		// getErr / serializeErr configure the mock getter's failure modes.
		getErr       error
		serializeErr error
		// expectModTime is what Info().Modified() should report on success.
		expectModTime     time.Time
		expectReadErrType error
		dataCheck         assert.ValueAssertionFunc
		expectInfoErr     bool
		expectInfoErrType error
	}{
		{
			name:              "ReturnsEmptyReaderOnDeletedInFlight",
			modTime:           now,
			getErr:            graph.ErrDeletedInFlight,
			dataCheck:         assert.Empty,
			expectInfoErr:     true,
			expectInfoErrType: data.ErrNotFound,
		},
		{
			name:          "ReturnsValidReaderAndInfo",
			modTime:       now,
			dataCheck:     assert.NotEmpty,
			expectModTime: now,
		},
		{
			name:              "ReturnsErrorOnGenericGetError",
			modTime:           now,
			getErr:            assert.AnError,
			expectReadErrType: assert.AnError,
			dataCheck:         assert.Empty,
			expectInfoErr:     true,
		},
		{
			name:              "ReturnsErrorOnGenericSerializeError",
			modTime:           now,
			serializeErr:      assert.AnError,
			expectReadErrType: assert.AnError,
			dataCheck:         assert.Empty,
			expectInfoErr:     true,
		},
	}

	for _, test := range table {
		suite.Run(test.name, func() {
			t := suite.T()

			ctx, flush := tester.NewContext(t)
			defer flush()

			var testData serialization.Parsable

			if test.getErr == nil {
				// Exact data type doesn't really matter.
				item := models.NewMessage()
				item.SetSubject(ptr.To("hello world"))

				testData = item
			}

			getter := &mock.ItemGetSerialize{
				GetData:      testData,
				GetErr:       test.getErr,
				SerializeErr: test.serializeErr,
			}

			li := &lazyItem{
				ctx:          ctx,
				userID:       "userID",
				id:           "itemID",
				parentPath:   parentPath,
				getter:       getter,
				errs:         fault.New(true),
				modTime:      test.modTime,
				immutableIDs: false,
			}

			assert.False(t, li.Deleted(), "item shouldn't be marked deleted")
			assert.Equal(t, test.modTime, li.ModTime(), "item mod time")

			data, err := io.ReadAll(li.ToReader())
			if test.expectReadErrType == nil {
				assert.NoError(t, err, "reading item data: %v", clues.ToCore(err))
			} else {
				assert.ErrorIs(t, err, test.expectReadErrType, "read error")
			}

			test.dataCheck(t, data, "read item data")

			info, err := li.Info()

			// Didn't expect an error getting info, it should be valid.
			if !test.expectInfoErr {
				assert.NoError(t, err, "getting item info: %v", clues.ToCore(err))
				assert.Equal(t, parentPath, info.Exchange.ParentPath)
				assert.Equal(t, test.expectModTime, info.Modified())

				return
			}

			// Should get some form of error when trying to get info.
			assert.Error(t, err, "Info()")

			if test.expectInfoErrType != nil {
				assert.ErrorIs(t, err, test.expectInfoErrType, "Info() error")
			}
		})
	}
}
|
||||
|
||||
@ -7,9 +7,11 @@ import (
|
||||
|
||||
"github.com/alcionai/corso/src/pkg/backup/details"
|
||||
"github.com/alcionai/corso/src/pkg/fault"
|
||||
"github.com/alcionai/corso/src/pkg/services/m365/api"
|
||||
)
|
||||
|
||||
type ItemGetSerialize struct {
|
||||
GetData serialization.Parsable
|
||||
GetCount int
|
||||
GetErr error
|
||||
SerializeCount int
|
||||
@ -23,18 +25,23 @@ func (m *ItemGetSerialize) GetItem(
|
||||
*fault.Bus,
|
||||
) (serialization.Parsable, *details.ExchangeInfo, error) {
|
||||
m.GetCount++
|
||||
return nil, &details.ExchangeInfo{}, m.GetErr
|
||||
return m.GetData, &details.ExchangeInfo{}, m.GetErr
|
||||
}
|
||||
|
||||
func (m *ItemGetSerialize) Serialize(
|
||||
context.Context,
|
||||
serialization.Parsable,
|
||||
string, string,
|
||||
ctx context.Context,
|
||||
p serialization.Parsable,
|
||||
_ string, _ string,
|
||||
) ([]byte, error) {
|
||||
m.SerializeCount++
|
||||
|
||||
if p == nil || m.SerializeErr != nil {
|
||||
return nil, m.SerializeErr
|
||||
}
|
||||
|
||||
return api.Mail{}.Serialize(ctx, p, "", "")
|
||||
}
|
||||
|
||||
// DefaultItemGetSerialize returns an ItemGetSerialize mock with no
// preconfigured data or errors.
func DefaultItemGetSerialize() *ItemGetSerialize {
	return &ItemGetSerialize{}
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user