Lazily fetch Exchange item data when possible (#4300)

Implement lazy data fetch for Exchange items.
Use a new collection type to clearly denote
when items can be lazily fetched vs. requiring
eager fetch

**This PR changes how the read bytes stat is
updated.** Lazily fetched items will not
update the read bytes stat. This stat doesn't
appear to be used anywhere at the moment.

For items that are deleted between the time
enumeration takes place and the time the data
for them needs to be fetched, Corso will:
* return an empty reader for the item
* not add the item to backup details
* delete the (empty) item from kopia on the
  next backup

Manually tested deleting an item between
enumeration and data fetch.

---

#### Does this PR need a docs update or release note?

- [x]  Yes, it's included
- [ ] 🕐 Yes, but in a later PR
- [ ]  No

#### Type of change

- [x] 🌻 Feature
- [ ] 🐛 Bugfix
- [ ] 🗺️ Documentation
- [ ] 🤖 Supportability/Tests
- [ ] 💻 CI/Deployment
- [ ] 🧹 Tech Debt/Cleanup

#### Issue(s)

* closes #2023

#### Test Plan

- [x] 💪 Manual
- [x]  Unit test
- [x] 💚 E2E
This commit is contained in:
ashmrtn 2023-09-20 13:12:05 -07:00 committed by GitHub
parent 7f2200195c
commit b212c37fd3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 659 additions and 90 deletions

View File

@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Added ### Added
- Enables local or network-attached storage for Corso repositories. - Enables local or network-attached storage for Corso repositories.
- Reduce backup runtime for OneDrive and SharePoint incremental backups that have no file changes. - Reduce backup runtime for OneDrive and SharePoint incremental backups that have no file changes.
- Increase Exchange backup performance by lazily fetching data only for items whose content changed.
## [v0.13.0] (beta) - 2023-09-18 ## [v0.13.0] (beta) - 2023-09-18

View File

@ -5,6 +5,7 @@ import (
"context" "context"
"encoding/base64" "encoding/base64"
"encoding/binary" "encoding/binary"
"errors"
"io" "io"
"os" "os"
"runtime/trace" "runtime/trace"
@ -231,7 +232,11 @@ func (cp *corsoProgress) FinishedFile(relativePath string, err error) {
} }
info, err := d.infoer.Info() info, err := d.infoer.Info()
if err != nil { if errors.Is(err, data.ErrNotFound) {
// The item was deleted between enumeration and trying to get data. Skip
// adding it to details since there's no data for it.
return
} else if err != nil {
cp.errs.AddRecoverable(ctx, clues.Wrap(err, "getting ItemInfo"). cp.errs.AddRecoverable(ctx, clues.Wrap(err, "getting ItemInfo").
WithClues(ctx). WithClues(ctx).
Label(fault.LabelForceNoBackupCreation)) Label(fault.LabelForceNoBackupCreation))

View File

@ -160,7 +160,7 @@ func populateCollections(
ictx = clues.Add(ictx, "previous_path", prevPath) ictx = clues.Add(ictx, "previous_path", prevPath)
added, _, removed, newDelta, err := bh.itemEnumerator(). added, validModTimes, removed, newDelta, err := bh.itemEnumerator().
GetAddedAndRemovedItemIDs( GetAddedAndRemovedItemIDs(
ictx, ictx,
qp.ProtectedResource.ID(), qp.ProtectedResource.ID(),
@ -197,21 +197,12 @@ func populateCollections(
newDelta.Reset), newDelta.Reset),
qp.ProtectedResource.ID(), qp.ProtectedResource.ID(),
bh.itemHandler(), bh.itemHandler(),
added,
removed,
validModTimes,
statusUpdater) statusUpdater)
collections[cID] = &edc collections[cID] = edc
for add := range added {
edc.added[add] = struct{}{}
}
// Remove any deleted IDs from the set of added IDs because items that are
// deleted and then restored will have a different ID than they did
// originally.
for _, remove := range removed {
delete(edc.added, remove)
edc.removed[remove] = struct{}{}
}
// add the current path for the container ID to be used in the next backup // add the current path for the container ID to be used in the next backup
// as the "previous path", for reference in case of a rename or relocation. // as the "previous path", for reference in case of a rename or relocation.
@ -259,8 +250,11 @@ func populateCollections(
false), false),
qp.ProtectedResource.ID(), qp.ProtectedResource.ID(),
bh.itemHandler(), bh.itemHandler(),
nil,
nil,
false,
statusUpdater) statusUpdater)
collections[id] = &edc collections[id] = edc
} }
logger.Ctx(ctx).Infow( logger.Ctx(ctx).Infow(

View File

@ -11,6 +11,7 @@ import (
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require" "github.com/stretchr/testify/require"
"github.com/stretchr/testify/suite" "github.com/stretchr/testify/suite"
"golang.org/x/exp/maps"
inMock "github.com/alcionai/corso/src/internal/common/idname/mock" inMock "github.com/alcionai/corso/src/internal/common/idname/mock"
"github.com/alcionai/corso/src/internal/common/ptr" "github.com/alcionai/corso/src/internal/common/ptr"
@ -599,7 +600,7 @@ func (suite *BackupIntgSuite) TestDelta() {
// now do another backup with the previous delta tokens, // now do another backup with the previous delta tokens,
// which should only contain the difference. // which should only contain the difference.
collections, err = CreateCollections( _, err = CreateCollections(
ctx, ctx,
bpc, bpc,
handlers, handlers,
@ -609,19 +610,6 @@ func (suite *BackupIntgSuite) TestDelta() {
func(status *support.ControllerOperationStatus) {}, func(status *support.ControllerOperationStatus) {},
fault.New(true)) fault.New(true))
require.NoError(t, err, clues.ToCore(err)) require.NoError(t, err, clues.ToCore(err))
// TODO(keepers): this isn't a very useful test at the moment. It needs to
// investigate the items in the original and delta collections to at least
// assert some minimum assumptions, such as "deltas should retrieve fewer items".
// Delta usage is commented out at the moment, anyway. So this is currently
// a sanity check that the minimum behavior won't break.
for _, coll := range collections {
if coll.FullPath().Service() != path.ExchangeMetadataService {
ec, ok := coll.(*prefetchCollection)
require.True(t, ok, "collection is *prefetchCollection")
assert.NotNil(t, ec)
}
}
}) })
} }
} }
@ -1184,10 +1172,12 @@ func (suite *CollectionPopulationSuite) TestPopulateCollections() {
make([]string, 0, len(exColl.removed)), make([]string, 0, len(exColl.removed)),
} }
for i, cIDs := range []map[string]struct{}{exColl.added, exColl.removed} { for id := range exColl.added {
for id := range cIDs { ids[0] = append(ids[0], id)
ids[i] = append(ids[i], id)
} }
for id := range exColl.removed {
ids[1] = append(ids[1], id)
} }
assert.ElementsMatch(t, expect.added, ids[0], "added items") assert.ElementsMatch(t, expect.added, ids[0], "added items")
@ -1519,10 +1509,12 @@ func (suite *CollectionPopulationSuite) TestFilterContainersAndFillCollections_D
make([]string, 0, len(exColl.removed)), make([]string, 0, len(exColl.removed)),
} }
for i, cIDs := range []map[string]struct{}{exColl.added, exColl.removed} { for id := range exColl.added {
for id := range cIDs { ids[0] = append(ids[0], id)
ids[i] = append(ids[i], id)
} }
for id := range exColl.removed {
ids[1] = append(ids[1], id)
} }
assert.ElementsMatch(t, expect.added, ids[0], "added items") assert.ElementsMatch(t, expect.added, ids[0], "added items")
@ -1680,7 +1672,11 @@ func (suite *CollectionPopulationSuite) TestFilterContainersAndFillCollections_r
exColl, ok := coll.(*prefetchCollection) exColl, ok := coll.(*prefetchCollection)
require.True(t, ok, "collection is an *exchange.prefetchCollection") require.True(t, ok, "collection is an *exchange.prefetchCollection")
assert.Equal(t, test.expectAdded, exColl.added, "added items") assert.ElementsMatch(
t,
maps.Keys(test.expectAdded),
maps.Keys(exColl.added),
"added items")
assert.Equal(t, test.expectRemoved, exColl.removed, "removed items") assert.Equal(t, test.expectRemoved, exColl.removed, "removed items")
} }
}) })

View File

@ -12,6 +12,8 @@ import (
"time" "time"
"github.com/alcionai/clues" "github.com/alcionai/clues"
"github.com/spatialcurrent/go-lazy/pkg/lazy"
"golang.org/x/exp/maps"
"github.com/alcionai/corso/src/internal/data" "github.com/alcionai/corso/src/internal/data"
"github.com/alcionai/corso/src/internal/m365/graph" "github.com/alcionai/corso/src/internal/m365/graph"
@ -174,18 +176,46 @@ func NewCollection(
bc baseCollection, bc baseCollection,
user string, user string,
items itemGetterSerializer, items itemGetterSerializer,
origAdded map[string]time.Time,
origRemoved []string,
validModTimes bool,
statusUpdater support.StatusUpdater, statusUpdater support.StatusUpdater,
) prefetchCollection { ) data.BackupCollection {
collection := prefetchCollection{ added := maps.Clone(origAdded)
removed := make(map[string]struct{}, len(origRemoved))
// Remove any deleted IDs from the set of added IDs because items that are
// deleted and then restored will have a different ID than they did
// originally.
//
// TODO(ashmrtn): If we switch to immutable IDs then we'll need to handle this
// sort of operation in the pager since this would become order-dependent
// unless Graph started consolidating the changes into a single delta result.
for _, r := range origRemoved {
delete(added, r)
removed[r] = struct{}{}
}
if !validModTimes {
return &prefetchCollection{
baseCollection: bc, baseCollection: bc,
user: user, user: user,
added: map[string]struct{}{}, added: added,
removed: map[string]struct{}{}, removed: removed,
getter: items, getter: items,
statusUpdater: statusUpdater, statusUpdater: statusUpdater,
} }
}
return collection return &lazyFetchCollection{
baseCollection: bc,
user: user,
added: added,
removed: removed,
getter: items,
statusUpdater: statusUpdater,
}
} }
// prefetchCollection implements the interface from data.BackupCollection // prefetchCollection implements the interface from data.BackupCollection
@ -196,7 +226,7 @@ type prefetchCollection struct {
user string user string
// added is a list of existing item IDs that were added to a container // added is a list of existing item IDs that were added to a container
added map[string]struct{} added map[string]time.Time
// removed is a list of item IDs that were deleted from, or moved out, of a container // removed is a list of item IDs that were deleted from, or moved out, of a container
removed map[string]struct{} removed map[string]struct{}
@ -337,6 +367,126 @@ func (col *prefetchCollection) streamItems(
wg.Wait() wg.Wait()
} }
// -----------------------------------------------------------------------------
// lazyFetchCollection
// -----------------------------------------------------------------------------
// lazyFetchCollection implements the interface from data.BackupCollection
// Structure holds data for an Exchange application for a single user. It lazily
// fetches the data associated with each item when kopia requests it during
// upload.
//
// When accounting for stats, items are marked as successful when the basic
// information (path and mod time) is handed to kopia. Total bytes across all
// items is not tracked.
type lazyFetchCollection struct {
baseCollection
user string
// added is a list of existing item IDs that were added to a container
added map[string]time.Time
// removed is a list of item IDs that were deleted from, or moved out, of a container
removed map[string]struct{}
getter itemGetterSerializer
statusUpdater support.StatusUpdater
}
// Items utility function to asynchronously execute process to fill data channel with
// M365 exchange objects and returns the data channel
func (col *lazyFetchCollection) Items(ctx context.Context, errs *fault.Bus) <-chan data.Item {
stream := make(chan data.Item, collectionChannelBufferSize)
go col.streamItems(ctx, stream, errs)
return stream
}
// streamItems is a utility function that uses col.collectionType to be able to
// serialize all the M365IDs defined in the added field. data channel is closed
// by this function.
func (col *lazyFetchCollection) streamItems(
ctx context.Context,
stream chan<- data.Item,
errs *fault.Bus,
) {
var (
success int64
colProgress chan<- struct{}
user = col.user
)
defer func() {
close(stream)
updateStatus(
ctx,
col.statusUpdater,
len(col.added)+len(col.removed),
int(success),
0,
col.FullPath().Folder(false),
errs.Failure())
}()
if len(col.added)+len(col.removed) > 0 {
colProgress = observe.CollectionProgress(
ctx,
col.FullPath().Category().String(),
col.LocationPath().Elements())
defer close(colProgress)
}
// delete all removed items
for id := range col.removed {
stream <- &Item{
id: id,
modTime: time.Now().UTC(), // removed items have no modTime entry.
deleted: true,
}
atomic.AddInt64(&success, 1)
if colProgress != nil {
colProgress <- struct{}{}
}
}
parentPath := col.LocationPath().String()
// add any new items
for id, modTime := range col.added {
if errs.Failure() != nil {
break
}
ictx := clues.Add(
ctx,
"item_id", id,
"parent_path", path.LoggableDir(parentPath),
"service", path.ExchangeService.String(),
"category", col.FullPath().Category().String())
stream <- &lazyItem{
ctx: ictx,
userID: user,
id: id,
getter: col.getter,
modTime: modTime,
immutableIDs: col.ctrl.ToggleFeatures.ExchangeImmutableIDs,
parentPath: parentPath,
errs: errs,
}
atomic.AddInt64(&success, 1)
if colProgress != nil {
colProgress <- struct{}{}
}
}
}
// Item represents a single item retrieved from exchange // Item represents a single item retrieved from exchange
type Item struct { type Item struct {
id string id string
@ -386,3 +536,88 @@ func NewItem(
modTime: modTime, modTime: modTime,
} }
} }
// lazyItem represents a single item retrieved from exchange that lazily fetches
// the item's data when the first call to ToReader().Read() is made.
type lazyItem struct {
ctx context.Context
userID string
id string
parentPath string
getter itemGetterSerializer
errs *fault.Bus
modTime time.Time
// info holds the Exchange-specific details information for this item. Store
// a pointer in this struct so the golang garbage collector can collect the
// Item struct once kopia is done with it. The ExchangeInfo struct needs to
// stick around until the end of the backup though as backup details is
// written last.
info *details.ExchangeInfo
immutableIDs bool
delInFlight bool
}
func (i lazyItem) ID() string {
return i.id
}
func (i *lazyItem) ToReader() io.ReadCloser {
return lazy.NewLazyReadCloser(func() (io.ReadCloser, error) {
itemData, info, err := getItemAndInfo(
i.ctx,
i.getter,
i.userID,
i.ID(),
i.immutableIDs,
i.parentPath)
if err != nil {
// If an item was deleted then return an empty file so we don't fail
// the backup and return a sentinel error when asked for ItemInfo so
// we don't display the item in the backup.
//
// The item will be deleted from kopia on the next backup when the
// delta token shows it's removed.
if graph.IsErrDeletedInFlight(err) {
logger.CtxErr(i.ctx, err).Info("item not found")
i.delInFlight = true
return io.NopCloser(bytes.NewReader([]byte{})), nil
}
err = clues.Stack(err)
i.errs.AddRecoverable(i.ctx, err)
return nil, err
}
i.info = info
// Update the mod time to what we already told kopia about. This is required
// for proper details merging.
i.info.Modified = i.modTime
return io.NopCloser(bytes.NewReader(itemData)), nil
})
}
func (i lazyItem) Deleted() bool {
return false
}
func (i lazyItem) Info() (details.ItemInfo, error) {
if i.delInFlight {
return details.ItemInfo{}, clues.Stack(data.ErrNotFound).WithClues(i.ctx)
} else if i.info == nil {
return details.ItemInfo{}, clues.New("requesting ItemInfo before data retrieval").
WithClues(i.ctx)
}
return details.ItemInfo{Exchange: i.info}, nil
}
func (i lazyItem) ModTime() time.Time {
return i.modTime
}

View File

@ -2,19 +2,27 @@ package exchange
import ( import (
"bytes" "bytes"
"context"
"io"
"testing" "testing"
"time" "time"
"github.com/alcionai/clues" "github.com/alcionai/clues"
"github.com/microsoft/kiota-abstractions-go/serialization"
"github.com/microsoftgraph/msgraph-sdk-go/models"
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require" "github.com/stretchr/testify/require"
"github.com/stretchr/testify/suite" "github.com/stretchr/testify/suite"
"golang.org/x/exp/maps"
"golang.org/x/exp/slices"
"github.com/alcionai/corso/src/internal/common/ptr"
"github.com/alcionai/corso/src/internal/data" "github.com/alcionai/corso/src/internal/data"
"github.com/alcionai/corso/src/internal/m365/collection/exchange/mock" "github.com/alcionai/corso/src/internal/m365/collection/exchange/mock"
"github.com/alcionai/corso/src/internal/m365/graph" "github.com/alcionai/corso/src/internal/m365/graph"
"github.com/alcionai/corso/src/internal/m365/support" "github.com/alcionai/corso/src/internal/m365/support"
"github.com/alcionai/corso/src/internal/tester" "github.com/alcionai/corso/src/internal/tester"
"github.com/alcionai/corso/src/pkg/backup/details"
"github.com/alcionai/corso/src/pkg/control" "github.com/alcionai/corso/src/pkg/control"
"github.com/alcionai/corso/src/pkg/fault" "github.com/alcionai/corso/src/pkg/fault"
"github.com/alcionai/corso/src/pkg/path" "github.com/alcionai/corso/src/pkg/path"
@ -82,6 +90,21 @@ func (suite *CollectionUnitSuite) TestCollection_NewCollection() {
} }
func (suite *CollectionUnitSuite) TestNewCollection_state() { func (suite *CollectionUnitSuite) TestNewCollection_state() {
type collectionTypes struct {
name string
validModTimes bool
}
colTypes := []collectionTypes{
{
name: "prefetchCollection",
},
{
name: "lazyFetchCollection",
validModTimes: true,
},
}
fooP, err := path.Build("t", "u", path.ExchangeService, path.EmailCategory, false, "foo") fooP, err := path.Build("t", "u", path.ExchangeService, path.EmailCategory, false, "foo")
require.NoError(suite.T(), err, clues.ToCore(err)) require.NoError(suite.T(), err, clues.ToCore(err))
barP, err := path.Build("t", "u", path.ExchangeService, path.EmailCategory, false, "bar") barP, err := path.Build("t", "u", path.ExchangeService, path.EmailCategory, false, "bar")
@ -122,6 +145,9 @@ func (suite *CollectionUnitSuite) TestNewCollection_state() {
expect: data.DeletedState, expect: data.DeletedState,
}, },
} }
for _, colType := range colTypes {
suite.Run(colType.name, func() {
for _, test := range table { for _, test := range table {
suite.Run(test.name, func() { suite.Run(test.name, func() {
t := suite.T() t := suite.T()
@ -135,11 +161,23 @@ func (suite *CollectionUnitSuite) TestNewCollection_state() {
false), false),
"u", "u",
mock.DefaultItemGetSerialize(), mock.DefaultItemGetSerialize(),
nil,
nil,
colType.validModTimes,
nil) nil)
assert.Equal(t, test.expect, c.State(), "collection state") assert.Equal(t, test.expect, c.State(), "collection state")
assert.Equal(t, test.curr, c.fullPath, "full path") assert.Equal(t, test.curr, c.FullPath(), "full path")
assert.Equal(t, test.prev, c.prevPath, "prev path") assert.Equal(t, test.prev, c.PreviousPath(), "prev path")
assert.Equal(t, test.loc, c.locationPath, "location path")
// TODO(ashmrtn): Add LocationPather as part of BackupCollection.
require.Implements(t, (*data.LocationPather)(nil), c)
assert.Equal(
t,
test.loc,
c.(data.LocationPather).LocationPath(),
"location path")
})
}
}) })
} }
} }
@ -192,7 +230,7 @@ func (suite *CollectionUnitSuite) TestGetItemWithRetries() {
} }
} }
func (suite *CollectionUnitSuite) TestCollection_streamItems() { func (suite *CollectionUnitSuite) TestPrefetchCollection_Items() {
var ( var (
t = suite.T() t = suite.T()
start = time.Now().Add(-time.Second) start = time.Now().Add(-time.Second)
@ -207,42 +245,45 @@ func (suite *CollectionUnitSuite) TestCollection_streamItems() {
table := []struct { table := []struct {
name string name string
added map[string]struct{} added map[string]time.Time
removed map[string]struct{} removed map[string]struct{}
expectItemCount int
}{ }{
{ {
name: "no items", name: "no items",
added: map[string]struct{}{},
removed: map[string]struct{}{},
}, },
{ {
name: "only added items", name: "only added items",
added: map[string]struct{}{ added: map[string]time.Time{
"fisher": {}, "fisher": {},
"flannigan": {}, "flannigan": {},
"fitzbog": {}, "fitzbog": {},
}, },
removed: map[string]struct{}{}, expectItemCount: 3,
}, },
{ {
name: "only removed items", name: "only removed items",
added: map[string]struct{}{},
removed: map[string]struct{}{ removed: map[string]struct{}{
"princess": {}, "princess": {},
"poppy": {}, "poppy": {},
"petunia": {}, "petunia": {},
}, },
expectItemCount: 3,
}, },
{ {
name: "added and removed items", name: "added and removed items",
added: map[string]struct{}{}, added: map[string]time.Time{
"general": {},
},
removed: map[string]struct{}{ removed: map[string]struct{}{
"general": {}, "general": {},
"goose": {}, "goose": {},
"grumbles": {}, "grumbles": {},
}, },
expectItemCount: 3,
}, },
} }
for _, test := range table { for _, test := range table {
suite.Run(test.name, func() { suite.Run(test.name, func() {
var ( var (
@ -263,19 +304,14 @@ func (suite *CollectionUnitSuite) TestCollection_streamItems() {
false), false),
"", "",
&mock.ItemGetSerialize{}, &mock.ItemGetSerialize{},
test.added,
maps.Keys(test.removed),
false,
statusUpdater) statusUpdater)
col.added = test.added
col.removed = test.removed
for item := range col.Items(ctx, errs) { for item := range col.Items(ctx, errs) {
itemCount++ itemCount++
_, aok := test.added[item.ID()]
if aok {
assert.False(t, item.Deleted(), "additions should not be marked as deleted")
}
_, rok := test.removed[item.ID()] _, rok := test.removed[item.ID()]
if rok { if rok {
assert.True(t, item.Deleted(), "removals should be marked as deleted") assert.True(t, item.Deleted(), "removals should be marked as deleted")
@ -284,15 +320,310 @@ func (suite *CollectionUnitSuite) TestCollection_streamItems() {
assert.True(t, dimt.ModTime().After(start), "deleted items should set mod time to now()") assert.True(t, dimt.ModTime().After(start), "deleted items should set mod time to now()")
} }
_, aok := test.added[item.ID()]
if !rok && aok {
assert.False(t, item.Deleted(), "additions should not be marked as deleted")
}
assert.True(t, aok || rok, "item must be either added or removed: %q", item.ID()) assert.True(t, aok || rok, "item must be either added or removed: %q", item.ID())
} }
assert.NoError(t, errs.Failure()) assert.NoError(t, errs.Failure())
assert.Equal( assert.Equal(
t, t,
len(test.added)+len(test.removed), test.expectItemCount,
itemCount, itemCount,
"should see all expected items") "should see all expected items")
}) })
} }
} }
type mockLazyItemGetterSerializer struct {
*mock.ItemGetSerialize
callIDs []string
}
func (mlg *mockLazyItemGetterSerializer) GetItem(
ctx context.Context,
user string,
itemID string,
immutableIDs bool,
errs *fault.Bus,
) (serialization.Parsable, *details.ExchangeInfo, error) {
mlg.callIDs = append(mlg.callIDs, itemID)
return mlg.ItemGetSerialize.GetItem(ctx, user, itemID, immutableIDs, errs)
}
func (mlg *mockLazyItemGetterSerializer) check(t *testing.T, expectIDs []string) {
assert.ElementsMatch(t, expectIDs, mlg.callIDs)
}
func (suite *CollectionUnitSuite) TestLazyFetchCollection_Items_LazyFetch() {
var (
t = suite.T()
start = time.Now().Add(-time.Second)
statusUpdater = func(*support.ControllerOperationStatus) {}
)
fullPath, err := path.Build("t", "pr", path.ExchangeService, path.EmailCategory, false, "fnords", "smarf")
require.NoError(t, err, clues.ToCore(err))
locPath, err := path.Build("t", "pr", path.ExchangeService, path.EmailCategory, false, "fnords", "smarf")
require.NoError(t, err, clues.ToCore(err))
table := []struct {
name string
added map[string]time.Time
removed map[string]struct{}
expectItemCount int
expectReads []string
}{
{
name: "no items",
},
{
name: "only added items",
added: map[string]time.Time{
"fisher": start.Add(time.Minute),
"flannigan": start.Add(2 * time.Minute),
"fitzbog": start.Add(3 * time.Minute),
},
expectItemCount: 3,
expectReads: []string{
"fisher",
"fitzbog",
},
},
{
name: "only removed items",
removed: map[string]struct{}{
"princess": {},
"poppy": {},
"petunia": {},
},
expectItemCount: 3,
},
{
name: "added and removed items",
added: map[string]time.Time{
"general": {},
},
removed: map[string]struct{}{
"general": {},
"goose": {},
"grumbles": {},
},
expectItemCount: 3,
},
}
for _, test := range table {
suite.Run(test.name, func() {
var (
t = suite.T()
errs = fault.New(true)
itemCount int
)
ctx, flush := tester.NewContext(t)
defer flush()
mlg := &mockLazyItemGetterSerializer{
ItemGetSerialize: &mock.ItemGetSerialize{},
}
defer mlg.check(t, test.expectReads)
col := NewCollection(
NewBaseCollection(
fullPath,
nil,
locPath.ToBuilder(),
control.DefaultOptions(),
false),
"",
mlg,
test.added,
maps.Keys(test.removed),
true,
statusUpdater)
for item := range col.Items(ctx, errs) {
itemCount++
_, rok := test.removed[item.ID()]
if rok {
assert.True(t, item.Deleted(), "removals should be marked as deleted")
dimt, ok := item.(data.ItemModTime)
require.True(t, ok, "item implements data.ItemModTime")
assert.True(t, dimt.ModTime().After(start), "deleted items should set mod time to now()")
}
modTime, aok := test.added[item.ID()]
if !rok && aok {
// Item's mod time should be what's passed into the collection
// initializer.
assert.Implements(t, (*data.ItemModTime)(nil), item)
assert.Equal(t, modTime, item.(data.ItemModTime).ModTime(), "item mod time")
assert.False(t, item.Deleted(), "additions should not be marked as deleted")
// Check if the test wants us to read the item's data so the lazy
// data fetch is executed.
if slices.Contains(test.expectReads, item.ID()) {
r := item.ToReader()
_, err := io.ReadAll(r)
assert.NoError(t, err, clues.ToCore(err))
r.Close()
assert.Implements(t, (*data.ItemInfo)(nil), item)
info, err := item.(data.ItemInfo).Info()
// ItemInfo's mod time should match what was passed into the
// collection initializer.
assert.NoError(t, err, clues.ToCore(err))
assert.Equal(t, modTime, info.Modified(), "ItemInfo mod time")
}
}
assert.True(t, aok || rok, "item must be either added or removed: %q", item.ID())
}
assert.NoError(t, errs.Failure())
assert.Equal(
t,
test.expectItemCount,
itemCount,
"should see all expected items")
})
}
}
func (suite *CollectionUnitSuite) TestLazyItem_NoRead_GetInfo_Errors() {
t := suite.T()
ctx, flush := tester.NewContext(t)
defer flush()
li := lazyItem{ctx: ctx}
_, err := li.Info()
assert.Error(suite.T(), err, "Info without reading data should error")
}
func (suite *CollectionUnitSuite) TestLazyItem() {
var (
parentPath = "inbox/private/silly cats"
now = time.Now()
)
table := []struct {
name string
modTime time.Time
getErr error
serializeErr error
expectModTime time.Time
expectReadErrType error
dataCheck assert.ValueAssertionFunc
expectInfoErr bool
expectInfoErrType error
}{
{
name: "ReturnsEmptyReaderOnDeletedInFlight",
modTime: now,
getErr: graph.ErrDeletedInFlight,
dataCheck: assert.Empty,
expectInfoErr: true,
expectInfoErrType: data.ErrNotFound,
},
{
name: "ReturnsValidReaderAndInfo",
modTime: now,
dataCheck: assert.NotEmpty,
expectModTime: now,
},
{
name: "ReturnsErrorOnGenericGetError",
modTime: now,
getErr: assert.AnError,
expectReadErrType: assert.AnError,
dataCheck: assert.Empty,
expectInfoErr: true,
},
{
name: "ReturnsErrorOnGenericSerializeError",
modTime: now,
serializeErr: assert.AnError,
expectReadErrType: assert.AnError,
dataCheck: assert.Empty,
expectInfoErr: true,
},
}
for _, test := range table {
suite.Run(test.name, func() {
t := suite.T()
ctx, flush := tester.NewContext(t)
defer flush()
var testData serialization.Parsable
if test.getErr == nil {
// Exact data type doesn't really matter.
item := models.NewMessage()
item.SetSubject(ptr.To("hello world"))
testData = item
}
getter := &mock.ItemGetSerialize{
GetData: testData,
GetErr: test.getErr,
SerializeErr: test.serializeErr,
}
li := &lazyItem{
ctx: ctx,
userID: "userID",
id: "itemID",
parentPath: parentPath,
getter: getter,
errs: fault.New(true),
modTime: test.modTime,
immutableIDs: false,
}
assert.False(t, li.Deleted(), "item shouldn't be marked deleted")
assert.Equal(t, test.modTime, li.ModTime(), "item mod time")
data, err := io.ReadAll(li.ToReader())
if test.expectReadErrType == nil {
assert.NoError(t, err, "reading item data: %v", clues.ToCore(err))
} else {
assert.ErrorIs(t, err, test.expectReadErrType, "read error")
}
test.dataCheck(t, data, "read item data")
info, err := li.Info()
// Didn't expect an error getting info, it should be valid.
if !test.expectInfoErr {
assert.NoError(t, err, "getting item info: %v", clues.ToCore(err))
assert.Equal(t, parentPath, info.Exchange.ParentPath)
assert.Equal(t, test.expectModTime, info.Modified())
return
}
// Should get some form of error when trying to get info.
assert.Error(t, err, "Info()")
if test.expectInfoErrType != nil {
assert.ErrorIs(t, err, test.expectInfoErrType, "Info() error")
}
})
}
}

View File

@ -7,9 +7,11 @@ import (
"github.com/alcionai/corso/src/pkg/backup/details" "github.com/alcionai/corso/src/pkg/backup/details"
"github.com/alcionai/corso/src/pkg/fault" "github.com/alcionai/corso/src/pkg/fault"
"github.com/alcionai/corso/src/pkg/services/m365/api"
) )
type ItemGetSerialize struct { type ItemGetSerialize struct {
GetData serialization.Parsable
GetCount int GetCount int
GetErr error GetErr error
SerializeCount int SerializeCount int
@ -23,16 +25,21 @@ func (m *ItemGetSerialize) GetItem(
*fault.Bus, *fault.Bus,
) (serialization.Parsable, *details.ExchangeInfo, error) { ) (serialization.Parsable, *details.ExchangeInfo, error) {
m.GetCount++ m.GetCount++
return nil, &details.ExchangeInfo{}, m.GetErr return m.GetData, &details.ExchangeInfo{}, m.GetErr
} }
func (m *ItemGetSerialize) Serialize( func (m *ItemGetSerialize) Serialize(
context.Context, ctx context.Context,
serialization.Parsable, p serialization.Parsable,
string, string, _ string, _ string,
) ([]byte, error) { ) ([]byte, error) {
m.SerializeCount++ m.SerializeCount++
if p == nil || m.SerializeErr != nil {
return nil, m.SerializeErr return nil, m.SerializeErr
}
return api.Mail{}.Serialize(ctx, p, "", "")
} }
func DefaultItemGetSerialize() *ItemGetSerialize { func DefaultItemGetSerialize() *ItemGetSerialize {