Switch to custom drive items for backup operations (#4784)

<!-- PR description-->

Switch to using `custom.DriveItem` instead of `models.DriveItemable` during backups. Restore is slightly affected as well, since backup and restore share a few common interfaces, e.g. `AugmentItemInfo`.

---

#### Does this PR need a docs update or release note?

- [x] ✅ Yes, it's included
- [ ] 🕐 Yes, but in a later PR
- [ ] ⛔ No

#### Type of change

<!--- Please check the type of change your PR introduces: --->
- [x] 🌻 Feature
- [ ] 🐛 Bugfix
- [ ] 🗺️ Documentation
- [ ] 🤖 Supportability/Tests
- [ ] 💻 CI/Deployment
- [ ] 🧹 Tech Debt/Cleanup

#### Issue(s)

<!-- Can reference multiple issues. Use one of the following "magic words" - "closes, fixes" to auto-close the Github issue. -->
* #<issue>

#### Test Plan

<!-- How will this be tested prior to merging.-->
- [x] 💪 Manual
- [x] ⚡ Unit test
- [x] 💚 E2E
This commit is contained in:
Abhishek Pandey 2023-12-05 14:48:02 -08:00 committed by GitHub
parent 047d46ea53
commit 3f98aa33de
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
18 changed files with 133 additions and 55 deletions

View File

@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Added
### Changed
- Memory optimizations for large-scale OneDrive and SharePoint backups.
### Fixed
## [v0.16.0] (beta) - 2023-11-28

View File

@ -10,7 +10,6 @@ import (
"time"
"github.com/alcionai/clues"
"github.com/microsoftgraph/msgraph-sdk-go/models"
"github.com/spatialcurrent/go-lazy/pkg/lazy"
"github.com/alcionai/corso/src/internal/common/idname"
@ -28,6 +27,7 @@ import (
"github.com/alcionai/corso/src/pkg/path"
"github.com/alcionai/corso/src/pkg/services/m365/api"
"github.com/alcionai/corso/src/pkg/services/m365/api/graph"
"github.com/alcionai/corso/src/pkg/services/m365/custom"
)
const (
@ -52,7 +52,7 @@ type Collection struct {
// represents
folderPath path.Path
// M365 IDs of file items within this collection
driveItems map[string]models.DriveItemable
driveItems map[string]*custom.DriveItem
// Primary M365 ID of the drive this collection was created from
driveID string
@ -172,7 +172,7 @@ func newColl(
protectedResource: resource,
folderPath: currPath,
prevPath: prevPath,
driveItems: map[string]models.DriveItemable{},
driveItems: map[string]*custom.DriveItem{},
driveID: driveID,
data: dataCh,
statusUpdater: statusUpdater,
@ -190,7 +190,7 @@ func newColl(
// Add adds an item to the collection, keyed by its ID. This makes the item
// eligible to be populated. The return value denotes whether the item was
// previously present or is a new one.
func (oc *Collection) Add(item models.DriveItemable) bool {
func (oc *Collection) Add(item *custom.DriveItem) bool {
_, found := oc.driveItems[ptr.Val(item.GetId())]
oc.driveItems[ptr.Val(item.GetId())] = item
@ -217,7 +217,7 @@ func (oc *Collection) IsEmpty() bool {
// ContainsItem returns true if the collection has the given item as one of its
// children.
func (oc Collection) ContainsItem(item models.DriveItemable) bool {
func (oc Collection) ContainsItem(item *custom.DriveItem) bool {
_, ok := oc.driveItems[ptr.Val(item.GetId())]
return ok
}
@ -277,7 +277,7 @@ func (oc Collection) DoNotMergeItems() bool {
func (oc *Collection) getDriveItemContent(
ctx context.Context,
driveID string,
item models.DriveItemable,
item *custom.DriveItem,
errs *fault.Bus,
) (io.ReadCloser, error) {
var (
@ -355,7 +355,7 @@ func downloadContent(
ctx context.Context,
iaag itemAndAPIGetter,
uc getItemPropertyer,
item models.DriveItemable,
item *custom.DriveItem,
driveID string,
counter *count.Bus,
) (io.ReadCloser, error) {
@ -389,7 +389,9 @@ func downloadContent(
return nil, clues.Wrap(err, "retrieving expired item")
}
content, err = downloadItem(ctx, iaag, di)
cdi := custom.ToCustomDriveItem(di)
content, err = downloadItem(ctx, iaag, cdi)
if err != nil {
return nil, clues.Wrap(err, "content download retry")
}
@ -483,7 +485,7 @@ func (oc *Collection) streamItems(ctx context.Context, errs *fault.Bus) {
wg.Add(1)
go func(item models.DriveItemable) {
go func(item *custom.DriveItem) {
defer wg.Done()
defer func() { <-semaphoreCh }()
@ -507,14 +509,14 @@ func (oc *Collection) streamItems(ctx context.Context, errs *fault.Bus) {
type lazyItemGetter struct {
info *details.ItemInfo
item models.DriveItemable
item *custom.DriveItem
driveID string
suffix string
itemExtensionFactory []extensions.CreateItemExtensioner
contentGetter func(
ctx context.Context,
driveID string,
item models.DriveItemable,
item *custom.DriveItem,
errs *fault.Bus) (io.ReadCloser, error)
}
@ -555,7 +557,7 @@ func (lig *lazyItemGetter) GetData(
func (oc *Collection) streamDriveItem(
ctx context.Context,
parentPath *path.Builder,
item models.DriveItemable,
item *custom.DriveItem,
stats *driveStats,
itemExtensionFactory []extensions.CreateItemExtensioner,
errs *fault.Bus,
@ -578,7 +580,7 @@ func (oc *Collection) streamDriveItem(
"item_name", clues.Hide(itemName),
"item_size", itemSize)
item.SetParentReference(setName(item.GetParentReference(), oc.driveName))
item.SetParentReference(custom.SetParentName(item.GetParentReference(), oc.driveName))
isFile := item.GetFile() != nil

View File

@ -34,6 +34,7 @@ import (
"github.com/alcionai/corso/src/pkg/fault"
"github.com/alcionai/corso/src/pkg/path"
"github.com/alcionai/corso/src/pkg/services/m365/api/graph"
"github.com/alcionai/corso/src/pkg/services/m365/custom"
)
// ---------------------------------------------------------------------------
@ -232,7 +233,7 @@ func (suite *CollectionUnitSuite) TestCollection() {
true)
for i := 0; i < test.numInstances; i++ {
coll.Add(stubItem)
coll.Add(custom.ToCustomDriveItem(stubItem))
}
// Read items from the collection
@ -352,7 +353,7 @@ func (suite *CollectionUnitSuite) TestCollectionReadError() {
true,
false)
coll.Add(stubItem)
coll.Add(custom.ToCustomDriveItem(stubItem))
collItem, ok := <-coll.Items(ctx, fault.New(true))
assert.True(t, ok)
@ -422,7 +423,7 @@ func (suite *CollectionUnitSuite) TestCollectionReadUnauthorizedErrorRetry() {
count.New())
require.NoError(t, err, clues.ToCore(err))
coll.Add(stubItem)
coll.Add(custom.ToCustomDriveItem(stubItem))
collItem, ok := <-coll.Items(ctx, fault.New(true))
assert.True(t, ok)
@ -490,7 +491,7 @@ func (suite *CollectionUnitSuite) TestCollectionPermissionBackupLatestModTime()
true,
false)
coll.Add(stubItem)
coll.Add(custom.ToCustomDriveItem(stubItem))
coll.handler = mbh
@ -641,7 +642,7 @@ func (suite *GetDriveItemUnitTestSuite) TestGetDriveItem_error() {
col.handler = mbh
_, err := col.getDriveItemContent(ctx, "driveID", stubItem, errs)
_, err := col.getDriveItemContent(ctx, "driveID", custom.ToCustomDriveItem(stubItem), errs)
if test.err == nil {
assert.NoError(t, err, clues.ToCore(err))
return
@ -819,7 +820,7 @@ func (suite *GetDriveItemUnitTestSuite) TestDownloadContent() {
mbh.GetResps = resps
mbh.GetErrs = test.getErr
r, err := downloadContent(ctx, mbh, test.muc, item, driveID, count.New())
r, err := downloadContent(ctx, mbh, test.muc, custom.ToCustomDriveItem(item), driveID, count.New())
test.expect(t, r)
test.expectErr(t, err, clues.ToCore(err))
})
@ -1020,7 +1021,7 @@ func (suite *CollectionUnitSuite) TestItemExtensions() {
true,
false)
coll.Add(stubItem)
coll.Add(custom.ToCustomDriveItem(stubItem))
collItem, ok := <-coll.Items(ctx, fault.New(true))
assert.True(t, ok)

View File

@ -27,6 +27,7 @@ import (
"github.com/alcionai/corso/src/pkg/services/m365/api"
"github.com/alcionai/corso/src/pkg/services/m365/api/graph"
"github.com/alcionai/corso/src/pkg/services/m365/api/pagers"
"github.com/alcionai/corso/src/pkg/services/m365/custom"
)
var errGetTreeNotImplemented = clues.New("forced error: cannot run tree-based backup: incomplete implementation")
@ -717,7 +718,7 @@ func (c *Collections) handleDelete(
func (c *Collections) getCollectionPath(
driveID string,
item models.DriveItemable,
item *custom.DriveItem,
) (path.Path, error) {
var (
pb = odConsts.DriveFolderPrefixBuilder(driveID)
@ -932,7 +933,7 @@ func (c *Collections) PopulateDriveCollections(
func (c *Collections) processItem(
ctx context.Context,
item models.DriveItemable,
di models.DriveItemable,
driveID, driveName string,
oldPrevPaths, currPrevPaths, newPrevPaths map[string]string,
seenFolders map[string]string,
@ -945,6 +946,10 @@ func (c *Collections) processItem(
skipper fault.AddSkipper,
) error {
var (
// Convert the DriveItemable retrieved from graph SDK to custom DriveItem
// which only stores the properties corso cares about during the backup
// operation. This is a memory optimization.
item = custom.ToCustomDriveItem(di)
itemID = ptr.Val(item.GetId())
itemName = ptr.Val(item.GetName())
isFolder = item.GetFolder() != nil || item.GetPackageEscaped() != nil

View File

@ -19,6 +19,7 @@ import (
"github.com/alcionai/corso/src/pkg/services/m365/api"
"github.com/alcionai/corso/src/pkg/services/m365/api/graph"
"github.com/alcionai/corso/src/pkg/services/m365/api/pagers"
"github.com/alcionai/corso/src/pkg/services/m365/custom"
)
// ---------------------------------------------------------------------------
@ -449,7 +450,7 @@ func (c *Collections) addFolderToTree(
driveID,
folderID,
folderName,
graph.ItemInfo(folder))
graph.ItemInfo(custom.ToCustomDriveItem(folder)))
logger.Ctx(ctx).Infow("malware folder detected")
@ -550,7 +551,7 @@ func (c *Collections) addFileToTree(
driveID,
fileID,
fileName,
graph.ItemInfo(file))
graph.ItemInfo(custom.ToCustomDriveItem(file)))
logger.Ctx(ctx).Infow("malware file detected")

View File

@ -2,7 +2,6 @@ package drive
import (
"github.com/alcionai/clues"
"github.com/microsoftgraph/msgraph-sdk-go/models"
"github.com/alcionai/corso/src/internal/common/idname"
"github.com/alcionai/corso/src/internal/common/ptr"
@ -11,6 +10,7 @@ import (
"github.com/alcionai/corso/src/pkg/path"
"github.com/alcionai/corso/src/pkg/selectors"
"github.com/alcionai/corso/src/pkg/services/m365/api"
"github.com/alcionai/corso/src/pkg/services/m365/custom"
)
var _ BackupHandler = &groupBackupHandler{}
@ -105,7 +105,7 @@ func (h groupBackupHandler) SitePathPrefix(tenantID string) (path.Path, error) {
func (h groupBackupHandler) AugmentItemInfo(
dii details.ItemInfo,
resource idname.Provider,
item models.DriveItemable,
item *custom.DriveItem,
size int64,
parentPath *path.Builder,
) details.ItemInfo {

View File

@ -3,12 +3,11 @@ package drive
import (
"strings"
"github.com/microsoftgraph/msgraph-sdk-go/models"
"github.com/alcionai/corso/src/internal/common/ptr"
"github.com/alcionai/corso/src/pkg/services/m365/custom"
)
func getItemCreator(item models.DriveItemable) string {
func getItemCreator(item *custom.DriveItem) string {
if item.GetCreatedBy() == nil || item.GetCreatedBy().GetUser() == nil {
return ""
}
@ -30,7 +29,7 @@ func getItemCreator(item models.DriveItemable) string {
return *ed.(*string)
}
func getItemDriveInfo(item models.DriveItemable) (string, string) {
func getItemDriveInfo(item *custom.DriveItem) (string, string) {
if item.GetParentReference() == nil {
return "", ""
}

View File

@ -12,6 +12,7 @@ import (
"github.com/alcionai/corso/src/pkg/path"
"github.com/alcionai/corso/src/pkg/services/m365/api"
"github.com/alcionai/corso/src/pkg/services/m365/api/pagers"
"github.com/alcionai/corso/src/pkg/services/m365/custom"
)
type ItemInfoAugmenter interface {
@ -23,7 +24,7 @@ type ItemInfoAugmenter interface {
AugmentItemInfo(
dii details.ItemInfo,
resource idname.Provider,
item models.DriveItemable,
item *custom.DriveItem,
size int64,
parentPath *path.Builder,
) details.ItemInfo

View File

@ -7,7 +7,6 @@ import (
"io"
"github.com/alcionai/clues"
"github.com/microsoftgraph/msgraph-sdk-go/models"
"golang.org/x/exp/maps"
"github.com/alcionai/corso/src/internal/common/ptr"
@ -18,6 +17,7 @@ import (
"github.com/alcionai/corso/src/pkg/logger"
"github.com/alcionai/corso/src/pkg/services/m365/api"
"github.com/alcionai/corso/src/pkg/services/m365/api/graph"
"github.com/alcionai/corso/src/pkg/services/m365/custom"
)
const (
@ -34,7 +34,7 @@ var downloadURLKeys = []string{
func downloadItem(
ctx context.Context,
ag api.Getter,
item models.DriveItemable,
item *custom.DriveItem,
) (io.ReadCloser, error) {
if item == nil {
return nil, clues.New("nil item")
@ -152,7 +152,7 @@ func downloadItemMeta(
ctx context.Context,
getter GetItemPermissioner,
driveID string,
item models.DriveItemable,
item *custom.DriveItem,
) (io.ReadCloser, int, error) {
meta := metadata.Metadata{
FileName: ptr.Val(item.GetName()),
@ -204,13 +204,3 @@ func driveItemWriter(
return iw, ptr.Val(icu.GetUploadUrl()), nil
}
// setName overwrites the name on the given item reference with driveName
// and hands the same reference back. A nil reference is returned untouched.
func setName(orig models.ItemReferenceable, driveName string) models.ItemReferenceable {
	if orig != nil {
		orig.SetName(&driveName)
	}

	return orig
}

View File

@ -25,6 +25,7 @@ import (
"github.com/alcionai/corso/src/pkg/selectors"
"github.com/alcionai/corso/src/pkg/services/m365/api"
"github.com/alcionai/corso/src/pkg/services/m365/api/graph"
"github.com/alcionai/corso/src/pkg/services/m365/custom"
)
type ItemIntegrationSuite struct {
@ -123,7 +124,7 @@ func (suite *ItemIntegrationSuite) TestItemReader_oneDrive() {
}
// Read data for the file
itemData, err := downloadItem(ctx, bh, driveItem)
itemData, err := downloadItem(ctx, bh, custom.ToCustomDriveItem(driveItem))
require.NoError(t, err, clues.ToCore(err))
size, err := io.Copy(io.Discard, itemData)
@ -462,7 +463,7 @@ func (suite *ItemUnitTestSuite) TestDownloadItem() {
mg := mockGetter{
GetFunc: test.GetFunc,
}
rc, err := downloadItem(ctx, mg, test.itemFunc())
rc, err := downloadItem(ctx, mg, custom.ToCustomDriveItem(test.itemFunc()))
test.errorExpected(t, err, clues.ToCore(err))
test.rcExpected(t, rc)
})
@ -521,7 +522,7 @@ func (suite *ItemUnitTestSuite) TestDownloadItem_ConnectionResetErrorOnFirstRead
mg := mockGetter{
GetFunc: GetFunc,
}
rc, err := downloadItem(ctx, mg, itemFunc())
rc, err := downloadItem(ctx, mg, custom.ToCustomDriveItem(itemFunc()))
errorExpected(t, err, clues.ToCore(err))
rcExpected(t, rc)

View File

@ -30,6 +30,7 @@ import (
"github.com/alcionai/corso/src/pkg/path"
"github.com/alcionai/corso/src/pkg/services/m365/api"
"github.com/alcionai/corso/src/pkg/services/m365/api/graph"
"github.com/alcionai/corso/src/pkg/services/m365/custom"
)
const (
@ -863,7 +864,7 @@ func restoreFile(
dii := ir.AugmentItemInfo(
details.ItemInfo{},
rcc.ProtectedResource,
newItem,
custom.ToCustomDriveItem(newItem),
written,
nil)

View File

@ -17,6 +17,7 @@ import (
"github.com/alcionai/corso/src/pkg/selectors"
"github.com/alcionai/corso/src/pkg/services/m365/api"
"github.com/alcionai/corso/src/pkg/services/m365/api/pagers"
"github.com/alcionai/corso/src/pkg/services/m365/custom"
)
type baseSiteHandler struct {
@ -33,7 +34,7 @@ func (h baseSiteHandler) NewDrivePager(
func (h baseSiteHandler) AugmentItemInfo(
dii details.ItemInfo,
resource idname.Provider,
item models.DriveItemable,
item *custom.DriveItem,
size int64,
parentPath *path.Builder,
) details.ItemInfo {

View File

@ -17,6 +17,7 @@ import (
"github.com/alcionai/corso/src/pkg/selectors"
"github.com/alcionai/corso/src/pkg/services/m365/api"
"github.com/alcionai/corso/src/pkg/services/m365/api/pagers"
"github.com/alcionai/corso/src/pkg/services/m365/custom"
)
// ---------------------------------------------------------------------------
@ -42,7 +43,7 @@ func (h baseUserDriveHandler) NewDrivePager(
func (h baseUserDriveHandler) AugmentItemInfo(
dii details.ItemInfo,
resource idname.Provider,
item models.DriveItemable,
item *custom.DriveItem,
size int64,
parentPath *path.Builder,
) details.ItemInfo {

View File

@ -17,6 +17,7 @@ import (
"github.com/alcionai/corso/src/pkg/services/m365/api"
apiMock "github.com/alcionai/corso/src/pkg/services/m365/api/mock"
"github.com/alcionai/corso/src/pkg/services/m365/api/pagers"
"github.com/alcionai/corso/src/pkg/services/m365/custom"
)
// ---------------------------------------------------------------------------
@ -165,7 +166,7 @@ func (h BackupHandler[T]) NewLocationIDer(driveID string, elems ...string) detai
func (h BackupHandler[T]) AugmentItemInfo(
details.ItemInfo,
idname.Provider,
models.DriveItemable,
*custom.DriveItem,
int64,
*path.Builder,
) details.ItemInfo {
@ -405,7 +406,7 @@ func (h RestoreHandler) NewDrivePager(string, []string) pagers.NonDeltaHandler[m
func (h *RestoreHandler) AugmentItemInfo(
details.ItemInfo,
idname.Provider,
models.DriveItemable,
*custom.DriveItem,
int64,
*path.Builder,
) details.ItemInfo {

View File

@ -10,7 +10,6 @@ import (
"syscall"
"github.com/alcionai/clues"
"github.com/microsoftgraph/msgraph-sdk-go/models"
"github.com/microsoftgraph/msgraph-sdk-go/models/odataerrors"
"github.com/pkg/errors"
@ -20,6 +19,7 @@ import (
"github.com/alcionai/corso/src/internal/common/str"
"github.com/alcionai/corso/src/pkg/fault"
"github.com/alcionai/corso/src/pkg/filters"
"github.com/alcionai/corso/src/pkg/services/m365/custom"
)
// ---------------------------------------------------------------------------
@ -517,7 +517,7 @@ func appendIf(a []any, k string, v *string) []any {
// ItemInfo gathers potentially useful information about a drive item,
// and aggregates that data into a map.
func ItemInfo(item models.DriveItemable) map[string]any {
func ItemInfo(item *custom.DriveItem) map[string]any {
m := map[string]any{}
creator := item.GetCreatedByUser()

View File

@ -17,6 +17,7 @@ import (
"github.com/alcionai/corso/src/internal/tester"
"github.com/alcionai/corso/src/pkg/fault"
graphTD "github.com/alcionai/corso/src/pkg/services/m365/api/graph/testdata"
"github.com/alcionai/corso/src/pkg/services/m365/custom"
)
type GraphErrorsUnitSuite struct {
@ -566,7 +567,7 @@ func (suite *GraphErrorsUnitSuite) TestMalwareInfo() {
fault.AddtlMalwareDesc: malDesc,
}
assert.Equal(suite.T(), expect, ItemInfo(i))
assert.Equal(suite.T(), expect, ItemInfo(custom.ToCustomDriveItem(i)))
}
func (suite *GraphErrorsUnitSuite) TestIsErrFolderExists() {

View File

@ -361,3 +361,13 @@ func ToCustomDriveItem(item models.DriveItemable) *DriveItem {
return di
}
// SetParentName overwrites the name stored on the given item reference with
// driveName and returns that same reference. The reference is modified in
// place; a nil reference is returned as nil.
func SetParentName(orig *itemReference, driveName string) *itemReference {
	if orig != nil {
		orig.name = &driveName
	}

	return orig
}

View File

@ -527,3 +527,65 @@ func (suite *driveItemUnitSuite) TestToLiteDriveItemable() {
})
}
}
// TestSetParentName verifies that SetParentName returns nil for a nil
// reference and that, for a non-nil reference, the name is replaced with the
// supplied drive name while the id, path and drive ID keep their values.
func (suite *driveItemUnitSuite) TestSetParentName() {
	parentID := "parentID"
	parentPath := "/parentPath"
	parentName := "parentName"
	parentDriveID := "parentDriveID"

	table := []struct {
		name      string
		driveName string
		// itemFunc builds a fresh reference on every call.
		itemFunc func() *itemReference
		// validateFunc receives an untouched reference (expected) and the
		// result of SetParentName (got).
		validateFunc func(
			t *testing.T,
			expected *itemReference,
			got *itemReference)
	}{
		{
			name: "nil item",
			itemFunc: func() *itemReference {
				return nil
			},
			validateFunc: func(
				t *testing.T,
				expected *itemReference,
				got *itemReference,
			) {
				require.Nil(t, got)
			},
		},
		{
			name:      "set name",
			driveName: "testDrive",
			itemFunc: func() *itemReference {
				return &itemReference{
					id:      &parentID,
					path:    &parentPath,
					name:    &parentName,
					driveID: &parentDriveID,
				}
			},
			validateFunc: func(
				t *testing.T,
				expected *itemReference,
				got *itemReference,
			) {
				// assert.Equal takes (expected, actual) in that order.
				assert.Equal(t, "testDrive", ptr.Val(got.name))
				assert.Equal(t, ptr.Val(expected.id), ptr.Val(got.id))
				assert.Equal(t, ptr.Val(expected.path), ptr.Val(got.path))
				assert.Equal(t, ptr.Val(expected.driveID), ptr.Val(got.driveID))
			},
		},
	}

	for _, test := range table {
		suite.Run(test.name, func() {
			// SetParentName writes to its argument and returns it, so the
			// expectation must come from a separate instance; comparing the
			// result against the input would compare a value with itself.
			expected := test.itemFunc()
			got := SetParentName(test.itemFunc(), test.driveName)

			test.validateFunc(suite.T(), expected, got)
		})
	}
}