From 3fd3da7cafc15678ed5d42f2a1ac5f5135f7d500 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 1 Feb 2023 16:33:53 +0000 Subject: [PATCH 1/6] =?UTF-8?q?=E2=AC=86=EF=B8=8F=20Bump=20github.com/aws/?= =?UTF-8?q?aws-sdk-go=20from=201.44.190=20to=201.44.191=20in=20/src=20(#23?= =?UTF-8?q?51)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [github.com/aws/aws-sdk-go](https://github.com/aws/aws-sdk-go) from 1.44.190 to 1.44.191.
Release notes

Sourced from github.com/aws/aws-sdk-go's releases.

Release v1.44.191 (2023-01-31)

Service Client Updates

Commits

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=github.com/aws/aws-sdk-go&package-manager=go_modules&previous-version=1.44.190&new-version=1.44.191)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) You can trigger a rebase of this PR by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@dependabot rebase` will rebase this PR - `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@dependabot merge` will merge this PR after your CI passes on it - `@dependabot squash and merge` will squash and merge this PR after your CI passes on it - `@dependabot cancel merge` will cancel a previously requested merge and block automerging - `@dependabot reopen` will reopen this PR if it is closed - `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually - `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
--- src/go.mod | 2 +- src/go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/go.mod b/src/go.mod index 0b7fd24ee..f947ed45a 100644 --- a/src/go.mod +++ b/src/go.mod @@ -5,7 +5,7 @@ go 1.19 require ( github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.2.0 github.com/alcionai/clues v0.0.0-20230131232239-cee86233b005 - github.com/aws/aws-sdk-go v1.44.190 + github.com/aws/aws-sdk-go v1.44.191 github.com/aws/aws-xray-sdk-go v1.8.0 github.com/google/uuid v1.3.0 github.com/hashicorp/go-multierror v1.1.1 diff --git a/src/go.sum b/src/go.sum index aa032076f..02f39a5a5 100644 --- a/src/go.sum +++ b/src/go.sum @@ -62,8 +62,8 @@ github.com/alecthomas/units v0.0.0-20190924025748-f65c72e2690d/go.mod h1:rBZYJk5 github.com/alessio/shellescape v1.4.1 h1:V7yhSDDn8LP4lc4jS8pFkt0zCnzVJlG5JXy9BVKJUX0= github.com/andybalholm/brotli v1.0.4 h1:V7DdXeJtZscaqfNuAdSRuRFzuiKlHSC/Zh3zl9qY3JY= github.com/andybalholm/brotli v1.0.4/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig= -github.com/aws/aws-sdk-go v1.44.190 h1:QC+Pf/Ooj7Waf2obOPZbIQOqr00hy4h54j3ZK9mvHcc= -github.com/aws/aws-sdk-go v1.44.190/go.mod h1:aVsgQcEevwlmQ7qHE9I3h+dtQgpqhFB+i8Phjh7fkwI= +github.com/aws/aws-sdk-go v1.44.191 h1:GnbkalCx/AgobaorDMFCa248acmk+91+aHBQOk7ljzU= +github.com/aws/aws-sdk-go v1.44.191/go.mod h1:aVsgQcEevwlmQ7qHE9I3h+dtQgpqhFB+i8Phjh7fkwI= github.com/aws/aws-xray-sdk-go v1.8.0 h1:0xncHZ588wB/geLjbM/esoW3FOEThWy2TJyb4VXfLFY= github.com/aws/aws-xray-sdk-go v1.8.0/go.mod h1:7LKe47H+j3evfvS1+q0wzpoaGXGrF3mUsfM+thqVO+A= github.com/benbjohnson/clock v1.1.0 h1:Q92kusRqC1XV2MjkWETPvjJVqKetz1OzxZB7mHJLju8= From 1594a86c223d45ae410e1dbeec39993d0048450d Mon Sep 17 00:00:00 2001 From: ashmrtn Date: Wed, 1 Feb 2023 09:03:23 -0800 Subject: [PATCH 2/6] OneDrive Items API for mocking (#2322) ## Description Create a pager for drive items that allows for better testing via mocking. Increased testing will come in later PRs ## Does this PR need a docs update or release note? - [ ] :white_check_mark: Yes, it's included - [ ] :clock1: Yes, but in a later PR - [x] :no_entry: No ## Type of change - [ ] :sunflower: Feature - [ ] :bug: Bugfix - [ ] :world_map: Documentation - [x] :robot: Test - [ ] :computer: CI/Deployment - [ ] :broom: Tech Debt/Cleanup ## Issue(s) * #2264 ## Test Plan - [ ] :muscle: Manual - [x] :zap: Unit test - [x] :green_heart: E2E --- src/internal/connector/onedrive/api/drive.go | 80 +++++++++++++---- .../connector/onedrive/collections.go | 14 ++- src/internal/connector/onedrive/drive.go | 88 +++++++++++-------- src/internal/connector/onedrive/item_test.go | 12 ++- 4 files changed, 137 insertions(+), 57 deletions(-) diff --git a/src/internal/connector/onedrive/api/drive.go b/src/internal/connector/onedrive/api/drive.go index 6dd7d46a1..fea6e53a7 100644 --- a/src/internal/connector/onedrive/api/drive.go +++ b/src/internal/connector/onedrive/api/drive.go @@ -3,6 +3,7 @@ package api import ( "context" + msdrives "github.com/microsoftgraph/msgraph-sdk-go/drives" "github.com/microsoftgraph/msgraph-sdk-go/models" mssites "github.com/microsoftgraph/msgraph-sdk-go/sites" msusers "github.com/microsoftgraph/msgraph-sdk-go/users" @@ -12,6 +13,65 @@ import ( "github.com/alcionai/corso/src/internal/connector/graph/api" ) +func getValues[T any](l api.PageLinker) ([]T, error) { + page, ok := l.(interface{ GetValue() []T }) + if !ok { + return nil, errors.Errorf( + "response of type [%T] does not comply with GetValue() interface", + l, + ) + } + + return page.GetValue(), nil +} + +// max we can do is 999 +const pageSize = int32(999) + +type driveItemPager struct { + gs graph.Servicer + builder *msdrives.ItemRootDeltaRequestBuilder + options *msdrives.ItemRootDeltaRequestBuilderGetRequestConfiguration +} + +func NewItemPager( + gs graph.Servicer, + driveID, link string, + fields []string, +) *driveItemPager { + pageCount := pageSize + requestConfig := &msdrives.ItemRootDeltaRequestBuilderGetRequestConfiguration{ + QueryParameters: &msdrives.ItemRootDeltaRequestBuilderGetQueryParameters{ + Top: &pageCount, + Select: fields, + }, + } + + res := &driveItemPager{ + gs: gs, + options: requestConfig, + builder: gs.Client().DrivesById(driveID).Root().Delta(), + } + + if len(link) > 0 { + res.builder = msdrives.NewItemRootDeltaRequestBuilder(link, gs.Adapter()) + } + + return res +} + +func (p *driveItemPager) GetPage(ctx context.Context) (api.DeltaPageLinker, error) { + return p.builder.Get(ctx, p.options) +} + +func (p *driveItemPager) SetNext(link string) { + p.builder = msdrives.NewItemRootDeltaRequestBuilder(link, p.gs.Adapter()) +} + +func (p *driveItemPager) ValuesIn(l api.DeltaPageLinker) ([]models.DriveItemable, error) { + return getValues[models.DriveItemable](l) +} + type userDrivePager struct { gs graph.Servicer builder *msusers.ItemDrivesRequestBuilder @@ -47,15 +107,7 @@ func (p *userDrivePager) SetNext(link string) { } func (p *userDrivePager) ValuesIn(l api.PageLinker) ([]models.Driveable, error) { - page, ok := l.(interface{ GetValue() []models.Driveable }) - if !ok { - return nil, errors.Errorf( - "response of type [%T] does not comply with GetValue() interface", - l, - ) - } - - return page.GetValue(), nil + return getValues[models.Driveable](l) } type siteDrivePager struct { @@ -93,13 +145,5 @@ func (p *siteDrivePager) SetNext(link string) { } func (p *siteDrivePager) ValuesIn(l api.PageLinker) ([]models.Driveable, error) { - page, ok := l.(interface{ GetValue() []models.Driveable }) - if !ok { - return nil, errors.Errorf( - "response of type [%T] does not comply with GetValue() interface", - l, - ) - } - - return page.GetValue(), nil + return getValues[models.Driveable](l) } diff --git a/src/internal/connector/onedrive/collections.go b/src/internal/connector/onedrive/collections.go index 35b81f7f2..1e36b1ea3 100644 --- a/src/internal/connector/onedrive/collections.go +++ b/src/internal/connector/onedrive/collections.go @@ -65,6 +65,13 @@ type Collections struct { // for a OneDrive folder CollectionMap map[string]data.Collection + // Not the most ideal, but allows us to change the pager function for testing + // as needed. This will allow us to mock out some scenarios during testing. + itemPagerFunc func( + servicer graph.Servicer, + driveID, link string, + ) itemPager + // Track stats from drive enumeration. Represents the items backed up. NumItems int NumFiles int @@ -88,6 +95,7 @@ func NewCollections( source: source, matcher: matcher, CollectionMap: map[string]data.Collection{}, + itemPagerFunc: defaultItemPager, service: service, statusUpdater: statusUpdater, ctrl: ctrlOpts, @@ -266,7 +274,11 @@ func (c *Collections) Get( delta, paths, excluded, err := collectItems( ctx, - c.service, + c.itemPagerFunc( + c.service, + driveID, + "", + ), driveID, driveName, c.UpdateCollections, diff --git a/src/internal/connector/onedrive/drive.go b/src/internal/connector/onedrive/drive.go index 6270ec08a..8e1578caf 100644 --- a/src/internal/connector/onedrive/drive.go +++ b/src/internal/connector/onedrive/drive.go @@ -7,7 +7,6 @@ import ( "time" msdrive "github.com/microsoftgraph/msgraph-sdk-go/drive" - msdrives "github.com/microsoftgraph/msgraph-sdk-go/drives" "github.com/microsoftgraph/msgraph-sdk-go/models" "github.com/microsoftgraph/msgraph-sdk-go/models/odataerrors" "github.com/pkg/errors" @@ -135,11 +134,42 @@ type itemCollector func( excluded map[string]struct{}, ) error +type itemPager interface { + GetPage(context.Context) (gapi.DeltaPageLinker, error) + SetNext(nextLink string) + ValuesIn(gapi.DeltaPageLinker) ([]models.DriveItemable, error) +} + +func defaultItemPager( + servicer graph.Servicer, + driveID, link string, +) itemPager { + return api.NewItemPager( + servicer, + driveID, + link, + []string{ + "content.downloadUrl", + "createdBy", + "createdDateTime", + "file", + "folder", + "id", + "lastModifiedDateTime", + "name", + "package", + "parentReference", + "root", + "size", + }, + ) +} + // collectItems will enumerate all items in the specified drive and hand them to the // provided `collector` method func collectItems( ctx context.Context, - service graph.Servicer, + pager itemPager, driveID, driveName string, collector itemCollector, ) (string, map[string]string, map[string]struct{}, error) { @@ -154,34 +184,8 @@ func collectItems( maps.Copy(newPaths, oldPaths) - // TODO: Specify a timestamp in the delta query - // https://docs.microsoft.com/en-us/graph/api/driveitem-delta? - // view=graph-rest-1.0&tabs=http#example-4-retrieving-delta-results-using-a-timestamp - builder := service.Client().DrivesById(driveID).Root().Delta() - pageCount := int32(999) // max we can do is 999 - requestFields := []string{ - "content.downloadUrl", - "createdBy", - "createdDateTime", - "file", - "folder", - "id", - "lastModifiedDateTime", - "name", - "package", - "parentReference", - "root", - "size", - } - requestConfig := &msdrives.ItemRootDeltaRequestBuilderGetRequestConfiguration{ - QueryParameters: &msdrives.ItemRootDeltaRequestBuilderGetQueryParameters{ - Top: &pageCount, - Select: requestFields, - }, - } - for { - r, err := builder.Get(ctx, requestConfig) + page, err := pager.GetPage(ctx) if err != nil { return "", nil, nil, errors.Wrapf( err, @@ -190,23 +194,29 @@ func collectItems( ) } - err = collector(ctx, driveID, driveName, r.GetValue(), oldPaths, newPaths, excluded) + vals, err := pager.ValuesIn(page) + if err != nil { + return "", nil, nil, errors.Wrap(err, "extracting items from response") + } + + err = collector(ctx, driveID, driveName, vals, oldPaths, newPaths, excluded) if err != nil { return "", nil, nil, err } - if r.GetOdataDeltaLink() != nil && len(*r.GetOdataDeltaLink()) > 0 { - newDeltaURL = *r.GetOdataDeltaLink() + nextLink, deltaLink := gapi.NextAndDeltaLink(page) + + if len(deltaLink) > 0 { + newDeltaURL = deltaLink } // Check if there are more items - nextLink := r.GetOdataNextLink() - if nextLink == nil { + if len(nextLink) == 0 { break } - logger.Ctx(ctx).Debugf("Found %s nextLink", *nextLink) - builder = msdrives.NewItemRootDeltaRequestBuilder(*nextLink, service.Adapter()) + logger.Ctx(ctx).Debugw("Found nextLink", "link", nextLink) + pager.SetNext(nextLink) } return newDeltaURL, newPaths, excluded, nil @@ -318,7 +328,11 @@ func GetAllFolders( for _, d := range drives { _, _, _, err = collectItems( ctx, - gs, + defaultItemPager( + gs, + *d.GetId(), + "", + ), *d.GetId(), *d.GetName(), func( diff --git a/src/internal/connector/onedrive/item_test.go b/src/internal/connector/onedrive/item_test.go index 938748ca2..6a8894ebf 100644 --- a/src/internal/connector/onedrive/item_test.go +++ b/src/internal/connector/onedrive/item_test.go @@ -115,7 +115,17 @@ func (suite *ItemIntegrationSuite) TestItemReader_oneDrive() { return nil } - _, _, _, err := collectItems(ctx, suite, suite.userDriveID, "General", itemCollector) + _, _, _, err := collectItems( + ctx, + defaultItemPager( + suite, + suite.userDriveID, + "", + ), + suite.userDriveID, + "General", + itemCollector, + ) require.NoError(suite.T(), err) // Test Requirement 2: Need a file From 24c918e99c09ae481628a5e98466e5563bc530bb Mon Sep 17 00:00:00 2001 From: ashmrtn Date: Wed, 1 Feb 2023 10:57:38 -0800 Subject: [PATCH 3/6] Patch and test some edge cases for deserializing OneDrive metadata (#2341) ## Description Currently no folder path entry is stored for the root folder. This leads to not having a folders map but having a valid delta token if all items are in the root of the drive. Update the code to add at least an empty folders map entry so we don't think we have missing metadata. Add tests to check for these edge cases. Long-term we may want to add a folder entry for the root folder. Whether we do or not may depend on how we decide to set the state field for that collection. ## Does this PR need a docs update or release note? - [ ] :white_check_mark: Yes, it's included - [ ] :clock1: Yes, but in a later PR - [x] :no_entry: No ## Type of change - [ ] :sunflower: Feature - [x] :bug: Bugfix - [ ] :world_map: Documentation - [ ] :robot: Test - [ ] :computer: CI/Deployment - [ ] :broom: Tech Debt/Cleanup ## Issue(s) * #2122 ## Test Plan - [ ] :muscle: Manual - [x] :zap: Unit test - [ ] :green_heart: E2E --- .../connector/onedrive/collections.go | 28 +++++++--- .../connector/onedrive/collections_test.go | 54 +++++++++++++++++++ 2 files changed, 74 insertions(+), 8 deletions(-) diff --git a/src/internal/connector/onedrive/collections.go b/src/internal/connector/onedrive/collections.go index 1e36b1ea3..e3d60fe2b 100644 --- a/src/internal/connector/onedrive/collections.go +++ b/src/internal/connector/onedrive/collections.go @@ -180,7 +180,15 @@ func deserializeMetadata( // Go through and remove partial results (i.e. path mapping but no delta URL // or vice-versa). - for k := range prevDeltas { + for k, v := range prevDeltas { + // Remove entries with an empty delta token as it's not useful. + if len(v) == 0 { + delete(prevDeltas, k) + delete(prevFolders, k) + } + + // Remove entries without a folders map as we can't tell kopia the + // hierarchy changes. if _, ok := prevFolders[k]; !ok { delete(prevDeltas, k) } @@ -287,17 +295,21 @@ func (c *Collections) Get( return nil, nil, err } + // It's alright to have an empty folders map (i.e. no folders found) but not + // an empty delta token. This is because when deserializing the metadata we + // remove entries for which there is no corresponding delta token/folder. If + // we leave empty delta tokens then we may end up setting the State field + // for collections when not actually getting delta results. if len(delta) > 0 { deltaURLs[driveID] = delta } - if len(paths) > 0 { - folderPaths[driveID] = map[string]string{} - - for id, p := range paths { - folderPaths[driveID][id] = p - } - } + // Avoid the edge case where there's no paths but we do have a valid delta + // token. We can accomplish this by adding an empty paths map for this + // drive. If we don't have this then the next backup won't use the delta + // token because it thinks the folder paths weren't persisted. + folderPaths[driveID] = map[string]string{} + maps.Copy(folderPaths[driveID], paths) maps.Copy(excludedItems, excluded) } diff --git a/src/internal/connector/onedrive/collections_test.go b/src/internal/connector/onedrive/collections_test.go index c250afe2a..39e094cd6 100644 --- a/src/internal/connector/onedrive/collections_test.go +++ b/src/internal/connector/onedrive/collections_test.go @@ -711,6 +711,60 @@ func (suite *OneDriveCollectionsSuite) TestDeserializeMetadata() { expectedPaths: map[string]map[string]string{}, errCheck: assert.NoError, }, + { + // An empty path map but valid delta results in metadata being returned + // since it's possible to have a drive with no folders other than the + // root. + name: "EmptyPaths", + cols: []func() []graph.MetadataCollectionEntry{ + func() []graph.MetadataCollectionEntry { + return []graph.MetadataCollectionEntry{ + graph.NewMetadataEntry( + graph.DeltaURLsFileName, + map[string]string{driveID1: deltaURL1}, + ), + graph.NewMetadataEntry( + graph.PreviousPathFileName, + map[string]map[string]string{ + driveID1: {}, + }, + ), + } + }, + }, + expectedDeltas: map[string]string{driveID1: deltaURL1}, + expectedPaths: map[string]map[string]string{driveID1: {}}, + errCheck: assert.NoError, + }, + { + // An empty delta map but valid path results in no metadata for that drive + // being returned since the path map is only useful if we have a valid + // delta. + name: "EmptyDeltas", + cols: []func() []graph.MetadataCollectionEntry{ + func() []graph.MetadataCollectionEntry { + return []graph.MetadataCollectionEntry{ + graph.NewMetadataEntry( + graph.DeltaURLsFileName, + map[string]string{ + driveID1: "", + }, + ), + graph.NewMetadataEntry( + graph.PreviousPathFileName, + map[string]map[string]string{ + driveID1: { + folderID1: path1, + }, + }, + ), + } + }, + }, + expectedDeltas: map[string]string{}, + expectedPaths: map[string]map[string]string{}, + errCheck: assert.NoError, + }, { name: "SuccessTwoDrivesTwoCollections", cols: []func() []graph.MetadataCollectionEntry{ From 93e8b67d156993281fe061966520c3a4a140ef58 Mon Sep 17 00:00:00 2001 From: Danny Date: Wed, 1 Feb 2023 16:07:14 -0500 Subject: [PATCH 4/6] GC: Backup: ItemAttachment Complete (#2358) ## Description Bug fix: Ensures that `item.Attachment` content is completely backed up. - Exchange Changes affect the behavior of Mail and Events. Downloads can be potentially bigger if the user have these types of attachments. - `item.Attachments` are specific to Outlook objects such as `Events` or `Messages` are included as an attachment rather than inline. ## Does this PR need a docs update or release note? - [x] :white_check_mark: Yes, it's included ## Type of change - [x] :bug: Bugfix ## Issue(s) * related to #2353 ## Test Plan - [x] :muscle: Manual --- src/internal/connector/exchange/api/events.go | 11 +++++++++-- src/internal/connector/exchange/api/mail.go | 7 ++++++- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/src/internal/connector/exchange/api/events.go b/src/internal/connector/exchange/api/events.go index 962b2e576..810c1a2ff 100644 --- a/src/internal/connector/exchange/api/events.go +++ b/src/internal/connector/exchange/api/events.go @@ -108,7 +108,14 @@ func (c Events) GetItem( return nil, nil, err } - var errs *multierror.Error + var ( + errs *multierror.Error + options = &users.ItemEventsItemAttachmentsRequestBuilderGetRequestConfiguration{ + QueryParameters: &users.ItemEventsItemAttachmentsRequestBuilderGetQueryParameters{ + Expand: []string{"microsoft.graph.itemattachment/item"}, + }, + } + ) if *event.GetHasAttachments() || HasAttachments(event.GetBody()) { for count := 0; count < numberOfRetries; count++ { @@ -117,7 +124,7 @@ func (c Events) GetItem( UsersById(user). EventsById(itemID). Attachments(). - Get(ctx, nil) + Get(ctx, options) if err == nil { event.SetAttachments(attached.GetValue()) break diff --git a/src/internal/connector/exchange/api/mail.go b/src/internal/connector/exchange/api/mail.go index 29c03b82f..6a863322e 100644 --- a/src/internal/connector/exchange/api/mail.go +++ b/src/internal/connector/exchange/api/mail.go @@ -130,13 +130,18 @@ func (c Mail) GetItem( var errs *multierror.Error if *mail.GetHasAttachments() || HasAttachments(mail.GetBody()) { + options := &users.ItemMessagesItemAttachmentsRequestBuilderGetRequestConfiguration{ + QueryParameters: &users.ItemMessagesItemAttachmentsRequestBuilderGetQueryParameters{ + Expand: []string{"microsoft.graph.itemattachment/item"}, + }, + } for count := 0; count < numberOfRetries; count++ { attached, err := c.largeItem. Client(). UsersById(user). MessagesById(itemID). Attachments(). - Get(ctx, nil) + Get(ctx, options) if err == nil { mail.SetAttachments(attached.GetValue()) break From cf1c80271f7a3416db7fb83febbc0c29bcc6662f Mon Sep 17 00:00:00 2001 From: ashmrtn Date: Wed, 1 Feb 2023 13:34:16 -0800 Subject: [PATCH 5/6] OneDrive tests for Get() (#2342) ## Description Create a test case that allows checking most of the Get call (getting item data is not mocked right now). This allows better checking on things like: * having multiple drives to backup * having items split across multiple delta query pages * errors returned by the API * aggregation of delta URLs and folder paths in metadata Eventually these tests could help with checking: * consumption of metadata for incremental backups * consumption of delta tokens for incremental backups Long-term, we may want to merge these with the UpdateCollections tests as there is overlap between the two. We may also want to consider a more stable API for checking the returned items. Right now it partly relies on being able to cast to a onedrive.Collection struct instead of using `Items()` ## Does this PR need a docs update or release note? - [ ] :white_check_mark: Yes, it's included - [ ] :clock1: Yes, but in a later PR - [x] :no_entry: No ## Type of change - [ ] :sunflower: Feature - [ ] :bug: Bugfix - [ ] :world_map: Documentation - [x] :robot: Test - [ ] :computer: CI/Deployment - [ ] :broom: Tech Debt/Cleanup ## Issue(s) * #2264 ## Test Plan - [ ] :muscle: Manual - [x] :zap: Unit test - [ ] :green_heart: E2E --- .../connector/onedrive/collections.go | 29 +- .../connector/onedrive/collections_test.go | 423 +++++++++++++++++- 2 files changed, 439 insertions(+), 13 deletions(-) diff --git a/src/internal/connector/onedrive/collections.go b/src/internal/connector/onedrive/collections.go index e3d60fe2b..200e51e23 100644 --- a/src/internal/connector/onedrive/collections.go +++ b/src/internal/connector/onedrive/collections.go @@ -67,6 +67,12 @@ type Collections struct { // Not the most ideal, but allows us to change the pager function for testing // as needed. This will allow us to mock out some scenarios during testing. + drivePagerFunc func( + source driveSource, + servicer graph.Servicer, + resourceOwner string, + fields []string, + ) (drivePager, error) itemPagerFunc func( servicer graph.Servicer, driveID, link string, @@ -89,16 +95,17 @@ func NewCollections( ctrlOpts control.Options, ) *Collections { return &Collections{ - itemClient: itemClient, - tenant: tenant, - resourceOwner: resourceOwner, - source: source, - matcher: matcher, - CollectionMap: map[string]data.Collection{}, - itemPagerFunc: defaultItemPager, - service: service, - statusUpdater: statusUpdater, - ctrl: ctrlOpts, + itemClient: itemClient, + tenant: tenant, + resourceOwner: resourceOwner, + source: source, + matcher: matcher, + CollectionMap: map[string]data.Collection{}, + drivePagerFunc: PagerForSource, + itemPagerFunc: defaultItemPager, + service: service, + statusUpdater: statusUpdater, + ctrl: ctrlOpts, } } @@ -250,7 +257,7 @@ func (c *Collections) Get( } // Enumerate drives for the specified resourceOwner - pager, err := PagerForSource(c.source, c.service, c.resourceOwner, nil) + pager, err := c.drivePagerFunc(c.source, c.service, c.resourceOwner, nil) if err != nil { return nil, nil, err } diff --git a/src/internal/connector/onedrive/collections_test.go b/src/internal/connector/onedrive/collections_test.go index 39e094cd6..21dae9549 100644 --- a/src/internal/connector/onedrive/collections_test.go +++ b/src/internal/connector/onedrive/collections_test.go @@ -1,9 +1,11 @@ package onedrive import ( + "context" "strings" "testing" + "github.com/google/uuid" "github.com/microsoftgraph/msgraph-sdk-go/models" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -11,6 +13,7 @@ import ( "golang.org/x/exp/maps" "github.com/alcionai/corso/src/internal/connector/graph" + gapi "github.com/alcionai/corso/src/internal/connector/graph/api" "github.com/alcionai/corso/src/internal/connector/support" "github.com/alcionai/corso/src/internal/data" "github.com/alcionai/corso/src/internal/tester" @@ -969,7 +972,419 @@ func (suite *OneDriveCollectionsSuite) TestDeserializeMetadata() { } } -func driveItem(id string, name string, parentPath string, isFile, isFolder, isPackage bool) models.DriveItemable { +type mockDeltaPageLinker struct { + link *string + delta *string +} + +func (pl *mockDeltaPageLinker) GetOdataNextLink() *string { + return pl.link +} + +func (pl *mockDeltaPageLinker) GetOdataDeltaLink() *string { + return pl.delta +} + +type deltaPagerResult struct { + items []models.DriveItemable + nextLink *string + deltaLink *string + err error +} + +type mockItemPager struct { + // DriveID -> set of return values for queries for that drive. + toReturn []deltaPagerResult + getIdx int +} + +func (p *mockItemPager) GetPage(context.Context) (gapi.DeltaPageLinker, error) { + if len(p.toReturn) <= p.getIdx { + return nil, assert.AnError + } + + idx := p.getIdx + p.getIdx++ + + return &mockDeltaPageLinker{ + p.toReturn[idx].nextLink, + p.toReturn[idx].deltaLink, + }, p.toReturn[idx].err +} + +func (p *mockItemPager) SetNext(string) {} + +func (p *mockItemPager) ValuesIn(gapi.DeltaPageLinker) ([]models.DriveItemable, error) { + idx := p.getIdx + if idx > 0 { + // Return values lag by one since we increment in GetPage(). + idx-- + } + + if len(p.toReturn) <= idx { + return nil, assert.AnError + } + + return p.toReturn[idx].items, nil +} + +func (suite *OneDriveCollectionsSuite) TestGet() { + anyFolder := (&selectors.OneDriveBackup{}).Folders(selectors.Any())[0] + + tenant := "a-tenant" + user := "a-user" + + metadataPath, err := path.Builder{}.ToServiceCategoryMetadataPath( + tenant, + user, + path.OneDriveService, + path.FilesCategory, + false, + ) + require.NoError(suite.T(), err, "making metadata path") + + folderPath := expectedPathAsSlice( + suite.T(), + tenant, + user, + testBaseDrivePath+"/folder", + )[0] + + empty := "" + next := "next" + delta := "delta1" + delta2 := "delta2" + + driveID1 := uuid.NewString() + drive1 := models.NewDrive() + drive1.SetId(&driveID1) + drive1.SetName(&driveID1) + + driveID2 := uuid.NewString() + drive2 := models.NewDrive() + drive2.SetId(&driveID2) + drive2.SetName(&driveID2) + + driveBasePath2 := "drive/driveID2/root:" + + folderPath2 := expectedPathAsSlice( + suite.T(), + tenant, + user, + driveBasePath2+"/folder", + )[0] + + table := []struct { + name string + drives []models.Driveable + items map[string][]deltaPagerResult + errCheck assert.ErrorAssertionFunc + // Collection name -> set of item IDs. We can't check item data because + // that's not mocked out. Metadata is checked separately. + expectedCollections map[string][]string + expectedDeltaURLs map[string]string + expectedFolderPaths map[string]map[string]string + expectedDelList map[string]struct{} + }{ + { + name: "OneDrive_OneItemPage_DelFileOnly_NoFolders_NoErrors", + drives: []models.Driveable{drive1}, + items: map[string][]deltaPagerResult{ + driveID1: { + { + items: []models.DriveItemable{ + delItem("file", testBaseDrivePath, true, false, false), + }, + deltaLink: &delta, + }, + }, + }, + errCheck: assert.NoError, + expectedCollections: map[string][]string{}, + expectedDeltaURLs: map[string]string{ + driveID1: delta, + }, + expectedFolderPaths: map[string]map[string]string{ + // We need an empty map here so deserializing metadata knows the delta + // token for this drive is valid. + driveID1: {}, + }, + expectedDelList: map[string]struct{}{ + "file": {}, + }, + }, + { + name: "OneDrive_OneItemPage_NoFolders_NoErrors", + drives: []models.Driveable{drive1}, + items: map[string][]deltaPagerResult{ + driveID1: { + { + items: []models.DriveItemable{ + driveItem("file", "file", testBaseDrivePath, true, false, false), + }, + deltaLink: &delta, + }, + }, + }, + errCheck: assert.NoError, + expectedCollections: map[string][]string{ + expectedPathAsSlice( + suite.T(), + tenant, + user, + testBaseDrivePath, + )[0]: {"file"}, + }, + expectedDeltaURLs: map[string]string{ + driveID1: delta, + }, + expectedFolderPaths: map[string]map[string]string{ + // We need an empty map here so deserializing metadata knows the delta + // token for this drive is valid. + driveID1: {}, + }, + expectedDelList: map[string]struct{}{}, + }, + { + name: "OneDrive_OneItemPage_NoErrors", + drives: []models.Driveable{drive1}, + items: map[string][]deltaPagerResult{ + driveID1: { + { + items: []models.DriveItemable{ + driveItem("folder", "folder", testBaseDrivePath, false, true, false), + driveItem("file", "file", testBaseDrivePath+"/folder", true, false, false), + }, + deltaLink: &delta, + }, + }, + }, + errCheck: assert.NoError, + expectedCollections: map[string][]string{ + folderPath: {"file"}, + }, + expectedDeltaURLs: map[string]string{ + driveID1: delta, + }, + expectedFolderPaths: map[string]map[string]string{ + driveID1: { + "folder": folderPath, + }, + }, + expectedDelList: map[string]struct{}{}, + }, + { + name: "OneDrive_OneItemPage_EmptyDelta_NoErrors", + drives: []models.Driveable{drive1}, + items: map[string][]deltaPagerResult{ + driveID1: { + { + items: []models.DriveItemable{ + driveItem("folder", "folder", testBaseDrivePath, false, true, false), + driveItem("file", "file", testBaseDrivePath+"/folder", true, false, false), + }, + deltaLink: &empty, + }, + }, + }, + errCheck: assert.NoError, + expectedCollections: map[string][]string{ + folderPath: {"file"}, + }, + expectedDeltaURLs: map[string]string{}, + expectedFolderPaths: map[string]map[string]string{}, + expectedDelList: map[string]struct{}{}, + }, + { + name: "OneDrive_TwoItemPages_NoErrors", + drives: []models.Driveable{drive1}, + items: map[string][]deltaPagerResult{ + driveID1: { + { + items: []models.DriveItemable{ + driveItem("folder", "folder", testBaseDrivePath, false, true, false), + driveItem("file", "file", testBaseDrivePath+"/folder", true, false, false), + }, + nextLink: &next, + }, + { + items: []models.DriveItemable{ + driveItem("folder", "folder", testBaseDrivePath, false, true, false), + driveItem("file2", "file2", testBaseDrivePath+"/folder", true, false, false), + }, + deltaLink: &delta, + }, + }, + }, + errCheck: assert.NoError, + expectedCollections: map[string][]string{ + folderPath: {"file", "file2"}, + }, + expectedDeltaURLs: map[string]string{ + driveID1: delta, + }, + expectedFolderPaths: map[string]map[string]string{ + driveID1: { + "folder": folderPath, + }, + }, + expectedDelList: map[string]struct{}{}, + }, + { + name: "TwoDrives_OneItemPageEach_NoErrors", + drives: []models.Driveable{ + drive1, + drive2, + }, + items: map[string][]deltaPagerResult{ + driveID1: { + { + items: []models.DriveItemable{ + driveItem("folder", "folder", testBaseDrivePath, false, true, false), + driveItem("file", "file", testBaseDrivePath+"/folder", true, false, false), + }, + deltaLink: &delta, + }, + }, + driveID2: { + { + items: []models.DriveItemable{ + driveItem("folder", "folder", driveBasePath2, false, true, false), + driveItem("file", "file", driveBasePath2+"/folder", true, false, false), + }, + deltaLink: &delta2, + }, + }, + }, + errCheck: assert.NoError, + expectedCollections: map[string][]string{ + folderPath: {"file"}, + folderPath2: {"file"}, + }, + expectedDeltaURLs: map[string]string{ + driveID1: delta, + driveID2: delta2, + }, + expectedFolderPaths: map[string]map[string]string{ + driveID1: { + "folder": folderPath, + }, + driveID2: { + "folder": folderPath2, + }, + }, + expectedDelList: map[string]struct{}{}, + }, + { + name: "OneDrive_OneItemPage_Errors", + drives: []models.Driveable{drive1}, + items: map[string][]deltaPagerResult{ + driveID1: { + { + err: assert.AnError, + }, + }, + }, + errCheck: assert.Error, + expectedCollections: nil, + expectedDeltaURLs: nil, + expectedFolderPaths: nil, + expectedDelList: nil, + }, + } + for _, test := range table { + suite.T().Run(test.name, func(t *testing.T) { + ctx, flush := tester.NewContext() + defer flush() + + drivePagerFunc := func( + source driveSource, + servicer graph.Servicer, + resourceOwner string, + fields []string, + ) (drivePager, error) { + return &mockDrivePager{ + toReturn: []pagerResult{ + { + drives: test.drives, + }, + }, + }, nil + } + + itemPagerFunc := func( + servicer graph.Servicer, + driveID, link string, + ) itemPager { + return &mockItemPager{ + toReturn: test.items[driveID], + } + } + + c := NewCollections( + graph.HTTPClient(graph.NoTimeout()), + tenant, + user, + OneDriveSource, + testFolderMatcher{anyFolder}, + &MockGraphService{}, + func(*support.ConnectorOperationStatus) {}, + control.Options{}, + ) + c.drivePagerFunc = drivePagerFunc + c.itemPagerFunc = itemPagerFunc + + // TODO(ashmrtn): Allow passing previous metadata. + cols, _, err := c.Get(ctx, nil) + test.errCheck(t, err) + + if err != nil { + return + } + + for _, baseCol := range cols { + folderPath := baseCol.FullPath().String() + if folderPath == metadataPath.String() { + deltas, paths, err := deserializeMetadata(ctx, []data.Collection{baseCol}) + if !assert.NoError(t, err, "deserializing metadata") { + continue + } + + assert.Equal(t, test.expectedDeltaURLs, deltas) + assert.Equal(t, test.expectedFolderPaths, paths) + + continue + } + + // TODO(ashmrtn): We should really be getting items in the collection + // via the Items() channel, but we don't have a way to mock out the + // actual item fetch yet (mostly wiring issues). The lack of that makes + // this check a bit more bittle since internal details can change. + col, ok := baseCol.(*Collection) + require.True(t, ok, "getting onedrive.Collection handle") + + itemIDs := make([]string, 0, len(col.driveItems)) + + for id := range col.driveItems { + itemIDs = append(itemIDs, id) + } + + assert.ElementsMatch(t, test.expectedCollections[folderPath], itemIDs) + } + + // TODO(ashmrtn): Uncomment this when we begin return the set of items to + // remove from the upcoming backup. + // assert.Equal(t, test.expectedDelList, delList) + }) + } +} + +func driveItem( + id string, + name string, + parentPath string, + isFile, isFolder, isPackage bool, +) models.DriveItemable { item := models.NewDriveItem() item.SetName(&name) item.SetId(&id) @@ -992,7 +1407,11 @@ func driveItem(id string, name string, parentPath string, isFile, isFolder, isPa // delItem creates a DriveItemable that is marked as deleted. path must be set // to the base drive path. -func delItem(id string, parentPath string, isFile, isFolder, isPackage bool) models.DriveItemable { +func delItem( + id string, + parentPath string, + isFile, isFolder, isPackage bool, +) models.DriveItemable { item := models.NewDriveItem() item.SetId(&id) item.SetDeleted(models.NewDeleted()) From 46d584ec7502b6727e419d44cac22ba444cefb80 Mon Sep 17 00:00:00 2001 From: Danny Date: Wed, 1 Feb 2023 18:29:33 -0500 Subject: [PATCH 6/6] GC: Restore: Disable Item.Attachment Restore (#2359) ## Description `Item.Attachments` have unique properties that require transformation to be uploaded into the M365 system. All objects of this type are temporarily prevented from uploading via this patch. As Issue #2353 is addressed, there will be more coverage for the various types of item attachments. ## Does this PR need a docs update or release note? - [x] :white_check_mark: Yes, it's included ## Type of change - [x] :sunflower: Feature ## Issue(s) *related to #2353 ## Test Plan - [x] :muscle: Manual --- CHANGELOG.md | 2 +- src/internal/connector/exchange/attachment.go | 11 +++++++++ .../connector/exchange/restore_test.go | 14 ++++++++++- .../mockconnector/mock_data_message.go | 23 +++++++++++++++++++ 4 files changed, 48 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3333c7ad6..50a5f3270 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,7 +20,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Check if the user specified for an exchange backup operation has a mailbox. ### Changed - +- Item.Attachments are disabled from being restored for the patching of ([#2353](https://github.com/alcionai/corso/issues/2353)) - BetaClient introduced. Enables Corso to be able to interact with SharePoint Page objects. Package located `/internal/connector/graph/betasdk` - Handle case where user's drive has not been initialized - Inline attachments (e.g. copy/paste ) are discovered and backed up correctly ([#2163](https://github.com/alcionai/corso/issues/2163)) diff --git a/src/internal/connector/exchange/attachment.go b/src/internal/connector/exchange/attachment.go index 5cbce271c..5b6334e21 100644 --- a/src/internal/connector/exchange/attachment.go +++ b/src/internal/connector/exchange/attachment.go @@ -53,6 +53,17 @@ func uploadAttachment( return nil } + // item Attachments to be skipped until the completion of Issue #2353 + if attachmentType == models.ITEM_ATTACHMENTTYPE { + logger.Ctx(ctx).Infow("item attachment uploads are not supported ", + "attachment_name", *attachment.GetName(), // TODO: Update to support PII protection + "attachment_type", attachmentType, + "attachment_id", *attachment.GetId(), + ) + + return nil + } + // For Item/Reference attachments *or* file attachments < 3MB, use the attachments endpoint if attachmentType != models.FILE_ATTACHMENTTYPE || *attachment.GetSize() < largeAttachmentSize { err := uploader.uploadSmallAttachment(ctx, attachment) diff --git a/src/internal/connector/exchange/restore_test.go b/src/internal/connector/exchange/restore_test.go index 9c32fd530..187d0c127 100644 --- a/src/internal/connector/exchange/restore_test.go +++ b/src/internal/connector/exchange/restore_test.go @@ -175,6 +175,18 @@ func (suite *ExchangeRestoreSuite) TestRestoreExchangeObject() { return *folder.GetId() }, }, + { + name: "Test Mail: Item Attachment", + bytes: mockconnector.GetMockMessageWithItemAttachmentEvent("Event Item Attachment"), + category: path.EmailCategory, + destination: func(t *testing.T, ctx context.Context) string { + folderName := "TestRestoreMailItemAttachment: " + common.FormatSimpleDateTime(now) + folder, err := suite.ac.Mail().CreateMailFolder(ctx, userID, folderName) + require.NoError(t, err) + + return *folder.GetId() + }, + }, { name: "Test Mail: One Large Attachment", bytes: mockconnector.GetMockMessageWithLargeAttachment("Restore Large Attachment"), @@ -266,7 +278,7 @@ func (suite *ExchangeRestoreSuite) TestRestoreExchangeObject() { userID, ) assert.NoError(t, err, support.ConnectorStackErrorTrace(err)) - assert.NotNil(t, info, "item info is populated") + assert.NotNil(t, info, "item info was not populated") assert.NoError(t, deleters[test.category].DeleteContainer(ctx, userID, destination)) }) } diff --git a/src/internal/connector/mockconnector/mock_data_message.go b/src/internal/connector/mockconnector/mock_data_message.go index 597447492..da06cbaab 100644 --- a/src/internal/connector/mockconnector/mock_data_message.go +++ b/src/internal/connector/mockconnector/mock_data_message.go @@ -336,3 +336,26 @@ func GetMockEventMessageRequest(subject string) []byte { return []byte(message) } + +func GetMockMessageWithItemAttachmentEvent(subject string) []byte { + //nolint:lll + message := "{\"id\":\"AAMkAGQ1NzViZTdhLTEwMTMtNGJjNi05YWI2LTg4NWRlZDA2Y2UxOABGAAAAAAAPvVwUramXT7jlSGpVU8_7BwB8wYc0thTTTYl3RpEYIUq_AAAAAAEMAAB8wYc0thTTTYl3RpEYIUq_AADFfThMAAA=\",\"@odata.type\":\"#microsoft.graph.message\"," + + "\"@odata.etag\":\"W/\\\"CQAAABYAAAB8wYc0thTTTYl3RpEYIUq+AADFK3BH\\\"\",\"@odata.context\":\"https://graph.microsoft.com/v1.0/$metadata#users('dustina%408qzvrj.onmicrosoft.com')/messages/$entity\",\"categories\":[]," + + "\"changeKey\":\"CQAAABYAAAB8wYc0thTTTYl3RpEYIUq+AADFK3BH\",\"createdDateTime\":\"2023-02-01T13:48:43Z\",\"lastModifiedDateTime\":\"2023-02-01T18:27:03Z\"," + + "\"attachments\":[{\"id\":\"AAMkAGQ1NzViZTdhLTEwMTMtNGJjNi05YWI2LTg4NWRlZDA2Y2UxOABGAAAAAAAPvVwUramXT7jlSGpVU8_7BwB8wYc0thTTTYl3RpEYIUq_AAAAAAEMAAB8wYc0thTTTYl3RpEYIUq_AADFfThMAAABEgAQAKHxTL6mNCZPo71dbwrfKYM=\"," + + "\"@odata.type\":\"#microsoft.graph.itemAttachment\",\"isInline\":false,\"lastModifiedDateTime\":\"2023-02-01T13:52:56Z\",\"name\":\"Holidayevent\",\"size\":2059,\"item\":{\"id\":\"\",\"@odata.type\":\"#microsoft.graph.event\"," + + "\"createdDateTime\":\"2023-02-01T13:52:56Z\",\"lastModifiedDateTime\":\"2023-02-01T13:52:56Z\",\"body\":{\"content\":\"\\r\\nLet'slookforfunding!\"," + + "\"contentType\":\"html\"},\"end\":{\"dateTime\":\"2016-12-02T19:00:00.0000000Z\",\"timeZone\":\"UTC\"}," + + "\"hasAttachments\":false,\"isAllDay\":false,\"isCancelled\":false,\"isDraft\":true,\"isOnlineMeeting\":false,\"isOrganizer\":true,\"isReminderOn\":false,\"organizer\":{\"emailAddress\":{\"address\":\"" + defaultMessageFrom + "\",\"name\":\"" + defaultAlias + "\"}}," + + "\"originalEndTimeZone\":\"tzone://Microsoft/Utc\",\"originalStartTimeZone\":\"tzone://Microsoft/Utc\",\"reminderMinutesBeforeStart\":0,\"responseRequested\":true,\"start\":{\"dateTime\":\"2016-12-02T18:00:00.0000000Z\",\"timeZone\":\"UTC\"}," + + "\"subject\":\"Discussgiftsforchildren\",\"type\":\"singleInstance\"}}],\"bccRecipients\":[],\"body\":{\"content\":\"\\r\\n\\r\\n\\r\\nLookingtodothis \",\"contentType\":\"html\"}," + + "\"bodyPreview\":\"Lookingtodothis\",\"ccRecipients\":[],\"conversationId\":\"AAQkAGQ1NzViZTdhLTEwMTMtNGJjNi05YWI2LTg4NWRlZDA2Y2UxOAAQADGvj5ACBMdGpESX4xSOxCo=\",\"conversationIndex\":\"AQHZNkPmMa+PkAIEx0akRJfjFI7EKg==\",\"flag\":{\"flagStatus\":\"notFlagged\"}," + + "\"from\":{\"emailAddress\":{\"address\":\"" + defaultMessageFrom + "\",\"name\":\"" + defaultAlias + "\"}},\"hasAttachments\":true,\"importance\":\"normal\",\"inferenceClassification\":\"focused\"," + + "\"internetMessageId\":\"\",\"isDeliveryReceiptRequested\":false,\"isDraft\":false,\"isRead\":true,\"isReadReceiptRequested\":false," + + "\"parentFolderId\":\"AQMkAGQ1NzViZTdhLTEwMTMtNGJjNi05YWI2LTg4ADVkZWQwNmNlMTgALgAAAw_9XBStqZdPuOVIalVTz7sBAHzBhzS2FNNNiXdGkRghSr4AAAIBDAAAAA==\",\"receivedDateTime\":\"2023-02-01T13:48:47Z\",\"replyTo\":[]," + + "\"sender\":{\"emailAddress\":{\"address\":\"" + defaultMessageSender + "\",\"name\":\"" + defaultAlias + "\"}},\"sentDateTime\":\"2023-02-01T13:48:46Z\"," + + "\"subject\":\"" + subject + "\",\"toRecipients\":[{\"emailAddress\":{\"address\":\"" + defaultMessageTo + "\",\"name\":\"" + defaultAlias + "\"}}]," + + "\"webLink\":\"https://outlook.office365.com/owa/?ItemID=AAMkAGQ1NzViZTdhLTEwMTMtNGJjNi05YWI2LTg4NWRlZDA2Y2UxOABGAAAAAAAPvVwUramXT7jlSGpVU8%2B7BwB8wYc0thTTTYl3RpEYIUq%2BAAAAAAEMAAB8wYc0thTTTYl3RpEYIUq%2BAADFfThMAAA%3D&exvsurl=1&viewmodel=ReadMessageItem\"}" + + return []byte(message) +}