From e828209a303d67bbf7555f9d6a55bae74503af06 Mon Sep 17 00:00:00 2001 From: Keepers Date: Wed, 25 Jan 2023 03:44:00 -0700 Subject: [PATCH 01/18] return gc stats errors in backup (#2248) ## Description If gc.stats reports a non-zero error count at the end of a backup, retrieve the error from the status and return it as the backup operation err. ## Does this PR need a docs update or release note? - [x] :no_entry: No ## Type of change - [x] :bug: Bugfix ## Test Plan - [x] :zap: Unit test - [x] :green_heart: E2E --- src/internal/connector/support/status.go | 4 ++++ src/internal/operations/backup.go | 8 +++----- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/internal/connector/support/status.go b/src/internal/connector/support/status.go index fb08ea1c4..3f2435263 100644 --- a/src/internal/connector/support/status.go +++ b/src/internal/connector/support/status.go @@ -4,6 +4,7 @@ import ( "context" "fmt" + multierror "github.com/hashicorp/go-multierror" bytesize "github.com/inhies/go-bytesize" "github.com/alcionai/corso/src/pkg/logger" @@ -21,6 +22,7 @@ type ConnectorOperationStatus struct { FolderCount int Successful int ErrorCount int + Err error incomplete bool incompleteReason string additionalDetails string @@ -70,6 +72,7 @@ func CreateStatus( FolderCount: folders, Successful: cm.Successes, ErrorCount: numErr, + Err: err, incomplete: hasErrors, incompleteReason: reason, bytes: cm.TotalBytes, @@ -115,6 +118,7 @@ func MergeStatus(one, two ConnectorOperationStatus) ConnectorOperationStatus { FolderCount: one.FolderCount + two.FolderCount, Successful: one.Successful + two.Successful, ErrorCount: one.ErrorCount + two.ErrorCount, + Err: multierror.Append(one.Err, two.Err).ErrorOrNil(), bytes: one.bytes + two.bytes, incomplete: hasErrors, incompleteReason: one.incompleteReason + ", " + two.incompleteReason, diff --git a/src/internal/operations/backup.go b/src/internal/operations/backup.go index 92a8b93c6..e2c2bd0eb 100644 --- a/src/internal/operations/backup.go +++ b/src/internal/operations/backup.go @@ -208,13 +208,11 @@ func (op *BackupOperation) Run(ctx context.Context) (err error) { opStats.gc = gc.AwaitStatus() if opStats.gc.ErrorCount > 0 { - opStats.writeErr = multierror.Append(nil, opStats.writeErr, errors.Errorf( - "%v errors reported while fetching item data", - opStats.gc.ErrorCount, - )).ErrorOrNil() + merr := multierror.Append(opStats.readErr, errors.Wrap(opStats.gc.Err, "retrieving data")) + opStats.readErr = merr.ErrorOrNil() // Need to exit before we set started to true else we'll report no errors. - return opStats.writeErr + return opStats.readErr } // should always be 1, since backups are 1:1 with resourceOwners. From 42133ffc7d973cd82215807ad6fc3270c4158d2e Mon Sep 17 00:00:00 2001 From: Danny Date: Wed, 25 Jan 2023 06:15:52 -0500 Subject: [PATCH 02/18] [BUG FIX]: CLI: Details: Sharepoint (#2222) ## Description Enabling backup commands for `SharePoint.Pages` in #2216 introduced failures when `backup details()` called in Corso. Test suite updated ## Does this PR need a docs update or release note? - [x] :no_entry: No ## Type of change - [x] :bug: Bugfix ## Issue(s) * related to #2216 * related to #2173 ## Test Plan - [x] :zap: Unit test --- src/pkg/selectors/sharepoint.go | 4 +-- src/pkg/selectors/sharepoint_test.go | 40 ++++++++++++++++++++++++---- 2 files changed, 37 insertions(+), 7 deletions(-) diff --git a/src/pkg/selectors/sharepoint.go b/src/pkg/selectors/sharepoint.go index 3c47c847b..7138acd6c 100644 --- a/src/pkg/selectors/sharepoint.go +++ b/src/pkg/selectors/sharepoint.go @@ -224,7 +224,7 @@ func (s *sharePoint) AllData() []SharePointScope { scopes, makeScope[SharePointScope](SharePointLibrary, Any()), makeScope[SharePointScope](SharePointList, Any()), - makeScope[SharePointScope](SharePointPage, Any()), + makeScope[SharePointScope](SharePointPageFolder, Any()), ) return scopes @@ -323,7 +323,7 @@ func (s *sharePoint) PageItems(pages, items []string, opts ...option) []SharePoi scopes = append( scopes, makeScope[SharePointScope](SharePointPage, items). - set(SharePointPage, pages, opts...), + set(SharePointPageFolder, pages, opts...), ) return scopes diff --git a/src/pkg/selectors/sharepoint_test.go b/src/pkg/selectors/sharepoint_test.go index b9334a492..4ce3859cd 100644 --- a/src/pkg/selectors/sharepoint_test.go +++ b/src/pkg/selectors/sharepoint_test.go @@ -193,9 +193,13 @@ func (suite *SharePointSelectorSuite) TestToSharePointRestore() { func (suite *SharePointSelectorSuite) TestSharePointRestore_Reduce() { var ( - item = stubRepoRef(path.SharePointService, path.LibrariesCategory, "sid", "folderA/folderB", "item") - item2 = stubRepoRef(path.SharePointService, path.LibrariesCategory, "sid", "folderA/folderC", "item2") - item3 = stubRepoRef(path.SharePointService, path.LibrariesCategory, "sid", "folderD/folderE", "item3") + pairAC = "folderA/folderC" + pairGH = "folderG/folderH" + item = stubRepoRef(path.SharePointService, path.LibrariesCategory, "sid", "folderA/folderB", "item") + item2 = stubRepoRef(path.SharePointService, path.LibrariesCategory, "sid", pairAC, "item2") + item3 = stubRepoRef(path.SharePointService, path.LibrariesCategory, "sid", "folderD/folderE", "item3") + item4 = stubRepoRef(path.SharePointService, path.PagesCategory, "sid", pairGH, "item4") + item5 = stubRepoRef(path.SharePointService, path.PagesCategory, "sid", pairGH, "item5") ) deets := &details.Details{ @@ -225,6 +229,22 @@ func (suite *SharePointSelectorSuite) TestSharePointRestore_Reduce() { }, }, }, + { + RepoRef: item4, + ItemInfo: details.ItemInfo{ + SharePoint: &details.SharePointInfo{ + ItemType: details.SharePointItem, + }, + }, + }, + { + RepoRef: item5, + ItemInfo: details.ItemInfo{ + SharePoint: &details.SharePointInfo{ + ItemType: details.SharePointItem, + }, + }, + }, }, }, } @@ -247,7 +267,7 @@ func (suite *SharePointSelectorSuite) TestSharePointRestore_Reduce() { odr.Include(odr.AllData()) return odr }, - expect: arr(item, item2, item3), + expect: arr(item, item2, item3, item4, item5), }, { name: "only match item", @@ -264,11 +284,21 @@ func (suite *SharePointSelectorSuite) TestSharePointRestore_Reduce() { deets: deets, makeSelector: func() *SharePointRestore { odr := NewSharePointRestore([]string{"sid"}) - odr.Include(odr.Libraries([]string{"folderA/folderB", "folderA/folderC"})) + odr.Include(odr.Libraries([]string{"folderA/folderB", pairAC})) return odr }, expect: arr(item, item2), }, + { + name: "pages match folder", + deets: deets, + makeSelector: func() *SharePointRestore { + odr := NewSharePointRestore([]string{"sid"}) + odr.Include(odr.Pages([]string{pairGH, pairAC})) + return odr + }, + expect: arr(item4, item5), + }, } for _, test := range table { suite.T().Run(test.name, func(t *testing.T) { From b2e7920201703d4bcc4f9588b6fd1a59e9c83201 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 25 Jan 2023 12:10:07 +0000 Subject: [PATCH 03/18] =?UTF-8?q?=E2=AC=86=EF=B8=8F=20Bump=20github.com/aw?= =?UTF-8?q?s/aws-sdk-go=20from=201.44.184=20to=201.44.186=20in=20/src=20(#?= =?UTF-8?q?2254)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [github.com/aws/aws-sdk-go](https://github.com/aws/aws-sdk-go) from 1.44.184 to 1.44.186.
Release notes

Sourced from github.com/aws/aws-sdk-go's releases.

Release v1.44.186 (2023-01-24)

Service Client Updates

  • service/databrew: Adds new service
  • service/route53: Updates service API
    • Amazon Route 53 now supports the Asia Pacific (Melbourne) Region (ap-southeast-4) for latency records, geoproximity records, and private DNS for Amazon VPCs in that region.
  • service/ssm-sap: Updates service API, documentation, and paginators

Release v1.44.185 (2023-01-23)

Service Client Updates

  • service/lambda: Updates service API and documentation
    • Release Lambda RuntimeManagementConfig, enabling customers to better manage runtime updates to their Lambda functions. This release adds two new APIs, GetRuntimeManagementConfig and PutRuntimeManagementConfig, as well as support on existing Create/Get/Update function APIs.
  • service/sagemaker: Updates service API and documentation
    • Amazon SageMaker Inference now supports P4de instance types.
Commits

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=github.com/aws/aws-sdk-go&package-manager=go_modules&previous-version=1.44.184&new-version=1.44.186)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) You can trigger a rebase of this PR by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@dependabot rebase` will rebase this PR - `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@dependabot merge` will merge this PR after your CI passes on it - `@dependabot squash and merge` will squash and merge this PR after your CI passes on it - `@dependabot cancel merge` will cancel a previously requested merge and block automerging - `@dependabot reopen` will reopen this PR if it is closed - `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually - `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
--- src/go.mod | 2 +- src/go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/go.mod b/src/go.mod index 66ffdd7ef..9c031a21c 100644 --- a/src/go.mod +++ b/src/go.mod @@ -4,7 +4,7 @@ go 1.19 require ( github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.2.0 - github.com/aws/aws-sdk-go v1.44.184 + github.com/aws/aws-sdk-go v1.44.186 github.com/aws/aws-xray-sdk-go v1.8.0 github.com/google/uuid v1.3.0 github.com/hashicorp/go-multierror v1.1.1 diff --git a/src/go.sum b/src/go.sum index 6563d813b..f1407e45f 100644 --- a/src/go.sum +++ b/src/go.sum @@ -60,8 +60,8 @@ github.com/alecthomas/units v0.0.0-20190924025748-f65c72e2690d/go.mod h1:rBZYJk5 github.com/alessio/shellescape v1.4.1 h1:V7yhSDDn8LP4lc4jS8pFkt0zCnzVJlG5JXy9BVKJUX0= github.com/andybalholm/brotli v1.0.4 h1:V7DdXeJtZscaqfNuAdSRuRFzuiKlHSC/Zh3zl9qY3JY= github.com/andybalholm/brotli v1.0.4/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig= -github.com/aws/aws-sdk-go v1.44.184 h1:/MggyE66rOImXJKl1HqhLQITvWvqIV7w1Q4MaG6FHUo= -github.com/aws/aws-sdk-go v1.44.184/go.mod h1:aVsgQcEevwlmQ7qHE9I3h+dtQgpqhFB+i8Phjh7fkwI= +github.com/aws/aws-sdk-go v1.44.186 h1:HInpD2b9FXgJIcP/WDRuSW4Wri9i5WVglO9okFFuOow= +github.com/aws/aws-sdk-go v1.44.186/go.mod h1:aVsgQcEevwlmQ7qHE9I3h+dtQgpqhFB+i8Phjh7fkwI= github.com/aws/aws-xray-sdk-go v1.8.0 h1:0xncHZ588wB/geLjbM/esoW3FOEThWy2TJyb4VXfLFY= github.com/aws/aws-xray-sdk-go v1.8.0/go.mod h1:7LKe47H+j3evfvS1+q0wzpoaGXGrF3mUsfM+thqVO+A= github.com/benbjohnson/clock v1.1.0 h1:Q92kusRqC1XV2MjkWETPvjJVqKetz1OzxZB7mHJLju8= From 91a9077d9028f9bfa0ede2dc5663af0acc3bf4ff Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 25 Jan 2023 12:21:28 +0000 Subject: [PATCH 04/18] =?UTF-8?q?=E2=AC=86=EF=B8=8F=20Bump=20ua-parser-js?= =?UTF-8?q?=20from=200.7.32=20to=200.7.33=20in=20/website=20(#2258)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- website/package-lock.json | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/website/package-lock.json b/website/package-lock.json index c781b7458..0662eb756 100644 --- a/website/package-lock.json +++ b/website/package-lock.json @@ -13220,9 +13220,9 @@ } }, "node_modules/ua-parser-js": { - "version": "0.7.32", - "resolved": "https://registry.npmjs.org/ua-parser-js/-/ua-parser-js-0.7.32.tgz", - "integrity": "sha512-f9BESNVhzlhEFf2CHMSj40NWOjYPl1YKYbrvIr/hFTDEmLq7SRbWvm7FcdcpCYT95zrOhC7gZSxjdnnTpBcwVw==", + "version": "0.7.33", + "resolved": "https://registry.npmjs.org/ua-parser-js/-/ua-parser-js-0.7.33.tgz", + "integrity": "sha512-s8ax/CeZdK9R/56Sui0WM6y9OFREJarMRHqLB2EwkovemBxNQ+Bqu8GAsUnVcXKgphb++ghr/B2BZx4mahujPw==", "funding": [ { "type": "opencollective", @@ -13233,7 +13233,6 @@ "url": "https://paypal.me/faisalman" } ], - "license": "MIT", "engines": { "node": "*" } @@ -23418,9 +23417,9 @@ "peer": true }, "ua-parser-js": { - "version": "0.7.32", - "resolved": "https://registry.npmjs.org/ua-parser-js/-/ua-parser-js-0.7.32.tgz", - "integrity": "sha512-f9BESNVhzlhEFf2CHMSj40NWOjYPl1YKYbrvIr/hFTDEmLq7SRbWvm7FcdcpCYT95zrOhC7gZSxjdnnTpBcwVw==" + "version": "0.7.33", + "resolved": "https://registry.npmjs.org/ua-parser-js/-/ua-parser-js-0.7.33.tgz", + "integrity": "sha512-s8ax/CeZdK9R/56Sui0WM6y9OFREJarMRHqLB2EwkovemBxNQ+Bqu8GAsUnVcXKgphb++ghr/B2BZx4mahujPw==" }, "unherit": { "version": "1.1.3", From 46d61c724640ca8f80023bc832963cefc5876a51 Mon Sep 17 00:00:00 2001 From: ashmrtn Date: Wed, 25 Jan 2023 08:28:09 -0800 Subject: [PATCH 05/18] Add a set of items that will be excluded from base directories during backup (#2143) ## Description Some external services like OneDrive do not have the ability to determine the path a deleted item used to exist at, they only know the item's old ID. This patch allows Kopia Wrapper to handle those items by implementing a global exclude set. The patch assumes that items in base directories are unique as the items in every base directory are checked against the set. This is not wired to anything outside of Kopia Wrapper. Currently this feature is disabled as the passed value is always nil. ## Does this PR need a docs update or release note? - [ ] :white_check_mark: Yes, it's included - [ ] :clock1: Yes, but in a later PR - [x] :no_entry: No ## Type of change - [x] :sunflower: Feature - [ ] :bug: Bugfix - [ ] :world_map: Documentation - [ ] :robot: Test - [ ] :computer: CI/Deployment - [ ] :broom: Tech Debt/Cleanup ## Issue(s) * closes #2121 ## Test Plan - [ ] :muscle: Manual - [x] :zap: Unit test - [ ] :green_heart: E2E --- src/internal/kopia/upload.go | 35 ++++++++++++++---- src/internal/kopia/upload_test.go | 61 +++++++++++++++++++++++++++---- src/internal/kopia/wrapper.go | 15 +++++++- 3 files changed, 94 insertions(+), 17 deletions(-) diff --git a/src/internal/kopia/upload.go b/src/internal/kopia/upload.go index 52452ffa9..5301e6872 100644 --- a/src/internal/kopia/upload.go +++ b/src/internal/kopia/upload.go @@ -345,6 +345,7 @@ func streamBaseEntries( prevPath path.Path, dir fs.Directory, encodedSeen map[string]struct{}, + globalExcludeSet map[string]struct{}, progress *corsoProgress, ) error { if dir == nil { @@ -373,6 +374,12 @@ func streamBaseEntries( return errors.Wrapf(err, "unable to decode entry name %s", entry.Name()) } + // This entry was marked as deleted by a service that can't tell us the + // previous path of deleted items, only the item ID. + if _, ok := globalExcludeSet[entName]; ok { + return nil + } + // For now assuming that item IDs don't need escaping. itemPath, err := curPath.Append(entName, true) if err != nil { @@ -421,6 +428,7 @@ func getStreamItemFunc( staticEnts []fs.Entry, streamedEnts data.Collection, baseDir fs.Directory, + globalExcludeSet map[string]struct{}, progress *corsoProgress, ) func(context.Context, func(context.Context, fs.Entry) error) error { return func(ctx context.Context, cb func(context.Context, fs.Entry) error) error { @@ -443,6 +451,7 @@ func getStreamItemFunc( prevPath, baseDir, seen, + globalExcludeSet, progress, ); err != nil { errs = multierror.Append( @@ -457,21 +466,22 @@ func getStreamItemFunc( // buildKopiaDirs recursively builds a directory hierarchy from the roots up. // Returned directories are virtualfs.StreamingDirectory. -func buildKopiaDirs(dirName string, dir *treeMap, progress *corsoProgress) (fs.Directory, error) { +func buildKopiaDirs( + dirName string, + dir *treeMap, + globalExcludeSet map[string]struct{}, + progress *corsoProgress, +) (fs.Directory, error) { // Reuse kopia directories directly if the subtree rooted at them is // unchanged. // - // TODO(ashmrtn): This will need updated when we have OneDrive backups where - // items have been deleted because we can't determine which directory used to - // have the item. - // // TODO(ashmrtn): We could possibly also use this optimization if we know that // the collection has no items in it. In that case though, we may need to take // extra care to ensure the name of the directory is properly represented. For // example, a directory that has been renamed but with no additional items may // not be able to directly use kopia's version of the directory due to the // rename. - if dir.collection == nil && len(dir.childDirs) == 0 && dir.baseDir != nil { + if dir.collection == nil && len(dir.childDirs) == 0 && dir.baseDir != nil && len(globalExcludeSet) == 0 { return dir.baseDir, nil } @@ -480,7 +490,7 @@ func buildKopiaDirs(dirName string, dir *treeMap, progress *corsoProgress) (fs.D var childDirs []fs.Entry for childName, childDir := range dir.childDirs { - child, err := buildKopiaDirs(childName, childDir, progress) + child, err := buildKopiaDirs(childName, childDir, globalExcludeSet, progress) if err != nil { return nil, err } @@ -496,6 +506,7 @@ func buildKopiaDirs(dirName string, dir *treeMap, progress *corsoProgress) (fs.D childDirs, dir.collection, dir.baseDir, + globalExcludeSet, progress, ), ), nil @@ -879,11 +890,19 @@ func inflateBaseTree( // virtualfs.StreamingDirectory with the given DataCollections if there is one // for that node. Tags can be used in future backups to fetch old snapshots for // caching reasons. +// +// globalExcludeSet represents a set of items, represented with file names, to +// exclude from base directories when uploading the snapshot. As items in *all* +// base directories will be checked for in every base directory, this assumes +// that items in the bases are unique. Deletions of directories or subtrees +// should be represented as changes in the status of a Collection, not an entry +// in the globalExcludeSet. func inflateDirTree( ctx context.Context, loader snapshotLoader, baseSnaps []IncrementalBase, collections []data.Collection, + globalExcludeSet map[string]struct{}, progress *corsoProgress, ) (fs.Directory, error) { roots, updatedPaths, err := inflateCollectionTree(ctx, collections) @@ -915,7 +934,7 @@ func inflateDirTree( var res fs.Directory for dirName, dir := range roots { - tmp, err := buildKopiaDirs(dirName, dir, progress) + tmp, err := buildKopiaDirs(dirName, dir, globalExcludeSet, progress) if err != nil { return nil, err } diff --git a/src/internal/kopia/upload_test.go b/src/internal/kopia/upload_test.go index 57ce9fd56..1cc4daf47 100644 --- a/src/internal/kopia/upload_test.go +++ b/src/internal/kopia/upload_test.go @@ -705,7 +705,7 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTree() { // - emails // - Inbox // - 42 separate files - dirTree, err := inflateDirTree(ctx, nil, nil, collections, progress) + dirTree, err := inflateDirTree(ctx, nil, nil, collections, nil, progress) require.NoError(t, err) assert.Equal(t, encodeAsPath(testTenant), dirTree.Name()) @@ -793,7 +793,7 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTree_MixedDirectory() suite.T().Run(test.name, func(t *testing.T) { progress := &corsoProgress{pending: map[string]*itemDetails{}} - dirTree, err := inflateDirTree(ctx, nil, nil, test.layout, progress) + dirTree, err := inflateDirTree(ctx, nil, nil, test.layout, nil, progress) require.NoError(t, err) assert.Equal(t, encodeAsPath(testTenant), dirTree.Name()) @@ -889,7 +889,7 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTree_Fails() { defer flush() suite.T().Run(test.name, func(t *testing.T) { - _, err := inflateDirTree(ctx, nil, nil, test.layout, nil) + _, err := inflateDirTree(ctx, nil, nil, test.layout, nil, nil) assert.Error(t, err) }) } @@ -992,7 +992,7 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTreeErrors() { cols = append(cols, mc) } - _, err := inflateDirTree(ctx, nil, nil, cols, progress) + _, err := inflateDirTree(ctx, nil, nil, cols, nil, progress) require.Error(t, err) }) } @@ -1261,6 +1261,7 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTreeSingleSubtree() { mockIncrementalBase("", testTenant, testUser, path.ExchangeService, path.EmailCategory), }, test.inputCollections(), + nil, progress, ) require.NoError(t, err) @@ -1281,7 +1282,7 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTreeMultipleSubdirecto []string{testTenant, service, testUser, category, testInboxDir}, false, ) - inboxFileName1 := testFileName4 + inboxFileName1 := testFileName inboxFileData1 := testFileData4 inboxFileName2 := testFileName5 inboxFileData2 := testFileData5 @@ -1291,7 +1292,7 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTreeMultipleSubdirecto append(inboxPath.Elements(), personalDir), false, ) - personalFileName1 := testFileName + personalFileName1 := inboxFileName1 personalFileName2 := testFileName2 workPath := makePath( @@ -1312,7 +1313,7 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTreeMultipleSubdirecto // - user1 // - email // - Inbox - // - file4 + // - file1 // - personal // - file1 // - file2 @@ -1369,8 +1370,51 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTreeMultipleSubdirecto table := []struct { name string inputCollections func(t *testing.T) []data.Collection + inputExcludes map[string]struct{} expected *expectedNode }{ + { + name: "GlobalExcludeSet", + inputCollections: func(t *testing.T) []data.Collection { + return nil + }, + inputExcludes: map[string]struct{}{ + inboxFileName1: {}, + }, + expected: expectedTreeWithChildren( + []string{ + testTenant, + service, + testUser, + category, + }, + []*expectedNode{ + { + name: testInboxDir, + children: []*expectedNode{ + { + name: personalDir, + children: []*expectedNode{ + { + name: personalFileName2, + children: []*expectedNode{}, + }, + }, + }, + { + name: workDir, + children: []*expectedNode{ + { + name: workFileName1, + children: []*expectedNode{}, + }, + }, + }, + }, + }, + }, + ), + }, { name: "MovesSubtree", inputCollections: func(t *testing.T) []data.Collection { @@ -1919,6 +1963,7 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTreeMultipleSubdirecto mockIncrementalBase("", testTenant, testUser, path.ExchangeService, path.EmailCategory), }, test.inputCollections(t), + test.inputExcludes, progress, ) require.NoError(t, err) @@ -2079,6 +2124,7 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTreeSkipsDeletedSubtre mockIncrementalBase("", testTenant, testUser, path.ExchangeService, path.EmailCategory), }, collections, + nil, progress, ) require.NoError(t, err) @@ -2325,6 +2371,7 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTreeSelectsCorrectSubt mockIncrementalBase("id2", testTenant, testUser, path.ExchangeService, path.EmailCategory), }, collections, + nil, progress, ) require.NoError(t, err) diff --git a/src/internal/kopia/wrapper.go b/src/internal/kopia/wrapper.go index 8171c853a..13db75ac4 100644 --- a/src/internal/kopia/wrapper.go +++ b/src/internal/kopia/wrapper.go @@ -129,7 +129,11 @@ func (w Wrapper) BackupCollections( ctx, end := D.Span(ctx, "kopia:backupCollections") defer end() - if len(collections) == 0 { + // TODO(ashmrtn): Make this a parameter when actually enabling the global + // exclude set. + var globalExcludeSet map[string]struct{} + + if len(collections) == 0 && len(globalExcludeSet) == 0 { return &BackupStats{}, &details.Builder{}, nil, nil } @@ -147,7 +151,14 @@ func (w Wrapper) BackupCollections( base = previousSnapshots } - dirTree, err := inflateDirTree(ctx, w.c, base, collections, progress) + dirTree, err := inflateDirTree( + ctx, + w.c, + base, + collections, + globalExcludeSet, + progress, + ) if err != nil { return nil, nil, nil, errors.Wrap(err, "building kopia directories") } From a4f2f0a43787f698883e4d3b6df6a455c61c7ccc Mon Sep 17 00:00:00 2001 From: ashmrtn Date: Wed, 25 Jan 2023 08:57:30 -0800 Subject: [PATCH 06/18] Add deleted files to OneDrive excluded file list (#2250) ## Description Begin populating a global exclude set with files deleted from OneDrive. The set contains the IDs of files that have been deleted. This is actually safe to return from GraphConnector now (though it's not returned) as it uses item IDs instead of item names. Future PRs will need to address handling of (potentially) moved files ## Does this PR need a docs update or release note? - [ ] :white_check_mark: Yes, it's included - [ ] :clock1: Yes, but in a later PR - [x] :no_entry: No ## Type of change - [x] :sunflower: Feature - [ ] :bug: Bugfix - [ ] :world_map: Documentation - [ ] :robot: Test - [ ] :computer: CI/Deployment - [ ] :broom: Tech Debt/Cleanup ## Issue(s) * #2242 ## Test Plan - [ ] :muscle: Manual - [x] :zap: Unit test - [ ] :green_heart: E2E --- .../connector/onedrive/collections.go | 31 ++++++++++++- .../connector/onedrive/collections_test.go | 44 ++++++++++++++++++- src/internal/connector/onedrive/drive.go | 15 ++++--- src/internal/connector/onedrive/item_test.go | 3 +- .../sharepoint/data_collections_test.go | 3 +- 5 files changed, 86 insertions(+), 10 deletions(-) diff --git a/src/internal/connector/onedrive/collections.go b/src/internal/connector/onedrive/collections.go index 90734c43a..8f18aa780 100644 --- a/src/internal/connector/onedrive/collections.go +++ b/src/internal/connector/onedrive/collections.go @@ -7,6 +7,7 @@ import ( "github.com/microsoftgraph/msgraph-sdk-go/models" "github.com/pkg/errors" + "golang.org/x/exp/maps" "github.com/alcionai/corso/src/internal/connector/graph" "github.com/alcionai/corso/src/internal/connector/support" @@ -98,6 +99,12 @@ func (c *Collections) Get(ctx context.Context) ([]data.Collection, error) { deltaURLs = map[string]string{} // Drive ID -> folder ID -> folder path folderPaths = map[string]map[string]string{} + // Items that should be excluded when sourcing data from the base backup. + // TODO(ashmrtn): This list contains the M365 IDs of deleted items so while + // it's technically safe to pass all the way through to kopia (files are + // unlikely to be named their M365 ID) we should wait to do that until we've + // switched to using those IDs for file names in kopia. + excludedItems = map[string]struct{}{} ) // Update the collection map with items from each drive @@ -105,7 +112,13 @@ func (c *Collections) Get(ctx context.Context) ([]data.Collection, error) { driveID := *d.GetId() driveName := *d.GetName() - delta, paths, err := collectItems(ctx, c.service, driveID, driveName, c.UpdateCollections) + delta, paths, excluded, err := collectItems( + ctx, + c.service, + driveID, + driveName, + c.UpdateCollections, + ) if err != nil { return nil, err } @@ -121,6 +134,8 @@ func (c *Collections) Get(ctx context.Context) ([]data.Collection, error) { folderPaths[driveID][id] = p } } + + maps.Copy(excludedItems, excluded) } observe.Message(ctx, fmt.Sprintf("Discovered %d items to backup", c.NumItems)) @@ -171,6 +186,7 @@ func (c *Collections) UpdateCollections( items []models.DriveItemable, oldPaths map[string]string, newPaths map[string]string, + excluded map[string]struct{}, ) error { for _, item := range items { if item.GetRoot() != nil { @@ -231,6 +247,19 @@ func (c *Collections) UpdateCollections( updatePath(newPaths, *item.GetId(), folderPath.String()) case item.GetFile() != nil: + if item.GetDeleted() != nil { + excluded[*item.GetId()] = struct{}{} + // Exchange counts items streamed through it which includes deletions so + // add that here too. + c.NumFiles++ + c.NumItems++ + + continue + } + + // TODO(ashmrtn): Figure what when an item was moved (maybe) and add it to + // the exclude list. + col, found := c.CollectionMap[collectionPath.String()] if !found { // TODO(ashmrtn): Compare old and new path and set collection state diff --git a/src/internal/connector/onedrive/collections_test.go b/src/internal/connector/onedrive/collections_test.go index b9c313883..485b9ed86 100644 --- a/src/internal/connector/onedrive/collections_test.go +++ b/src/internal/connector/onedrive/collections_test.go @@ -105,6 +105,7 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() { expectedContainerCount int expectedFileCount int expectedMetadataPaths map[string]string + expectedExcludes map[string]struct{} }{ { testCase: "Invalid item", @@ -115,6 +116,7 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() { scope: anyFolder, expect: assert.Error, expectedMetadataPaths: map[string]string{}, + expectedExcludes: map[string]struct{}{}, }, { testCase: "Single File", @@ -135,6 +137,7 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() { expectedContainerCount: 1, // Root folder is skipped since it's always present. expectedMetadataPaths: map[string]string{}, + expectedExcludes: map[string]struct{}{}, }, { testCase: "Single Folder", @@ -153,6 +156,7 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() { testBaseDrivePath+"/folder", )[0], }, + expectedExcludes: map[string]struct{}{}, }, { testCase: "Single Package", @@ -171,6 +175,7 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() { testBaseDrivePath+"/package", )[0], }, + expectedExcludes: map[string]struct{}{}, }, { testCase: "1 root file, 1 folder, 1 package, 2 files, 3 collections", @@ -209,6 +214,7 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() { testBaseDrivePath+"/package", )[0], }, + expectedExcludes: map[string]struct{}{}, }, { testCase: "contains folder selector", @@ -258,6 +264,7 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() { testBaseDrivePath+"/folder/subfolder/folder", )[0], }, + expectedExcludes: map[string]struct{}{}, }, { testCase: "prefix subfolder selector", @@ -292,6 +299,7 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() { testBaseDrivePath+"/folder/subfolder/folder", )[0], }, + expectedExcludes: map[string]struct{}{}, }, { testCase: "match subfolder selector", @@ -318,6 +326,7 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() { expectedContainerCount: 1, // No child folders for subfolder so nothing here. expectedMetadataPaths: map[string]string{}, + expectedExcludes: map[string]struct{}{}, }, { testCase: "not moved folder tree", @@ -358,6 +367,7 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() { testBaseDrivePath+"/folder/subfolder", )[0], }, + expectedExcludes: map[string]struct{}{}, }, { testCase: "moved folder tree", @@ -398,6 +408,7 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() { testBaseDrivePath+"/folder/subfolder", )[0], }, + expectedExcludes: map[string]struct{}{}, }, { testCase: "moved folder tree and subfolder 1", @@ -439,6 +450,7 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() { testBaseDrivePath+"/subfolder", )[0], }, + expectedExcludes: map[string]struct{}{}, }, { testCase: "moved folder tree and subfolder 2", @@ -480,6 +492,7 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() { testBaseDrivePath+"/subfolder", )[0], }, + expectedExcludes: map[string]struct{}{}, }, { testCase: "deleted folder and package", @@ -508,6 +521,7 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() { expectedFileCount: 0, expectedContainerCount: 0, expectedMetadataPaths: map[string]string{}, + expectedExcludes: map[string]struct{}{}, }, { testCase: "delete folder tree move subfolder", @@ -543,6 +557,24 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() { testBaseDrivePath+"/subfolder", )[0], }, + expectedExcludes: map[string]struct{}{}, + }, + { + testCase: "delete file", + items: []models.DriveItemable{ + delItem("item", testBaseDrivePath, true, false, false), + }, + inputFolderMap: map[string]string{}, + scope: anyFolder, + expect: assert.NoError, + expectedCollectionPaths: []string{}, + expectedItemCount: 1, + expectedFileCount: 1, + expectedContainerCount: 0, + expectedMetadataPaths: map[string]string{}, + expectedExcludes: map[string]struct{}{ + "item": {}, + }, }, } @@ -551,6 +583,7 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() { ctx, flush := tester.NewContext() defer flush() + excludes := map[string]struct{}{} outputFolderMap := map[string]string{} maps.Copy(outputFolderMap, tt.inputFolderMap) c := NewCollections( @@ -562,7 +595,15 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() { nil, control.Options{}) - err := c.UpdateCollections(ctx, "driveID", "General", tt.items, tt.inputFolderMap, outputFolderMap) + err := c.UpdateCollections( + ctx, + "driveID", + "General", + tt.items, + tt.inputFolderMap, + outputFolderMap, + excludes, + ) tt.expect(t, err) assert.Equal(t, len(tt.expectedCollectionPaths), len(c.CollectionMap), "collection paths") assert.Equal(t, tt.expectedItemCount, c.NumItems, "item count") @@ -573,6 +614,7 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() { } assert.Equal(t, tt.expectedMetadataPaths, outputFolderMap) + assert.Equal(t, tt.expectedExcludes, excludes) }) } } diff --git a/src/internal/connector/onedrive/drive.go b/src/internal/connector/onedrive/drive.go index 17ee6f96d..315eaee5c 100644 --- a/src/internal/connector/onedrive/drive.go +++ b/src/internal/connector/onedrive/drive.go @@ -170,6 +170,7 @@ type itemCollector func( driveItems []models.DriveItemable, oldPaths map[string]string, newPaths map[string]string, + excluded map[string]struct{}, ) error // collectItems will enumerate all items in the specified drive and hand them to the @@ -179,13 +180,14 @@ func collectItems( service graph.Servicer, driveID, driveName string, collector itemCollector, -) (string, map[string]string, error) { +) (string, map[string]string, map[string]struct{}, error) { var ( newDeltaURL = "" // TODO(ashmrtn): Eventually this should probably be a parameter so we can // take in previous paths. oldPaths = map[string]string{} newPaths = map[string]string{} + excluded = map[string]struct{}{} ) maps.Copy(newPaths, oldPaths) @@ -219,16 +221,16 @@ func collectItems( for { r, err := builder.Get(ctx, requestConfig) if err != nil { - return "", nil, errors.Wrapf( + return "", nil, nil, errors.Wrapf( err, "failed to query drive items. details: %s", support.ConnectorStackErrorTrace(err), ) } - err = collector(ctx, driveID, driveName, r.GetValue(), oldPaths, newPaths) + err = collector(ctx, driveID, driveName, r.GetValue(), oldPaths, newPaths, excluded) if err != nil { - return "", nil, err + return "", nil, nil, err } if r.GetOdataDeltaLink() != nil && len(*r.GetOdataDeltaLink()) > 0 { @@ -245,7 +247,7 @@ func collectItems( builder = msdrives.NewItemRootDeltaRequestBuilder(*nextLink, service.Adapter()) } - return newDeltaURL, newPaths, nil + return newDeltaURL, newPaths, excluded, nil } // getFolder will lookup the specified folder name under `parentFolderID` @@ -352,7 +354,7 @@ func GetAllFolders( folders := map[string]*Displayable{} for _, d := range drives { - _, _, err = collectItems( + _, _, _, err = collectItems( ctx, gs, *d.GetId(), @@ -363,6 +365,7 @@ func GetAllFolders( items []models.DriveItemable, oldPaths map[string]string, newPaths map[string]string, + excluded map[string]struct{}, ) error { for _, item := range items { // Skip the root item. diff --git a/src/internal/connector/onedrive/item_test.go b/src/internal/connector/onedrive/item_test.go index f53607328..b3f45ca56 100644 --- a/src/internal/connector/onedrive/item_test.go +++ b/src/internal/connector/onedrive/item_test.go @@ -101,6 +101,7 @@ func (suite *ItemIntegrationSuite) TestItemReader_oneDrive() { items []models.DriveItemable, oldPaths map[string]string, newPaths map[string]string, + excluded map[string]struct{}, ) error { for _, item := range items { if item.GetFile() != nil { @@ -111,7 +112,7 @@ func (suite *ItemIntegrationSuite) TestItemReader_oneDrive() { return nil } - _, _, err := collectItems(ctx, suite, suite.userDriveID, "General", itemCollector) + _, _, _, err := collectItems(ctx, suite, suite.userDriveID, "General", itemCollector) require.NoError(suite.T(), err) // Test Requirement 2: Need a file diff --git a/src/internal/connector/sharepoint/data_collections_test.go b/src/internal/connector/sharepoint/data_collections_test.go index 9fc538e2c..423336253 100644 --- a/src/internal/connector/sharepoint/data_collections_test.go +++ b/src/internal/connector/sharepoint/data_collections_test.go @@ -89,6 +89,7 @@ func (suite *SharePointLibrariesSuite) TestUpdateCollections() { paths := map[string]string{} newPaths := map[string]string{} + excluded := map[string]struct{}{} c := onedrive.NewCollections( tenant, site, @@ -97,7 +98,7 @@ func (suite *SharePointLibrariesSuite) TestUpdateCollections() { &MockGraphService{}, nil, control.Options{}) - err := c.UpdateCollections(ctx, "driveID", "General", test.items, paths, newPaths) + err := c.UpdateCollections(ctx, "driveID", "General", test.items, paths, newPaths, excluded) test.expect(t, err) assert.Equal(t, len(test.expectedCollectionPaths), len(c.CollectionMap), "collection paths") assert.Equal(t, test.expectedItemCount, c.NumItems, "item count") From bb73d142c5c60457e8a8e25b2078c007be8e23d9 Mon Sep 17 00:00:00 2001 From: ashmrtn Date: Wed, 25 Jan 2023 12:13:50 -0800 Subject: [PATCH 07/18] Remove duplicate string->path.CategoryType helper function (#2260) ## Description Tackles a few things: * normalize case when going from string->path.Service or path.Category * removes duplicate helper function for string->path.Category in favor of path package one ## Does this PR need a docs update or release note? - [ ] :white_check_mark: Yes, it's included - [ ] :clock1: Yes, but in a later PR - [x] :no_entry: No ## Type of change - [ ] :sunflower: Feature - [ ] :bug: Bugfix - [ ] :world_map: Documentation - [ ] :robot: Test - [ ] :computer: CI/Deployment - [x] :broom: Tech Debt/Cleanup ## Issue(s) * closes #2259 ## Test Plan - [x] :muscle: Manual - [x] :zap: Unit test - [ ] :green_heart: E2E --- src/cmd/getM365/getItem.go | 3 +-- .../connector/graph/service_helper.go | 25 ------------------- src/pkg/path/resource_path.go | 10 ++++++-- src/pkg/path/service_category_test.go | 9 +++++++ 4 files changed, 18 insertions(+), 29 deletions(-) diff --git a/src/cmd/getM365/getItem.go b/src/cmd/getM365/getItem.go index 9c2f8f135..25961a236 100644 --- a/src/cmd/getM365/getItem.go +++ b/src/cmd/getM365/getItem.go @@ -19,7 +19,6 @@ import ( "github.com/alcionai/corso/src/internal/common" "github.com/alcionai/corso/src/internal/connector" "github.com/alcionai/corso/src/internal/connector/exchange/api" - "github.com/alcionai/corso/src/internal/connector/graph" "github.com/alcionai/corso/src/pkg/account" "github.com/alcionai/corso/src/pkg/backup/details" "github.com/alcionai/corso/src/pkg/credentials" @@ -96,7 +95,7 @@ func runDisplayM365JSON( var ( bs []byte err error - cat = graph.StringToPathCategory(category) + cat = path.ToCategoryType(category) sw = kw.NewJsonSerializationWriter() ) diff --git a/src/internal/connector/graph/service_helper.go b/src/internal/connector/graph/service_helper.go index 76ff54ad4..b374933f8 100644 --- a/src/internal/connector/graph/service_helper.go +++ b/src/internal/connector/graph/service_helper.go @@ -4,7 +4,6 @@ import ( "net/http" "net/http/httputil" "os" - "strings" "time" az "github.com/Azure/azure-sdk-for-go/sdk/azidentity" @@ -15,7 +14,6 @@ import ( "github.com/pkg/errors" "github.com/alcionai/corso/src/pkg/logger" - "github.com/alcionai/corso/src/pkg/path" ) const ( @@ -104,26 +102,3 @@ func (handler *LoggingMiddleware) Intercept( return resp, err } - -// --------------------------------------------------------------------------- -// Other Helpers -// --------------------------------------------------------------------------- - -func StringToPathCategory(input string) path.CategoryType { - param := strings.ToLower(input) - - switch param { - case "email": - return path.EmailCategory - case "contacts": - return path.ContactsCategory - case "events": - return path.EventsCategory - case "files": - return path.FilesCategory - case "libraries": - return path.LibrariesCategory - default: - return path.UnknownCategory - } -} diff --git a/src/pkg/path/resource_path.go b/src/pkg/path/resource_path.go index c66cd300e..b8113618e 100644 --- a/src/pkg/path/resource_path.go +++ b/src/pkg/path/resource_path.go @@ -1,6 +1,8 @@ package path import ( + "strings" + "github.com/pkg/errors" ) @@ -30,7 +32,9 @@ const ( ) func toServiceType(service string) ServiceType { - switch service { + s := strings.ToLower(service) + + switch s { case ExchangeService.String(): return ExchangeService case OneDriveService.String(): @@ -70,7 +74,9 @@ const ( ) func ToCategoryType(category string) CategoryType { - switch category { + cat := strings.ToLower(category) + + switch cat { case EmailCategory.String(): return EmailCategory case ContactsCategory.String(): diff --git a/src/pkg/path/service_category_test.go b/src/pkg/path/service_category_test.go index 915e48dc5..72e389e2a 100644 --- a/src/pkg/path/service_category_test.go +++ b/src/pkg/path/service_category_test.go @@ -1,6 +1,7 @@ package path import ( + "strings" "testing" "github.com/stretchr/testify/assert" @@ -73,6 +74,14 @@ func (suite *ServiceCategoryUnitSuite) TestValidateServiceAndCategory() { category: "foo", check: assert.Error, }, + { + name: "DifferentCases", + service: strings.ToUpper(ExchangeService.String()), + category: strings.ToUpper(EmailCategory.String()), + expectedService: ExchangeService, + expectedCategory: EmailCategory, + check: assert.NoError, + }, { name: "ExchangeEmail", service: ExchangeService.String(), From 78b9f2752e4a0a03e4ebbfe4b6f9baa2058cf911 Mon Sep 17 00:00:00 2001 From: Keepers Date: Wed, 25 Jan 2023 14:39:38 -0700 Subject: [PATCH 08/18] adds clues package to begin structured logging (#2214) ## Does this PR need a docs update or release note? - [x] :no_entry: No ## Type of change - [x] :broom: Tech Debt/Cleanup ## Issue(s) * #1969 ## Test Plan - [x] :muscle: Manual --- src/go.mod | 3 ++- src/go.sum | 6 ++++-- src/internal/operations/backup.go | 11 +++++++++++ src/internal/operations/restore.go | 13 +++++++++++++ 4 files changed, 30 insertions(+), 3 deletions(-) diff --git a/src/go.mod b/src/go.mod index 9c031a21c..7a8aced9e 100644 --- a/src/go.mod +++ b/src/go.mod @@ -4,7 +4,8 @@ go 1.19 require ( github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.2.0 - github.com/aws/aws-sdk-go v1.44.186 + github.com/alcionai/clues v0.0.0-20230120231953-1cf61dbafc40 + github.com/aws/aws-sdk-go v1.44.184 github.com/aws/aws-xray-sdk-go v1.8.0 github.com/google/uuid v1.3.0 github.com/hashicorp/go-multierror v1.1.1 diff --git a/src/go.sum b/src/go.sum index f1407e45f..2644af9be 100644 --- a/src/go.sum +++ b/src/go.sum @@ -52,6 +52,8 @@ github.com/VividCortex/ewma v1.2.0 h1:f58SaIzcDXrSy3kWaHNvuJgJ3Nmz59Zji6XoJR/q1o github.com/VividCortex/ewma v1.2.0/go.mod h1:nz4BbCtbLyFDeC9SUHbtcT5644juEuWfUAUnGx7j5l4= github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d h1:licZJFw2RwpHMqeKTCYkitsPqHNxTmd4SNR5r94FGM8= github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d/go.mod h1:asat636LX7Bqt5lYEZ27JNDcqxfjdBQuJ/MM4CN/Lzo= +github.com/alcionai/clues v0.0.0-20230120231953-1cf61dbafc40 h1:bvAwz0dcJeIyRjudVyzmmawOvc4SqlSerKd0B4dh0yw= +github.com/alcionai/clues v0.0.0-20230120231953-1cf61dbafc40/go.mod h1:UlAs8jkWIpsOMakiC8NxPgQQVQRdvyf1hYMszlYYLb4= github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= @@ -60,8 +62,8 @@ github.com/alecthomas/units v0.0.0-20190924025748-f65c72e2690d/go.mod h1:rBZYJk5 github.com/alessio/shellescape v1.4.1 h1:V7yhSDDn8LP4lc4jS8pFkt0zCnzVJlG5JXy9BVKJUX0= github.com/andybalholm/brotli v1.0.4 h1:V7DdXeJtZscaqfNuAdSRuRFzuiKlHSC/Zh3zl9qY3JY= github.com/andybalholm/brotli v1.0.4/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig= -github.com/aws/aws-sdk-go v1.44.186 h1:HInpD2b9FXgJIcP/WDRuSW4Wri9i5WVglO9okFFuOow= -github.com/aws/aws-sdk-go v1.44.186/go.mod h1:aVsgQcEevwlmQ7qHE9I3h+dtQgpqhFB+i8Phjh7fkwI= +github.com/aws/aws-sdk-go v1.44.184 h1:/MggyE66rOImXJKl1HqhLQITvWvqIV7w1Q4MaG6FHUo= +github.com/aws/aws-sdk-go v1.44.184/go.mod h1:aVsgQcEevwlmQ7qHE9I3h+dtQgpqhFB+i8Phjh7fkwI= github.com/aws/aws-xray-sdk-go v1.8.0 h1:0xncHZ588wB/geLjbM/esoW3FOEThWy2TJyb4VXfLFY= github.com/aws/aws-xray-sdk-go v1.8.0/go.mod h1:7LKe47H+j3evfvS1+q0wzpoaGXGrF3mUsfM+thqVO+A= github.com/benbjohnson/clock v1.1.0 h1:Q92kusRqC1XV2MjkWETPvjJVqKetz1OzxZB7mHJLju8= diff --git a/src/internal/operations/backup.go b/src/internal/operations/backup.go index e2c2bd0eb..a5a955150 100644 --- a/src/internal/operations/backup.go +++ b/src/internal/operations/backup.go @@ -4,6 +4,7 @@ import ( "context" "time" + "github.com/alcionai/clues" "github.com/google/uuid" multierror "github.com/hashicorp/go-multierror" "github.com/pkg/errors" @@ -119,6 +120,14 @@ func (op *BackupOperation) Run(ctx context.Context) (err error) { op.Results.BackupID = model.StableID(uuid.NewString()) + ctx = clues.AddAll( + ctx, + "tenant_id", tenantID, // TODO: pii + "resource_owner", op.ResourceOwner, // TODO: pii + "backup_id", op.Results.BackupID, + "service", op.Selectors.Service, + "incremental", uib) + op.bus.Event( ctx, events.BackupStart, @@ -174,6 +183,8 @@ func (op *BackupOperation) Run(ctx context.Context) (err error) { return opStats.readErr } + ctx = clues.Add(ctx, "collections", len(cs)) + opStats.k, backupDetails, toMerge, err = consumeBackupDataCollections( ctx, op.kopia, diff --git a/src/internal/operations/restore.go b/src/internal/operations/restore.go index f7505fa7d..03c203b05 100644 --- a/src/internal/operations/restore.go +++ b/src/internal/operations/restore.go @@ -5,6 +5,7 @@ import ( "fmt" "time" + "github.com/alcionai/clues" "github.com/google/uuid" multierror "github.com/hashicorp/go-multierror" "github.com/pkg/errors" @@ -129,6 +130,12 @@ func (op *RestoreOperation) Run(ctx context.Context) (restoreDetails *details.De detailsStore := streamstore.New(op.kopia, op.account.ID(), op.Selectors.PathService()) + ctx = clues.AddAll( + ctx, + "tenant_id", op.account.ID(), // TODO: pii + "backup_id", op.BackupID, + "service", op.Selectors.Service) + bup, deets, err := getBackupAndDetailsFromID( ctx, op.BackupID, @@ -142,6 +149,8 @@ func (op *RestoreOperation) Run(ctx context.Context) (restoreDetails *details.De return nil, err } + ctx = clues.Add(ctx, "resource_owner", bup.Selector.DiscreteOwner) + op.bus.Event( ctx, events.RestoreStart, @@ -159,6 +168,8 @@ func (op *RestoreOperation) Run(ctx context.Context) (restoreDetails *details.De return nil, err } + ctx = clues.Add(ctx, "details_paths", len(paths)) + observe.Message(ctx, fmt.Sprintf("Discovered %d items in backup %s to restore", len(paths), op.BackupID)) kopiaComplete, closer := observe.MessageWithCompletion(ctx, "Enumerating items in repository") @@ -174,6 +185,8 @@ func (op *RestoreOperation) Run(ctx context.Context) (restoreDetails *details.De } kopiaComplete <- struct{}{} + ctx = clues.Add(ctx, "collections", len(dcs)) + opStats.cs = dcs opStats.resourceCount = len(data.ResourceOwnerSet(dcs)) From 4852667468fdbff4425ed4bb1d3c40ce53a0a8b7 Mon Sep 17 00:00:00 2001 From: Keepers Date: Wed, 25 Jan 2023 15:00:23 -0700 Subject: [PATCH 09/18] Add fault pkg and errors struct (#2236) ## Does this PR need a docs update or release note? - [x] :no_entry: No ## Type of change - [x] :broom: Tech Debt/Cleanup ## Issue(s) * #1970 ## Test Plan - [x] :zap: Unit test --- src/pkg/fault/example_fault_test.go | 291 ++++++++++++++++++++++++++++ src/pkg/fault/fault.go | 127 ++++++++++++ src/pkg/fault/fault_test.go | 202 +++++++++++++++++++ 3 files changed, 620 insertions(+) create mode 100644 src/pkg/fault/example_fault_test.go create mode 100644 src/pkg/fault/fault.go create mode 100644 src/pkg/fault/fault_test.go diff --git a/src/pkg/fault/example_fault_test.go b/src/pkg/fault/example_fault_test.go new file mode 100644 index 000000000..4ad5945d2 --- /dev/null +++ b/src/pkg/fault/example_fault_test.go @@ -0,0 +1,291 @@ +package fault_test + +import ( + "fmt" + + "github.com/pkg/errors" + + "github.com/alcionai/corso/src/pkg/fault" +) + +// --------------------------------------------------------------------------- +// mock helpers +// --------------------------------------------------------------------------- + +var ( + ctrl any + items = []string{} +) + +type mockController struct { + errors any +} + +func connectClient() error { return nil } +func dependencyCall() error { return nil } +func getIthItem(i string) error { return nil } +func getData() ([]string, error) { return nil, nil } +func storeData([]string, *fault.Errors) {} + +type mockOper struct { + Errors *fault.Errors +} + +func newOperation() mockOper { return mockOper{fault.New(true)} } +func (m mockOper) Run() *fault.Errors { return m.Errors } + +// --------------------------------------------------------------------------- +// examples +// --------------------------------------------------------------------------- + +// ExampleNewErrors highlights assumptions and best practices +// for generating Errors structs. +func Example_new() { + // Errors should only be generated during the construction of + // another controller, such as a new Backup or Restore Operations. + // Configurations like failFast are set during construction. + // + // Generating new fault.Errors structs outside of an operation + // controller is a smell, and should be avoided. If you need + // to aggregate errors, you should accept an interface and pass + // an Errors instance into it. + ctrl = mockController{ + errors: fault.New(false), + } +} + +// ExampleErrorsFail describes the assumptions and best practices +// for setting the Failure error. +func Example_errors_Fail() { + errs := fault.New(false) + + // Fail() should be used to record any error that highlights a + // non-recoverable failure in a process. + // + // Fail() should only get called in the last step before returning + // a fault.Errors from a controller. In all other cases, you + // should simply return an error and expect the upstream controller + // to call Fail() for you. + if err := connectClient(); err != nil { + // normally, you'd want to + // return errs.Fail(err) + errs.Fail(err) + } + + // Only the topmost handler of the error should set the Fail() err. + // This will normally be the operation controller itself. + // IE: Fail() is not Wrap(). In lower levels, errors should get + // wrapped and returned like normal, and only handled by errors + // at the end. + lowLevelCall := func() error { + if err := dependencyCall(); err != nil { + // wrap here, deeper into the stack + return errors.Wrap(err, "dependency") + } + + return nil + } + + if err := lowLevelCall(); err != nil { + // fail here, at the top of the stack + errs.Fail(err) + } +} + +// ExampleErrorsAdd describes the assumptions and best practices +// for aggregating iterable or recoverable errors. +func Example_errors_Add() { + errs := fault.New(false) + + // Add() should be used to record any error in a recoverable + // part of processing. + // + // Add() should only get called in the last step in handling an + // error within a loop or stream that does not otherwise return + // an error. In all other cases, you should simply return an error + // and expect the upstream point of iteration to call Add() for you. + for _, i := range items { + if err := getIthItem(i); err != nil { + errs.Add(err) + } + } + + // In case of failFast behavior, iteration should exit as soon + // as an error occurs. Errors does not expose the failFast flag + // directly. Instead, iterators should check the value of Err(). + // If it is non-nil, then the loop shold break. + for _, i := range items { + if errs.Err() != nil { + break + } + + errs.Add(getIthItem(i)) + } + + // Only the topmost handler of the error should Add() the err. + // This will normally be the iteration loop itself. + // IE: Add() is not Wrap(). In lower levels, errors should get + // wrapped and returned like normally, and only added to the + // errors at the end. + clientBasedGetter := func(s string) error { + if err := dependencyCall(); err != nil { + // wrap here, deeper into the stack + return errors.Wrap(err, "dependency") + } + + return nil + } + + for _, i := range items { + if err := clientBasedGetter(i); err != nil { + // add here, within the iteraton loop + errs.Add(err) + } + } +} + +// ExampleErrorsErr describes retrieving the non-recoverable error. +func Example_errors_Err() { + errs := fault.New(false) + errs.Fail(errors.New("catastrophe")) + + // Err() gets the primary failure error. + err := errs.Err() + fmt.Println(err) + + // if multiple Failures occur, each one after the first gets + // added to the Errs slice. + errs.Fail(errors.New("another catastrophe")) + errSl := errs.Errs() + + for _, e := range errSl { + fmt.Println(e) + } + + // If Err() is nil, then you can assume the operation completed. + // A complete operation is not necessarily an error-free operation. + // + // Even if Err() is nil, Errs() can be non-empty. + // Make sure you check both. + + errs = fault.New(true) + + // If failFast is set to true, then the first error Add()ed gets + // promoted to the Err() position. + + errs.Add(errors.New("not catastrophic, but still becomes the Err()")) + err = errs.Err() + fmt.Println(err) + + // Output: catastrophe + // another catastrophe + // not catastrophic, but still becomes the Err() +} + +// ExampleErrorsErrs describes retrieving individual errors. +func Example_errors_Errs() { + errs := fault.New(false) + errs.Add(errors.New("not catastrophic")) + errs.Add(errors.New("something unwanted")) + + // Errs() gets the slice errors that were recorded, but were + // considered recoverable. + errSl := errs.Errs() + for _, err := range errSl { + fmt.Println(err) + } + + // Errs() only needs to be investigated by the end user at the + // conclusion of an operation. Checking Errs() within lower- + // layer code is a smell. Funcs should return an error if they + // need upstream handlers to recognize failure states. + // + // If Errs() is nil, then you can assume that no recoverable or + // iteration-based errors occurred. But that does not necessarily + // mean the operation was able to complete. + // + // Even if Errs() contains zero items, Err() can be non-nil. + // Make sure you check both. + + // Output: not catastrophic + // something unwanted +} + +// ExampleErrorsE2e showcases a more complex integration. +func Example_errors_e2e() { + oper := newOperation() + + // imagine that we're a user, calling into corso SDK. + // (fake funcs used here to minimize example bloat) + // + // The operation is our controller, we expect it to + // generate a new fault.Errors when constructed, and + // to return that struct when we call Run() + errs := oper.Run() + + // Let's investigate what went on inside. Since we're at + // the top of our controller, and returning a fault.Errors, + // all the error handlers set the Fail() case. + /* Run() */ + func() *fault.Errors { + if err := connectClient(); err != nil { + // Fail() here; we're top level in the controller + // and this is a non-recoverable issue + return oper.Errors.Fail(err) + } + + data, err := getData() + if err != nil { + return oper.Errors.Fail(err) + } + + // storeData will aggregate iterated errors into + // oper.Errors. + storeData(data, oper.Errors) + + // return oper.Errors here, in part to ensure it's + // non-nil, and because we don't know if we've + // aggregated any iterated errors yet. + return oper.Errors + }() + + // What about the lower level handling? storeData didn't + // return an error, so what's happening there? + /* storeData */ + func(data []any, errs *fault.Errors) { + // this is downstream in our code somewhere + storer := func(a any) error { + if err := dependencyCall(); err != nil { + // we're not passing in or calling fault.Errors here, + // because this isn't the iteration handler, it's just + // a regular error. + return errors.Wrap(err, "dependency") + } + + return nil + } + + for _, d := range data { + if errs.Err() != nil { + break + } + + if err := storer(d); err != nil { + // Since we're at the top of the iteration, we need + // to add each error to the fault.Errors struct. + errs.Add(err) + } + } + }(nil, nil) + + // then at the end of the oper.Run, we investigate the results. + if errs.Err() != nil { + // handle the primary error + fmt.Println("err occurred", errs.Err()) + } + + for _, err := range errs.Errs() { + // handle each recoverable error + fmt.Println("recoverable err occurred", err) + } +} diff --git a/src/pkg/fault/fault.go b/src/pkg/fault/fault.go new file mode 100644 index 000000000..f4171ce77 --- /dev/null +++ b/src/pkg/fault/fault.go @@ -0,0 +1,127 @@ +package fault + +import ( + "sync" + + "golang.org/x/exp/slices" +) + +type Errors struct { + mu *sync.Mutex + + // err identifies non-recoverable errors. This includes + // non-start cases (ex: cannot connect to client), hard- + // stop issues (ex: credentials expired) or conscious exit + // cases (ex: iteration error + failFast config). + err error + + // errs is the accumulation of recoverable or iterated + // errors. Eg: if a process is retrieving N items, and + // 1 of the items fails to be retrieved, but the rest of + // them succeed, we'd expect to see 1 error added to this + // slice. + errs []error + + // if failFast is true, the first errs addition will + // get promoted to the err value. This signifies a + // non-recoverable processing state, causing any running + // processes to exit. + failFast bool +} + +// ErrorsData provides the errors data alone, without sync +// controls, allowing the data to be persisted. +type ErrorsData struct { + Err error `json:"err"` + Errs []error `json:"errs"` + FailFast bool `json:"failFast"` +} + +// New constructs a new error with default values in place. +func New(failFast bool) *Errors { + return &Errors{ + mu: &sync.Mutex{}, + errs: []error{}, + failFast: failFast, + } +} + +// Err returns the primary error. If not nil, this +// indicates the operation exited prior to completion. +func (e *Errors) Err() error { + return e.err +} + +// Errs returns the slice of recoverable and +// iterated errors. +func (e *Errors) Errs() []error { + return e.errs +} + +// Data returns the plain set of error data +// without any sync properties. +func (e *Errors) Data() ErrorsData { + return ErrorsData{ + Err: e.err, + Errs: slices.Clone(e.errs), + FailFast: e.failFast, + } +} + +// TODO: introduce Failer interface + +// Fail sets the non-recoverable error (ie: errors.err) +// in the errors struct. If a non-recoverable error is +// already present, the error gets added to the errs slice. +func (e *Errors) Fail(err error) *Errors { + if err == nil { + return e + } + + e.mu.Lock() + defer e.mu.Unlock() + + return e.setErr(err) +} + +// setErr handles setting errors.err. Sync locking gets +// handled upstream of this call. +func (e *Errors) setErr(err error) *Errors { + if e.err != nil { + return e.addErr(err) + } + + e.err = err + + return e +} + +// TODO: introduce Adder interface + +// Add appends the error to the slice of recoverable and +// iterated errors (ie: errors.errs). If failFast is true, +// the first Added error will get copied to errors.err, +// causing the errors struct to identify as non-recoverably +// failed. +func (e *Errors) Add(err error) *Errors { + if err == nil { + return e + } + + e.mu.Lock() + defer e.mu.Unlock() + + return e.addErr(err) +} + +// addErr handles adding errors to errors.errs. Sync locking +// gets handled upstream of this call. +func (e *Errors) addErr(err error) *Errors { + if e.err == nil && e.failFast { + e.setErr(err) + } + + e.errs = append(e.errs, err) + + return e +} diff --git a/src/pkg/fault/fault_test.go b/src/pkg/fault/fault_test.go new file mode 100644 index 000000000..3f5ad127c --- /dev/null +++ b/src/pkg/fault/fault_test.go @@ -0,0 +1,202 @@ +package fault_test + +import ( + "errors" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "github.com/stretchr/testify/suite" + + "github.com/alcionai/corso/src/pkg/fault" +) + +type FaultErrorsUnitSuite struct { + suite.Suite +} + +func TestFaultErrorsUnitSuite(t *testing.T) { + suite.Run(t, new(FaultErrorsUnitSuite)) +} + +func (suite *FaultErrorsUnitSuite) TestNew() { + t := suite.T() + + n := fault.New(false) + assert.NotNil(t, n) + + n = fault.New(true) + assert.NotNil(t, n) +} + +func (suite *FaultErrorsUnitSuite) TestErr() { + table := []struct { + name string + failFast bool + fail error + add error + expect assert.ErrorAssertionFunc + }{ + { + name: "nil", + expect: assert.NoError, + }, + { + name: "nil, failFast", + failFast: true, + expect: assert.NoError, + }, + { + name: "failed", + fail: assert.AnError, + expect: assert.Error, + }, + { + name: "failed, failFast", + fail: assert.AnError, + failFast: true, + expect: assert.Error, + }, + { + name: "added", + add: assert.AnError, + expect: assert.NoError, + }, + { + name: "added, failFast", + add: assert.AnError, + failFast: true, + expect: assert.Error, + }, + } + for _, test := range table { + suite.T().Run(test.name, func(t *testing.T) { + n := fault.New(test.failFast) + require.NotNil(t, n) + + e := n.Fail(test.fail) + require.NotNil(t, e) + + e = n.Add(test.add) + require.NotNil(t, e) + + test.expect(t, n.Err()) + }) + } +} + +func (suite *FaultErrorsUnitSuite) TestFail() { + t := suite.T() + + n := fault.New(false) + require.NotNil(t, n) + + n.Fail(assert.AnError) + assert.Error(t, n.Err()) + assert.Empty(t, n.Errs()) + + n.Fail(assert.AnError) + assert.Error(t, n.Err()) + assert.NotEmpty(t, n.Errs()) +} + +func (suite *FaultErrorsUnitSuite) TestErrs() { + table := []struct { + name string + failFast bool + fail error + add error + expect assert.ValueAssertionFunc + }{ + { + name: "nil", + expect: assert.Empty, + }, + { + name: "nil, failFast", + failFast: true, + expect: assert.Empty, + }, + { + name: "failed", + fail: assert.AnError, + expect: assert.Empty, + }, + { + name: "failed, failFast", + fail: assert.AnError, + failFast: true, + expect: assert.Empty, + }, + { + name: "added", + add: assert.AnError, + expect: assert.NotEmpty, + }, + { + name: "added, failFast", + add: assert.AnError, + failFast: true, + expect: assert.NotEmpty, + }, + } + for _, test := range table { + suite.T().Run(test.name, func(t *testing.T) { + n := fault.New(test.failFast) + require.NotNil(t, n) + + e := n.Fail(test.fail) + require.NotNil(t, e) + + e = n.Add(test.add) + require.NotNil(t, e) + + test.expect(t, n.Errs()) + }) + } +} + +func (suite *FaultErrorsUnitSuite) TestAdd() { + t := suite.T() + + n := fault.New(true) + require.NotNil(t, n) + + n.Add(assert.AnError) + assert.Error(t, n.Err()) + assert.Len(t, n.Errs(), 1) + + n.Add(assert.AnError) + assert.Error(t, n.Err()) + assert.Len(t, n.Errs(), 2) +} + +func (suite *FaultErrorsUnitSuite) TestData() { + t := suite.T() + + // not fail-fast + n := fault.New(false) + require.NotNil(t, n) + + n.Fail(errors.New("fail")) + n.Add(errors.New("1")) + n.Add(errors.New("2")) + + d := n.Data() + assert.Equal(t, n.Err(), d.Err) + assert.ElementsMatch(t, n.Errs(), d.Errs) + assert.False(t, d.FailFast) + + // fail-fast + n = fault.New(true) + require.NotNil(t, n) + + n.Fail(errors.New("fail")) + n.Add(errors.New("1")) + n.Add(errors.New("2")) + + d = n.Data() + assert.Equal(t, n.Err(), d.Err) + assert.ElementsMatch(t, n.Errs(), d.Errs) + assert.True(t, d.FailFast) +} From b71e3d9147f97e3deecbf979a988f210bc25279e Mon Sep 17 00:00:00 2001 From: ashmrtn Date: Wed, 25 Jan 2023 18:26:05 -0800 Subject: [PATCH 10/18] Fix case comparison in switch cases (#2265) ## Description Fix case comparisons and add more robust tests for service at least ## Does this PR need a docs update or release note? - [ ] :white_check_mark: Yes, it's included - [ ] :clock1: Yes, but in a later PR - [x] :no_entry: No ## Type of change - [ ] :sunflower: Feature - [x] :bug: Bugfix - [ ] :world_map: Documentation - [ ] :robot: Test - [ ] :computer: CI/Deployment - [ ] :broom: Tech Debt/Cleanup ## Issue(s) * #2259 ## Test Plan - [ ] :muscle: Manual - [x] :zap: Unit test - [ ] :green_heart: E2E --- src/pkg/path/resource_path.go | 28 +++++++------- src/pkg/path/service_category_test.go | 56 +++++++++++++++++++++++---- 2 files changed, 62 insertions(+), 22 deletions(-) diff --git a/src/pkg/path/resource_path.go b/src/pkg/path/resource_path.go index b8113618e..07f9f429c 100644 --- a/src/pkg/path/resource_path.go +++ b/src/pkg/path/resource_path.go @@ -35,17 +35,17 @@ func toServiceType(service string) ServiceType { s := strings.ToLower(service) switch s { - case ExchangeService.String(): + case strings.ToLower(ExchangeService.String()): return ExchangeService - case OneDriveService.String(): + case strings.ToLower(OneDriveService.String()): return OneDriveService - case SharePointService.String(): + case strings.ToLower(SharePointService.String()): return SharePointService - case ExchangeMetadataService.String(): + case strings.ToLower(ExchangeMetadataService.String()): return ExchangeMetadataService - case OneDriveMetadataService.String(): + case strings.ToLower(OneDriveMetadataService.String()): return OneDriveMetadataService - case SharePointMetadataService.String(): + case strings.ToLower(SharePointMetadataService.String()): return SharePointMetadataService default: return UnknownService @@ -77,21 +77,21 @@ func ToCategoryType(category string) CategoryType { cat := strings.ToLower(category) switch cat { - case EmailCategory.String(): + case strings.ToLower(EmailCategory.String()): return EmailCategory - case ContactsCategory.String(): + case strings.ToLower(ContactsCategory.String()): return ContactsCategory - case EventsCategory.String(): + case strings.ToLower(EventsCategory.String()): return EventsCategory - case FilesCategory.String(): + case strings.ToLower(FilesCategory.String()): return FilesCategory - case LibrariesCategory.String(): + case strings.ToLower(LibrariesCategory.String()): return LibrariesCategory - case ListsCategory.String(): + case strings.ToLower(ListsCategory.String()): return ListsCategory - case PagesCategory.String(): + case strings.ToLower(PagesCategory.String()): return PagesCategory - case DetailsCategory.String(): + case strings.ToLower(DetailsCategory.String()): return DetailsCategory default: return UnknownCategory diff --git a/src/pkg/path/service_category_test.go b/src/pkg/path/service_category_test.go index 72e389e2a..a97f22cd8 100644 --- a/src/pkg/path/service_category_test.go +++ b/src/pkg/path/service_category_test.go @@ -74,14 +74,6 @@ func (suite *ServiceCategoryUnitSuite) TestValidateServiceAndCategory() { category: "foo", check: assert.Error, }, - { - name: "DifferentCases", - service: strings.ToUpper(ExchangeService.String()), - category: strings.ToUpper(EmailCategory.String()), - expectedService: ExchangeService, - expectedCategory: EmailCategory, - check: assert.NoError, - }, { name: "ExchangeEmail", service: ExchangeService.String(), @@ -137,3 +129,51 @@ func (suite *ServiceCategoryUnitSuite) TestValidateServiceAndCategory() { }) } } + +func (suite *ServiceCategoryUnitSuite) TestToServiceType() { + table := []struct { + name string + service string + expected ServiceType + }{ + { + name: "SameCase", + service: ExchangeMetadataService.String(), + expected: ExchangeMetadataService, + }, + { + name: "DifferentCase", + service: strings.ToUpper(ExchangeMetadataService.String()), + expected: ExchangeMetadataService, + }, + } + for _, test := range table { + suite.T().Run(test.name, func(t *testing.T) { + assert.Equal(t, test.expected, toServiceType(test.service)) + }) + } +} + +func (suite *ServiceCategoryUnitSuite) TestToCategoryType() { + table := []struct { + name string + category string + expected CategoryType + }{ + { + name: "SameCase", + category: EmailCategory.String(), + expected: EmailCategory, + }, + { + name: "DifferentCase", + category: strings.ToUpper(EmailCategory.String()), + expected: EmailCategory, + }, + } + for _, test := range table { + suite.T().Run(test.name, func(t *testing.T) { + assert.Equal(t, test.expected, ToCategoryType(test.category)) + }) + } +} From bda8a5c60c303141ecf230f26f7a9d5380a3f1de Mon Sep 17 00:00:00 2001 From: Keepers Date: Wed, 25 Jan 2023 21:27:52 -0700 Subject: [PATCH 11/18] add itemClient for re-usable od item downloads (#2266) ## Description onedrive currently constructs a new http client for every file it downloads. This causes the OS to generate extra sockets, and hang onto them after the download is complete. Replacing these one-off clients with a singular, re-used client- which is the behavior and standard suggested for golang http clients- minimizes system resource consumption. ## Does this PR need a docs update or release note? - [x] :no_entry: No ## Type of change - [x] :bug: Bugfix ## Issue(s) * closes #2262 ## Test Plan - [x] :muscle: Manual - [x] :zap: Unit test - [x] :green_heart: E2E --- src/cli/backup/sharepoint.go | 3 +- src/cmd/factory/impl/common.go | 3 +- src/cmd/getM365/getItem.go | 3 +- src/cmd/purge/purge.go | 2 +- src/internal/connector/data_collections.go | 2 + .../connector/data_collections_test.go | 16 ++++---- .../connector/graph/service_helper.go | 29 +++++++++++++ src/internal/connector/graph_connector.go | 14 +++++-- .../graph_connector_disconnected_test.go | 3 +- .../connector/graph_connector_helper_test.go | 5 ++- .../connector/graph_connector_test.go | 10 ++--- src/internal/connector/onedrive/collection.go | 21 +++++++--- .../connector/onedrive/collection_test.go | 20 ++++----- .../connector/onedrive/collections.go | 10 ++++- .../connector/onedrive/collections_test.go | 2 + src/internal/connector/onedrive/drive_test.go | 2 + src/internal/connector/onedrive/item.go | 41 +++++-------------- src/internal/connector/onedrive/item_test.go | 2 +- .../connector/sharepoint/data_collections.go | 5 +++ .../sharepoint/data_collections_test.go | 2 + .../operations/backup_integration_test.go | 2 +- src/internal/operations/operation.go | 3 +- src/pkg/services/m365/m365.go | 7 ++-- 23 files changed, 132 insertions(+), 75 deletions(-) diff --git a/src/cli/backup/sharepoint.go b/src/cli/backup/sharepoint.go index 4eb62dca0..fb4f7e766 100644 --- a/src/cli/backup/sharepoint.go +++ b/src/cli/backup/sharepoint.go @@ -13,6 +13,7 @@ import ( . "github.com/alcionai/corso/src/cli/print" "github.com/alcionai/corso/src/cli/utils" "github.com/alcionai/corso/src/internal/connector" + "github.com/alcionai/corso/src/internal/connector/graph" "github.com/alcionai/corso/src/internal/kopia" "github.com/alcionai/corso/src/internal/model" "github.com/alcionai/corso/src/pkg/backup" @@ -209,7 +210,7 @@ func createSharePointCmd(cmd *cobra.Command, args []string) error { defer utils.CloseRepo(ctx, r) - gc, err := connector.NewGraphConnector(ctx, acct, connector.Sites) + gc, err := connector.NewGraphConnector(ctx, graph.LargeItemClient(), acct, connector.Sites) if err != nil { return Only(ctx, errors.Wrap(err, "Failed to connect to Microsoft APIs")) } diff --git a/src/cmd/factory/impl/common.go b/src/cmd/factory/impl/common.go index 369d80d20..585118442 100644 --- a/src/cmd/factory/impl/common.go +++ b/src/cmd/factory/impl/common.go @@ -12,6 +12,7 @@ import ( . "github.com/alcionai/corso/src/cli/print" "github.com/alcionai/corso/src/internal/common" "github.com/alcionai/corso/src/internal/connector" + "github.com/alcionai/corso/src/internal/connector/graph" "github.com/alcionai/corso/src/internal/connector/mockconnector" "github.com/alcionai/corso/src/internal/data" "github.com/alcionai/corso/src/pkg/account" @@ -111,7 +112,7 @@ func getGCAndVerifyUser(ctx context.Context, userID string) (*connector.GraphCon } // build a graph connector - gc, err := connector.NewGraphConnector(ctx, acct, connector.Users) + gc, err := connector.NewGraphConnector(ctx, graph.LargeItemClient(), acct, connector.Users) if err != nil { return nil, account.Account{}, errors.Wrap(err, "connecting to graph api") } diff --git a/src/cmd/getM365/getItem.go b/src/cmd/getM365/getItem.go index 25961a236..24ce81d9a 100644 --- a/src/cmd/getM365/getItem.go +++ b/src/cmd/getM365/getItem.go @@ -19,6 +19,7 @@ import ( "github.com/alcionai/corso/src/internal/common" "github.com/alcionai/corso/src/internal/connector" "github.com/alcionai/corso/src/internal/connector/exchange/api" + "github.com/alcionai/corso/src/internal/connector/graph" "github.com/alcionai/corso/src/pkg/account" "github.com/alcionai/corso/src/pkg/backup/details" "github.com/alcionai/corso/src/pkg/credentials" @@ -177,7 +178,7 @@ func getGC(ctx context.Context) (*connector.GraphConnector, account.M365Config, return nil, m365Cfg, Only(ctx, errors.Wrap(err, "finding m365 account details")) } - gc, err := connector.NewGraphConnector(ctx, acct, connector.Users) + gc, err := connector.NewGraphConnector(ctx, graph.LargeItemClient(), acct, connector.Users) if err != nil { return nil, m365Cfg, Only(ctx, errors.Wrap(err, "connecting to graph API")) } diff --git a/src/cmd/purge/purge.go b/src/cmd/purge/purge.go index a59f7c2b8..32100772d 100644 --- a/src/cmd/purge/purge.go +++ b/src/cmd/purge/purge.go @@ -255,7 +255,7 @@ func getGC(ctx context.Context) (*connector.GraphConnector, error) { } // build a graph connector - gc, err := connector.NewGraphConnector(ctx, acct, connector.Users) + gc, err := connector.NewGraphConnector(ctx, graph.LargeItemClient(), acct, connector.Users) if err != nil { return nil, Only(ctx, errors.Wrap(err, "connecting to graph api")) } diff --git a/src/internal/connector/data_collections.go b/src/internal/connector/data_collections.go index 7eaf1c517..0b6d20b27 100644 --- a/src/internal/connector/data_collections.go +++ b/src/internal/connector/data_collections.go @@ -87,6 +87,7 @@ func (gc *GraphConnector) DataCollections( case selectors.ServiceSharePoint: colls, err := sharepoint.DataCollections( ctx, + gc.itemClient, sels, gc.credentials.AzureTenantID, gc.Service, @@ -198,6 +199,7 @@ func (gc *GraphConnector) OneDriveDataCollections( logger.Ctx(ctx).With("user", user).Debug("Creating OneDrive collections") odcs, err := onedrive.NewCollections( + gc.itemClient, gc.credentials.AzureTenantID, user, onedrive.OneDriveSource, diff --git a/src/internal/connector/data_collections_test.go b/src/internal/connector/data_collections_test.go index e16aa4b51..57332ce1a 100644 --- a/src/internal/connector/data_collections_test.go +++ b/src/internal/connector/data_collections_test.go @@ -10,6 +10,7 @@ import ( "github.com/stretchr/testify/suite" "github.com/alcionai/corso/src/internal/connector/exchange" + "github.com/alcionai/corso/src/internal/connector/graph" "github.com/alcionai/corso/src/internal/connector/sharepoint" "github.com/alcionai/corso/src/internal/tester" "github.com/alcionai/corso/src/pkg/control" @@ -43,7 +44,7 @@ func (suite *ConnectorDataCollectionIntegrationSuite) SetupSuite() { tester.MustGetEnvVars(suite.T(), tester.M365AcctCredEnvs...) - suite.connector = loadConnector(ctx, suite.T(), AllResources) + suite.connector = loadConnector(ctx, suite.T(), graph.LargeItemClient(), AllResources) suite.user = tester.M365UserID(suite.T()) suite.site = tester.M365SiteID(suite.T()) @@ -62,7 +63,7 @@ func (suite *ConnectorDataCollectionIntegrationSuite) TestExchangeDataCollection selUsers := []string{suite.user} - connector := loadConnector(ctx, suite.T(), Users) + connector := loadConnector(ctx, suite.T(), graph.LargeItemClient(), Users) tests := []struct { name string getSelector func(t *testing.T) selectors.Selector @@ -138,7 +139,7 @@ func (suite *ConnectorDataCollectionIntegrationSuite) TestDataCollections_invali owners := []string{"snuffleupagus"} - connector := loadConnector(ctx, suite.T(), Users) + connector := loadConnector(ctx, suite.T(), graph.LargeItemClient(), Users) tests := []struct { name string getSelector func(t *testing.T) selectors.Selector @@ -214,7 +215,7 @@ func (suite *ConnectorDataCollectionIntegrationSuite) TestSharePointDataCollecti selSites := []string{suite.site} - connector := loadConnector(ctx, suite.T(), Sites) + connector := loadConnector(ctx, suite.T(), graph.LargeItemClient(), Sites) tests := []struct { name string expected int @@ -243,6 +244,7 @@ func (suite *ConnectorDataCollectionIntegrationSuite) TestSharePointDataCollecti suite.T().Run(test.name, func(t *testing.T) { collections, err := sharepoint.DataCollections( ctx, + graph.LargeItemClient(), test.getSelector(), connector.credentials.AzureTenantID, connector.Service, @@ -298,7 +300,7 @@ func (suite *ConnectorCreateSharePointCollectionIntegrationSuite) SetupSuite() { tester.MustGetEnvSets(suite.T(), tester.M365AcctCredEnvs) - suite.connector = loadConnector(ctx, suite.T(), Sites) + suite.connector = loadConnector(ctx, suite.T(), graph.LargeItemClient(), Sites) suite.user = tester.M365UserID(suite.T()) tester.LogTimeOfTest(suite.T()) @@ -311,7 +313,7 @@ func (suite *ConnectorCreateSharePointCollectionIntegrationSuite) TestCreateShar var ( t = suite.T() siteID = tester.M365SiteID(t) - gc = loadConnector(ctx, t, Sites) + gc = loadConnector(ctx, t, graph.LargeItemClient(), Sites) siteIDs = []string{siteID} ) @@ -335,7 +337,7 @@ func (suite *ConnectorCreateSharePointCollectionIntegrationSuite) TestCreateShar var ( t = suite.T() siteID = tester.M365SiteID(t) - gc = loadConnector(ctx, t, Sites) + gc = loadConnector(ctx, t, graph.LargeItemClient(), Sites) siteIDs = []string{siteID} ) diff --git a/src/internal/connector/graph/service_helper.go b/src/internal/connector/graph/service_helper.go index b374933f8..a69183a79 100644 --- a/src/internal/connector/graph/service_helper.go +++ b/src/internal/connector/graph/service_helper.go @@ -55,6 +55,31 @@ func CreateHTTPClient() *http.Client { return httpClient } +// LargeItemClient generates a client that's configured to handle +// large file downloads. This client isn't suitable for other queries +// due to loose restrictions on timeouts and such. +// +// Re-use of http clients is critical, or else we leak os resources +// and consume relatively unbound socket connections. It is important +// to centralize this client to be passed downstream where api calls +// can utilize it on a per-download basis. +// +// TODO: this should get owned by an API client layer, not the GC itself. +func LargeItemClient() *http.Client { + httpClient := CreateHTTPClient() + httpClient.Timeout = 0 // infinite timeout for pulling large files + httpClient.Transport = &http.Transport{ + Proxy: http.ProxyFromEnvironment, + ForceAttemptHTTP2: true, + MaxIdleConns: 100, + IdleConnTimeout: 30 * time.Second, + TLSHandshakeTimeout: 10 * time.Second, + ExpectContinueTimeout: 1 * time.Second, + } + + return httpClient +} + // --------------------------------------------------------------------------- // Logging Middleware // --------------------------------------------------------------------------- @@ -85,6 +110,10 @@ func (handler *LoggingMiddleware) Intercept( logger.Ctx(ctx).Infow("graph api throttling", "method", req.Method, "url", req.URL) } + if resp.StatusCode != http.StatusTooManyRequests && (resp.StatusCode/100) != 2 { + logger.Ctx(ctx).Infow("graph api error", "method", req.Method, "url", req.URL) + } + if logger.DebugAPI || os.Getenv(logGraphRequestsEnvKey) != "" { respDump, _ := httputil.DumpResponse(resp, true) diff --git a/src/internal/connector/graph_connector.go b/src/internal/connector/graph_connector.go index 68f03d048..3dbc0e60c 100644 --- a/src/internal/connector/graph_connector.go +++ b/src/internal/connector/graph_connector.go @@ -4,6 +4,7 @@ package connector import ( "context" + "net/http" "runtime/trace" "strings" "sync" @@ -38,8 +39,9 @@ import ( // GraphRequestAdapter from the msgraph-sdk-go. Additional fields are for // bookkeeping and interfacing with other component. type GraphConnector struct { - Service graph.Servicer - Owners api.Client + Service graph.Servicer + Owners api.Client + itemClient *http.Client // configured to handle large item downloads tenant string Users map[string]string // key value @@ -64,13 +66,19 @@ const ( Sites ) -func NewGraphConnector(ctx context.Context, acct account.Account, r resource) (*GraphConnector, error) { +func NewGraphConnector( + ctx context.Context, + itemClient *http.Client, + acct account.Account, + r resource, +) (*GraphConnector, error) { m365, err := acct.M365Config() if err != nil { return nil, errors.Wrap(err, "retrieving m365 account configuration") } gc := GraphConnector{ + itemClient: itemClient, tenant: m365.AzureTenantID, Users: make(map[string]string, 0), wg: &sync.WaitGroup{}, diff --git a/src/internal/connector/graph_connector_disconnected_test.go b/src/internal/connector/graph_connector_disconnected_test.go index f7f583ebd..711e55ff8 100644 --- a/src/internal/connector/graph_connector_disconnected_test.go +++ b/src/internal/connector/graph_connector_disconnected_test.go @@ -9,6 +9,7 @@ import ( "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" + "github.com/alcionai/corso/src/internal/connector/graph" "github.com/alcionai/corso/src/internal/connector/support" "github.com/alcionai/corso/src/internal/tester" "github.com/alcionai/corso/src/pkg/account" @@ -65,7 +66,7 @@ func (suite *DisconnectedGraphConnectorSuite) TestBadConnection() { for _, test := range table { suite.T().Run(test.name, func(t *testing.T) { - gc, err := NewGraphConnector(ctx, test.acct(t), Users) + gc, err := NewGraphConnector(ctx, graph.LargeItemClient(), test.acct(t), Users) assert.Nil(t, gc, test.name+" failed") assert.NotNil(t, err, test.name+"failed") }) diff --git a/src/internal/connector/graph_connector_helper_test.go b/src/internal/connector/graph_connector_helper_test.go index c614df05d..ce16a5a4d 100644 --- a/src/internal/connector/graph_connector_helper_test.go +++ b/src/internal/connector/graph_connector_helper_test.go @@ -3,6 +3,7 @@ package connector import ( "context" "io" + "net/http" "reflect" "testing" @@ -976,9 +977,9 @@ func getSelectorWith( } } -func loadConnector(ctx context.Context, t *testing.T, r resource) *GraphConnector { +func loadConnector(ctx context.Context, t *testing.T, itemClient *http.Client, r resource) *GraphConnector { a := tester.NewM365Account(t) - connector, err := NewGraphConnector(ctx, a, r) + connector, err := NewGraphConnector(ctx, itemClient, a, r) require.NoError(t, err) return connector diff --git a/src/internal/connector/graph_connector_test.go b/src/internal/connector/graph_connector_test.go index 71eff095a..85ad3f45f 100644 --- a/src/internal/connector/graph_connector_test.go +++ b/src/internal/connector/graph_connector_test.go @@ -156,7 +156,7 @@ func (suite *GraphConnectorIntegrationSuite) SetupSuite() { tester.MustGetEnvSets(suite.T(), tester.M365AcctCredEnvs) - suite.connector = loadConnector(ctx, suite.T(), Users) + suite.connector = loadConnector(ctx, suite.T(), graph.LargeItemClient(), Users) suite.user = tester.M365UserID(suite.T()) suite.acct = tester.NewM365Account(suite.T()) @@ -380,7 +380,7 @@ func runRestoreBackupTest( start := time.Now() - restoreGC := loadConnector(ctx, t, test.resource) + restoreGC := loadConnector(ctx, t, graph.LargeItemClient(), test.resource) restoreSel := getSelectorWith(t, test.service, resourceOwners, true) deets, err := restoreGC.RestoreDataCollections( ctx, @@ -419,7 +419,7 @@ func runRestoreBackupTest( }) } - backupGC := loadConnector(ctx, t, test.resource) + backupGC := loadConnector(ctx, t, graph.LargeItemClient(), test.resource) backupSel := backupSelectorForExpected(t, test.service, expectedDests) t.Logf("Selective backup of %s\n", backupSel) @@ -870,7 +870,7 @@ func (suite *GraphConnectorIntegrationSuite) TestMultiFolderBackupDifferentNames dest.ContainerName, ) - restoreGC := loadConnector(ctx, t, test.resource) + restoreGC := loadConnector(ctx, t, graph.LargeItemClient(), test.resource) deets, err := restoreGC.RestoreDataCollections(ctx, suite.acct, restoreSel, dest, collections) require.NoError(t, err) require.NotNil(t, deets) @@ -888,7 +888,7 @@ func (suite *GraphConnectorIntegrationSuite) TestMultiFolderBackupDifferentNames // Run a backup and compare its output with what we put in. - backupGC := loadConnector(ctx, t, test.resource) + backupGC := loadConnector(ctx, t, graph.LargeItemClient(), test.resource) backupSel := backupSelectorForExpected(t, test.service, expectedDests) t.Log("Selective backup of", backupSel) diff --git a/src/internal/connector/onedrive/collection.go b/src/internal/connector/onedrive/collection.go index 4ea9ea9eb..ac0aa9fb3 100644 --- a/src/internal/connector/onedrive/collection.go +++ b/src/internal/connector/onedrive/collection.go @@ -4,11 +4,13 @@ package onedrive import ( "context" "io" + "net/http" "sync" "sync/atomic" "time" "github.com/microsoftgraph/msgraph-sdk-go/models" + "github.com/pkg/errors" "github.com/spatialcurrent/go-lazy/pkg/lazy" "github.com/alcionai/corso/src/internal/connector/graph" @@ -43,6 +45,9 @@ var ( // Collection represents a set of OneDrive objects retrieved from M365 type Collection struct { + // configured to handle large item downloads + itemClient *http.Client + // data is used to share data streams with the collection consumer data chan data.Stream // folderPath indicates what level in the hierarchy this collection @@ -64,12 +69,13 @@ type Collection struct { // itemReadFunc returns a reader for the specified item type itemReaderFunc func( - ctx context.Context, + hc *http.Client, item models.DriveItemable, ) (itemInfo details.ItemInfo, itemData io.ReadCloser, err error) // NewCollection creates a Collection func NewCollection( + itemClient *http.Client, folderPath path.Path, driveID string, service graph.Servicer, @@ -78,6 +84,7 @@ func NewCollection( ctrlOpts control.Options, ) *Collection { c := &Collection{ + itemClient: itemClient, folderPath: folderPath, driveItems: map[string]models.DriveItemable{}, driveID: driveID, @@ -198,11 +205,16 @@ func (oc *Collection) populateItems(ctx context.Context) { m.Unlock() } - for _, item := range oc.driveItems { + for id, item := range oc.driveItems { if oc.ctrl.FailFast && errs != nil { break } + if item == nil { + errUpdater(id, errors.New("nil item")) + continue + } + semaphoreCh <- struct{}{} wg.Add(1) @@ -219,10 +231,9 @@ func (oc *Collection) populateItems(ctx context.Context) { ) for i := 1; i <= maxRetries; i++ { - itemInfo, itemData, err = oc.itemReader(ctx, item) - - // retry on Timeout type errors, break otherwise. + itemInfo, itemData, err = oc.itemReader(oc.itemClient, item) if err == nil || graph.IsErrTimeout(err) == nil { + // retry on Timeout type errors, break otherwise. break } diff --git a/src/internal/connector/onedrive/collection_test.go b/src/internal/connector/onedrive/collection_test.go index a19021ff7..a36db58c9 100644 --- a/src/internal/connector/onedrive/collection_test.go +++ b/src/internal/connector/onedrive/collection_test.go @@ -2,9 +2,8 @@ package onedrive import ( "bytes" - "context" - "errors" "io" + "net/http" "sync" "testing" "time" @@ -15,6 +14,7 @@ import ( "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" + "github.com/alcionai/corso/src/internal/connector/graph" "github.com/alcionai/corso/src/internal/connector/support" "github.com/alcionai/corso/src/internal/data" "github.com/alcionai/corso/src/pkg/backup/details" @@ -73,7 +73,7 @@ func (suite *CollectionUnitTestSuite) TestCollection() { name: "oneDrive, no duplicates", numInstances: 1, source: OneDriveSource, - itemReader: func(context.Context, models.DriveItemable) (details.ItemInfo, io.ReadCloser, error) { + itemReader: func(*http.Client, models.DriveItemable) (details.ItemInfo, io.ReadCloser, error) { return details.ItemInfo{OneDrive: &details.OneDriveInfo{ItemName: testItemName, Modified: now}}, io.NopCloser(bytes.NewReader(testItemData)), nil @@ -87,7 +87,7 @@ func (suite *CollectionUnitTestSuite) TestCollection() { name: "oneDrive, duplicates", numInstances: 3, source: OneDriveSource, - itemReader: func(context.Context, models.DriveItemable) (details.ItemInfo, io.ReadCloser, error) { + itemReader: func(*http.Client, models.DriveItemable) (details.ItemInfo, io.ReadCloser, error) { return details.ItemInfo{OneDrive: &details.OneDriveInfo{ItemName: testItemName, Modified: now}}, io.NopCloser(bytes.NewReader(testItemData)), nil @@ -101,7 +101,7 @@ func (suite *CollectionUnitTestSuite) TestCollection() { name: "sharePoint, no duplicates", numInstances: 1, source: SharePointSource, - itemReader: func(context.Context, models.DriveItemable) (details.ItemInfo, io.ReadCloser, error) { + itemReader: func(*http.Client, models.DriveItemable) (details.ItemInfo, io.ReadCloser, error) { return details.ItemInfo{SharePoint: &details.SharePointInfo{ItemName: testItemName, Modified: now}}, io.NopCloser(bytes.NewReader(testItemData)), nil @@ -115,7 +115,7 @@ func (suite *CollectionUnitTestSuite) TestCollection() { name: "sharePoint, duplicates", numInstances: 3, source: SharePointSource, - itemReader: func(context.Context, models.DriveItemable) (details.ItemInfo, io.ReadCloser, error) { + itemReader: func(*http.Client, models.DriveItemable) (details.ItemInfo, io.ReadCloser, error) { return details.ItemInfo{SharePoint: &details.SharePointInfo{ItemName: testItemName, Modified: now}}, io.NopCloser(bytes.NewReader(testItemData)), nil @@ -140,6 +140,7 @@ func (suite *CollectionUnitTestSuite) TestCollection() { require.NoError(t, err) coll := NewCollection( + graph.LargeItemClient(), folderPath, "drive-id", suite, @@ -224,6 +225,7 @@ func (suite *CollectionUnitTestSuite) TestCollectionReadError() { require.NoError(t, err) coll := NewCollection( + graph.LargeItemClient(), folderPath, "fakeDriveID", suite, @@ -235,10 +237,8 @@ func (suite *CollectionUnitTestSuite) TestCollectionReadError() { mockItem.SetId(&testItemID) coll.Add(mockItem) - readError := errors.New("Test error") - - coll.itemReader = func(context.Context, models.DriveItemable) (details.ItemInfo, io.ReadCloser, error) { - return details.ItemInfo{}, nil, readError + coll.itemReader = func(*http.Client, models.DriveItemable) (details.ItemInfo, io.ReadCloser, error) { + return details.ItemInfo{}, nil, assert.AnError } coll.Items() diff --git a/src/internal/connector/onedrive/collections.go b/src/internal/connector/onedrive/collections.go index 8f18aa780..f446aa246 100644 --- a/src/internal/connector/onedrive/collections.go +++ b/src/internal/connector/onedrive/collections.go @@ -3,6 +3,7 @@ package onedrive import ( "context" "fmt" + "net/http" "strings" "github.com/microsoftgraph/msgraph-sdk-go/models" @@ -46,6 +47,9 @@ type folderMatcher interface { // Collections is used to retrieve drive data for a // resource owner, which can be either a user or a sharepoint site. type Collections struct { + // configured to handle large item downloads + itemClient *http.Client + tenant string resourceOwner string source driveSource @@ -66,6 +70,7 @@ type Collections struct { } func NewCollections( + itemClient *http.Client, tenant string, resourceOwner string, source driveSource, @@ -75,6 +80,7 @@ func NewCollections( ctrlOpts control.Options, ) *Collections { return &Collections{ + itemClient: itemClient, tenant: tenant, resourceOwner: resourceOwner, source: source, @@ -265,13 +271,13 @@ func (c *Collections) UpdateCollections( // TODO(ashmrtn): Compare old and new path and set collection state // accordingly. col = NewCollection( + c.itemClient, collectionPath, driveID, c.service, c.statusUpdater, c.source, - c.ctrl, - ) + c.ctrl) c.CollectionMap[collectionPath.String()] = col c.NumContainers++ diff --git a/src/internal/connector/onedrive/collections_test.go b/src/internal/connector/onedrive/collections_test.go index 485b9ed86..21ca061a9 100644 --- a/src/internal/connector/onedrive/collections_test.go +++ b/src/internal/connector/onedrive/collections_test.go @@ -10,6 +10,7 @@ import ( "github.com/stretchr/testify/suite" "golang.org/x/exp/maps" + "github.com/alcionai/corso/src/internal/connector/graph" "github.com/alcionai/corso/src/internal/tester" "github.com/alcionai/corso/src/pkg/control" "github.com/alcionai/corso/src/pkg/selectors" @@ -587,6 +588,7 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() { outputFolderMap := map[string]string{} maps.Copy(outputFolderMap, tt.inputFolderMap) c := NewCollections( + graph.LargeItemClient(), tenant, user, OneDriveSource, diff --git a/src/internal/connector/onedrive/drive_test.go b/src/internal/connector/onedrive/drive_test.go index 7f6901621..755b7293b 100644 --- a/src/internal/connector/onedrive/drive_test.go +++ b/src/internal/connector/onedrive/drive_test.go @@ -9,6 +9,7 @@ import ( "github.com/stretchr/testify/suite" "github.com/alcionai/corso/src/internal/common" + "github.com/alcionai/corso/src/internal/connector/graph" "github.com/alcionai/corso/src/internal/tester" "github.com/alcionai/corso/src/pkg/control" "github.com/alcionai/corso/src/pkg/logger" @@ -146,6 +147,7 @@ func (suite *OneDriveSuite) TestOneDriveNewCollections() { NewOneDriveBackup([]string{test.user}). AllData()[0] odcs, err := NewCollections( + graph.LargeItemClient(), creds.AzureTenantID, test.user, OneDriveSource, diff --git a/src/internal/connector/onedrive/item.go b/src/internal/connector/onedrive/item.go index 73391033b..00ec1f630 100644 --- a/src/internal/connector/onedrive/item.go +++ b/src/internal/connector/onedrive/item.go @@ -4,6 +4,7 @@ import ( "context" "fmt" "io" + "net/http" "strings" msdrives "github.com/microsoftgraph/msgraph-sdk-go/drives" @@ -27,7 +28,7 @@ const ( // It crafts this by querying M365 for a download URL for the item // and using a http client to initialize a reader func sharePointItemReader( - ctx context.Context, + hc *http.Client, item models.DriveItemable, ) (details.ItemInfo, io.ReadCloser, error) { url, ok := item.GetAdditionalData()[downloadURLKey].(*string) @@ -35,7 +36,7 @@ func sharePointItemReader( return details.ItemInfo{}, nil, fmt.Errorf("failed to get url for %s", *item.GetName()) } - rc, err := driveItemReader(ctx, *url) + resp, err := hc.Get(*url) if err != nil { return details.ItemInfo{}, nil, err } @@ -44,14 +45,14 @@ func sharePointItemReader( SharePoint: sharePointItemInfo(item, *item.GetSize()), } - return dii, rc, nil + return dii, resp.Body, nil } // oneDriveItemReader will return a io.ReadCloser for the specified item // It crafts this by querying M365 for a download URL for the item // and using a http client to initialize a reader func oneDriveItemReader( - ctx context.Context, + hc *http.Client, item models.DriveItemable, ) (details.ItemInfo, io.ReadCloser, error) { url, ok := item.GetAdditionalData()[downloadURLKey].(*string) @@ -59,7 +60,7 @@ func oneDriveItemReader( return details.ItemInfo{}, nil, fmt.Errorf("failed to get url for %s", *item.GetName()) } - rc, err := driveItemReader(ctx, *url) + resp, err := hc.Get(*url) if err != nil { return details.ItemInfo{}, nil, err } @@ -68,25 +69,7 @@ func oneDriveItemReader( OneDrive: oneDriveItemInfo(item, *item.GetSize()), } - return dii, rc, nil -} - -// driveItemReader will return a io.ReadCloser for the specified item -// It crafts this by querying M365 for a download URL for the item -// and using a http client to initialize a reader -func driveItemReader( - ctx context.Context, - url string, -) (io.ReadCloser, error) { - httpClient := graph.CreateHTTPClient() - httpClient.Timeout = 0 // infinite timeout for pulling large files - - resp, err := httpClient.Get(url) - if err != nil { - return nil, errors.Wrapf(err, "failed to download file from %s", url) - } - - return resp.Body, nil + return dii, resp.Body, nil } // oneDriveItemInfo will populate a details.OneDriveInfo struct @@ -97,7 +80,7 @@ func driveItemReader( func oneDriveItemInfo(di models.DriveItemable, itemSize int64) *details.OneDriveInfo { var email, parent string - if di.GetCreatedBy().GetUser() != nil { + if di.GetCreatedBy() != nil && di.GetCreatedBy().GetUser() != nil { // User is sometimes not available when created via some // external applications (like backup/restore solutions) ed, ok := di.GetCreatedBy().GetUser().GetAdditionalData()["email"] @@ -106,11 +89,9 @@ func oneDriveItemInfo(di models.DriveItemable, itemSize int64) *details.OneDrive } } - if di.GetParentReference() != nil { - if di.GetParentReference().GetName() != nil { - // EndPoint is not always populated from external apps - parent = *di.GetParentReference().GetName() - } + if di.GetParentReference() != nil && di.GetParentReference().GetName() != nil { + // EndPoint is not always populated from external apps + parent = *di.GetParentReference().GetName() } return &details.OneDriveInfo{ diff --git a/src/internal/connector/onedrive/item_test.go b/src/internal/connector/onedrive/item_test.go index b3f45ca56..2b28f0910 100644 --- a/src/internal/connector/onedrive/item_test.go +++ b/src/internal/connector/onedrive/item_test.go @@ -126,7 +126,7 @@ func (suite *ItemIntegrationSuite) TestItemReader_oneDrive() { // Read data for the file - itemInfo, itemData, err := oneDriveItemReader(ctx, driveItem) + itemInfo, itemData, err := oneDriveItemReader(graph.LargeItemClient(), driveItem) require.NoError(suite.T(), err) require.NotNil(suite.T(), itemInfo.OneDrive) require.NotEmpty(suite.T(), itemInfo.OneDrive.ItemName) diff --git a/src/internal/connector/sharepoint/data_collections.go b/src/internal/connector/sharepoint/data_collections.go index d7c6547a0..5950daede 100644 --- a/src/internal/connector/sharepoint/data_collections.go +++ b/src/internal/connector/sharepoint/data_collections.go @@ -2,6 +2,7 @@ package sharepoint import ( "context" + "net/http" "github.com/pkg/errors" @@ -24,6 +25,7 @@ type statusUpdater interface { // for the specified user func DataCollections( ctx context.Context, + itemClient *http.Client, selector selectors.Selector, tenantID string, serv graph.Servicer, @@ -66,6 +68,7 @@ func DataCollections( case path.LibrariesCategory: spcs, err = collectLibraries( ctx, + itemClient, serv, tenantID, site, @@ -124,6 +127,7 @@ func collectLists( // all the drives associated with the site. func collectLibraries( ctx context.Context, + itemClient *http.Client, serv graph.Servicer, tenantID, siteID string, scope selectors.SharePointScope, @@ -138,6 +142,7 @@ func collectLibraries( logger.Ctx(ctx).With("site", siteID).Debug("Creating SharePoint Library collections") colls := onedrive.NewCollections( + itemClient, tenantID, siteID, onedrive.SharePointSource, diff --git a/src/internal/connector/sharepoint/data_collections_test.go b/src/internal/connector/sharepoint/data_collections_test.go index 423336253..4daca0877 100644 --- a/src/internal/connector/sharepoint/data_collections_test.go +++ b/src/internal/connector/sharepoint/data_collections_test.go @@ -7,6 +7,7 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/suite" + "github.com/alcionai/corso/src/internal/connector/graph" "github.com/alcionai/corso/src/internal/connector/onedrive" "github.com/alcionai/corso/src/internal/tester" "github.com/alcionai/corso/src/pkg/control" @@ -91,6 +92,7 @@ func (suite *SharePointLibrariesSuite) TestUpdateCollections() { newPaths := map[string]string{} excluded := map[string]struct{}{} c := onedrive.NewCollections( + graph.LargeItemClient(), tenant, site, onedrive.SharePointSource, diff --git a/src/internal/operations/backup_integration_test.go b/src/internal/operations/backup_integration_test.go index 3ee5c0230..83748baa2 100644 --- a/src/internal/operations/backup_integration_test.go +++ b/src/internal/operations/backup_integration_test.go @@ -655,7 +655,7 @@ func (suite *BackupOpIntegrationSuite) TestBackup_Run_exchangeIncrementals() { m365, err := acct.M365Config() require.NoError(t, err) - gc, err := connector.NewGraphConnector(ctx, acct, connector.Users) + gc, err := connector.NewGraphConnector(ctx, graph.LargeItemClient(), acct, connector.Users) require.NoError(t, err) ac, err := api.NewClient(m365) diff --git a/src/internal/operations/operation.go b/src/internal/operations/operation.go index 30770bdf5..e5382b022 100644 --- a/src/internal/operations/operation.go +++ b/src/internal/operations/operation.go @@ -7,6 +7,7 @@ import ( "github.com/pkg/errors" "github.com/alcionai/corso/src/internal/connector" + "github.com/alcionai/corso/src/internal/connector/graph" "github.com/alcionai/corso/src/internal/events" "github.com/alcionai/corso/src/internal/kopia" "github.com/alcionai/corso/src/internal/observe" @@ -107,7 +108,7 @@ func connectToM365( resource = connector.Sites } - gc, err := connector.NewGraphConnector(ctx, acct, resource) + gc, err := connector.NewGraphConnector(ctx, graph.LargeItemClient(), acct, resource) if err != nil { return nil, err } diff --git a/src/pkg/services/m365/m365.go b/src/pkg/services/m365/m365.go index ba9cc2a59..e0dd75af9 100644 --- a/src/pkg/services/m365/m365.go +++ b/src/pkg/services/m365/m365.go @@ -8,6 +8,7 @@ import ( "github.com/alcionai/corso/src/internal/connector" "github.com/alcionai/corso/src/internal/connector/discovery" + "github.com/alcionai/corso/src/internal/connector/graph" "github.com/alcionai/corso/src/pkg/account" ) @@ -20,7 +21,7 @@ type User struct { // Users returns a list of users in the specified M365 tenant // TODO: Implement paging support func Users(ctx context.Context, m365Account account.Account) ([]*User, error) { - gc, err := connector.NewGraphConnector(ctx, m365Account, connector.Users) + gc, err := connector.NewGraphConnector(ctx, graph.LargeItemClient(), m365Account, connector.Users) if err != nil { return nil, errors.Wrap(err, "could not initialize M365 graph connection") } @@ -76,7 +77,7 @@ func UserPNs(ctx context.Context, m365Account account.Account) ([]string, error) // SiteURLs returns a list of SharePoint site WebURLs in the specified M365 tenant func SiteURLs(ctx context.Context, m365Account account.Account) ([]string, error) { - gc, err := connector.NewGraphConnector(ctx, m365Account, connector.Sites) + gc, err := connector.NewGraphConnector(ctx, graph.LargeItemClient(), m365Account, connector.Sites) if err != nil { return nil, errors.Wrap(err, "could not initialize M365 graph connection") } @@ -86,7 +87,7 @@ func SiteURLs(ctx context.Context, m365Account account.Account) ([]string, error // SiteURLs returns a list of SharePoint sites IDs in the specified M365 tenant func SiteIDs(ctx context.Context, m365Account account.Account) ([]string, error) { - gc, err := connector.NewGraphConnector(ctx, m365Account, connector.Sites) + gc, err := connector.NewGraphConnector(ctx, graph.LargeItemClient(), m365Account, connector.Sites) if err != nil { return nil, errors.Wrap(err, "could not initialize M365 graph connection") } From dc02a714467d0ec247016acd0a0de1383b4e9e9d Mon Sep 17 00:00:00 2001 From: Niraj Tolia Date: Wed, 25 Jan 2023 21:27:45 -0800 Subject: [PATCH 12/18] Print Corso version on one line (#2269) ## Description Don't split the Corso version information across lines. This is unexpected. ## Does this PR need a docs update or release note? - [x] :no_entry: No ## Type of change - [x] :broom: Tech Debt/Cleanup --- src/cli/cli.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cli/cli.go b/src/cli/cli.go index be5059809..f06354f0b 100644 --- a/src/cli/cli.go +++ b/src/cli/cli.go @@ -39,7 +39,7 @@ var corsoCmd = &cobra.Command{ func handleCorsoCmd(cmd *cobra.Command, args []string) error { v, _ := cmd.Flags().GetBool("version") if v { - print.Outf(cmd.Context(), "Corso\nversion: "+version.Version) + print.Outf(cmd.Context(), "Corso version: "+version.Version) return nil } From 84a21aef5255f7ddf16bb626aacab33cac1596b8 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 26 Jan 2023 06:30:46 +0000 Subject: [PATCH 13/18] =?UTF-8?q?=E2=AC=86=EF=B8=8F=20Bump=20github.com/aw?= =?UTF-8?q?s/aws-sdk-go=20from=201.44.184=20to=201.44.187=20in=20/src=20(#?= =?UTF-8?q?2270)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [github.com/aws/aws-sdk-go](https://github.com/aws/aws-sdk-go) from 1.44.184 to 1.44.187.
Release notes

Sourced from github.com/aws/aws-sdk-go's releases.

Release v1.44.187 (2023-01-25)

Service Client Updates

  • service/cloudformation: Adds new service
    • Enabled FIPS aws-us-gov endpoints in SDK.
  • service/ec2: Updates service API, documentation, and paginators
    • This release adds new functionality that allows customers to provision IPv6 CIDR blocks through Amazon VPC IP Address Manager (IPAM) as well as allowing customers to utilize IPAM Resource Discovery APIs.
  • service/m2: Updates service API and documentation
  • service/polly: Updates service API
    • Add 5 new neural voices - Sergio (es-ES), Andres (es-MX), Remi (fr-FR), Adriano (it-IT) and Thiago (pt-BR).
  • service/redshift-serverless: Updates service documentation
  • service/s3control: Updates service API
    • Add additional endpoint tests for S3 Control. Fix missing endpoint parameters for PutBucketVersioning and GetBucketVersioning. Prior to this fix, those operations may have resulted in an invalid endpoint being resolved.
  • service/sagemaker: Updates service API and documentation
    • SageMaker Inference Recommender now decouples from Model Registry and could accept Model Name to invoke inference recommendations job; Inference Recommender now provides CPU/Memory Utilization metrics data in recommendation output.
  • service/sts: Updates service documentation
    • Doc only change to update wording in a key topic

Release v1.44.186 (2023-01-24)

Service Client Updates

  • service/databrew: Adds new service
  • service/route53: Updates service API
    • Amazon Route 53 now supports the Asia Pacific (Melbourne) Region (ap-southeast-4) for latency records, geoproximity records, and private DNS for Amazon VPCs in that region.
  • service/ssm-sap: Updates service API, documentation, and paginators

Release v1.44.185 (2023-01-23)

Service Client Updates

  • service/lambda: Updates service API and documentation
    • Release Lambda RuntimeManagementConfig, enabling customers to better manage runtime updates to their Lambda functions. This release adds two new APIs, GetRuntimeManagementConfig and PutRuntimeManagementConfig, as well as support on existing Create/Get/Update function APIs.
  • service/sagemaker: Updates service API and documentation
    • Amazon SageMaker Inference now supports P4de instance types.
Commits

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=github.com/aws/aws-sdk-go&package-manager=go_modules&previous-version=1.44.184&new-version=1.44.187)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) You can trigger a rebase of this PR by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@dependabot rebase` will rebase this PR - `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@dependabot merge` will merge this PR after your CI passes on it - `@dependabot squash and merge` will squash and merge this PR after your CI passes on it - `@dependabot cancel merge` will cancel a previously requested merge and block automerging - `@dependabot reopen` will reopen this PR if it is closed - `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually - `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
--- src/go.mod | 2 +- src/go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/go.mod b/src/go.mod index 7a8aced9e..86a5df660 100644 --- a/src/go.mod +++ b/src/go.mod @@ -5,7 +5,7 @@ go 1.19 require ( github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.2.0 github.com/alcionai/clues v0.0.0-20230120231953-1cf61dbafc40 - github.com/aws/aws-sdk-go v1.44.184 + github.com/aws/aws-sdk-go v1.44.187 github.com/aws/aws-xray-sdk-go v1.8.0 github.com/google/uuid v1.3.0 github.com/hashicorp/go-multierror v1.1.1 diff --git a/src/go.sum b/src/go.sum index 2644af9be..ff6021e3c 100644 --- a/src/go.sum +++ b/src/go.sum @@ -62,8 +62,8 @@ github.com/alecthomas/units v0.0.0-20190924025748-f65c72e2690d/go.mod h1:rBZYJk5 github.com/alessio/shellescape v1.4.1 h1:V7yhSDDn8LP4lc4jS8pFkt0zCnzVJlG5JXy9BVKJUX0= github.com/andybalholm/brotli v1.0.4 h1:V7DdXeJtZscaqfNuAdSRuRFzuiKlHSC/Zh3zl9qY3JY= github.com/andybalholm/brotli v1.0.4/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig= -github.com/aws/aws-sdk-go v1.44.184 h1:/MggyE66rOImXJKl1HqhLQITvWvqIV7w1Q4MaG6FHUo= -github.com/aws/aws-sdk-go v1.44.184/go.mod h1:aVsgQcEevwlmQ7qHE9I3h+dtQgpqhFB+i8Phjh7fkwI= +github.com/aws/aws-sdk-go v1.44.187 h1:D5CsRomPnlwDHJCanL2mtaLIcbhjiWxNh5j8zvaWdJA= +github.com/aws/aws-sdk-go v1.44.187/go.mod h1:aVsgQcEevwlmQ7qHE9I3h+dtQgpqhFB+i8Phjh7fkwI= github.com/aws/aws-xray-sdk-go v1.8.0 h1:0xncHZ588wB/geLjbM/esoW3FOEThWy2TJyb4VXfLFY= github.com/aws/aws-xray-sdk-go v1.8.0/go.mod h1:7LKe47H+j3evfvS1+q0wzpoaGXGrF3mUsfM+thqVO+A= github.com/benbjohnson/clock v1.1.0 h1:Q92kusRqC1XV2MjkWETPvjJVqKetz1OzxZB7mHJLju8= From e9354a8429798f02b9fe06129f9307de30d2b87a Mon Sep 17 00:00:00 2001 From: Vaibhav Kamra Date: Wed, 25 Jan 2023 23:52:56 -0800 Subject: [PATCH 14/18] Revert "add itemClient for re-usable od item downloads" (#2273) Reverts alcionai/corso#2266. Am seeing issues on data restore that need to be debugged. --- src/cli/backup/sharepoint.go | 3 +- src/cmd/factory/impl/common.go | 3 +- src/cmd/getM365/getItem.go | 3 +- src/cmd/purge/purge.go | 2 +- src/internal/connector/data_collections.go | 2 - .../connector/data_collections_test.go | 16 ++++---- .../connector/graph/service_helper.go | 29 ------------- src/internal/connector/graph_connector.go | 14 ++----- .../graph_connector_disconnected_test.go | 3 +- .../connector/graph_connector_helper_test.go | 5 +-- .../connector/graph_connector_test.go | 10 ++--- src/internal/connector/onedrive/collection.go | 21 +++------- .../connector/onedrive/collection_test.go | 20 ++++----- .../connector/onedrive/collections.go | 10 +---- .../connector/onedrive/collections_test.go | 2 - src/internal/connector/onedrive/drive_test.go | 2 - src/internal/connector/onedrive/item.go | 41 ++++++++++++++----- src/internal/connector/onedrive/item_test.go | 2 +- .../connector/sharepoint/data_collections.go | 5 --- .../sharepoint/data_collections_test.go | 2 - .../operations/backup_integration_test.go | 2 +- src/internal/operations/operation.go | 3 +- src/pkg/services/m365/m365.go | 7 ++-- 23 files changed, 75 insertions(+), 132 deletions(-) diff --git a/src/cli/backup/sharepoint.go b/src/cli/backup/sharepoint.go index fb4f7e766..4eb62dca0 100644 --- a/src/cli/backup/sharepoint.go +++ b/src/cli/backup/sharepoint.go @@ -13,7 +13,6 @@ import ( . "github.com/alcionai/corso/src/cli/print" "github.com/alcionai/corso/src/cli/utils" "github.com/alcionai/corso/src/internal/connector" - "github.com/alcionai/corso/src/internal/connector/graph" "github.com/alcionai/corso/src/internal/kopia" "github.com/alcionai/corso/src/internal/model" "github.com/alcionai/corso/src/pkg/backup" @@ -210,7 +209,7 @@ func createSharePointCmd(cmd *cobra.Command, args []string) error { defer utils.CloseRepo(ctx, r) - gc, err := connector.NewGraphConnector(ctx, graph.LargeItemClient(), acct, connector.Sites) + gc, err := connector.NewGraphConnector(ctx, acct, connector.Sites) if err != nil { return Only(ctx, errors.Wrap(err, "Failed to connect to Microsoft APIs")) } diff --git a/src/cmd/factory/impl/common.go b/src/cmd/factory/impl/common.go index 585118442..369d80d20 100644 --- a/src/cmd/factory/impl/common.go +++ b/src/cmd/factory/impl/common.go @@ -12,7 +12,6 @@ import ( . "github.com/alcionai/corso/src/cli/print" "github.com/alcionai/corso/src/internal/common" "github.com/alcionai/corso/src/internal/connector" - "github.com/alcionai/corso/src/internal/connector/graph" "github.com/alcionai/corso/src/internal/connector/mockconnector" "github.com/alcionai/corso/src/internal/data" "github.com/alcionai/corso/src/pkg/account" @@ -112,7 +111,7 @@ func getGCAndVerifyUser(ctx context.Context, userID string) (*connector.GraphCon } // build a graph connector - gc, err := connector.NewGraphConnector(ctx, graph.LargeItemClient(), acct, connector.Users) + gc, err := connector.NewGraphConnector(ctx, acct, connector.Users) if err != nil { return nil, account.Account{}, errors.Wrap(err, "connecting to graph api") } diff --git a/src/cmd/getM365/getItem.go b/src/cmd/getM365/getItem.go index 24ce81d9a..25961a236 100644 --- a/src/cmd/getM365/getItem.go +++ b/src/cmd/getM365/getItem.go @@ -19,7 +19,6 @@ import ( "github.com/alcionai/corso/src/internal/common" "github.com/alcionai/corso/src/internal/connector" "github.com/alcionai/corso/src/internal/connector/exchange/api" - "github.com/alcionai/corso/src/internal/connector/graph" "github.com/alcionai/corso/src/pkg/account" "github.com/alcionai/corso/src/pkg/backup/details" "github.com/alcionai/corso/src/pkg/credentials" @@ -178,7 +177,7 @@ func getGC(ctx context.Context) (*connector.GraphConnector, account.M365Config, return nil, m365Cfg, Only(ctx, errors.Wrap(err, "finding m365 account details")) } - gc, err := connector.NewGraphConnector(ctx, graph.LargeItemClient(), acct, connector.Users) + gc, err := connector.NewGraphConnector(ctx, acct, connector.Users) if err != nil { return nil, m365Cfg, Only(ctx, errors.Wrap(err, "connecting to graph API")) } diff --git a/src/cmd/purge/purge.go b/src/cmd/purge/purge.go index 32100772d..a59f7c2b8 100644 --- a/src/cmd/purge/purge.go +++ b/src/cmd/purge/purge.go @@ -255,7 +255,7 @@ func getGC(ctx context.Context) (*connector.GraphConnector, error) { } // build a graph connector - gc, err := connector.NewGraphConnector(ctx, graph.LargeItemClient(), acct, connector.Users) + gc, err := connector.NewGraphConnector(ctx, acct, connector.Users) if err != nil { return nil, Only(ctx, errors.Wrap(err, "connecting to graph api")) } diff --git a/src/internal/connector/data_collections.go b/src/internal/connector/data_collections.go index 0b6d20b27..7eaf1c517 100644 --- a/src/internal/connector/data_collections.go +++ b/src/internal/connector/data_collections.go @@ -87,7 +87,6 @@ func (gc *GraphConnector) DataCollections( case selectors.ServiceSharePoint: colls, err := sharepoint.DataCollections( ctx, - gc.itemClient, sels, gc.credentials.AzureTenantID, gc.Service, @@ -199,7 +198,6 @@ func (gc *GraphConnector) OneDriveDataCollections( logger.Ctx(ctx).With("user", user).Debug("Creating OneDrive collections") odcs, err := onedrive.NewCollections( - gc.itemClient, gc.credentials.AzureTenantID, user, onedrive.OneDriveSource, diff --git a/src/internal/connector/data_collections_test.go b/src/internal/connector/data_collections_test.go index 57332ce1a..e16aa4b51 100644 --- a/src/internal/connector/data_collections_test.go +++ b/src/internal/connector/data_collections_test.go @@ -10,7 +10,6 @@ import ( "github.com/stretchr/testify/suite" "github.com/alcionai/corso/src/internal/connector/exchange" - "github.com/alcionai/corso/src/internal/connector/graph" "github.com/alcionai/corso/src/internal/connector/sharepoint" "github.com/alcionai/corso/src/internal/tester" "github.com/alcionai/corso/src/pkg/control" @@ -44,7 +43,7 @@ func (suite *ConnectorDataCollectionIntegrationSuite) SetupSuite() { tester.MustGetEnvVars(suite.T(), tester.M365AcctCredEnvs...) - suite.connector = loadConnector(ctx, suite.T(), graph.LargeItemClient(), AllResources) + suite.connector = loadConnector(ctx, suite.T(), AllResources) suite.user = tester.M365UserID(suite.T()) suite.site = tester.M365SiteID(suite.T()) @@ -63,7 +62,7 @@ func (suite *ConnectorDataCollectionIntegrationSuite) TestExchangeDataCollection selUsers := []string{suite.user} - connector := loadConnector(ctx, suite.T(), graph.LargeItemClient(), Users) + connector := loadConnector(ctx, suite.T(), Users) tests := []struct { name string getSelector func(t *testing.T) selectors.Selector @@ -139,7 +138,7 @@ func (suite *ConnectorDataCollectionIntegrationSuite) TestDataCollections_invali owners := []string{"snuffleupagus"} - connector := loadConnector(ctx, suite.T(), graph.LargeItemClient(), Users) + connector := loadConnector(ctx, suite.T(), Users) tests := []struct { name string getSelector func(t *testing.T) selectors.Selector @@ -215,7 +214,7 @@ func (suite *ConnectorDataCollectionIntegrationSuite) TestSharePointDataCollecti selSites := []string{suite.site} - connector := loadConnector(ctx, suite.T(), graph.LargeItemClient(), Sites) + connector := loadConnector(ctx, suite.T(), Sites) tests := []struct { name string expected int @@ -244,7 +243,6 @@ func (suite *ConnectorDataCollectionIntegrationSuite) TestSharePointDataCollecti suite.T().Run(test.name, func(t *testing.T) { collections, err := sharepoint.DataCollections( ctx, - graph.LargeItemClient(), test.getSelector(), connector.credentials.AzureTenantID, connector.Service, @@ -300,7 +298,7 @@ func (suite *ConnectorCreateSharePointCollectionIntegrationSuite) SetupSuite() { tester.MustGetEnvSets(suite.T(), tester.M365AcctCredEnvs) - suite.connector = loadConnector(ctx, suite.T(), graph.LargeItemClient(), Sites) + suite.connector = loadConnector(ctx, suite.T(), Sites) suite.user = tester.M365UserID(suite.T()) tester.LogTimeOfTest(suite.T()) @@ -313,7 +311,7 @@ func (suite *ConnectorCreateSharePointCollectionIntegrationSuite) TestCreateShar var ( t = suite.T() siteID = tester.M365SiteID(t) - gc = loadConnector(ctx, t, graph.LargeItemClient(), Sites) + gc = loadConnector(ctx, t, Sites) siteIDs = []string{siteID} ) @@ -337,7 +335,7 @@ func (suite *ConnectorCreateSharePointCollectionIntegrationSuite) TestCreateShar var ( t = suite.T() siteID = tester.M365SiteID(t) - gc = loadConnector(ctx, t, graph.LargeItemClient(), Sites) + gc = loadConnector(ctx, t, Sites) siteIDs = []string{siteID} ) diff --git a/src/internal/connector/graph/service_helper.go b/src/internal/connector/graph/service_helper.go index a69183a79..b374933f8 100644 --- a/src/internal/connector/graph/service_helper.go +++ b/src/internal/connector/graph/service_helper.go @@ -55,31 +55,6 @@ func CreateHTTPClient() *http.Client { return httpClient } -// LargeItemClient generates a client that's configured to handle -// large file downloads. This client isn't suitable for other queries -// due to loose restrictions on timeouts and such. -// -// Re-use of http clients is critical, or else we leak os resources -// and consume relatively unbound socket connections. It is important -// to centralize this client to be passed downstream where api calls -// can utilize it on a per-download basis. -// -// TODO: this should get owned by an API client layer, not the GC itself. -func LargeItemClient() *http.Client { - httpClient := CreateHTTPClient() - httpClient.Timeout = 0 // infinite timeout for pulling large files - httpClient.Transport = &http.Transport{ - Proxy: http.ProxyFromEnvironment, - ForceAttemptHTTP2: true, - MaxIdleConns: 100, - IdleConnTimeout: 30 * time.Second, - TLSHandshakeTimeout: 10 * time.Second, - ExpectContinueTimeout: 1 * time.Second, - } - - return httpClient -} - // --------------------------------------------------------------------------- // Logging Middleware // --------------------------------------------------------------------------- @@ -110,10 +85,6 @@ func (handler *LoggingMiddleware) Intercept( logger.Ctx(ctx).Infow("graph api throttling", "method", req.Method, "url", req.URL) } - if resp.StatusCode != http.StatusTooManyRequests && (resp.StatusCode/100) != 2 { - logger.Ctx(ctx).Infow("graph api error", "method", req.Method, "url", req.URL) - } - if logger.DebugAPI || os.Getenv(logGraphRequestsEnvKey) != "" { respDump, _ := httputil.DumpResponse(resp, true) diff --git a/src/internal/connector/graph_connector.go b/src/internal/connector/graph_connector.go index 3dbc0e60c..68f03d048 100644 --- a/src/internal/connector/graph_connector.go +++ b/src/internal/connector/graph_connector.go @@ -4,7 +4,6 @@ package connector import ( "context" - "net/http" "runtime/trace" "strings" "sync" @@ -39,9 +38,8 @@ import ( // GraphRequestAdapter from the msgraph-sdk-go. Additional fields are for // bookkeeping and interfacing with other component. type GraphConnector struct { - Service graph.Servicer - Owners api.Client - itemClient *http.Client // configured to handle large item downloads + Service graph.Servicer + Owners api.Client tenant string Users map[string]string // key value @@ -66,19 +64,13 @@ const ( Sites ) -func NewGraphConnector( - ctx context.Context, - itemClient *http.Client, - acct account.Account, - r resource, -) (*GraphConnector, error) { +func NewGraphConnector(ctx context.Context, acct account.Account, r resource) (*GraphConnector, error) { m365, err := acct.M365Config() if err != nil { return nil, errors.Wrap(err, "retrieving m365 account configuration") } gc := GraphConnector{ - itemClient: itemClient, tenant: m365.AzureTenantID, Users: make(map[string]string, 0), wg: &sync.WaitGroup{}, diff --git a/src/internal/connector/graph_connector_disconnected_test.go b/src/internal/connector/graph_connector_disconnected_test.go index 711e55ff8..f7f583ebd 100644 --- a/src/internal/connector/graph_connector_disconnected_test.go +++ b/src/internal/connector/graph_connector_disconnected_test.go @@ -9,7 +9,6 @@ import ( "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" - "github.com/alcionai/corso/src/internal/connector/graph" "github.com/alcionai/corso/src/internal/connector/support" "github.com/alcionai/corso/src/internal/tester" "github.com/alcionai/corso/src/pkg/account" @@ -66,7 +65,7 @@ func (suite *DisconnectedGraphConnectorSuite) TestBadConnection() { for _, test := range table { suite.T().Run(test.name, func(t *testing.T) { - gc, err := NewGraphConnector(ctx, graph.LargeItemClient(), test.acct(t), Users) + gc, err := NewGraphConnector(ctx, test.acct(t), Users) assert.Nil(t, gc, test.name+" failed") assert.NotNil(t, err, test.name+"failed") }) diff --git a/src/internal/connector/graph_connector_helper_test.go b/src/internal/connector/graph_connector_helper_test.go index ce16a5a4d..c614df05d 100644 --- a/src/internal/connector/graph_connector_helper_test.go +++ b/src/internal/connector/graph_connector_helper_test.go @@ -3,7 +3,6 @@ package connector import ( "context" "io" - "net/http" "reflect" "testing" @@ -977,9 +976,9 @@ func getSelectorWith( } } -func loadConnector(ctx context.Context, t *testing.T, itemClient *http.Client, r resource) *GraphConnector { +func loadConnector(ctx context.Context, t *testing.T, r resource) *GraphConnector { a := tester.NewM365Account(t) - connector, err := NewGraphConnector(ctx, itemClient, a, r) + connector, err := NewGraphConnector(ctx, a, r) require.NoError(t, err) return connector diff --git a/src/internal/connector/graph_connector_test.go b/src/internal/connector/graph_connector_test.go index 85ad3f45f..71eff095a 100644 --- a/src/internal/connector/graph_connector_test.go +++ b/src/internal/connector/graph_connector_test.go @@ -156,7 +156,7 @@ func (suite *GraphConnectorIntegrationSuite) SetupSuite() { tester.MustGetEnvSets(suite.T(), tester.M365AcctCredEnvs) - suite.connector = loadConnector(ctx, suite.T(), graph.LargeItemClient(), Users) + suite.connector = loadConnector(ctx, suite.T(), Users) suite.user = tester.M365UserID(suite.T()) suite.acct = tester.NewM365Account(suite.T()) @@ -380,7 +380,7 @@ func runRestoreBackupTest( start := time.Now() - restoreGC := loadConnector(ctx, t, graph.LargeItemClient(), test.resource) + restoreGC := loadConnector(ctx, t, test.resource) restoreSel := getSelectorWith(t, test.service, resourceOwners, true) deets, err := restoreGC.RestoreDataCollections( ctx, @@ -419,7 +419,7 @@ func runRestoreBackupTest( }) } - backupGC := loadConnector(ctx, t, graph.LargeItemClient(), test.resource) + backupGC := loadConnector(ctx, t, test.resource) backupSel := backupSelectorForExpected(t, test.service, expectedDests) t.Logf("Selective backup of %s\n", backupSel) @@ -870,7 +870,7 @@ func (suite *GraphConnectorIntegrationSuite) TestMultiFolderBackupDifferentNames dest.ContainerName, ) - restoreGC := loadConnector(ctx, t, graph.LargeItemClient(), test.resource) + restoreGC := loadConnector(ctx, t, test.resource) deets, err := restoreGC.RestoreDataCollections(ctx, suite.acct, restoreSel, dest, collections) require.NoError(t, err) require.NotNil(t, deets) @@ -888,7 +888,7 @@ func (suite *GraphConnectorIntegrationSuite) TestMultiFolderBackupDifferentNames // Run a backup and compare its output with what we put in. - backupGC := loadConnector(ctx, t, graph.LargeItemClient(), test.resource) + backupGC := loadConnector(ctx, t, test.resource) backupSel := backupSelectorForExpected(t, test.service, expectedDests) t.Log("Selective backup of", backupSel) diff --git a/src/internal/connector/onedrive/collection.go b/src/internal/connector/onedrive/collection.go index ac0aa9fb3..4ea9ea9eb 100644 --- a/src/internal/connector/onedrive/collection.go +++ b/src/internal/connector/onedrive/collection.go @@ -4,13 +4,11 @@ package onedrive import ( "context" "io" - "net/http" "sync" "sync/atomic" "time" "github.com/microsoftgraph/msgraph-sdk-go/models" - "github.com/pkg/errors" "github.com/spatialcurrent/go-lazy/pkg/lazy" "github.com/alcionai/corso/src/internal/connector/graph" @@ -45,9 +43,6 @@ var ( // Collection represents a set of OneDrive objects retrieved from M365 type Collection struct { - // configured to handle large item downloads - itemClient *http.Client - // data is used to share data streams with the collection consumer data chan data.Stream // folderPath indicates what level in the hierarchy this collection @@ -69,13 +64,12 @@ type Collection struct { // itemReadFunc returns a reader for the specified item type itemReaderFunc func( - hc *http.Client, + ctx context.Context, item models.DriveItemable, ) (itemInfo details.ItemInfo, itemData io.ReadCloser, err error) // NewCollection creates a Collection func NewCollection( - itemClient *http.Client, folderPath path.Path, driveID string, service graph.Servicer, @@ -84,7 +78,6 @@ func NewCollection( ctrlOpts control.Options, ) *Collection { c := &Collection{ - itemClient: itemClient, folderPath: folderPath, driveItems: map[string]models.DriveItemable{}, driveID: driveID, @@ -205,16 +198,11 @@ func (oc *Collection) populateItems(ctx context.Context) { m.Unlock() } - for id, item := range oc.driveItems { + for _, item := range oc.driveItems { if oc.ctrl.FailFast && errs != nil { break } - if item == nil { - errUpdater(id, errors.New("nil item")) - continue - } - semaphoreCh <- struct{}{} wg.Add(1) @@ -231,9 +219,10 @@ func (oc *Collection) populateItems(ctx context.Context) { ) for i := 1; i <= maxRetries; i++ { - itemInfo, itemData, err = oc.itemReader(oc.itemClient, item) + itemInfo, itemData, err = oc.itemReader(ctx, item) + + // retry on Timeout type errors, break otherwise. if err == nil || graph.IsErrTimeout(err) == nil { - // retry on Timeout type errors, break otherwise. break } diff --git a/src/internal/connector/onedrive/collection_test.go b/src/internal/connector/onedrive/collection_test.go index a36db58c9..a19021ff7 100644 --- a/src/internal/connector/onedrive/collection_test.go +++ b/src/internal/connector/onedrive/collection_test.go @@ -2,8 +2,9 @@ package onedrive import ( "bytes" + "context" + "errors" "io" - "net/http" "sync" "testing" "time" @@ -14,7 +15,6 @@ import ( "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" - "github.com/alcionai/corso/src/internal/connector/graph" "github.com/alcionai/corso/src/internal/connector/support" "github.com/alcionai/corso/src/internal/data" "github.com/alcionai/corso/src/pkg/backup/details" @@ -73,7 +73,7 @@ func (suite *CollectionUnitTestSuite) TestCollection() { name: "oneDrive, no duplicates", numInstances: 1, source: OneDriveSource, - itemReader: func(*http.Client, models.DriveItemable) (details.ItemInfo, io.ReadCloser, error) { + itemReader: func(context.Context, models.DriveItemable) (details.ItemInfo, io.ReadCloser, error) { return details.ItemInfo{OneDrive: &details.OneDriveInfo{ItemName: testItemName, Modified: now}}, io.NopCloser(bytes.NewReader(testItemData)), nil @@ -87,7 +87,7 @@ func (suite *CollectionUnitTestSuite) TestCollection() { name: "oneDrive, duplicates", numInstances: 3, source: OneDriveSource, - itemReader: func(*http.Client, models.DriveItemable) (details.ItemInfo, io.ReadCloser, error) { + itemReader: func(context.Context, models.DriveItemable) (details.ItemInfo, io.ReadCloser, error) { return details.ItemInfo{OneDrive: &details.OneDriveInfo{ItemName: testItemName, Modified: now}}, io.NopCloser(bytes.NewReader(testItemData)), nil @@ -101,7 +101,7 @@ func (suite *CollectionUnitTestSuite) TestCollection() { name: "sharePoint, no duplicates", numInstances: 1, source: SharePointSource, - itemReader: func(*http.Client, models.DriveItemable) (details.ItemInfo, io.ReadCloser, error) { + itemReader: func(context.Context, models.DriveItemable) (details.ItemInfo, io.ReadCloser, error) { return details.ItemInfo{SharePoint: &details.SharePointInfo{ItemName: testItemName, Modified: now}}, io.NopCloser(bytes.NewReader(testItemData)), nil @@ -115,7 +115,7 @@ func (suite *CollectionUnitTestSuite) TestCollection() { name: "sharePoint, duplicates", numInstances: 3, source: SharePointSource, - itemReader: func(*http.Client, models.DriveItemable) (details.ItemInfo, io.ReadCloser, error) { + itemReader: func(context.Context, models.DriveItemable) (details.ItemInfo, io.ReadCloser, error) { return details.ItemInfo{SharePoint: &details.SharePointInfo{ItemName: testItemName, Modified: now}}, io.NopCloser(bytes.NewReader(testItemData)), nil @@ -140,7 +140,6 @@ func (suite *CollectionUnitTestSuite) TestCollection() { require.NoError(t, err) coll := NewCollection( - graph.LargeItemClient(), folderPath, "drive-id", suite, @@ -225,7 +224,6 @@ func (suite *CollectionUnitTestSuite) TestCollectionReadError() { require.NoError(t, err) coll := NewCollection( - graph.LargeItemClient(), folderPath, "fakeDriveID", suite, @@ -237,8 +235,10 @@ func (suite *CollectionUnitTestSuite) TestCollectionReadError() { mockItem.SetId(&testItemID) coll.Add(mockItem) - coll.itemReader = func(*http.Client, models.DriveItemable) (details.ItemInfo, io.ReadCloser, error) { - return details.ItemInfo{}, nil, assert.AnError + readError := errors.New("Test error") + + coll.itemReader = func(context.Context, models.DriveItemable) (details.ItemInfo, io.ReadCloser, error) { + return details.ItemInfo{}, nil, readError } coll.Items() diff --git a/src/internal/connector/onedrive/collections.go b/src/internal/connector/onedrive/collections.go index f446aa246..8f18aa780 100644 --- a/src/internal/connector/onedrive/collections.go +++ b/src/internal/connector/onedrive/collections.go @@ -3,7 +3,6 @@ package onedrive import ( "context" "fmt" - "net/http" "strings" "github.com/microsoftgraph/msgraph-sdk-go/models" @@ -47,9 +46,6 @@ type folderMatcher interface { // Collections is used to retrieve drive data for a // resource owner, which can be either a user or a sharepoint site. type Collections struct { - // configured to handle large item downloads - itemClient *http.Client - tenant string resourceOwner string source driveSource @@ -70,7 +66,6 @@ type Collections struct { } func NewCollections( - itemClient *http.Client, tenant string, resourceOwner string, source driveSource, @@ -80,7 +75,6 @@ func NewCollections( ctrlOpts control.Options, ) *Collections { return &Collections{ - itemClient: itemClient, tenant: tenant, resourceOwner: resourceOwner, source: source, @@ -271,13 +265,13 @@ func (c *Collections) UpdateCollections( // TODO(ashmrtn): Compare old and new path and set collection state // accordingly. col = NewCollection( - c.itemClient, collectionPath, driveID, c.service, c.statusUpdater, c.source, - c.ctrl) + c.ctrl, + ) c.CollectionMap[collectionPath.String()] = col c.NumContainers++ diff --git a/src/internal/connector/onedrive/collections_test.go b/src/internal/connector/onedrive/collections_test.go index 21ca061a9..485b9ed86 100644 --- a/src/internal/connector/onedrive/collections_test.go +++ b/src/internal/connector/onedrive/collections_test.go @@ -10,7 +10,6 @@ import ( "github.com/stretchr/testify/suite" "golang.org/x/exp/maps" - "github.com/alcionai/corso/src/internal/connector/graph" "github.com/alcionai/corso/src/internal/tester" "github.com/alcionai/corso/src/pkg/control" "github.com/alcionai/corso/src/pkg/selectors" @@ -588,7 +587,6 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() { outputFolderMap := map[string]string{} maps.Copy(outputFolderMap, tt.inputFolderMap) c := NewCollections( - graph.LargeItemClient(), tenant, user, OneDriveSource, diff --git a/src/internal/connector/onedrive/drive_test.go b/src/internal/connector/onedrive/drive_test.go index 755b7293b..7f6901621 100644 --- a/src/internal/connector/onedrive/drive_test.go +++ b/src/internal/connector/onedrive/drive_test.go @@ -9,7 +9,6 @@ import ( "github.com/stretchr/testify/suite" "github.com/alcionai/corso/src/internal/common" - "github.com/alcionai/corso/src/internal/connector/graph" "github.com/alcionai/corso/src/internal/tester" "github.com/alcionai/corso/src/pkg/control" "github.com/alcionai/corso/src/pkg/logger" @@ -147,7 +146,6 @@ func (suite *OneDriveSuite) TestOneDriveNewCollections() { NewOneDriveBackup([]string{test.user}). AllData()[0] odcs, err := NewCollections( - graph.LargeItemClient(), creds.AzureTenantID, test.user, OneDriveSource, diff --git a/src/internal/connector/onedrive/item.go b/src/internal/connector/onedrive/item.go index 00ec1f630..73391033b 100644 --- a/src/internal/connector/onedrive/item.go +++ b/src/internal/connector/onedrive/item.go @@ -4,7 +4,6 @@ import ( "context" "fmt" "io" - "net/http" "strings" msdrives "github.com/microsoftgraph/msgraph-sdk-go/drives" @@ -28,7 +27,7 @@ const ( // It crafts this by querying M365 for a download URL for the item // and using a http client to initialize a reader func sharePointItemReader( - hc *http.Client, + ctx context.Context, item models.DriveItemable, ) (details.ItemInfo, io.ReadCloser, error) { url, ok := item.GetAdditionalData()[downloadURLKey].(*string) @@ -36,7 +35,7 @@ func sharePointItemReader( return details.ItemInfo{}, nil, fmt.Errorf("failed to get url for %s", *item.GetName()) } - resp, err := hc.Get(*url) + rc, err := driveItemReader(ctx, *url) if err != nil { return details.ItemInfo{}, nil, err } @@ -45,14 +44,14 @@ func sharePointItemReader( SharePoint: sharePointItemInfo(item, *item.GetSize()), } - return dii, resp.Body, nil + return dii, rc, nil } // oneDriveItemReader will return a io.ReadCloser for the specified item // It crafts this by querying M365 for a download URL for the item // and using a http client to initialize a reader func oneDriveItemReader( - hc *http.Client, + ctx context.Context, item models.DriveItemable, ) (details.ItemInfo, io.ReadCloser, error) { url, ok := item.GetAdditionalData()[downloadURLKey].(*string) @@ -60,7 +59,7 @@ func oneDriveItemReader( return details.ItemInfo{}, nil, fmt.Errorf("failed to get url for %s", *item.GetName()) } - resp, err := hc.Get(*url) + rc, err := driveItemReader(ctx, *url) if err != nil { return details.ItemInfo{}, nil, err } @@ -69,7 +68,25 @@ func oneDriveItemReader( OneDrive: oneDriveItemInfo(item, *item.GetSize()), } - return dii, resp.Body, nil + return dii, rc, nil +} + +// driveItemReader will return a io.ReadCloser for the specified item +// It crafts this by querying M365 for a download URL for the item +// and using a http client to initialize a reader +func driveItemReader( + ctx context.Context, + url string, +) (io.ReadCloser, error) { + httpClient := graph.CreateHTTPClient() + httpClient.Timeout = 0 // infinite timeout for pulling large files + + resp, err := httpClient.Get(url) + if err != nil { + return nil, errors.Wrapf(err, "failed to download file from %s", url) + } + + return resp.Body, nil } // oneDriveItemInfo will populate a details.OneDriveInfo struct @@ -80,7 +97,7 @@ func oneDriveItemReader( func oneDriveItemInfo(di models.DriveItemable, itemSize int64) *details.OneDriveInfo { var email, parent string - if di.GetCreatedBy() != nil && di.GetCreatedBy().GetUser() != nil { + if di.GetCreatedBy().GetUser() != nil { // User is sometimes not available when created via some // external applications (like backup/restore solutions) ed, ok := di.GetCreatedBy().GetUser().GetAdditionalData()["email"] @@ -89,9 +106,11 @@ func oneDriveItemInfo(di models.DriveItemable, itemSize int64) *details.OneDrive } } - if di.GetParentReference() != nil && di.GetParentReference().GetName() != nil { - // EndPoint is not always populated from external apps - parent = *di.GetParentReference().GetName() + if di.GetParentReference() != nil { + if di.GetParentReference().GetName() != nil { + // EndPoint is not always populated from external apps + parent = *di.GetParentReference().GetName() + } } return &details.OneDriveInfo{ diff --git a/src/internal/connector/onedrive/item_test.go b/src/internal/connector/onedrive/item_test.go index 2b28f0910..b3f45ca56 100644 --- a/src/internal/connector/onedrive/item_test.go +++ b/src/internal/connector/onedrive/item_test.go @@ -126,7 +126,7 @@ func (suite *ItemIntegrationSuite) TestItemReader_oneDrive() { // Read data for the file - itemInfo, itemData, err := oneDriveItemReader(graph.LargeItemClient(), driveItem) + itemInfo, itemData, err := oneDriveItemReader(ctx, driveItem) require.NoError(suite.T(), err) require.NotNil(suite.T(), itemInfo.OneDrive) require.NotEmpty(suite.T(), itemInfo.OneDrive.ItemName) diff --git a/src/internal/connector/sharepoint/data_collections.go b/src/internal/connector/sharepoint/data_collections.go index 5950daede..d7c6547a0 100644 --- a/src/internal/connector/sharepoint/data_collections.go +++ b/src/internal/connector/sharepoint/data_collections.go @@ -2,7 +2,6 @@ package sharepoint import ( "context" - "net/http" "github.com/pkg/errors" @@ -25,7 +24,6 @@ type statusUpdater interface { // for the specified user func DataCollections( ctx context.Context, - itemClient *http.Client, selector selectors.Selector, tenantID string, serv graph.Servicer, @@ -68,7 +66,6 @@ func DataCollections( case path.LibrariesCategory: spcs, err = collectLibraries( ctx, - itemClient, serv, tenantID, site, @@ -127,7 +124,6 @@ func collectLists( // all the drives associated with the site. func collectLibraries( ctx context.Context, - itemClient *http.Client, serv graph.Servicer, tenantID, siteID string, scope selectors.SharePointScope, @@ -142,7 +138,6 @@ func collectLibraries( logger.Ctx(ctx).With("site", siteID).Debug("Creating SharePoint Library collections") colls := onedrive.NewCollections( - itemClient, tenantID, siteID, onedrive.SharePointSource, diff --git a/src/internal/connector/sharepoint/data_collections_test.go b/src/internal/connector/sharepoint/data_collections_test.go index 4daca0877..423336253 100644 --- a/src/internal/connector/sharepoint/data_collections_test.go +++ b/src/internal/connector/sharepoint/data_collections_test.go @@ -7,7 +7,6 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/suite" - "github.com/alcionai/corso/src/internal/connector/graph" "github.com/alcionai/corso/src/internal/connector/onedrive" "github.com/alcionai/corso/src/internal/tester" "github.com/alcionai/corso/src/pkg/control" @@ -92,7 +91,6 @@ func (suite *SharePointLibrariesSuite) TestUpdateCollections() { newPaths := map[string]string{} excluded := map[string]struct{}{} c := onedrive.NewCollections( - graph.LargeItemClient(), tenant, site, onedrive.SharePointSource, diff --git a/src/internal/operations/backup_integration_test.go b/src/internal/operations/backup_integration_test.go index 83748baa2..3ee5c0230 100644 --- a/src/internal/operations/backup_integration_test.go +++ b/src/internal/operations/backup_integration_test.go @@ -655,7 +655,7 @@ func (suite *BackupOpIntegrationSuite) TestBackup_Run_exchangeIncrementals() { m365, err := acct.M365Config() require.NoError(t, err) - gc, err := connector.NewGraphConnector(ctx, graph.LargeItemClient(), acct, connector.Users) + gc, err := connector.NewGraphConnector(ctx, acct, connector.Users) require.NoError(t, err) ac, err := api.NewClient(m365) diff --git a/src/internal/operations/operation.go b/src/internal/operations/operation.go index e5382b022..30770bdf5 100644 --- a/src/internal/operations/operation.go +++ b/src/internal/operations/operation.go @@ -7,7 +7,6 @@ import ( "github.com/pkg/errors" "github.com/alcionai/corso/src/internal/connector" - "github.com/alcionai/corso/src/internal/connector/graph" "github.com/alcionai/corso/src/internal/events" "github.com/alcionai/corso/src/internal/kopia" "github.com/alcionai/corso/src/internal/observe" @@ -108,7 +107,7 @@ func connectToM365( resource = connector.Sites } - gc, err := connector.NewGraphConnector(ctx, graph.LargeItemClient(), acct, resource) + gc, err := connector.NewGraphConnector(ctx, acct, resource) if err != nil { return nil, err } diff --git a/src/pkg/services/m365/m365.go b/src/pkg/services/m365/m365.go index e0dd75af9..ba9cc2a59 100644 --- a/src/pkg/services/m365/m365.go +++ b/src/pkg/services/m365/m365.go @@ -8,7 +8,6 @@ import ( "github.com/alcionai/corso/src/internal/connector" "github.com/alcionai/corso/src/internal/connector/discovery" - "github.com/alcionai/corso/src/internal/connector/graph" "github.com/alcionai/corso/src/pkg/account" ) @@ -21,7 +20,7 @@ type User struct { // Users returns a list of users in the specified M365 tenant // TODO: Implement paging support func Users(ctx context.Context, m365Account account.Account) ([]*User, error) { - gc, err := connector.NewGraphConnector(ctx, graph.LargeItemClient(), m365Account, connector.Users) + gc, err := connector.NewGraphConnector(ctx, m365Account, connector.Users) if err != nil { return nil, errors.Wrap(err, "could not initialize M365 graph connection") } @@ -77,7 +76,7 @@ func UserPNs(ctx context.Context, m365Account account.Account) ([]string, error) // SiteURLs returns a list of SharePoint site WebURLs in the specified M365 tenant func SiteURLs(ctx context.Context, m365Account account.Account) ([]string, error) { - gc, err := connector.NewGraphConnector(ctx, graph.LargeItemClient(), m365Account, connector.Sites) + gc, err := connector.NewGraphConnector(ctx, m365Account, connector.Sites) if err != nil { return nil, errors.Wrap(err, "could not initialize M365 graph connection") } @@ -87,7 +86,7 @@ func SiteURLs(ctx context.Context, m365Account account.Account) ([]string, error // SiteURLs returns a list of SharePoint sites IDs in the specified M365 tenant func SiteIDs(ctx context.Context, m365Account account.Account) ([]string, error) { - gc, err := connector.NewGraphConnector(ctx, graph.LargeItemClient(), m365Account, connector.Sites) + gc, err := connector.NewGraphConnector(ctx, m365Account, connector.Sites) if err != nil { return nil, errors.Wrap(err, "could not initialize M365 graph connection") } From 24734f22b0a03466e45c0b69abe914af39146c90 Mon Sep 17 00:00:00 2001 From: Vaibhav Kamra Date: Wed, 25 Jan 2023 23:54:30 -0800 Subject: [PATCH 15/18] Revert "add itemClient for re-usable od item downloads" (#2273) Reverts alcionai/corso#2266. Am seeing issues on data restore that need to be debugged. From 0587df91efbb1e8a9e093d9f03a666d348f9a6fa Mon Sep 17 00:00:00 2001 From: Vaibhav Kamra Date: Thu, 26 Jan 2023 00:52:39 -0800 Subject: [PATCH 16/18] Remove OneDrive license GUID check (#2275) ## Description This check is brittle and shouldn't be needed. It should be sufficient to check whether a user has a drive or not (similar to what we now do for exchange mailboxes). Validated with the existing integration test. ## Does this PR need a docs update or release note? - [x] :white_check_mark: Yes, it's included - [ ] :clock1: Yes, but in a later PR - [ ] :no_entry: No ## Type of change - [ ] :sunflower: Feature - [x] :bug: Bugfix - [ ] :world_map: Documentation - [ ] :robot: Test - [ ] :computer: CI/Deployment - [ ] :broom: Tech Debt/Cleanup ## Issue(s) * #2271 ## Test Plan - [ ] :muscle: Manual - [x] :zap: Unit test - [ ] :green_heart: E2E --- CHANGELOG.md | 1 + src/internal/connector/onedrive/drive.go | 118 +---------------------- 2 files changed, 4 insertions(+), 115 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 248260c0a..1d8cdfee0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Handle case where user's drive has not been initialized - Inline attachments (e.g. copy/paste ) are discovered and backed up correctly ([#2163](https://github.com/alcionai/corso/issues/2163)) - Guest and External users (for cloud accounts) and non-on-premise users (for systems that use on-prem AD syncs) are now excluded from backup and restore operations. +- Remove the M365 license guid check in OneDrive backup which wasn't reliable. ## [v0.1.0] (alpha) - 2023-01-13 diff --git a/src/internal/connector/onedrive/drive.go b/src/internal/connector/onedrive/drive.go index 315eaee5c..c765e3719 100644 --- a/src/internal/connector/onedrive/drive.go +++ b/src/internal/connector/onedrive/drive.go @@ -6,7 +6,6 @@ import ( "strings" "time" - msgraphgocore "github.com/microsoftgraph/msgraph-sdk-go-core" msdrive "github.com/microsoftgraph/msgraph-sdk-go/drive" msdrives "github.com/microsoftgraph/msgraph-sdk-go/drives" "github.com/microsoftgraph/msgraph-sdk-go/models" @@ -20,55 +19,7 @@ import ( "github.com/alcionai/corso/src/pkg/logger" ) -var ( - errFolderNotFound = errors.New("folder not found") - - // nolint:lll - // OneDrive associated SKUs located at: - // https://learn.microsoft.com/en-us/azure/active-directory/enterprise-users/licensing-service-plan-reference - skuIDs = []string{ - // Microsoft 365 Apps for Business 0365 - "cdd28e44-67e3-425e-be4c-737fab2899d3", - // Microsoft 365 Apps for Business SMB_Business - "b214fe43-f5a3-4703-beeb-fa97188220fc", - // Microsoft 365 Apps for enterprise - "c2273bd0-dff7-4215-9ef5-2c7bcfb06425", - // Microsoft 365 Apps for Faculty - "12b8c807-2e20-48fc-b453-542b6ee9d171", - // Microsoft 365 Apps for Students - "c32f9321-a627-406d-a114-1f9c81aaafac", - // OneDrive for Business (Plan 1) - "e6778190-713e-4e4f-9119-8b8238de25df", - // OneDrive for Business (Plan 2) - "ed01faf2-1d88-4947-ae91-45ca18703a96", - // Visio Plan 1 - "ca7f3140-d88c-455b-9a1c-7f0679e31a76", - // Visio Plan 2 - "38b434d2-a15e-4cde-9a98-e737c75623e1", - // Visio Online Plan 1 - "4b244418-9658-4451-a2b8-b5e2b364e9bd", - // Visio Online Plan 2 - "c5928f49-12ba-48f7-ada3-0d743a3601d5", - // Visio Plan 2 for GCC - "4ae99959-6b0f-43b0-b1ce-68146001bdba", - // ONEDRIVEENTERPRISE - "afcafa6a-d966-4462-918c-ec0b4e0fe642", - // Microsoft 365 E5 Developer - "c42b9cae-ea4f-4ab7-9717-81576235ccac", - // Microsoft 365 E5 - "06ebc4ee-1bb5-47dd-8120-11324bc54e06", - // Office 365 E4 - "1392051d-0cb9-4b7a-88d5-621fee5e8711", - // Microsoft 365 E3 - "05e9a617-0261-4cee-bb44-138d3ef5d965", - // Microsoft 365 Business Premium - "cbdc14ab-d96c-4c30-b9f4-6ada7cdc1d46", - // Microsoft 365 Business Standard - "f245ecc8-75af-4f8e-b61f-27d8114de5f3", - // Microsoft 365 Business Basic - "3b555118-da6a-4418-894f-7df1e2096870", - } -) +var errFolderNotFound = errors.New("folder not found") const ( // nextLinkKey is used to find the next link in a paged @@ -116,21 +67,11 @@ func siteDrives(ctx context.Context, service graph.Servicer, site string) ([]mod func userDrives(ctx context.Context, service graph.Servicer, user string) ([]models.Driveable, error) { var ( - hasDrive bool numberOfRetries = 3 r models.DriveCollectionResponseable + err error ) - hasDrive, err := hasDriveLicense(ctx, service, user) - if err != nil { - return nil, errors.Wrap(err, user) - } - - if !hasDrive { - logger.Ctx(ctx).Debugf("User %s does not have a license for OneDrive", user) - return make([]models.Driveable, 0), nil // no license - } - // Retry Loop for Drive retrieval. Request can timeout for i := 0; i <= numberOfRetries; i++ { r, err = service.Client().UsersById(user).Drives().Get(ctx, nil) @@ -138,7 +79,7 @@ func userDrives(ctx context.Context, service graph.Servicer, user string) ([]mod detailedError := support.ConnectorStackErrorTrace(err) if strings.Contains(detailedError, userMysiteURLNotFound) || strings.Contains(detailedError, userMysiteNotFound) { - logger.Ctx(ctx).Debugf("User %s does not have a drive", user) + logger.Ctx(ctx).Infof("User %s does not have a drive", user) return make([]models.Driveable, 0), nil // no license } @@ -422,56 +363,3 @@ func DeleteItem( return nil } - -// hasDriveLicense utility function that queries M365 server -// to investigate the user's includes access to OneDrive. -func hasDriveLicense( - ctx context.Context, - service graph.Servicer, - user string, -) (bool, error) { - var hasDrive bool - - resp, err := service.Client().UsersById(user).LicenseDetails().Get(ctx, nil) - if err != nil { - return false, - errors.Wrap(err, "failure obtaining license details for user") - } - - iter, err := msgraphgocore.NewPageIterator( - resp, service.Adapter(), - models.CreateLicenseDetailsCollectionResponseFromDiscriminatorValue, - ) - if err != nil { - return false, err - } - - cb := func(pageItem any) bool { - entry, ok := pageItem.(models.LicenseDetailsable) - if !ok { - err = errors.New("casting item to models.LicenseDetailsable") - return false - } - - sku := entry.GetSkuId() - if sku == nil { - return true - } - - for _, license := range skuIDs { - if sku.String() == license { - hasDrive = true - return false - } - } - - return true - } - - if err := iter.Iterate(ctx, cb); err != nil { - return false, - errors.Wrap(err, support.ConnectorStackErrorTrace(err)) - } - - return hasDrive, nil -} From 53b5032c1bfcaa6ad8ff448f2cf5fa19b4e72a2f Mon Sep 17 00:00:00 2001 From: Niraj Tolia Date: Thu, 26 Jan 2023 03:01:28 -0800 Subject: [PATCH 17/18] Fix direct GH docs edit link (#2274) ## Description Was broken when we combined the docs and website ## Does this PR need a docs update or release note? - [x] :no_entry: No ## Type of change - [x] :bug: Bugfix - [x] :world_map: Documentation --- website/docusaurus.config.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/docusaurus.config.js b/website/docusaurus.config.js index ea99ddc6e..375df5a88 100644 --- a/website/docusaurus.config.js +++ b/website/docusaurus.config.js @@ -47,7 +47,7 @@ const config = { sidebarPath: require.resolve('./sidebars.js'), remarkPlugins: [require('mdx-mermaid')], editUrl: - 'https://github.com/alcionai/corso/tree/main/docs', + 'https://github.com/alcionai/corso/tree/main/website', }, blog: { showReadingTime: true, From bdb7f2b109b55539ef17cc2ca82845308c15ffbd Mon Sep 17 00:00:00 2001 From: Vaibhav Kamra Date: Thu, 26 Jan 2023 07:05:12 -0800 Subject: [PATCH 18/18] Re-use client for OneDrive item downloads (#2276) ## Description This PR reintroduces the changes from #2266 with a change to *not* reset the transport when initializing the shared client. Doing so was removing the retry and other middlewares and also resulting in throttled requests being masked as success Also - we now decorate our download traffic with an ISV tag as recommended [here](https://learn.microsoft.com/en-us/sharepoint/dev/general-development/how-to-avoid-getting-throttled-or-blocked-in-sharepoint-online#how-to-decorate-your-http-traffic) ## Does this PR need a docs update or release note? - [ ] :white_check_mark: Yes, it's included - [x] :clock1: Yes, but in a later PR - [ ] :no_entry: No ## Type of change - [ ] :sunflower: Feature - [x] :bug: Bugfix - [ ] :world_map: Documentation - [ ] :robot: Test - [ ] :computer: CI/Deployment - [ ] :broom: Tech Debt/Cleanup ## Issue(s) * #2266 ## Test Plan - [ ] :muscle: Manual - [ ] :zap: Unit test - [x] :green_heart: E2E --- src/cli/backup/sharepoint.go | 3 +- src/cmd/factory/impl/common.go | 3 +- src/cmd/getM365/getItem.go | 3 +- src/cmd/purge/purge.go | 2 +- src/internal/connector/data_collections.go | 2 + .../connector/data_collections_test.go | 16 +++--- .../connector/graph/service_helper.go | 21 ++++++++ src/internal/connector/graph_connector.go | 14 +++-- .../graph_connector_disconnected_test.go | 3 +- .../connector/graph_connector_helper_test.go | 5 +- .../connector/graph_connector_test.go | 10 ++-- src/internal/connector/onedrive/collection.go | 21 ++++++-- .../connector/onedrive/collection_test.go | 20 +++---- .../connector/onedrive/collections.go | 10 +++- .../connector/onedrive/collections_test.go | 2 + src/internal/connector/onedrive/drive_test.go | 2 + src/internal/connector/onedrive/item.go | 52 ++++++++----------- src/internal/connector/onedrive/item_test.go | 2 +- .../connector/sharepoint/data_collections.go | 5 ++ .../sharepoint/data_collections_test.go | 2 + .../operations/backup_integration_test.go | 2 +- src/internal/operations/operation.go | 3 +- src/pkg/services/m365/m365.go | 7 +-- 23 files changed, 135 insertions(+), 75 deletions(-) diff --git a/src/cli/backup/sharepoint.go b/src/cli/backup/sharepoint.go index 4eb62dca0..fb4f7e766 100644 --- a/src/cli/backup/sharepoint.go +++ b/src/cli/backup/sharepoint.go @@ -13,6 +13,7 @@ import ( . "github.com/alcionai/corso/src/cli/print" "github.com/alcionai/corso/src/cli/utils" "github.com/alcionai/corso/src/internal/connector" + "github.com/alcionai/corso/src/internal/connector/graph" "github.com/alcionai/corso/src/internal/kopia" "github.com/alcionai/corso/src/internal/model" "github.com/alcionai/corso/src/pkg/backup" @@ -209,7 +210,7 @@ func createSharePointCmd(cmd *cobra.Command, args []string) error { defer utils.CloseRepo(ctx, r) - gc, err := connector.NewGraphConnector(ctx, acct, connector.Sites) + gc, err := connector.NewGraphConnector(ctx, graph.LargeItemClient(), acct, connector.Sites) if err != nil { return Only(ctx, errors.Wrap(err, "Failed to connect to Microsoft APIs")) } diff --git a/src/cmd/factory/impl/common.go b/src/cmd/factory/impl/common.go index 369d80d20..585118442 100644 --- a/src/cmd/factory/impl/common.go +++ b/src/cmd/factory/impl/common.go @@ -12,6 +12,7 @@ import ( . "github.com/alcionai/corso/src/cli/print" "github.com/alcionai/corso/src/internal/common" "github.com/alcionai/corso/src/internal/connector" + "github.com/alcionai/corso/src/internal/connector/graph" "github.com/alcionai/corso/src/internal/connector/mockconnector" "github.com/alcionai/corso/src/internal/data" "github.com/alcionai/corso/src/pkg/account" @@ -111,7 +112,7 @@ func getGCAndVerifyUser(ctx context.Context, userID string) (*connector.GraphCon } // build a graph connector - gc, err := connector.NewGraphConnector(ctx, acct, connector.Users) + gc, err := connector.NewGraphConnector(ctx, graph.LargeItemClient(), acct, connector.Users) if err != nil { return nil, account.Account{}, errors.Wrap(err, "connecting to graph api") } diff --git a/src/cmd/getM365/getItem.go b/src/cmd/getM365/getItem.go index 25961a236..24ce81d9a 100644 --- a/src/cmd/getM365/getItem.go +++ b/src/cmd/getM365/getItem.go @@ -19,6 +19,7 @@ import ( "github.com/alcionai/corso/src/internal/common" "github.com/alcionai/corso/src/internal/connector" "github.com/alcionai/corso/src/internal/connector/exchange/api" + "github.com/alcionai/corso/src/internal/connector/graph" "github.com/alcionai/corso/src/pkg/account" "github.com/alcionai/corso/src/pkg/backup/details" "github.com/alcionai/corso/src/pkg/credentials" @@ -177,7 +178,7 @@ func getGC(ctx context.Context) (*connector.GraphConnector, account.M365Config, return nil, m365Cfg, Only(ctx, errors.Wrap(err, "finding m365 account details")) } - gc, err := connector.NewGraphConnector(ctx, acct, connector.Users) + gc, err := connector.NewGraphConnector(ctx, graph.LargeItemClient(), acct, connector.Users) if err != nil { return nil, m365Cfg, Only(ctx, errors.Wrap(err, "connecting to graph API")) } diff --git a/src/cmd/purge/purge.go b/src/cmd/purge/purge.go index a59f7c2b8..32100772d 100644 --- a/src/cmd/purge/purge.go +++ b/src/cmd/purge/purge.go @@ -255,7 +255,7 @@ func getGC(ctx context.Context) (*connector.GraphConnector, error) { } // build a graph connector - gc, err := connector.NewGraphConnector(ctx, acct, connector.Users) + gc, err := connector.NewGraphConnector(ctx, graph.LargeItemClient(), acct, connector.Users) if err != nil { return nil, Only(ctx, errors.Wrap(err, "connecting to graph api")) } diff --git a/src/internal/connector/data_collections.go b/src/internal/connector/data_collections.go index 7eaf1c517..0b6d20b27 100644 --- a/src/internal/connector/data_collections.go +++ b/src/internal/connector/data_collections.go @@ -87,6 +87,7 @@ func (gc *GraphConnector) DataCollections( case selectors.ServiceSharePoint: colls, err := sharepoint.DataCollections( ctx, + gc.itemClient, sels, gc.credentials.AzureTenantID, gc.Service, @@ -198,6 +199,7 @@ func (gc *GraphConnector) OneDriveDataCollections( logger.Ctx(ctx).With("user", user).Debug("Creating OneDrive collections") odcs, err := onedrive.NewCollections( + gc.itemClient, gc.credentials.AzureTenantID, user, onedrive.OneDriveSource, diff --git a/src/internal/connector/data_collections_test.go b/src/internal/connector/data_collections_test.go index e16aa4b51..57332ce1a 100644 --- a/src/internal/connector/data_collections_test.go +++ b/src/internal/connector/data_collections_test.go @@ -10,6 +10,7 @@ import ( "github.com/stretchr/testify/suite" "github.com/alcionai/corso/src/internal/connector/exchange" + "github.com/alcionai/corso/src/internal/connector/graph" "github.com/alcionai/corso/src/internal/connector/sharepoint" "github.com/alcionai/corso/src/internal/tester" "github.com/alcionai/corso/src/pkg/control" @@ -43,7 +44,7 @@ func (suite *ConnectorDataCollectionIntegrationSuite) SetupSuite() { tester.MustGetEnvVars(suite.T(), tester.M365AcctCredEnvs...) - suite.connector = loadConnector(ctx, suite.T(), AllResources) + suite.connector = loadConnector(ctx, suite.T(), graph.LargeItemClient(), AllResources) suite.user = tester.M365UserID(suite.T()) suite.site = tester.M365SiteID(suite.T()) @@ -62,7 +63,7 @@ func (suite *ConnectorDataCollectionIntegrationSuite) TestExchangeDataCollection selUsers := []string{suite.user} - connector := loadConnector(ctx, suite.T(), Users) + connector := loadConnector(ctx, suite.T(), graph.LargeItemClient(), Users) tests := []struct { name string getSelector func(t *testing.T) selectors.Selector @@ -138,7 +139,7 @@ func (suite *ConnectorDataCollectionIntegrationSuite) TestDataCollections_invali owners := []string{"snuffleupagus"} - connector := loadConnector(ctx, suite.T(), Users) + connector := loadConnector(ctx, suite.T(), graph.LargeItemClient(), Users) tests := []struct { name string getSelector func(t *testing.T) selectors.Selector @@ -214,7 +215,7 @@ func (suite *ConnectorDataCollectionIntegrationSuite) TestSharePointDataCollecti selSites := []string{suite.site} - connector := loadConnector(ctx, suite.T(), Sites) + connector := loadConnector(ctx, suite.T(), graph.LargeItemClient(), Sites) tests := []struct { name string expected int @@ -243,6 +244,7 @@ func (suite *ConnectorDataCollectionIntegrationSuite) TestSharePointDataCollecti suite.T().Run(test.name, func(t *testing.T) { collections, err := sharepoint.DataCollections( ctx, + graph.LargeItemClient(), test.getSelector(), connector.credentials.AzureTenantID, connector.Service, @@ -298,7 +300,7 @@ func (suite *ConnectorCreateSharePointCollectionIntegrationSuite) SetupSuite() { tester.MustGetEnvSets(suite.T(), tester.M365AcctCredEnvs) - suite.connector = loadConnector(ctx, suite.T(), Sites) + suite.connector = loadConnector(ctx, suite.T(), graph.LargeItemClient(), Sites) suite.user = tester.M365UserID(suite.T()) tester.LogTimeOfTest(suite.T()) @@ -311,7 +313,7 @@ func (suite *ConnectorCreateSharePointCollectionIntegrationSuite) TestCreateShar var ( t = suite.T() siteID = tester.M365SiteID(t) - gc = loadConnector(ctx, t, Sites) + gc = loadConnector(ctx, t, graph.LargeItemClient(), Sites) siteIDs = []string{siteID} ) @@ -335,7 +337,7 @@ func (suite *ConnectorCreateSharePointCollectionIntegrationSuite) TestCreateShar var ( t = suite.T() siteID = tester.M365SiteID(t) - gc = loadConnector(ctx, t, Sites) + gc = loadConnector(ctx, t, graph.LargeItemClient(), Sites) siteIDs = []string{siteID} ) diff --git a/src/internal/connector/graph/service_helper.go b/src/internal/connector/graph/service_helper.go index b374933f8..900919406 100644 --- a/src/internal/connector/graph/service_helper.go +++ b/src/internal/connector/graph/service_helper.go @@ -55,6 +55,23 @@ func CreateHTTPClient() *http.Client { return httpClient } +// LargeItemClient generates a client that's configured to handle +// large file downloads. This client isn't suitable for other queries +// due to loose restrictions on timeouts and such. +// +// Re-use of http clients is critical, or else we leak os resources +// and consume relatively unbound socket connections. It is important +// to centralize this client to be passed downstream where api calls +// can utilize it on a per-download basis. +// +// TODO: this should get owned by an API client layer, not the GC itself. +func LargeItemClient() *http.Client { + httpClient := CreateHTTPClient() + httpClient.Timeout = 0 // infinite timeout for pulling large files + + return httpClient +} + // --------------------------------------------------------------------------- // Logging Middleware // --------------------------------------------------------------------------- @@ -85,6 +102,10 @@ func (handler *LoggingMiddleware) Intercept( logger.Ctx(ctx).Infow("graph api throttling", "method", req.Method, "url", req.URL) } + if resp.StatusCode != http.StatusTooManyRequests && (resp.StatusCode/100) != 2 { + logger.Ctx(ctx).Infow("graph api error", "method", req.Method, "url", req.URL) + } + if logger.DebugAPI || os.Getenv(logGraphRequestsEnvKey) != "" { respDump, _ := httputil.DumpResponse(resp, true) diff --git a/src/internal/connector/graph_connector.go b/src/internal/connector/graph_connector.go index 68f03d048..3dbc0e60c 100644 --- a/src/internal/connector/graph_connector.go +++ b/src/internal/connector/graph_connector.go @@ -4,6 +4,7 @@ package connector import ( "context" + "net/http" "runtime/trace" "strings" "sync" @@ -38,8 +39,9 @@ import ( // GraphRequestAdapter from the msgraph-sdk-go. Additional fields are for // bookkeeping and interfacing with other component. type GraphConnector struct { - Service graph.Servicer - Owners api.Client + Service graph.Servicer + Owners api.Client + itemClient *http.Client // configured to handle large item downloads tenant string Users map[string]string // key value @@ -64,13 +66,19 @@ const ( Sites ) -func NewGraphConnector(ctx context.Context, acct account.Account, r resource) (*GraphConnector, error) { +func NewGraphConnector( + ctx context.Context, + itemClient *http.Client, + acct account.Account, + r resource, +) (*GraphConnector, error) { m365, err := acct.M365Config() if err != nil { return nil, errors.Wrap(err, "retrieving m365 account configuration") } gc := GraphConnector{ + itemClient: itemClient, tenant: m365.AzureTenantID, Users: make(map[string]string, 0), wg: &sync.WaitGroup{}, diff --git a/src/internal/connector/graph_connector_disconnected_test.go b/src/internal/connector/graph_connector_disconnected_test.go index f7f583ebd..711e55ff8 100644 --- a/src/internal/connector/graph_connector_disconnected_test.go +++ b/src/internal/connector/graph_connector_disconnected_test.go @@ -9,6 +9,7 @@ import ( "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" + "github.com/alcionai/corso/src/internal/connector/graph" "github.com/alcionai/corso/src/internal/connector/support" "github.com/alcionai/corso/src/internal/tester" "github.com/alcionai/corso/src/pkg/account" @@ -65,7 +66,7 @@ func (suite *DisconnectedGraphConnectorSuite) TestBadConnection() { for _, test := range table { suite.T().Run(test.name, func(t *testing.T) { - gc, err := NewGraphConnector(ctx, test.acct(t), Users) + gc, err := NewGraphConnector(ctx, graph.LargeItemClient(), test.acct(t), Users) assert.Nil(t, gc, test.name+" failed") assert.NotNil(t, err, test.name+"failed") }) diff --git a/src/internal/connector/graph_connector_helper_test.go b/src/internal/connector/graph_connector_helper_test.go index c614df05d..ce16a5a4d 100644 --- a/src/internal/connector/graph_connector_helper_test.go +++ b/src/internal/connector/graph_connector_helper_test.go @@ -3,6 +3,7 @@ package connector import ( "context" "io" + "net/http" "reflect" "testing" @@ -976,9 +977,9 @@ func getSelectorWith( } } -func loadConnector(ctx context.Context, t *testing.T, r resource) *GraphConnector { +func loadConnector(ctx context.Context, t *testing.T, itemClient *http.Client, r resource) *GraphConnector { a := tester.NewM365Account(t) - connector, err := NewGraphConnector(ctx, a, r) + connector, err := NewGraphConnector(ctx, itemClient, a, r) require.NoError(t, err) return connector diff --git a/src/internal/connector/graph_connector_test.go b/src/internal/connector/graph_connector_test.go index 71eff095a..85ad3f45f 100644 --- a/src/internal/connector/graph_connector_test.go +++ b/src/internal/connector/graph_connector_test.go @@ -156,7 +156,7 @@ func (suite *GraphConnectorIntegrationSuite) SetupSuite() { tester.MustGetEnvSets(suite.T(), tester.M365AcctCredEnvs) - suite.connector = loadConnector(ctx, suite.T(), Users) + suite.connector = loadConnector(ctx, suite.T(), graph.LargeItemClient(), Users) suite.user = tester.M365UserID(suite.T()) suite.acct = tester.NewM365Account(suite.T()) @@ -380,7 +380,7 @@ func runRestoreBackupTest( start := time.Now() - restoreGC := loadConnector(ctx, t, test.resource) + restoreGC := loadConnector(ctx, t, graph.LargeItemClient(), test.resource) restoreSel := getSelectorWith(t, test.service, resourceOwners, true) deets, err := restoreGC.RestoreDataCollections( ctx, @@ -419,7 +419,7 @@ func runRestoreBackupTest( }) } - backupGC := loadConnector(ctx, t, test.resource) + backupGC := loadConnector(ctx, t, graph.LargeItemClient(), test.resource) backupSel := backupSelectorForExpected(t, test.service, expectedDests) t.Logf("Selective backup of %s\n", backupSel) @@ -870,7 +870,7 @@ func (suite *GraphConnectorIntegrationSuite) TestMultiFolderBackupDifferentNames dest.ContainerName, ) - restoreGC := loadConnector(ctx, t, test.resource) + restoreGC := loadConnector(ctx, t, graph.LargeItemClient(), test.resource) deets, err := restoreGC.RestoreDataCollections(ctx, suite.acct, restoreSel, dest, collections) require.NoError(t, err) require.NotNil(t, deets) @@ -888,7 +888,7 @@ func (suite *GraphConnectorIntegrationSuite) TestMultiFolderBackupDifferentNames // Run a backup and compare its output with what we put in. - backupGC := loadConnector(ctx, t, test.resource) + backupGC := loadConnector(ctx, t, graph.LargeItemClient(), test.resource) backupSel := backupSelectorForExpected(t, test.service, expectedDests) t.Log("Selective backup of", backupSel) diff --git a/src/internal/connector/onedrive/collection.go b/src/internal/connector/onedrive/collection.go index 4ea9ea9eb..ac0aa9fb3 100644 --- a/src/internal/connector/onedrive/collection.go +++ b/src/internal/connector/onedrive/collection.go @@ -4,11 +4,13 @@ package onedrive import ( "context" "io" + "net/http" "sync" "sync/atomic" "time" "github.com/microsoftgraph/msgraph-sdk-go/models" + "github.com/pkg/errors" "github.com/spatialcurrent/go-lazy/pkg/lazy" "github.com/alcionai/corso/src/internal/connector/graph" @@ -43,6 +45,9 @@ var ( // Collection represents a set of OneDrive objects retrieved from M365 type Collection struct { + // configured to handle large item downloads + itemClient *http.Client + // data is used to share data streams with the collection consumer data chan data.Stream // folderPath indicates what level in the hierarchy this collection @@ -64,12 +69,13 @@ type Collection struct { // itemReadFunc returns a reader for the specified item type itemReaderFunc func( - ctx context.Context, + hc *http.Client, item models.DriveItemable, ) (itemInfo details.ItemInfo, itemData io.ReadCloser, err error) // NewCollection creates a Collection func NewCollection( + itemClient *http.Client, folderPath path.Path, driveID string, service graph.Servicer, @@ -78,6 +84,7 @@ func NewCollection( ctrlOpts control.Options, ) *Collection { c := &Collection{ + itemClient: itemClient, folderPath: folderPath, driveItems: map[string]models.DriveItemable{}, driveID: driveID, @@ -198,11 +205,16 @@ func (oc *Collection) populateItems(ctx context.Context) { m.Unlock() } - for _, item := range oc.driveItems { + for id, item := range oc.driveItems { if oc.ctrl.FailFast && errs != nil { break } + if item == nil { + errUpdater(id, errors.New("nil item")) + continue + } + semaphoreCh <- struct{}{} wg.Add(1) @@ -219,10 +231,9 @@ func (oc *Collection) populateItems(ctx context.Context) { ) for i := 1; i <= maxRetries; i++ { - itemInfo, itemData, err = oc.itemReader(ctx, item) - - // retry on Timeout type errors, break otherwise. + itemInfo, itemData, err = oc.itemReader(oc.itemClient, item) if err == nil || graph.IsErrTimeout(err) == nil { + // retry on Timeout type errors, break otherwise. break } diff --git a/src/internal/connector/onedrive/collection_test.go b/src/internal/connector/onedrive/collection_test.go index a19021ff7..a36db58c9 100644 --- a/src/internal/connector/onedrive/collection_test.go +++ b/src/internal/connector/onedrive/collection_test.go @@ -2,9 +2,8 @@ package onedrive import ( "bytes" - "context" - "errors" "io" + "net/http" "sync" "testing" "time" @@ -15,6 +14,7 @@ import ( "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" + "github.com/alcionai/corso/src/internal/connector/graph" "github.com/alcionai/corso/src/internal/connector/support" "github.com/alcionai/corso/src/internal/data" "github.com/alcionai/corso/src/pkg/backup/details" @@ -73,7 +73,7 @@ func (suite *CollectionUnitTestSuite) TestCollection() { name: "oneDrive, no duplicates", numInstances: 1, source: OneDriveSource, - itemReader: func(context.Context, models.DriveItemable) (details.ItemInfo, io.ReadCloser, error) { + itemReader: func(*http.Client, models.DriveItemable) (details.ItemInfo, io.ReadCloser, error) { return details.ItemInfo{OneDrive: &details.OneDriveInfo{ItemName: testItemName, Modified: now}}, io.NopCloser(bytes.NewReader(testItemData)), nil @@ -87,7 +87,7 @@ func (suite *CollectionUnitTestSuite) TestCollection() { name: "oneDrive, duplicates", numInstances: 3, source: OneDriveSource, - itemReader: func(context.Context, models.DriveItemable) (details.ItemInfo, io.ReadCloser, error) { + itemReader: func(*http.Client, models.DriveItemable) (details.ItemInfo, io.ReadCloser, error) { return details.ItemInfo{OneDrive: &details.OneDriveInfo{ItemName: testItemName, Modified: now}}, io.NopCloser(bytes.NewReader(testItemData)), nil @@ -101,7 +101,7 @@ func (suite *CollectionUnitTestSuite) TestCollection() { name: "sharePoint, no duplicates", numInstances: 1, source: SharePointSource, - itemReader: func(context.Context, models.DriveItemable) (details.ItemInfo, io.ReadCloser, error) { + itemReader: func(*http.Client, models.DriveItemable) (details.ItemInfo, io.ReadCloser, error) { return details.ItemInfo{SharePoint: &details.SharePointInfo{ItemName: testItemName, Modified: now}}, io.NopCloser(bytes.NewReader(testItemData)), nil @@ -115,7 +115,7 @@ func (suite *CollectionUnitTestSuite) TestCollection() { name: "sharePoint, duplicates", numInstances: 3, source: SharePointSource, - itemReader: func(context.Context, models.DriveItemable) (details.ItemInfo, io.ReadCloser, error) { + itemReader: func(*http.Client, models.DriveItemable) (details.ItemInfo, io.ReadCloser, error) { return details.ItemInfo{SharePoint: &details.SharePointInfo{ItemName: testItemName, Modified: now}}, io.NopCloser(bytes.NewReader(testItemData)), nil @@ -140,6 +140,7 @@ func (suite *CollectionUnitTestSuite) TestCollection() { require.NoError(t, err) coll := NewCollection( + graph.LargeItemClient(), folderPath, "drive-id", suite, @@ -224,6 +225,7 @@ func (suite *CollectionUnitTestSuite) TestCollectionReadError() { require.NoError(t, err) coll := NewCollection( + graph.LargeItemClient(), folderPath, "fakeDriveID", suite, @@ -235,10 +237,8 @@ func (suite *CollectionUnitTestSuite) TestCollectionReadError() { mockItem.SetId(&testItemID) coll.Add(mockItem) - readError := errors.New("Test error") - - coll.itemReader = func(context.Context, models.DriveItemable) (details.ItemInfo, io.ReadCloser, error) { - return details.ItemInfo{}, nil, readError + coll.itemReader = func(*http.Client, models.DriveItemable) (details.ItemInfo, io.ReadCloser, error) { + return details.ItemInfo{}, nil, assert.AnError } coll.Items() diff --git a/src/internal/connector/onedrive/collections.go b/src/internal/connector/onedrive/collections.go index 8f18aa780..f446aa246 100644 --- a/src/internal/connector/onedrive/collections.go +++ b/src/internal/connector/onedrive/collections.go @@ -3,6 +3,7 @@ package onedrive import ( "context" "fmt" + "net/http" "strings" "github.com/microsoftgraph/msgraph-sdk-go/models" @@ -46,6 +47,9 @@ type folderMatcher interface { // Collections is used to retrieve drive data for a // resource owner, which can be either a user or a sharepoint site. type Collections struct { + // configured to handle large item downloads + itemClient *http.Client + tenant string resourceOwner string source driveSource @@ -66,6 +70,7 @@ type Collections struct { } func NewCollections( + itemClient *http.Client, tenant string, resourceOwner string, source driveSource, @@ -75,6 +80,7 @@ func NewCollections( ctrlOpts control.Options, ) *Collections { return &Collections{ + itemClient: itemClient, tenant: tenant, resourceOwner: resourceOwner, source: source, @@ -265,13 +271,13 @@ func (c *Collections) UpdateCollections( // TODO(ashmrtn): Compare old and new path and set collection state // accordingly. col = NewCollection( + c.itemClient, collectionPath, driveID, c.service, c.statusUpdater, c.source, - c.ctrl, - ) + c.ctrl) c.CollectionMap[collectionPath.String()] = col c.NumContainers++ diff --git a/src/internal/connector/onedrive/collections_test.go b/src/internal/connector/onedrive/collections_test.go index 485b9ed86..21ca061a9 100644 --- a/src/internal/connector/onedrive/collections_test.go +++ b/src/internal/connector/onedrive/collections_test.go @@ -10,6 +10,7 @@ import ( "github.com/stretchr/testify/suite" "golang.org/x/exp/maps" + "github.com/alcionai/corso/src/internal/connector/graph" "github.com/alcionai/corso/src/internal/tester" "github.com/alcionai/corso/src/pkg/control" "github.com/alcionai/corso/src/pkg/selectors" @@ -587,6 +588,7 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() { outputFolderMap := map[string]string{} maps.Copy(outputFolderMap, tt.inputFolderMap) c := NewCollections( + graph.LargeItemClient(), tenant, user, OneDriveSource, diff --git a/src/internal/connector/onedrive/drive_test.go b/src/internal/connector/onedrive/drive_test.go index 7f6901621..755b7293b 100644 --- a/src/internal/connector/onedrive/drive_test.go +++ b/src/internal/connector/onedrive/drive_test.go @@ -9,6 +9,7 @@ import ( "github.com/stretchr/testify/suite" "github.com/alcionai/corso/src/internal/common" + "github.com/alcionai/corso/src/internal/connector/graph" "github.com/alcionai/corso/src/internal/tester" "github.com/alcionai/corso/src/pkg/control" "github.com/alcionai/corso/src/pkg/logger" @@ -146,6 +147,7 @@ func (suite *OneDriveSuite) TestOneDriveNewCollections() { NewOneDriveBackup([]string{test.user}). AllData()[0] odcs, err := NewCollections( + graph.LargeItemClient(), creds.AzureTenantID, test.user, OneDriveSource, diff --git a/src/internal/connector/onedrive/item.go b/src/internal/connector/onedrive/item.go index 73391033b..3e4e9e516 100644 --- a/src/internal/connector/onedrive/item.go +++ b/src/internal/connector/onedrive/item.go @@ -4,6 +4,7 @@ import ( "context" "fmt" "io" + "net/http" "strings" msdrives "github.com/microsoftgraph/msgraph-sdk-go/drives" @@ -13,6 +14,7 @@ import ( "github.com/alcionai/corso/src/internal/connector/graph" "github.com/alcionai/corso/src/internal/connector/support" "github.com/alcionai/corso/src/internal/connector/uploadsession" + "github.com/alcionai/corso/src/internal/version" "github.com/alcionai/corso/src/pkg/backup/details" "github.com/alcionai/corso/src/pkg/logger" ) @@ -27,7 +29,7 @@ const ( // It crafts this by querying M365 for a download URL for the item // and using a http client to initialize a reader func sharePointItemReader( - ctx context.Context, + hc *http.Client, item models.DriveItemable, ) (details.ItemInfo, io.ReadCloser, error) { url, ok := item.GetAdditionalData()[downloadURLKey].(*string) @@ -35,7 +37,7 @@ func sharePointItemReader( return details.ItemInfo{}, nil, fmt.Errorf("failed to get url for %s", *item.GetName()) } - rc, err := driveItemReader(ctx, *url) + resp, err := hc.Get(*url) if err != nil { return details.ItemInfo{}, nil, err } @@ -44,14 +46,14 @@ func sharePointItemReader( SharePoint: sharePointItemInfo(item, *item.GetSize()), } - return dii, rc, nil + return dii, resp.Body, nil } // oneDriveItemReader will return a io.ReadCloser for the specified item // It crafts this by querying M365 for a download URL for the item // and using a http client to initialize a reader func oneDriveItemReader( - ctx context.Context, + hc *http.Client, item models.DriveItemable, ) (details.ItemInfo, io.ReadCloser, error) { url, ok := item.GetAdditionalData()[downloadURLKey].(*string) @@ -59,7 +61,17 @@ func oneDriveItemReader( return details.ItemInfo{}, nil, fmt.Errorf("failed to get url for %s", *item.GetName()) } - rc, err := driveItemReader(ctx, *url) + req, err := http.NewRequest(http.MethodGet, *url, nil) + if err != nil { + return details.ItemInfo{}, nil, err + } + + // Decorate the traffic + //nolint:lll + // See https://learn.microsoft.com/en-us/sharepoint/dev/general-development/how-to-avoid-getting-throttled-or-blocked-in-sharepoint-online#how-to-decorate-your-http-traffic + req.Header.Set("User-Agent", "ISV|Alcion|Corso/"+version.Version) + + resp, err := hc.Do(req) if err != nil { return details.ItemInfo{}, nil, err } @@ -68,25 +80,7 @@ func oneDriveItemReader( OneDrive: oneDriveItemInfo(item, *item.GetSize()), } - return dii, rc, nil -} - -// driveItemReader will return a io.ReadCloser for the specified item -// It crafts this by querying M365 for a download URL for the item -// and using a http client to initialize a reader -func driveItemReader( - ctx context.Context, - url string, -) (io.ReadCloser, error) { - httpClient := graph.CreateHTTPClient() - httpClient.Timeout = 0 // infinite timeout for pulling large files - - resp, err := httpClient.Get(url) - if err != nil { - return nil, errors.Wrapf(err, "failed to download file from %s", url) - } - - return resp.Body, nil + return dii, resp.Body, nil } // oneDriveItemInfo will populate a details.OneDriveInfo struct @@ -97,7 +91,7 @@ func driveItemReader( func oneDriveItemInfo(di models.DriveItemable, itemSize int64) *details.OneDriveInfo { var email, parent string - if di.GetCreatedBy().GetUser() != nil { + if di.GetCreatedBy() != nil && di.GetCreatedBy().GetUser() != nil { // User is sometimes not available when created via some // external applications (like backup/restore solutions) ed, ok := di.GetCreatedBy().GetUser().GetAdditionalData()["email"] @@ -106,11 +100,9 @@ func oneDriveItemInfo(di models.DriveItemable, itemSize int64) *details.OneDrive } } - if di.GetParentReference() != nil { - if di.GetParentReference().GetName() != nil { - // EndPoint is not always populated from external apps - parent = *di.GetParentReference().GetName() - } + if di.GetParentReference() != nil && di.GetParentReference().GetName() != nil { + // EndPoint is not always populated from external apps + parent = *di.GetParentReference().GetName() } return &details.OneDriveInfo{ diff --git a/src/internal/connector/onedrive/item_test.go b/src/internal/connector/onedrive/item_test.go index b3f45ca56..2b28f0910 100644 --- a/src/internal/connector/onedrive/item_test.go +++ b/src/internal/connector/onedrive/item_test.go @@ -126,7 +126,7 @@ func (suite *ItemIntegrationSuite) TestItemReader_oneDrive() { // Read data for the file - itemInfo, itemData, err := oneDriveItemReader(ctx, driveItem) + itemInfo, itemData, err := oneDriveItemReader(graph.LargeItemClient(), driveItem) require.NoError(suite.T(), err) require.NotNil(suite.T(), itemInfo.OneDrive) require.NotEmpty(suite.T(), itemInfo.OneDrive.ItemName) diff --git a/src/internal/connector/sharepoint/data_collections.go b/src/internal/connector/sharepoint/data_collections.go index d7c6547a0..5950daede 100644 --- a/src/internal/connector/sharepoint/data_collections.go +++ b/src/internal/connector/sharepoint/data_collections.go @@ -2,6 +2,7 @@ package sharepoint import ( "context" + "net/http" "github.com/pkg/errors" @@ -24,6 +25,7 @@ type statusUpdater interface { // for the specified user func DataCollections( ctx context.Context, + itemClient *http.Client, selector selectors.Selector, tenantID string, serv graph.Servicer, @@ -66,6 +68,7 @@ func DataCollections( case path.LibrariesCategory: spcs, err = collectLibraries( ctx, + itemClient, serv, tenantID, site, @@ -124,6 +127,7 @@ func collectLists( // all the drives associated with the site. func collectLibraries( ctx context.Context, + itemClient *http.Client, serv graph.Servicer, tenantID, siteID string, scope selectors.SharePointScope, @@ -138,6 +142,7 @@ func collectLibraries( logger.Ctx(ctx).With("site", siteID).Debug("Creating SharePoint Library collections") colls := onedrive.NewCollections( + itemClient, tenantID, siteID, onedrive.SharePointSource, diff --git a/src/internal/connector/sharepoint/data_collections_test.go b/src/internal/connector/sharepoint/data_collections_test.go index 423336253..4daca0877 100644 --- a/src/internal/connector/sharepoint/data_collections_test.go +++ b/src/internal/connector/sharepoint/data_collections_test.go @@ -7,6 +7,7 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/suite" + "github.com/alcionai/corso/src/internal/connector/graph" "github.com/alcionai/corso/src/internal/connector/onedrive" "github.com/alcionai/corso/src/internal/tester" "github.com/alcionai/corso/src/pkg/control" @@ -91,6 +92,7 @@ func (suite *SharePointLibrariesSuite) TestUpdateCollections() { newPaths := map[string]string{} excluded := map[string]struct{}{} c := onedrive.NewCollections( + graph.LargeItemClient(), tenant, site, onedrive.SharePointSource, diff --git a/src/internal/operations/backup_integration_test.go b/src/internal/operations/backup_integration_test.go index 3ee5c0230..83748baa2 100644 --- a/src/internal/operations/backup_integration_test.go +++ b/src/internal/operations/backup_integration_test.go @@ -655,7 +655,7 @@ func (suite *BackupOpIntegrationSuite) TestBackup_Run_exchangeIncrementals() { m365, err := acct.M365Config() require.NoError(t, err) - gc, err := connector.NewGraphConnector(ctx, acct, connector.Users) + gc, err := connector.NewGraphConnector(ctx, graph.LargeItemClient(), acct, connector.Users) require.NoError(t, err) ac, err := api.NewClient(m365) diff --git a/src/internal/operations/operation.go b/src/internal/operations/operation.go index 30770bdf5..e5382b022 100644 --- a/src/internal/operations/operation.go +++ b/src/internal/operations/operation.go @@ -7,6 +7,7 @@ import ( "github.com/pkg/errors" "github.com/alcionai/corso/src/internal/connector" + "github.com/alcionai/corso/src/internal/connector/graph" "github.com/alcionai/corso/src/internal/events" "github.com/alcionai/corso/src/internal/kopia" "github.com/alcionai/corso/src/internal/observe" @@ -107,7 +108,7 @@ func connectToM365( resource = connector.Sites } - gc, err := connector.NewGraphConnector(ctx, acct, resource) + gc, err := connector.NewGraphConnector(ctx, graph.LargeItemClient(), acct, resource) if err != nil { return nil, err } diff --git a/src/pkg/services/m365/m365.go b/src/pkg/services/m365/m365.go index ba9cc2a59..e0dd75af9 100644 --- a/src/pkg/services/m365/m365.go +++ b/src/pkg/services/m365/m365.go @@ -8,6 +8,7 @@ import ( "github.com/alcionai/corso/src/internal/connector" "github.com/alcionai/corso/src/internal/connector/discovery" + "github.com/alcionai/corso/src/internal/connector/graph" "github.com/alcionai/corso/src/pkg/account" ) @@ -20,7 +21,7 @@ type User struct { // Users returns a list of users in the specified M365 tenant // TODO: Implement paging support func Users(ctx context.Context, m365Account account.Account) ([]*User, error) { - gc, err := connector.NewGraphConnector(ctx, m365Account, connector.Users) + gc, err := connector.NewGraphConnector(ctx, graph.LargeItemClient(), m365Account, connector.Users) if err != nil { return nil, errors.Wrap(err, "could not initialize M365 graph connection") } @@ -76,7 +77,7 @@ func UserPNs(ctx context.Context, m365Account account.Account) ([]string, error) // SiteURLs returns a list of SharePoint site WebURLs in the specified M365 tenant func SiteURLs(ctx context.Context, m365Account account.Account) ([]string, error) { - gc, err := connector.NewGraphConnector(ctx, m365Account, connector.Sites) + gc, err := connector.NewGraphConnector(ctx, graph.LargeItemClient(), m365Account, connector.Sites) if err != nil { return nil, errors.Wrap(err, "could not initialize M365 graph connection") } @@ -86,7 +87,7 @@ func SiteURLs(ctx context.Context, m365Account account.Account) ([]string, error // SiteURLs returns a list of SharePoint sites IDs in the specified M365 tenant func SiteIDs(ctx context.Context, m365Account account.Account) ([]string, error) { - gc, err := connector.NewGraphConnector(ctx, m365Account, connector.Sites) + gc, err := connector.NewGraphConnector(ctx, graph.LargeItemClient(), m365Account, connector.Sites) if err != nil { return nil, errors.Wrap(err, "could not initialize M365 graph connection") }