From f45aecd5db59e6b4665e3effc067840577fc551e Mon Sep 17 00:00:00 2001 From: Abhishek Pandey Date: Tue, 22 Aug 2023 11:00:55 +0530 Subject: [PATCH] Reduce $select parameters for URL cache delta queries (#4074) This PR optimizes memory & cache refresh time for URL cache. The cache only makes use of a small subset of drive item properties, namely ID, deleted, file, folder, content download URL. We have found that reducing the number of query properties has a sizable impact on corso mem usage. This is especially relevant for large scale backups. See below graph for a comparison between original delta queries & mod. Note that this is with corso instrumentations to show comparisons side by side in the same run. - Reading this graph - We are doing 3 orig delta queries followed right after by 3 mod. Vertical lines are delta query spans. Originally, this investigation was done to improve mem usage for scale backups. But we also found that url cache delta query time drops by 22% with this PR. This is because we are now transferring & processing fewer bytes. ![image](https://github.com/alcionai/corso/assets/4962258/be4461db-f86c-42d4-bca1-2819aff078ce) --- #### Does this PR need a docs update or release note? - [ ] :white_check_mark: Yes, it's included - [ ] :clock1: Yes, but in a later PR - [x] :no_entry: No #### Type of change - [ ] :sunflower: Feature - [x] :bug: Bugfix - [ ] :world_map: Documentation - [ ] :robot: Supportability/Tests - [ ] :computer: CI/Deployment - [ ] :broom: Tech Debt/Cleanup #### Issue(s) * # #### Test Plan - [ ] :muscle: Manual - [x] :zap: Unit test - [x] :green_heart: E2E --- .../m365/collection/drive/collections.go | 2 +- .../m365/collection/drive/url_cache_test.go | 23 +++++++++++++------ src/pkg/services/m365/api/config.go | 9 ++++++++ 3 files changed, 26 insertions(+), 8 deletions(-) diff --git a/src/internal/m365/collection/drive/collections.go b/src/internal/m365/collection/drive/collections.go index 6964774b8..b88de4aaa 100644 --- a/src/internal/m365/collection/drive/collections.go +++ b/src/internal/m365/collection/drive/collections.go @@ -471,7 +471,7 @@ func (c *Collections) addURLCacheToDriveCollections( driveID, prevDelta, urlCacheRefreshInterval, - c.handler.NewItemPager(driveID, "", api.DriveItemSelectDefault()), + c.handler.NewItemPager(driveID, "", api.DriveItemSelectURLCache()), errs) if err != nil { return err diff --git a/src/internal/m365/collection/drive/url_cache_test.go b/src/internal/m365/collection/drive/url_cache_test.go index f2fd257b8..68b5b8a8b 100644 --- a/src/internal/m365/collection/drive/url_cache_test.go +++ b/src/internal/m365/collection/drive/url_cache_test.go @@ -3,6 +3,7 @@ package drive import ( "context" "errors" + "io" "math/rand" "net/http" "sync" @@ -87,6 +88,7 @@ func (suite *URLCacheIntegrationSuite) TestURLCacheBasic() { newItem(newFolderName, true), control.Copy) require.NoError(t, err, clues.ToCore(err)) + require.NotNil(t, newFolder.GetId()) nfid := ptr.Val(newFolder.GetId()) @@ -109,7 +111,7 @@ func (suite *URLCacheIntegrationSuite) TestURLCacheBasic() { // Get the previous delta to feed into url cache prevDelta, _, _, err := collectItems( ctx, - suite.ac.Drives().NewDriveItemDeltaPager(driveID, "", api.DriveItemSelectDefault()), + suite.ac.Drives().NewDriveItemDeltaPager(driveID, "", api.DriveItemSelectURLCache()), suite.driveID, "drive-name", collectorFunc, @@ -131,10 +133,7 @@ func (suite *URLCacheIntegrationSuite) TestURLCacheBasic() { nfid, newItem(newItemName, false), control.Copy) - if err != nil { - // Something bad happened, skip this item - continue - } + require.NoError(t, err, clues.ToCore(err)) items = append(items, item) } @@ -176,13 +175,23 @@ func (suite *URLCacheIntegrationSuite) TestURLCacheBasic() { nil, nil) require.NoError(t, err, clues.ToCore(err)) + + require.NotNil(t, resp) + require.NotNil(t, resp.Body) + + defer func(rc io.ReadCloser) { + if rc != nil { + rc.Close() + } + }(resp.Body) + require.Equal(t, http.StatusOK, resp.StatusCode) }(i) } wg.Wait() - // Validate that <= 1 delta queries were made by url cache - require.LessOrEqual(t, uc.deltaQueryCount, 1) + // Validate that exactly 1 delta query was made by url cache + require.Equal(t, 1, uc.deltaQueryCount) } type URLCacheUnitSuite struct { diff --git a/src/pkg/services/m365/api/config.go b/src/pkg/services/m365/api/config.go index 9e2247279..a1c752686 100644 --- a/src/pkg/services/m365/api/config.go +++ b/src/pkg/services/m365/api/config.go @@ -112,3 +112,12 @@ func DriveItemSelectDefault() []string { "malware", "shared") } + +// URL cache only needs a subset of item properties +func DriveItemSelectURLCache() []string { + return idAnd( + "content.downloadUrl", + "deleted", + "file", + "folder") +}