Reduce $select parameters for URL cache delta queries (#4074)

<!-- PR description-->

This PR reduces memory usage and cache refresh time for the URL cache. The cache only uses a small subset of drive item properties, namely ID, deleted, file, folder, and content download URL. We have found that reducing the number of queried properties has a sizable impact on corso memory usage. This is especially relevant for large-scale backups.

See the graph below for a comparison between the original delta queries and the modified ones. Note that this was captured with corso instrumentation so that both variants can be compared side by side in the same run.
- Reading this graph
    - We run 3 original delta queries followed immediately by 3 modified ones. Vertical lines mark the delta query spans.

Originally, this investigation was done to improve memory usage for large-scale backups. However, we also found that URL cache delta query time drops by 22% with this PR. This is because we are now transferring and processing fewer bytes.

![image](https://github.com/alcionai/corso/assets/4962258/be4461db-f86c-42d4-bca1-2819aff078ce)


---

#### Does this PR need a docs update or release note?

- [ ]  Yes, it's included
- [ ] 🕐 Yes, but in a later PR
- [x]  No

#### Type of change

<!--- Please check the type of change your PR introduces: --->
- [ ] 🌻 Feature
- [x] 🐛 Bugfix
- [ ] 🗺️ Documentation
- [ ] 🤖 Supportability/Tests
- [ ] 💻 CI/Deployment
- [ ] 🧹 Tech Debt/Cleanup

#### Issue(s)

<!-- Can reference multiple issues. Use one of the following "magic words" - "closes, fixes" to auto-close the Github issue. -->
* #<issue>

#### Test Plan

<!-- How will this be tested prior to merging.-->
- [ ] 💪 Manual
- [x]  Unit test
- [x] 💚 E2E
This commit is contained in:
Abhishek Pandey 2023-08-22 11:00:55 +05:30 committed by GitHub
parent 11253bf816
commit f45aecd5db
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 26 additions and 8 deletions

View File

@ -471,7 +471,7 @@ func (c *Collections) addURLCacheToDriveCollections(
driveID,
prevDelta,
urlCacheRefreshInterval,
c.handler.NewItemPager(driveID, "", api.DriveItemSelectDefault()),
c.handler.NewItemPager(driveID, "", api.DriveItemSelectURLCache()),
errs)
if err != nil {
return err

View File

@ -3,6 +3,7 @@ package drive
import (
"context"
"errors"
"io"
"math/rand"
"net/http"
"sync"
@ -87,6 +88,7 @@ func (suite *URLCacheIntegrationSuite) TestURLCacheBasic() {
newItem(newFolderName, true),
control.Copy)
require.NoError(t, err, clues.ToCore(err))
require.NotNil(t, newFolder.GetId())
nfid := ptr.Val(newFolder.GetId())
@ -109,7 +111,7 @@ func (suite *URLCacheIntegrationSuite) TestURLCacheBasic() {
// Get the previous delta to feed into url cache
prevDelta, _, _, err := collectItems(
ctx,
suite.ac.Drives().NewDriveItemDeltaPager(driveID, "", api.DriveItemSelectDefault()),
suite.ac.Drives().NewDriveItemDeltaPager(driveID, "", api.DriveItemSelectURLCache()),
suite.driveID,
"drive-name",
collectorFunc,
@ -131,10 +133,7 @@ func (suite *URLCacheIntegrationSuite) TestURLCacheBasic() {
nfid,
newItem(newItemName, false),
control.Copy)
if err != nil {
// Something bad happened, skip this item
continue
}
require.NoError(t, err, clues.ToCore(err))
items = append(items, item)
}
@ -176,13 +175,23 @@ func (suite *URLCacheIntegrationSuite) TestURLCacheBasic() {
nil,
nil)
require.NoError(t, err, clues.ToCore(err))
require.NotNil(t, resp)
require.NotNil(t, resp.Body)
defer func(rc io.ReadCloser) {
if rc != nil {
rc.Close()
}
}(resp.Body)
require.Equal(t, http.StatusOK, resp.StatusCode)
}(i)
}
wg.Wait()
// Validate that <= 1 delta queries were made by url cache
require.LessOrEqual(t, uc.deltaQueryCount, 1)
// Validate that exactly 1 delta query was made by url cache
require.Equal(t, 1, uc.deltaQueryCount)
}
type URLCacheUnitSuite struct {

View File

@ -112,3 +112,12 @@ func DriveItemSelectDefault() []string {
"malware",
"shared")
}
// DriveItemSelectURLCache returns the drive item properties requested when
// paging items for the URL cache. The cache reads only a small subset of
// item properties, so the list is kept deliberately short. The names below
// are passed to idAnd, whose result is returned unchanged.
func DriveItemSelectURLCache() []string {
	urlCacheProps := []string{
		"content.downloadUrl",
		"deleted",
		"file",
		"folder",
	}

	return idAnd(urlCacheProps...)
}