Ensure each drive gets a unique url cache instance (#4631)

<!-- PR description-->

This fixes an url cache bug. We were incorrectly attaching the same url cache instance to all drives belonging to a sharepoint library. If a library has n drives, this bug had an effect of turning off cache for n - 1 drives. 

---

#### Does this PR need a docs update or release note?

- [ ]  Yes, it's included
- [ ] 🕐 Yes, but in a later PR
- [x]  No

#### Type of change

<!--- Please check the type of change your PR introduces: --->
- [ ] 🌻 Feature
- [x] 🐛 Bugfix
- [ ] 🗺️ Documentation
- [ ] 🤖 Supportability/Tests
- [ ] 💻 CI/Deployment
- [ ] 🧹 Tech Debt/Cleanup

#### Issue(s)

<!-- Can reference multiple issues. Use one of the following "magic words" - "closes, fixes" to auto-close the Github issue. -->
* #<issue>

#### Test Plan

<!-- How will this be tested prior to merging.-->
- [ ] 💪 Manual
- [x]  Unit test
- [x] 💚 E2E
This commit is contained in:
Abhishek Pandey 2023-11-08 14:17:35 -08:00 committed by GitHub
parent 3f88ee4f44
commit 85f8c9b735
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 131 additions and 96 deletions

View File

@ -319,17 +319,28 @@ func (c *Collections) Get(
numDriveItems := c.NumItems - numPrevItems
numPrevItems = c.NumItems
// Attach an url cache
// Attach an url cache to the drive if the number of discovered items is
// below the threshold. Attaching cache to larger drives can cause
// performance issues since cache delta queries start taking up majority of
// the hour the refreshed URLs are valid for.
if numDriveItems < urlCacheDriveItemThreshold {
logger.Ctx(ictx).Info("adding url cache for drive")
logger.Ctx(ictx).Infow(
"adding url cache for drive",
"num_drive_items", numDriveItems)
err = c.addURLCacheToDriveCollections(
ictx,
uc, err := newURLCache(
driveID,
prevDeltaLink,
urlCacheRefreshInterval,
c.handler,
errs)
if err != nil {
return nil, false, err
return nil, false, clues.Stack(err)
}
// Set the URL cache instance for all collections in this drive.
for id := range c.CollectionMap[driveID] {
c.CollectionMap[driveID][id].urlCache = uc
}
}
@ -457,33 +468,6 @@ func (c *Collections) Get(
return collections, canUsePrevBackup, nil
}
// addURLCacheToDriveCollections adds an URL cache to all collections belonging to
// a drive.
func (c *Collections) addURLCacheToDriveCollections(
ctx context.Context,
driveID, prevDelta string,
errs *fault.Bus,
) error {
uc, err := newURLCache(
driveID,
prevDelta,
urlCacheRefreshInterval,
c.handler,
errs)
if err != nil {
return err
}
// Set the URL cache for all collections in this drive
for _, driveColls := range c.CollectionMap {
for _, coll := range driveColls {
coll.urlCache = uc
}
}
return nil
}
func updateCollectionPaths(
driveID, itemID string,
cmap map[string]map[string]*Collection,

View File

@ -2,7 +2,6 @@ package drive
import (
"context"
"strconv"
"testing"
"github.com/alcionai/clues"
@ -29,7 +28,6 @@ import (
"github.com/alcionai/corso/src/pkg/fault"
"github.com/alcionai/corso/src/pkg/path"
"github.com/alcionai/corso/src/pkg/selectors"
"github.com/alcionai/corso/src/pkg/services/m365/api"
apiMock "github.com/alcionai/corso/src/pkg/services/m365/api/mock"
"github.com/alcionai/corso/src/pkg/services/m365/api/pagers"
)
@ -3091,22 +3089,70 @@ func delItem(
return item
}
func (suite *OneDriveCollectionsUnitSuite) TestAddURLCacheToDriveCollections() {
driveID := "test-drive"
collCount := 3
anyFolder := (&selectors.OneDriveBackup{}).Folders(selectors.Any())[0]
// TestURLCacheAttach tests for 2 things:
// 1. All collections belong to the same drive share the url cache instance
// 2. Each drive has its own unique url cache instance
func (suite *OneDriveCollectionsUnitSuite) TestURLCacheAttach() {
var (
tenant = "a-tenant"
user = "a-user"
delta = "delta1"
delta2 = "delta2"
)
driveID1 := "drive-1-" + uuid.NewString()
drive1 := models.NewDrive()
drive1.SetId(&driveID1)
drive1.SetName(&driveID1)
driveID2 := "drive-2-" + uuid.NewString()
drive2 := models.NewDrive()
drive2.SetId(&driveID2)
drive2.SetName(&driveID2)
var (
driveBasePath1 = odConsts.DriveFolderPrefixBuilder(driveID1).String()
driveBasePath2 = odConsts.DriveFolderPrefixBuilder(driveID2).String()
)
table := []struct {
name string
items []apiMock.PagerResult[any]
deltaURL string
prevDeltaSuccess bool
prevDelta string
err error
drives []models.Driveable
enumerator mock.EnumerateItemsDeltaByDrive
errCheck assert.ErrorAssertionFunc
}{
{
name: "cache is attached",
name: "Two drives with unique url cache instances",
drives: []models.Driveable{
drive1,
drive2,
},
enumerator: mock.EnumerateItemsDeltaByDrive{
DrivePagers: map[string]*mock.DriveItemsDeltaPager{
driveID1: {
Pages: []mock.NextPage{{Items: []models.DriveItemable{
driveRootItem("root1"),
driveItem("folder", "folder", driveBasePath1, "root", false, true, false),
driveItem("file", "file", driveBasePath1+"/folder", "folder", true, false, false),
}}},
DeltaUpdate: pagers.DeltaUpdate{URL: delta, Reset: true},
},
driveID2: {
Pages: []mock.NextPage{{Items: []models.DriveItemable{
driveRootItem("root2"),
driveItem("folder2", "folder", driveBasePath2, "root2", false, true, false),
driveItem("file2", "file", driveBasePath2+"/folder", "folder2", true, false, false),
}}},
DeltaUpdate: pagers.DeltaUpdate{URL: delta2, Reset: true},
},
},
},
errCheck: assert.NoError,
},
// TODO(pandeyabs): Add a test case to check that the cache is not attached
// if a drive has more than urlCacheDriveItemThreshold discovered items.
// This will require creating 300k+ mock items for the test which might take
// up a lot of memory during the test. Include it after testing out mem usage.
}
for _, test := range table {
suite.Run(test.name, func() {
@ -3115,68 +3161,73 @@ func (suite *OneDriveCollectionsUnitSuite) TestAddURLCacheToDriveCollections() {
ctx, flush := tester.NewContext(t)
defer flush()
itemPagers := map[string]pagers.DeltaHandler[models.DriveItemable]{}
itemPagers[driveID] = &apiMock.DeltaPager[models.DriveItemable]{}
mockDrivePager := &apiMock.Pager[models.Driveable]{
ToReturn: []apiMock.PagerResult[models.Driveable]{
{Values: test.drives},
},
}
mbh := mock.DefaultOneDriveBH("test-user")
mbh.ItemPagerV = itemPagers
mbh := mock.DefaultOneDriveBH("a-user")
mbh.DrivePagerV = mockDrivePager
mbh.DriveItemEnumeration = test.enumerator
c := NewCollections(
mbh,
"test-tenant",
idname.NewProvider("test-user", "test-user"),
nil,
tenant,
idname.NewProvider(user, user),
func(*support.ControllerOperationStatus) {},
control.Options{ToggleFeatures: control.Toggles{}})
if _, ok := c.CollectionMap[driveID]; !ok {
c.CollectionMap[driveID] = map[string]*Collection{}
errs := fault.New(true)
delList := prefixmatcher.NewStringSetBuilder()
cols, _, err := c.Get(ctx, nil, delList, errs)
test.errCheck(t, err)
// Group collections by drive ID
colsByDrive := map[string][]*Collection{}
for _, col := range cols {
c, ok := col.(*Collection)
if !ok {
// skip metadata collection
continue
}
// Add a few collections
for i := 0; i < collCount; i++ {
coll, err := NewCollection(
&userDriveBackupHandler{
baseUserDriveHandler: baseUserDriveHandler{
ac: api.Drives{},
},
userID: "test-user",
scope: anyFolder,
},
idname.NewProvider("", ""),
nil,
nil,
driveID,
nil,
control.Options{ToggleFeatures: control.Toggles{}},
false,
true,
nil)
require.NoError(t, err, clues.ToCore(err))
c.CollectionMap[driveID][strconv.Itoa(i)] = coll
require.Equal(t, nil, coll.urlCache, "cache not nil")
colsByDrive[c.driveID] = append(colsByDrive[c.driveID], c)
}
err := c.addURLCacheToDriveCollections(
ctx,
driveID,
"",
fault.New(true))
require.NoError(t, err, clues.ToCore(err))
caches := map[*urlCache]struct{}{}
// Check that all collections have the same cache instance attached
// to them
// Check that the URL cache is attached to each collection.
// Also check that each drive gets its own cache instance.
for drive, driveCols := range colsByDrive {
var uc *urlCache
for _, driveColls := range c.CollectionMap {
for _, coll := range driveColls {
require.NotNil(t, coll.urlCache, "cache is nil")
for _, col := range driveCols {
require.NotNil(t, col.urlCache, "cache is nil")
if uc == nil {
uc = coll.urlCache.(*urlCache)
uc = col.urlCache.(*urlCache)
} else {
require.Equal(t, uc, coll.urlCache, "cache not equal")
require.Equal(
t,
uc,
col.urlCache,
"drive collections have different url cache instances")
}
require.Equal(t, drive, uc.driveID, "drive ID mismatch")
}
caches[uc] = struct{}{}
}
// Check that we have the expected number of caches. One per drive.
require.Equal(
t,
len(test.drives),
len(caches),
"expected one cache per drive")
})
}
}