diff --git a/src/internal/m365/collection/drive/collections.go b/src/internal/m365/collection/drive/collections.go
index 2f72e6740..6e61e897f 100644
--- a/src/internal/m365/collection/drive/collections.go
+++ b/src/internal/m365/collection/drive/collections.go
@@ -319,17 +319,28 @@ func (c *Collections) Get(
 		numDriveItems := c.NumItems - numPrevItems
 		numPrevItems = c.NumItems
 
-		// Attach an url cache
+		// Attach a URL cache to the drive if the number of discovered items is
+		// below the threshold. Attaching a cache to larger drives can cause
+		// performance issues, since cache delta queries start taking up the
+		// majority of the hour that the refreshed URLs are valid for.
 		if numDriveItems < urlCacheDriveItemThreshold {
-			logger.Ctx(ictx).Info("adding url cache for drive")
+			logger.Ctx(ictx).Infow(
+				"adding url cache for drive",
+				"num_drive_items", numDriveItems)
 
-			err = c.addURLCacheToDriveCollections(
-				ictx,
+			uc, err := newURLCache(
 				driveID,
 				prevDeltaLink,
+				urlCacheRefreshInterval,
+				c.handler,
 				errs)
 			if err != nil {
-				return nil, false, err
+				return nil, false, clues.Stack(err)
+			}
+
+			// Set the URL cache instance for all collections in this drive.
+			for id := range c.CollectionMap[driveID] {
+				c.CollectionMap[driveID][id].urlCache = uc
 			}
 		}
 
@@ -457,33 +468,6 @@ func (c *Collections) Get(
 	return collections, canUsePrevBackup, nil
 }
 
-// addURLCacheToDriveCollections adds an URL cache to all collections belonging to
-// a drive.
-func (c *Collections) addURLCacheToDriveCollections(
-	ctx context.Context,
-	driveID, prevDelta string,
-	errs *fault.Bus,
-) error {
-	uc, err := newURLCache(
-		driveID,
-		prevDelta,
-		urlCacheRefreshInterval,
-		c.handler,
-		errs)
-	if err != nil {
-		return err
-	}
-
-	// Set the URL cache for all collections in this drive
-	for _, driveColls := range c.CollectionMap {
-		for _, coll := range driveColls {
-			coll.urlCache = uc
-		}
-	}
-
-	return nil
-}
-
 func updateCollectionPaths(
 	driveID, itemID string,
 	cmap map[string]map[string]*Collection,
diff --git a/src/internal/m365/collection/drive/collections_test.go b/src/internal/m365/collection/drive/collections_test.go
index 6dc378727..2467e100d 100644
--- a/src/internal/m365/collection/drive/collections_test.go
+++ b/src/internal/m365/collection/drive/collections_test.go
@@ -2,7 +2,6 @@ package drive
 
 import (
 	"context"
-	"strconv"
 	"testing"
 
 	"github.com/alcionai/clues"
@@ -29,7 +28,6 @@ import (
 	"github.com/alcionai/corso/src/pkg/fault"
 	"github.com/alcionai/corso/src/pkg/path"
 	"github.com/alcionai/corso/src/pkg/selectors"
-	"github.com/alcionai/corso/src/pkg/services/m365/api"
 	apiMock "github.com/alcionai/corso/src/pkg/services/m365/api/mock"
 	"github.com/alcionai/corso/src/pkg/services/m365/api/pagers"
 )
@@ -3091,22 +3089,70 @@ func delItem(
 	return item
 }
 
-func (suite *OneDriveCollectionsUnitSuite) TestAddURLCacheToDriveCollections() {
-	driveID := "test-drive"
-	collCount := 3
-	anyFolder := (&selectors.OneDriveBackup{}).Folders(selectors.Any())[0]
+// TestURLCacheAttach verifies 2 things:
+// 1. All collections belonging to the same drive share the same URL cache instance
+// 2. Each drive has its own unique URL cache instance
+func (suite *OneDriveCollectionsUnitSuite) TestURLCacheAttach() {
+	var (
+		tenant = "a-tenant"
+		user   = "a-user"
+		delta  = "delta1"
+		delta2 = "delta2"
+	)
+
+	driveID1 := "drive-1-" + uuid.NewString()
+	drive1 := models.NewDrive()
+	drive1.SetId(&driveID1)
+	drive1.SetName(&driveID1)
+
+	driveID2 := "drive-2-" + uuid.NewString()
+	drive2 := models.NewDrive()
+	drive2.SetId(&driveID2)
+	drive2.SetName(&driveID2)
+
+	var (
+		driveBasePath1 = odConsts.DriveFolderPrefixBuilder(driveID1).String()
+		driveBasePath2 = odConsts.DriveFolderPrefixBuilder(driveID2).String()
+	)
 
 	table := []struct {
-		name             string
-		items            []apiMock.PagerResult[any]
-		deltaURL         string
-		prevDeltaSuccess bool
-		prevDelta        string
-		err              error
+		name       string
+		drives     []models.Driveable
+		enumerator mock.EnumerateItemsDeltaByDrive
+		errCheck   assert.ErrorAssertionFunc
 	}{
 		{
-			name: "cache is attached",
+			name: "Two drives with unique url cache instances",
+			drives: []models.Driveable{
+				drive1,
+				drive2,
+			},
+			enumerator: mock.EnumerateItemsDeltaByDrive{
+				DrivePagers: map[string]*mock.DriveItemsDeltaPager{
+					driveID1: {
+						Pages: []mock.NextPage{{Items: []models.DriveItemable{
+							driveRootItem("root1"),
+							driveItem("folder", "folder", driveBasePath1, "root", false, true, false),
+							driveItem("file", "file", driveBasePath1+"/folder", "folder", true, false, false),
+						}}},
+						DeltaUpdate: pagers.DeltaUpdate{URL: delta, Reset: true},
+					},
+					driveID2: {
+						Pages: []mock.NextPage{{Items: []models.DriveItemable{
+							driveRootItem("root2"),
+							driveItem("folder2", "folder", driveBasePath2, "root2", false, true, false),
+							driveItem("file2", "file", driveBasePath2+"/folder", "folder2", true, false, false),
+						}}},
+						DeltaUpdate: pagers.DeltaUpdate{URL: delta2, Reset: true},
+					},
+				},
+			},
+			errCheck: assert.NoError,
 		},
+		// TODO(pandeyabs): Add a test case to check that the cache is not attached
+		// if a drive has more than urlCacheDriveItemThreshold discovered items.
+		// This would require creating 300k+ mock items, which might take up a lot
+		// of memory during the test. Include it after verifying memory usage.
 	}
 	for _, test := range table {
 		suite.Run(test.name, func() {
@@ -3115,68 +3161,73 @@ func (suite *OneDriveCollectionsUnitSuite) TestAddURLCacheToDriveCollections() {
 			ctx, flush := tester.NewContext(t)
 			defer flush()
 
-			itemPagers := map[string]pagers.DeltaHandler[models.DriveItemable]{}
-			itemPagers[driveID] = &apiMock.DeltaPager[models.DriveItemable]{}
+			mockDrivePager := &apiMock.Pager[models.Driveable]{
+				ToReturn: []apiMock.PagerResult[models.Driveable]{
+					{Values: test.drives},
+				},
+			}
 
-			mbh := mock.DefaultOneDriveBH("test-user")
-			mbh.ItemPagerV = itemPagers
+			mbh := mock.DefaultOneDriveBH("a-user")
+			mbh.DrivePagerV = mockDrivePager
+			mbh.DriveItemEnumeration = test.enumerator
 
 			c := NewCollections(
 				mbh,
-				"test-tenant",
-				idname.NewProvider("test-user", "test-user"),
-				nil,
+				tenant,
+				idname.NewProvider(user, user),
+				func(*support.ControllerOperationStatus) {},
 				control.Options{ToggleFeatures: control.Toggles{}})
 
-			if _, ok := c.CollectionMap[driveID]; !ok {
-				c.CollectionMap[driveID] = map[string]*Collection{}
-			}
+			errs := fault.New(true)
+			delList := prefixmatcher.NewStringSetBuilder()
 
-			// Add a few collections
-			for i := 0; i < collCount; i++ {
-				coll, err := NewCollection(
-					&userDriveBackupHandler{
-						baseUserDriveHandler: baseUserDriveHandler{
-							ac: api.Drives{},
-						},
-						userID: "test-user",
-						scope:  anyFolder,
-					},
-					idname.NewProvider("", ""),
-					nil,
-					nil,
-					driveID,
-					nil,
-					control.Options{ToggleFeatures: control.Toggles{}},
-					false,
-					true,
-					nil)
-				require.NoError(t, err, clues.ToCore(err))
+			cols, _, err := c.Get(ctx, nil, delList, errs)
+			test.errCheck(t, err)
 
-				c.CollectionMap[driveID][strconv.Itoa(i)] = coll
-				require.Equal(t, nil, coll.urlCache, "cache not nil")
-			}
+			// Group collections by drive ID
+			colsByDrive := map[string][]*Collection{}
 
-			err := c.addURLCacheToDriveCollections(
-				ctx,
-				driveID,
-				"",
-				fault.New(true))
-			require.NoError(t, err, clues.ToCore(err))
-
-			// Check that all collections have the same cache instance attached
-			// to them
-			var uc *urlCache
-			for _, driveColls := range c.CollectionMap {
-				for _, coll := range driveColls {
-					require.NotNil(t, coll.urlCache, "cache is nil")
-					if uc == nil {
-						uc = coll.urlCache.(*urlCache)
-					} else {
-						require.Equal(t, uc, coll.urlCache, "cache not equal")
-					}
-				}
-			}
+			for _, col := range cols {
+				c, ok := col.(*Collection)
+				if !ok {
+					// Skip metadata collections.
+					continue
+				}
+
+				colsByDrive[c.driveID] = append(colsByDrive[c.driveID], c)
 			}
+
+			caches := map[*urlCache]struct{}{}
+
+			// Check that the URL cache is attached to each collection.
+			// Also check that each drive gets its own cache instance.
+			for drive, driveCols := range colsByDrive {
+				var uc *urlCache
+				for _, col := range driveCols {
+					require.NotNil(t, col.urlCache, "cache is nil")
+
+					if uc == nil {
+						uc = col.urlCache.(*urlCache)
+					} else {
+						require.Equal(
+							t,
+							uc,
+							col.urlCache,
+							"drive collections have different url cache instances")
+					}
+
+					require.Equal(t, drive, uc.driveID, "drive ID mismatch")
+				}
+
+				caches[uc] = struct{}{}
+			}
+
+			// Check that we have the expected number of caches: one per drive.
+			require.Equal(
+				t,
+				len(test.drives),
+				len(caches),
+				"expected one cache per drive")
 		})
 	}
 }
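
A note for reviewers skimming the patch: the heart of the collections.go change is a per-drive cache-sharing pattern. One urlCache is built per drive and every collection in that drive's map is pointed at the same instance, while drives whose discovered item count exceeds the threshold skip the cache entirely. The sketch below distills that pattern; the collection/urlCache stand-in types, the attachCacheIfSmall helper, and the 300,000 threshold value are illustrative assumptions, not the actual Corso implementation.

```go
package main

import "fmt"

// Simplified stand-ins for the real urlCache and Collection types; the actual
// Corso types also carry the drive handler, previous delta link, refresh
// interval, and error bus.
type urlCache struct{ driveID string }

type collection struct {
	id       string
	urlCache *urlCache
}

// attachCacheIfSmall is a hypothetical helper mirroring the pattern in the
// diff: create one cache per drive and point every collection in that drive's
// map at the same instance, but only when the drive's discovered item count is
// under the threshold.
func attachCacheIfSmall(
	collMap map[string]map[string]*collection,
	driveID string,
	numDriveItems, threshold int,
) {
	if numDriveItems >= threshold {
		// Large drives skip the cache: refreshing it would consume most of the
		// window in which the refreshed URLs remain valid.
		return
	}

	uc := &urlCache{driveID: driveID}

	for id := range collMap[driveID] {
		collMap[driveID][id].urlCache = uc
	}
}

func main() {
	collMap := map[string]map[string]*collection{
		"drive-1": {
			"c1": {id: "c1"},
			"c2": {id: "c2"},
		},
	}

	// 100 items is well below the (assumed) 300k threshold, so the cache attaches.
	attachCacheIfSmall(collMap, "drive-1", 100, 300_000)

	shared := collMap["drive-1"]["c1"].urlCache == collMap["drive-1"]["c2"].urlCache
	fmt.Println("collections share one cache:", shared) // true
}
```

Sharing a single pointer per drive keeps URL refreshes amortized across that drive's collections while still letting each drive refresh on its own schedule.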
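The new test's uniqueness assertion leans on pointer identity as a map key: every attached cache is dropped into a map[*urlCache]struct{}, and the set's size is compared against the number of drives, which simultaneously proves sharing within a drive and separation across drives. A self-contained toy version of that check, again with a pared-down urlCache stand-in, is sketched below.

```go
package main

import "fmt"

// Pared-down stand-in for the urlCache type referenced in the diff.
type urlCache struct{ driveID string }

// distinctCaches counts unique cache instances by pointer identity, the same
// trick the test uses with its map[*urlCache]struct{} set.
func distinctCaches(attached []*urlCache) int {
	seen := map[*urlCache]struct{}{}
	for _, uc := range attached {
		seen[uc] = struct{}{}
	}

	return len(seen)
}

func main() {
	drive1Cache := &urlCache{driveID: "drive-1"}
	drive2Cache := &urlCache{driveID: "drive-2"}

	// Two collections in drive-1 share one cache; drive-2's collection has its
	// own, so exactly one distinct cache per drive is expected.
	attached := []*urlCache{drive1Cache, drive1Cache, drive2Cache}
	fmt.Println("distinct caches:", distinctCaches(attached)) // 2
}
```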