From 6e572fd133db7d4dba61e9f0657abde8020ed6b4 Mon Sep 17 00:00:00 2001 From: Keepers Date: Mon, 4 Dec 2023 09:37:53 -0700 Subject: [PATCH] add and delete file support in the tree (#4724) now that folder handling is complete, we can ingest items in the delta tree as well. --- #### Does this PR need a docs update or release note? - [x] :no_entry: No #### Type of change - [x] :sunflower: Feature #### Issue(s) * #4689 #### Test Plan - [x] :zap: Unit test --- src/.golangci.yml | 3 + .../m365/collection/drive/collection_test.go | 40 +- .../m365/collection/drive/collections_test.go | 3 +- .../m365/collection/drive/collections_tree.go | 126 +++- .../collection/drive/collections_tree_test.go | 607 ++++++++++++++++-- .../m365/collection/drive/delta_tree.go | 109 +++- .../m365/collection/drive/delta_tree_test.go | 234 ++++++- .../m365/collection/drive/helper_test.go | 2 + src/internal/m365/collection/drive/limiter.go | 12 +- src/pkg/count/keys.go | 2 + 10 files changed, 1022 insertions(+), 116 deletions(-) diff --git a/src/.golangci.yml b/src/.golangci.yml index 0d4a66ba9..d930de245 100644 --- a/src/.golangci.yml +++ b/src/.golangci.yml @@ -131,6 +131,9 @@ issues: - path: internal/m365/collection/drive/collections_test.go linters: - lll + - path: internal/m365/collection/drive/collections_tree_test.go + linters: + - lll - path: pkg/services/m365/api/graph/betasdk linters: - wsl diff --git a/src/internal/m365/collection/drive/collection_test.go b/src/internal/m365/collection/drive/collection_test.go index 6ee30143a..8e63f64c6 100644 --- a/src/internal/m365/collection/drive/collection_test.go +++ b/src/internal/m365/collection/drive/collection_test.go @@ -107,7 +107,7 @@ func (suite *CollectionUnitSuite) TestCollection() { name: "oneDrive, no duplicates", numInstances: 1, service: path.OneDriveService, - itemDeets: nst{stubItemName, 42, now}, + itemDeets: nst{stubItemName, defaultItemSize, now}, itemInfo: details.ItemInfo{OneDrive: &details.OneDriveInfo{ItemName: stubItemName, Modified: now}}, getBody: io.NopCloser(bytes.NewReader(stubItemContent)), getErr: nil, @@ -117,7 +117,7 @@ func (suite *CollectionUnitSuite) TestCollection() { name: "oneDrive, duplicates", numInstances: 3, service: path.OneDriveService, - itemDeets: nst{stubItemName, 42, now}, + itemDeets: nst{stubItemName, defaultItemSize, now}, getBody: io.NopCloser(bytes.NewReader(stubItemContent)), getErr: nil, itemInfo: details.ItemInfo{OneDrive: &details.OneDriveInfo{ItemName: stubItemName, Modified: now}}, @@ -127,7 +127,7 @@ func (suite *CollectionUnitSuite) TestCollection() { name: "oneDrive, malware", numInstances: 3, service: path.OneDriveService, - itemDeets: nst{stubItemName, 42, now}, + itemDeets: nst{stubItemName, defaultItemSize, now}, itemInfo: details.ItemInfo{}, getBody: nil, getErr: clues.New("test malware").Label(graph.LabelsMalware), @@ -138,7 +138,7 @@ func (suite *CollectionUnitSuite) TestCollection() { name: "oneDrive, not found", numInstances: 3, service: path.OneDriveService, - itemDeets: nst{stubItemName, 42, now}, + itemDeets: nst{stubItemName, defaultItemSize, now}, itemInfo: details.ItemInfo{}, getBody: nil, getErr: clues.New("test not found").Label(graph.LabelStatus(http.StatusNotFound)), @@ -149,7 +149,7 @@ func (suite *CollectionUnitSuite) TestCollection() { name: "sharePoint, no duplicates", numInstances: 1, service: path.SharePointService, - itemDeets: nst{stubItemName, 42, now}, + itemDeets: nst{stubItemName, defaultItemSize, now}, itemInfo: details.ItemInfo{SharePoint: &details.SharePointInfo{ItemName: 
stubItemName, Modified: now}}, getBody: io.NopCloser(bytes.NewReader(stubItemContent)), getErr: nil, @@ -159,7 +159,7 @@ func (suite *CollectionUnitSuite) TestCollection() { name: "sharePoint, duplicates", numInstances: 3, service: path.SharePointService, - itemDeets: nst{stubItemName, 42, now}, + itemDeets: nst{stubItemName, defaultItemSize, now}, itemInfo: details.ItemInfo{SharePoint: &details.SharePointInfo{ItemName: stubItemName, Modified: now}}, getBody: io.NopCloser(bytes.NewReader(stubItemContent)), getErr: nil, @@ -299,13 +299,13 @@ func (suite *CollectionUnitSuite) TestCollection() { func (suite *CollectionUnitSuite) TestCollectionReadError() { var ( - t = suite.T() - stubItemID = "fakeItemID" - collStatus = support.ControllerOperationStatus{} - wg = sync.WaitGroup{} - name = "name" - size int64 = 42 - now = time.Now() + t = suite.T() + stubItemID = "fakeItemID" + collStatus = support.ControllerOperationStatus{} + wg = sync.WaitGroup{} + name = "name" + size = defaultItemSize + now = time.Now() ) ctx, flush := tester.NewContext(t) @@ -369,13 +369,13 @@ func (suite *CollectionUnitSuite) TestCollectionReadError() { func (suite *CollectionUnitSuite) TestCollectionReadUnauthorizedErrorRetry() { var ( - t = suite.T() - stubItemID = "fakeItemID" - collStatus = support.ControllerOperationStatus{} - wg = sync.WaitGroup{} - name = "name" - size int64 = 42 - now = time.Now() + t = suite.T() + stubItemID = "fakeItemID" + collStatus = support.ControllerOperationStatus{} + wg = sync.WaitGroup{} + name = "name" + size = defaultItemSize + now = time.Now() ) ctx, flush := tester.NewContext(t) diff --git a/src/internal/m365/collection/drive/collections_test.go b/src/internal/m365/collection/drive/collections_test.go index b1c939eeb..875d21ba2 100644 --- a/src/internal/m365/collection/drive/collections_test.go +++ b/src/internal/m365/collection/drive/collections_test.go @@ -112,6 +112,7 @@ func coreItem( switch it { case isFile: + item.SetSize(ptr.To[int64](defaultItemSize)) item.SetFile(models.NewFile()) case isFolder: item.SetFolder(models.NewFolder()) @@ -1719,7 +1720,7 @@ func (suite *CollectionsUnitSuite) TestGet_treeCannotBeUsedWhileIncomplete() { c.ctrl = opts _, _, err := c.Get(ctx, nil, nil, fault.New(true)) - require.ErrorContains(t, err, "not yet implemented", clues.ToCore(err)) + require.ErrorContains(t, err, "not implemented", clues.ToCore(err)) } func (suite *CollectionsUnitSuite) TestGet() { diff --git a/src/internal/m365/collection/drive/collections_tree.go b/src/internal/m365/collection/drive/collections_tree.go index d11841be8..bef284a3e 100644 --- a/src/internal/m365/collection/drive/collections_tree.go +++ b/src/internal/m365/collection/drive/collections_tree.go @@ -5,6 +5,7 @@ import ( "github.com/alcionai/clues" "github.com/microsoftgraph/msgraph-sdk-go/models" + "github.com/pkg/errors" "github.com/alcionai/corso/src/internal/common/prefixmatcher" "github.com/alcionai/corso/src/internal/common/ptr" @@ -300,7 +301,7 @@ func (c *Collections) populateTree( if reset { counter.Inc(count.PagerResets) - tree.Reset() + tree.reset() c.resetStats() *stats = driveEnumerationStats{} @@ -316,6 +317,10 @@ func (c *Collections) populateTree( counter, errs) if err != nil { + if errors.Is(err, errHitLimit) { + break + } + el.AddRecoverable(ctx, clues.Stack(err)) } @@ -369,6 +374,7 @@ func (c *Collections) enumeratePageOfItems( var ( isFolder = item.GetFolder() != nil || item.GetPackageEscaped() != nil + isFile = item.GetFile() != nil itemID = ptr.Val(item.GetId()) err error skipped 
*fault.Skipped @@ -382,15 +388,19 @@ func (c *Collections) enumeratePageOfItems( "item_is_folder", isFolder, "item_is_package", item.GetPackageEscaped() != nil) - if isFolder { - // check if the preview needs to exit before adding each folder - if !tree.ContainsFolder(itemID) && limiter.atLimit(stats, len(tree.folderIDToNode)) { - break + switch { + case isFolder: + // check limits before adding the next new folder + if !tree.containsFolder(itemID) && limiter.atLimit(stats, len(tree.folderIDToNode)) { + return errHitLimit } skipped, err = c.addFolderToTree(ictx, tree, drv, item, stats, counter) - } else { - skipped, err = c.addFileToTree(ictx, tree, drv, item, stats, counter) + case isFile: + skipped, err = c.addFileToTree(ictx, tree, drv, item, limiter, stats, counter) + default: + err = clues.NewWC(ictx, "item is neither folder nor file"). + Label(fault.LabelForceNoBackupCreation, count.UnknownItemType) } if skipped != nil { @@ -398,7 +408,7 @@ func (c *Collections) enumeratePageOfItems( } if err != nil { - el.AddRecoverable(ictx, clues.Wrap(err, "adding folder")) + el.AddRecoverable(ictx, clues.Wrap(err, "adding item")) } // Check if we reached the item or size limit while processing this page. @@ -406,8 +416,9 @@ func (c *Collections) enumeratePageOfItems( // We don't want to check all limits because it's possible we've reached // the container limit but haven't reached the item limit or really added // items to the last container we found. + // FIXME(keepers): this isn't getting handled properly at the moment if limiter.atItemLimit(stats) { - break + return errHitLimit } } @@ -466,13 +477,13 @@ func (c *Collections) addFolderToTree( folderName, graph.ItemInfo(folder)) - logger.Ctx(ctx).Infow("malware detected") + logger.Ctx(ctx).Infow("malware folder detected") return skip, nil } if isDeleted { - err := tree.SetTombstone(ctx, folderID) + err := tree.setTombstone(ctx, folderID) return nil, clues.Stack(err).OrNil() } @@ -488,7 +499,7 @@ func (c *Collections) addFolderToTree( return nil, nil } - err = tree.SetFolder(ctx, parentID, folderID, folderName, isPkg) + err = tree.setFolder(ctx, parentID, folderID, folderName, isPkg) return nil, clues.Stack(err).OrNil() } @@ -528,11 +539,98 @@ func (c *Collections) addFileToTree( ctx context.Context, tree *folderyMcFolderFace, drv models.Driveable, - item models.DriveItemable, + file models.DriveItemable, + limiter *pagerLimiter, stats *driveEnumerationStats, counter *count.Bus, ) (*fault.Skipped, error) { - return nil, clues.New("not yet implemented") + var ( + driveID = ptr.Val(drv.GetId()) + fileID = ptr.Val(file.GetId()) + fileName = ptr.Val(file.GetName()) + fileSize = ptr.Val(file.GetSize()) + isDeleted = file.GetDeleted() != nil + isMalware = file.GetMalware() != nil + parent = file.GetParentReference() + parentID string + ) + + if parent != nil { + parentID = ptr.Val(parent.GetId()) + } + + defer func() { + switch { + case isMalware: + counter.Inc(count.TotalMalwareProcessed) + case isDeleted: + counter.Inc(count.TotalDeleteFilesProcessed) + default: + counter.Inc(count.TotalFilesProcessed) + } + }() + + if isMalware { + skip := fault.FileSkip( + fault.SkipMalware, + driveID, + fileID, + fileName, + graph.ItemInfo(file)) + + logger.Ctx(ctx).Infow("malware file detected") + + return skip, nil + } + + _, alreadySeen := tree.fileIDToParentID[fileID] + + if isDeleted { + tree.deleteFile(fileID) + + if alreadySeen { + stats.numAddedFiles-- + // FIXME(keepers): this might be faulty, + // since deletes may not include the file size. 
+ // it will likely need to be tracked in + // the tree alongside the file modtime. + stats.numBytes -= fileSize + } else { + c.NumItems++ + c.NumFiles++ + } + + return nil, nil + } + + parentNode, ok := tree.folderIDToNode[parentID] + + // Don't add new items if the collection has already reached its limit. + // item moves and updates are generally allowed through. + if ok && !alreadySeen && limiter.atContainerItemsLimit(len(parentNode.files)) { + return nil, nil + } + + // Skip large files that don't fit within the size limit. + if limiter.aboveSizeLimit(fileSize + stats.numBytes) { + return nil, nil + } + + err := tree.addFile(parentID, fileID, ptr.Val(file.GetLastModifiedDateTime())) + if err != nil { + return nil, clues.StackWC(ctx, err) + } + + // Only increment counters for new files + if !alreadySeen { + // todo: remove c.NumItems/Files in favor of counter and tree counting. + c.NumItems++ + c.NumFiles++ + stats.numAddedFiles++ + stats.numBytes += fileSize + } + + return nil, nil } // quality-of-life wrapper that transforms each tombstone in the map diff --git a/src/internal/m365/collection/drive/collections_tree_test.go b/src/internal/m365/collection/drive/collections_tree_test.go index 58b01d92b..50af70721 100644 --- a/src/internal/m365/collection/drive/collections_tree_test.go +++ b/src/internal/m365/collection/drive/collections_tree_test.go @@ -552,11 +552,12 @@ func (suite *CollectionsTreeUnitSuite) TestCollections_PopulateTree() { drv.SetName(ptr.To(name(drive))) type expected struct { - counts countTD.Expected - err require.ErrorAssertionFunc - treeSize int - treeContainsFolderIDs []string - treeContainsTombstoneIDs []string + counts countTD.Expected + err require.ErrorAssertionFunc + treeSize int + treeContainsFolderIDs []string + treeContainsTombstoneIDs []string + treeContainsFileIDsWithParent map[string]string } table := []struct { @@ -579,11 +580,12 @@ func (suite *CollectionsTreeUnitSuite) TestCollections_PopulateTree() { }, limiter: newPagerLimiter(control.DefaultOptions()), expect: expected{ - counts: countTD.Expected{}, - err: require.NoError, - treeSize: 0, - treeContainsFolderIDs: []string{}, - treeContainsTombstoneIDs: []string{}, + counts: countTD.Expected{}, + err: require.NoError, + treeSize: 0, + treeContainsFolderIDs: []string{}, + treeContainsTombstoneIDs: []string{}, + treeContainsFileIDsWithParent: map[string]string{}, }, }, { @@ -601,6 +603,7 @@ func (suite *CollectionsTreeUnitSuite) TestCollections_PopulateTree() { expect: expected{ counts: countTD.Expected{ count.TotalFoldersProcessed: 1, + count.TotalFilesProcessed: 0, count.PagesEnumerated: 1, }, err: require.NoError, treeSize: 1, treeContainsFolderIDs: []string{ rootID, }, - treeContainsTombstoneIDs: []string{}, + treeContainsTombstoneIDs: []string{}, + treeContainsFileIDsWithParent: map[string]string{}, }, }, { @@ -626,6 +630,7 @@ func (suite *CollectionsTreeUnitSuite) TestCollections_PopulateTree() { expect: expected{ counts: countTD.Expected{ count.TotalFoldersProcessed: 2, + count.TotalFilesProcessed: 0, count.PagesEnumerated: 2, }, err: require.NoError, treeSize: 1, treeContainsFolderIDs: []string{ rootID, }, - treeContainsTombstoneIDs: []string{}, + treeContainsTombstoneIDs: []string{}, + treeContainsFileIDsWithParent: map[string]string{}, }, }, { @@ -657,6 +663,47 @@ func (suite *CollectionsTreeUnitSuite) 
TestCollections_PopulateTree() { counts: countTD.Expected{ count.TotalFoldersProcessed: 7, count.PagesEnumerated: 3, + count.TotalFilesProcessed: 0, + }, + err: require.NoError, + treeSize: 4, + treeContainsFolderIDs: []string{ + rootID, + id(folder), + idx(folder, "sib"), + idx(folder, "chld"), + }, + treeContainsTombstoneIDs: []string{}, + treeContainsFileIDsWithParent: map[string]string{}, + }, + }, + { + name: "many folders with files", + tree: newFolderyMcFolderFace(nil), + enumerator: mock.EnumerateItemsDeltaByDrive{ + DrivePagers: map[string]*mock.DriveItemsDeltaPager{ + id(drive): { + Pages: pagesOf( + rootAnd( + driveItem(id(folder), name(folder), parent(0), rootID, isFolder), + driveItem(id(file), name(file), parent(0, name(folder)), id(folder), isFile)), + rootAnd( + driveItem(idx(folder, "sib"), namex(folder, "sib"), parent(0), rootID, isFolder), + driveItem(idx(file, "sib"), namex(file, "sib"), parent(0, namex(folder, "sib")), idx(folder, "sib"), isFile)), + rootAnd( + driveItem(id(folder), name(folder), parent(0), rootID, isFolder), + driveItem(idx(folder, "chld"), namex(folder, "chld"), parent(0), id(folder), isFolder), + driveItem(idx(file, "chld"), namex(file, "chld"), parent(0, namex(folder, "chld")), idx(folder, "chld"), isFile))), + DeltaUpdate: pagers.DeltaUpdate{URL: id(delta)}, + }, + }, + }, + limiter: newPagerLimiter(control.DefaultOptions()), + expect: expected{ + counts: countTD.Expected{ + count.TotalFoldersProcessed: 7, + count.TotalFilesProcessed: 3, + count.PagesEnumerated: 3, }, err: require.NoError, treeSize: 4, @@ -667,6 +714,11 @@ func (suite *CollectionsTreeUnitSuite) TestCollections_PopulateTree() { idx(folder, "chld"), }, treeContainsTombstoneIDs: []string{}, + treeContainsFileIDsWithParent: map[string]string{ + id(file): id(folder), + idx(file, "sib"): idx(folder, "sib"), + idx(file, "chld"): idx(folder, "chld"), + }, }, }, { @@ -678,7 +730,9 @@ func (suite *CollectionsTreeUnitSuite) TestCollections_PopulateTree() { DrivePagers: map[string]*mock.DriveItemsDeltaPager{ id(drive): { Pages: pagesOf( - rootAnd(driveItem(id(folder), name(folder), parent(0), rootID, isFolder)), + rootAnd( + driveItem(id(folder), name(folder), parent(0), rootID, isFolder), + driveItem(id(file), name(file), parent(0, name(folder)), id(folder), isFile)), rootAnd(delItem(id(folder), parent(0), rootID, isFolder))), DeltaUpdate: pagers.DeltaUpdate{URL: id(delta)}, }, @@ -688,6 +742,7 @@ func (suite *CollectionsTreeUnitSuite) TestCollections_PopulateTree() { expect: expected{ counts: countTD.Expected{ count.TotalFoldersProcessed: 3, + count.TotalFilesProcessed: 1, count.TotalDeleteFoldersProcessed: 1, count.PagesEnumerated: 2, }, @@ -697,12 +752,15 @@ func (suite *CollectionsTreeUnitSuite) TestCollections_PopulateTree() { rootID, }, treeContainsTombstoneIDs: []string{}, + treeContainsFileIDsWithParent: map[string]string{ + id(file): id(folder), + }, }, }, { // technically you won't see this behavior from graph deltas, since deletes always // precede creates/updates. But it's worth checking that we can handle it anyways. 
- name: "move, delete on next page", + name: "move->delete folder with populated tree", tree: treeWithFolders(), enumerator: mock.EnumerateItemsDeltaByDrive{ DrivePagers: map[string]*mock.DriveItemsDeltaPager{ @@ -710,7 +768,8 @@ func (suite *CollectionsTreeUnitSuite) TestCollections_PopulateTree() { Pages: pagesOf( rootAnd( driveItem(idx(folder, "parent"), namex(folder, "parent"), parent(0), rootID, isFolder), - driveItem(id(folder), namex(folder, "moved"), parent(0), idx(folder, "parent"), isFolder)), + driveItem(id(folder), namex(folder, "moved"), parent(0), idx(folder, "parent"), isFolder), + driveItem(id(file), name(file), parent(0, namex(folder, "parent"), name(folder)), id(folder), isFile)), rootAnd(delItem(id(folder), parent(0), idx(folder, "parent"), isFolder))), DeltaUpdate: pagers.DeltaUpdate{URL: id(delta)}, }, @@ -721,6 +780,7 @@ func (suite *CollectionsTreeUnitSuite) TestCollections_PopulateTree() { counts: countTD.Expected{ count.TotalFoldersProcessed: 4, count.TotalDeleteFoldersProcessed: 1, + count.TotalFilesProcessed: 1, count.PagesEnumerated: 2, }, err: require.NoError, @@ -732,20 +792,28 @@ func (suite *CollectionsTreeUnitSuite) TestCollections_PopulateTree() { treeContainsTombstoneIDs: []string{ id(folder), }, + treeContainsFileIDsWithParent: map[string]string{ + id(file): id(folder), + }, }, }, { - name: "at limit before enumeration", + name: "at folder limit before enumeration", tree: treeWithRoot(), enumerator: mock.EnumerateItemsDeltaByDrive{ DrivePagers: map[string]*mock.DriveItemsDeltaPager{ id(drive): { Pages: pagesOf( - rootAnd(driveItem(id(folder), name(folder), parent(0), rootID, isFolder)), - rootAnd(driveItem(idx(folder, "sib"), namex(folder, "sib"), parent(0), rootID, isFolder)), rootAnd( driveItem(id(folder), name(folder), parent(0), rootID, isFolder), - driveItem(idx(folder, "chld"), namex(folder, "chld"), parent(0), id(folder), isFolder))), + driveItem(id(file), name(file), parent(0, name(folder)), id(folder), isFile)), + rootAnd( + driveItem(idx(folder, "sib"), namex(folder, "sib"), parent(0), rootID, isFolder), + driveItem(idx(file, "sib"), namex(file, "sib"), parent(0, namex(folder, "sib")), idx(folder, "sib"), isFile)), + rootAnd( + driveItem(id(folder), name(folder), parent(0), rootID, isFolder), + driveItem(idx(folder, "chld"), namex(folder, "chld"), parent(0), id(folder), isFolder), + driveItem(idx(file, "chld"), namex(file, "chld"), parent(0, namex(folder, "chld")), idx(folder, "chld"), isFile))), DeltaUpdate: pagers.DeltaUpdate{URL: id(delta)}, }, }, @@ -755,6 +823,7 @@ func (suite *CollectionsTreeUnitSuite) TestCollections_PopulateTree() { counts: countTD.Expected{ count.TotalDeleteFoldersProcessed: 0, count.TotalFoldersProcessed: 1, + count.TotalFilesProcessed: 0, count.PagesEnumerated: 1, }, err: require.NoError, @@ -762,21 +831,27 @@ func (suite *CollectionsTreeUnitSuite) TestCollections_PopulateTree() { treeContainsFolderIDs: []string{ rootID, }, - treeContainsTombstoneIDs: []string{}, + treeContainsTombstoneIDs: []string{}, + treeContainsFileIDsWithParent: map[string]string{}, }, }, { - name: "hit limit during enumeration", + name: "hit folder limit during enumeration", tree: newFolderyMcFolderFace(nil), enumerator: mock.EnumerateItemsDeltaByDrive{ DrivePagers: map[string]*mock.DriveItemsDeltaPager{ id(drive): { Pages: pagesOf( - rootAnd(driveItem(id(folder), name(folder), parent(0), rootID, isFolder)), - rootAnd(driveItem(idx(folder, "sib"), namex(folder, "sib"), parent(0), rootID, isFolder)), rootAnd( driveItem(id(folder), name(folder), 
parent(0), rootID, isFolder), - driveItem(idx(folder, "chld"), namex(folder, "chld"), parent(0), id(folder), isFolder))), + driveItem(id(file), name(file), parent(0, name(folder)), id(folder), isFile)), + rootAnd( + driveItem(idx(folder, "sib"), namex(folder, "sib"), parent(0), rootID, isFolder), + driveItem(idx(file, "sib"), namex(file, "sib"), parent(0, namex(folder, "sib")), idx(folder, "sib"), isFile)), + rootAnd( + driveItem(id(folder), name(folder), parent(0), rootID, isFolder), + driveItem(idx(folder, "chld"), namex(folder, "chld"), parent(0), id(folder), isFolder), + driveItem(idx(file, "chld"), namex(file, "chld"), parent(0, namex(folder, "chld")), idx(folder, "chld"), isFile))), DeltaUpdate: pagers.DeltaUpdate{URL: id(delta)}, }, }, @@ -786,6 +861,7 @@ func (suite *CollectionsTreeUnitSuite) TestCollections_PopulateTree() { counts: countTD.Expected{ count.TotalDeleteFoldersProcessed: 0, count.TotalFoldersProcessed: 1, + count.TotalFilesProcessed: 0, count.PagesEnumerated: 1, }, err: require.NoError, treeSize: 1, treeContainsFolderIDs: []string{ rootID, }, - treeContainsTombstoneIDs: []string{}, + treeContainsTombstoneIDs: []string{}, + treeContainsFileIDsWithParent: map[string]string{}, }, }, } @@ -824,20 +901,29 @@ func (suite *CollectionsTreeUnitSuite) TestCollections_PopulateTree() { counter, fault.New(true)) test.expect.err(t, err, clues.ToCore(err)) - assert.Equal(t, test.expect.treeSize, test.tree.CountFolders(), "count folders in tree") + assert.Equal(t, test.expect.treeSize, test.tree.countFolders(), "count folders in tree") test.expect.counts.Compare(t, counter) for _, id := range test.expect.treeContainsFolderIDs { - require.NotNil(t, test.tree.folderIDToNode[id], "node exists") + assert.NotNil(t, test.tree.folderIDToNode[id], "node exists") } for _, id := range test.expect.treeContainsTombstoneIDs { - require.NotNil(t, test.tree.tombstones[id], "tombstone exists") + assert.NotNil(t, test.tree.tombstones[id], "tombstone exists") + } + + for iID, pID := range test.expect.treeContainsFileIDsWithParent { + assert.Contains(t, test.tree.fileIDToParentID, iID) + assert.Equal(t, pID, test.tree.fileIDToParentID[iID]) } }) } } +// --------------------------------------------------------------------------- +// folder tests +// --------------------------------------------------------------------------- + // This test focuses on folder assertions when enumerating a page of items. // File-specific assertions are focused in the _files test variant. 
func (suite *CollectionsTreeUnitSuite) TestCollections_EnumeratePageOfItems_folders() { @@ -913,7 +999,7 @@ func (suite *CollectionsTreeUnitSuite) TestCollections_EnumeratePageOfItems_fold page: rootAnd( driveItem(id(folder), name(folder), parent(0), rootID, isFolder), driveItem(idx(folder, "sib"), namex(folder, "sib"), parent(0), rootID, isFolder), - driveItem(idx(folder, "chld"), namex(folder, "chld"), parent(0), id(folder), isFolder)), + driveItem(idx(folder, "chld"), namex(folder, "chld"), parent(0, name(folder)), id(folder), isFolder)), limiter: newPagerLimiter(control.DefaultOptions()), expect: expected{ counts: countTD.Expected{ @@ -936,13 +1022,13 @@ func (suite *CollectionsTreeUnitSuite) TestCollections_EnumeratePageOfItems_fold page: rootAnd( driveItem(id(folder), name(folder), parent(0), rootID, isFolder), driveItem(idx(folder, "sib"), namex(folder, "sib"), parent(0), rootID, isFolder), - driveItem(idx(folder, "chld"), namex(folder, "chld"), parent(0), id(folder), isFolder)), + driveItem(idx(folder, "chld"), namex(folder, "chld"), parent(0, name(folder)), id(folder), isFolder)), limiter: newPagerLimiter(minimumLimitOpts()), expect: expected{ counts: countTD.Expected{ count.TotalFoldersProcessed: 1, }, - err: require.NoError, + err: require.Error, treeSize: 1, treeContainsFolderIDs: []string{ rootID, @@ -975,7 +1061,7 @@ func (suite *CollectionsTreeUnitSuite) TestCollections_EnumeratePageOfItems_fold tree: treeWithFolders(), page: rootAnd( driveItem(idx(folder, "parent"), namex(folder, "parent"), parent(0), rootID, isFolder), - driveItem(id(folder), namex(folder, "moved"), parent(0), idx(folder, "parent"), isFolder), + driveItem(id(folder), namex(folder, "moved"), parent(0, namex(folder, "parent")), idx(folder, "parent"), isFolder), delItem(id(folder), parent(0), idx(folder, "parent"), isFolder)), limiter: newPagerLimiter(control.DefaultOptions()), expect: expected{ @@ -1036,7 +1122,7 @@ func (suite *CollectionsTreeUnitSuite) TestCollections_EnumeratePageOfItems_fold counter, fault.New(true)) test.expect.err(t, err, clues.ToCore(err)) - assert.Equal(t, test.expect.treeSize, test.tree.CountFolders(), "count folders in tree") + assert.Equal(t, test.expect.treeSize, test.tree.countFolders(), "count folders in tree") test.expect.counts.Compare(t, counter) for _, id := range test.expect.treeContainsFolderIDs { @@ -1181,8 +1267,8 @@ func (suite *CollectionsTreeUnitSuite) TestCollections_AddFolderToTree() { test.expect.err(t, err, clues.ToCore(err)) test.expect.skipped(t, skipped) test.expect.counts.Compare(t, counter) - assert.Equal(t, test.expect.treeSize, test.tree.CountFolders(), "folders in tree") - test.expect.treeContainsFolder(t, test.tree.ContainsFolder(ptr.Val(test.folder.GetId()))) + assert.Equal(t, test.expect.treeSize, test.tree.countFolders(), "folders in tree") + test.expect.treeContainsFolder(t, test.tree.containsFolder(ptr.Val(test.folder.GetId()))) }) } } @@ -1238,21 +1324,444 @@ func (suite *CollectionsTreeUnitSuite) TestCollections_MakeFolderCollectionPath( } } -func (suite *CollectionsTreeUnitSuite) TestCollections_AddFileToTree() { - t := suite.T() +// --------------------------------------------------------------------------- +// file tests +// --------------------------------------------------------------------------- - ctx, flush := tester.NewContext(t) - defer flush() +// this test focuses on file assertions when enumerating a page of items +// folder-specific assertions are in the previous test +func (suite *CollectionsTreeUnitSuite) 
TestCollections_EnumeratePageOfItems_files() { + drv := models.NewDrive() + drv.SetId(ptr.To(id(drive))) + drv.SetName(ptr.To(name(drive))) - c := collWithMBH(mock.DefaultOneDriveBH(user)) + type expected struct { + counts countTD.Expected + err require.ErrorAssertionFunc + treeContainsFileIDsWithParent map[string]string + statsNumAddedFiles int + statsNumBytes int64 + } - skipped, err := c.addFileToTree( - ctx, - nil, - nil, - nil, - nil, - nil) - require.ErrorContains(t, err, "not yet implemented", clues.ToCore(err)) - require.Nil(t, skipped) + table := []struct { + name string + tree *folderyMcFolderFace + page []models.DriveItemable + expect expected + }{ + { + name: "one file at root", + tree: treeWithRoot(), + page: rootAnd(driveItem(id(file), name(file), parent(0, name(folder)), rootID, isFile)), + expect: expected{ + counts: countTD.Expected{ + count.TotalDeleteFilesProcessed: 0, + count.TotalFoldersProcessed: 1, + count.TotalFilesProcessed: 1, + }, + err: require.NoError, + treeContainsFileIDsWithParent: map[string]string{ + id(file): rootID, + }, + statsNumAddedFiles: 1, + statsNumBytes: defaultItemSize, + }, + }, + { + name: "one file in a folder", + tree: newFolderyMcFolderFace(nil), + page: rootAnd( + driveItem(id(folder), name(folder), parent(0), rootID, isFolder), + driveItem(id(file), name(file), parent(0, name(folder)), id(folder), isFile)), + expect: expected{ + counts: countTD.Expected{ + count.TotalDeleteFilesProcessed: 0, + count.TotalFoldersProcessed: 2, + count.TotalFilesProcessed: 1, + }, + err: require.NoError, + treeContainsFileIDsWithParent: map[string]string{ + id(file): id(folder), + }, + statsNumAddedFiles: 1, + statsNumBytes: defaultItemSize, + }, + }, + { + name: "many files in a hierarchy", + tree: treeWithRoot(), + page: rootAnd( + driveItem(id(file), name(file), parent(0), rootID, isFile), + driveItem(id(folder), name(folder), parent(0), rootID, isFolder), + driveItem(idx(file, "chld"), namex(file, "chld"), parent(0, name(folder)), id(folder), isFile)), + expect: expected{ + counts: countTD.Expected{ + count.TotalDeleteFilesProcessed: 0, + count.TotalFoldersProcessed: 2, + count.TotalFilesProcessed: 2, + }, + err: require.NoError, + treeContainsFileIDsWithParent: map[string]string{ + id(file): rootID, + idx(file, "chld"): id(folder), + }, + statsNumAddedFiles: 2, + statsNumBytes: defaultItemSize * 2, + }, + }, + { + name: "many updates to the same file", + tree: treeWithRoot(), + page: rootAnd( + driveItem(id(file), name(file), parent(0), rootID, isFile), + driveItem(id(file), namex(file, 1), parent(0), rootID, isFile), + driveItem(id(file), namex(file, 2), parent(0), rootID, isFile)), + expect: expected{ + counts: countTD.Expected{ + count.TotalDeleteFilesProcessed: 0, + count.TotalFoldersProcessed: 1, + count.TotalFilesProcessed: 3, + }, + err: require.NoError, + treeContainsFileIDsWithParent: map[string]string{ + id(file): rootID, + }, + statsNumAddedFiles: 1, + statsNumBytes: defaultItemSize, + }, + }, + { + name: "delete an existing file", + tree: treeWithFileAtRoot(), + page: rootAnd(delItem(id(file), parent(0), rootID, isFile)), + expect: expected{ + counts: countTD.Expected{ + count.TotalDeleteFilesProcessed: 1, + count.TotalFoldersProcessed: 1, + count.TotalFilesProcessed: 0, + }, + err: require.NoError, + treeContainsFileIDsWithParent: map[string]string{}, + statsNumAddedFiles: -1, + statsNumBytes: 0, + }, + }, + { + name: "delete the same file twice", + tree: treeWithFileAtRoot(), + page: rootAnd( + delItem(id(file), parent(0), rootID, isFile), 
+ delItem(id(file), parent(0), rootID, isFile)), + expect: expected{ + counts: countTD.Expected{ + count.TotalDeleteFilesProcessed: 2, + count.TotalFoldersProcessed: 1, + count.TotalFilesProcessed: 0, + }, + err: require.NoError, + treeContainsFileIDsWithParent: map[string]string{}, + statsNumAddedFiles: -1, + statsNumBytes: 0, + }, + }, + { + name: "create->delete", + tree: treeWithRoot(), + page: rootAnd( + driveItem(id(file), name(file), parent(0), rootID, isFile), + delItem(id(file), parent(0), rootID, isFile)), + expect: expected{ + counts: countTD.Expected{ + count.TotalDeleteFilesProcessed: 1, + count.TotalFoldersProcessed: 1, + count.TotalFilesProcessed: 1, + }, + err: require.NoError, + treeContainsFileIDsWithParent: map[string]string{}, + statsNumAddedFiles: 0, + statsNumBytes: defaultItemSize, + }, + }, + { + name: "move->delete", + tree: treeWithFileAtRoot(), + page: rootAnd( + driveItem(id(folder), name(folder), parent(0), rootID, isFolder), + driveItem(id(file), name(file), parent(0, name(folder)), id(folder), isFile), + delItem(id(file), parent(0, name(folder)), id(folder), isFile)), + expect: expected{ + counts: countTD.Expected{ + count.TotalDeleteFilesProcessed: 1, + count.TotalFoldersProcessed: 2, + count.TotalFilesProcessed: 1, + }, + err: require.NoError, + treeContainsFileIDsWithParent: map[string]string{}, + statsNumAddedFiles: -1, + statsNumBytes: 0, + }, + }, + { + name: "delete->create an existing file", + tree: treeWithFileAtRoot(), + page: rootAnd( + delItem(id(file), parent(0), rootID, isFile), + driveItem(id(file), name(file), parent(0), rootID, isFile)), + expect: expected{ + counts: countTD.Expected{ + count.TotalDeleteFilesProcessed: 1, + count.TotalFoldersProcessed: 1, + count.TotalFilesProcessed: 1, + }, + err: require.NoError, + treeContainsFileIDsWithParent: map[string]string{ + id(file): rootID, + }, + statsNumAddedFiles: 0, + statsNumBytes: defaultItemSize, + }, + }, + { + name: "delete->create a non-existing file", + tree: treeWithRoot(), + page: rootAnd( + delItem(id(file), parent(0), rootID, isFile), + driveItem(id(file), name(file), parent(0), rootID, isFile)), + expect: expected{ + counts: countTD.Expected{ + count.TotalDeleteFilesProcessed: 1, + count.TotalFoldersProcessed: 1, + count.TotalFilesProcessed: 1, + }, + err: require.NoError, + treeContainsFileIDsWithParent: map[string]string{ + id(file): rootID, + }, + statsNumAddedFiles: 1, + statsNumBytes: defaultItemSize, + }, + }, + } + for _, test := range table { + suite.Run(test.name, func() { + t := suite.T() + + ctx, flush := tester.NewContext(t) + defer flush() + + c := collWithMBH(mock.DefaultOneDriveBH(user)) + counter := count.New() + stats := &driveEnumerationStats{} + + err := c.enumeratePageOfItems( + ctx, + test.tree, + newPagerLimiter(control.DefaultOptions()), + stats, + drv, + test.page, + counter, + fault.New(true)) + test.expect.err(t, err, clues.ToCore(err)) + assert.Equal(t, test.expect.statsNumAddedFiles, stats.numAddedFiles, "num added files") + assert.Equal(t, test.expect.statsNumBytes, stats.numBytes, "num bytes") + assert.Equal(t, test.expect.treeContainsFileIDsWithParent, test.tree.fileIDToParentID) + test.expect.counts.Compare(t, counter) + }) + } +} + +func (suite *CollectionsTreeUnitSuite) TestCollections_AddFileToTree() { + drv := models.NewDrive() + drv.SetId(ptr.To(id(drive))) + drv.SetName(ptr.To(name(drive))) + + type expected struct { + counts countTD.Expected + err require.ErrorAssertionFunc + skipped assert.ValueAssertionFunc + treeFileCount int + 
treeContainsFileIDsWithParent map[string]string + statsNumAddedFiles int + statsNumBytes int64 + } + + table := []struct { + name string + tree *folderyMcFolderFace + file models.DriveItemable + limiter *pagerLimiter + expect expected + }{ + { + name: "add new file", + tree: treeWithRoot(), + file: driveItem(id(file), name(file), parent(0), rootID, isFile), + limiter: newPagerLimiter(control.DefaultOptions()), + expect: expected{ + counts: countTD.Expected{ + count.TotalFilesProcessed: 1, + }, + err: require.NoError, + skipped: assert.Nil, + treeFileCount: 1, + treeContainsFileIDsWithParent: map[string]string{ + id(file): rootID, + }, + statsNumAddedFiles: 1, + statsNumBytes: defaultItemSize, + }, + }, + { + name: "duplicate file", + tree: treeWithFileAtRoot(), + file: driveItem(id(file), name(file), parent(0), rootID, isFile), + limiter: newPagerLimiter(control.DefaultOptions()), + expect: expected{ + counts: countTD.Expected{ + count.TotalFilesProcessed: 1, + }, + err: require.NoError, + skipped: assert.Nil, + treeFileCount: 1, + treeContainsFileIDsWithParent: map[string]string{ + id(file): rootID, + }, + statsNumAddedFiles: 0, + statsNumBytes: 0, + }, + }, + { + name: "error file seen before parent", + tree: treeWithRoot(), + file: driveItem(id(file), name(file), parent(0, name(folder)), id(folder), isFile), + limiter: newPagerLimiter(control.DefaultOptions()), + expect: expected{ + counts: countTD.Expected{ + count.TotalFilesProcessed: 1, + }, + err: require.Error, + skipped: assert.Nil, + treeFileCount: 0, + treeContainsFileIDsWithParent: map[string]string{}, + statsNumAddedFiles: 0, + statsNumBytes: 0, + }, + }, + { + name: "malware file", + tree: treeWithRoot(), + file: malwareItem(id(file), name(file), parent(0, name(folder)), rootID, isFile), + limiter: newPagerLimiter(control.DefaultOptions()), + expect: expected{ + counts: countTD.Expected{ + count.TotalMalwareProcessed: 1, + }, + err: require.NoError, + skipped: assert.NotNil, + treeFileCount: 0, + treeContainsFileIDsWithParent: map[string]string{}, + statsNumAddedFiles: 0, + statsNumBytes: 0, + }, + }, + { + name: "delete non-existing file", + tree: treeWithRoot(), + file: delItem(id(file), parent(0, name(folder)), id(folder), isFile), + limiter: newPagerLimiter(control.DefaultOptions()), + expect: expected{ + counts: countTD.Expected{ + count.TotalDeleteFilesProcessed: 1, + }, + err: require.NoError, + skipped: assert.Nil, + treeFileCount: 0, + treeContainsFileIDsWithParent: map[string]string{}, + statsNumAddedFiles: 0, + statsNumBytes: 0, + }, + }, + { + name: "delete existing file", + tree: treeWithFileAtRoot(), + file: delItem(id(file), parent(0), rootID, isFile), + limiter: newPagerLimiter(control.DefaultOptions()), + expect: expected{ + counts: countTD.Expected{ + count.TotalDeleteFilesProcessed: 1, + }, + err: require.NoError, + skipped: assert.Nil, + treeFileCount: 0, + treeContainsFileIDsWithParent: map[string]string{}, + statsNumAddedFiles: -1, + statsNumBytes: 0, + }, + }, + { + name: "already at container file limit", + tree: treeWithFileAtRoot(), + file: driveItem(id(file), name(file), parent(0), rootID, isFile), + limiter: newPagerLimiter(minimumLimitOpts()), + expect: expected{ + counts: countTD.Expected{ + count.TotalFilesProcessed: 1, + }, + err: require.NoError, + skipped: assert.Nil, + treeFileCount: 1, + treeContainsFileIDsWithParent: map[string]string{ + id(file): rootID, + }, + statsNumAddedFiles: 0, + statsNumBytes: 0, + }, + }, + { + name: "goes over total byte limit", + tree: treeWithRoot(), + file: 
driveItem(id(file), name(file), parent(0), rootID, isFile), + limiter: newPagerLimiter(minimumLimitOpts()), + expect: expected{ + counts: countTD.Expected{ + count.TotalFilesProcessed: 1, + }, + err: require.NoError, + skipped: assert.Nil, + treeFileCount: 0, + treeContainsFileIDsWithParent: map[string]string{}, + statsNumAddedFiles: 0, + statsNumBytes: 0, + }, + }, + } + for _, test := range table { + suite.Run(test.name, func() { + t := suite.T() + + ctx, flush := tester.NewContext(t) + defer flush() + + c := collWithMBH(mock.DefaultOneDriveBH(user)) + counter := count.New() + stats := &driveEnumerationStats{} + + skipped, err := c.addFileToTree( + ctx, + test.tree, + drv, + test.file, + test.limiter, + stats, + counter) + test.expect.err(t, err, clues.ToCore(err)) + test.expect.skipped(t, skipped) + assert.Len(t, test.tree.fileIDToParentID, test.expect.treeFileCount, "count of files in tree") + assert.Equal(t, test.expect.treeContainsFileIDsWithParent, test.tree.fileIDToParentID) + test.expect.counts.Compare(t, counter) + assert.Equal(t, test.expect.statsNumAddedFiles, stats.numAddedFiles) + assert.Equal(t, test.expect.statsNumBytes, stats.numBytes) + }) + } } diff --git a/src/internal/m365/collection/drive/delta_tree.go b/src/internal/m365/collection/drive/delta_tree.go index 53d51c393..dd95640df 100644 --- a/src/internal/m365/collection/drive/delta_tree.go +++ b/src/internal/m365/collection/drive/delta_tree.go @@ -33,9 +33,11 @@ type folderyMcFolderFace struct { // and forth between live and tombstoned states. tombstones map[string]*nodeyMcNodeFace - // it's just a sensible place to store the data, since we're - // already pushing file additions through the api. - excludeFileIDs map[string]struct{} + // will also be used to construct the excluded file id map + // during the post-processing step + fileIDToParentID map[string]string + // required for populating the excluded file id map + deletedFileIDs map[string]struct{} // true if reset() was called hadReset bool } @@ -45,23 +47,24 @@ func newFolderyMcFolderFace( prefix path.Path, ) *folderyMcFolderFace { return &folderyMcFolderFace{ - prefix: prefix, - folderIDToNode: map[string]*nodeyMcNodeFace{}, - tombstones: map[string]*nodeyMcNodeFace{}, - excludeFileIDs: map[string]struct{}{}, + prefix: prefix, + folderIDToNode: map[string]*nodeyMcNodeFace{}, + tombstones: map[string]*nodeyMcNodeFace{}, + fileIDToParentID: map[string]string{}, + deletedFileIDs: map[string]struct{}{}, } } -// Reset erases all data contained in the tree. This is intended for +// reset erases all data contained in the tree. This is intended for // tracking a delta enumeration reset, not for tree re-use, and will // cause the tree to flag itself as dirty in order to appropriately // post-process the data. 
-func (face *folderyMcFolderFace) Reset() { +func (face *folderyMcFolderFace) reset() { face.hadReset = true face.root = nil face.folderIDToNode = map[string]*nodeyMcNodeFace{} face.tombstones = map[string]*nodeyMcNodeFace{} - face.excludeFileIDs = map[string]struct{}{} + face.fileIDToParentID = map[string]string{} } type nodeyMcNodeFace struct { @@ -75,10 +78,10 @@ type nodeyMcNodeFace struct { name string // only contains the folders starting at and including '/root:' prev path.Elements - // map folderID -> node + // folderID -> node children map[string]*nodeyMcNodeFace - // items are keyed by item ID - items map[string]time.Time + // file item ID -> last modified time + files map[string]time.Time // for special handling protocols around packages isPackage bool } @@ -93,7 +96,7 @@ func newNodeyMcNodeFace( id: id, name: name, children: map[string]*nodeyMcNodeFace{}, - items: map[string]time.Time{}, + files: map[string]time.Time{}, isPackage: isPackage, } } @@ -102,9 +105,9 @@ func newNodeyMcNodeFace( // folder handling // --------------------------------------------------------------------------- -// ContainsFolder returns true if the given folder id is present as either +// containsFolder returns true if the given folder id is present as either // a live node or a tombstone. -func (face *folderyMcFolderFace) ContainsFolder(id string) bool { +func (face *folderyMcFolderFace) containsFolder(id string) bool { _, stillKicking := face.folderIDToNode[id] _, alreadyBuried := face.tombstones[id] @@ -113,14 +116,22 @@ func (face *folderyMcFolderFace) ContainsFolder(id string) bool { // countFolders returns a count that is the sum of live folders and // tombstones recorded in the tree. -func (face *folderyMcFolderFace) CountFolders() int { +func (face *folderyMcFolderFace) countFolders() int { return len(face.tombstones) + len(face.folderIDToNode) } +func (face *folderyMcFolderFace) getNode(id string) *nodeyMcNodeFace { + if zombey, alreadyBuried := face.tombstones[id]; alreadyBuried { + return zombey + } + + return face.folderIDToNode[id] +} + -// SetFolder adds a node with the following details to the tree. +// setFolder adds a node with the following details to the tree. // If the node already exists with the given ID, the name and parent // values are updated to match (isPackage is assumed not to change). -func (face *folderyMcFolderFace) SetFolder( +func (face *folderyMcFolderFace) setFolder( ctx context.Context, parentID, id, name string, isPackage bool, @@ -217,7 +228,7 @@ func (face *folderyMcFolderFace) SetFolder( return nil } -func (face *folderyMcFolderFace) SetTombstone( +func (face *folderyMcFolderFace) setTombstone( ctx context.Context, id string, ) error { @@ -252,3 +263,61 @@ func (face *folderyMcFolderFace) SetTombstone( return nil } + +// addFile places the file in the correct parent node. If the +// file was already added to the tree and is getting relocated, +// this func will update and/or clean up all the old references. 
+func (face *folderyMcFolderFace) addFile( + parentID, id string, + lastModified time.Time, +) error { + if len(parentID) == 0 { + return clues.New("item added without parent folder ID") + } + + if len(id) == 0 { + return clues.New("item added without ID") + } + + // in case of file movement, clean up any references + // to the file in the old parent + oldParentID, ok := face.fileIDToParentID[id] + if ok && oldParentID != parentID { + if nodey, ok := face.folderIDToNode[oldParentID]; ok { + delete(nodey.files, id) + } + + if zombey, ok := face.tombstones[oldParentID]; ok { + delete(zombey.files, id) + } + } + + parent, ok := face.folderIDToNode[parentID] + if !ok { + return clues.New("item added before parent") + } + + face.fileIDToParentID[id] = parentID + parent.files[id] = lastModified + + delete(face.deletedFileIDs, id) + + return nil +} + +func (face *folderyMcFolderFace) deleteFile(id string) { + parentID, ok := face.fileIDToParentID[id] + if ok { + if nodey, ok := face.folderIDToNode[parentID]; ok { + delete(nodey.files, id) + } + + if zombey, ok := face.tombstones[parentID]; ok { + delete(zombey.files, id) + } + } + + delete(face.fileIDToParentID, id) + + face.deletedFileIDs[id] = struct{}{} +} diff --git a/src/internal/m365/collection/drive/delta_tree_test.go b/src/internal/m365/collection/drive/delta_tree_test.go index 9c4d4a2c1..ec832b00c 100644 --- a/src/internal/m365/collection/drive/delta_tree_test.go +++ b/src/internal/m365/collection/drive/delta_tree_test.go @@ -2,6 +2,7 @@ package drive import ( "testing" + "time" "github.com/alcionai/clues" "github.com/stretchr/testify/assert" @@ -47,6 +48,30 @@ func treeWithFolders() *folderyMcFolderFace { return tree } +func treeWithFileAtRoot() *folderyMcFolderFace { + tree := treeWithFolders() + tree.root.files[id(file)] = time.Now() + tree.fileIDToParentID[id(file)] = rootID + + return tree +} + +func treeWithFileInFolder() *folderyMcFolderFace { + tree := treeWithFileAtRoot() + tree.folderIDToNode[id(folder)].files[id(file)] = time.Now() + tree.fileIDToParentID[id(file)] = id(folder) + + return tree +} + +func treeWithFileInTombstone() *folderyMcFolderFace { + tree := treeWithTombstone() + tree.tombstones[id(folder)].files[id(file)] = time.Now() + tree.fileIDToParentID[id(file)] = id(folder) + + return tree +} + // --------------------------------------------------------------------------- // tests // --------------------------------------------------------------------------- @@ -72,7 +97,7 @@ func (suite *DeltaTreeUnitSuite) TestNewFolderyMcFolderFace() { assert.Nil(t, folderFace.root) assert.NotNil(t, folderFace.folderIDToNode) assert.NotNil(t, folderFace.tombstones) - assert.NotNil(t, folderFace.excludeFileIDs) + assert.NotNil(t, folderFace.fileIDToParentID) } func (suite *DeltaTreeUnitSuite) TestNewNodeyMcNodeFace() { @@ -88,9 +113,13 @@ func (suite *DeltaTreeUnitSuite) TestNewNodeyMcNodeFace() { assert.NotEqual(t, loc, nodeFace.prev) assert.True(t, nodeFace.isPackage) assert.NotNil(t, nodeFace.children) - assert.NotNil(t, nodeFace.items) + assert.NotNil(t, nodeFace.files) } +// --------------------------------------------------------------------------- +// folder tests +// --------------------------------------------------------------------------- + // note that this test is focused on the SetFolder function, // and intentionally does not verify the resulting node tree func (suite *DeltaTreeUnitSuite) TestFolderyMcFolderFace_SetFolder() { @@ -104,10 +133,8 @@ func (suite *DeltaTreeUnitSuite) TestFolderyMcFolderFace_SetFolder() { 
expectErr assert.ErrorAssertionFunc }{ { - tname: "add root", - tree: &folderyMcFolderFace{ - folderIDToNode: map[string]*nodeyMcNodeFace{}, - }, + tname: "add root", + tree: newFolderyMcFolderFace(nil), id: rootID, name: rootName, isPackage: true, @@ -196,7 +223,7 @@ func (suite *DeltaTreeUnitSuite) TestFolderyMcFolderFace_SetFolder() { ctx, flush := tester.NewContext(t) defer flush() - err := test.tree.SetFolder( + err := test.tree.setFolder( ctx, test.parentID, test.id, @@ -269,7 +296,7 @@ func (suite *DeltaTreeUnitSuite) TestFolderyMcFolderFace_AddTombstone() { ctx, flush := tester.NewContext(t) defer flush() - err := test.tree.SetTombstone(ctx, test.id) + err := test.tree.setTombstone(ctx, test.id) test.expectErr(t, err, clues.ToCore(err)) if err != nil { @@ -404,7 +431,7 @@ func (suite *DeltaTreeUnitSuite) TestFolderyMcFolderFace_SetFolder_correctTree() parentID, fid, fname string, isPackage bool, ) { - err := tree.SetFolder(ctx, parentID, fid, fname, isPackage) + err := tree.setFolder(ctx, parentID, fid, fname, isPackage) require.NoError(t, err, clues.ToCore(err)) } @@ -490,7 +517,7 @@ func (suite *DeltaTreeUnitSuite) TestFolderyMcFolderFace_SetFolder_correctTombst parentID, fid, fname string, isPackage bool, ) { - err := tree.SetFolder(ctx, parentID, fid, fname, isPackage) + err := tree.setFolder(ctx, parentID, fid, fname, isPackage) require.NoError(t, err, clues.ToCore(err)) } @@ -498,7 +525,7 @@ func (suite *DeltaTreeUnitSuite) TestFolderyMcFolderFace_SetFolder_correctTombst tid string, loc path.Elements, ) { - err := tree.SetTombstone(ctx, tid) + err := tree.setTombstone(ctx, tid) require.NoError(t, err, clues.ToCore(err)) } @@ -651,3 +678,188 @@ func (suite *DeltaTreeUnitSuite) TestFolderyMcFolderFace_SetFolder_correctTombst entomb().compare(t, tree.tombstones) } + +// --------------------------------------------------------------------------- +// file tests +// --------------------------------------------------------------------------- + +func (suite *DeltaTreeUnitSuite) TestFolderyMcFolderFace_AddFile() { + table := []struct { + tname string + tree *folderyMcFolderFace + oldParentID string + parentID string + expectErr assert.ErrorAssertionFunc + expectFiles map[string]string + }{ + { + tname: "add file to root", + tree: treeWithRoot(), + oldParentID: "", + parentID: rootID, + expectErr: assert.NoError, + expectFiles: map[string]string{id(file): rootID}, + }, + { + tname: "add file to folder", + tree: treeWithFolders(), + oldParentID: "", + parentID: id(folder), + expectErr: assert.NoError, + expectFiles: map[string]string{id(file): id(folder)}, + }, + { + tname: "re-add file at the same location", + tree: treeWithFileAtRoot(), + oldParentID: rootID, + parentID: rootID, + expectErr: assert.NoError, + expectFiles: map[string]string{id(file): rootID}, + }, + { + tname: "move file from folder to root", + tree: treeWithFileInFolder(), + oldParentID: id(folder), + parentID: rootID, + expectErr: assert.NoError, + expectFiles: map[string]string{id(file): rootID}, + }, + { + tname: "move file from tombstone to root", + tree: treeWithFileInTombstone(), + oldParentID: id(folder), + parentID: rootID, + expectErr: assert.NoError, + expectFiles: map[string]string{id(file): rootID}, + }, + { + tname: "error adding file to tombstone", + tree: treeWithTombstone(), + oldParentID: "", + parentID: id(folder), + expectErr: assert.Error, + expectFiles: map[string]string{}, + }, + { + tname: "error adding file before parent", + tree: treeWithTombstone(), + oldParentID: "", + parentID: idx(folder, 
1), + expectErr: assert.Error, + expectFiles: map[string]string{}, + }, + { + tname: "error adding file without parent id", + tree: treeWithTombstone(), + oldParentID: "", + parentID: "", + expectErr: assert.Error, + expectFiles: map[string]string{}, + }, + } + for _, test := range table { + suite.Run(test.tname, func() { + t := suite.T() + + err := test.tree.addFile( + test.parentID, + id(file), + time.Now()) + test.expectErr(t, err, clues.ToCore(err)) + assert.Equal(t, test.expectFiles, test.tree.fileIDToParentID) + + if err != nil { + return + } + + parent := test.tree.getNode(test.parentID) + + require.NotNil(t, parent) + assert.Contains(t, parent.files, id(file)) + + if len(test.oldParentID) > 0 && test.oldParentID != test.parentID { + old, ok := test.tree.folderIDToNode[test.oldParentID] + if !ok { + old = test.tree.tombstones[test.oldParentID] + } + + require.NotNil(t, old) + assert.NotContains(t, old.files, id(file)) + } + }) + } +} + +func (suite *DeltaTreeUnitSuite) TestFolderyMcFolderFace_DeleteFile() { + table := []struct { + tname string + tree *folderyMcFolderFace + parentID string + }{ + { + tname: "delete unseen file", + tree: treeWithRoot(), + parentID: rootID, + }, + { + tname: "delete file from root", + tree: treeWithFileAtRoot(), + parentID: rootID, + }, + { + tname: "delete file from folder", + tree: treeWithFileInFolder(), + parentID: id(folder), + }, + { + tname: "delete file from tombstone", + tree: treeWithFileInTombstone(), + parentID: id(folder), + }, + } + for _, test := range table { + suite.Run(test.tname, func() { + t := suite.T() + + test.tree.deleteFile(id(file)) + + parent := test.tree.getNode(test.parentID) + + require.NotNil(t, parent) + assert.NotContains(t, parent.files, id(file)) + assert.NotContains(t, test.tree.fileIDToParentID, id(file)) + assert.Contains(t, test.tree.deletedFileIDs, id(file)) + }) + } +} + +func (suite *DeltaTreeUnitSuite) TestFolderyMcFolderFace_addAndDeleteFile() { + t := suite.T() + tree := treeWithRoot() + fID := id(file) + + require.Len(t, tree.fileIDToParentID, 0) + require.Len(t, tree.deletedFileIDs, 0) + + tree.deleteFile(fID) + + assert.Len(t, tree.fileIDToParentID, 0) + assert.NotContains(t, tree.fileIDToParentID, fID) + assert.Len(t, tree.deletedFileIDs, 1) + assert.Contains(t, tree.deletedFileIDs, fID) + + err := tree.addFile(rootID, fID, time.Now()) + require.NoError(t, err, clues.ToCore(err)) + + assert.Len(t, tree.fileIDToParentID, 1) + assert.Contains(t, tree.fileIDToParentID, fID) + assert.Len(t, tree.deletedFileIDs, 0) + assert.NotContains(t, tree.deletedFileIDs, fID) + + tree.deleteFile(fID) + + assert.Len(t, tree.fileIDToParentID, 0) + assert.NotContains(t, tree.fileIDToParentID, fID) + assert.Len(t, tree.deletedFileIDs, 1) + assert.Contains(t, tree.deletedFileIDs, fID) +} diff --git a/src/internal/m365/collection/drive/helper_test.go b/src/internal/m365/collection/drive/helper_test.go index 29e6e63c0..807b1e04f 100644 --- a/src/internal/m365/collection/drive/helper_test.go +++ b/src/internal/m365/collection/drive/helper_test.go @@ -14,6 +14,8 @@ import ( "github.com/alcionai/corso/src/pkg/services/m365/api" ) +const defaultItemSize int64 = 42 + // TODO(ashmrtn): Merge with similar structs in graph and exchange packages. 
type oneDriveService struct { credentials account.M365Config diff --git a/src/internal/m365/collection/drive/limiter.go b/src/internal/m365/collection/drive/limiter.go index 7acf8f62a..8b64acb60 100644 --- a/src/internal/m365/collection/drive/limiter.go +++ b/src/internal/m365/collection/drive/limiter.go @@ -1,10 +1,16 @@ package drive -import "github.com/alcionai/corso/src/pkg/control" +import ( + "github.com/alcionai/clues" + + "github.com/alcionai/corso/src/pkg/control" +) // used to mark an unused variable while we transition handling. const ignoreMe = -1 +var errHitLimit = clues.New("hit limiter limits") + type driveEnumerationStats struct { numPages int numAddedFiles int @@ -56,6 +62,10 @@ func (l pagerLimiter) sizeLimit() int64 { return l.limits.MaxBytes } +func (l pagerLimiter) aboveSizeLimit(i int64) bool { + return l.limits.Enabled && (i >= l.limits.MaxBytes) +} + // atItemLimit returns true if the limiter is enabled and has reached the limit // for individual items added to collections for this backup. func (l pagerLimiter) atItemLimit(stats *driveEnumerationStats) bool { diff --git a/src/pkg/count/keys.go b/src/pkg/count/keys.go index 2e2945918..93c5c4347 100644 --- a/src/pkg/count/keys.go +++ b/src/pkg/count/keys.go @@ -78,7 +78,9 @@ const ( // and use a separate Key (Folders) for the end count of folders produced // at the end of the delta enumeration. const ( + TotalDeleteFilesProcessed Key = "total-delete-files-processed" TotalDeleteFoldersProcessed Key = "total-delete-folders-processed" + TotalFilesProcessed Key = "total-files-processed" TotalFoldersProcessed Key = "total-folders-processed" TotalMalwareProcessed Key = "total-malware-processed" TotalPackagesProcessed Key = "total-packages-processed"
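---

#### Appendix: sketch of the delta-tree file bookkeeping

For reviewers: below is a minimal, self-contained model of the bookkeeping that `addFile`/`deleteFile` perform on `folderyMcFolderFace` (delta_tree.go above). The type and helper names in the sketch are illustrative stand-ins, not the repo's API; it only demonstrates the semantics the new tests assert, namely that a move removes the file's reference from its old parent (live or tombstoned) and that a delete tombstones the ID for the post-processing exclude map.

```go
package main

import (
	"fmt"
	"time"
)

// fileTree is a simplified stand-in for folderyMcFolderFace: it tracks
// which folder owns each file, the per-folder file sets, and the IDs of
// deleted files (used later to build the exclude map).
type fileTree struct {
	fileToParent map[string]string
	folderFiles  map[string]map[string]time.Time
	deleted      map[string]struct{}
}

func newFileTree() *fileTree {
	return &fileTree{
		fileToParent: map[string]string{},
		folderFiles:  map[string]map[string]time.Time{"root": {}},
		deleted:      map[string]struct{}{},
	}
}

// addFile mirrors the PR's behavior: a move removes the reference held
// by the old parent, and re-adding a file clears any prior tombstone.
func (t *fileTree) addFile(parentID, id string, mod time.Time) error {
	if old, ok := t.fileToParent[id]; ok && old != parentID {
		delete(t.folderFiles[old], id)
	}

	files, ok := t.folderFiles[parentID]
	if !ok {
		return fmt.Errorf("file %q added before parent %q", id, parentID)
	}

	t.fileToParent[id] = parentID
	files[id] = mod

	delete(t.deleted, id)

	return nil
}

// deleteFile drops the file from its current parent and records the ID
// so post-processing can exclude it.
func (t *fileTree) deleteFile(id string) {
	if parent, ok := t.fileToParent[id]; ok {
		delete(t.folderFiles[parent], id)
	}

	delete(t.fileToParent, id)
	t.deleted[id] = struct{}{}
}

func main() {
	t := newFileTree()
	t.folderFiles["folder"] = map[string]time.Time{}

	_ = t.addFile("root", "file-1", time.Now())   // create
	_ = t.addFile("folder", "file-1", time.Now()) // move: root drops its reference
	t.deleteFile("file-1")                        // delete: recorded for exclusion

	fmt.Println(len(t.fileToParent), len(t.deleted)) // prints: 0 1
}
```

The same add-move-delete sequence is what `TestFolderyMcFolderFace_addAndDeleteFile` exercises against the real tree.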
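#### Appendix: the errHitLimit control flow

One other change worth calling out: `enumeratePageOfItems` now signals limit hits with the `errHitLimit` sentinel (limiter.go) instead of a bare `break`, so the paging loop in `populateTree` can distinguish "stop enumerating, the backup is fine" from a recoverable fault. A rough, self-contained illustration of that pattern follows; the `enumeratePage` stand-in is hypothetical, and the real loop additionally tracks stats and records recoverable errors via `el.AddRecoverable`.

```go
package main

import (
	"errors"
	"fmt"
)

// Illustrative sentinel, mirroring errHitLimit in limiter.go.
var errHitLimit = errors.New("hit limiter limits")

// enumeratePage stands in for enumeratePageOfItems: it returns the
// sentinel when a limit is reached mid-page.
func enumeratePage(page int) error {
	if page >= 2 {
		return errHitLimit
	}

	return nil
}

func main() {
	for page := 0; page < 5; page++ {
		if err := enumeratePage(page); err != nil {
			// a limit hit ends enumeration cleanly; anything else would
			// be recorded as a recoverable fault in the real code.
			if errors.Is(err, errHitLimit) {
				break
			}

			fmt.Println("recoverable:", err)
		}
	}

	fmt.Println("enumeration stopped")
}
```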