From e7d2aeac5dc96346c4a87901185e1bef68a929f1 Mon Sep 17 00:00:00 2001 From: Keepers Date: Wed, 10 May 2023 21:20:28 -0600 Subject: [PATCH] add drive-level tombstone for deleted drives (#3381) adds a tombstone collection for any drive that has been completely deleted (or that surfaced from a prior backup, but does not exist in the current) from the driveish account. --- #### Does this PR need a docs update or release note? - [x] :white_check_mark: Yes, it's included #### Type of change - [x] :bug: Bugfix #### Issue(s) * #3379 #### Test Plan - [x] :zap: Unit test --- src/internal/connector/onedrive/collection.go | 14 +-- .../connector/onedrive/collections.go | 63 ++++++++--- .../connector/onedrive/collections_test.go | 106 +++++++++++++----- src/internal/connector/onedrive/drive.go | 20 ++++ src/pkg/path/drive_test.go | 6 +- src/pkg/selectors/onedrive_test.go | 2 +- src/pkg/selectors/sharepoint_test.go | 4 +- 7 files changed, 163 insertions(+), 52 deletions(-) diff --git a/src/internal/connector/onedrive/collection.go b/src/internal/connector/onedrive/collection.go index a4caafae2..26fd41283 100644 --- a/src/internal/connector/onedrive/collection.go +++ b/src/internal/connector/onedrive/collection.go @@ -131,7 +131,7 @@ func pathToLocation(p path.Path) (*path.Builder, error) { // NewCollection creates a Collection func NewCollection( itemClient graph.Requester, - folderPath path.Path, + currPath path.Path, prevPath path.Path, driveID string, service graph.Servicer, @@ -145,9 +145,9 @@ func NewCollection( // to be changed as we won't be able to extract path information from the // storage path. In that case, we'll need to start storing the location paths // like we do the previous path. - locPath, err := pathToLocation(folderPath) + locPath, err := pathToLocation(currPath) if err != nil { - return nil, clues.Wrap(err, "getting location").With("folder_path", folderPath.String()) + return nil, clues.Wrap(err, "getting location").With("curr_path", currPath.String()) } prevLocPath, err := pathToLocation(prevPath) @@ -157,7 +157,7 @@ func NewCollection( c := newColl( itemClient, - folderPath, + currPath, prevPath, driveID, service, @@ -175,7 +175,7 @@ func NewCollection( func newColl( gr graph.Requester, - folderPath path.Path, + currPath path.Path, prevPath path.Path, driveID string, service graph.Servicer, @@ -188,7 +188,7 @@ func newColl( c := &Collection{ itemClient: gr, itemGetter: api.GetDriveItem, - folderPath: folderPath, + folderPath: currPath, prevPath: prevPath, driveItems: map[string]models.DriveItemable{}, driveID: driveID, @@ -197,7 +197,7 @@ func newColl( data: make(chan data.Stream, graph.Parallelism(path.OneDriveMetadataService).CollectionBufferSize()), statusUpdater: statusUpdater, ctrl: ctrlOpts, - state: data.StateOf(prevPath, folderPath), + state: data.StateOf(prevPath, currPath), scope: colScope, doNotMergeItems: doNotMergeItems, } diff --git a/src/internal/connector/onedrive/collections.go b/src/internal/connector/onedrive/collections.go index 8594e4a6f..4cb1944f7 100644 --- a/src/internal/connector/onedrive/collections.go +++ b/src/internal/connector/onedrive/collections.go @@ -101,6 +101,7 @@ type Collections struct { servicer graph.Servicer, driveID, link string, ) itemPager + servicePathPfxFunc pathPrefixerFunc // Track stats from drive enumeration. Represents the items backed up. NumItems int @@ -119,17 +120,18 @@ func NewCollections( ctrlOpts control.Options, ) *Collections { return &Collections{ - itemClient: itemClient, - tenant: tenant, - resourceOwner: resourceOwner, - source: source, - matcher: matcher, - CollectionMap: map[string]map[string]*Collection{}, - drivePagerFunc: PagerForSource, - itemPagerFunc: defaultItemPager, - service: service, - statusUpdater: statusUpdater, - ctrl: ctrlOpts, + itemClient: itemClient, + tenant: tenant, + resourceOwner: resourceOwner, + source: source, + matcher: matcher, + CollectionMap: map[string]map[string]*Collection{}, + drivePagerFunc: PagerForSource, + itemPagerFunc: defaultItemPager, + servicePathPfxFunc: pathPrefixerForSource(tenant, resourceOwner, source), + service: service, + statusUpdater: statusUpdater, + ctrl: ctrlOpts, } } @@ -280,6 +282,12 @@ func (c *Collections) Get( return nil, err } + driveTombstones := map[string]struct{}{} + + for driveID := range oldPathsByDriveID { + driveTombstones[driveID] = struct{}{} + } + driveComplete, closer := observe.MessageWithCompletion(ctx, observe.Bulletf("files")) defer closer() defer close(driveComplete) @@ -312,6 +320,8 @@ func (c *Collections) Get( ictx = clues.Add(ctx, "drive_id", driveID, "drive_name", driveName) ) + delete(driveTombstones, driveID) + if _, ok := c.CollectionMap[driveID]; !ok { c.CollectionMap[driveID] = map[string]*Collection{} } @@ -408,7 +418,7 @@ func (c *Collections) Get( col, err := NewCollection( c.itemClient, - nil, + nil, // delete the folder prevPath, driveID, c.service, @@ -427,15 +437,41 @@ func (c *Collections) Get( observe.Message(ctx, fmt.Sprintf("Discovered %d items to backup", c.NumItems)) - // Add an extra for the metadata collection. collections := []data.BackupCollection{} + // add all the drives we found for _, driveColls := range c.CollectionMap { for _, coll := range driveColls { collections = append(collections, coll) } } + // generate tombstones for drives that were removed. + for driveID := range driveTombstones { + prevDrivePath, err := c.servicePathPfxFunc(driveID) + if err != nil { + return nil, clues.Wrap(err, "making drive tombstone previous path").WithClues(ctx) + } + + coll, err := NewCollection( + c.itemClient, + nil, // delete the drive + prevDrivePath, + driveID, + c.service, + c.statusUpdater, + c.source, + c.ctrl, + CollectionScopeUnknown, + true) + if err != nil { + return nil, clues.Wrap(err, "making drive tombstone").WithClues(ctx) + } + + collections = append(collections, coll) + } + + // add metadata collections service, category := c.source.toPathServiceCat() md, err := graph.MakeMetadataCollection( c.tenant, @@ -457,7 +493,6 @@ func (c *Collections) Get( collections = append(collections, md) } - // TODO(ashmrtn): Track and return the set of items to exclude. return collections, nil } diff --git a/src/internal/connector/onedrive/collections_test.go b/src/internal/connector/onedrive/collections_test.go index 1baaed521..e55bf2db8 100644 --- a/src/internal/connector/onedrive/collections_test.go +++ b/src/internal/connector/onedrive/collections_test.go @@ -1246,16 +1246,15 @@ func (suite *OneDriveCollectionsUnitSuite) TestGet() { user, path.OneDriveService, path.FilesCategory, - false, - ) + false) require.NoError(suite.T(), err, "making metadata path", clues.ToCore(err)) - driveID1 := uuid.NewString() + driveID1 := "drive-1-" + uuid.NewString() drive1 := models.NewDrive() drive1.SetId(&driveID1) drive1.SetName(&driveID1) - driveID2 := uuid.NewString() + driveID2 := "drive-2-" + uuid.NewString() drive2 := models.NewDrive() drive2.SetId(&driveID2) drive2.SetName(&driveID2) @@ -1287,7 +1286,8 @@ func (suite *OneDriveCollectionsUnitSuite) TestGet() { expectedFolderPaths map[string]map[string]string expectedDelList *pmMock.PrefixMap expectedSkippedCount int - doNotMergeItems bool + // map full or previous path (prefers full) -> bool + doNotMergeItems map[string]bool }{ { name: "OneDrive_OneItemPage_DelFileOnly_NoFolders_NoErrors", @@ -1321,7 +1321,7 @@ func (suite *OneDriveCollectionsUnitSuite) TestGet() { }), }, { - name: "OneDrive_OneItemPage_NoFolders_NoErrors", + name: "OneDrive_OneItemPage_NoFolderDeltas_NoErrors", drives: []models.Driveable{drive1}, items: map[string][]deltaPagerResult{ driveID1: { @@ -1699,7 +1699,9 @@ func (suite *OneDriveCollectionsUnitSuite) TestGet() { }, }, expectedDelList: pmMock.NewPrefixMap(map[string]map[string]struct{}{}), - doNotMergeItems: true, + doNotMergeItems: map[string]bool{ + rootFolderPath1: true, + }, }, { name: "OneDrive_TwoItemPage_DeltaError", @@ -1741,7 +1743,10 @@ func (suite *OneDriveCollectionsUnitSuite) TestGet() { }, }, expectedDelList: pmMock.NewPrefixMap(map[string]map[string]struct{}{}), - doNotMergeItems: true, + doNotMergeItems: map[string]bool{ + rootFolderPath1: true, + folderPath1: true, + }, }, { name: "OneDrive_TwoItemPage_NoDeltaError", @@ -1785,7 +1790,7 @@ func (suite *OneDriveCollectionsUnitSuite) TestGet() { expectedDelList: pmMock.NewPrefixMap(map[string]map[string]struct{}{ rootFolderPath1: getDelList("file", "file2"), }), - doNotMergeItems: false, + doNotMergeItems: map[string]bool{}, }, { name: "OneDrive_OneItemPage_InvalidPrevDelta_DeleteNonExistentFolder", @@ -1827,7 +1832,11 @@ func (suite *OneDriveCollectionsUnitSuite) TestGet() { }, }, expectedDelList: pmMock.NewPrefixMap(map[string]map[string]struct{}{}), - doNotMergeItems: true, + doNotMergeItems: map[string]bool{ + rootFolderPath1: true, + folderPath1: true, + expectedPath1("/folder2"): true, + }, }, { name: "OneDrive_OneItemPage_InvalidPrevDelta_AnotherFolderAtDeletedLocation", @@ -1873,7 +1882,10 @@ func (suite *OneDriveCollectionsUnitSuite) TestGet() { }, }, expectedDelList: pmMock.NewPrefixMap(map[string]map[string]struct{}{}), - doNotMergeItems: true, + doNotMergeItems: map[string]bool{ + rootFolderPath1: true, + folderPath1: true, + }, }, { name: "OneDrive Two Item Pages with Malware", @@ -1973,7 +1985,11 @@ func (suite *OneDriveCollectionsUnitSuite) TestGet() { }, }, expectedDelList: pmMock.NewPrefixMap(map[string]map[string]struct{}{}), - doNotMergeItems: true, + doNotMergeItems: map[string]bool{ + rootFolderPath1: true, + folderPath1: true, + expectedPath1("/folder2"): true, + }, }, { name: "One Drive Delta Error Random Folder Delete", @@ -2012,7 +2028,10 @@ func (suite *OneDriveCollectionsUnitSuite) TestGet() { }, }, expectedDelList: pmMock.NewPrefixMap(map[string]map[string]struct{}{}), - doNotMergeItems: true, + doNotMergeItems: map[string]bool{ + rootFolderPath1: true, + folderPath1: true, + }, }, { name: "One Drive Delta Error Random Item Delete", @@ -2049,7 +2068,9 @@ func (suite *OneDriveCollectionsUnitSuite) TestGet() { }, }, expectedDelList: pmMock.NewPrefixMap(map[string]map[string]struct{}{}), - doNotMergeItems: true, + doNotMergeItems: map[string]bool{ + rootFolderPath1: true, + }, }, { name: "One Drive Folder Made And Deleted", @@ -2200,6 +2221,37 @@ func (suite *OneDriveCollectionsUnitSuite) TestGet() { rootFolderPath1: getDelList("file"), }), }, + { + name: "TwoPriorDrives_OneTombstoned", + drives: []models.Driveable{drive1}, + items: map[string][]deltaPagerResult{ + driveID1: { + { + items: []models.DriveItemable{ + driveRootItem("root"), // will be present + }, + deltaLink: &delta, + }, + }, + }, + errCheck: assert.NoError, + prevFolderPaths: map[string]map[string]string{ + driveID1: {"root": rootFolderPath1}, + driveID2: {"root": rootFolderPath2}, + }, + expectedCollections: map[string]map[data.CollectionState][]string{ + rootFolderPath1: {data.NotMovedState: {}}, + rootFolderPath2: {data.DeletedState: {}}, + }, + expectedDeltaURLs: map[string]string{driveID1: delta}, + expectedFolderPaths: map[string]map[string]string{ + driveID1: {"root": rootFolderPath1}, + }, + expectedDelList: pmMock.NewPrefixMap(map[string]map[string]struct{}{}), + doNotMergeItems: map[string]bool{ + rootFolderPath2: true, + }, + }, } for _, test := range table { suite.Run(test.name, func() { @@ -2257,12 +2309,10 @@ func (suite *OneDriveCollectionsUnitSuite) TestGet() { map[string]string{ driveID1: prevDelta, driveID2: prevDelta, - }, - ), + }), graph.NewMetadataEntry( graph.PreviousPathFileName, - test.prevFolderPaths, - ), + test.prevFolderPaths), }, func(*support.ConnectorOperationStatus) {}, ) @@ -2329,18 +2379,24 @@ func (suite *OneDriveCollectionsUnitSuite) TestGet() { "state: %d, path: %s", baseCol.State(), folderPath) - assert.Equal(t, test.doNotMergeItems, baseCol.DoNotMergeItems(), "DoNotMergeItems") + + p := baseCol.FullPath() + if p == nil { + p = baseCol.PreviousPath() + } + + assert.Equalf( + t, + test.doNotMergeItems[p.String()], + baseCol.DoNotMergeItems(), + "DoNotMergeItems in collection: %s", p) } expectedCollectionCount := 0 - for c := range test.expectedCollections { - for range test.expectedCollections[c] { - expectedCollectionCount++ - } + for _, ec := range test.expectedCollections { + expectedCollectionCount += len(ec) } - // This check is necessary to make sure we are all the - // collections we expect it to assert.Equal(t, expectedCollectionCount, collectionCount, "number of collections") test.expectedDelList.AssertEqual(t, delList) diff --git a/src/internal/connector/onedrive/drive.go b/src/internal/connector/onedrive/drive.go index 99487c66b..b34f860da 100644 --- a/src/internal/connector/onedrive/drive.go +++ b/src/internal/connector/onedrive/drive.go @@ -16,6 +16,7 @@ import ( "github.com/alcionai/corso/src/internal/connector/onedrive/api" "github.com/alcionai/corso/src/pkg/fault" "github.com/alcionai/corso/src/pkg/logger" + "github.com/alcionai/corso/src/pkg/path" ) const ( @@ -55,6 +56,25 @@ func PagerForSource( } } +type pathPrefixerFunc func(driveID string) (path.Path, error) + +func pathPrefixerForSource( + tenantID, resourceOwner string, + source driveSource, +) pathPrefixerFunc { + cat := path.FilesCategory + serv := path.OneDriveService + + if source == SharePointSource { + cat = path.LibrariesCategory + serv = path.SharePointService + } + + return func(driveID string) (path.Path, error) { + return path.Build(tenantID, resourceOwner, serv, cat, false, "drives", driveID, "root:") + } +} + // itemCollector functions collect the items found in a drive type itemCollector func( ctx context.Context, diff --git a/src/pkg/path/drive_test.go b/src/pkg/path/drive_test.go index bdbf09d9c..459548db5 100644 --- a/src/pkg/path/drive_test.go +++ b/src/pkg/path/drive_test.go @@ -32,18 +32,18 @@ func (suite *OneDrivePathSuite) Test_ToOneDrivePath() { }{ { name: "Not enough path elements", - pathElements: []string{"drive", "driveID"}, + pathElements: []string{"drives", "driveID"}, errCheck: assert.Error, }, { name: "Root path", - pathElements: []string{"drive", "driveID", root}, + pathElements: []string{"drives", "driveID", root}, expected: &path.DrivePath{DriveID: "driveID", Root: root, Folders: []string{}}, errCheck: assert.NoError, }, { name: "Deeper path", - pathElements: []string{"drive", "driveID", root, "folder1", "folder2"}, + pathElements: []string{"drives", "driveID", root, "folder1", "folder2"}, expected: &path.DrivePath{DriveID: "driveID", Root: root, Folders: []string{"folder1", "folder2"}}, errCheck: assert.NoError, }, diff --git a/src/pkg/selectors/onedrive_test.go b/src/pkg/selectors/onedrive_test.go index c91f27b04..f8fe4297d 100644 --- a/src/pkg/selectors/onedrive_test.go +++ b/src/pkg/selectors/onedrive_test.go @@ -315,7 +315,7 @@ func (suite *OneDriveSelectorSuite) TestOneDriveCategory_PathValues() { fileName := "file" fileID := fileName + "-id" shortRef := "short" - elems := []string{"drive", "driveID", "root:", "dir1.d", "dir2.d", fileID} + elems := []string{"drives", "driveID", "root:", "dir1.d", "dir2.d", fileID} filePath, err := path.Build("tenant", "user", path.OneDriveService, path.FilesCategory, true, elems...) require.NoError(t, err, clues.ToCore(err)) diff --git a/src/pkg/selectors/sharepoint_test.go b/src/pkg/selectors/sharepoint_test.go index b2c9e2344..63ec7e8ec 100644 --- a/src/pkg/selectors/sharepoint_test.go +++ b/src/pkg/selectors/sharepoint_test.go @@ -223,7 +223,7 @@ func (suite *SharePointSelectorSuite) TestSharePointRestore_Reduce() { var ( prefixElems = []string{ - "drive", + "drives", "drive!id", "root:", } @@ -415,7 +415,7 @@ func (suite *SharePointSelectorSuite) TestSharePointCategory_PathValues() { itemName = "item" itemID = "item-id" shortRef = "short" - driveElems = []string{"drive", "drive!id", "root:.d", "dir1.d", "dir2.d", itemID} + driveElems = []string{"drives", "drive!id", "root:.d", "dir1.d", "dir2.d", itemID} elems = []string{"dir1", "dir2", itemID} )