From d113fa39265089554d9a9559a5e9cbff99ed28c5 Mon Sep 17 00:00:00 2001 From: Hitesh Pattanayak <48874082+HiteshRepo@users.noreply.github.com> Date: Thu, 4 Jan 2024 19:19:33 +0530 Subject: [PATCH] store lists previouspath (#4938) saves previous path for lists in storage. #### Does this PR need a docs update or release note? - [x] :no_entry: No #### Type of change - [x] :sunflower: Feature #### Issue(s) #4754 #### Test Plan - [x] :muscle: Manual - [x] :zap: Unit test - [x] :green_heart: E2E --- src/internal/m365/backup.go | 4 + src/internal/m365/backup_test.go | 3 +- src/internal/m365/collection/site/backup.go | 188 ++++++- .../m365/collection/site/backup_test.go | 472 +++++++++++++++++- .../m365/collection/site/collection.go | 42 +- .../m365/collection/site/collection_test.go | 183 +++++-- src/internal/m365/collection/site/handlers.go | 11 + .../m365/collection/site/lists_handler.go | 15 + .../m365/collection/site/lists_metadata.go | 118 +++++ .../m365/collection/site/mock/list.go | 100 +++- .../m365/service/sharepoint/backup.go | 18 +- src/internal/operations/manifests_test.go | 28 ++ src/pkg/count/keys.go | 1 + 13 files changed, 1088 insertions(+), 95 deletions(-) create mode 100644 src/internal/m365/collection/site/lists_metadata.go diff --git a/src/internal/m365/backup.go b/src/internal/m365/backup.go index 06af7b9bd..c76d2e703 100644 --- a/src/internal/m365/backup.go +++ b/src/internal/m365/backup.go @@ -198,6 +198,10 @@ func (ctrl *Controller) GetMetadataPaths( if err != nil { return nil, err } + case reason.Service() == path.SharePointService && reason.Category() == path.ListsCategory: + for _, fn := range sharepoint.ListsMetadataFileNames() { + filePaths = append(filePaths, []string{fn}) + } default: for _, fn := range bupMD.AllMetadataFileNames() { filePaths = append(filePaths, []string{fn}) diff --git a/src/internal/m365/backup_test.go b/src/internal/m365/backup_test.go index 1c1c7b3d5..04ea6b66a 100644 --- a/src/internal/m365/backup_test.go +++ b/src/internal/m365/backup_test.go @@ -463,7 +463,8 @@ func (suite *SPCollectionIntgSuite) TestCreateSharePointCollection_Lists() { assert.True(t, excludes.Empty()) for _, collection := range cols { - assert.Equal(t, path.SharePointService, collection.FullPath().Service()) + assert.True(t, path.SharePointService == collection.FullPath().Service() || + path.SharePointMetadataService == collection.FullPath().Service()) assert.Equal(t, path.ListsCategory, collection.FullPath().Category()) for item := range collection.Items(ctx, fault.New(true)) { diff --git a/src/internal/m365/collection/site/backup.go b/src/internal/m365/collection/site/backup.go index a181a5d50..0a09f0be7 100644 --- a/src/internal/m365/collection/site/backup.go +++ b/src/internal/m365/collection/site/backup.go @@ -7,6 +7,7 @@ import ( "time" "github.com/alcionai/clues" + "github.com/microsoftgraph/msgraph-sdk-go/models" "github.com/alcionai/corso/src/internal/common/prefixmatcher" "github.com/alcionai/corso/src/internal/common/ptr" @@ -17,6 +18,7 @@ import ( "github.com/alcionai/corso/src/internal/observe" "github.com/alcionai/corso/src/internal/operations/inject" "github.com/alcionai/corso/src/pkg/account" + "github.com/alcionai/corso/src/pkg/backup/metadata" "github.com/alcionai/corso/src/pkg/count" "github.com/alcionai/corso/src/pkg/fault" "github.com/alcionai/corso/src/pkg/logger" @@ -127,10 +129,13 @@ func CollectPages( collection := NewPrefetchCollection( nil, dir, + nil, + nil, ac, scope, su, - bpc.Options) + bpc.Options, + nil) collection.SetBetaService(betaService) collection.AddItem(tuple.ID, time.Now()) @@ -148,24 +153,76 @@ func CollectLists( tenantID string, scope selectors.SharePointScope, su support.StatusUpdater, - errs *fault.Bus, counter *count.Bus, -) ([]data.BackupCollection, error) { + errs *fault.Bus, +) ([]data.BackupCollection, bool, error) { logger.Ctx(ctx).Debug("Creating SharePoint List Collections") var ( - collection data.BackupCollection - el = errs.Local() - cl = counter.Local() - spcs = make([]data.BackupCollection, 0) - cfg = api.CallConfig{Select: idAnd("list", "lastModifiedDateTime")} + el = errs.Local() + spcs = make([]data.BackupCollection, 0) + cfg = api.CallConfig{Select: idAnd("list", "lastModifiedDateTime")} ) + dps, canUsePreviousBackup, err := parseListsMetadataCollections(ctx, path.ListsCategory, bpc.MetadataCollections) + if err != nil { + return nil, false, err + } + + ctx = clues.Add(ctx, "can_use_previous_backup", canUsePreviousBackup) + lists, err := bh.GetItems(ctx, cfg) if err != nil { - return nil, err + return nil, false, err } + collections, err := populateListsCollections( + ctx, + bh, + bpc, + ac, + tenantID, + scope, + su, + lists, + dps, + counter, + el) + if err != nil { + return nil, false, err + } + + for _, spc := range collections { + spcs = append(spcs, spc) + } + + return spcs, canUsePreviousBackup, el.Failure() +} + +func populateListsCollections( + ctx context.Context, + bh backupHandler, + bpc inject.BackupProducerConfig, + ac api.Client, + tenantID string, + scope selectors.SharePointScope, + su support.StatusUpdater, + lists []models.Listable, + dps metadata.DeltaPaths, + counter *count.Bus, + el *fault.Bus, +) (map[string]data.BackupCollection, error) { + var ( + err error + collection data.BackupCollection + // collections: list-id -> backup-collection + collections = make(map[string]data.BackupCollection) + currPaths = make(map[string]string) + tombstones = makeTombstones(dps) + ) + + counter.Add(count.Lists, int64(len(lists))) + for _, list := range lists { if el.Failure() != nil { break @@ -175,24 +232,40 @@ func CollectLists( continue } - modTime := ptr.Val(list.GetLastModifiedDateTime()) + var ( + listID = ptr.Val(list.GetId()) + storageDir = path.Elements{listID} + dp = dps[storageDir.String()] + prevPathStr = dp.Path + prevPath path.Path + ) - dir, err := path.Build( - tenantID, - bpc.ProtectedResource.ID(), - path.SharePointService, - path.ListsCategory, - false, - ptr.Val(list.GetId())) + delete(tombstones, listID) + + if len(prevPathStr) > 0 { + if prevPath, err = pathFromPrevString(prevPathStr); err != nil { + err = clues.StackWC(ctx, err).Label(count.BadPrevPath) + logger.CtxErr(ctx, err).Error("parsing prev path") + + return nil, err + } + } + + currPath, err := bh.CanonicalPath(storageDir, tenantID) if err != nil { el.AddRecoverable(ctx, clues.WrapWC(ctx, err, "creating list collection path")) + return nil, err } + modTime := ptr.Val(list.GetLastModifiedDateTime()) + lazyFetchCol := NewLazyFetchCollection( bh, - dir, + currPath, + prevPath, + storageDir.Builder(), su, - cl) + counter.Local()) lazyFetchCol.AddItem( ptr.Val(list.GetId()), @@ -205,11 +278,14 @@ func CollectLists( if modTime.IsZero() { prefetchCol := NewPrefetchCollection( bh, - dir, + currPath, + prevPath, + storageDir.Builder(), ac, scope, su, - bpc.Options) + bpc.Options, + counter.Local()) prefetchCol.AddItem( ptr.Val(list.GetId()), @@ -218,10 +294,38 @@ func CollectLists( collection = prefetchCol } - spcs = append(spcs, collection) + collections[storageDir.String()] = collection + currPaths[storageDir.String()] = currPath.String() } - return spcs, el.Failure() + handleTombstones(ctx, bpc, tombstones, collections, counter, el) + + // Build metadata path + pathPrefix, err := path.BuildMetadata( + tenantID, + bpc.ProtectedResource.ID(), + path.SharePointService, + path.ListsCategory, + false) + if err != nil { + return nil, clues.WrapWC(ctx, err, "making metadata path prefix"). + Label(count.BadPathPrefix) + } + + mdCol, err := graph.MakeMetadataCollection( + pathPrefix, + []graph.MetadataCollectionEntry{ + graph.NewMetadataEntry(metadata.PreviousPathFileName, currPaths), + }, + su, + counter.Local()) + if err != nil { + return nil, clues.WrapWC(ctx, err, "making metadata collection") + } + + collections["metadata"] = mdCol + + return collections, nil } func idAnd(ss ...string) []string { @@ -233,3 +337,41 @@ func idAnd(ss ...string) []string { return append(id, ss...) } + +func handleTombstones( + ctx context.Context, + bpc inject.BackupProducerConfig, + tombstones map[string]string, + collections map[string]data.BackupCollection, + counter *count.Bus, + el *fault.Bus, +) { + for id, p := range tombstones { + if el.Failure() != nil { + return + } + + ictx := clues.Add(ctx, "tombstone_id", id) + + if collections[id] != nil { + err := clues.NewWC(ictx, "conflict: tombstone exists for a live collection").Label(count.CollectionTombstoneConflict) + el.AddRecoverable(ictx, err) + + continue + } + + if len(p) == 0 { + continue + } + + prevPath, err := pathFromPrevString(p) + if err != nil { + err := clues.StackWC(ictx, err).Label(count.BadPrevPath) + logger.CtxErr(ictx, err).Error("parsing tombstone prev path") + + continue + } + + collections[id] = data.NewTombstoneCollection(prevPath, bpc.Options, counter.Local()) + } +} diff --git a/src/internal/m365/collection/site/backup_test.go b/src/internal/m365/collection/site/backup_test.go index 83989b8ed..734238597 100644 --- a/src/internal/m365/collection/site/backup_test.go +++ b/src/internal/m365/collection/site/backup_test.go @@ -1,26 +1,333 @@ package site import ( + "context" + "errors" "testing" "github.com/alcionai/clues" + "github.com/microsoftgraph/msgraph-sdk-go/models" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" "github.com/alcionai/corso/src/internal/common/idname/mock" + "github.com/alcionai/corso/src/internal/data" + dataMock "github.com/alcionai/corso/src/internal/data/mock" + siteMock "github.com/alcionai/corso/src/internal/m365/collection/site/mock" + "github.com/alcionai/corso/src/internal/m365/support" "github.com/alcionai/corso/src/internal/operations/inject" "github.com/alcionai/corso/src/internal/tester" "github.com/alcionai/corso/src/internal/tester/tconfig" "github.com/alcionai/corso/src/internal/version" + "github.com/alcionai/corso/src/pkg/account" + "github.com/alcionai/corso/src/pkg/backup/metadata" "github.com/alcionai/corso/src/pkg/control" "github.com/alcionai/corso/src/pkg/count" "github.com/alcionai/corso/src/pkg/fault" + "github.com/alcionai/corso/src/pkg/path" "github.com/alcionai/corso/src/pkg/selectors" "github.com/alcionai/corso/src/pkg/services/m365/api" "github.com/alcionai/corso/src/pkg/services/m365/api/graph" ) +type SharePointBackupUnitSuite struct { + tester.Suite + creds account.M365Config +} + +func TestSharePointBackupUnitSuite(t *testing.T) { + suite.Run(t, &SharePointBackupUnitSuite{Suite: tester.NewUnitSuite(t)}) +} + +func (suite *SharePointBackupUnitSuite) SetupSuite() { + a := tconfig.NewFakeM365Account(suite.T()) + m365, err := a.M365Config() + require.NoError(suite.T(), err, clues.ToCore(err)) + suite.creds = m365 +} + +func (suite *SharePointBackupUnitSuite) TestCollectLists() { + t := suite.T() + + var ( + statusUpdater = func(*support.ControllerOperationStatus) {} + siteID = tconfig.M365SiteID(t) + sel = selectors.NewSharePointBackup([]string{siteID}) + ) + + table := []struct { + name string + mock siteMock.ListHandler + expectErr require.ErrorAssertionFunc + expectColls int + expectNewColls int + expectMetadataColls int + canUsePreviousBackup bool + }{ + { + name: "one list", + mock: siteMock.NewListHandler(siteMock.StubLists("one"), siteID, nil), + expectErr: require.NoError, + expectColls: 2, + expectNewColls: 1, + expectMetadataColls: 1, + canUsePreviousBackup: true, + }, + { + name: "many lists", + mock: siteMock.NewListHandler(siteMock.StubLists("one", "two"), siteID, nil), + expectErr: require.NoError, + expectColls: 3, + expectNewColls: 2, + expectMetadataColls: 1, + canUsePreviousBackup: true, + }, + { + name: "with error", + mock: siteMock.NewListHandler(siteMock.StubLists("one"), siteID, errors.New("some error")), + expectErr: require.Error, + expectColls: 0, + expectNewColls: 0, + expectMetadataColls: 0, + canUsePreviousBackup: false, + }, + } + for _, test := range table { + suite.Run(test.name, func() { + ctx, flush := tester.NewContext(t) + defer flush() + + ac, err := api.NewClient( + suite.creds, + control.DefaultOptions(), + count.New()) + require.NoError(t, err, clues.ToCore(err)) + + bpc := inject.BackupProducerConfig{ + LastBackupVersion: version.NoBackup, + Options: control.DefaultOptions(), + ProtectedResource: mock.NewProvider(siteID, siteID), + } + + cs, canUsePreviousBackup, err := CollectLists( + ctx, + test.mock, + bpc, + ac, + suite.creds.AzureTenantID, + sel.Lists(selectors.Any())[0], + statusUpdater, + count.New(), + fault.New(false)) + + test.expectErr(t, err, clues.ToCore(err)) + assert.Len(t, cs, test.expectColls, "number of collections") + assert.Equal(t, test.canUsePreviousBackup, canUsePreviousBackup) + + newStates, metadatas := 0, 0 + for _, c := range cs { + if c.FullPath() != nil && c.FullPath().Service() == path.SharePointMetadataService { + metadatas++ + continue + } + + if c.State() == data.NewState { + newStates++ + } + } + + assert.Equal(t, test.expectNewColls, newStates, "new collections") + assert.Equal(t, test.expectMetadataColls, metadatas, "metadata collections") + }) + } +} + +func (suite *SharePointBackupUnitSuite) TestPopulateListsCollections_incremental() { + t := suite.T() + + var ( + statusUpdater = func(*support.ControllerOperationStatus) {} + siteID = tconfig.M365SiteID(t) + sel = selectors.NewSharePointBackup([]string{siteID}) + ) + + ac, err := api.NewClient( + suite.creds, + control.DefaultOptions(), + count.New()) + require.NoError(t, err, clues.ToCore(err)) + + listPathOne, err := path.Build( + suite.creds.AzureTenantID, + siteID, + path.SharePointService, + path.ListsCategory, + false, + "one") + require.NoError(suite.T(), err, clues.ToCore(err)) + + listPathTwo, err := path.Build( + suite.creds.AzureTenantID, + siteID, + path.SharePointService, + path.ListsCategory, + false, + "two") + require.NoError(suite.T(), err, clues.ToCore(err)) + + listPathThree, err := path.Build( + suite.creds.AzureTenantID, + siteID, + path.SharePointService, + path.ListsCategory, + false, + "three") + require.NoError(suite.T(), err, clues.ToCore(err)) + + table := []struct { + name string + lists []models.Listable + deltaPaths metadata.DeltaPaths + expectErr require.ErrorAssertionFunc + expectColls int + expectNewColls int + expectNotMovedColls int + expectMetadataColls int + expectTombstoneCols int + }{ + { + name: "one list", + lists: siteMock.StubLists("one"), + deltaPaths: metadata.DeltaPaths{ + "one": { + Path: listPathOne.String(), + }, + }, + expectErr: require.NoError, + expectColls: 2, + expectNotMovedColls: 1, + expectNewColls: 0, + expectMetadataColls: 1, + expectTombstoneCols: 0, + }, + { + name: "one lists, one deleted", + lists: siteMock.StubLists("two"), + deltaPaths: metadata.DeltaPaths{ + "one": { + Path: listPathOne.String(), + }, + }, + expectErr: require.NoError, + expectColls: 3, + expectNewColls: 1, + expectMetadataColls: 1, + expectTombstoneCols: 1, + }, + { + name: "two lists, one deleted", + lists: siteMock.StubLists("one", "two"), + deltaPaths: metadata.DeltaPaths{ + "one": { + Path: listPathOne.String(), + }, + "three": { + Path: listPathThree.String(), + }, + }, + expectErr: require.NoError, + expectColls: 4, + expectNotMovedColls: 1, + expectNewColls: 1, + expectMetadataColls: 1, + expectTombstoneCols: 1, + }, + { + name: "no previous paths", + lists: siteMock.StubLists("one", "two"), + deltaPaths: metadata.DeltaPaths{}, + expectErr: require.NoError, + expectColls: 3, + expectNotMovedColls: 0, + expectNewColls: 2, + expectMetadataColls: 1, + expectTombstoneCols: 0, + }, + { + name: "two lists, unchanges", + lists: siteMock.StubLists("one", "two"), + deltaPaths: metadata.DeltaPaths{ + "one": { + Path: listPathOne.String(), + }, + "two": { + Path: listPathTwo.String(), + }, + }, + expectErr: require.NoError, + expectColls: 3, + expectNotMovedColls: 2, + expectNewColls: 0, + expectMetadataColls: 1, + expectTombstoneCols: 0, + }, + } + for _, test := range table { + suite.Run(test.name, func() { + ctx, flush := tester.NewContext(t) + defer flush() + + bpc := inject.BackupProducerConfig{ + LastBackupVersion: version.NoBackup, + Options: control.DefaultOptions(), + ProtectedResource: mock.NewProvider(siteID, siteID), + } + + cs, err := populateListsCollections( + ctx, + siteMock.NewListHandler(test.lists, siteID, nil), + bpc, + ac, + suite.creds.AzureTenantID, + sel.Lists(selectors.Any())[0], + statusUpdater, + test.lists, + test.deltaPaths, + count.New(), + fault.New(false)) + + test.expectErr(t, err, clues.ToCore(err)) + assert.Len(t, cs, test.expectColls, "number of collections") + + newStates, notMovedStates, metadatas, tombstoned := 0, 0, 0, 0 + for _, c := range cs { + if c.FullPath() != nil && c.FullPath().Service() == path.SharePointMetadataService { + metadatas++ + continue + } + + if c.State() == data.DeletedState { + tombstoned++ + } + + if c.State() == data.NewState { + newStates++ + } + + if c.State() == data.NotMovedState { + notMovedStates++ + } + } + + assert.Equal(t, test.expectNewColls, newStates, "new collections") + assert.Equal(t, test.expectNotMovedColls, notMovedStates, "not moved collections") + assert.Equal(t, test.expectMetadataColls, metadatas, "metadata collections") + assert.Equal(t, test.expectTombstoneCols, tombstoned, "tombstone collections") + }) + } +} + type SharePointSuite struct { tester.Suite } @@ -113,7 +420,7 @@ func (suite *SharePointSuite) TestCollectLists() { bh := NewListsBackupHandler(bpc.ProtectedResource.ID(), ac.Lists()) - col, err := CollectLists( + col, _, err := CollectLists( ctx, bh, bpc, @@ -121,8 +428,167 @@ func (suite *SharePointSuite) TestCollectLists() { creds.AzureTenantID, sel.Lists(selectors.Any())[0], (&MockGraphService{}).UpdateStatus, - fault.New(true), - count.New()) + count.New(), + fault.New(true)) require.NoError(t, err, clues.ToCore(err)) + + metadataFound := false + + for _, c := range col { + if c.FullPath().Service() == path.SharePointMetadataService { + metadataFound = true + break + } + } + assert.Less(t, 0, len(col)) + assert.True(t, metadataFound) +} + +func (suite *SharePointSuite) TestParseListsMetadataCollections() { + type fileValues struct { + fileName string + value string + } + + table := []struct { + name string + cat path.CategoryType + wantedCategorycat path.CategoryType + data []fileValues + expect map[string]metadata.DeltaPath + canUsePreviousBackup bool + expectError assert.ErrorAssertionFunc + }{ + { + name: "previous path only", + cat: path.ListsCategory, + wantedCategorycat: path.ListsCategory, + data: []fileValues{ + {metadata.PreviousPathFileName, "prev-path"}, + }, + expect: map[string]metadata.DeltaPath{ + "key": { + Path: "prev-path", + }, + }, + canUsePreviousBackup: true, + expectError: assert.NoError, + }, + { + name: "multiple previous paths", + cat: path.ListsCategory, + wantedCategorycat: path.ListsCategory, + data: []fileValues{ + {metadata.PreviousPathFileName, "prev-path"}, + {metadata.PreviousPathFileName, "prev-path-2"}, + }, + canUsePreviousBackup: false, + expectError: assert.Error, + }, + { + name: "unwanted category", + cat: path.LibrariesCategory, + wantedCategorycat: path.ListsCategory, + data: []fileValues{ + {metadata.PreviousPathFileName, "prev-path"}, + }, + expectError: assert.NoError, + }, + } + + for _, test := range table { + suite.Run(test.name, func() { + t := suite.T() + + ctx, flush := tester.NewContext(t) + defer flush() + + entries := []graph.MetadataCollectionEntry{} + + for _, d := range test.data { + entries = append( + entries, + graph.NewMetadataEntry(d.fileName, map[string]string{"key": d.value})) + } + + pathPrefix, err := path.BuildMetadata( + "t", "u", + path.SharePointService, + test.cat, + false) + require.NoError(t, err, "path prefix") + + coll, err := graph.MakeMetadataCollection( + pathPrefix, + entries, + func(cos *support.ControllerOperationStatus) {}, + count.New()) + require.NoError(t, err, clues.ToCore(err)) + + dps, canUsePreviousBackup, err := parseListsMetadataCollections( + ctx, + test.wantedCategorycat, + []data.RestoreCollection{ + dataMock.NewUnversionedRestoreCollection(t, data.NoFetchRestoreCollection{Collection: coll}), + }) + test.expectError(t, err, clues.ToCore(err)) + + if test.cat != test.wantedCategorycat { + assert.Len(t, dps, 0) + } else { + assert.Equal(t, test.canUsePreviousBackup, canUsePreviousBackup, "can use previous backup") + + assert.Len(t, dps, len(test.expect)) + + for k, v := range dps { + assert.Equal(t, v.Path, test.expect[k].Path, "path") + } + } + }) + } +} + +type failingColl struct { + t *testing.T +} + +func (f failingColl) Items(ctx context.Context, errs *fault.Bus) <-chan data.Item { + ic := make(chan data.Item) + defer close(ic) + + errs.AddRecoverable(ctx, assert.AnError) + + return ic +} + +func (f failingColl) FullPath() path.Path { + tmp, err := path.Build( + "tenant", + "siteid", + path.SharePointService, + path.ListsCategory, + false, + "list1") + require.NoError(f.t, err, clues.ToCore(err)) + + return tmp +} + +func (f failingColl) FetchItemByName(context.Context, string) (data.Item, error) { + // no fetch calls will be made + return nil, nil +} + +func (suite *SharePointSuite) TestParseListsMetadataCollections_ReadFailure() { + t := suite.T() + + ctx, flush := tester.NewContext(t) + defer flush() + + fc := failingColl{t} + + _, canUsePreviousBackup, err := parseListsMetadataCollections(ctx, path.ListsCategory, []data.RestoreCollection{fc}) + require.NoError(t, err) + require.False(t, canUsePreviousBackup) } diff --git a/src/internal/m365/collection/site/collection.go b/src/internal/m365/collection/site/collection.go index b5e7474d3..1bfaf8979 100644 --- a/src/internal/m365/collection/site/collection.go +++ b/src/internal/m365/collection/site/collection.go @@ -60,7 +60,9 @@ type prefetchCollection struct { // where the category type serves as the key, and the associated channel holds the items. stream map[path.CategoryType]chan data.Item // fullPath indicates the hierarchy within the collection - fullPath path.Path + fullPath path.Path + prevPath path.Path + locationPath *path.Builder // items contains the SharePoint.List.IDs or SharePoint.Page.IDs // and their corresponding last modified time items map[string]time.Time @@ -71,19 +73,25 @@ type prefetchCollection struct { betaService *betaAPI.BetaService statusUpdater support.StatusUpdater getter getItemByIDer + Counter *count.Bus + state data.CollectionState } // NewPrefetchCollection constructor function for creating a prefetchCollection func NewPrefetchCollection( getter getItemByIDer, - folderPath path.Path, + folderPath, prevPath path.Path, + locPb *path.Builder, ac api.Client, scope selectors.SharePointScope, statusUpdater support.StatusUpdater, ctrlOpts control.Options, + counter *count.Bus, ) *prefetchCollection { c := &prefetchCollection{ fullPath: folderPath, + prevPath: prevPath, + locationPath: locPb, items: make(map[string]time.Time), getter: getter, stream: make(map[path.CategoryType]chan data.Item), @@ -91,6 +99,8 @@ func NewPrefetchCollection( statusUpdater: statusUpdater, category: scope.Category().PathType(), ctrl: ctrlOpts, + Counter: counter.Local(), + state: data.StateOf(prevPath, folderPath, counter), } return c @@ -109,18 +119,16 @@ func (pc *prefetchCollection) FullPath() path.Path { return pc.fullPath } -// TODO(ashmrtn): Fill in with previous path once the Controller compares old -// and new folder hierarchies. func (pc prefetchCollection) PreviousPath() path.Path { - return nil + return pc.prevPath } func (pc prefetchCollection) LocationPath() *path.Builder { - return path.Builder{}.Append(pc.fullPath.Folders()...) + return pc.locationPath } func (pc prefetchCollection) State() data.CollectionState { - return data.NewState + return pc.state } func (pc prefetchCollection) DoNotMergeItems() bool { @@ -340,6 +348,8 @@ func (pc *prefetchCollection) handleListItems( atomic.AddInt64(objectBytes, size) atomic.AddInt64(objectSuccesses, 1) + info.ParentPath = pc.LocationPath().String() + rc := io.NopCloser(bytes.NewReader(entryBytes)) itemInfo := details.ItemInfo{ SharePoint: info, @@ -361,27 +371,33 @@ type lazyFetchCollection struct { // stream is the container for each individual SharePoint item of list stream chan data.Item // fullPath indicates the hierarchy within the collection - fullPath path.Path + fullPath, prevPath path.Path + locationPath *path.Builder // jobs contain the SharePoint.List.IDs and their last modified time items map[string]time.Time statusUpdater support.StatusUpdater getter getItemByIDer counter *count.Bus + state data.CollectionState } func NewLazyFetchCollection( getter getItemByIDer, - folderPath path.Path, + folderPath, prevPath path.Path, + locPb *path.Builder, statusUpdater support.StatusUpdater, counter *count.Bus, ) *lazyFetchCollection { c := &lazyFetchCollection{ fullPath: folderPath, + prevPath: prevPath, + locationPath: locPb, items: make(map[string]time.Time), getter: getter, stream: make(chan data.Item, collectionChannelBufferSize), statusUpdater: statusUpdater, counter: counter, + state: data.StateOf(prevPath, folderPath, counter), } return c @@ -397,17 +413,15 @@ func (lc *lazyFetchCollection) FullPath() path.Path { } func (lc lazyFetchCollection) LocationPath() *path.Builder { - return path.Builder{}.Append(lc.fullPath.Folders()...) + return lc.locationPath } -// TODO(hitesh): Implement PreviousPath, State, DoNotMergeItems -// once the Controller compares old and new folder hierarchies. func (lc lazyFetchCollection) PreviousPath() path.Path { - return nil + return lc.prevPath } func (lc lazyFetchCollection) State() data.CollectionState { - return data.NewState + return lc.state } func (lc lazyFetchCollection) DoNotMergeItems() bool { diff --git a/src/internal/m365/collection/site/collection_test.go b/src/internal/m365/collection/site/collection_test.go index da8c6407c..a268d80d3 100644 --- a/src/internal/m365/collection/site/collection_test.go +++ b/src/internal/m365/collection/site/collection_test.go @@ -32,6 +32,87 @@ import ( "github.com/alcionai/corso/src/pkg/services/m365/api/graph" ) +type SharePointCollectionUnitSuite struct { + tester.Suite + creds account.M365Config +} + +func TestSharePointCollectionUnitSuite(t *testing.T) { + suite.Run(t, &SharePointCollectionUnitSuite{Suite: tester.NewUnitSuite(t)}) +} + +func (suite *SharePointCollectionUnitSuite) SetupSuite() { + a := tconfig.NewFakeM365Account(suite.T()) + m365, err := a.M365Config() + require.NoError(suite.T(), err, clues.ToCore(err)) + suite.creds = m365 +} + +func (suite *SharePointCollectionUnitSuite) TestPrefetchCollection_state() { + t := suite.T() + + one, err := path.Build("tid", "siteid", path.SharePointService, path.ListsCategory, false, "one") + require.NoError(suite.T(), err, clues.ToCore(err)) + two, err := path.Build("tid", "siteid", path.SharePointService, path.ListsCategory, false, "two") + require.NoError(suite.T(), err, clues.ToCore(err)) + + sel := selectors.NewSharePointBackup([]string{"site"}) + ac, err := api.NewClient(suite.creds, control.DefaultOptions(), count.New()) + require.NoError(t, err, clues.ToCore(err)) + + table := []struct { + name string + prev path.Path + curr path.Path + loc *path.Builder + expect data.CollectionState + }{ + { + name: "new", + curr: one, + loc: path.Elements{"one"}.Builder(), + expect: data.NewState, + }, + { + name: "not moved", + prev: one, + curr: one, + loc: path.Elements{"one"}.Builder(), + expect: data.NotMovedState, + }, + { + name: "moved", + prev: one, + curr: two, + loc: path.Elements{"two"}.Builder(), + expect: data.MovedState, + }, + { + name: "deleted", + prev: one, + expect: data.DeletedState, + }, + } + for _, test := range table { + suite.Run(test.name, func() { + c := NewPrefetchCollection( + nil, + test.curr, + test.prev, + test.loc, + ac, + sel.Lists(selectors.Any())[0], + nil, + control.DefaultOptions(), + count.New()) + assert.Equal(t, test.expect, c.State(), "collection state") + assert.Equal(t, test.curr, c.FullPath(), "full path") + assert.Equal(t, test.prev, c.PreviousPath(), "prev path") + assert.Equal(t, test.loc, c.LocationPath(), "location path") + }) + } +} + type SharePointCollectionSuite struct { tester.Suite siteID string @@ -70,35 +151,44 @@ func TestSharePointCollectionSuite(t *testing.T) { // SharePoint collection and to use the data stream channel. func (suite *SharePointCollectionSuite) TestPrefetchCollection_Items() { var ( - tenant = "some" - user = "user" - dirRoot = "directory" + tenant = "some" + user = "user" + prevRoot = "prev" + dirRoot = "directory" ) sel := selectors.NewSharePointBackup([]string{"site"}) tables := []struct { name, itemName string + itemCount int64 scope selectors.SharePointScope cat path.CategoryType getter getItemByIDer - getDir func(t *testing.T) path.Path + prev string + curr string + locPb *path.Builder + getDir func(t *testing.T, root string) path.Path getItem func(t *testing.T, itemName string) data.Item }{ { - name: "List", - itemName: "MockListing", - cat: path.ListsCategory, - scope: sel.Lists(selectors.Any())[0], - getter: &mock.ListHandler{}, - getDir: func(t *testing.T) path.Path { + name: "List", + itemName: "MockListing", + itemCount: 1, + cat: path.ListsCategory, + scope: sel.Lists(selectors.Any())[0], + prev: prevRoot, + curr: dirRoot, + locPb: path.Elements{"MockListing"}.Builder(), + getter: &mock.ListHandler{}, + getDir: func(t *testing.T, root string) path.Path { dir, err := path.Build( tenant, user, path.SharePointService, path.ListsCategory, false, - dirRoot) + root) require.NoError(t, err, clues.ToCore(err)) return dir @@ -115,8 +205,10 @@ func (suite *SharePointCollectionSuite) TestPrefetchCollection_Items() { require.NoError(t, err, clues.ToCore(err)) info := &details.SharePointInfo{ + ItemType: details.SharePointList, List: &details.ListInfo{ - Name: name, + Name: name, + ItemCount: 1, }, } @@ -134,15 +226,18 @@ func (suite *SharePointCollectionSuite) TestPrefetchCollection_Items() { itemName: "MockPages", cat: path.PagesCategory, scope: sel.Pages(selectors.Any())[0], + prev: prevRoot, + curr: dirRoot, + locPb: path.Elements{"Pages"}.Builder(), getter: nil, - getDir: func(t *testing.T) path.Path { + getDir: func(t *testing.T, root string) path.Path { dir, err := path.Build( tenant, user, path.SharePointService, path.PagesCategory, false, - dirRoot) + root) require.NoError(t, err, clues.ToCore(err)) return dir @@ -172,11 +267,14 @@ func (suite *SharePointCollectionSuite) TestPrefetchCollection_Items() { col := NewPrefetchCollection( test.getter, - test.getDir(t), + test.getDir(t, test.curr), + test.getDir(t, test.prev), + test.locPb, suite.ac, test.scope, nil, - control.DefaultOptions()) + control.DefaultOptions(), + count.New()) col.stream[test.cat] = make(chan data.Item, collectionChannelBufferSize) col.stream[test.cat] <- test.getItem(t, test.itemName) @@ -195,10 +293,14 @@ func (suite *SharePointCollectionSuite) TestPrefetchCollection_Items() { require.NoError(t, err, clues.ToCore(err)) assert.NotNil(t, info) - assert.NotNil(t, info.SharePoint) + require.NotNil(t, info.SharePoint) - if test.cat == path.ListsCategory { + if info.SharePoint.ItemType == details.SharePointList { + require.NotNil(t, info.SharePoint.List) assert.Equal(t, test.itemName, info.SharePoint.List.Name) + assert.Equal(t, test.itemCount, info.SharePoint.List.ItemCount) + } else { + assert.Equal(t, test.itemName, info.SharePoint.ItemName) } }) } @@ -213,7 +315,23 @@ func (suite *SharePointCollectionSuite) TestLazyCollection_Items() { ) fullPath, err := path.Build( - "t", "pr", path.SharePointService, path.ListsCategory, false, "listid") + "t", + "pr", + path.SharePointService, + path.ListsCategory, + false, + "full") + require.NoError(t, err, clues.ToCore(err)) + + locPath := path.Elements{"full"}.Builder() + + prevPath, err := path.Build( + "t", + "pr", + path.SharePointService, + path.ListsCategory, + false, + "prev") require.NoError(t, err, clues.ToCore(err)) tables := []struct { @@ -223,7 +341,8 @@ func (suite *SharePointCollectionSuite) TestLazyCollection_Items() { expectReads []string }{ { - name: "no lists", + name: "no lists", + expectReads: []string{}, }, { name: "added lists", @@ -248,15 +367,19 @@ func (suite *SharePointCollectionSuite) TestLazyCollection_Items() { ctx, flush := tester.NewContext(t) defer flush() - getter := &mock.ListHandler{} + getter := mock.NewListHandler(nil, "", nil) defer getter.Check(t, test.expectReads) - col := &lazyFetchCollection{ - stream: make(chan data.Item), - fullPath: fullPath, - items: test.items, - getter: getter, - statusUpdater: statusUpdater, + col := NewLazyFetchCollection( + getter, + fullPath, + prevPath, + locPath, + statusUpdater, + count.New()) + + for listID, modTime := range test.items { + col.AddItem(listID, modTime) } for item := range col.Items(ctx, errs) { @@ -302,7 +425,7 @@ func (suite *SharePointCollectionSuite) TestLazyItem() { ctx, flush := tester.NewContext(t) defer flush() - lh := mock.ListHandler{} + lh := mock.NewListHandler(nil, "", nil) li := data.NewLazyItemWithInfo( ctx, @@ -346,9 +469,7 @@ func (suite *SharePointCollectionSuite) TestLazyItem_ReturnsEmptyReaderOnDeleted ctx, flush := tester.NewContext(t) defer flush() - lh := mock.ListHandler{ - Err: graph.ErrDeletedInFlight, - } + lh := mock.NewListHandler(nil, "", graph.ErrDeletedInFlight) li := data.NewLazyItemWithInfo( ctx, diff --git a/src/internal/m365/collection/site/handlers.go b/src/internal/m365/collection/site/handlers.go index 4ce482e59..253883a0e 100644 --- a/src/internal/m365/collection/site/handlers.go +++ b/src/internal/m365/collection/site/handlers.go @@ -7,12 +7,23 @@ import ( "github.com/alcionai/corso/src/pkg/backup/details" "github.com/alcionai/corso/src/pkg/fault" + "github.com/alcionai/corso/src/pkg/path" "github.com/alcionai/corso/src/pkg/services/m365/api" ) type backupHandler interface { getItemByIDer getItemser + canonicalPather +} + +// canonicalPath constructs the service and category specific path for +// the given builder. +type canonicalPather interface { + CanonicalPath( + storageDir path.Elements, + tenantID string, + ) (path.Path, error) } type getItemByIDer interface { diff --git a/src/internal/m365/collection/site/lists_handler.go b/src/internal/m365/collection/site/lists_handler.go index 72b282101..5ba1b9f78 100644 --- a/src/internal/m365/collection/site/lists_handler.go +++ b/src/internal/m365/collection/site/lists_handler.go @@ -7,6 +7,7 @@ import ( "github.com/alcionai/corso/src/pkg/backup/details" "github.com/alcionai/corso/src/pkg/fault" + "github.com/alcionai/corso/src/pkg/path" "github.com/alcionai/corso/src/pkg/services/m365/api" ) @@ -24,6 +25,20 @@ func NewListsBackupHandler(protectedResource string, ac api.Lists) listsBackupHa } } +func (bh listsBackupHandler) CanonicalPath( + storageDirFolders path.Elements, + tenantID string, +) (path.Path, error) { + return storageDirFolders. + Builder(). + ToDataLayerPath( + tenantID, + bh.protectedResource, + path.SharePointService, + path.ListsCategory, + false) +} + func (bh listsBackupHandler) GetItemByID( ctx context.Context, itemID string, diff --git a/src/internal/m365/collection/site/lists_metadata.go b/src/internal/m365/collection/site/lists_metadata.go new file mode 100644 index 000000000..ea1aa8907 --- /dev/null +++ b/src/internal/m365/collection/site/lists_metadata.go @@ -0,0 +1,118 @@ +package site + +import ( + "context" + "encoding/json" + + "github.com/alcionai/clues" + + "github.com/alcionai/corso/src/internal/data" + "github.com/alcionai/corso/src/pkg/backup/metadata" + "github.com/alcionai/corso/src/pkg/fault" + "github.com/alcionai/corso/src/pkg/logger" + "github.com/alcionai/corso/src/pkg/path" +) + +func parseListsMetadataCollections( + ctx context.Context, + cat path.CategoryType, + colls []data.RestoreCollection, +) (metadata.DeltaPaths, bool, error) { + cdp := metadata.CatDeltaPaths{ + cat: {}, + } + + found := map[path.CategoryType]map[string]struct{}{ + cat: {}, + } + + errs := fault.New(true) + + for _, coll := range colls { + var ( + breakLoop bool + items = coll.Items(ctx, errs) + category = coll.FullPath().Category() + ) + + for { + select { + case <-ctx.Done(): + return nil, false, clues.WrapWC(ctx, ctx.Err(), "parsing collection metadata") + + case item, ok := <-items: + if !ok || errs.Failure() != nil { + breakLoop = true + break + } + + var ( + m = map[string]string{} + cdps, wantedCategory = cdp[category] + ) + + if !wantedCategory { + continue + } + + err := json.NewDecoder(item.ToReader()).Decode(&m) + if err != nil { + return nil, false, clues.WrapWC(ctx, err, "decoding metadata json") + } + + if item.ID() == metadata.PreviousPathFileName { + if _, ok := found[category][metadata.PathKey]; ok { + return nil, false, clues.WrapWC(ctx, err, "multiple versions of path metadata") + } + + for k, p := range m { + cdps.AddPath(k, p) + } + + found[category][metadata.PathKey] = struct{}{} + + cdp[category] = cdps + } + } + + if breakLoop { + break + } + } + } + + if errs.Failure() != nil { + logger.CtxErr(ctx, errs.Failure()).Info("reading metadata collection items") + + return metadata.DeltaPaths{}, false, nil + } + + for _, dps := range cdp { + for k, dp := range dps { + if len(dp.Path) == 0 { + delete(dps, k) + } + } + } + + return cdp[cat], true, nil +} + +func pathFromPrevString(ps string) (path.Path, error) { + p, err := path.FromDataLayerPath(ps, false) + if err != nil { + return nil, clues.Wrap(err, "parsing previous path string") + } + + return p, nil +} + +func makeTombstones(dps metadata.DeltaPaths) map[string]string { + r := make(map[string]string, len(dps)) + + for id, v := range dps { + r[id] = v.Path + } + + return r +} diff --git a/src/internal/m365/collection/site/mock/list.go b/src/internal/m365/collection/site/mock/list.go index f75a4a406..ede41d372 100644 --- a/src/internal/m365/collection/site/mock/list.go +++ b/src/internal/m365/collection/site/mock/list.go @@ -7,40 +7,90 @@ import ( "github.com/microsoftgraph/msgraph-sdk-go/models" "github.com/stretchr/testify/assert" + "golang.org/x/exp/maps" "github.com/alcionai/corso/src/internal/common/ptr" "github.com/alcionai/corso/src/pkg/backup/details" + "github.com/alcionai/corso/src/pkg/path" + "github.com/alcionai/corso/src/pkg/services/m365/api" ) type ListHandler struct { - List models.Listable - ListIDs []string - Err error + protectedResource string + lists []models.Listable + listsMap map[string]models.Listable + err error } -func (lh *ListHandler) GetItemByID( +func NewListHandler(lists []models.Listable, protectedResource string, err error) ListHandler { + lstMap := make(map[string]models.Listable) + for _, lst := range lists { + lstMap[ptr.Val(lst.GetId())] = lst + } + + return ListHandler{ + protectedResource: protectedResource, + lists: lists, + listsMap: lstMap, + err: err, + } +} + +func (lh ListHandler) GetItemByID( ctx context.Context, itemID string, ) (models.Listable, *details.SharePointInfo, error) { - lh.ListIDs = append(lh.ListIDs, itemID) - - ls := models.NewList() - - lh.List = ls - lh.List.SetId(ptr.To(itemID)) - - info := &details.SharePointInfo{ - ItemName: itemID, + lstInfo := &details.SharePointInfo{ + List: &details.ListInfo{ + Name: itemID, + }, } - return ls, info, lh.Err + lst, ok := lh.listsMap[itemID] + if ok { + return lst, lstInfo, lh.err + } + + listInfo := models.NewListInfo() + listInfo.SetTemplate(ptr.To("genericList")) + + ls := models.NewList() + ls.SetId(ptr.To(itemID)) + ls.SetList(listInfo) + + lh.listsMap[itemID] = ls + + return ls, lstInfo, lh.err +} + +func (lh ListHandler) GetItems( + context.Context, + api.CallConfig, +) ([]models.Listable, error) { + return lh.lists, lh.err +} + +func (lh ListHandler) CanonicalPath( + storageDirFolders path.Elements, + tenantID string, +) (path.Path, error) { + return storageDirFolders. + Builder(). + ToDataLayerPath( + tenantID, + lh.protectedResource, + path.SharePointService, + path.ListsCategory, + false) } func (lh *ListHandler) Check(t *testing.T, expected []string) { - slices.Sort(lh.ListIDs) + listIDs := maps.Keys(lh.listsMap) + + slices.Sort(listIDs) slices.Sort(expected) - assert.Equal(t, expected, lh.ListIDs, "expected calls") + assert.Equal(t, expected, listIDs, "expected calls") } type ListRestoreHandler struct { @@ -60,3 +110,21 @@ func (lh *ListRestoreHandler) PostList( return lh.List, lh.Err } + +func StubLists(ids ...string) []models.Listable { + lists := make([]models.Listable, 0, len(ids)) + + for _, id := range ids { + listInfo := models.NewListInfo() + listInfo.SetTemplate(ptr.To("genericList")) + + lst := models.NewList() + lst.SetDisplayName(ptr.To(id)) + lst.SetId(ptr.To(id)) + lst.SetList(listInfo) + + lists = append(lists, lst) + } + + return lists +} diff --git a/src/internal/m365/service/sharepoint/backup.go b/src/internal/m365/service/sharepoint/backup.go index b3733c237..9cebb74a3 100644 --- a/src/internal/m365/service/sharepoint/backup.go +++ b/src/internal/m365/service/sharepoint/backup.go @@ -12,6 +12,7 @@ import ( "github.com/alcionai/corso/src/internal/m365/support" "github.com/alcionai/corso/src/internal/operations/inject" "github.com/alcionai/corso/src/pkg/account" + "github.com/alcionai/corso/src/pkg/backup/metadata" "github.com/alcionai/corso/src/pkg/count" "github.com/alcionai/corso/src/pkg/fault" "github.com/alcionai/corso/src/pkg/path" @@ -57,7 +58,7 @@ func ProduceBackupCollections( case path.ListsCategory: bh := site.NewListsBackupHandler(bpc.ProtectedResource.ID(), ac.Lists()) - spcs, err = site.CollectLists( + spcs, canUsePreviousBackup, err = site.CollectLists( ctx, bh, bpc, @@ -65,17 +66,13 @@ func ProduceBackupCollections( creds.AzureTenantID, scope, su, - errs, - counter) + counter, + errs) if err != nil { el.AddRecoverable(ctx, err) continue } - // Lists don't make use of previous metadata - // TODO: Revisit when we add support of lists - canUsePreviousBackup = true - case path.LibrariesCategory: spcs, canUsePreviousBackup, err = site.CollectLibraries( ctx, @@ -140,3 +137,10 @@ func ProduceBackupCollections( return collections, ssmb.ToReader(), canUsePreviousBackup, el.Failure() } + +// ListsMetadataFileNames only contains PreviousPathFileName +// and not DeltaURLsFileName because graph apis do not have delta support +// for Sharepoint Lists +func ListsMetadataFileNames() []string { + return []string{metadata.PreviousPathFileName} +} diff --git a/src/internal/operations/manifests_test.go b/src/internal/operations/manifests_test.go index 6538a7c43..a63459634 100644 --- a/src/internal/operations/manifests_test.go +++ b/src/internal/operations/manifests_test.go @@ -18,6 +18,7 @@ import ( "github.com/alcionai/corso/src/internal/kopia" "github.com/alcionai/corso/src/internal/m365" odConsts "github.com/alcionai/corso/src/internal/m365/service/onedrive/consts" + "github.com/alcionai/corso/src/internal/m365/service/sharepoint" "github.com/alcionai/corso/src/internal/operations/inject/mock" "github.com/alcionai/corso/src/internal/tester" "github.com/alcionai/corso/src/pkg/backup/identity" @@ -124,6 +125,12 @@ func (suite *OperationsManifestsUnitSuite) TestGetMetadataPaths() { path.GroupsService, ro, path.LibrariesCategory) + sharepointListsPath = makeMetadataBasePath( + suite.T(), + tid, + path.SharePointService, + ro, + path.ListsCategory) ) groupLibsSitesPath, err := groupLibsPath.Append(false, odConsts.SitesPathDir) @@ -276,6 +283,27 @@ func (suite *OperationsManifestsUnitSuite) TestGetMetadataPaths() { }, }}, }, + { + name: "single reason sharepoint lists", + manID: "single-sharepoint-lists", + reasons: []identity.Reasoner{ + identity.NewReason(tid, ro, path.SharePointService, path.ListsCategory), + }, + preFetchPaths: []string{"previouspath"}, + expectPaths: func(t *testing.T, files []string) []path.Path { + ps := make([]path.Path, 0, len(files)) + + assert.NoError(t, err, clues.ToCore(err)) + for _, f := range files { + p, err := sharepointListsPath.AppendItem(f) + assert.NoError(t, err, clues.ToCore(err)) + ps = append(ps, p) + } + + return ps + }, + restorePaths: getRestorePaths(t, sharepointListsPath, sharepoint.ListsMetadataFileNames()), + }, } for _, test := range table { suite.Run(test.name, func() { diff --git a/src/pkg/count/keys.go b/src/pkg/count/keys.go index 9aad8acf7..9ee1fb9fc 100644 --- a/src/pkg/count/keys.go +++ b/src/pkg/count/keys.go @@ -64,6 +64,7 @@ const ( PrevPaths Key = "previous-paths" PreviousPathMetadataCollision Key = "previous-path-metadata-collision" Sites Key = "sites" + Lists Key = "lists" SkippedContainers Key = "skipped-containers" StreamBytesAdded Key = "stream-bytes-added" StreamDirsAdded Key = "stream-dirs-added"