From fe27fea2aecf2a06d57c8ae6de58948395eaa5c7 Mon Sep 17 00:00:00 2001 From: Danny Date: Tue, 20 Dec 2022 16:30:20 -0500 Subject: [PATCH] GC: Backup: SharePoint List Integration with Tests (#1788) ## Description Framework for the SharePoint backup workflow. ### Special Instructions for Review Ensure that the information used to build `path.Path` are in line with future PRs (@ashmrtn ) ## Type of change - [x] :sunflower: Feature ## Issue(s) * closes #1403 * closes #1474 * closes #1795 ## Test Plan - SharePoint Integration checked: - `src/internal/connector/data_collections_test.go` - `TestSharePointDataCollection()` - SharePoint List basics checked: - `src/internal/connector/sharepoint/collection_test.go` - SharePoint Operational Backup: - `src/internal/operations/backup_test.go` - `TestBackup_Run_sharePoint() ` verified as operational during testing - [x] :zap: Unit test --- .../connector/data_collections_test.go | 73 ++++++-- .../connector/sharepoint/collection.go | 104 ++++++----- .../connector/sharepoint/collection_test.go | 1 + .../connector/sharepoint/data_collections.go | 65 ++++++- src/internal/connector/sharepoint/list.go | 161 +++++++++++++----- .../connector/sharepoint/list_test.go | 6 +- .../connector/sharepoint/service_iterators.go | 88 ---------- src/internal/operations/backup_test.go | 4 +- 8 files changed, 305 insertions(+), 197 deletions(-) delete mode 100644 src/internal/connector/sharepoint/service_iterators.go diff --git a/src/internal/connector/data_collections_test.go b/src/internal/connector/data_collections_test.go index 95e512818..d3614214f 100644 --- a/src/internal/connector/data_collections_test.go +++ b/src/internal/connector/data_collections_test.go @@ -175,14 +175,26 @@ func (suite *ConnectorDataCollectionIntegrationSuite) TestSharePointDataCollecti connector := loadConnector(ctx, suite.T(), Sites) tests := []struct { name string - getSelector func(t *testing.T) selectors.Selector + expected int + getSelector func() selectors.Selector }{ { - name: "Libraries", - getSelector: func(t *testing.T) selectors.Selector { + name: "Libraries", + expected: 1, + getSelector: func() selectors.Selector { sel := selectors.NewSharePointBackup() sel.Include(sel.Libraries([]string{suite.site}, selectors.Any())) + return sel.Selector + }, + }, + { + name: "Lists", + expected: 0, + getSelector: func() selectors.Selector { + sel := selectors.NewSharePointBackup() + sel.Include(sel.Lists([]string{suite.site}, selectors.Any())) + return sel.Selector }, }, @@ -192,7 +204,7 @@ func (suite *ConnectorDataCollectionIntegrationSuite) TestSharePointDataCollecti suite.T().Run(test.name, func(t *testing.T) { collection, err := sharepoint.DataCollections( ctx, - test.getSelector(t), + test.getSelector(), []string{suite.site}, connector.credentials.AzureTenantID, connector.Service, @@ -202,7 +214,7 @@ func (suite *ConnectorDataCollectionIntegrationSuite) TestSharePointDataCollecti // we don't know an exact count of drives this will produce, // but it should be more than one. - assert.Less(t, 1, len(collection)) + assert.Less(t, test.expected, len(collection)) // the test only reads the firstt collection connector.incrementAwaitingMessages() @@ -601,6 +613,8 @@ func (suite *ConnectorCreateSharePointCollectionIntegrationSuite) SetupSuite() { tester.LogTimeOfTest(suite.T()) } +// TestCreateSharePointCollection. Ensures the proper amount of collections are created based +// on the selector. 
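+//
+// As an illustrative sketch (mirroring the Lists case in the table below), a
+// list-scoped backup selector is built and handed to the connector like so:
+//
+//	sel := selectors.NewSharePointBackup()
+//	sel.Include(sel.Lists([]string{siteID}, selectors.Any(), selectors.PrefixMatch()))
+//	cols, err := gc.DataCollections(ctx, sel.Selector, nil, control.Options{})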
func (suite *ConnectorCreateSharePointCollectionIntegrationSuite) TestCreateSharePointCollection() { ctx, flush := tester.NewContext() defer flush() @@ -609,15 +623,48 @@ func (suite *ConnectorCreateSharePointCollectionIntegrationSuite) TestCreateShar t = suite.T() siteID = tester.M365SiteID(t) gc = loadConnector(ctx, t, Sites) - sel = selectors.NewSharePointBackup() ) - sel.Include(sel.Libraries( - []string{siteID}, - []string{"foo"}, - selectors.PrefixMatch(), - )) + tables := []struct { + name string + sel func() selectors.Selector + comparator assert.ComparisonAssertionFunc + }{ + { + name: "SharePoint.Libraries", + comparator: assert.Equal, + sel: func() selectors.Selector { + sel := selectors.NewSharePointBackup() + sel.Include(sel.Libraries( + []string{siteID}, + []string{"foo"}, + selectors.PrefixMatch(), + )) - _, err := gc.DataCollections(ctx, sel.Selector, nil, control.Options{}) - require.NoError(t, err) + return sel.Selector + }, + }, + { + name: "SharePoint.Lists", + comparator: assert.Less, + sel: func() selectors.Selector { + sel := selectors.NewSharePointBackup() + sel.Include(sel.Lists( + []string{siteID}, + selectors.Any(), + selectors.PrefixMatch(), // without this option a SEG Fault occurs + )) + + return sel.Selector + }, + }, + } + + for _, test := range tables { + t.Run(test.name, func(t *testing.T) { + cols, err := gc.DataCollections(ctx, test.sel(), nil, control.Options{}) + require.NoError(t, err) + test.comparator(t, 0, len(cols)) + }) + } } diff --git a/src/internal/connector/sharepoint/collection.go b/src/internal/connector/sharepoint/collection.go index c17b9c732..fee8d33e9 100644 --- a/src/internal/connector/sharepoint/collection.go +++ b/src/internal/connector/sharepoint/collection.go @@ -7,6 +7,7 @@ import ( "time" kw "github.com/microsoft/kiota-serialization-json-go" + "github.com/microsoftgraph/msgraph-sdk-go/models" "github.com/alcionai/corso/src/internal/connector/graph" "github.com/alcionai/corso/src/internal/connector/support" @@ -34,16 +35,21 @@ var ( _ data.StreamModTime = &Item{} ) +// Collection is the SharePoint.List implementation of data.Collection. SharePoint.Libraries collections are supported +// by the oneDrive.Collection as the calls are identical for populating the Collection type Collection struct { + // data is the container for each individual SharePoint.List data chan data.Stream - jobs []string // fullPath indicates the hierarchy within the collection fullPath path.Path + // jobs contain the SharePoint.Site.ListIDs for the associated list(s). 
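+	// They are appended by collectLists (via AddJob) and consumed in populate,
+	// which hands them to loadSiteLists to retrieve the full list content.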
+ jobs []string // M365 IDs of the items of this collection service graph.Servicer statusUpdater support.StatusUpdater } +// NewCollection helper function for creating a Collection func NewCollection( folderPath path.Path, service graph.Servicer, @@ -116,13 +122,14 @@ func (sd *Item) ModTime() time.Time { return sd.modTime } -func (sc *Collection) finishPopulation(ctx context.Context, success int, totalBytes int64, errs error) { +func (sc *Collection) finishPopulation(ctx context.Context, attempts, success int, totalBytes int64, errs error) { close(sc.data) - attempted := len(sc.jobs) + + attempted := attempts status := support.CreateStatus( ctx, support.Backup, - 1, + len(sc.jobs), support.CollectionMetrics{ Objects: attempted, Successes: success, @@ -131,12 +138,16 @@ func (sc *Collection) finishPopulation(ctx context.Context, success int, totalBy errs, sc.fullPath.Folder()) logger.Ctx(ctx).Debug(status.String()) + + if sc.statusUpdater != nil { + sc.statusUpdater(status) + } } // populate utility function to retrieve data from back store for a given collection func (sc *Collection) populate(ctx context.Context) { var ( - success int + objects, success int totalBytes, arrayLength int64 errs error writer = kw.NewJsonSerializationWriter() @@ -148,52 +159,59 @@ func (sc *Collection) populate(ctx context.Context) { defer func() { close(colProgress) - sc.finishPopulation(ctx, success, totalBytes, errs) + sc.finishPopulation(ctx, objects, success, totalBytes, errs) }() - // sc.jobs contains query = all of the site IDs. - for _, id := range sc.jobs { - // Retrieve list data from M365 - lists, err := loadLists(ctx, sc.service, id) + // Retrieve list data from M365 + lists, err := loadSiteLists(ctx, sc.service, sc.fullPath.ResourceOwner(), sc.jobs) + if err != nil { + errs = support.WrapAndAppend(sc.fullPath.ResourceOwner(), err, errs) + } + + objects += len(lists) + // Write Data and Send + for _, lst := range lists { + byteArray, err := serializeListContent(writer, lst) if err != nil { - errs = support.WrapAndAppend(id, err, errs) + errs = support.WrapAndAppend(*lst.GetId(), err, errs) + continue } - // Write Data and Send - for _, lst := range lists { - err = writer.WriteObjectValue("", lst) - if err != nil { - errs = support.WrapAndAppend(*lst.GetId(), err, errs) - continue + + arrayLength = int64(len(byteArray)) + + if arrayLength > 0 { + t := time.Now() + if t1 := lst.GetLastModifiedDateTime(); t1 != nil { + t = *t1 } - byteArray, err := writer.GetSerializedContent() - if err != nil { - errs = support.WrapAndAppend(*lst.GetId(), err, errs) - continue + totalBytes += arrayLength + + success++ + sc.data <- &Item{ + id: *lst.GetId(), + data: io.NopCloser(bytes.NewReader(byteArray)), + info: sharePointListInfo(lst, arrayLength), + modTime: t, } - writer.Close() - - arrayLength = int64(len(byteArray)) - - if arrayLength > 0 { - t := time.Now() - if t1 := lst.GetLastModifiedDateTime(); t1 != nil { - t = *t1 - } - - totalBytes += arrayLength - - success++ - sc.data <- &Item{ - id: *lst.GetId(), - data: io.NopCloser(bytes.NewReader(byteArray)), - info: sharePointListInfo(lst, arrayLength), - modTime: t, - } - - colProgress <- struct{}{} - } + colProgress <- struct{}{} } } } + +func serializeListContent(writer *kw.JsonSerializationWriter, lst models.Listable) ([]byte, error) { + defer writer.Close() + + err := writer.WriteObjectValue("", lst) + if err != nil { + return nil, err + } + + byteArray, err := writer.GetSerializedContent() + if err != nil { + return nil, err + } + + return byteArray, nil 
+} diff --git a/src/internal/connector/sharepoint/collection_test.go b/src/internal/connector/sharepoint/collection_test.go index 5524c7976..02a15ce69 100644 --- a/src/internal/connector/sharepoint/collection_test.go +++ b/src/internal/connector/sharepoint/collection_test.go @@ -70,6 +70,7 @@ func (suite *SharePointCollectionSuite) TestSharePointListCollection() { } readItems := []data.Stream{} + for item := range col.Items() { readItems = append(readItems, item) } diff --git a/src/internal/connector/sharepoint/data_collections.go b/src/internal/connector/sharepoint/data_collections.go index c1c38c67e..66ca0ed5d 100644 --- a/src/internal/connector/sharepoint/data_collections.go +++ b/src/internal/connector/sharepoint/data_collections.go @@ -52,15 +52,25 @@ func DataCollections( defer closer() defer close(foldersComplete) + var spcs []data.Collection + switch scope.Category().PathType() { - // TODO path.ListCategory: PR - // collect Lists - // done? case path.ListsCategory: - return nil, fmt.Errorf("sharePoint list collections not supported") + spcs, err = collectLists( + ctx, + serv, + tenantID, + site, + scope, + su, + ctrlOpts, + ) + if err != nil { + return nil, support.WrapAndAppend(site, err, errs) + } case path.LibrariesCategory: - spcs, err := collectLibraries( + spcs, err = collectLibraries( ctx, serv, tenantID, @@ -71,10 +81,10 @@ func DataCollections( if err != nil { return nil, support.WrapAndAppend(site, err, errs) } - - collections = append(collections, spcs...) } + collections = append(collections, spcs...) + foldersComplete <- struct{}{} } } @@ -82,6 +92,47 @@ func DataCollections( return collections, errs } +func collectLists( + ctx context.Context, + serv graph.Servicer, + tenantID, siteID string, + scope selectors.SharePointScope, + updater statusUpdater, + ctrlOpts control.Options, +) ([]data.Collection, error) { + logger.Ctx(ctx).With("site", siteID).Debug("Creating SharePoint List Collections") + + if scope.Matches(selectors.SharePointSite, siteID) { + spcs := make([]data.Collection, 0) + + tuples, err := preFetchLists(ctx, serv, siteID) + if err != nil { + return nil, err + } + + for _, tuple := range tuples { + dir, err := path.Builder{}.Append(tuple.name). + ToDataLayerSharePointPath( + tenantID, + siteID, + path.ListsCategory, + false) + if err != nil { + return nil, errors.Wrapf(err, "failed to create collection path for site: %s", siteID) + } + + collection := NewCollection(dir, serv, updater.UpdateStatus) + collection.AddJob(tuple.id) + + spcs = append(spcs, collection) + } + + return spcs, nil + } + + return nil, nil +} + // collectLibraries constructs a onedrive Collections struct and Get()s // all the drives associated with the site. func collectLibraries( diff --git a/src/internal/connector/sharepoint/list.go b/src/internal/connector/sharepoint/list.go index e35487e9b..159fca216 100644 --- a/src/internal/connector/sharepoint/list.go +++ b/src/internal/connector/sharepoint/list.go @@ -11,65 +11,52 @@ import ( "github.com/alcionai/corso/src/internal/connector/support" ) -// list.go contains additional functions to help retrieve SharePoint List data from M365 -// SharePoint lists represent lists on a site. 
Inherits additional properties from -// baseItem: https://learn.microsoft.com/en-us/graph/api/resources/baseitem?view=graph-rest-1.0 -// The full details concerning SharePoint Lists can -// be found at: https://learn.microsoft.com/en-us/graph/api/resources/list?view=graph-rest-1.0 -// Note additional calls are required for the relationships that exist outside of the object properties. +type listTuple struct { + name string + id string +} -// loadLists is a utility function to populate the List object. -// @param siteID the M365 ID that represents the SharePoint Site -// Makes additional calls to retrieve the following relationships: -// - Columns -// - ContentTypes -// - List Items -func loadLists( +func preFetchListOptions() *mssite.ItemListsRequestBuilderGetRequestConfiguration { + selecting := []string{"id", "displayName"} + queryOptions := mssite.ItemListsRequestBuilderGetQueryParameters{ + Select: selecting, + } + options := &mssite.ItemListsRequestBuilderGetRequestConfiguration{ + QueryParameters: &queryOptions, + } + + return options +} + +func preFetchLists( ctx context.Context, gs graph.Servicer, siteID string, -) ([]models.Listable, error) { +) ([]listTuple, error) { var ( - prefix = gs.Client().SitesById(siteID) - builder = prefix.Lists() - results = make([]models.Listable, 0) - errs error + builder = gs.Client().SitesById(siteID).Lists() + options = preFetchListOptions() + listTuples = make([]listTuple, 0) + errs error ) for { - resp, err := builder.Get(ctx, nil) + resp, err := builder.Get(ctx, options) if err != nil { return nil, support.WrapAndAppend(support.ConnectorStackErrorTrace(err), err, errs) } for _, entry := range resp.GetValue() { - id := *entry.GetId() + temp := listTuple{id: *entry.GetId()} - cols, err := fetchColumns(ctx, gs, siteID, id, "") - if err != nil { - errs = support.WrapAndAppend(siteID, err, errs) - continue + name := entry.GetDisplayName() + if name != nil { + temp.name = *name + } else { + temp.name = *entry.GetId() } - entry.SetColumns(cols) - - cTypes, err := fetchContentTypes(ctx, gs, siteID, id) - if err != nil { - errs = support.WrapAndAppend(siteID, err, errs) - continue - } - - entry.SetContentTypes(cTypes) - - lItems, err := fetchListItems(ctx, gs, siteID, id) - if err != nil { - errs = support.WrapAndAppend(siteID, err, errs) - continue - } - - entry.SetItems(lItems) - - results = append(results, entry) + listTuples = append(listTuples, temp) } if resp.GetOdataNextLink() == nil { @@ -79,6 +66,57 @@ func loadLists( builder = mssite.NewItemListsRequestBuilder(*resp.GetOdataNextLink(), gs.Adapter()) } + return listTuples, nil +} + +// list.go contains additional functions to help retrieve SharePoint List data from M365 +// SharePoint lists represent lists on a site. Inherits additional properties from +// baseItem: https://learn.microsoft.com/en-us/graph/api/resources/baseitem?view=graph-rest-1.0 +// The full details concerning SharePoint Lists can +// be found at: https://learn.microsoft.com/en-us/graph/api/resources/list?view=graph-rest-1.0 +// Note additional calls are required for the relationships that exist outside of the object properties. + +// loadSiteLists is a utility function to populate a collection of SharePoint.List +// objects associated with a given siteID. 
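+// The list IDs are typically gathered beforehand with preFetchLists; a minimal
+// sketch of the pairing (mirroring list_test.go in this change):
+//
+//	tuples, err := preFetchLists(ctx, gs, siteID)
+//	// handle err ...
+//	ids := make([]string, 0, len(tuples))
+//	for _, tp := range tuples {
+//		ids = append(ids, tp.id)
+//	}
+//	lists, err := loadSiteLists(ctx, gs, siteID, ids)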
+// @param siteID the M365 ID that represents the SharePoint Site +// Makes additional calls to retrieve the following relationships: +// - Columns +// - ContentTypes +// - List Items +func loadSiteLists( + ctx context.Context, + gs graph.Servicer, + siteID string, + listIDs []string, +) ([]models.Listable, error) { + var ( + results = make([]models.Listable, 0) + errs error + ) + + for _, listID := range listIDs { + entry, err := gs.Client().SitesById(siteID).ListsById(listID).Get(ctx, nil) + if err != nil { + errs = support.WrapAndAppend( + listID, + errors.Wrap(err, support.ConnectorStackErrorTrace(err)), + errs, + ) + } + + cols, cTypes, lItems, err := fetchListContents(ctx, gs, siteID, listID) + if err == nil { + entry.SetColumns(cols) + entry.SetContentTypes(cTypes) + entry.SetItems(lItems) + } else { + errs = support.WrapAndAppend("unable to fetchRelationships during loadSiteLists", err, errs) + continue + } + + results = append(results, entry) + } + if errs != nil { return nil, errs } @@ -86,6 +124,43 @@ func loadLists( return results, nil } +// fetchListContents utility function to retrieve associated M365 relationships +// which are not included with the standard List query: +// - Columns, ContentTypes, ListItems +func fetchListContents( + ctx context.Context, + service graph.Servicer, + siteID, listID string, +) ( + []models.ColumnDefinitionable, + []models.ContentTypeable, + []models.ListItemable, + error, +) { + var errs error + + cols, err := fetchColumns(ctx, service, siteID, listID, "") + if err != nil { + errs = support.WrapAndAppend(siteID, err, errs) + } + + cTypes, err := fetchContentTypes(ctx, service, siteID, listID) + if err != nil { + errs = support.WrapAndAppend(siteID, err, errs) + } + + lItems, err := fetchListItems(ctx, service, siteID, listID) + if err != nil { + errs = support.WrapAndAppend(siteID, err, errs) + } + + if errs != nil { + return nil, nil, nil, errs + } + + return cols, cTypes, lItems, nil +} + // fetchListItems utility for retrieving ListItem data and the associated relationship // data. Additional call append data to the tracked items, and do not create additional collections. // Additional Call: diff --git a/src/internal/connector/sharepoint/list_test.go b/src/internal/connector/sharepoint/list_test.go index eca904c1a..6621de210 100644 --- a/src/internal/connector/sharepoint/list_test.go +++ b/src/internal/connector/sharepoint/list_test.go @@ -54,7 +54,11 @@ func (suite *SharePointSuite) TestLoadList() { service, err := createTestService(suite.creds) require.NoError(t, err) - lists, err := loadLists(ctx, service, "root") + tuples, err := preFetchLists(ctx, service, "root") + require.NoError(t, err) + + job := []string{tuples[0].id} + lists, err := loadSiteLists(ctx, service, "root", job) assert.NoError(t, err) assert.Greater(t, len(lists), 0) t.Logf("Length: %d\n", len(lists)) diff --git a/src/internal/connector/sharepoint/service_iterators.go b/src/internal/connector/sharepoint/service_iterators.go deleted file mode 100644 index d7fa23484..000000000 --- a/src/internal/connector/sharepoint/service_iterators.go +++ /dev/null @@ -1,88 +0,0 @@ -package sharepoint - -import ( - "context" - - "github.com/alcionai/corso/src/internal/connector/graph" - "github.com/alcionai/corso/src/internal/connector/support" - "github.com/alcionai/corso/src/pkg/selectors" -) - -// FilterContainersAndFillCollections is a utility function -// that places the M365 object ids belonging to specific directories -// into a Collection. 
Items outside of those directories are omitted. -// @param collection is filled with during this function. -func FilterContainersAndFillCollections( - ctx context.Context, - qp graph.QueryParams, - collections map[string]*Collection, - statusUpdater support.StatusUpdater, - resolver graph.ContainerResolver, - scope selectors.SharePointScope, -) error { - return nil -} - -// code previously within the function, moved here to make the linter happy - -// var ( -// category = qp.Scope.Category().PathType() -// collectionType = CategoryToOptionIdentifier(category) -// errs error -// ) - -// for _, c := range resolver.Items() { -// dirPath, ok := pathAndMatch(qp, category, c) -// if ok { -// // Create only those that match -// service, err := createService(qp.Credentials, qp.FailFast) -// if err != nil { -// errs = support.WrapAndAppend( -// qp.User+" FilterContainerAndFillCollection", -// err, -// errs) - -// if qp.FailFast { -// return errs -// } -// } - -// edc := NewCollection( -// qp.User, -// dirPath, -// collectionType, -// service, -// statusUpdater, -// ) -// collections[*c.GetId()] = &edc -// } -// } - -// for directoryID, col := range collections { -// fetchFunc, err := getFetchIDFunc(category) -// if err != nil { -// errs = support.WrapAndAppend( -// qp.User, -// err, -// errs) - -// if qp.FailFast { -// return errs -// } - -// continue -// } - -// jobs, err := fetchFunc(ctx, col.service, qp.User, directoryID) -// if err != nil { -// errs = support.WrapAndAppend( -// qp.User, -// err, -// errs, -// ) -// } - -// col.jobs = append(col.jobs, jobs...) -// } - -// return errs diff --git a/src/internal/operations/backup_test.go b/src/internal/operations/backup_test.go index 1404e7927..273323c82 100644 --- a/src/internal/operations/backup_test.go +++ b/src/internal/operations/backup_test.go @@ -843,8 +843,8 @@ func (suite *BackupOpIntegrationSuite) TestBackup_Run_sharePoint() { siteID = tester.M365SiteID(t) sel = selectors.NewSharePointBackup() ) - // TODO: dadams39 Issue #1795: Revert to Sites Upon List Integration - sel.Include(sel.Libraries([]string{siteID}, selectors.Any())) + + sel.Include(sel.Sites([]string{siteID})) bo, _, _, _, closer := prepNewBackupOp(t, ctx, mb, sel.Selector) defer closer()
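+	// With list collections integrated in this PR, the sel.Sites scope above
+	// presumably exercises both Libraries and Lists data for the site (#1795).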