From e767bb0b77a865ccdfbb335ce9eb19701068fee0 Mon Sep 17 00:00:00 2001 From: Danny Date: Fri, 2 Dec 2022 14:31:20 -0500 Subject: [PATCH] Backup: SharePoint: List content retrieval (#1633) ## Description Logic for List retrieval from M365 back store. Keeping separate from Collection creation logic as there are a lot of calls to retrieve all supporting relationship data from the backstore. Adds collection to ## Type of change - [x] :sunflower: Feature ## Issue(s) *closes #1475 ## Test Plan - [x] :zap: Unit test --- .../connector/sharepoint/collection.go | 64 ++++ .../connector/sharepoint/collection_test.go | 5 - .../connector/sharepoint/data_collections.go | 6 + .../connector/sharepoint/helper_test.go | 52 ++++ src/internal/connector/sharepoint/list.go | 284 ++++++++++++++++++ .../connector/sharepoint/list_test.go | 61 ++++ 6 files changed, 467 insertions(+), 5 deletions(-) create mode 100644 src/internal/connector/sharepoint/helper_test.go create mode 100644 src/internal/connector/sharepoint/list.go create mode 100644 src/internal/connector/sharepoint/list_test.go diff --git a/src/internal/connector/sharepoint/collection.go b/src/internal/connector/sharepoint/collection.go index d7ac7196e..afb78b8f2 100644 --- a/src/internal/connector/sharepoint/collection.go +++ b/src/internal/connector/sharepoint/collection.go @@ -1,12 +1,16 @@ package sharepoint import ( + "bytes" "context" "io" + kw "github.com/microsoft/kiota-serialization-json-go" + "github.com/alcionai/corso/src/internal/connector/graph" "github.com/alcionai/corso/src/internal/connector/support" "github.com/alcionai/corso/src/internal/data" + "github.com/alcionai/corso/src/internal/observe" "github.com/alcionai/corso/src/pkg/backup/details" "github.com/alcionai/corso/src/pkg/logger" "github.com/alcionai/corso/src/pkg/path" @@ -63,6 +67,7 @@ func (sc *Collection) FullPath() path.Path { } func (sc *Collection) Items() <-chan data.Stream { + go sc.populate(context.TODO()) return sc.data } @@ -100,3 +105,62 @@ func (sc *Collection) finishPopulation(ctx context.Context, success int, totalBy sc.fullPath.Folder()) logger.Ctx(ctx).Debug(status.String()) } + +// populate utility function to retrieve data from back store for a given collection +func (sc *Collection) populate(ctx context.Context) { + var ( + success int + totalBytes, arrayLength int64 + errs error + writer = kw.NewJsonSerializationWriter() + ) + + // TODO: Insert correct ID for CollectionProgress + colProgress, closer := observe.CollectionProgress("name", sc.fullPath.Category().String(), sc.fullPath.Folder()) + go closer() + + defer func() { + close(colProgress) + sc.finishPopulation(ctx, success, totalBytes, errs) + }() + + // sc.jobs contains query = all of the site IDs. + for _, id := range sc.jobs { + // Retrieve list data from M365 + lists, err := loadLists(ctx, sc.service, id) + if err != nil { + errs = support.WrapAndAppend(id, err, errs) + } + // Write Data and Send + for _, lst := range lists { + err = writer.WriteObjectValue("", lst) + if err != nil { + errs = support.WrapAndAppend(*lst.GetId(), err, errs) + continue + } + + byteArray, err := writer.GetSerializedContent() + if err != nil { + errs = support.WrapAndAppend(*lst.GetId(), err, errs) + continue + } + + writer.Close() + + arrayLength = int64(len(byteArray)) + + if arrayLength > 0 { + totalBytes += arrayLength + + success++ + sc.data <- &Item{ + id: *lst.GetId(), + data: io.NopCloser(bytes.NewReader(byteArray)), + info: sharePointListInfo(lst, arrayLength), + } + + colProgress <- struct{}{} + } + } + } +} diff --git a/src/internal/connector/sharepoint/collection_test.go b/src/internal/connector/sharepoint/collection_test.go index 57c14c013..96806239b 100644 --- a/src/internal/connector/sharepoint/collection_test.go +++ b/src/internal/connector/sharepoint/collection_test.go @@ -12,7 +12,6 @@ import ( "github.com/alcionai/corso/src/internal/connector/mockconnector" "github.com/alcionai/corso/src/internal/data" - "github.com/alcionai/corso/src/internal/tester" "github.com/alcionai/corso/src/pkg/path" ) @@ -43,9 +42,6 @@ func (suite *SharePointCollectionSuite) TestSharePointDataReader_Valid() { // SharePoint collection and to use the data stream channel. func (suite *SharePointCollectionSuite) TestSharePointListCollection() { t := suite.T() - ctx, flush := tester.NewContext() - - defer flush() ow := kw.NewJsonSerializationWriter() listing := mockconnector.GetMockList("Mock List") @@ -73,7 +69,6 @@ func (suite *SharePointCollectionSuite) TestSharePointListCollection() { data: io.NopCloser(bytes.NewReader(byteArray)), info: sharePointListInfo(listing, int64(len(byteArray))), } - col.finishPopulation(ctx, 0, 0, nil) readItems := []data.Stream{} for item := range col.Items() { diff --git a/src/internal/connector/sharepoint/data_collections.go b/src/internal/connector/sharepoint/data_collections.go index 16bea69d9..bde8b1108 100644 --- a/src/internal/connector/sharepoint/data_collections.go +++ b/src/internal/connector/sharepoint/data_collections.go @@ -57,6 +57,12 @@ func DataCollections( defer close(foldersComplete) switch scope.Category().PathType() { + // TODO path.ListCategory: PR + // collect Lists + // done? + case path.ListsCategory: + return nil, fmt.Errorf("sharePoint list collections not supported") + case path.LibrariesCategory: spcs, err := collectLibraries( ctx, diff --git a/src/internal/connector/sharepoint/helper_test.go b/src/internal/connector/sharepoint/helper_test.go new file mode 100644 index 000000000..360bdf457 --- /dev/null +++ b/src/internal/connector/sharepoint/helper_test.go @@ -0,0 +1,52 @@ +package sharepoint + +import ( + msgraphsdk "github.com/microsoftgraph/msgraph-sdk-go" + "github.com/pkg/errors" + + "github.com/alcionai/corso/src/internal/connector/graph" + "github.com/alcionai/corso/src/pkg/account" +) + +type testService struct { + client msgraphsdk.GraphServiceClient + adapter msgraphsdk.GraphRequestAdapter + credentials account.M365Config +} + +//------------------------------------------------------------ +// Functions to comply with graph.Service Interface +//------------------------------------------------------------ + +func (ts *testService) Client() *msgraphsdk.GraphServiceClient { + return &ts.client +} + +func (ts *testService) Adapter() *msgraphsdk.GraphRequestAdapter { + return &ts.adapter +} + +func (ts *testService) ErrPolicy() bool { + return false +} + +func createTestService(credentials account.M365Config) (*testService, error) { + { + adapter, err := graph.CreateAdapter( + credentials.AzureTenantID, + credentials.AzureClientID, + credentials.AzureClientSecret, + ) + if err != nil { + return nil, errors.Wrap(err, "creating microsoft graph service for exchange") + } + + service := testService{ + adapter: *adapter, + client: *msgraphsdk.NewGraphServiceClient(adapter), + credentials: credentials, + } + + return &service, nil + } +} diff --git a/src/internal/connector/sharepoint/list.go b/src/internal/connector/sharepoint/list.go new file mode 100644 index 000000000..f2503cf78 --- /dev/null +++ b/src/internal/connector/sharepoint/list.go @@ -0,0 +1,284 @@ +package sharepoint + +import ( + "context" + + "github.com/microsoftgraph/msgraph-sdk-go/models" + "github.com/microsoftgraph/msgraph-sdk-go/sites/item/lists" + "github.com/microsoftgraph/msgraph-sdk-go/sites/item/lists/item/columns" + "github.com/microsoftgraph/msgraph-sdk-go/sites/item/lists/item/contenttypes" + "github.com/microsoftgraph/msgraph-sdk-go/sites/item/lists/item/contenttypes/item/columnlinks" + tc "github.com/microsoftgraph/msgraph-sdk-go/sites/item/lists/item/contenttypes/item/columns" + "github.com/microsoftgraph/msgraph-sdk-go/sites/item/lists/item/items" + "github.com/pkg/errors" + + "github.com/alcionai/corso/src/internal/connector/graph" + "github.com/alcionai/corso/src/internal/connector/support" +) + +// list.go contains additional functions to help retrieve SharePoint List data from M365 +// SharePoint lists represent lists on a site. Inherits additional properties from +// baseItem: https://learn.microsoft.com/en-us/graph/api/resources/baseitem?view=graph-rest-1.0 +// The full details concerning SharePoint Lists can +// be found at: https://learn.microsoft.com/en-us/graph/api/resources/list?view=graph-rest-1.0 +// Note additional calls are required for the relationships that exist outside of the object properties. + +// loadLists is a utility function to populate the List object. +// @param siteID the M365 ID that represents the SharePoint Site +// Makes additional calls to retrieve the following relationships: +// - Columns +// - ContentTypes +// - List Items +func loadLists( + ctx context.Context, + gs graph.Service, + siteID string, +) ([]models.Listable, error) { + var ( + prefix = gs.Client().SitesById(siteID) + builder = prefix.Lists() + results = make([]models.Listable, 0) + errs error + ) + + for { + resp, err := builder.Get(ctx, nil) + if err != nil { + return nil, support.WrapAndAppend(support.ConnectorStackErrorTrace(err), err, errs) + } + + for _, entry := range resp.GetValue() { + id := *entry.GetId() + + cols, err := fetchColumns(ctx, gs, siteID, id, "") + if err != nil { + errs = support.WrapAndAppend(siteID, err, errs) + continue + } + + entry.SetColumns(cols) + + cTypes, err := fetchContentTypes(ctx, gs, siteID, id) + if err != nil { + errs = support.WrapAndAppend(siteID, err, errs) + continue + } + + entry.SetContentTypes(cTypes) + + lItems, err := fetchListItems(ctx, gs, siteID, id) + if err != nil { + errs = support.WrapAndAppend(siteID, err, errs) + continue + } + + entry.SetItems(lItems) + + results = append(results, entry) + } + + if resp.GetOdataNextLink() == nil { + break + } + + builder = lists.NewListsRequestBuilder(*resp.GetOdataNextLink(), gs.Adapter()) + } + + if errs != nil { + return nil, errs + } + + return results, nil +} + +// fetchListItems utility for retrieving ListItem data and the associated relationship +// data. Additional call append data to the tracked items, and do not create additional collections. +// Additional Call: +// * Fields +func fetchListItems( + ctx context.Context, + gs graph.Service, + siteID, listID string, +) ([]models.ListItemable, error) { + var ( + prefix = gs.Client().SitesById(siteID).ListsById(listID) + builder = prefix.Items() + itms = make([]models.ListItemable, 0) + errs error + ) + + for { + resp, err := builder.Get(ctx, nil) + if err != nil { + return nil, errors.Wrap(err, support.ConnectorStackErrorTrace(err)) + } + + for _, itm := range resp.GetValue() { + newPrefix := prefix.ItemsById(*itm.GetId()) + + fields, err := newPrefix.Fields().Get(ctx, nil) + if err != nil { + errs = errors.Wrap(err, support.ConnectorStackErrorTrace(err)) + } + + itm.SetFields(fields) + + itms = append(itms, itm) + } + + if resp.GetOdataNextLink() == nil { + break + } + + builder = items.NewItemsRequestBuilder(*resp.GetOdataNextLink(), gs.Adapter()) + } + + if errs != nil { + return nil, errors.Wrap(errs, "fetchListItem unsuccessful") + } + + return itms, nil +} + +// fetchColumns utility function to return columns from a site. +// An additional call required to check for details concerning the SourceColumn. +// For additional details: https://learn.microsoft.com/en-us/graph/api/resources/columndefinition?view=graph-rest-1.0 +// TODO: Refactor on if/else (dadams39) +func fetchColumns( + ctx context.Context, + gs graph.Service, + siteID, listID, cTypeID string, +) ([]models.ColumnDefinitionable, error) { + cs := make([]models.ColumnDefinitionable, 0) + + if len(cTypeID) == 0 { + builder := gs.Client().SitesById(siteID).ListsById(listID).Columns() + + for { + resp, err := builder.Get(ctx, nil) + if err != nil { + return nil, support.WrapAndAppend(support.ConnectorStackErrorTrace(err), err, nil) + } + + cs = append(cs, resp.GetValue()...) + + if resp.GetOdataNextLink() == nil { + break + } + + builder = columns.NewColumnsRequestBuilder(*resp.GetOdataNextLink(), gs.Adapter()) + } + } else { + builder := gs.Client().SitesById(siteID).ListsById(listID).ContentTypesById(cTypeID).Columns() + + for { + resp, err := builder.Get(ctx, nil) + if err != nil { + return nil, errors.Wrap(err, support.ConnectorStackErrorTrace(err)) + } + + cs = append(cs, resp.GetValue()...) + + if resp.GetOdataNextLink() == nil { + break + } + + builder = tc.NewColumnsRequestBuilder(*resp.GetOdataNextLink(), gs.Adapter()) + } + } + + return cs, nil +} + +// fetchContentTypes retrieves all data for content type. Additional queries required +// for the following: +// - ColumnLinks +// - Columns +// The following two are not included: +// - ColumnPositions +// - BaseTypes +// These relationships are not included as they following error from the API: +// itemNotFound Item not found: error status code received from the API +// Current as of github.com/microsoftgraph/msgraph-sdk-go v0.40.0 +// TODO: Verify functionality after version upgrade or remove (dadams39) Check Stubs +func fetchContentTypes( + ctx context.Context, + gs graph.Service, + siteID, listID string, +) ([]models.ContentTypeable, error) { + var ( + cTypes = make([]models.ContentTypeable, 0) + builder = gs.Client().SitesById(siteID).ListsById(listID).ContentTypes() + errs error + ) + + for { + resp, err := builder.Get(ctx, nil) + if err != nil { + return nil, support.WrapAndAppend(support.ConnectorStackErrorTrace(err), err, errs) + } + + for _, cont := range resp.GetValue() { + id := *cont.GetId() + + links, err := fetchColumnLinks(ctx, gs, siteID, listID, id) + if err != nil { + errs = support.WrapAndAppend("unable to add column links to list", err, errs) + break + } + + cont.SetColumnLinks(links) + // TODO: stub for columPositions + + cs, err := fetchColumns(ctx, gs, siteID, listID, id) + if err != nil { + errs = support.WrapAndAppend("unable to populate columns for contentType", err, errs) + } + + cont.SetColumns(cs) + // TODO: stub for BaseTypes + + cTypes = append(cTypes, cont) + } + + if resp.GetOdataNextLink() == nil { + break + } + + builder = contenttypes.NewContentTypesRequestBuilder(*resp.GetOdataNextLink(), gs.Adapter()) + } + + if errs != nil { + return nil, errs + } + + return cTypes, nil +} + +func fetchColumnLinks( + ctx context.Context, + gs graph.Service, + siteID, listID, cTypeID string, +) ([]models.ColumnLinkable, error) { + var ( + builder = gs.Client().SitesById(siteID).ListsById(listID).ContentTypesById(cTypeID).ColumnLinks() + links = make([]models.ColumnLinkable, 0) + ) + + for { + resp, err := builder.Get(ctx, nil) + if err != nil { + return nil, errors.Wrap(err, support.ConnectorStackErrorTrace(err)) + } + + links = append(links, resp.GetValue()...) + + if resp.GetOdataNextLink() == nil { + break + } + + builder = columnlinks.NewColumnLinksRequestBuilder(*resp.GetOdataNextLink(), gs.Adapter()) + } + + return links, nil +} diff --git a/src/internal/connector/sharepoint/list_test.go b/src/internal/connector/sharepoint/list_test.go new file mode 100644 index 000000000..eca904c1a --- /dev/null +++ b/src/internal/connector/sharepoint/list_test.go @@ -0,0 +1,61 @@ +package sharepoint + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "github.com/stretchr/testify/suite" + + "github.com/alcionai/corso/src/internal/tester" + "github.com/alcionai/corso/src/pkg/account" +) + +type SharePointSuite struct { + suite.Suite + creds account.M365Config +} + +func (suite *SharePointSuite) SetupSuite() { + t := suite.T() + a := tester.NewM365Account(t) + m365, err := a.M365Config() + require.NoError(t, err) + + suite.creds = m365 +} + +func TestSharePointSuite(t *testing.T) { + if err := tester.RunOnAny( + tester.CorsoCITests, + ); err != nil { + t.Skip(err) + } + + suite.Run(t, new(SharePointSuite)) +} + +// Test LoadList --> Retrieves all data from backStore +// Functions tested: +// - fetchListItems() +// - fetchColumns() +// - fetchContentColumns() +// - fetchContentTypes() +// - fetchColumnLinks +// TODO: upgrade passed github.com/microsoftgraph/msgraph-sdk-go v0.40.0 +// to verify if these 2 calls are valid +// - fetchContentBaseTypes +// - fetchColumnPositions +func (suite *SharePointSuite) TestLoadList() { + ctx, flush := tester.NewContext() + defer flush() + + t := suite.T() + service, err := createTestService(suite.creds) + require.NoError(t, err) + + lists, err := loadLists(ctx, service, "root") + assert.NoError(t, err) + assert.Greater(t, len(lists), 0) + t.Logf("Length: %d\n", len(lists)) +}