GC: Backup: SharePoint List Integration with Tests (#1788)

## Description
Framework for the SharePoint backup workflow. 

### Special Instructions for Review
Ensure that the information used to build `path.Path` is in line with future PRs (@ashmrtn)
## Type of change

<!--- Please check the type of change your PR introduces: --->
- [x] 🌻 Feature

## Issue(s)

* closes #1403
* closes #1474
* closes #1795

## Test Plan
- SharePoint Integration checked:
  - `src/internal/connector/data_collections_test.go`
  - `TestSharePointDataCollection()`
- SharePoint List basics checked:  
  - `src/internal/connector/sharepoint/collection_test.go`
- SharePoint Operational Backup:
  - `src/internal/operations/backup_test.go`
  - `TestBackup_Run_sharePoint()` verified as operational during testing
- [x]  Unit test
This commit is contained in:
Danny 2022-12-20 16:30:20 -05:00 committed by GitHub
parent d383a2d568
commit fe27fea2ae
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 305 additions and 197 deletions

View File

@ -175,14 +175,26 @@ func (suite *ConnectorDataCollectionIntegrationSuite) TestSharePointDataCollecti
connector := loadConnector(ctx, suite.T(), Sites) connector := loadConnector(ctx, suite.T(), Sites)
tests := []struct { tests := []struct {
name string name string
getSelector func(t *testing.T) selectors.Selector expected int
getSelector func() selectors.Selector
}{ }{
{ {
name: "Libraries", name: "Libraries",
getSelector: func(t *testing.T) selectors.Selector { expected: 1,
getSelector: func() selectors.Selector {
sel := selectors.NewSharePointBackup() sel := selectors.NewSharePointBackup()
sel.Include(sel.Libraries([]string{suite.site}, selectors.Any())) sel.Include(sel.Libraries([]string{suite.site}, selectors.Any()))
return sel.Selector
},
},
{
name: "Lists",
expected: 0,
getSelector: func() selectors.Selector {
sel := selectors.NewSharePointBackup()
sel.Include(sel.Lists([]string{suite.site}, selectors.Any()))
return sel.Selector return sel.Selector
}, },
}, },
@ -192,7 +204,7 @@ func (suite *ConnectorDataCollectionIntegrationSuite) TestSharePointDataCollecti
suite.T().Run(test.name, func(t *testing.T) { suite.T().Run(test.name, func(t *testing.T) {
collection, err := sharepoint.DataCollections( collection, err := sharepoint.DataCollections(
ctx, ctx,
test.getSelector(t), test.getSelector(),
[]string{suite.site}, []string{suite.site},
connector.credentials.AzureTenantID, connector.credentials.AzureTenantID,
connector.Service, connector.Service,
@ -202,7 +214,7 @@ func (suite *ConnectorDataCollectionIntegrationSuite) TestSharePointDataCollecti
// we don't know an exact count of drives this will produce, // we don't know an exact count of drives this will produce,
// but it should be more than one. // but it should be more than one.
assert.Less(t, 1, len(collection)) assert.Less(t, test.expected, len(collection))
// the test only reads the first collection // the test only reads the first collection
connector.incrementAwaitingMessages() connector.incrementAwaitingMessages()
@ -601,6 +613,8 @@ func (suite *ConnectorCreateSharePointCollectionIntegrationSuite) SetupSuite() {
tester.LogTimeOfTest(suite.T()) tester.LogTimeOfTest(suite.T())
} }
// TestCreateSharePointCollection. Ensures the proper amount of collections are created based
// on the selector.
func (suite *ConnectorCreateSharePointCollectionIntegrationSuite) TestCreateSharePointCollection() { func (suite *ConnectorCreateSharePointCollectionIntegrationSuite) TestCreateSharePointCollection() {
ctx, flush := tester.NewContext() ctx, flush := tester.NewContext()
defer flush() defer flush()
@ -609,15 +623,48 @@ func (suite *ConnectorCreateSharePointCollectionIntegrationSuite) TestCreateShar
t = suite.T() t = suite.T()
siteID = tester.M365SiteID(t) siteID = tester.M365SiteID(t)
gc = loadConnector(ctx, t, Sites) gc = loadConnector(ctx, t, Sites)
sel = selectors.NewSharePointBackup()
) )
tables := []struct {
name string
sel func() selectors.Selector
comparator assert.ComparisonAssertionFunc
}{
{
name: "SharePoint.Libraries",
comparator: assert.Equal,
sel: func() selectors.Selector {
sel := selectors.NewSharePointBackup()
sel.Include(sel.Libraries( sel.Include(sel.Libraries(
[]string{siteID}, []string{siteID},
[]string{"foo"}, []string{"foo"},
selectors.PrefixMatch(), selectors.PrefixMatch(),
)) ))
_, err := gc.DataCollections(ctx, sel.Selector, nil, control.Options{}) return sel.Selector
},
},
{
name: "SharePoint.Lists",
comparator: assert.Less,
sel: func() selectors.Selector {
sel := selectors.NewSharePointBackup()
sel.Include(sel.Lists(
[]string{siteID},
selectors.Any(),
selectors.PrefixMatch(), // without this option a SEG Fault occurs
))
return sel.Selector
},
},
}
for _, test := range tables {
t.Run(test.name, func(t *testing.T) {
cols, err := gc.DataCollections(ctx, test.sel(), nil, control.Options{})
require.NoError(t, err) require.NoError(t, err)
test.comparator(t, 0, len(cols))
})
}
} }

View File

@ -7,6 +7,7 @@ import (
"time" "time"
kw "github.com/microsoft/kiota-serialization-json-go" kw "github.com/microsoft/kiota-serialization-json-go"
"github.com/microsoftgraph/msgraph-sdk-go/models"
"github.com/alcionai/corso/src/internal/connector/graph" "github.com/alcionai/corso/src/internal/connector/graph"
"github.com/alcionai/corso/src/internal/connector/support" "github.com/alcionai/corso/src/internal/connector/support"
@ -34,16 +35,21 @@ var (
_ data.StreamModTime = &Item{} _ data.StreamModTime = &Item{}
) )
// Collection is the SharePoint.List implementation of data.Collection. SharePoint.Libraries collections are supported
// by the oneDrive.Collection as the calls are identical for populating the Collection
type Collection struct { type Collection struct {
// data is the container for each individual SharePoint.List
data chan data.Stream data chan data.Stream
jobs []string
// fullPath indicates the hierarchy within the collection // fullPath indicates the hierarchy within the collection
fullPath path.Path fullPath path.Path
// jobs contain the SharePoint.Site.ListIDs for the associated list(s).
jobs []string
// M365 IDs of the items of this collection // M365 IDs of the items of this collection
service graph.Servicer service graph.Servicer
statusUpdater support.StatusUpdater statusUpdater support.StatusUpdater
} }
// NewCollection helper function for creating a Collection
func NewCollection( func NewCollection(
folderPath path.Path, folderPath path.Path,
service graph.Servicer, service graph.Servicer,
@ -116,13 +122,14 @@ func (sd *Item) ModTime() time.Time {
return sd.modTime return sd.modTime
} }
func (sc *Collection) finishPopulation(ctx context.Context, success int, totalBytes int64, errs error) { func (sc *Collection) finishPopulation(ctx context.Context, attempts, success int, totalBytes int64, errs error) {
close(sc.data) close(sc.data)
attempted := len(sc.jobs)
attempted := attempts
status := support.CreateStatus( status := support.CreateStatus(
ctx, ctx,
support.Backup, support.Backup,
1, len(sc.jobs),
support.CollectionMetrics{ support.CollectionMetrics{
Objects: attempted, Objects: attempted,
Successes: success, Successes: success,
@ -131,12 +138,16 @@ func (sc *Collection) finishPopulation(ctx context.Context, success int, totalBy
errs, errs,
sc.fullPath.Folder()) sc.fullPath.Folder())
logger.Ctx(ctx).Debug(status.String()) logger.Ctx(ctx).Debug(status.String())
if sc.statusUpdater != nil {
sc.statusUpdater(status)
}
} }
// populate utility function to retrieve data from back store for a given collection // populate utility function to retrieve data from back store for a given collection
func (sc *Collection) populate(ctx context.Context) { func (sc *Collection) populate(ctx context.Context) {
var ( var (
success int objects, success int
totalBytes, arrayLength int64 totalBytes, arrayLength int64
errs error errs error
writer = kw.NewJsonSerializationWriter() writer = kw.NewJsonSerializationWriter()
@ -148,32 +159,24 @@ func (sc *Collection) populate(ctx context.Context) {
defer func() { defer func() {
close(colProgress) close(colProgress)
sc.finishPopulation(ctx, success, totalBytes, errs) sc.finishPopulation(ctx, objects, success, totalBytes, errs)
}() }()
// sc.jobs contains query = all of the site IDs.
for _, id := range sc.jobs {
// Retrieve list data from M365 // Retrieve list data from M365
lists, err := loadLists(ctx, sc.service, id) lists, err := loadSiteLists(ctx, sc.service, sc.fullPath.ResourceOwner(), sc.jobs)
if err != nil { if err != nil {
errs = support.WrapAndAppend(id, err, errs) errs = support.WrapAndAppend(sc.fullPath.ResourceOwner(), err, errs)
} }
objects += len(lists)
// Write Data and Send // Write Data and Send
for _, lst := range lists { for _, lst := range lists {
err = writer.WriteObjectValue("", lst) byteArray, err := serializeListContent(writer, lst)
if err != nil { if err != nil {
errs = support.WrapAndAppend(*lst.GetId(), err, errs) errs = support.WrapAndAppend(*lst.GetId(), err, errs)
continue continue
} }
byteArray, err := writer.GetSerializedContent()
if err != nil {
errs = support.WrapAndAppend(*lst.GetId(), err, errs)
continue
}
writer.Close()
arrayLength = int64(len(byteArray)) arrayLength = int64(len(byteArray))
if arrayLength > 0 { if arrayLength > 0 {
@ -195,5 +198,20 @@ func (sc *Collection) populate(ctx context.Context) {
colProgress <- struct{}{} colProgress <- struct{}{}
} }
} }
} }
func serializeListContent(writer *kw.JsonSerializationWriter, lst models.Listable) ([]byte, error) {
defer writer.Close()
err := writer.WriteObjectValue("", lst)
if err != nil {
return nil, err
}
byteArray, err := writer.GetSerializedContent()
if err != nil {
return nil, err
}
return byteArray, nil
} }

View File

@ -70,6 +70,7 @@ func (suite *SharePointCollectionSuite) TestSharePointListCollection() {
} }
readItems := []data.Stream{} readItems := []data.Stream{}
for item := range col.Items() { for item := range col.Items() {
readItems = append(readItems, item) readItems = append(readItems, item)
} }

View File

@ -52,15 +52,25 @@ func DataCollections(
defer closer() defer closer()
defer close(foldersComplete) defer close(foldersComplete)
var spcs []data.Collection
switch scope.Category().PathType() { switch scope.Category().PathType() {
// TODO path.ListCategory: PR
// collect Lists
// done?
case path.ListsCategory: case path.ListsCategory:
return nil, fmt.Errorf("sharePoint list collections not supported") spcs, err = collectLists(
ctx,
serv,
tenantID,
site,
scope,
su,
ctrlOpts,
)
if err != nil {
return nil, support.WrapAndAppend(site, err, errs)
}
case path.LibrariesCategory: case path.LibrariesCategory:
spcs, err := collectLibraries( spcs, err = collectLibraries(
ctx, ctx,
serv, serv,
tenantID, tenantID,
@ -71,9 +81,9 @@ func DataCollections(
if err != nil { if err != nil {
return nil, support.WrapAndAppend(site, err, errs) return nil, support.WrapAndAppend(site, err, errs)
} }
}
collections = append(collections, spcs...) collections = append(collections, spcs...)
}
foldersComplete <- struct{}{} foldersComplete <- struct{}{}
} }
@ -82,6 +92,47 @@ func DataCollections(
return collections, errs return collections, errs
} }
// collectLists creates one Collection for every list returned by
// preFetchLists for the given site.
// Each collection's path is built from the list's name under
// path.ListsCategory, and the list's id is added to it as a job.
// When the scope does not match the site, no collections and no error are
// returned. Any failure to fetch the lists or to build a path aborts the
// whole call.
func collectLists(
	ctx context.Context,
	serv graph.Servicer,
	tenantID, siteID string,
	scope selectors.SharePointScope,
	updater statusUpdater,
	ctrlOpts control.Options,
) ([]data.Collection, error) {
	logger.Ctx(ctx).With("site", siteID).Debug("Creating SharePoint List Collections")

	// guard: nothing to collect for a site outside of the scope
	if !scope.Matches(selectors.SharePointSite, siteID) {
		return nil, nil
	}

	collections := make([]data.Collection, 0)

	lists, err := preFetchLists(ctx, serv, siteID)
	if err != nil {
		return nil, err
	}

	for _, lst := range lists {
		listPath, err := path.Builder{}.
			Append(lst.name).
			ToDataLayerSharePointPath(tenantID, siteID, path.ListsCategory, false)
		if err != nil {
			return nil, errors.Wrapf(err, "failed to create collection path for site: %s", siteID)
		}

		col := NewCollection(listPath, serv, updater.UpdateStatus)
		col.AddJob(lst.id)

		collections = append(collections, col)
	}

	return collections, nil
}
// collectLibraries constructs a onedrive Collections struct and Get()s // collectLibraries constructs a onedrive Collections struct and Get()s
// all the drives associated with the site. // all the drives associated with the site.
func collectLibraries( func collectLibraries(

View File

@ -11,65 +11,52 @@ import (
"github.com/alcionai/corso/src/internal/connector/support" "github.com/alcionai/corso/src/internal/connector/support"
) )
// list.go contains additional functions to help retrieve SharePoint List data from M365 type listTuple struct {
// SharePoint lists represent lists on a site. Inherits additional properties from name string
// baseItem: https://learn.microsoft.com/en-us/graph/api/resources/baseitem?view=graph-rest-1.0 id string
// The full details concerning SharePoint Lists can }
// be found at: https://learn.microsoft.com/en-us/graph/api/resources/list?view=graph-rest-1.0
// Note additional calls are required for the relationships that exist outside of the object properties.
// loadLists is a utility function to populate the List object. func preFetchListOptions() *mssite.ItemListsRequestBuilderGetRequestConfiguration {
// @param siteID the M365 ID that represents the SharePoint Site selecting := []string{"id", "displayName"}
// Makes additional calls to retrieve the following relationships: queryOptions := mssite.ItemListsRequestBuilderGetQueryParameters{
// - Columns Select: selecting,
// - ContentTypes }
// - List Items options := &mssite.ItemListsRequestBuilderGetRequestConfiguration{
func loadLists( QueryParameters: &queryOptions,
}
return options
}
func preFetchLists(
ctx context.Context, ctx context.Context,
gs graph.Servicer, gs graph.Servicer,
siteID string, siteID string,
) ([]models.Listable, error) { ) ([]listTuple, error) {
var ( var (
prefix = gs.Client().SitesById(siteID) builder = gs.Client().SitesById(siteID).Lists()
builder = prefix.Lists() options = preFetchListOptions()
results = make([]models.Listable, 0) listTuples = make([]listTuple, 0)
errs error errs error
) )
for { for {
resp, err := builder.Get(ctx, nil) resp, err := builder.Get(ctx, options)
if err != nil { if err != nil {
return nil, support.WrapAndAppend(support.ConnectorStackErrorTrace(err), err, errs) return nil, support.WrapAndAppend(support.ConnectorStackErrorTrace(err), err, errs)
} }
for _, entry := range resp.GetValue() { for _, entry := range resp.GetValue() {
id := *entry.GetId() temp := listTuple{id: *entry.GetId()}
cols, err := fetchColumns(ctx, gs, siteID, id, "") name := entry.GetDisplayName()
if err != nil { if name != nil {
errs = support.WrapAndAppend(siteID, err, errs) temp.name = *name
continue } else {
temp.name = *entry.GetId()
} }
entry.SetColumns(cols) listTuples = append(listTuples, temp)
cTypes, err := fetchContentTypes(ctx, gs, siteID, id)
if err != nil {
errs = support.WrapAndAppend(siteID, err, errs)
continue
}
entry.SetContentTypes(cTypes)
lItems, err := fetchListItems(ctx, gs, siteID, id)
if err != nil {
errs = support.WrapAndAppend(siteID, err, errs)
continue
}
entry.SetItems(lItems)
results = append(results, entry)
} }
if resp.GetOdataNextLink() == nil { if resp.GetOdataNextLink() == nil {
@ -79,6 +66,57 @@ func loadLists(
builder = mssite.NewItemListsRequestBuilder(*resp.GetOdataNextLink(), gs.Adapter()) builder = mssite.NewItemListsRequestBuilder(*resp.GetOdataNextLink(), gs.Adapter())
} }
return listTuples, nil
}
// list.go contains additional functions to help retrieve SharePoint List data from M365
// SharePoint lists represent lists on a site. Inherits additional properties from
// baseItem: https://learn.microsoft.com/en-us/graph/api/resources/baseitem?view=graph-rest-1.0
// The full details concerning SharePoint Lists can
// be found at: https://learn.microsoft.com/en-us/graph/api/resources/list?view=graph-rest-1.0
// Note additional calls are required for the relationships that exist outside of the object properties.
// loadSiteLists is a utility function to populate a collection of SharePoint.List
// objects associated with a given siteID.
// @param siteID the M365 ID that represents the SharePoint Site
// Makes additional calls to retrieve the following relationships:
// - Columns
// - ContentTypes
// - List Items
func loadSiteLists(
ctx context.Context,
gs graph.Servicer,
siteID string,
listIDs []string,
) ([]models.Listable, error) {
var (
results = make([]models.Listable, 0)
errs error
)
for _, listID := range listIDs {
entry, err := gs.Client().SitesById(siteID).ListsById(listID).Get(ctx, nil)
if err != nil {
errs = support.WrapAndAppend(
listID,
errors.Wrap(err, support.ConnectorStackErrorTrace(err)),
errs,
)
}
cols, cTypes, lItems, err := fetchListContents(ctx, gs, siteID, listID)
if err == nil {
entry.SetColumns(cols)
entry.SetContentTypes(cTypes)
entry.SetItems(lItems)
} else {
errs = support.WrapAndAppend("unable to fetchRelationships during loadSiteLists", err, errs)
continue
}
results = append(results, entry)
}
if errs != nil { if errs != nil {
return nil, errs return nil, errs
} }
@ -86,6 +124,43 @@ func loadLists(
return results, nil return results, nil
} }
// fetchListContents retrieves the relationships of a list that are fetched
// through separate calls: Columns, ContentTypes, and ListItems.
// All three fetches are always attempted, and each failure is appended to the
// returned error, wrapped with the siteID. If any fetch fails, all three
// result slices are returned as nil.
func fetchListContents(
	ctx context.Context,
	service graph.Servicer,
	siteID, listID string,
) (
	[]models.ColumnDefinitionable,
	[]models.ContentTypeable,
	[]models.ListItemable,
	error,
) {
	var failures error

	columns, colErr := fetchColumns(ctx, service, siteID, listID, "")
	if colErr != nil {
		failures = support.WrapAndAppend(siteID, colErr, failures)
	}

	contentTypes, ctErr := fetchContentTypes(ctx, service, siteID, listID)
	if ctErr != nil {
		failures = support.WrapAndAppend(siteID, ctErr, failures)
	}

	items, itemErr := fetchListItems(ctx, service, siteID, listID)
	if itemErr != nil {
		failures = support.WrapAndAppend(siteID, itemErr, failures)
	}

	if failures == nil {
		return columns, contentTypes, items, nil
	}

	return nil, nil, nil, failures
}
// fetchListItems utility for retrieving ListItem data and the associated relationship // fetchListItems utility for retrieving ListItem data and the associated relationship
// data. Additional call append data to the tracked items, and do not create additional collections. // data. Additional call append data to the tracked items, and do not create additional collections.
// Additional Call: // Additional Call:

View File

@ -54,7 +54,11 @@ func (suite *SharePointSuite) TestLoadList() {
service, err := createTestService(suite.creds) service, err := createTestService(suite.creds)
require.NoError(t, err) require.NoError(t, err)
lists, err := loadLists(ctx, service, "root") tuples, err := preFetchLists(ctx, service, "root")
require.NoError(t, err)
job := []string{tuples[0].id}
lists, err := loadSiteLists(ctx, service, "root", job)
assert.NoError(t, err) assert.NoError(t, err)
assert.Greater(t, len(lists), 0) assert.Greater(t, len(lists), 0)
t.Logf("Length: %d\n", len(lists)) t.Logf("Length: %d\n", len(lists))

View File

@ -1,88 +0,0 @@
package sharepoint
import (
"context"
"github.com/alcionai/corso/src/internal/connector/graph"
"github.com/alcionai/corso/src/internal/connector/support"
"github.com/alcionai/corso/src/pkg/selectors"
)
// FilterContainersAndFillCollections is intended to be a utility function
// that places the M365 object ids belonging to specific directories
// into a Collection. Items outside of those directories are to be omitted.
// NOTE: the current body is a stub: it ignores every argument and always
// returns nil. The former implementation is kept as commented-out code
// after the function.
// @param collections is the map meant to be filled by this function.
func FilterContainersAndFillCollections(
ctx context.Context,
qp graph.QueryParams,
collections map[string]*Collection,
statusUpdater support.StatusUpdater,
resolver graph.ContainerResolver,
scope selectors.SharePointScope,
) error {
return nil
}
// code previously within the function, moved here to make the linter happy
// var (
// category = qp.Scope.Category().PathType()
// collectionType = CategoryToOptionIdentifier(category)
// errs error
// )
// for _, c := range resolver.Items() {
// dirPath, ok := pathAndMatch(qp, category, c)
// if ok {
// // Create only those that match
// service, err := createService(qp.Credentials, qp.FailFast)
// if err != nil {
// errs = support.WrapAndAppend(
// qp.User+" FilterContainerAndFillCollection",
// err,
// errs)
// if qp.FailFast {
// return errs
// }
// }
// edc := NewCollection(
// qp.User,
// dirPath,
// collectionType,
// service,
// statusUpdater,
// )
// collections[*c.GetId()] = &edc
// }
// }
// for directoryID, col := range collections {
// fetchFunc, err := getFetchIDFunc(category)
// if err != nil {
// errs = support.WrapAndAppend(
// qp.User,
// err,
// errs)
// if qp.FailFast {
// return errs
// }
// continue
// }
// jobs, err := fetchFunc(ctx, col.service, qp.User, directoryID)
// if err != nil {
// errs = support.WrapAndAppend(
// qp.User,
// err,
// errs,
// )
// }
// col.jobs = append(col.jobs, jobs...)
// }
// return errs

View File

@ -843,8 +843,8 @@ func (suite *BackupOpIntegrationSuite) TestBackup_Run_sharePoint() {
siteID = tester.M365SiteID(t) siteID = tester.M365SiteID(t)
sel = selectors.NewSharePointBackup() sel = selectors.NewSharePointBackup()
) )
// TODO: dadams39 Issue #1795: Revert to Sites Upon List Integration
sel.Include(sel.Libraries([]string{siteID}, selectors.Any())) sel.Include(sel.Sites([]string{siteID}))
bo, _, _, _, closer := prepNewBackupOp(t, ctx, mb, sel.Selector) bo, _, _, _, closer := prepNewBackupOp(t, ctx, mb, sel.Selector)
defer closer() defer closer()