GC: Backup: SharePoint List Integration with Tests (#1788)

## Description
Adds the framework for the SharePoint List backup workflow.

### Special Instructions for Review
Ensure that the information used to build `path.Path` is in line with future PRs (@ashmrtn).
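
For reviewers, a minimal sketch of the list-path construction this PR relies on. It mirrors `collectLists` in the diff below; the import path is assumed, and the tenant ID, site ID, and list name are placeholders:

```go
package main

import (
	"fmt"

	"github.com/alcionai/corso/src/pkg/path" // assumed import path for the corso path package
)

func main() {
	// Mirrors collectLists: a list path is built from the list's display name
	// under the Lists category, rather than from a drive/folder hierarchy.
	dir, err := path.Builder{}.
		Append("Announcements"). // hypothetical list display name
		ToDataLayerSharePointPath(
			"tenant-id",        // placeholder M365 tenant ID
			"site-id",          // placeholder SharePoint site ID
			path.ListsCategory, // the category to keep aligned with future PRs
			false,              // same literal as collectLists below (likely the isItem flag)
		)
	if err != nil {
		panic(err)
	}

	fmt.Println(dir)
}
```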
## Type of change

- [x] 🌻 Feature

## Issue(s)

* closes #1403
* closes #1474
* closes #1795

## Test Plan
- SharePoint integration checked:
  - `src/internal/connector/data_collections_test.go`
  - `TestSharePointDataCollection()`
- SharePoint List basics checked:
  - `src/internal/connector/sharepoint/collection_test.go`
- SharePoint operational backup checked:
  - `src/internal/operations/backup_test.go`
  - `TestBackup_Run_sharePoint()` verified as operational during testing
- [x] Unit test
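
As context for the List-focused suites above, a minimal sketch of the selector shape they exercise. Identifiers come from the diff below; the site ID is a placeholder:

```go
package main

import (
	"fmt"

	"github.com/alcionai/corso/src/pkg/selectors"
)

func main() {
	sel := selectors.NewSharePointBackup()

	// Lists scope as exercised by TestCreateSharePointCollection below.
	// PrefixMatch() is kept because, per the diff, omitting it currently crashes.
	sel.Include(sel.Lists(
		[]string{"site-id"}, // placeholder SharePoint site ID
		selectors.Any(),
		selectors.PrefixMatch(),
	))

	fmt.Println(sel.Selector)
}
```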
Danny 2022-12-20 16:30:20 -05:00 committed by GitHub
parent d383a2d568
commit fe27fea2ae
8 changed files with 305 additions and 197 deletions

View File

@ -175,14 +175,26 @@ func (suite *ConnectorDataCollectionIntegrationSuite) TestSharePointDataCollecti
connector := loadConnector(ctx, suite.T(), Sites)
tests := []struct {
name string
getSelector func(t *testing.T) selectors.Selector
expected int
getSelector func() selectors.Selector
}{
{
name: "Libraries",
getSelector: func(t *testing.T) selectors.Selector {
name: "Libraries",
expected: 1,
getSelector: func() selectors.Selector {
sel := selectors.NewSharePointBackup()
sel.Include(sel.Libraries([]string{suite.site}, selectors.Any()))
return sel.Selector
},
},
{
name: "Lists",
expected: 0,
getSelector: func() selectors.Selector {
sel := selectors.NewSharePointBackup()
sel.Include(sel.Lists([]string{suite.site}, selectors.Any()))
return sel.Selector
},
},
@ -192,7 +204,7 @@ func (suite *ConnectorDataCollectionIntegrationSuite) TestSharePointDataCollecti
suite.T().Run(test.name, func(t *testing.T) {
collection, err := sharepoint.DataCollections(
ctx,
test.getSelector(t),
test.getSelector(),
[]string{suite.site},
connector.credentials.AzureTenantID,
connector.Service,
@ -202,7 +214,7 @@ func (suite *ConnectorDataCollectionIntegrationSuite) TestSharePointDataCollecti
// we don't know an exact count of drives this will produce,
// but it should be more than one.
assert.Less(t, 1, len(collection))
assert.Less(t, test.expected, len(collection))
// the test only reads the first collection
connector.incrementAwaitingMessages()
@ -601,6 +613,8 @@ func (suite *ConnectorCreateSharePointCollectionIntegrationSuite) SetupSuite() {
tester.LogTimeOfTest(suite.T())
}
// TestCreateSharePointCollection ensures the proper number of collections is created based
// on the selector.
func (suite *ConnectorCreateSharePointCollectionIntegrationSuite) TestCreateSharePointCollection() {
ctx, flush := tester.NewContext()
defer flush()
@ -609,15 +623,48 @@ func (suite *ConnectorCreateSharePointCollectionIntegrationSuite) TestCreateShar
t = suite.T()
siteID = tester.M365SiteID(t)
gc = loadConnector(ctx, t, Sites)
sel = selectors.NewSharePointBackup()
)
sel.Include(sel.Libraries(
[]string{siteID},
[]string{"foo"},
selectors.PrefixMatch(),
))
tables := []struct {
name string
sel func() selectors.Selector
comparator assert.ComparisonAssertionFunc
}{
{
name: "SharePoint.Libraries",
comparator: assert.Equal,
sel: func() selectors.Selector {
sel := selectors.NewSharePointBackup()
sel.Include(sel.Libraries(
[]string{siteID},
[]string{"foo"},
selectors.PrefixMatch(),
))
_, err := gc.DataCollections(ctx, sel.Selector, nil, control.Options{})
require.NoError(t, err)
return sel.Selector
},
},
{
name: "SharePoint.Lists",
comparator: assert.Less,
sel: func() selectors.Selector {
sel := selectors.NewSharePointBackup()
sel.Include(sel.Lists(
[]string{siteID},
selectors.Any(),
selectors.PrefixMatch(), // without this option a segfault occurs
))
return sel.Selector
},
},
}
for _, test := range tables {
t.Run(test.name, func(t *testing.T) {
cols, err := gc.DataCollections(ctx, test.sel(), nil, control.Options{})
require.NoError(t, err)
test.comparator(t, 0, len(cols))
})
}
}

View File

@ -7,6 +7,7 @@ import (
"time"
kw "github.com/microsoft/kiota-serialization-json-go"
"github.com/microsoftgraph/msgraph-sdk-go/models"
"github.com/alcionai/corso/src/internal/connector/graph"
"github.com/alcionai/corso/src/internal/connector/support"
@ -34,16 +35,21 @@ var (
_ data.StreamModTime = &Item{}
)
// Collection is the SharePoint.List implementation of data.Collection. SharePoint.Libraries collections are supported
// by the oneDrive.Collection, as the calls for populating the Collection are identical.
type Collection struct {
// data is the container for each individual SharePoint.List
data chan data.Stream
jobs []string
// fullPath indicates the hierarchy within the collection
fullPath path.Path
// jobs contain the SharePoint.Site.ListIDs for the associated list(s).
jobs []string
// service is the graph.Servicer used to query M365 for list data
service graph.Servicer
statusUpdater support.StatusUpdater
}
// NewCollection helper function for creating a Collection
func NewCollection(
folderPath path.Path,
service graph.Servicer,
@ -116,13 +122,14 @@ func (sd *Item) ModTime() time.Time {
return sd.modTime
}
func (sc *Collection) finishPopulation(ctx context.Context, success int, totalBytes int64, errs error) {
func (sc *Collection) finishPopulation(ctx context.Context, attempts, success int, totalBytes int64, errs error) {
close(sc.data)
attempted := len(sc.jobs)
attempted := attempts
status := support.CreateStatus(
ctx,
support.Backup,
1,
len(sc.jobs),
support.CollectionMetrics{
Objects: attempted,
Successes: success,
@ -131,12 +138,16 @@ func (sc *Collection) finishPopulation(ctx context.Context, success int, totalBy
errs,
sc.fullPath.Folder())
logger.Ctx(ctx).Debug(status.String())
if sc.statusUpdater != nil {
sc.statusUpdater(status)
}
}
// populate is a utility function that retrieves data from the backing store for a given collection
func (sc *Collection) populate(ctx context.Context) {
var (
success int
objects, success int
totalBytes, arrayLength int64
errs error
writer = kw.NewJsonSerializationWriter()
@ -148,52 +159,59 @@ func (sc *Collection) populate(ctx context.Context) {
defer func() {
close(colProgress)
sc.finishPopulation(ctx, success, totalBytes, errs)
sc.finishPopulation(ctx, objects, success, totalBytes, errs)
}()
// sc.jobs contains query = all of the site IDs.
for _, id := range sc.jobs {
// Retrieve list data from M365
lists, err := loadLists(ctx, sc.service, id)
// Retrieve list data from M365
lists, err := loadSiteLists(ctx, sc.service, sc.fullPath.ResourceOwner(), sc.jobs)
if err != nil {
errs = support.WrapAndAppend(sc.fullPath.ResourceOwner(), err, errs)
}
objects += len(lists)
// Write Data and Send
for _, lst := range lists {
byteArray, err := serializeListContent(writer, lst)
if err != nil {
errs = support.WrapAndAppend(id, err, errs)
errs = support.WrapAndAppend(*lst.GetId(), err, errs)
continue
}
// Write Data and Send
for _, lst := range lists {
err = writer.WriteObjectValue("", lst)
if err != nil {
errs = support.WrapAndAppend(*lst.GetId(), err, errs)
continue
arrayLength = int64(len(byteArray))
if arrayLength > 0 {
t := time.Now()
if t1 := lst.GetLastModifiedDateTime(); t1 != nil {
t = *t1
}
byteArray, err := writer.GetSerializedContent()
if err != nil {
errs = support.WrapAndAppend(*lst.GetId(), err, errs)
continue
totalBytes += arrayLength
success++
sc.data <- &Item{
id: *lst.GetId(),
data: io.NopCloser(bytes.NewReader(byteArray)),
info: sharePointListInfo(lst, arrayLength),
modTime: t,
}
writer.Close()
arrayLength = int64(len(byteArray))
if arrayLength > 0 {
t := time.Now()
if t1 := lst.GetLastModifiedDateTime(); t1 != nil {
t = *t1
}
totalBytes += arrayLength
success++
sc.data <- &Item{
id: *lst.GetId(),
data: io.NopCloser(bytes.NewReader(byteArray)),
info: sharePointListInfo(lst, arrayLength),
modTime: t,
}
colProgress <- struct{}{}
}
colProgress <- struct{}{}
}
}
}
func serializeListContent(writer *kw.JsonSerializationWriter, lst models.Listable) ([]byte, error) {
defer writer.Close()
err := writer.WriteObjectValue("", lst)
if err != nil {
return nil, err
}
byteArray, err := writer.GetSerializedContent()
if err != nil {
return nil, err
}
return byteArray, nil
}

View File

@ -70,6 +70,7 @@ func (suite *SharePointCollectionSuite) TestSharePointListCollection() {
}
readItems := []data.Stream{}
for item := range col.Items() {
readItems = append(readItems, item)
}

View File

@ -52,15 +52,25 @@ func DataCollections(
defer closer()
defer close(foldersComplete)
var spcs []data.Collection
switch scope.Category().PathType() {
// TODO path.ListCategory: PR
// collect Lists
// done?
case path.ListsCategory:
return nil, fmt.Errorf("sharePoint list collections not supported")
spcs, err = collectLists(
ctx,
serv,
tenantID,
site,
scope,
su,
ctrlOpts,
)
if err != nil {
return nil, support.WrapAndAppend(site, err, errs)
}
case path.LibrariesCategory:
spcs, err := collectLibraries(
spcs, err = collectLibraries(
ctx,
serv,
tenantID,
@ -71,10 +81,10 @@ func DataCollections(
if err != nil {
return nil, support.WrapAndAppend(site, err, errs)
}
collections = append(collections, spcs...)
}
collections = append(collections, spcs...)
foldersComplete <- struct{}{}
}
}
@ -82,6 +92,47 @@ func DataCollections(
return collections, errs
}
func collectLists(
ctx context.Context,
serv graph.Servicer,
tenantID, siteID string,
scope selectors.SharePointScope,
updater statusUpdater,
ctrlOpts control.Options,
) ([]data.Collection, error) {
logger.Ctx(ctx).With("site", siteID).Debug("Creating SharePoint List Collections")
if scope.Matches(selectors.SharePointSite, siteID) {
spcs := make([]data.Collection, 0)
tuples, err := preFetchLists(ctx, serv, siteID)
if err != nil {
return nil, err
}
for _, tuple := range tuples {
dir, err := path.Builder{}.Append(tuple.name).
ToDataLayerSharePointPath(
tenantID,
siteID,
path.ListsCategory,
false)
if err != nil {
return nil, errors.Wrapf(err, "failed to create collection path for site: %s", siteID)
}
collection := NewCollection(dir, serv, updater.UpdateStatus)
collection.AddJob(tuple.id)
spcs = append(spcs, collection)
}
return spcs, nil
}
return nil, nil
}
// collectLibraries constructs a onedrive Collections struct and Get()s
// all the drives associated with the site.
func collectLibraries(

View File

@ -11,65 +11,52 @@ import (
"github.com/alcionai/corso/src/internal/connector/support"
)
// list.go contains additional functions to help retrieve SharePoint List data from M365
// SharePoint lists represent lists on a site. Inherits additional properties from
// baseItem: https://learn.microsoft.com/en-us/graph/api/resources/baseitem?view=graph-rest-1.0
// The full details concerning SharePoint Lists can
// be found at: https://learn.microsoft.com/en-us/graph/api/resources/list?view=graph-rest-1.0
// Note additional calls are required for the relationships that exist outside of the object properties.
type listTuple struct {
name string
id string
}
// loadLists is a utility function to populate the List object.
// @param siteID the M365 ID that represents the SharePoint Site
// Makes additional calls to retrieve the following relationships:
// - Columns
// - ContentTypes
// - List Items
func loadLists(
func preFetchListOptions() *mssite.ItemListsRequestBuilderGetRequestConfiguration {
selecting := []string{"id", "displayName"}
queryOptions := mssite.ItemListsRequestBuilderGetQueryParameters{
Select: selecting,
}
options := &mssite.ItemListsRequestBuilderGetRequestConfiguration{
QueryParameters: &queryOptions,
}
return options
}
func preFetchLists(
ctx context.Context,
gs graph.Servicer,
siteID string,
) ([]models.Listable, error) {
) ([]listTuple, error) {
var (
prefix = gs.Client().SitesById(siteID)
builder = prefix.Lists()
results = make([]models.Listable, 0)
errs error
builder = gs.Client().SitesById(siteID).Lists()
options = preFetchListOptions()
listTuples = make([]listTuple, 0)
errs error
)
for {
resp, err := builder.Get(ctx, nil)
resp, err := builder.Get(ctx, options)
if err != nil {
return nil, support.WrapAndAppend(support.ConnectorStackErrorTrace(err), err, errs)
}
for _, entry := range resp.GetValue() {
id := *entry.GetId()
temp := listTuple{id: *entry.GetId()}
cols, err := fetchColumns(ctx, gs, siteID, id, "")
if err != nil {
errs = support.WrapAndAppend(siteID, err, errs)
continue
name := entry.GetDisplayName()
if name != nil {
temp.name = *name
} else {
temp.name = *entry.GetId()
}
entry.SetColumns(cols)
cTypes, err := fetchContentTypes(ctx, gs, siteID, id)
if err != nil {
errs = support.WrapAndAppend(siteID, err, errs)
continue
}
entry.SetContentTypes(cTypes)
lItems, err := fetchListItems(ctx, gs, siteID, id)
if err != nil {
errs = support.WrapAndAppend(siteID, err, errs)
continue
}
entry.SetItems(lItems)
results = append(results, entry)
listTuples = append(listTuples, temp)
}
if resp.GetOdataNextLink() == nil {
@ -79,6 +66,57 @@ func loadLists(
builder = mssite.NewItemListsRequestBuilder(*resp.GetOdataNextLink(), gs.Adapter())
}
return listTuples, nil
}
// list.go contains additional functions to help retrieve SharePoint List data from M365
// SharePoint lists represent lists on a site and inherit additional properties from
// baseItem: https://learn.microsoft.com/en-us/graph/api/resources/baseitem?view=graph-rest-1.0
// The full details concerning SharePoint Lists can
// be found at: https://learn.microsoft.com/en-us/graph/api/resources/list?view=graph-rest-1.0
// Note additional calls are required for the relationships that exist outside of the object properties.
// loadSiteLists is a utility function to populate a collection of SharePoint.List
// objects associated with a given siteID.
// @param siteID the M365 ID that represents the SharePoint Site
// Makes additional calls to retrieve the following relationships:
// - Columns
// - ContentTypes
// - List Items
func loadSiteLists(
ctx context.Context,
gs graph.Servicer,
siteID string,
listIDs []string,
) ([]models.Listable, error) {
var (
results = make([]models.Listable, 0)
errs error
)
for _, listID := range listIDs {
entry, err := gs.Client().SitesById(siteID).ListsById(listID).Get(ctx, nil)
if err != nil {
errs = support.WrapAndAppend(
listID,
errors.Wrap(err, support.ConnectorStackErrorTrace(err)),
errs,
)
}
cols, cTypes, lItems, err := fetchListContents(ctx, gs, siteID, listID)
if err == nil {
entry.SetColumns(cols)
entry.SetContentTypes(cTypes)
entry.SetItems(lItems)
} else {
errs = support.WrapAndAppend("unable to fetchListContents during loadSiteLists", err, errs)
continue
}
results = append(results, entry)
}
if errs != nil {
return nil, errs
}
@ -86,6 +124,43 @@ func loadLists(
return results, nil
}
// fetchListContents utility function to retrieve associated M365 relationships
// which are not included with the standard List query:
// - Columns, ContentTypes, ListItems
func fetchListContents(
ctx context.Context,
service graph.Servicer,
siteID, listID string,
) (
[]models.ColumnDefinitionable,
[]models.ContentTypeable,
[]models.ListItemable,
error,
) {
var errs error
cols, err := fetchColumns(ctx, service, siteID, listID, "")
if err != nil {
errs = support.WrapAndAppend(siteID, err, errs)
}
cTypes, err := fetchContentTypes(ctx, service, siteID, listID)
if err != nil {
errs = support.WrapAndAppend(siteID, err, errs)
}
lItems, err := fetchListItems(ctx, service, siteID, listID)
if err != nil {
errs = support.WrapAndAppend(siteID, err, errs)
}
if errs != nil {
return nil, nil, nil, errs
}
return cols, cTypes, lItems, nil
}
// fetchListItems utility for retrieving ListItem data and the associated relationship
// data. Additional calls append data to the tracked items and do not create additional collections.
// Additional Call:

View File

@ -54,7 +54,11 @@ func (suite *SharePointSuite) TestLoadList() {
service, err := createTestService(suite.creds)
require.NoError(t, err)
lists, err := loadLists(ctx, service, "root")
tuples, err := preFetchLists(ctx, service, "root")
require.NoError(t, err)
job := []string{tuples[0].id}
lists, err := loadSiteLists(ctx, service, "root", job)
assert.NoError(t, err)
assert.Greater(t, len(lists), 0)
t.Logf("Length: %d\n", len(lists))

View File

@ -1,88 +0,0 @@
package sharepoint
import (
"context"
"github.com/alcionai/corso/src/internal/connector/graph"
"github.com/alcionai/corso/src/internal/connector/support"
"github.com/alcionai/corso/src/pkg/selectors"
)
// FilterContainersAndFillCollections is a utility function
// that places the M365 object ids belonging to specific directories
// into a Collection. Items outside of those directories are omitted.
// @param collection is filled with during this function.
func FilterContainersAndFillCollections(
ctx context.Context,
qp graph.QueryParams,
collections map[string]*Collection,
statusUpdater support.StatusUpdater,
resolver graph.ContainerResolver,
scope selectors.SharePointScope,
) error {
return nil
}
// code previously within the function, moved here to make the linter happy
// var (
// category = qp.Scope.Category().PathType()
// collectionType = CategoryToOptionIdentifier(category)
// errs error
// )
// for _, c := range resolver.Items() {
// dirPath, ok := pathAndMatch(qp, category, c)
// if ok {
// // Create only those that match
// service, err := createService(qp.Credentials, qp.FailFast)
// if err != nil {
// errs = support.WrapAndAppend(
// qp.User+" FilterContainerAndFillCollection",
// err,
// errs)
// if qp.FailFast {
// return errs
// }
// }
// edc := NewCollection(
// qp.User,
// dirPath,
// collectionType,
// service,
// statusUpdater,
// )
// collections[*c.GetId()] = &edc
// }
// }
// for directoryID, col := range collections {
// fetchFunc, err := getFetchIDFunc(category)
// if err != nil {
// errs = support.WrapAndAppend(
// qp.User,
// err,
// errs)
// if qp.FailFast {
// return errs
// }
// continue
// }
// jobs, err := fetchFunc(ctx, col.service, qp.User, directoryID)
// if err != nil {
// errs = support.WrapAndAppend(
// qp.User,
// err,
// errs,
// )
// }
// col.jobs = append(col.jobs, jobs...)
// }
// return errs

View File

@ -843,8 +843,8 @@ func (suite *BackupOpIntegrationSuite) TestBackup_Run_sharePoint() {
siteID = tester.M365SiteID(t)
sel = selectors.NewSharePointBackup()
)
// TODO: dadams39 Issue #1795: Revert to Sites Upon List Integration
sel.Include(sel.Libraries([]string{siteID}, selectors.Any()))
sel.Include(sel.Sites([]string{siteID}))
bo, _, _, _, closer := prepNewBackupOp(t, ctx, mb, sel.Selector)
defer closer()