generic drive retrieval for sharepoint

Adapts the graph onedrive library to handle
access to drive data across both onedrive and
sharepoint services.
This commit is contained in:
ryanfkeepers 2022-11-16 13:01:14 -07:00
parent c97f5ea9a7
commit eb48ce06c0
10 changed files with 322 additions and 89 deletions

View File

@ -17,6 +17,7 @@ import (
D "github.com/alcionai/corso/src/internal/diagnostics"
"github.com/alcionai/corso/src/internal/observe"
"github.com/alcionai/corso/src/pkg/logger"
"github.com/alcionai/corso/src/pkg/path"
"github.com/alcionai/corso/src/pkg/selectors"
)
@ -193,6 +194,18 @@ func (gc *GraphConnector) ExchangeDataCollection(
// OneDrive
// ---------------------------------------------------------------------------
// odFolderMatcher wraps a OneDrive selector scope so it can be handed to
// onedrive.NewCollections as its folder matcher.
type odFolderMatcher struct {
	scope selectors.OneDriveScope
}

// IsAny delegates to the wrapped scope's IsAny check for the
// OneDriveFolder category.
func (m odFolderMatcher) IsAny() bool {
	return m.scope.IsAny(selectors.OneDriveFolder)
}

// Matches delegates to the wrapped scope's Matches check for the
// OneDriveFolder category, using the given folder path.
func (m odFolderMatcher) Matches(folder string) bool {
	return m.scope.Matches(selectors.OneDriveFolder, folder)
}
// OneDriveDataCollections returns a set of DataCollection which represents the OneDrive data
// for the specified user
func (gc *GraphConnector) OneDriveDataCollections(
@ -218,7 +231,8 @@ func (gc *GraphConnector) OneDriveDataCollections(
odcs, err := onedrive.NewCollections(
gc.credentials.AzureTenantID,
user,
scope,
onedrive.OneDriveSource,
odFolderMatcher{scope},
&gc.graphService,
gc.UpdateStatus,
).Get(ctx)
@ -247,55 +261,46 @@ func (gc *GraphConnector) OneDriveDataCollections(
func (gc *GraphConnector) createSharePointCollections(
ctx context.Context,
scope selectors.SharePointScope,
) ([]*sharepoint.Collection, error) {
) ([]data.Collection, error) {
var (
errs *multierror.Error
sites = scope.Get(selectors.SharePointSite)
colls = make([]*sharepoint.Collection, 0)
category = scope.Category().PathType()
collections = make([]data.Collection, 0)
)
// Create collection of ExchangeDataCollection
for _, site := range sites {
collections := make(map[string]*sharepoint.Collection)
qp := graph.QueryParams{
Category: scope.Category().PathType(),
ResourceOwner: site,
FailFast: gc.failFast,
Credentials: gc.credentials,
}
foldersComplete, closer := observe.MessageWithCompletion(fmt.Sprintf("∙ %s - %s:", qp.Category, site))
foldersComplete, closer := observe.MessageWithCompletion(fmt.Sprintf("∙ %s - %s:", category, site))
defer closer()
defer close(foldersComplete)
resolver, err := exchange.PopulateExchangeContainerResolver(ctx, qp)
switch category {
case path.FilesCategory: // TODO: better category for drives
spcs, err := sharepoint.CollectLibraries(
ctx,
gc.Service(),
gc.credentials.AzureTenantID,
gc.GetSiteIds(),
scope,
gc.UpdateStatus,
gc.incrementAwaitingMessages,
)
if err != nil {
return nil, errors.Wrap(err, "getting folder cache")
return nil, support.WrapAndAppend(site, err, errs)
}
err = sharepoint.FilterContainersAndFillCollections(
ctx,
qp,
collections,
gc.UpdateStatus,
resolver,
scope)
collections = append(collections, spcs...)
if err != nil {
return nil, errors.Wrap(err, "filling collections")
// case path.UnknownCategory: // TODO: ListsCategory
// // get lists
}
foldersComplete <- struct{}{}
for _, collection := range collections {
gc.incrementAwaitingMessages()
colls = append(colls, collection)
}
}
return colls, errs.ErrorOrNil()
return collections, errs.ErrorOrNil()
}
// SharePointDataCollections returns a set of DataCollection which represents the SharePoint data
@ -317,7 +322,7 @@ func (gc *GraphConnector) SharePointDataCollections(
// for each scope that includes oneDrive items, get all
for _, scope := range scopes {
// Creates a map of collections based on scope
// Creates a slice of collections based on scope
dcs, err := gc.createSharePointCollections(ctx, scope)
if err != nil {
return nil, support.WrapAndAppend(scope.Get(selectors.SharePointSite)[0], err, errs)

View File

@ -60,7 +60,7 @@ func (suite *OneDriveCollectionSuite) TestOneDriveCollection() {
wg := sync.WaitGroup{}
collStatus := support.ConnectorOperationStatus{}
folderPath, err := getCanonicalPath("drive/driveID1/root:/dir1/dir2/dir3", "a-tenant", "a-user")
folderPath, err := getCanonicalPath("drive/driveID1/root:/dir1/dir2/dir3", "a-tenant", "a-user", OneDriveSource)
require.NoError(t, err)
driveFolderPath, err := getDriveFolderPath(folderPath)
require.NoError(t, err)
@ -117,7 +117,7 @@ func (suite *OneDriveCollectionSuite) TestOneDriveCollectionReadError() {
wg := sync.WaitGroup{}
wg.Add(1)
folderPath, err := getCanonicalPath("drive/driveID1/root:/folderPath", "a-tenant", "a-user")
folderPath, err := getCanonicalPath("drive/driveID1/root:/folderPath", "a-tenant", "a-user", OneDriveSource)
require.NoError(t, err)
coll := NewCollection(folderPath, "fakeDriveID", suite, suite.testStatusUpdater(&wg, &collStatus))

View File

@ -14,20 +14,34 @@ import (
"github.com/alcionai/corso/src/internal/observe"
"github.com/alcionai/corso/src/pkg/logger"
"github.com/alcionai/corso/src/pkg/path"
"github.com/alcionai/corso/src/pkg/selectors"
)
// Collections is used to retrieve OneDrive data for a
// specified user
// driveSource identifies which kind of resource owner a set of drives is
// enumerated for. It decides both how drives are listed and which canonical
// path form is produced for their folders.
type driveSource int

const (
	// unknownDriveSource is the zero value; functions that switch on a
	// driveSource reject it as unrecognized.
	unknownDriveSource driveSource = iota
	// OneDriveSource selects drives owned by a user.
	OneDriveSource
	// SharePointSource selects drives owned by a SharePoint site.
	SharePointSource
)
// isAnyMatcher abstracts the folder-selection checks needed while walking a
// drive, so that callers can supply their own scope type behind it.
type isAnyMatcher interface {
	// IsAny is consulted by includePath when the folder path is empty
	// (the drive root) to decide whether the root should be included.
	IsAny() bool
	// Matches is called by includePath with the folder path inside the
	// drive and reports whether that folder should be included.
	Matches(path string) bool
}
// Collections is used to retrieve drive data for a
// resource owner, which can be either a user or a sharepoint site.
type Collections struct {
tenant string
user string
scope selectors.OneDriveScope
resourceOwner string
source driveSource
matcher isAnyMatcher
service graph.Service
statusUpdater support.StatusUpdater
// collectionMap allows lookup of the data.Collection
// for a OneDrive folder
collectionMap map[string]data.Collection
service graph.Service
statusUpdater support.StatusUpdater
// Track stats from drive enumeration. Represents the items backed up.
numItems int
@ -37,25 +51,27 @@ type Collections struct {
func NewCollections(
tenant string,
user string,
scope selectors.OneDriveScope,
resourceOwner string,
source driveSource,
matcher isAnyMatcher,
service graph.Service,
statusUpdater support.StatusUpdater,
) *Collections {
return &Collections{
tenant: tenant,
user: user,
scope: scope,
resourceOwner: resourceOwner,
source: source,
matcher: matcher,
collectionMap: map[string]data.Collection{},
service: service,
statusUpdater: statusUpdater,
}
}
// Retrieves OneDrive data as set of `data.Collections`
// Retrieves drive data as set of `data.Collections`
func (c *Collections) Get(ctx context.Context) ([]data.Collection, error) {
// Enumerate drives for the specified user
drives, err := drives(ctx, c.service, c.user)
// Enumerate drives for the specified resourceOwner
drives, err := drives(ctx, c.service, c.resourceOwner, c.source)
if err != nil {
return nil, err
}
@ -78,29 +94,8 @@ func (c *Collections) Get(ctx context.Context) ([]data.Collection, error) {
return collections, nil
}
func getCanonicalPath(p, tenant, user string) (path.Path, error) {
pathBuilder := path.Builder{}.Append(strings.Split(p, "/")...)
res, err := pathBuilder.ToDataLayerOneDrivePath(tenant, user, false)
if err != nil {
return nil, errors.Wrap(err, "converting to canonical path")
}
return res, nil
}
// Returns the path to the folder within the drive (i.e. under `root:`)
func getDriveFolderPath(p path.Path) (string, error) {
drivePath, err := toOneDrivePath(p)
if err != nil {
return "", err
}
return path.Builder{}.Append(drivePath.folders...).String(), nil
}
// updateCollections initializes and adds the provided OneDrive items to Collections
// A new collection is created for every OneDrive folder (or package)
// updateCollections initializes and adds the provided drive items to Collections
// A new collection is created for every drive folder (or package)
func (c *Collections) updateCollections(ctx context.Context, driveID string, items []models.DriveItemable) error {
for _, item := range items {
if item.GetRoot() != nil {
@ -116,14 +111,15 @@ func (c *Collections) updateCollections(ctx context.Context, driveID string, ite
collectionPath, err := getCanonicalPath(
*item.GetParentReference().GetPath(),
c.tenant,
c.user,
c.resourceOwner,
c.source,
)
if err != nil {
return err
}
// Skip items that don't match the folder selectors we were given.
if !includePath(ctx, c.scope, collectionPath) {
if !includePath(ctx, c.matcher, collectionPath) {
logger.Ctx(ctx).Infof("Skipping path %s", collectionPath.String())
continue
}
@ -162,7 +158,40 @@ func (c *Collections) updateCollections(ctx context.Context, driveID string, ite
return nil
}
func includePath(ctx context.Context, scope selectors.OneDriveScope, folderPath path.Path) bool {
// getCanonicalPath splits p on "/" and converts the resulting elements into a
// data-layer path for the given tenant and resource owner. The source picks
// the path flavor: OneDrive paths for OneDriveSource, SharePoint paths for
// SharePointSource. Any other source is rejected with an error.
func getCanonicalPath(p, tenant, resourceOwner string, source driveSource) (path.Path, error) {
	builder := path.Builder{}.Append(strings.Split(p, "/")...)

	// finish applies the shared error wrapping to whichever conversion ran.
	finish := func(canonical path.Path, convErr error) (path.Path, error) {
		if convErr != nil {
			return nil, errors.Wrap(convErr, "converting to canonical path")
		}

		return canonical, nil
	}

	switch source {
	case OneDriveSource:
		return finish(builder.ToDataLayerOneDrivePath(tenant, resourceOwner, false))
	case SharePointSource:
		return finish(builder.ToDataLayerSharePointPath(tenant, resourceOwner, false))
	}

	return nil, errors.Errorf("unrecognized drive data source")
}
// getDriveFolderPath returns the folder portion of p, i.e. the path to the
// folder within the drive (everything under `root:`), joined back into a
// single string. It fails if p cannot be interpreted by toOneDrivePath.
func getDriveFolderPath(p path.Path) (string, error) {
	parsed, err := toOneDrivePath(p)
	if err != nil {
		return "", err
	}

	folderBuilder := path.Builder{}.Append(parsed.folders...)

	return folderBuilder.String(), nil
}
func includePath(ctx context.Context, m isAnyMatcher, folderPath path.Path) bool {
// Check if the folder is allowed by the scope.
folderPathString, err := getDriveFolderPath(folderPath)
if err != nil {
@ -172,9 +201,9 @@ func includePath(ctx context.Context, scope selectors.OneDriveScope, folderPath
// Hack for the edge case where we're looking at the root folder and can
// select any folder. Right now the root folder has an empty folder path.
if len(folderPathString) == 0 && scope.IsAny(selectors.OneDriveFolder) {
if len(folderPathString) == 0 && m.IsAny() {
return true
}
return scope.Matches(selectors.OneDriveFolder, folderPathString)
return m.Matches(folderPathString)
}

View File

@ -20,7 +20,7 @@ func expectedPathAsSlice(t *testing.T, tenant, user string, rest ...string) []st
res := make([]string, 0, len(rest))
for _, r := range rest {
p, err := getCanonicalPath(r, tenant, user)
p, err := getCanonicalPath(r, tenant, user, OneDriveSource)
require.NoError(t, err)
res = append(res, p.String())
@ -211,7 +211,7 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() {
ctx, flush := tester.NewContext()
defer flush()
c := NewCollections(tenant, user, tt.scope, &MockGraphService{}, nil)
c := NewCollections(tenant, user, OneDriveSource, testFolderMatcher{tt.scope}, &MockGraphService{}, nil)
err := c.updateCollections(ctx, "driveID", tt.items)
tt.expect(t, err)
assert.Equal(t, len(tt.expectedCollectionPaths), len(c.collectionMap), "collection paths")

View File

@ -67,7 +67,33 @@ const (
)
// Enumerates the drives for the specified user
func drives(ctx context.Context, service graph.Service, user string) ([]models.Driveable, error) {
// drives enumerates the drives belonging to resourceOwner. The source decides
// how the owner is interpreted: OneDriveSource treats it as a user and
// SharePointSource treats it as a site. Any other source yields an error.
func drives(
	ctx context.Context,
	service graph.Service,
	resourceOwner string,
	source driveSource,
) ([]models.Driveable, error) {
	if source == OneDriveSource {
		return userDrives(ctx, service, resourceOwner)
	}

	if source == SharePointSource {
		return siteDrives(ctx, service, resourceOwner)
	}

	return nil, errors.Errorf("unrecognized drive data source")
}
// siteDrives requests the drives of the given site through the graph client
// and returns the values from the response. A failed request is returned
// wrapped with the site and the connector's stack error trace.
func siteDrives(ctx context.Context, service graph.Service, site string) ([]models.Driveable, error) {
	request := service.Client().SitesById(site).Drives()

	resp, err := request.Get(ctx, nil)
	if err != nil {
		details := support.ConnectorStackErrorTrace(err)

		return nil, errors.Wrapf(err, "failed to retrieve site drives. site: %s, details: %s",
			site, details)
	}

	return resp.GetValue(), nil
}
func userDrives(ctx context.Context, service graph.Service, user string) ([]models.Driveable, error) {
var hasDrive bool
hasDrive, err := hasDriveLicense(ctx, service, user)
@ -237,7 +263,7 @@ func GetAllFolders(
userID string,
prefix string,
) ([]*Displayable, error) {
drives, err := drives(ctx, gs, userID)
drives, err := drives(ctx, gs, userID, OneDriveSource)
if err != nil {
return nil, errors.Wrap(err, "getting OneDrive folders")
}
@ -321,7 +347,7 @@ func hasDriveLicense(
cb := func(pageItem any) bool {
entry, ok := pageItem.(models.LicenseDetailsable)
if !ok {
err = errors.New("casting item to models.MailFolderable")
err = errors.New("casting item to models.LicenseDetailsable")
return false
}

View File

@ -43,7 +43,7 @@ func (suite *OneDriveSuite) TestCreateGetDeleteFolder() {
folderElements := []string{folderName1}
gs := loadTestService(t)
drives, err := drives(ctx, gs, suite.userID)
drives, err := drives(ctx, gs, suite.userID, OneDriveSource)
require.NoError(t, err)
require.NotEmpty(t, drives)
@ -100,6 +100,18 @@ func (suite *OneDriveSuite) TestCreateGetDeleteFolder() {
}
}
// testFolderMatcher wraps a OneDrive selector scope so tests can pass it to
// NewCollections as the folder matcher.
type testFolderMatcher struct {
	scope selectors.OneDriveScope
}

// IsAny delegates to the wrapped scope's IsAny check for the
// OneDriveFolder category.
func (m testFolderMatcher) IsAny() bool {
	return m.scope.IsAny(selectors.OneDriveFolder)
}

// Matches delegates to the wrapped scope's Matches check for the
// OneDriveFolder category, using the given folder path.
func (m testFolderMatcher) Matches(folder string) bool {
	return m.scope.Matches(selectors.OneDriveFolder, folder)
}
func (suite *OneDriveSuite) TestOneDriveNewCollections() {
ctx, flush := tester.NewContext()
defer flush()
@ -129,7 +141,8 @@ func (suite *OneDriveSuite) TestOneDriveNewCollections() {
odcs, err := NewCollections(
creds.AzureTenantID,
test.user,
scope,
OneDriveSource,
testFolderMatcher{scope},
service,
service.updateStatus,
).Get(ctx)

View File

@ -67,7 +67,7 @@ func (suite *ItemIntegrationSuite) SetupSuite() {
suite.user = tester.SecondaryM365UserID(suite.T())
drives, err := drives(ctx, suite, suite.user)
drives, err := drives(ctx, suite, suite.user, OneDriveSource)
require.NoError(suite.T(), err)
// Test Requirement 1: Need a drive
require.Greaterf(suite.T(), len(drives), 0, "user %s does not have a drive", suite.user)

View File

@ -0,0 +1,65 @@
package sharepoint
import (
"context"
"github.com/alcionai/corso/src/internal/connector/graph"
"github.com/alcionai/corso/src/internal/connector/onedrive"
"github.com/alcionai/corso/src/internal/connector/support"
"github.com/alcionai/corso/src/internal/data"
"github.com/alcionai/corso/src/pkg/logger"
"github.com/alcionai/corso/src/pkg/selectors"
)
// CollectLibraries builds drive collections for every SharePoint site listed
// in scope by running the onedrive Collections enumeration with the
// SharePoint drive source and a matcher over the scope's folder selection.
//
// siteIDs is accepted but currently unused; the sites to visit are read from
// scope. incrementWaitCount is invoked once for each collection returned.
//
// Enumeration stops at the first site whose collections cannot be retrieved;
// that error is passed through support.WrapAndAppend together with the site
// and returned with a nil collection slice.
func CollectLibraries(
	ctx context.Context,
	serv graph.Service,
	tenantID string,
	siteIDs []string,
	scope selectors.SharePointScope,
	updater support.StatusUpdater,
	incrementWaitCount func(),
) ([]data.Collection, error) {
	collections := []data.Collection{}

	for _, site := range scope.Get(selectors.SharePointSite) {
		logger.Ctx(ctx).With("site", site).Debug("Creating SharePoint Library collections")

		colls := onedrive.NewCollections(
			tenantID,
			site,
			onedrive.SharePointSource,
			folderMatcher{scope},
			serv,
			updater,
		)

		odcs, err := colls.Get(ctx)
		if err != nil {
			// No earlier errors are accumulated in this function, so there
			// is no previous error to append to.
			return nil, support.WrapAndAppend(site, err, nil)
		}

		collections = append(collections, odcs...)
	}

	// Register one awaited status message per collection handed back.
	for range collections {
		incrementWaitCount()
	}

	return collections, nil
}
// folderMatcher wraps a SharePoint selector scope so it can be handed to
// onedrive.NewCollections as its folder matcher.
type folderMatcher struct {
	scope selectors.SharePointScope
}

// IsAny delegates to the wrapped scope's IsAny check for the
// SharePointFolder category.
func (m folderMatcher) IsAny() bool {
	return m.scope.IsAny(selectors.SharePointFolder)
}

// Matches delegates to the wrapped scope's Matches check for the
// SharePointFolder category, using the given folder path.
func (m folderMatcher) Matches(folder string) bool {
	return m.scope.Matches(selectors.SharePointFolder, folder)
}

View File

@ -0,0 +1,95 @@
package sharepoint
import (
"context"
msgraphsdk "github.com/microsoftgraph/msgraph-sdk-go"
"github.com/pkg/errors"
"github.com/alcionai/corso/src/internal/connector/graph"
"github.com/alcionai/corso/src/pkg/account"
)
// sharePointService bundles the graph client, request adapter, and
// configuration used for SharePoint queries. Its methods expose these values
// to satisfy the graph.Service interface.
type sharePointService struct {
	// client is the graph service client returned by Client().
	client msgraphsdk.GraphServiceClient
	// adapter is the graph request adapter returned by Adapter().
	adapter msgraphsdk.GraphRequestAdapter
	failFast bool // if true service will exit sequence upon encountering an error
	// credentials holds the M365 account configuration the service was created with.
	credentials account.M365Config
}
// ------------------------------------------------------------
// Functions to comply with graph.Service Interface
// ------------------------------------------------------------
// Client returns a pointer to the service's graph client.
func (s *sharePointService) Client() *msgraphsdk.GraphServiceClient {
	return &s.client
}

// Adapter returns a pointer to the service's graph request adapter.
func (s *sharePointService) Adapter() *msgraphsdk.GraphRequestAdapter {
	return &s.adapter
}

// ErrPolicy reports the service's fail-fast setting.
func (s *sharePointService) ErrPolicy() bool {
	return s.failFast
}
// createService is the internal constructor for sharePointService. It builds
// a graph adapter from the tenant ID, client ID, and client secret in
// credentials, and a graph client on top of that adapter. An error is
// returned when the adapter cannot be created.
// NOTE: Incorrect account information will result in errors on subsequent queries.
func createService(credentials account.M365Config, shouldFailFast bool) (*sharePointService, error) {
	graphAdapter, err := graph.CreateAdapter(
		credentials.AzureTenantID,
		credentials.AzureClientID,
		credentials.AzureClientSecret,
	)
	if err != nil {
		return nil, errors.Wrap(err, "creating microsoft graph service")
	}

	return &sharePointService{
		adapter:     *graphAdapter,
		client:      *msgraphsdk.NewGraphServiceClient(graphAdapter),
		failFast:    shouldFailFast,
		credentials: credentials,
	}, nil
}
// PopulateContainerResolver gets a container resolver if one is available for
// this category of data. If one is not available, returns nil so that other
// logic in the caller can complete as long as they check if the resolver is not
// nil. If an error occurs populating the resolver, returns an error.
//
// NOTE: the current implementation is a stub. It ignores ctx and qp and always
// returns a nil resolver with a nil error, so callers must nil-check the
// result. The commented-out body below is the disabled implementation.
func PopulateContainerResolver(
	ctx context.Context,
	qp graph.QueryParams,
) (graph.ContainerResolver, error) {
	return nil, nil
	// var (
	// c graph.ContainerPopulater
	// service, err = createService(qp.Credentials, qp.FailFast)
	// cacheRoot string
	// )
	// if err != nil {
	// return nil, err
	// }
	// switch qp.Category {
	// case path.FilesCategory:
	// c = &driveCache{
	// siteID: qp.ResourceOwner,
	// gs: service,
	// }
	// cacheRoot = "root"
	// default:
	// return nil, fmt.Errorf("ContainerResolver not present for %s type", qp.Category)
	// }
	// if err := c.Populate(ctx, cacheRoot); err != nil {
	// return nil, errors.Wrap(err, "populating container resolver")
	// }
	// return c, nil
}

View File

@ -241,7 +241,7 @@ func (pb Builder) verifyPrefix(tenant, resourceOwner string) error {
}
if len(resourceOwner) == 0 {
return errors.Wrap(errMissingSegment, "user")
return errors.Wrap(errMissingSegment, "resourceOwner")
}
if len(pb.elements) == 0 {