move sharepoint onto map-based lookup

Refactors the sharepoint site lookup to use the
id-to-name maps. This introduces a temporary regression
that will be resolved in the next PR: we no longer
match on webURL suffixes, and instead require
a complete match.

Also, migrates the sharepoint lookup code out
of GC and into Discovery.
This commit is contained in:
ryanfkeepers 2023-03-27 15:41:05 -06:00
parent cdbf3910e8
commit 3531b3c84c
12 changed files with 487 additions and 435 deletions

View File

@ -167,7 +167,7 @@ func createExchangeCmd(cmd *cobra.Command, args []string) error {
idToPN, pnToID, err := m365.UsersMap(ctx, *acct, errs)
if err != nil {
return Only(ctx, clues.Wrap(err, "Failed to retrieve M365 user(s)"))
return Only(ctx, clues.Wrap(err, "Failed to retrieve M365 users"))
}
selectorSet := []selectors.Selector{}

View File

@ -7,18 +7,20 @@ import (
"github.com/pkg/errors"
"github.com/spf13/cobra"
"github.com/spf13/pflag"
"golang.org/x/exp/maps"
"golang.org/x/exp/slices"
"github.com/alcionai/corso/src/cli/options"
. "github.com/alcionai/corso/src/cli/print"
"github.com/alcionai/corso/src/cli/utils"
"github.com/alcionai/corso/src/internal/connector"
"github.com/alcionai/corso/src/internal/connector/graph"
"github.com/alcionai/corso/src/internal/data"
"github.com/alcionai/corso/src/pkg/backup/details"
"github.com/alcionai/corso/src/pkg/fault"
"github.com/alcionai/corso/src/pkg/filters"
"github.com/alcionai/corso/src/pkg/path"
"github.com/alcionai/corso/src/pkg/repository"
"github.com/alcionai/corso/src/pkg/selectors"
"github.com/alcionai/corso/src/pkg/services/m365"
)
// ------------------------------------------------------------------------------------------------
@ -154,20 +156,19 @@ func createSharePointCmd(cmd *cobra.Command, args []string) error {
// TODO: log/print recoverable errors
errs := fault.New(false)
// TODO: discovery of sharepoint sites instead of early GC construction.
gc, err := connector.NewGraphConnector(ctx, graph.HTTPClient(graph.NoTimeout()), *acct, connector.Sites, errs)
idToURL, urlToID, err := m365.SitesMap(ctx, *acct, errs)
if err != nil {
return Only(ctx, clues.Wrap(err, "Failed to connect to Microsoft APIs"))
return Only(ctx, clues.Wrap(err, "Failed to retrieve M365 sites"))
}
sel, err := sharePointBackupCreateSelectors(ctx, utils.SiteID, utils.WebURL, utils.CategoryData, gc)
sel, err := sharePointBackupCreateSelectors(ctx, urlToID, utils.SiteID, utils.WebURL, utils.CategoryData)
if err != nil {
return Only(ctx, clues.Wrap(err, "Retrieving up sharepoint sites by ID and URL"))
}
selectorSet := []selectors.Selector{}
for _, discSel := range sel.SplitByResourceOwner(gc.GetSiteIDs()) {
for _, discSel := range sel.SplitByResourceOwner(maps.Keys(idToURL)) {
selectorSet = append(selectorSet, discSel.Selector)
}
@ -176,7 +177,7 @@ func createSharePointCmd(cmd *cobra.Command, args []string) error {
r,
"SharePoint", "site",
selectorSet,
nil, nil)
idToURL, urlToID)
}
func validateSharePointBackupCreateFlags(sites, weburls, cats []string) error {
@ -202,44 +203,30 @@ func validateSharePointBackupCreateFlags(sites, weburls, cats []string) error {
// TODO: users might specify a data type, this only supports AllData().
func sharePointBackupCreateSelectors(
ctx context.Context,
urlToID map[string]string,
sites, weburls, cats []string,
gc *connector.GraphConnector,
) (*selectors.SharePointBackup, error) {
if len(sites) == 0 && len(weburls) == 0 {
return selectors.NewSharePointBackup(selectors.None()), nil
}
for _, site := range sites {
if site == utils.Wildcard {
return includeAllSitesWithCategories(cats), nil
}
if filters.PathContains(sites).Compare(utils.Wildcard) {
return includeAllSitesWithCategories(urlToID, cats), nil
}
for _, wURL := range weburls {
if wURL == utils.Wildcard {
return includeAllSitesWithCategories(cats), nil
}
if filters.PathContains(weburls).Compare(utils.Wildcard) {
return includeAllSitesWithCategories(urlToID, cats), nil
}
// TODO: log/print recoverable errors
errs := fault.New(false)
union, err := gc.UnionSiteIDsAndWebURLs(ctx, sites, weburls, errs)
if err != nil {
return nil, err
}
sel := selectors.NewSharePointBackup(union)
sel := selectors.NewSharePointBackup(append(slices.Clone(sites), weburls...))
return addCategories(sel, cats), nil
}
func includeAllSitesWithCategories(categories []string) *selectors.SharePointBackup {
sel := addCategories(
selectors.NewSharePointBackup(selectors.Any()),
func includeAllSitesWithCategories(urlToID map[string]string, categories []string) *selectors.SharePointBackup {
return addCategories(
selectors.NewSharePointBackup(maps.Values(urlToID)),
categories)
return sel
}
func addCategories(sel *selectors.SharePointBackup, cats []string) *selectors.SharePointBackup {

View File

@ -11,7 +11,6 @@ import (
"github.com/alcionai/corso/src/cli/utils"
"github.com/alcionai/corso/src/cli/utils/testdata"
"github.com/alcionai/corso/src/internal/connector"
"github.com/alcionai/corso/src/internal/tester"
"github.com/alcionai/corso/src/pkg/selectors"
)
@ -108,13 +107,17 @@ func (suite *SharePointSuite) TestValidateSharePointBackupCreateFlags() {
}
func (suite *SharePointSuite) TestSharePointBackupCreateSelectors() {
comboString := []string{"id_1", "id_2"}
gc := &connector.GraphConnector{
Sites: map[string]string{
"url_1": "id_1",
"url_2": "id_2",
},
}
const (
id1 = "id_1"
id2 = "id_2"
url1 = "url_1/foo"
url2 = "url_2/bar"
)
var (
bothIDs = []string{id1, id2}
urlToID = map[string]string{url1: id1, url2: id2}
)
table := []struct {
name string
@ -137,73 +140,72 @@ func (suite *SharePointSuite) TestSharePointBackupCreateSelectors() {
{
name: "site wildcard",
site: []string{utils.Wildcard},
expect: selectors.Any(),
expect: bothIDs,
expectScopesLen: 2,
},
{
name: "url wildcard",
weburl: []string{utils.Wildcard},
expect: selectors.Any(),
expect: bothIDs,
expectScopesLen: 2,
},
{
name: "sites",
site: []string{"id_1", "id_2"},
expect: []string{"id_1", "id_2"},
site: []string{id1, id2},
expect: []string{id1, id2},
expectScopesLen: 2,
},
{
name: "urls",
weburl: []string{"url_1", "url_2"},
expect: []string{"id_1", "id_2"},
weburl: []string{url1, url2},
expect: []string{url1, url2},
expectScopesLen: 2,
},
{
name: "mix sites and urls",
site: []string{"id_1"},
weburl: []string{"url_2"},
expect: []string{"id_1", "id_2"},
site: []string{id1},
weburl: []string{url2},
expect: []string{id1, url2},
expectScopesLen: 2,
},
{
name: "duplicate sites and urls",
site: []string{"id_1", "id_2"},
weburl: []string{"url_1", "url_2"},
expect: comboString,
site: []string{id1, id2},
weburl: []string{url1, url2},
expect: []string{id1, id2, url1, url2},
expectScopesLen: 2,
},
{
name: "unnecessary site wildcard",
site: []string{"id_1", utils.Wildcard},
weburl: []string{"url_1", "url_2"},
expect: selectors.Any(),
site: []string{id1, utils.Wildcard},
weburl: []string{url1, url2},
expect: bothIDs,
expectScopesLen: 2,
},
{
name: "unnecessary url wildcard",
site: comboString,
weburl: []string{"url_1", utils.Wildcard},
expect: selectors.Any(),
site: []string{id1},
weburl: []string{url1, utils.Wildcard},
expect: bothIDs,
expectScopesLen: 2,
},
{
name: "Pages",
site: comboString,
site: bothIDs,
data: []string{dataPages},
expect: comboString,
expect: bothIDs,
expectScopesLen: 1,
},
}
for _, test := range table {
suite.Run(test.name, func() {
t := suite.T()
ctx, flush := tester.NewContext()
defer flush()
sel, err := sharePointBackupCreateSelectors(ctx, test.site, test.weburl, test.data, gc)
require.NoError(t, err, clues.ToCore(err))
t := suite.T()
sel, err := sharePointBackupCreateSelectors(ctx, urlToID, test.site, test.weburl, test.data)
require.NoError(t, err, clues.ToCore(err))
assert.ElementsMatch(t, test.expect, sel.DiscreteResourceOwners())
})
}

View File

@ -43,3 +43,10 @@ func OrNow(t *time.Time) time.Time {
return *t
}
// To returns a pointer to the given value. Handy for taking the
// address of string and other primitive literals without declaring
// an intermediate variable at the call site.
func To[T any](t T) *T {
	v := t
	return &v
}

View File

@ -5,6 +5,7 @@ import (
"strings"
"github.com/alcionai/clues"
"golang.org/x/exp/maps"
"github.com/alcionai/corso/src/internal/common"
"github.com/alcionai/corso/src/internal/connector/discovery"
@ -47,7 +48,7 @@ func (gc *GraphConnector) ProduceBackupCollections(
diagnostics.Index("service", sels.Service.String()))
defer end()
err := verifyBackupInputs(sels, gc.GetSiteIDs())
err := verifyBackupInputs(sels, maps.Keys(gc.ResourceOwnerIDToName))
if err != nil {
return nil, nil, clues.Stack(err).WithClues(ctx)
}

View File

@ -0,0 +1,144 @@
package api
import (
"context"
"fmt"
"strings"
"github.com/alcionai/clues"
msgraphgocore "github.com/microsoftgraph/msgraph-sdk-go-core"
"github.com/microsoftgraph/msgraph-sdk-go/models"
"github.com/pkg/errors"
"github.com/alcionai/corso/src/internal/common/ptr"
"github.com/alcionai/corso/src/internal/connector/graph"
"github.com/alcionai/corso/src/pkg/fault"
)
// ---------------------------------------------------------------------------
// controller
// ---------------------------------------------------------------------------
// Sites produces a Sites api provider backed by the receiver's client.
func (c Client) Sites() Sites {
	return Sites{Client: c}
}
// Sites is an interface-compliant provider of the client.
type Sites struct {
Client
}
// ---------------------------------------------------------------------------
// methods
// ---------------------------------------------------------------------------
// GetAll retrieves all sites in the tenant.
//
// Sites are fetched page-by-page via the graph SDK page iterator.
// Items failing validation are either silently dropped (when the
// failure is a known-skippable case, per validateSite) or recorded
// as recoverable errors on errs.  Iteration halts early if the error
// bus reports a non-recoverable failure.
func (c Sites) GetAll(ctx context.Context, errs *fault.Bus) ([]models.Siteable, error) {
	service, err := c.service()
	if err != nil {
		return nil, err
	}

	var resp models.SiteCollectionResponseable

	// initial page of results; subsequent pages are pulled by the iterator.
	resp, err = service.Client().Sites().Get(ctx, nil)
	if err != nil {
		return nil, graph.Wrap(ctx, err, "getting all sites")
	}

	iter, err := msgraphgocore.NewPageIterator(
		resp,
		service.Adapter(),
		models.CreateSiteCollectionResponseFromDiscriminatorValue)
	if err != nil {
		return nil, graph.Wrap(ctx, err, "creating sites iterator")
	}

	var (
		us = make([]models.Siteable, 0)
		el = errs.Local()
	)

	iterator := func(item any) bool {
		// stop iterating once a non-recoverable failure is recorded.
		if el.Failure() != nil {
			return false
		}

		s, err := validateSite(item)
		if errors.Is(err, errKnownSkippableCase) {
			// safe to no-op
			return true
		}

		if err != nil {
			el.AddRecoverable(graph.Wrap(ctx, err, "validating site"))
			return true
		}

		us = append(us, s)

		return true
	}

	if err := iter.Iterate(ctx, iterator); err != nil {
		return nil, graph.Wrap(ctx, err, "iterating all sites")
	}

	return us, el.Failure()
}
// GetByID retrieves a single site by its canonical site ID.
func (c Sites) GetByID(ctx context.Context, id string) (models.Siteable, error) {
	resp, err := c.stable.Client().SitesById(id).Get(ctx, nil)
	if err != nil {
		return nil, graph.Wrap(ctx, err, "getting site")
	}

	// err is provably nil here; return the literal for clarity.
	return resp, nil
}
// ---------------------------------------------------------------------------
// helpers
// ---------------------------------------------------------------------------
var errKnownSkippableCase = clues.New("case is known and skippable")
const personalSitePath = "sharepoint.com/personal/"
// validateSite ensures the item is a Siteable, and contains the necessary
// identifiers (ID, webURL, display name) that we require for all sites.
// Returns the item as a Siteable model.
//
// Returns errKnownSkippableCase for sites that are deliberately ignored:
// personal (oneDrive) sites and the unnamed built-in /search site.
func validateSite(item any) (models.Siteable, error) {
	m, ok := item.(models.Siteable)
	if !ok {
		return nil, clues.New(fmt.Sprintf("unexpected model: %T", item))
	}

	// ptr.Val yields the zero string for nil pointers, so the len check
	// covers both nil and empty values.
	id := ptr.Val(m.GetId())
	if len(id) == 0 {
		return nil, clues.New("missing ID")
	}

	url := ptr.Val(m.GetWebUrl())
	if len(url) == 0 {
		return nil, clues.New("missing webURL").With("site_id", id) // TODO: pii
	}

	// personal (ie: oneDrive) sites have to be filtered out server-side.
	// note: the prior `ok &&` guard here was dead code; a missing url
	// already returned above.
	if strings.Contains(url, personalSitePath) {
		return nil, clues.Stack(errKnownSkippableCase).
			With("site_id", id, "site_url", url) // TODO: pii
	}

	if name := ptr.Val(m.GetDisplayName()); len(name) == 0 {
		// the built-in site at "https://{tenant-domain}/search" never has a name.
		if strings.HasSuffix(url, "/search") {
			return nil, clues.Stack(errKnownSkippableCase).
				With("site_id", id, "site_url", url) // TODO: pii
		}

		return nil, clues.New("missing site display name").With("site_id", id)
	}

	return m, nil
}

View File

@ -0,0 +1,108 @@
package api
import (
"testing"
"github.com/alcionai/clues"
"github.com/microsoftgraph/msgraph-sdk-go/models"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/suite"
"github.com/alcionai/corso/src/internal/common/ptr"
"github.com/alcionai/corso/src/internal/tester"
)
type SitesUnitSuite struct {
tester.Suite
}
func TestSitesUnitSuite(t *testing.T) {
suite.Run(t, &SitesUnitSuite{Suite: tester.NewUnitSuite(t)})
}
// TestValidateSite covers validateSite's type assertion, its
// required-field checks (ID, webURL, display name), and the
// known-skippable cases (the built-in search site and personal
// oneDrive sites).
func (suite *SitesUnitSuite) TestValidateSite() {
	// fully-populated site used by the happy-path case.
	site := models.NewSite()
	site.SetWebUrl(ptr.To("sharepoint.com/sites/foo"))
	site.SetDisplayName(ptr.To("testsite"))
	site.SetId(ptr.To("testID"))

	tests := []struct {
		name           string
		args           interface{}
		want           models.Siteable
		errCheck       assert.ErrorAssertionFunc
		// when true, the returned error must wrap errKnownSkippableCase.
		errIsSkippable bool
	}{
		{
			name:     "Invalid type",
			args:     string("invalid type"),
			errCheck: assert.Error,
		},
		{
			name:     "No ID",
			args:     models.NewSite(),
			errCheck: assert.Error,
		},
		{
			name: "No WebURL",
			args: func() *models.Site {
				s := models.NewSite()
				s.SetId(ptr.To("id"))
				return s
			}(),
			errCheck: assert.Error,
		},
		{
			name: "No name",
			args: func() *models.Site {
				s := models.NewSite()
				s.SetId(ptr.To("id"))
				s.SetWebUrl(ptr.To("sharepoint.com/sites/foo"))
				return s
			}(),
			errCheck: assert.Error,
		},
		{
			name: "Search site",
			args: func() *models.Site {
				s := models.NewSite()
				s.SetId(ptr.To("id"))
				s.SetWebUrl(ptr.To("sharepoint.com/search"))
				return s
			}(),
			errCheck:       assert.Error,
			errIsSkippable: true,
		},
		{
			name: "Personal OneDrive",
			args: func() *models.Site {
				s := models.NewSite()
				s.SetId(ptr.To("id"))
				s.SetWebUrl(ptr.To("https://" + personalSitePath + "/someone's/onedrive"))
				return s
			}(),
			errCheck:       assert.Error,
			errIsSkippable: true,
		},
		{
			name:     "Valid Site",
			args:     site,
			want:     site,
			errCheck: assert.NoError,
		},
	}
	for _, test := range tests {
		suite.Run(test.name, func() {
			t := suite.T()

			got, err := validateSite(test.args)
			test.errCheck(t, err, clues.ToCore(err))

			if test.errIsSkippable {
				assert.ErrorIs(t, err, errKnownSkippableCase)
			}

			assert.Equal(t, test.want, got)
		})
	}
}

View File

@ -29,6 +29,24 @@ type getWithInfoer interface {
getInfoer
}
// ---------------------------------------------------------------------------
// helpers
// ---------------------------------------------------------------------------
// apiClient builds a discovery api client from the given account's
// m365 configuration.
func apiClient(ctx context.Context, acct account.Account) (api.Client, error) {
	cfg, err := acct.M365Config()
	if err != nil {
		return api.Client{}, clues.Wrap(err, "retrieving m365 account configuration").WithClues(ctx)
	}

	cli, err := api.NewClient(cfg)
	if err != nil {
		return api.Client{}, clues.Wrap(err, "creating api client").WithClues(ctx)
	}

	return cli, nil
}
// ---------------------------------------------------------------------------
// api
// ---------------------------------------------------------------------------
@ -39,19 +57,15 @@ func Users(
acct account.Account,
errs *fault.Bus,
) ([]models.Userable, error) {
m365, err := acct.M365Config()
client, err := apiClient(ctx, acct)
if err != nil {
return nil, clues.Wrap(err, "retrieving m365 account configuration").WithClues(ctx)
}
client, err := api.NewClient(m365)
if err != nil {
return nil, clues.Wrap(err, "creating api client").WithClues(ctx)
return nil, err
}
return client.Users().GetAll(ctx, errs)
}
// User fetches a single user's data.
func User(ctx context.Context, gwi getWithInfoer, userID string) (models.Userable, *api.UserInfo, error) {
u, err := gwi.GetByID(ctx, userID)
if err != nil {
@ -69,3 +83,17 @@ func User(ctx context.Context, gwi getWithInfoer, userID string) (models.Userabl
return u, ui, nil
}
// Sites enumerates all of the sharepoint sites in the tenant.
func Sites(ctx context.Context, acct account.Account, errs *fault.Bus) ([]models.Siteable, error) {
	c, err := apiClient(ctx, acct)
	if err != nil {
		return nil, err
	}

	return c.Sites().GetAll(ctx, errs)
}

View File

@ -31,10 +31,11 @@ func (suite *DiscoveryIntegrationSuite) TestUsers() {
ctx, flush := tester.NewContext()
defer flush()
t := suite.T()
acct := tester.NewM365Account(t)
errs := fault.New(true)
var (
t = suite.T()
acct = tester.NewM365Account(t)
errs = fault.New(true)
)
users, err := discovery.Users(ctx, acct, errs)
assert.NoError(t, err, clues.ToCore(err))
@ -42,8 +43,7 @@ func (suite *DiscoveryIntegrationSuite) TestUsers() {
ferrs := errs.Errors()
assert.Nil(t, ferrs.Failure)
assert.Empty(t, ferrs.Recovered)
assert.Less(t, 0, len(users))
assert.NotEmpty(t, users)
}
func (suite *DiscoveryIntegrationSuite) TestUsers_InvalidCredentials() {
@ -84,16 +84,85 @@ func (suite *DiscoveryIntegrationSuite) TestUsers_InvalidCredentials() {
for _, test := range table {
suite.Run(test.name, func() {
t := suite.T()
var (
t = suite.T()
a = test.acct(t)
errs = fault.New(true)
)
users, err := discovery.Users(ctx, a, errs)
assert.Empty(t, users, "returned some users")
assert.NotNil(t, err)
})
}
}
// TestSites verifies that discovery.Sites returns a non-empty site
// list with no recorded failures or recoverable errors when run
// against a live M365 account.
func (suite *DiscoveryIntegrationSuite) TestSites() {
	ctx, flush := tester.NewContext()
	defer flush()

	var (
		t    = suite.T()
		acct = tester.NewM365Account(t)
		errs = fault.New(true)
	)

	sites, err := discovery.Sites(ctx, acct, errs)
	assert.NoError(t, err, clues.ToCore(err))

	ferrs := errs.Errors()
	assert.Nil(t, ferrs.Failure)
	assert.Empty(t, ferrs.Recovered)
	assert.NotEmpty(t, sites)
}
func (suite *DiscoveryIntegrationSuite) TestSites_InvalidCredentials() {
ctx, flush := tester.NewContext()
defer flush()
table := []struct {
name string
acct func(t *testing.T) account.Account
}{
{
name: "Invalid Credentials",
acct: func(t *testing.T) account.Account {
a, err := account.NewAccount(
account.ProviderM365,
account.M365Config{
M365: credentials.M365{
AzureClientID: "Test",
AzureClientSecret: "without",
},
AzureTenantID: "data",
},
)
require.NoError(t, err, clues.ToCore(err))
return a
},
},
{
name: "Empty Credentials",
acct: func(t *testing.T) account.Account {
// intentionally swallowing the error here
a, _ := account.NewAccount(account.ProviderM365)
return a
},
},
}
for _, test := range table {
suite.Run(test.name, func() {
var (
t = suite.T()
a = test.acct(t)
errs = fault.New(true)
)
a := test.acct(t)
errs := fault.New(true)
users, err := discovery.Users(ctx, a, errs)
assert.Empty(t, users, "returned some users")
assert.NotNil(t, err)
// TODO(ashmrtn): Uncomment when fault package is used in discovery API.
// assert.NotNil(t, errs.Err())
})
}
}

View File

@ -4,29 +4,19 @@ package connector
import (
"context"
"fmt"
"net/http"
"runtime/trace"
"strings"
"sync"
"github.com/alcionai/clues"
"github.com/microsoft/kiota-abstractions-go/serialization"
msgraphgocore "github.com/microsoftgraph/msgraph-sdk-go-core"
"github.com/microsoftgraph/msgraph-sdk-go/models"
"github.com/pkg/errors"
"golang.org/x/exp/maps"
"github.com/alcionai/corso/src/internal/common/ptr"
"github.com/alcionai/corso/src/internal/connector/discovery/api"
"github.com/alcionai/corso/src/internal/connector/graph"
"github.com/alcionai/corso/src/internal/connector/sharepoint"
"github.com/alcionai/corso/src/internal/connector/support"
"github.com/alcionai/corso/src/internal/data"
"github.com/alcionai/corso/src/internal/diagnostics"
"github.com/alcionai/corso/src/pkg/account"
"github.com/alcionai/corso/src/pkg/fault"
"github.com/alcionai/corso/src/pkg/filters"
)
// ---------------------------------------------------------------------------
@ -44,9 +34,6 @@ type GraphConnector struct {
tenant string
credentials account.M365Config
// TODO: remove in favor of the maps below.
Sites map[string]string // webURL -> siteID and siteID -> webURL
// maps of resource owner ids to names, and names to ids.
// not guaranteed to be populated, only here as a post-population
// reference for processes that choose to populate the values.
@ -100,12 +87,6 @@ func NewGraphConnector(
return nil, clues.Wrap(err, "creating api client").WithClues(ctx)
}
if r == AllResources || r == Sites {
if err = gc.setTenantSites(ctx, errs); err != nil {
return nil, clues.Wrap(err, "retrieveing tenant site list")
}
}
return &gc, nil
}
@ -179,115 +160,6 @@ func (gc *GraphConnector) createService() (*graph.Service, error) {
return graph.NewService(adapter), nil
}
// setTenantSites queries the M365 to identify the sites in the
// workspace. The sites field is updated during this method
// iff the returned error is nil.
func (gc *GraphConnector) setTenantSites(ctx context.Context, errs *fault.Bus) error {
gc.Sites = map[string]string{}
ctx, end := diagnostics.Span(ctx, "gc:setTenantSites")
defer end()
sites, err := getResources(
ctx,
gc.Service,
gc.tenant,
sharepoint.GetAllSitesForTenant,
models.CreateSiteCollectionResponseFromDiscriminatorValue,
identifySite,
errs)
if err != nil {
return err
}
gc.Sites = sites
return nil
}
var errKnownSkippableCase = clues.New("case is known and skippable")
const personalSitePath = "sharepoint.com/personal/"
// Transforms an interface{} into a key,value pair representing
// webURL:siteID.
//
// Returns errKnownSkippableCase for sites that are deliberately
// ignored: the unnamed built-in /search site and personal (oneDrive)
// sites.  Any other validation failure produces a plain error.
func identifySite(item any) (string, string, error) {
	m, ok := item.(models.Siteable)
	if !ok {
		return "", "", clues.New("non-Siteable item").With("item_type", fmt.Sprintf("%T", item))
	}

	id := ptr.Val(m.GetId())
	url, ok := ptr.ValOK(m.GetWebUrl())

	if m.GetName() == nil {
		// the built-in site at "https://{tenant-domain}/search" never has a name.
		if ok && strings.HasSuffix(url, "/search") {
			// TODO: pii siteID, on this and all following cases
			return "", "", clues.Stack(errKnownSkippableCase).With("site_id", id)
		}

		return "", "", clues.New("site has no name").With("site_id", id)
	}

	// personal (ie: oneDrive) sites have to be filtered out server-side.
	if ok && strings.Contains(url, personalSitePath) {
		return "", "", clues.Stack(errKnownSkippableCase).With("site_id", id)
	}

	return url, id, nil
}
// GetSiteWebURLs returns the WebURLs of sharepoint sites within the tenant.
func (gc *GraphConnector) GetSiteWebURLs() []string {
return maps.Keys(gc.Sites)
}
// GetSiteIds returns the canonical site IDs in the tenant
func (gc *GraphConnector) GetSiteIDs() []string {
return maps.Values(gc.Sites)
}
// UnionSiteIDsAndWebURLs reduces the id and url slices into a single slice of site IDs.
// WebURLs will run as a path-suffix style matcher. Callers may provide partial urls, though
// each element in the url must fully match. Ex: the webURL value "foo" will match "www.ex.com/foo",
// but not match "www.ex.com/foobar".
// The returned IDs are reduced to a set of unique values.
func (gc *GraphConnector) UnionSiteIDsAndWebURLs(
	ctx context.Context,
	ids, urls []string,
	errs *fault.Bus,
) ([]string, error) {
	// lazily populate the site map if it wasn't filled at construction.
	if len(gc.Sites) == 0 {
		if err := gc.setTenantSites(ctx, errs); err != nil {
			return nil, err
		}
	}

	// seed the result set with all caller-provided IDs.
	idm := map[string]struct{}{}
	for _, id := range ids {
		idm[id] = struct{}{}
	}

	match := filters.PathSuffix(urls)

	// for every map entry whose key suffix-matches a provided url,
	// include its mapped value (the site ID, for webURL keys).
	for url, id := range gc.Sites {
		if !match.Compare(url) {
			continue
		}

		idm[id] = struct{}{}
	}

	// flatten the set back into a slice.
	idsl := make([]string, 0, len(idm))
	for id := range idm {
		idsl = append(idsl, id)
	}

	return idsl, nil
}
// AwaitStatus waits for all gc tasks to complete and then returns status
func (gc *GraphConnector) Wait() *data.CollectionStats {
defer func() {
@ -343,59 +215,3 @@ func (gc *GraphConnector) incrementAwaitingMessages() {
func (gc *GraphConnector) incrementMessagesBy(num int) {
gc.wg.Add(num)
}
// ---------------------------------------------------------------------------
// Helper Funcs
// ---------------------------------------------------------------------------
func getResources(
ctx context.Context,
gs graph.Servicer,
tenantID string,
query func(context.Context, graph.Servicer) (serialization.Parsable, error),
parser func(parseNode serialization.ParseNode) (serialization.Parsable, error),
identify func(any) (string, string, error),
errs *fault.Bus,
) (map[string]string, error) {
resources := map[string]string{}
response, err := query(ctx, gs)
if err != nil {
return nil, graph.Wrap(ctx, err, "retrieving tenant's resources")
}
iter, err := msgraphgocore.NewPageIterator(response, gs.Adapter(), parser)
if err != nil {
return nil, graph.Stack(ctx, err)
}
el := errs.Local()
callbackFunc := func(item any) bool {
if el.Failure() != nil {
return false
}
k, v, err := identify(item)
if err != nil {
if !errors.Is(err, errKnownSkippableCase) {
el.AddRecoverable(clues.Stack(err).
WithClues(ctx).
With("query_url", gs.Adapter().GetBaseUrl()))
}
return true
}
resources[k] = v
resources[v] = k
return true
}
if err := iter.Iterate(ctx, callbackFunc); err != nil {
return nil, graph.Stack(ctx, err)
}
return resources, el.Failure()
}

View File

@ -38,103 +38,6 @@ func TestGraphConnectorUnitSuite(t *testing.T) {
suite.Run(t, &GraphConnectorUnitSuite{Suite: tester.NewUnitSuite(t)})
}
func (suite *GraphConnectorUnitSuite) TestUnionSiteIDsAndWebURLs() {
const (
url1 = "www.foo.com/bar"
url2 = "www.fnords.com/smarf"
path1 = "bar"
path2 = "/smarf"
id1 = "site-id-1"
id2 = "site-id-2"
)
gc := &GraphConnector{
// must be populated, else the func will try to make a graph call
// to retrieve site data.
Sites: map[string]string{
url1: id1,
url2: id2,
},
}
table := []struct {
name string
ids []string
urls []string
expect []string
}{
{
name: "nil",
},
{
name: "empty",
ids: []string{},
urls: []string{},
expect: []string{},
},
{
name: "ids only",
ids: []string{id1, id2},
urls: []string{},
expect: []string{id1, id2},
},
{
name: "urls only",
ids: []string{},
urls: []string{url1, url2},
expect: []string{id1, id2},
},
{
name: "url suffix only",
ids: []string{},
urls: []string{path1, path2},
expect: []string{id1, id2},
},
{
name: "url and suffix overlap",
ids: []string{},
urls: []string{url1, url2, path1, path2},
expect: []string{id1, id2},
},
{
name: "ids and urls, no overlap",
ids: []string{id1},
urls: []string{url2},
expect: []string{id1, id2},
},
{
name: "ids and urls, overlap",
ids: []string{id1, id2},
urls: []string{url1, url2},
expect: []string{id1, id2},
},
{
name: "partial non-match on path",
ids: []string{},
urls: []string{path1[2:], path2[2:]},
expect: []string{},
},
{
name: "partial non-match on url",
ids: []string{},
urls: []string{url1[5:], url2[5:]},
expect: []string{},
},
}
for _, test := range table {
suite.Run(test.name, func() {
ctx, flush := tester.NewContext()
defer flush()
t := suite.T()
result, err := gc.UnionSiteIDsAndWebURLs(ctx, test.ids, test.urls, fault.New(true))
assert.NoError(t, err, clues.ToCore(err))
assert.ElementsMatch(t, test.expect, result)
})
}
}
func (suite *GraphConnectorUnitSuite) TestPopulateOwnerIDAndNamesFrom() {
const (
ownerID = "owner-id"
@ -306,35 +209,6 @@ func (suite *GraphConnectorIntegrationSuite) SetupSuite() {
tester.LogTimeOfTest(suite.T())
}
// TestSetTenantSites verifies GraphConnector's ability to query
// the sites associated with the credentials
func (suite *GraphConnectorIntegrationSuite) TestSetTenantSites() {
newConnector := GraphConnector{
tenant: "test_tenant",
Sites: make(map[string]string, 0),
credentials: suite.connector.credentials,
}
ctx, flush := tester.NewContext()
defer flush()
t := suite.T()
service, err := newConnector.createService()
require.NoError(t, err, clues.ToCore(err))
newConnector.Service = service
assert.Equal(t, 0, len(newConnector.Sites))
err = newConnector.setTenantSites(ctx, fault.New(true))
assert.NoError(t, err, clues.ToCore(err))
assert.Less(t, 0, len(newConnector.Sites))
for _, site := range newConnector.Sites {
assert.NotContains(t, "sharepoint.com/personal/", site)
}
}
func (suite *GraphConnectorIntegrationSuite) TestRestoreFailsBadService() {
ctx, flush := tester.NewContext()
defer flush()

View File

@ -6,9 +6,8 @@ import (
"github.com/alcionai/clues"
"github.com/microsoftgraph/msgraph-sdk-go/models"
"github.com/alcionai/corso/src/internal/connector"
"github.com/alcionai/corso/src/internal/common/ptr"
"github.com/alcionai/corso/src/internal/connector/discovery"
"github.com/alcionai/corso/src/internal/connector/graph"
"github.com/alcionai/corso/src/pkg/account"
"github.com/alcionai/corso/src/pkg/fault"
)
@ -55,6 +54,22 @@ func Users(ctx context.Context, acct account.Account, errs *fault.Bus) ([]*User,
return ret, nil
}
// parseUser extracts the information from `models.Userable` that we
// care about.  Requires a non-nil principal name.  A missing ID is
// reported as the zero value rather than dereferencing a nil pointer.
func parseUser(item models.Userable) (*User, error) {
	if item.GetUserPrincipalName() == nil {
		return nil, clues.New("user missing principal name").
			With("user_id", ptr.Val(item.GetId())) // TODO: pii
	}

	u := &User{
		PrincipalName: *item.GetUserPrincipalName(),
		ID:            ptr.Val(item.GetId()),
	}

	if item.GetDisplayName() != nil {
		u.Name = *item.GetDisplayName()
	}

	return u, nil
}
// UsersMap retrieves all users in the tenant, and returns two maps: one id-to-principalName,
// and one principalName-to-id.
func UsersMap(
@ -91,55 +106,56 @@ type Site struct {
// Sites returns a list of Sites in a specified M365 tenant
func Sites(ctx context.Context, acct account.Account, errs *fault.Bus) ([]*Site, error) {
gc, err := connector.NewGraphConnector(ctx, graph.HTTPClient(graph.NoTimeout()), acct, connector.Sites, errs)
sites, err := discovery.Sites(ctx, acct, errs)
if err != nil {
return nil, clues.Wrap(err, "initializing M365 graph connection")
}
// gc.Sites is a map with keys: SiteURL, values: ID
ret := make([]*Site, 0, len(gc.Sites))
for k, v := range gc.Sites {
ret = append(ret, &Site{
WebURL: k,
ID: v,
})
ret := make([]*Site, 0, len(sites))
for _, s := range sites {
ps, err := parseSite(s)
if err != nil {
return nil, clues.Wrap(err, "parsing siteable")
}
ret = append(ret, ps)
}
return ret, nil
}
// SiteURLs returns a list of SharePoint site WebURLs in the specified M365 tenant
func SiteURLs(ctx context.Context, acct account.Account, errs *fault.Bus) ([]string, error) {
gc, err := connector.NewGraphConnector(ctx, graph.HTTPClient(graph.NoTimeout()), acct, connector.Sites, errs)
// parseSite extracts the information from `models.Siteable` we care about
func parseSite(item models.Siteable) (*Site, error) {
	return &Site{
		ID:     ptr.Val(item.GetId()),
		WebURL: ptr.Val(item.GetWebUrl()),
	}, nil
}
// SitesMap retrieves all sites in the tenant, and returns two maps: one id-to-webURL,
// and one webURL-to-id.
func SitesMap(
ctx context.Context,
acct account.Account,
errs *fault.Bus,
) (map[string]string, map[string]string, error) {
sites, err := Sites(ctx, acct, errs)
if err != nil {
return nil, clues.Wrap(err, "initializing M365 graph connection")
return nil, nil, err
}
return gc.GetSiteWebURLs(), nil
}
// SiteIDs returns a list of SharePoint sites IDs in the specified M365 tenant
func SiteIDs(ctx context.Context, acct account.Account, errs *fault.Bus) ([]string, error) {
gc, err := connector.NewGraphConnector(ctx, graph.HTTPClient(graph.NoTimeout()), acct, connector.Sites, errs)
if err != nil {
return nil, clues.Wrap(err, "initializing graph connection")
}
return gc.GetSiteIDs(), nil
}
// parseUser extracts information from `models.Userable` we care about
func parseUser(item models.Userable) (*User, error) {
if item.GetUserPrincipalName() == nil {
return nil, clues.New("user missing principal name").
With("user_id", *item.GetId()) // TODO: pii
}
u := &User{PrincipalName: *item.GetUserPrincipalName(), ID: *item.GetId()}
if item.GetDisplayName() != nil {
u.Name = *item.GetDisplayName()
}
return u, nil
var (
idToName = make(map[string]string, len(sites))
nameToID = make(map[string]string, len(sites))
)
for _, s := range sites {
idToName[s.ID] = s.WebURL
nameToID[s.WebURL] = s.ID
}
return idToName, nameToID, nil
}