move sharepoint onto map-based lookup

Refactors the sharepoint site lookup to use the
id-to-name maps.  This has a momentary regression
that will get solved in the next PR: we no longer
match on weburl suffixes, and instead require
a complete match.

Also, migrates the sharepoint lookup code out
of GC and into Discovery.
This commit is contained in:
ryanfkeepers 2023-03-27 15:41:05 -06:00
parent cdbf3910e8
commit 3531b3c84c
12 changed files with 487 additions and 435 deletions

View File

@ -167,7 +167,7 @@ func createExchangeCmd(cmd *cobra.Command, args []string) error {
idToPN, pnToID, err := m365.UsersMap(ctx, *acct, errs) idToPN, pnToID, err := m365.UsersMap(ctx, *acct, errs)
if err != nil { if err != nil {
return Only(ctx, clues.Wrap(err, "Failed to retrieve M365 user(s)")) return Only(ctx, clues.Wrap(err, "Failed to retrieve M365 users"))
} }
selectorSet := []selectors.Selector{} selectorSet := []selectors.Selector{}

View File

@ -7,18 +7,20 @@ import (
"github.com/pkg/errors" "github.com/pkg/errors"
"github.com/spf13/cobra" "github.com/spf13/cobra"
"github.com/spf13/pflag" "github.com/spf13/pflag"
"golang.org/x/exp/maps"
"golang.org/x/exp/slices"
"github.com/alcionai/corso/src/cli/options" "github.com/alcionai/corso/src/cli/options"
. "github.com/alcionai/corso/src/cli/print" . "github.com/alcionai/corso/src/cli/print"
"github.com/alcionai/corso/src/cli/utils" "github.com/alcionai/corso/src/cli/utils"
"github.com/alcionai/corso/src/internal/connector"
"github.com/alcionai/corso/src/internal/connector/graph"
"github.com/alcionai/corso/src/internal/data" "github.com/alcionai/corso/src/internal/data"
"github.com/alcionai/corso/src/pkg/backup/details" "github.com/alcionai/corso/src/pkg/backup/details"
"github.com/alcionai/corso/src/pkg/fault" "github.com/alcionai/corso/src/pkg/fault"
"github.com/alcionai/corso/src/pkg/filters"
"github.com/alcionai/corso/src/pkg/path" "github.com/alcionai/corso/src/pkg/path"
"github.com/alcionai/corso/src/pkg/repository" "github.com/alcionai/corso/src/pkg/repository"
"github.com/alcionai/corso/src/pkg/selectors" "github.com/alcionai/corso/src/pkg/selectors"
"github.com/alcionai/corso/src/pkg/services/m365"
) )
// ------------------------------------------------------------------------------------------------ // ------------------------------------------------------------------------------------------------
@ -154,20 +156,19 @@ func createSharePointCmd(cmd *cobra.Command, args []string) error {
// TODO: log/print recoverable errors // TODO: log/print recoverable errors
errs := fault.New(false) errs := fault.New(false)
// TODO: discovery of sharepoint sites instead of early GC construction. idToURL, urlToID, err := m365.SitesMap(ctx, *acct, errs)
gc, err := connector.NewGraphConnector(ctx, graph.HTTPClient(graph.NoTimeout()), *acct, connector.Sites, errs)
if err != nil { if err != nil {
return Only(ctx, clues.Wrap(err, "Failed to connect to Microsoft APIs")) return Only(ctx, clues.Wrap(err, "Failed to retrieve M365 sites"))
} }
sel, err := sharePointBackupCreateSelectors(ctx, utils.SiteID, utils.WebURL, utils.CategoryData, gc) sel, err := sharePointBackupCreateSelectors(ctx, urlToID, utils.SiteID, utils.WebURL, utils.CategoryData)
if err != nil { if err != nil {
return Only(ctx, clues.Wrap(err, "Retrieving up sharepoint sites by ID and URL")) return Only(ctx, clues.Wrap(err, "Retrieving up sharepoint sites by ID and URL"))
} }
selectorSet := []selectors.Selector{} selectorSet := []selectors.Selector{}
for _, discSel := range sel.SplitByResourceOwner(gc.GetSiteIDs()) { for _, discSel := range sel.SplitByResourceOwner(maps.Keys(idToURL)) {
selectorSet = append(selectorSet, discSel.Selector) selectorSet = append(selectorSet, discSel.Selector)
} }
@ -176,7 +177,7 @@ func createSharePointCmd(cmd *cobra.Command, args []string) error {
r, r,
"SharePoint", "site", "SharePoint", "site",
selectorSet, selectorSet,
nil, nil) idToURL, urlToID)
} }
func validateSharePointBackupCreateFlags(sites, weburls, cats []string) error { func validateSharePointBackupCreateFlags(sites, weburls, cats []string) error {
@ -202,44 +203,30 @@ func validateSharePointBackupCreateFlags(sites, weburls, cats []string) error {
// TODO: users might specify a data type, this only supports AllData(). // TODO: users might specify a data type, this only supports AllData().
func sharePointBackupCreateSelectors( func sharePointBackupCreateSelectors(
ctx context.Context, ctx context.Context,
urlToID map[string]string,
sites, weburls, cats []string, sites, weburls, cats []string,
gc *connector.GraphConnector,
) (*selectors.SharePointBackup, error) { ) (*selectors.SharePointBackup, error) {
if len(sites) == 0 && len(weburls) == 0 { if len(sites) == 0 && len(weburls) == 0 {
return selectors.NewSharePointBackup(selectors.None()), nil return selectors.NewSharePointBackup(selectors.None()), nil
} }
for _, site := range sites { if filters.PathContains(sites).Compare(utils.Wildcard) {
if site == utils.Wildcard { return includeAllSitesWithCategories(urlToID, cats), nil
return includeAllSitesWithCategories(cats), nil
}
} }
for _, wURL := range weburls { if filters.PathContains(weburls).Compare(utils.Wildcard) {
if wURL == utils.Wildcard { return includeAllSitesWithCategories(urlToID, cats), nil
return includeAllSitesWithCategories(cats), nil
}
} }
// TODO: log/print recoverable errors sel := selectors.NewSharePointBackup(append(slices.Clone(sites), weburls...))
errs := fault.New(false)
union, err := gc.UnionSiteIDsAndWebURLs(ctx, sites, weburls, errs)
if err != nil {
return nil, err
}
sel := selectors.NewSharePointBackup(union)
return addCategories(sel, cats), nil return addCategories(sel, cats), nil
} }
func includeAllSitesWithCategories(categories []string) *selectors.SharePointBackup { func includeAllSitesWithCategories(urlToID map[string]string, categories []string) *selectors.SharePointBackup {
sel := addCategories( return addCategories(
selectors.NewSharePointBackup(selectors.Any()), selectors.NewSharePointBackup(maps.Values(urlToID)),
categories) categories)
return sel
} }
func addCategories(sel *selectors.SharePointBackup, cats []string) *selectors.SharePointBackup { func addCategories(sel *selectors.SharePointBackup, cats []string) *selectors.SharePointBackup {

View File

@ -11,7 +11,6 @@ import (
"github.com/alcionai/corso/src/cli/utils" "github.com/alcionai/corso/src/cli/utils"
"github.com/alcionai/corso/src/cli/utils/testdata" "github.com/alcionai/corso/src/cli/utils/testdata"
"github.com/alcionai/corso/src/internal/connector"
"github.com/alcionai/corso/src/internal/tester" "github.com/alcionai/corso/src/internal/tester"
"github.com/alcionai/corso/src/pkg/selectors" "github.com/alcionai/corso/src/pkg/selectors"
) )
@ -108,13 +107,17 @@ func (suite *SharePointSuite) TestValidateSharePointBackupCreateFlags() {
} }
func (suite *SharePointSuite) TestSharePointBackupCreateSelectors() { func (suite *SharePointSuite) TestSharePointBackupCreateSelectors() {
comboString := []string{"id_1", "id_2"} const (
gc := &connector.GraphConnector{ id1 = "id_1"
Sites: map[string]string{ id2 = "id_2"
"url_1": "id_1", url1 = "url_1/foo"
"url_2": "id_2", url2 = "url_2/bar"
}, )
}
var (
bothIDs = []string{id1, id2}
urlToID = map[string]string{url1: id1, url2: id2}
)
table := []struct { table := []struct {
name string name string
@ -137,73 +140,72 @@ func (suite *SharePointSuite) TestSharePointBackupCreateSelectors() {
{ {
name: "site wildcard", name: "site wildcard",
site: []string{utils.Wildcard}, site: []string{utils.Wildcard},
expect: selectors.Any(), expect: bothIDs,
expectScopesLen: 2, expectScopesLen: 2,
}, },
{ {
name: "url wildcard", name: "url wildcard",
weburl: []string{utils.Wildcard}, weburl: []string{utils.Wildcard},
expect: selectors.Any(), expect: bothIDs,
expectScopesLen: 2, expectScopesLen: 2,
}, },
{ {
name: "sites", name: "sites",
site: []string{"id_1", "id_2"}, site: []string{id1, id2},
expect: []string{"id_1", "id_2"}, expect: []string{id1, id2},
expectScopesLen: 2, expectScopesLen: 2,
}, },
{ {
name: "urls", name: "urls",
weburl: []string{"url_1", "url_2"}, weburl: []string{url1, url2},
expect: []string{"id_1", "id_2"}, expect: []string{url1, url2},
expectScopesLen: 2, expectScopesLen: 2,
}, },
{ {
name: "mix sites and urls", name: "mix sites and urls",
site: []string{"id_1"}, site: []string{id1},
weburl: []string{"url_2"}, weburl: []string{url2},
expect: []string{"id_1", "id_2"}, expect: []string{id1, url2},
expectScopesLen: 2, expectScopesLen: 2,
}, },
{ {
name: "duplicate sites and urls", name: "duplicate sites and urls",
site: []string{"id_1", "id_2"}, site: []string{id1, id2},
weburl: []string{"url_1", "url_2"}, weburl: []string{url1, url2},
expect: comboString, expect: []string{id1, id2, url1, url2},
expectScopesLen: 2, expectScopesLen: 2,
}, },
{ {
name: "unnecessary site wildcard", name: "unnecessary site wildcard",
site: []string{"id_1", utils.Wildcard}, site: []string{id1, utils.Wildcard},
weburl: []string{"url_1", "url_2"}, weburl: []string{url1, url2},
expect: selectors.Any(), expect: bothIDs,
expectScopesLen: 2, expectScopesLen: 2,
}, },
{ {
name: "unnecessary url wildcard", name: "unnecessary url wildcard",
site: comboString, site: []string{id1},
weburl: []string{"url_1", utils.Wildcard}, weburl: []string{url1, utils.Wildcard},
expect: selectors.Any(), expect: bothIDs,
expectScopesLen: 2, expectScopesLen: 2,
}, },
{ {
name: "Pages", name: "Pages",
site: comboString, site: bothIDs,
data: []string{dataPages}, data: []string{dataPages},
expect: comboString, expect: bothIDs,
expectScopesLen: 1, expectScopesLen: 1,
}, },
} }
for _, test := range table { for _, test := range table {
suite.Run(test.name, func() { suite.Run(test.name, func() {
t := suite.T()
ctx, flush := tester.NewContext() ctx, flush := tester.NewContext()
defer flush() defer flush()
sel, err := sharePointBackupCreateSelectors(ctx, test.site, test.weburl, test.data, gc) t := suite.T()
require.NoError(t, err, clues.ToCore(err))
sel, err := sharePointBackupCreateSelectors(ctx, urlToID, test.site, test.weburl, test.data)
require.NoError(t, err, clues.ToCore(err))
assert.ElementsMatch(t, test.expect, sel.DiscreteResourceOwners()) assert.ElementsMatch(t, test.expect, sel.DiscreteResourceOwners())
}) })
} }

View File

@ -43,3 +43,10 @@ func OrNow(t *time.Time) time.Time {
return *t return *t
} }
// To generates a pointer from any value. Primarily useful
// for generating pointers to strings and other primitives
// without needing to store a second variable.
func To[T any](t T) *T {
return &t
}

View File

@ -5,6 +5,7 @@ import (
"strings" "strings"
"github.com/alcionai/clues" "github.com/alcionai/clues"
"golang.org/x/exp/maps"
"github.com/alcionai/corso/src/internal/common" "github.com/alcionai/corso/src/internal/common"
"github.com/alcionai/corso/src/internal/connector/discovery" "github.com/alcionai/corso/src/internal/connector/discovery"
@ -47,7 +48,7 @@ func (gc *GraphConnector) ProduceBackupCollections(
diagnostics.Index("service", sels.Service.String())) diagnostics.Index("service", sels.Service.String()))
defer end() defer end()
err := verifyBackupInputs(sels, gc.GetSiteIDs()) err := verifyBackupInputs(sels, maps.Keys(gc.ResourceOwnerIDToName))
if err != nil { if err != nil {
return nil, nil, clues.Stack(err).WithClues(ctx) return nil, nil, clues.Stack(err).WithClues(ctx)
} }

View File

@ -0,0 +1,144 @@
package api
import (
"context"
"fmt"
"strings"
"github.com/alcionai/clues"
msgraphgocore "github.com/microsoftgraph/msgraph-sdk-go-core"
"github.com/microsoftgraph/msgraph-sdk-go/models"
"github.com/pkg/errors"
"github.com/alcionai/corso/src/internal/common/ptr"
"github.com/alcionai/corso/src/internal/connector/graph"
"github.com/alcionai/corso/src/pkg/fault"
)
// ---------------------------------------------------------------------------
// controller
// ---------------------------------------------------------------------------
func (c Client) Sites() Sites {
return Sites{c}
}
// Sites is an interface-compliant provider of the client.
type Sites struct {
Client
}
// ---------------------------------------------------------------------------
// methods
// ---------------------------------------------------------------------------
// GetAll retrieves all sites.
func (c Sites) GetAll(ctx context.Context, errs *fault.Bus) ([]models.Siteable, error) {
service, err := c.service()
if err != nil {
return nil, err
}
var resp models.SiteCollectionResponseable
resp, err = service.Client().Sites().Get(ctx, nil)
if err != nil {
return nil, graph.Wrap(ctx, err, "getting all sites")
}
iter, err := msgraphgocore.NewPageIterator(
resp,
service.Adapter(),
models.CreateSiteCollectionResponseFromDiscriminatorValue)
if err != nil {
return nil, graph.Wrap(ctx, err, "creating sites iterator")
}
var (
us = make([]models.Siteable, 0)
el = errs.Local()
)
iterator := func(item any) bool {
if el.Failure() != nil {
return false
}
s, err := validateSite(item)
if errors.Is(err, errKnownSkippableCase) {
// safe to no-op
return true
}
if err != nil {
el.AddRecoverable(graph.Wrap(ctx, err, "validating site"))
return true
}
us = append(us, s)
return true
}
if err := iter.Iterate(ctx, iterator); err != nil {
return nil, graph.Wrap(ctx, err, "iterating all sites")
}
return us, el.Failure()
}
func (c Sites) GetByID(ctx context.Context, id string) (models.Siteable, error) {
resp, err := c.stable.Client().SitesById(id).Get(ctx, nil)
if err != nil {
return nil, graph.Wrap(ctx, err, "getting site")
}
return resp, err
}
// ---------------------------------------------------------------------------
// helpers
// ---------------------------------------------------------------------------
var errKnownSkippableCase = clues.New("case is known and skippable")
const personalSitePath = "sharepoint.com/personal/"
// validateSite ensures the item is a Siteable, and contains the necessary
// identifiers that we handle with all users.
// returns the item as a Siteable model.
func validateSite(item any) (models.Siteable, error) {
m, ok := item.(models.Siteable)
if !ok {
return nil, clues.New(fmt.Sprintf("unexpected model: %T", item))
}
id, ok := ptr.ValOK(m.GetId())
if !ok || len(id) == 0 {
return nil, clues.New("missing ID")
}
url, ok := ptr.ValOK(m.GetWebUrl())
if !ok || len(url) == 0 {
return nil, clues.New("missing webURL").With("site_id", id) // TODO: pii
}
// personal (ie: oneDrive) sites have to be filtered out server-side.
if ok && strings.Contains(url, personalSitePath) {
return nil, clues.Stack(errKnownSkippableCase).
With("site_id", id, "site_url", url) // TODO: pii
}
if name, ok := ptr.ValOK(m.GetDisplayName()); !ok || len(name) == 0 {
// the built-in site at "https://{tenant-domain}/search" never has a name.
if strings.HasSuffix(url, "/search") {
return nil, clues.Stack(errKnownSkippableCase).
With("site_id", id, "site_url", url) // TODO: pii
}
return nil, clues.New("missing site display name").With("site_id", id)
}
return m, nil
}

View File

@ -0,0 +1,108 @@
package api
import (
"testing"
"github.com/alcionai/clues"
"github.com/microsoftgraph/msgraph-sdk-go/models"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/suite"
"github.com/alcionai/corso/src/internal/common/ptr"
"github.com/alcionai/corso/src/internal/tester"
)
type SitesUnitSuite struct {
tester.Suite
}
func TestSitesUnitSuite(t *testing.T) {
suite.Run(t, &SitesUnitSuite{Suite: tester.NewUnitSuite(t)})
}
func (suite *SitesUnitSuite) TestValidateSite() {
site := models.NewSite()
site.SetWebUrl(ptr.To("sharepoint.com/sites/foo"))
site.SetDisplayName(ptr.To("testsite"))
site.SetId(ptr.To("testID"))
tests := []struct {
name string
args interface{}
want models.Siteable
errCheck assert.ErrorAssertionFunc
errIsSkippable bool
}{
{
name: "Invalid type",
args: string("invalid type"),
errCheck: assert.Error,
},
{
name: "No ID",
args: models.NewSite(),
errCheck: assert.Error,
},
{
name: "No WebURL",
args: func() *models.Site {
s := models.NewSite()
s.SetId(ptr.To("id"))
return s
}(),
errCheck: assert.Error,
},
{
name: "No name",
args: func() *models.Site {
s := models.NewSite()
s.SetId(ptr.To("id"))
s.SetWebUrl(ptr.To("sharepoint.com/sites/foo"))
return s
}(),
errCheck: assert.Error,
},
{
name: "Search site",
args: func() *models.Site {
s := models.NewSite()
s.SetId(ptr.To("id"))
s.SetWebUrl(ptr.To("sharepoint.com/search"))
return s
}(),
errCheck: assert.Error,
errIsSkippable: true,
},
{
name: "Personal OneDrive",
args: func() *models.Site {
s := models.NewSite()
s.SetId(ptr.To("id"))
s.SetWebUrl(ptr.To("https://" + personalSitePath + "/someone's/onedrive"))
return s
}(),
errCheck: assert.Error,
errIsSkippable: true,
},
{
name: "Valid Site",
args: site,
want: site,
errCheck: assert.NoError,
},
}
for _, test := range tests {
suite.Run(test.name, func() {
t := suite.T()
got, err := validateSite(test.args)
test.errCheck(t, err, clues.ToCore(err))
if test.errIsSkippable {
assert.ErrorIs(t, err, errKnownSkippableCase)
}
assert.Equal(t, test.want, got)
})
}
}

View File

@ -29,6 +29,24 @@ type getWithInfoer interface {
getInfoer getInfoer
} }
// ---------------------------------------------------------------------------
// helpers
// ---------------------------------------------------------------------------
func apiClient(ctx context.Context, acct account.Account) (api.Client, error) {
m365, err := acct.M365Config()
if err != nil {
return api.Client{}, clues.Wrap(err, "retrieving m365 account configuration").WithClues(ctx)
}
client, err := api.NewClient(m365)
if err != nil {
return api.Client{}, clues.Wrap(err, "creating api client").WithClues(ctx)
}
return client, nil
}
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------
// api // api
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------
@ -39,19 +57,15 @@ func Users(
acct account.Account, acct account.Account,
errs *fault.Bus, errs *fault.Bus,
) ([]models.Userable, error) { ) ([]models.Userable, error) {
m365, err := acct.M365Config() client, err := apiClient(ctx, acct)
if err != nil { if err != nil {
return nil, clues.Wrap(err, "retrieving m365 account configuration").WithClues(ctx) return nil, err
}
client, err := api.NewClient(m365)
if err != nil {
return nil, clues.Wrap(err, "creating api client").WithClues(ctx)
} }
return client.Users().GetAll(ctx, errs) return client.Users().GetAll(ctx, errs)
} }
// User fetches a single user's data.
func User(ctx context.Context, gwi getWithInfoer, userID string) (models.Userable, *api.UserInfo, error) { func User(ctx context.Context, gwi getWithInfoer, userID string) (models.Userable, *api.UserInfo, error) {
u, err := gwi.GetByID(ctx, userID) u, err := gwi.GetByID(ctx, userID)
if err != nil { if err != nil {
@ -69,3 +83,17 @@ func User(ctx context.Context, gwi getWithInfoer, userID string) (models.Userabl
return u, ui, nil return u, ui, nil
} }
// Sites fetches all sharepoint sites in the tenant
func Sites(
ctx context.Context,
acct account.Account,
errs *fault.Bus,
) ([]models.Siteable, error) {
client, err := apiClient(ctx, acct)
if err != nil {
return nil, err
}
return client.Sites().GetAll(ctx, errs)
}

View File

@ -31,10 +31,11 @@ func (suite *DiscoveryIntegrationSuite) TestUsers() {
ctx, flush := tester.NewContext() ctx, flush := tester.NewContext()
defer flush() defer flush()
t := suite.T() var (
t = suite.T()
acct := tester.NewM365Account(t) acct = tester.NewM365Account(t)
errs := fault.New(true) errs = fault.New(true)
)
users, err := discovery.Users(ctx, acct, errs) users, err := discovery.Users(ctx, acct, errs)
assert.NoError(t, err, clues.ToCore(err)) assert.NoError(t, err, clues.ToCore(err))
@ -42,8 +43,7 @@ func (suite *DiscoveryIntegrationSuite) TestUsers() {
ferrs := errs.Errors() ferrs := errs.Errors()
assert.Nil(t, ferrs.Failure) assert.Nil(t, ferrs.Failure)
assert.Empty(t, ferrs.Recovered) assert.Empty(t, ferrs.Recovered)
assert.NotEmpty(t, users)
assert.Less(t, 0, len(users))
} }
func (suite *DiscoveryIntegrationSuite) TestUsers_InvalidCredentials() { func (suite *DiscoveryIntegrationSuite) TestUsers_InvalidCredentials() {
@ -84,16 +84,85 @@ func (suite *DiscoveryIntegrationSuite) TestUsers_InvalidCredentials() {
for _, test := range table { for _, test := range table {
suite.Run(test.name, func() { suite.Run(test.name, func() {
t := suite.T() var (
t = suite.T()
a = test.acct(t)
errs = fault.New(true)
)
users, err := discovery.Users(ctx, a, errs)
assert.Empty(t, users, "returned some users")
assert.NotNil(t, err)
})
}
}
func (suite *DiscoveryIntegrationSuite) TestSites() {
ctx, flush := tester.NewContext()
defer flush()
var (
t = suite.T()
acct = tester.NewM365Account(t)
errs = fault.New(true)
)
sites, err := discovery.Sites(ctx, acct, errs)
assert.NoError(t, err, clues.ToCore(err))
ferrs := errs.Errors()
assert.Nil(t, ferrs.Failure)
assert.Empty(t, ferrs.Recovered)
assert.NotEmpty(t, sites)
}
func (suite *DiscoveryIntegrationSuite) TestSites_InvalidCredentials() {
ctx, flush := tester.NewContext()
defer flush()
table := []struct {
name string
acct func(t *testing.T) account.Account
}{
{
name: "Invalid Credentials",
acct: func(t *testing.T) account.Account {
a, err := account.NewAccount(
account.ProviderM365,
account.M365Config{
M365: credentials.M365{
AzureClientID: "Test",
AzureClientSecret: "without",
},
AzureTenantID: "data",
},
)
require.NoError(t, err, clues.ToCore(err))
return a
},
},
{
name: "Empty Credentials",
acct: func(t *testing.T) account.Account {
// intentionally swallowing the error here
a, _ := account.NewAccount(account.ProviderM365)
return a
},
},
}
for _, test := range table {
suite.Run(test.name, func() {
var (
t = suite.T()
a = test.acct(t)
errs = fault.New(true)
)
a := test.acct(t)
errs := fault.New(true)
users, err := discovery.Users(ctx, a, errs) users, err := discovery.Users(ctx, a, errs)
assert.Empty(t, users, "returned some users") assert.Empty(t, users, "returned some users")
assert.NotNil(t, err) assert.NotNil(t, err)
// TODO(ashmrtn): Uncomment when fault package is used in discovery API.
// assert.NotNil(t, errs.Err())
}) })
} }
} }

View File

@ -4,29 +4,19 @@ package connector
import ( import (
"context" "context"
"fmt"
"net/http" "net/http"
"runtime/trace" "runtime/trace"
"strings"
"sync" "sync"
"github.com/alcionai/clues" "github.com/alcionai/clues"
"github.com/microsoft/kiota-abstractions-go/serialization"
msgraphgocore "github.com/microsoftgraph/msgraph-sdk-go-core"
"github.com/microsoftgraph/msgraph-sdk-go/models"
"github.com/pkg/errors" "github.com/pkg/errors"
"golang.org/x/exp/maps"
"github.com/alcionai/corso/src/internal/common/ptr"
"github.com/alcionai/corso/src/internal/connector/discovery/api" "github.com/alcionai/corso/src/internal/connector/discovery/api"
"github.com/alcionai/corso/src/internal/connector/graph" "github.com/alcionai/corso/src/internal/connector/graph"
"github.com/alcionai/corso/src/internal/connector/sharepoint"
"github.com/alcionai/corso/src/internal/connector/support" "github.com/alcionai/corso/src/internal/connector/support"
"github.com/alcionai/corso/src/internal/data" "github.com/alcionai/corso/src/internal/data"
"github.com/alcionai/corso/src/internal/diagnostics"
"github.com/alcionai/corso/src/pkg/account" "github.com/alcionai/corso/src/pkg/account"
"github.com/alcionai/corso/src/pkg/fault" "github.com/alcionai/corso/src/pkg/fault"
"github.com/alcionai/corso/src/pkg/filters"
) )
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------
@ -44,9 +34,6 @@ type GraphConnector struct {
tenant string tenant string
credentials account.M365Config credentials account.M365Config
// TODO: remove in favor of the maps below.
Sites map[string]string // webURL -> siteID and siteID -> webURL
// maps of resource owner ids to names, and names to ids. // maps of resource owner ids to names, and names to ids.
// not guaranteed to be populated, only here as a post-population // not guaranteed to be populated, only here as a post-population
// reference for processes that choose to populate the values. // reference for processes that choose to populate the values.
@ -100,12 +87,6 @@ func NewGraphConnector(
return nil, clues.Wrap(err, "creating api client").WithClues(ctx) return nil, clues.Wrap(err, "creating api client").WithClues(ctx)
} }
if r == AllResources || r == Sites {
if err = gc.setTenantSites(ctx, errs); err != nil {
return nil, clues.Wrap(err, "retrieveing tenant site list")
}
}
return &gc, nil return &gc, nil
} }
@ -179,115 +160,6 @@ func (gc *GraphConnector) createService() (*graph.Service, error) {
return graph.NewService(adapter), nil return graph.NewService(adapter), nil
} }
// setTenantSites queries the M365 to identify the sites in the
// workspace. The sites field is updated during this method
// iff the returned error is nil.
func (gc *GraphConnector) setTenantSites(ctx context.Context, errs *fault.Bus) error {
gc.Sites = map[string]string{}
ctx, end := diagnostics.Span(ctx, "gc:setTenantSites")
defer end()
sites, err := getResources(
ctx,
gc.Service,
gc.tenant,
sharepoint.GetAllSitesForTenant,
models.CreateSiteCollectionResponseFromDiscriminatorValue,
identifySite,
errs)
if err != nil {
return err
}
gc.Sites = sites
return nil
}
var errKnownSkippableCase = clues.New("case is known and skippable")
const personalSitePath = "sharepoint.com/personal/"
// Transforms an interface{} into a key,value pair representing
// siteName:siteID.
func identifySite(item any) (string, string, error) {
m, ok := item.(models.Siteable)
if !ok {
return "", "", clues.New("non-Siteable item").With("item_type", fmt.Sprintf("%T", item))
}
id := ptr.Val(m.GetId())
url, ok := ptr.ValOK(m.GetWebUrl())
if m.GetName() == nil {
// the built-in site at "https://{tenant-domain}/search" never has a name.
if ok && strings.HasSuffix(url, "/search") {
// TODO: pii siteID, on this and all following cases
return "", "", clues.Stack(errKnownSkippableCase).With("site_id", id)
}
return "", "", clues.New("site has no name").With("site_id", id)
}
// personal (ie: oneDrive) sites have to be filtered out server-side.
if ok && strings.Contains(url, personalSitePath) {
return "", "", clues.Stack(errKnownSkippableCase).With("site_id", id)
}
return url, id, nil
}
// GetSiteWebURLs returns the WebURLs of sharepoint sites within the tenant.
func (gc *GraphConnector) GetSiteWebURLs() []string {
return maps.Keys(gc.Sites)
}
// GetSiteIds returns the canonical site IDs in the tenant
func (gc *GraphConnector) GetSiteIDs() []string {
return maps.Values(gc.Sites)
}
// UnionSiteIDsAndWebURLs reduces the id and url slices into a single slice of site IDs.
// WebURLs will run as a path-suffix style matcher. Callers may provide partial urls, though
// each element in the url must fully match. Ex: the webURL value "foo" will match "www.ex.com/foo",
// but not match "www.ex.com/foobar".
// The returned IDs are reduced to a set of unique values.
func (gc *GraphConnector) UnionSiteIDsAndWebURLs(
ctx context.Context,
ids, urls []string,
errs *fault.Bus,
) ([]string, error) {
if len(gc.Sites) == 0 {
if err := gc.setTenantSites(ctx, errs); err != nil {
return nil, err
}
}
idm := map[string]struct{}{}
for _, id := range ids {
idm[id] = struct{}{}
}
match := filters.PathSuffix(urls)
for url, id := range gc.Sites {
if !match.Compare(url) {
continue
}
idm[id] = struct{}{}
}
idsl := make([]string, 0, len(idm))
for id := range idm {
idsl = append(idsl, id)
}
return idsl, nil
}
// AwaitStatus waits for all gc tasks to complete and then returns status // AwaitStatus waits for all gc tasks to complete and then returns status
func (gc *GraphConnector) Wait() *data.CollectionStats { func (gc *GraphConnector) Wait() *data.CollectionStats {
defer func() { defer func() {
@ -343,59 +215,3 @@ func (gc *GraphConnector) incrementAwaitingMessages() {
func (gc *GraphConnector) incrementMessagesBy(num int) { func (gc *GraphConnector) incrementMessagesBy(num int) {
gc.wg.Add(num) gc.wg.Add(num)
} }
// ---------------------------------------------------------------------------
// Helper Funcs
// ---------------------------------------------------------------------------
func getResources(
ctx context.Context,
gs graph.Servicer,
tenantID string,
query func(context.Context, graph.Servicer) (serialization.Parsable, error),
parser func(parseNode serialization.ParseNode) (serialization.Parsable, error),
identify func(any) (string, string, error),
errs *fault.Bus,
) (map[string]string, error) {
resources := map[string]string{}
response, err := query(ctx, gs)
if err != nil {
return nil, graph.Wrap(ctx, err, "retrieving tenant's resources")
}
iter, err := msgraphgocore.NewPageIterator(response, gs.Adapter(), parser)
if err != nil {
return nil, graph.Stack(ctx, err)
}
el := errs.Local()
callbackFunc := func(item any) bool {
if el.Failure() != nil {
return false
}
k, v, err := identify(item)
if err != nil {
if !errors.Is(err, errKnownSkippableCase) {
el.AddRecoverable(clues.Stack(err).
WithClues(ctx).
With("query_url", gs.Adapter().GetBaseUrl()))
}
return true
}
resources[k] = v
resources[v] = k
return true
}
if err := iter.Iterate(ctx, callbackFunc); err != nil {
return nil, graph.Stack(ctx, err)
}
return resources, el.Failure()
}

View File

@ -38,103 +38,6 @@ func TestGraphConnectorUnitSuite(t *testing.T) {
suite.Run(t, &GraphConnectorUnitSuite{Suite: tester.NewUnitSuite(t)}) suite.Run(t, &GraphConnectorUnitSuite{Suite: tester.NewUnitSuite(t)})
} }
func (suite *GraphConnectorUnitSuite) TestUnionSiteIDsAndWebURLs() {
const (
url1 = "www.foo.com/bar"
url2 = "www.fnords.com/smarf"
path1 = "bar"
path2 = "/smarf"
id1 = "site-id-1"
id2 = "site-id-2"
)
gc := &GraphConnector{
// must be populated, else the func will try to make a graph call
// to retrieve site data.
Sites: map[string]string{
url1: id1,
url2: id2,
},
}
table := []struct {
name string
ids []string
urls []string
expect []string
}{
{
name: "nil",
},
{
name: "empty",
ids: []string{},
urls: []string{},
expect: []string{},
},
{
name: "ids only",
ids: []string{id1, id2},
urls: []string{},
expect: []string{id1, id2},
},
{
name: "urls only",
ids: []string{},
urls: []string{url1, url2},
expect: []string{id1, id2},
},
{
name: "url suffix only",
ids: []string{},
urls: []string{path1, path2},
expect: []string{id1, id2},
},
{
name: "url and suffix overlap",
ids: []string{},
urls: []string{url1, url2, path1, path2},
expect: []string{id1, id2},
},
{
name: "ids and urls, no overlap",
ids: []string{id1},
urls: []string{url2},
expect: []string{id1, id2},
},
{
name: "ids and urls, overlap",
ids: []string{id1, id2},
urls: []string{url1, url2},
expect: []string{id1, id2},
},
{
name: "partial non-match on path",
ids: []string{},
urls: []string{path1[2:], path2[2:]},
expect: []string{},
},
{
name: "partial non-match on url",
ids: []string{},
urls: []string{url1[5:], url2[5:]},
expect: []string{},
},
}
for _, test := range table {
suite.Run(test.name, func() {
ctx, flush := tester.NewContext()
defer flush()
t := suite.T()
result, err := gc.UnionSiteIDsAndWebURLs(ctx, test.ids, test.urls, fault.New(true))
assert.NoError(t, err, clues.ToCore(err))
assert.ElementsMatch(t, test.expect, result)
})
}
}
func (suite *GraphConnectorUnitSuite) TestPopulateOwnerIDAndNamesFrom() { func (suite *GraphConnectorUnitSuite) TestPopulateOwnerIDAndNamesFrom() {
const ( const (
ownerID = "owner-id" ownerID = "owner-id"
@ -306,35 +209,6 @@ func (suite *GraphConnectorIntegrationSuite) SetupSuite() {
tester.LogTimeOfTest(suite.T()) tester.LogTimeOfTest(suite.T())
} }
// TestSetTenantSites verifies GraphConnector's ability to query
// the sites associated with the credentials
func (suite *GraphConnectorIntegrationSuite) TestSetTenantSites() {
newConnector := GraphConnector{
tenant: "test_tenant",
Sites: make(map[string]string, 0),
credentials: suite.connector.credentials,
}
ctx, flush := tester.NewContext()
defer flush()
t := suite.T()
service, err := newConnector.createService()
require.NoError(t, err, clues.ToCore(err))
newConnector.Service = service
assert.Equal(t, 0, len(newConnector.Sites))
err = newConnector.setTenantSites(ctx, fault.New(true))
assert.NoError(t, err, clues.ToCore(err))
assert.Less(t, 0, len(newConnector.Sites))
for _, site := range newConnector.Sites {
assert.NotContains(t, "sharepoint.com/personal/", site)
}
}
func (suite *GraphConnectorIntegrationSuite) TestRestoreFailsBadService() { func (suite *GraphConnectorIntegrationSuite) TestRestoreFailsBadService() {
ctx, flush := tester.NewContext() ctx, flush := tester.NewContext()
defer flush() defer flush()

View File

@ -6,9 +6,8 @@ import (
"github.com/alcionai/clues" "github.com/alcionai/clues"
"github.com/microsoftgraph/msgraph-sdk-go/models" "github.com/microsoftgraph/msgraph-sdk-go/models"
"github.com/alcionai/corso/src/internal/connector" "github.com/alcionai/corso/src/internal/common/ptr"
"github.com/alcionai/corso/src/internal/connector/discovery" "github.com/alcionai/corso/src/internal/connector/discovery"
"github.com/alcionai/corso/src/internal/connector/graph"
"github.com/alcionai/corso/src/pkg/account" "github.com/alcionai/corso/src/pkg/account"
"github.com/alcionai/corso/src/pkg/fault" "github.com/alcionai/corso/src/pkg/fault"
) )
@ -55,6 +54,22 @@ func Users(ctx context.Context, acct account.Account, errs *fault.Bus) ([]*User,
return ret, nil return ret, nil
} }
// parseUser extracts information from `models.Userable` we care about
func parseUser(item models.Userable) (*User, error) {
if item.GetUserPrincipalName() == nil {
return nil, clues.New("user missing principal name").
With("user_id", *item.GetId()) // TODO: pii
}
u := &User{PrincipalName: *item.GetUserPrincipalName(), ID: *item.GetId()}
if item.GetDisplayName() != nil {
u.Name = *item.GetDisplayName()
}
return u, nil
}
// UsersMap retrieves all users in the tenant, and returns two maps: one id-to-principalName, // UsersMap retrieves all users in the tenant, and returns two maps: one id-to-principalName,
// and one principalName-to-id. // and one principalName-to-id.
func UsersMap( func UsersMap(
@ -91,55 +106,56 @@ type Site struct {
// Sites returns a list of Sites in a specified M365 tenant // Sites returns a list of Sites in a specified M365 tenant
func Sites(ctx context.Context, acct account.Account, errs *fault.Bus) ([]*Site, error) { func Sites(ctx context.Context, acct account.Account, errs *fault.Bus) ([]*Site, error) {
gc, err := connector.NewGraphConnector(ctx, graph.HTTPClient(graph.NoTimeout()), acct, connector.Sites, errs) sites, err := discovery.Sites(ctx, acct, errs)
if err != nil { if err != nil {
return nil, clues.Wrap(err, "initializing M365 graph connection") return nil, clues.Wrap(err, "initializing M365 graph connection")
} }
// gc.Sites is a map with keys: SiteURL, values: ID ret := make([]*Site, 0, len(sites))
ret := make([]*Site, 0, len(gc.Sites))
for k, v := range gc.Sites { for _, s := range sites {
ret = append(ret, &Site{ ps, err := parseSite(s)
WebURL: k, if err != nil {
ID: v, return nil, clues.Wrap(err, "parsing siteable")
}) }
ret = append(ret, ps)
} }
return ret, nil return ret, nil
} }
// SiteURLs returns a list of SharePoint site WebURLs in the specified M365 tenant // parseSite extracts the information from `models.Siteable` we care about
func SiteURLs(ctx context.Context, acct account.Account, errs *fault.Bus) ([]string, error) { func parseSite(item models.Siteable) (*Site, error) {
gc, err := connector.NewGraphConnector(ctx, graph.HTTPClient(graph.NoTimeout()), acct, connector.Sites, errs) s := &Site{
ID: ptr.Val(item.GetId()),
WebURL: ptr.Val(item.GetWebUrl()),
}
return s, nil
}
// SitesMap retrieves all sites in the tenant, and returns two maps: one id-to-webURL,
// and one webURL-to-id.
func SitesMap(
ctx context.Context,
acct account.Account,
errs *fault.Bus,
) (map[string]string, map[string]string, error) {
sites, err := Sites(ctx, acct, errs)
if err != nil { if err != nil {
return nil, clues.Wrap(err, "initializing M365 graph connection") return nil, nil, err
} }
return gc.GetSiteWebURLs(), nil var (
} idToName = make(map[string]string, len(sites))
nameToID = make(map[string]string, len(sites))
// SiteIDs returns a list of SharePoint sites IDs in the specified M365 tenant )
func SiteIDs(ctx context.Context, acct account.Account, errs *fault.Bus) ([]string, error) {
gc, err := connector.NewGraphConnector(ctx, graph.HTTPClient(graph.NoTimeout()), acct, connector.Sites, errs) for _, s := range sites {
if err != nil { idToName[s.ID] = s.WebURL
return nil, clues.Wrap(err, "initializing graph connection") nameToID[s.WebURL] = s.ID
} }
return gc.GetSiteIDs(), nil return idToName, nameToID, nil
}
// parseUser extracts information from `models.Userable` we care about
func parseUser(item models.Userable) (*User, error) {
if item.GetUserPrincipalName() == nil {
return nil, clues.New("user missing principal name").
With("user_id", *item.GetId()) // TODO: pii
}
u := &User{PrincipalName: *item.GetUserPrincipalName(), ID: *item.GetId()}
if item.GetDisplayName() != nil {
u.Name = *item.GetDisplayName()
}
return u, nil
} }