From 3531b3c84ce7f151ff1a526130ea1ec8599ef342 Mon Sep 17 00:00:00 2001 From: ryanfkeepers Date: Mon, 27 Mar 2023 15:41:05 -0600 Subject: [PATCH] move sharepoint onto map-based lookup Refactors the sharepoint site lookup to use the id-to-name maps. This has a momentary regression that will get solved in the next PR: we no longer match on weburl suffixes, and instead require a complete match. Also, migrates the sharepoint lookup code out of GC and into Discovery. --- src/cli/backup/exchange.go | 2 +- src/cli/backup/sharepoint.go | 49 ++--- src/cli/backup/sharepoint_test.go | 66 ++++--- src/internal/common/ptr/pointer.go | 7 + src/internal/connector/data_collections.go | 3 +- src/internal/connector/discovery/api/sites.go | 144 ++++++++++++++ .../connector/discovery/api/sites_test.go | 108 ++++++++++ src/internal/connector/discovery/discovery.go | 42 +++- .../connector/discovery/discovery_test.go | 93 +++++++-- src/internal/connector/graph_connector.go | 184 ------------------ .../connector/graph_connector_test.go | 126 ------------ src/pkg/services/m365/m365.go | 98 ++++++---- 12 files changed, 487 insertions(+), 435 deletions(-) create mode 100644 src/internal/connector/discovery/api/sites.go create mode 100644 src/internal/connector/discovery/api/sites_test.go diff --git a/src/cli/backup/exchange.go b/src/cli/backup/exchange.go index aa1818be3..2345098e3 100644 --- a/src/cli/backup/exchange.go +++ b/src/cli/backup/exchange.go @@ -167,7 +167,7 @@ func createExchangeCmd(cmd *cobra.Command, args []string) error { idToPN, pnToID, err := m365.UsersMap(ctx, *acct, errs) if err != nil { - return Only(ctx, clues.Wrap(err, "Failed to retrieve M365 user(s)")) + return Only(ctx, clues.Wrap(err, "Failed to retrieve M365 users")) } selectorSet := []selectors.Selector{} diff --git a/src/cli/backup/sharepoint.go b/src/cli/backup/sharepoint.go index 77da2f37c..2293dee3b 100644 --- a/src/cli/backup/sharepoint.go +++ b/src/cli/backup/sharepoint.go @@ -7,18 +7,20 @@ import ( "github.com/pkg/errors" "github.com/spf13/cobra" "github.com/spf13/pflag" + "golang.org/x/exp/maps" + "golang.org/x/exp/slices" "github.com/alcionai/corso/src/cli/options" . "github.com/alcionai/corso/src/cli/print" "github.com/alcionai/corso/src/cli/utils" - "github.com/alcionai/corso/src/internal/connector" - "github.com/alcionai/corso/src/internal/connector/graph" "github.com/alcionai/corso/src/internal/data" "github.com/alcionai/corso/src/pkg/backup/details" "github.com/alcionai/corso/src/pkg/fault" + "github.com/alcionai/corso/src/pkg/filters" "github.com/alcionai/corso/src/pkg/path" "github.com/alcionai/corso/src/pkg/repository" "github.com/alcionai/corso/src/pkg/selectors" + "github.com/alcionai/corso/src/pkg/services/m365" ) // ------------------------------------------------------------------------------------------------ @@ -154,20 +156,19 @@ func createSharePointCmd(cmd *cobra.Command, args []string) error { // TODO: log/print recoverable errors errs := fault.New(false) - // TODO: discovery of sharepoint sites instead of early GC construction. - gc, err := connector.NewGraphConnector(ctx, graph.HTTPClient(graph.NoTimeout()), *acct, connector.Sites, errs) + idToURL, urlToID, err := m365.SitesMap(ctx, *acct, errs) if err != nil { - return Only(ctx, clues.Wrap(err, "Failed to connect to Microsoft APIs")) + return Only(ctx, clues.Wrap(err, "Failed to retrieve M365 sites")) } - sel, err := sharePointBackupCreateSelectors(ctx, utils.SiteID, utils.WebURL, utils.CategoryData, gc) + sel, err := sharePointBackupCreateSelectors(ctx, urlToID, utils.SiteID, utils.WebURL, utils.CategoryData) if err != nil { return Only(ctx, clues.Wrap(err, "Retrieving up sharepoint sites by ID and URL")) } selectorSet := []selectors.Selector{} - for _, discSel := range sel.SplitByResourceOwner(gc.GetSiteIDs()) { + for _, discSel := range sel.SplitByResourceOwner(maps.Keys(idToURL)) { selectorSet = append(selectorSet, discSel.Selector) } @@ -176,7 +177,7 @@ func createSharePointCmd(cmd *cobra.Command, args []string) error { r, "SharePoint", "site", selectorSet, - nil, nil) + idToURL, urlToID) } func validateSharePointBackupCreateFlags(sites, weburls, cats []string) error { @@ -202,44 +203,30 @@ func validateSharePointBackupCreateFlags(sites, weburls, cats []string) error { // TODO: users might specify a data type, this only supports AllData(). func sharePointBackupCreateSelectors( ctx context.Context, + urlToID map[string]string, sites, weburls, cats []string, - gc *connector.GraphConnector, ) (*selectors.SharePointBackup, error) { if len(sites) == 0 && len(weburls) == 0 { return selectors.NewSharePointBackup(selectors.None()), nil } - for _, site := range sites { - if site == utils.Wildcard { - return includeAllSitesWithCategories(cats), nil - } + if filters.PathContains(sites).Compare(utils.Wildcard) { + return includeAllSitesWithCategories(urlToID, cats), nil } - for _, wURL := range weburls { - if wURL == utils.Wildcard { - return includeAllSitesWithCategories(cats), nil - } + if filters.PathContains(weburls).Compare(utils.Wildcard) { + return includeAllSitesWithCategories(urlToID, cats), nil } - // TODO: log/print recoverable errors - errs := fault.New(false) - - union, err := gc.UnionSiteIDsAndWebURLs(ctx, sites, weburls, errs) - if err != nil { - return nil, err - } - - sel := selectors.NewSharePointBackup(union) + sel := selectors.NewSharePointBackup(append(slices.Clone(sites), weburls...)) return addCategories(sel, cats), nil } -func includeAllSitesWithCategories(categories []string) *selectors.SharePointBackup { - sel := addCategories( - selectors.NewSharePointBackup(selectors.Any()), +func includeAllSitesWithCategories(urlToID map[string]string, categories []string) *selectors.SharePointBackup { + return addCategories( + selectors.NewSharePointBackup(maps.Values(urlToID)), categories) - - return sel } func addCategories(sel *selectors.SharePointBackup, cats []string) *selectors.SharePointBackup { diff --git a/src/cli/backup/sharepoint_test.go b/src/cli/backup/sharepoint_test.go index 917977cd5..2f11571d9 100644 --- a/src/cli/backup/sharepoint_test.go +++ b/src/cli/backup/sharepoint_test.go @@ -11,7 +11,6 @@ import ( "github.com/alcionai/corso/src/cli/utils" "github.com/alcionai/corso/src/cli/utils/testdata" - "github.com/alcionai/corso/src/internal/connector" "github.com/alcionai/corso/src/internal/tester" "github.com/alcionai/corso/src/pkg/selectors" ) @@ -108,13 +107,17 @@ func (suite *SharePointSuite) TestValidateSharePointBackupCreateFlags() { } func (suite *SharePointSuite) TestSharePointBackupCreateSelectors() { - comboString := []string{"id_1", "id_2"} - gc := &connector.GraphConnector{ - Sites: map[string]string{ - "url_1": "id_1", - "url_2": "id_2", - }, - } + const ( + id1 = "id_1" + id2 = "id_2" + url1 = "url_1/foo" + url2 = "url_2/bar" + ) + + var ( + bothIDs = []string{id1, id2} + urlToID = map[string]string{url1: id1, url2: id2} + ) table := []struct { name string @@ -137,73 +140,72 @@ func (suite *SharePointSuite) TestSharePointBackupCreateSelectors() { { name: "site wildcard", site: []string{utils.Wildcard}, - expect: selectors.Any(), + expect: bothIDs, expectScopesLen: 2, }, { name: "url wildcard", weburl: []string{utils.Wildcard}, - expect: selectors.Any(), + expect: bothIDs, expectScopesLen: 2, }, { name: "sites", - site: []string{"id_1", "id_2"}, - expect: []string{"id_1", "id_2"}, + site: []string{id1, id2}, + expect: []string{id1, id2}, expectScopesLen: 2, }, { name: "urls", - weburl: []string{"url_1", "url_2"}, - expect: []string{"id_1", "id_2"}, + weburl: []string{url1, url2}, + expect: []string{url1, url2}, expectScopesLen: 2, }, { name: "mix sites and urls", - site: []string{"id_1"}, - weburl: []string{"url_2"}, - expect: []string{"id_1", "id_2"}, + site: []string{id1}, + weburl: []string{url2}, + expect: []string{id1, url2}, expectScopesLen: 2, }, { name: "duplicate sites and urls", - site: []string{"id_1", "id_2"}, - weburl: []string{"url_1", "url_2"}, - expect: comboString, + site: []string{id1, id2}, + weburl: []string{url1, url2}, + expect: []string{id1, id2, url1, url2}, expectScopesLen: 2, }, { name: "unnecessary site wildcard", - site: []string{"id_1", utils.Wildcard}, - weburl: []string{"url_1", "url_2"}, - expect: selectors.Any(), + site: []string{id1, utils.Wildcard}, + weburl: []string{url1, url2}, + expect: bothIDs, expectScopesLen: 2, }, { name: "unnecessary url wildcard", - site: comboString, - weburl: []string{"url_1", utils.Wildcard}, - expect: selectors.Any(), + site: []string{id1}, + weburl: []string{url1, utils.Wildcard}, + expect: bothIDs, expectScopesLen: 2, }, { name: "Pages", - site: comboString, + site: bothIDs, data: []string{dataPages}, - expect: comboString, + expect: bothIDs, expectScopesLen: 1, }, } for _, test := range table { suite.Run(test.name, func() { - t := suite.T() - ctx, flush := tester.NewContext() defer flush() - sel, err := sharePointBackupCreateSelectors(ctx, test.site, test.weburl, test.data, gc) - require.NoError(t, err, clues.ToCore(err)) + t := suite.T() + sel, err := sharePointBackupCreateSelectors(ctx, urlToID, test.site, test.weburl, test.data) + require.NoError(t, err, clues.ToCore(err)) assert.ElementsMatch(t, test.expect, sel.DiscreteResourceOwners()) }) } diff --git a/src/internal/common/ptr/pointer.go b/src/internal/common/ptr/pointer.go index a8f9a02b9..fa9d3f606 100644 --- a/src/internal/common/ptr/pointer.go +++ b/src/internal/common/ptr/pointer.go @@ -43,3 +43,10 @@ func OrNow(t *time.Time) time.Time { return *t } + +// To generates a pointer from any value. Primarily useful +// for generating pointers to strings and other primitives +// without needing to store a second variable. +func To[T any](t T) *T { + return &t +} diff --git a/src/internal/connector/data_collections.go b/src/internal/connector/data_collections.go index 570b8dd30..0eeb428ba 100644 --- a/src/internal/connector/data_collections.go +++ b/src/internal/connector/data_collections.go @@ -5,6 +5,7 @@ import ( "strings" "github.com/alcionai/clues" + "golang.org/x/exp/maps" "github.com/alcionai/corso/src/internal/common" "github.com/alcionai/corso/src/internal/connector/discovery" @@ -47,7 +48,7 @@ func (gc *GraphConnector) ProduceBackupCollections( diagnostics.Index("service", sels.Service.String())) defer end() - err := verifyBackupInputs(sels, gc.GetSiteIDs()) + err := verifyBackupInputs(sels, maps.Keys(gc.ResourceOwnerIDToName)) if err != nil { return nil, nil, clues.Stack(err).WithClues(ctx) } diff --git a/src/internal/connector/discovery/api/sites.go b/src/internal/connector/discovery/api/sites.go new file mode 100644 index 000000000..2365b41cb --- /dev/null +++ b/src/internal/connector/discovery/api/sites.go @@ -0,0 +1,144 @@ +package api + +import ( + "context" + "fmt" + "strings" + + "github.com/alcionai/clues" + msgraphgocore "github.com/microsoftgraph/msgraph-sdk-go-core" + "github.com/microsoftgraph/msgraph-sdk-go/models" + "github.com/pkg/errors" + + "github.com/alcionai/corso/src/internal/common/ptr" + "github.com/alcionai/corso/src/internal/connector/graph" + "github.com/alcionai/corso/src/pkg/fault" +) + +// --------------------------------------------------------------------------- +// controller +// --------------------------------------------------------------------------- + +func (c Client) Sites() Sites { + return Sites{c} +} + +// Sites is an interface-compliant provider of the client. +type Sites struct { + Client +} + +// --------------------------------------------------------------------------- +// methods +// --------------------------------------------------------------------------- + +// GetAll retrieves all sites. +func (c Sites) GetAll(ctx context.Context, errs *fault.Bus) ([]models.Siteable, error) { + service, err := c.service() + if err != nil { + return nil, err + } + + var resp models.SiteCollectionResponseable + + resp, err = service.Client().Sites().Get(ctx, nil) + + if err != nil { + return nil, graph.Wrap(ctx, err, "getting all sites") + } + + iter, err := msgraphgocore.NewPageIterator( + resp, + service.Adapter(), + models.CreateSiteCollectionResponseFromDiscriminatorValue) + if err != nil { + return nil, graph.Wrap(ctx, err, "creating sites iterator") + } + + var ( + us = make([]models.Siteable, 0) + el = errs.Local() + ) + + iterator := func(item any) bool { + if el.Failure() != nil { + return false + } + + s, err := validateSite(item) + if errors.Is(err, errKnownSkippableCase) { + // safe to no-op + return true + } + + if err != nil { + el.AddRecoverable(graph.Wrap(ctx, err, "validating site")) + return true + } + + us = append(us, s) + + return true + } + + if err := iter.Iterate(ctx, iterator); err != nil { + return nil, graph.Wrap(ctx, err, "iterating all sites") + } + + return us, el.Failure() +} + +func (c Sites) GetByID(ctx context.Context, id string) (models.Siteable, error) { + resp, err := c.stable.Client().SitesById(id).Get(ctx, nil) + if err != nil { + return nil, graph.Wrap(ctx, err, "getting site") + } + + return resp, err +} + +// --------------------------------------------------------------------------- +// helpers +// --------------------------------------------------------------------------- + +var errKnownSkippableCase = clues.New("case is known and skippable") + +const personalSitePath = "sharepoint.com/personal/" + +// validateSite ensures the item is a Siteable, and contains the necessary +// identifiers that we handle with all users. +// returns the item as a Siteable model. +func validateSite(item any) (models.Siteable, error) { + m, ok := item.(models.Siteable) + if !ok { + return nil, clues.New(fmt.Sprintf("unexpected model: %T", item)) + } + + id, ok := ptr.ValOK(m.GetId()) + if !ok || len(id) == 0 { + return nil, clues.New("missing ID") + } + + url, ok := ptr.ValOK(m.GetWebUrl()) + if !ok || len(url) == 0 { + return nil, clues.New("missing webURL").With("site_id", id) // TODO: pii + } + + // personal (ie: oneDrive) sites have to be filtered out server-side. + if ok && strings.Contains(url, personalSitePath) { + return nil, clues.Stack(errKnownSkippableCase). + With("site_id", id, "site_url", url) // TODO: pii + } + + if name, ok := ptr.ValOK(m.GetDisplayName()); !ok || len(name) == 0 { + // the built-in site at "https://{tenant-domain}/search" never has a name. + if strings.HasSuffix(url, "/search") { + return nil, clues.Stack(errKnownSkippableCase). + With("site_id", id, "site_url", url) // TODO: pii + } + + return nil, clues.New("missing site display name").With("site_id", id) + } + + return m, nil +} diff --git a/src/internal/connector/discovery/api/sites_test.go b/src/internal/connector/discovery/api/sites_test.go new file mode 100644 index 000000000..757b85827 --- /dev/null +++ b/src/internal/connector/discovery/api/sites_test.go @@ -0,0 +1,108 @@ +package api + +import ( + "testing" + + "github.com/alcionai/clues" + "github.com/microsoftgraph/msgraph-sdk-go/models" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/suite" + + "github.com/alcionai/corso/src/internal/common/ptr" + "github.com/alcionai/corso/src/internal/tester" +) + +type SitesUnitSuite struct { + tester.Suite +} + +func TestSitesUnitSuite(t *testing.T) { + suite.Run(t, &SitesUnitSuite{Suite: tester.NewUnitSuite(t)}) +} + +func (suite *SitesUnitSuite) TestValidateSite() { + site := models.NewSite() + site.SetWebUrl(ptr.To("sharepoint.com/sites/foo")) + site.SetDisplayName(ptr.To("testsite")) + site.SetId(ptr.To("testID")) + + tests := []struct { + name string + args interface{} + want models.Siteable + errCheck assert.ErrorAssertionFunc + errIsSkippable bool + }{ + { + name: "Invalid type", + args: string("invalid type"), + errCheck: assert.Error, + }, + { + name: "No ID", + args: models.NewSite(), + errCheck: assert.Error, + }, + { + name: "No WebURL", + args: func() *models.Site { + s := models.NewSite() + s.SetId(ptr.To("id")) + return s + }(), + errCheck: assert.Error, + }, + { + name: "No name", + args: func() *models.Site { + s := models.NewSite() + s.SetId(ptr.To("id")) + s.SetWebUrl(ptr.To("sharepoint.com/sites/foo")) + return s + }(), + errCheck: assert.Error, + }, + { + name: "Search site", + args: func() *models.Site { + s := models.NewSite() + s.SetId(ptr.To("id")) + s.SetWebUrl(ptr.To("sharepoint.com/search")) + return s + }(), + errCheck: assert.Error, + errIsSkippable: true, + }, + { + name: "Personal OneDrive", + args: func() *models.Site { + s := models.NewSite() + s.SetId(ptr.To("id")) + s.SetWebUrl(ptr.To("https://" + personalSitePath + "/someone's/onedrive")) + return s + }(), + errCheck: assert.Error, + errIsSkippable: true, + }, + { + name: "Valid Site", + args: site, + want: site, + errCheck: assert.NoError, + }, + } + for _, test := range tests { + suite.Run(test.name, func() { + t := suite.T() + + got, err := validateSite(test.args) + test.errCheck(t, err, clues.ToCore(err)) + + if test.errIsSkippable { + assert.ErrorIs(t, err, errKnownSkippableCase) + } + + assert.Equal(t, test.want, got) + }) + } +} diff --git a/src/internal/connector/discovery/discovery.go b/src/internal/connector/discovery/discovery.go index 0090c51ea..e955803c0 100644 --- a/src/internal/connector/discovery/discovery.go +++ b/src/internal/connector/discovery/discovery.go @@ -29,6 +29,24 @@ type getWithInfoer interface { getInfoer } +// --------------------------------------------------------------------------- +// helpers +// --------------------------------------------------------------------------- + +func apiClient(ctx context.Context, acct account.Account) (api.Client, error) { + m365, err := acct.M365Config() + if err != nil { + return api.Client{}, clues.Wrap(err, "retrieving m365 account configuration").WithClues(ctx) + } + + client, err := api.NewClient(m365) + if err != nil { + return api.Client{}, clues.Wrap(err, "creating api client").WithClues(ctx) + } + + return client, nil +} + // --------------------------------------------------------------------------- // api // --------------------------------------------------------------------------- @@ -39,19 +57,15 @@ func Users( acct account.Account, errs *fault.Bus, ) ([]models.Userable, error) { - m365, err := acct.M365Config() + client, err := apiClient(ctx, acct) if err != nil { - return nil, clues.Wrap(err, "retrieving m365 account configuration").WithClues(ctx) - } - - client, err := api.NewClient(m365) - if err != nil { - return nil, clues.Wrap(err, "creating api client").WithClues(ctx) + return nil, err } return client.Users().GetAll(ctx, errs) } +// User fetches a single user's data. func User(ctx context.Context, gwi getWithInfoer, userID string) (models.Userable, *api.UserInfo, error) { u, err := gwi.GetByID(ctx, userID) if err != nil { @@ -69,3 +83,17 @@ func User(ctx context.Context, gwi getWithInfoer, userID string) (models.Userabl return u, ui, nil } + +// Sites fetches all sharepoint sites in the tenant +func Sites( + ctx context.Context, + acct account.Account, + errs *fault.Bus, +) ([]models.Siteable, error) { + client, err := apiClient(ctx, acct) + if err != nil { + return nil, err + } + + return client.Sites().GetAll(ctx, errs) +} diff --git a/src/internal/connector/discovery/discovery_test.go b/src/internal/connector/discovery/discovery_test.go index c9a2c3f48..5c7039ff5 100644 --- a/src/internal/connector/discovery/discovery_test.go +++ b/src/internal/connector/discovery/discovery_test.go @@ -31,10 +31,11 @@ func (suite *DiscoveryIntegrationSuite) TestUsers() { ctx, flush := tester.NewContext() defer flush() - t := suite.T() - - acct := tester.NewM365Account(t) - errs := fault.New(true) + var ( + t = suite.T() + acct = tester.NewM365Account(t) + errs = fault.New(true) + ) users, err := discovery.Users(ctx, acct, errs) assert.NoError(t, err, clues.ToCore(err)) @@ -42,8 +43,7 @@ func (suite *DiscoveryIntegrationSuite) TestUsers() { ferrs := errs.Errors() assert.Nil(t, ferrs.Failure) assert.Empty(t, ferrs.Recovered) - - assert.Less(t, 0, len(users)) + assert.NotEmpty(t, users) } func (suite *DiscoveryIntegrationSuite) TestUsers_InvalidCredentials() { @@ -84,16 +84,85 @@ func (suite *DiscoveryIntegrationSuite) TestUsers_InvalidCredentials() { for _, test := range table { suite.Run(test.name, func() { - t := suite.T() + var ( + t = suite.T() + a = test.acct(t) + errs = fault.New(true) + ) + + users, err := discovery.Users(ctx, a, errs) + assert.Empty(t, users, "returned some users") + assert.NotNil(t, err) + }) + } +} + +func (suite *DiscoveryIntegrationSuite) TestSites() { + ctx, flush := tester.NewContext() + defer flush() + + var ( + t = suite.T() + acct = tester.NewM365Account(t) + errs = fault.New(true) + ) + + sites, err := discovery.Sites(ctx, acct, errs) + assert.NoError(t, err, clues.ToCore(err)) + + ferrs := errs.Errors() + assert.Nil(t, ferrs.Failure) + assert.Empty(t, ferrs.Recovered) + assert.NotEmpty(t, sites) +} + +func (suite *DiscoveryIntegrationSuite) TestSites_InvalidCredentials() { + ctx, flush := tester.NewContext() + defer flush() + + table := []struct { + name string + acct func(t *testing.T) account.Account + }{ + { + name: "Invalid Credentials", + acct: func(t *testing.T) account.Account { + a, err := account.NewAccount( + account.ProviderM365, + account.M365Config{ + M365: credentials.M365{ + AzureClientID: "Test", + AzureClientSecret: "without", + }, + AzureTenantID: "data", + }, + ) + require.NoError(t, err, clues.ToCore(err)) + + return a + }, + }, + { + name: "Empty Credentials", + acct: func(t *testing.T) account.Account { + // intentionally swallowing the error here + a, _ := account.NewAccount(account.ProviderM365) + return a + }, + }, + } + + for _, test := range table { + suite.Run(test.name, func() { + var ( + t = suite.T() + a = test.acct(t) + errs = fault.New(true) + ) - a := test.acct(t) - errs := fault.New(true) users, err := discovery.Users(ctx, a, errs) - assert.Empty(t, users, "returned some users") assert.NotNil(t, err) - // TODO(ashmrtn): Uncomment when fault package is used in discovery API. - // assert.NotNil(t, errs.Err()) }) } } diff --git a/src/internal/connector/graph_connector.go b/src/internal/connector/graph_connector.go index b9119c5a5..7eab83c6f 100644 --- a/src/internal/connector/graph_connector.go +++ b/src/internal/connector/graph_connector.go @@ -4,29 +4,19 @@ package connector import ( "context" - "fmt" "net/http" "runtime/trace" - "strings" "sync" "github.com/alcionai/clues" - "github.com/microsoft/kiota-abstractions-go/serialization" - msgraphgocore "github.com/microsoftgraph/msgraph-sdk-go-core" - "github.com/microsoftgraph/msgraph-sdk-go/models" "github.com/pkg/errors" - "golang.org/x/exp/maps" - "github.com/alcionai/corso/src/internal/common/ptr" "github.com/alcionai/corso/src/internal/connector/discovery/api" "github.com/alcionai/corso/src/internal/connector/graph" - "github.com/alcionai/corso/src/internal/connector/sharepoint" "github.com/alcionai/corso/src/internal/connector/support" "github.com/alcionai/corso/src/internal/data" - "github.com/alcionai/corso/src/internal/diagnostics" "github.com/alcionai/corso/src/pkg/account" "github.com/alcionai/corso/src/pkg/fault" - "github.com/alcionai/corso/src/pkg/filters" ) // --------------------------------------------------------------------------- @@ -44,9 +34,6 @@ type GraphConnector struct { tenant string credentials account.M365Config - // TODO: remove in favor of the maps below. - Sites map[string]string // webURL -> siteID and siteID -> webURL - // maps of resource owner ids to names, and names to ids. // not guaranteed to be populated, only here as a post-population // reference for processes that choose to populate the values. @@ -100,12 +87,6 @@ func NewGraphConnector( return nil, clues.Wrap(err, "creating api client").WithClues(ctx) } - if r == AllResources || r == Sites { - if err = gc.setTenantSites(ctx, errs); err != nil { - return nil, clues.Wrap(err, "retrieveing tenant site list") - } - } - return &gc, nil } @@ -179,115 +160,6 @@ func (gc *GraphConnector) createService() (*graph.Service, error) { return graph.NewService(adapter), nil } -// setTenantSites queries the M365 to identify the sites in the -// workspace. The sites field is updated during this method -// iff the returned error is nil. -func (gc *GraphConnector) setTenantSites(ctx context.Context, errs *fault.Bus) error { - gc.Sites = map[string]string{} - - ctx, end := diagnostics.Span(ctx, "gc:setTenantSites") - defer end() - - sites, err := getResources( - ctx, - gc.Service, - gc.tenant, - sharepoint.GetAllSitesForTenant, - models.CreateSiteCollectionResponseFromDiscriminatorValue, - identifySite, - errs) - if err != nil { - return err - } - - gc.Sites = sites - - return nil -} - -var errKnownSkippableCase = clues.New("case is known and skippable") - -const personalSitePath = "sharepoint.com/personal/" - -// Transforms an interface{} into a key,value pair representing -// siteName:siteID. -func identifySite(item any) (string, string, error) { - m, ok := item.(models.Siteable) - if !ok { - return "", "", clues.New("non-Siteable item").With("item_type", fmt.Sprintf("%T", item)) - } - - id := ptr.Val(m.GetId()) - url, ok := ptr.ValOK(m.GetWebUrl()) - - if m.GetName() == nil { - // the built-in site at "https://{tenant-domain}/search" never has a name. - if ok && strings.HasSuffix(url, "/search") { - // TODO: pii siteID, on this and all following cases - return "", "", clues.Stack(errKnownSkippableCase).With("site_id", id) - } - - return "", "", clues.New("site has no name").With("site_id", id) - } - - // personal (ie: oneDrive) sites have to be filtered out server-side. - if ok && strings.Contains(url, personalSitePath) { - return "", "", clues.Stack(errKnownSkippableCase).With("site_id", id) - } - - return url, id, nil -} - -// GetSiteWebURLs returns the WebURLs of sharepoint sites within the tenant. -func (gc *GraphConnector) GetSiteWebURLs() []string { - return maps.Keys(gc.Sites) -} - -// GetSiteIds returns the canonical site IDs in the tenant -func (gc *GraphConnector) GetSiteIDs() []string { - return maps.Values(gc.Sites) -} - -// UnionSiteIDsAndWebURLs reduces the id and url slices into a single slice of site IDs. -// WebURLs will run as a path-suffix style matcher. Callers may provide partial urls, though -// each element in the url must fully match. Ex: the webURL value "foo" will match "www.ex.com/foo", -// but not match "www.ex.com/foobar". -// The returned IDs are reduced to a set of unique values. -func (gc *GraphConnector) UnionSiteIDsAndWebURLs( - ctx context.Context, - ids, urls []string, - errs *fault.Bus, -) ([]string, error) { - if len(gc.Sites) == 0 { - if err := gc.setTenantSites(ctx, errs); err != nil { - return nil, err - } - } - - idm := map[string]struct{}{} - - for _, id := range ids { - idm[id] = struct{}{} - } - - match := filters.PathSuffix(urls) - - for url, id := range gc.Sites { - if !match.Compare(url) { - continue - } - - idm[id] = struct{}{} - } - - idsl := make([]string, 0, len(idm)) - for id := range idm { - idsl = append(idsl, id) - } - - return idsl, nil -} - // AwaitStatus waits for all gc tasks to complete and then returns status func (gc *GraphConnector) Wait() *data.CollectionStats { defer func() { @@ -343,59 +215,3 @@ func (gc *GraphConnector) incrementAwaitingMessages() { func (gc *GraphConnector) incrementMessagesBy(num int) { gc.wg.Add(num) } - -// --------------------------------------------------------------------------- -// Helper Funcs -// --------------------------------------------------------------------------- - -func getResources( - ctx context.Context, - gs graph.Servicer, - tenantID string, - query func(context.Context, graph.Servicer) (serialization.Parsable, error), - parser func(parseNode serialization.ParseNode) (serialization.Parsable, error), - identify func(any) (string, string, error), - errs *fault.Bus, -) (map[string]string, error) { - resources := map[string]string{} - - response, err := query(ctx, gs) - if err != nil { - return nil, graph.Wrap(ctx, err, "retrieving tenant's resources") - } - - iter, err := msgraphgocore.NewPageIterator(response, gs.Adapter(), parser) - if err != nil { - return nil, graph.Stack(ctx, err) - } - - el := errs.Local() - - callbackFunc := func(item any) bool { - if el.Failure() != nil { - return false - } - - k, v, err := identify(item) - if err != nil { - if !errors.Is(err, errKnownSkippableCase) { - el.AddRecoverable(clues.Stack(err). - WithClues(ctx). - With("query_url", gs.Adapter().GetBaseUrl())) - } - - return true - } - - resources[k] = v - resources[v] = k - - return true - } - - if err := iter.Iterate(ctx, callbackFunc); err != nil { - return nil, graph.Stack(ctx, err) - } - - return resources, el.Failure() -} diff --git a/src/internal/connector/graph_connector_test.go b/src/internal/connector/graph_connector_test.go index 782c85243..1d9ef1098 100644 --- a/src/internal/connector/graph_connector_test.go +++ b/src/internal/connector/graph_connector_test.go @@ -38,103 +38,6 @@ func TestGraphConnectorUnitSuite(t *testing.T) { suite.Run(t, &GraphConnectorUnitSuite{Suite: tester.NewUnitSuite(t)}) } -func (suite *GraphConnectorUnitSuite) TestUnionSiteIDsAndWebURLs() { - const ( - url1 = "www.foo.com/bar" - url2 = "www.fnords.com/smarf" - path1 = "bar" - path2 = "/smarf" - id1 = "site-id-1" - id2 = "site-id-2" - ) - - gc := &GraphConnector{ - // must be populated, else the func will try to make a graph call - // to retrieve site data. - Sites: map[string]string{ - url1: id1, - url2: id2, - }, - } - - table := []struct { - name string - ids []string - urls []string - expect []string - }{ - { - name: "nil", - }, - { - name: "empty", - ids: []string{}, - urls: []string{}, - expect: []string{}, - }, - { - name: "ids only", - ids: []string{id1, id2}, - urls: []string{}, - expect: []string{id1, id2}, - }, - { - name: "urls only", - ids: []string{}, - urls: []string{url1, url2}, - expect: []string{id1, id2}, - }, - { - name: "url suffix only", - ids: []string{}, - urls: []string{path1, path2}, - expect: []string{id1, id2}, - }, - { - name: "url and suffix overlap", - ids: []string{}, - urls: []string{url1, url2, path1, path2}, - expect: []string{id1, id2}, - }, - { - name: "ids and urls, no overlap", - ids: []string{id1}, - urls: []string{url2}, - expect: []string{id1, id2}, - }, - { - name: "ids and urls, overlap", - ids: []string{id1, id2}, - urls: []string{url1, url2}, - expect: []string{id1, id2}, - }, - { - name: "partial non-match on path", - ids: []string{}, - urls: []string{path1[2:], path2[2:]}, - expect: []string{}, - }, - { - name: "partial non-match on url", - ids: []string{}, - urls: []string{url1[5:], url2[5:]}, - expect: []string{}, - }, - } - for _, test := range table { - suite.Run(test.name, func() { - ctx, flush := tester.NewContext() - defer flush() - - t := suite.T() - - result, err := gc.UnionSiteIDsAndWebURLs(ctx, test.ids, test.urls, fault.New(true)) - assert.NoError(t, err, clues.ToCore(err)) - assert.ElementsMatch(t, test.expect, result) - }) - } -} - func (suite *GraphConnectorUnitSuite) TestPopulateOwnerIDAndNamesFrom() { const ( ownerID = "owner-id" @@ -306,35 +209,6 @@ func (suite *GraphConnectorIntegrationSuite) SetupSuite() { tester.LogTimeOfTest(suite.T()) } -// TestSetTenantSites verifies GraphConnector's ability to query -// the sites associated with the credentials -func (suite *GraphConnectorIntegrationSuite) TestSetTenantSites() { - newConnector := GraphConnector{ - tenant: "test_tenant", - Sites: make(map[string]string, 0), - credentials: suite.connector.credentials, - } - - ctx, flush := tester.NewContext() - defer flush() - - t := suite.T() - - service, err := newConnector.createService() - require.NoError(t, err, clues.ToCore(err)) - - newConnector.Service = service - assert.Equal(t, 0, len(newConnector.Sites)) - - err = newConnector.setTenantSites(ctx, fault.New(true)) - assert.NoError(t, err, clues.ToCore(err)) - assert.Less(t, 0, len(newConnector.Sites)) - - for _, site := range newConnector.Sites { - assert.NotContains(t, "sharepoint.com/personal/", site) - } -} - func (suite *GraphConnectorIntegrationSuite) TestRestoreFailsBadService() { ctx, flush := tester.NewContext() defer flush() diff --git a/src/pkg/services/m365/m365.go b/src/pkg/services/m365/m365.go index c76639721..de8aa519d 100644 --- a/src/pkg/services/m365/m365.go +++ b/src/pkg/services/m365/m365.go @@ -6,9 +6,8 @@ import ( "github.com/alcionai/clues" "github.com/microsoftgraph/msgraph-sdk-go/models" - "github.com/alcionai/corso/src/internal/connector" + "github.com/alcionai/corso/src/internal/common/ptr" "github.com/alcionai/corso/src/internal/connector/discovery" - "github.com/alcionai/corso/src/internal/connector/graph" "github.com/alcionai/corso/src/pkg/account" "github.com/alcionai/corso/src/pkg/fault" ) @@ -55,6 +54,22 @@ func Users(ctx context.Context, acct account.Account, errs *fault.Bus) ([]*User, return ret, nil } +// parseUser extracts information from `models.Userable` we care about +func parseUser(item models.Userable) (*User, error) { + if item.GetUserPrincipalName() == nil { + return nil, clues.New("user missing principal name"). + With("user_id", *item.GetId()) // TODO: pii + } + + u := &User{PrincipalName: *item.GetUserPrincipalName(), ID: *item.GetId()} + + if item.GetDisplayName() != nil { + u.Name = *item.GetDisplayName() + } + + return u, nil +} + // UsersMap retrieves all users in the tenant, and returns two maps: one id-to-principalName, // and one principalName-to-id. func UsersMap( @@ -91,55 +106,56 @@ type Site struct { // Sites returns a list of Sites in a specified M365 tenant func Sites(ctx context.Context, acct account.Account, errs *fault.Bus) ([]*Site, error) { - gc, err := connector.NewGraphConnector(ctx, graph.HTTPClient(graph.NoTimeout()), acct, connector.Sites, errs) + sites, err := discovery.Sites(ctx, acct, errs) if err != nil { return nil, clues.Wrap(err, "initializing M365 graph connection") } - // gc.Sites is a map with keys: SiteURL, values: ID - ret := make([]*Site, 0, len(gc.Sites)) - for k, v := range gc.Sites { - ret = append(ret, &Site{ - WebURL: k, - ID: v, - }) + ret := make([]*Site, 0, len(sites)) + + for _, s := range sites { + ps, err := parseSite(s) + if err != nil { + return nil, clues.Wrap(err, "parsing siteable") + } + + ret = append(ret, ps) } return ret, nil } -// SiteURLs returns a list of SharePoint site WebURLs in the specified M365 tenant -func SiteURLs(ctx context.Context, acct account.Account, errs *fault.Bus) ([]string, error) { - gc, err := connector.NewGraphConnector(ctx, graph.HTTPClient(graph.NoTimeout()), acct, connector.Sites, errs) +// parseSite extracts the information from `models.Siteable` we care about +func parseSite(item models.Siteable) (*Site, error) { + s := &Site{ + ID: ptr.Val(item.GetId()), + WebURL: ptr.Val(item.GetWebUrl()), + } + + return s, nil +} + +// SitesMap retrieves all sites in the tenant, and returns two maps: one id-to-webURL, +// and one webURL-to-id. +func SitesMap( + ctx context.Context, + acct account.Account, + errs *fault.Bus, +) (map[string]string, map[string]string, error) { + sites, err := Sites(ctx, acct, errs) if err != nil { - return nil, clues.Wrap(err, "initializing M365 graph connection") + return nil, nil, err } - return gc.GetSiteWebURLs(), nil -} - -// SiteIDs returns a list of SharePoint sites IDs in the specified M365 tenant -func SiteIDs(ctx context.Context, acct account.Account, errs *fault.Bus) ([]string, error) { - gc, err := connector.NewGraphConnector(ctx, graph.HTTPClient(graph.NoTimeout()), acct, connector.Sites, errs) - if err != nil { - return nil, clues.Wrap(err, "initializing graph connection") - } - - return gc.GetSiteIDs(), nil -} - -// parseUser extracts information from `models.Userable` we care about -func parseUser(item models.Userable) (*User, error) { - if item.GetUserPrincipalName() == nil { - return nil, clues.New("user missing principal name"). - With("user_id", *item.GetId()) // TODO: pii - } - - u := &User{PrincipalName: *item.GetUserPrincipalName(), ID: *item.GetId()} - - if item.GetDisplayName() != nil { - u.Name = *item.GetDisplayName() - } - - return u, nil + var ( + idToName = make(map[string]string, len(sites)) + nameToID = make(map[string]string, len(sites)) + ) + + for _, s := range sites { + idToName[s.ID] = s.WebURL + nameToID[s.WebURL] = s.ID + } + + return idToName, nameToID, nil }