move sharepoint onto map-based lookup (#2958)

Refactors the sharepoint site lookup to use the
id-to-name maps.  This has a momentary regression
that will get solved in the next PR: we no longer
match on weburl suffixes, and instead require
a complete match.

Also, migrates the sharepoint lookup code out
of GC and into Discovery.

---

#### Does this PR need a docs update or release note?

- [x]  No

#### Type of change

- [x] 🌻 Feature

#### Issue(s)

* #2825
* #1995
* #1547

#### Test Plan

- [x]  Unit test
- [x] 💚 E2E
This commit is contained in:
Keepers 2023-04-05 13:38:22 -06:00 committed by GitHub
parent 86c72089a1
commit 0d521bb5a7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
18 changed files with 588 additions and 498 deletions

View File

@ -166,7 +166,7 @@ func createExchangeCmd(cmd *cobra.Command, args []string) error {
ins, err := m365.UsersMap(ctx, *acct, errs)
if err != nil {
return Only(ctx, clues.Wrap(err, "Failed to retrieve M365 user(s)"))
return Only(ctx, clues.Wrap(err, "Failed to retrieve M365 users"))
}
selectorSet := []selectors.Selector{}

View File

@ -7,18 +7,20 @@ import (
"github.com/pkg/errors"
"github.com/spf13/cobra"
"github.com/spf13/pflag"
"golang.org/x/exp/slices"
"github.com/alcionai/corso/src/cli/options"
. "github.com/alcionai/corso/src/cli/print"
"github.com/alcionai/corso/src/cli/utils"
"github.com/alcionai/corso/src/internal/connector"
"github.com/alcionai/corso/src/internal/connector/graph"
"github.com/alcionai/corso/src/internal/common"
"github.com/alcionai/corso/src/internal/data"
"github.com/alcionai/corso/src/pkg/backup/details"
"github.com/alcionai/corso/src/pkg/fault"
"github.com/alcionai/corso/src/pkg/filters"
"github.com/alcionai/corso/src/pkg/path"
"github.com/alcionai/corso/src/pkg/repository"
"github.com/alcionai/corso/src/pkg/selectors"
"github.com/alcionai/corso/src/pkg/services/m365"
)
// ------------------------------------------------------------------------------------------------
@ -154,20 +156,19 @@ func createSharePointCmd(cmd *cobra.Command, args []string) error {
// TODO: log/print recoverable errors
errs := fault.New(false)
// TODO: discovery of sharepoint sites instead of early GC construction.
gc, err := connector.NewGraphConnector(ctx, graph.HTTPClient(graph.NoTimeout()), *acct, connector.Sites, errs)
ins, err := m365.SitesMap(ctx, *acct, errs)
if err != nil {
return Only(ctx, clues.Wrap(err, "Failed to connect to Microsoft APIs"))
return Only(ctx, clues.Wrap(err, "Failed to retrieve M365 sites"))
}
sel, err := sharePointBackupCreateSelectors(ctx, utils.SiteIDFV, utils.WebURLFV, utils.CategoryDataFV, gc)
sel, err := sharePointBackupCreateSelectors(ctx, ins, utils.SiteIDFV, utils.WebURLFV, utils.CategoryDataFV)
if err != nil {
return Only(ctx, clues.Wrap(err, "Retrieving up sharepoint sites by ID and URL"))
}
selectorSet := []selectors.Selector{}
for _, discSel := range sel.SplitByResourceOwner(gc.GetSiteIDs()) {
for _, discSel := range sel.SplitByResourceOwner(ins.IDs()) {
selectorSet = append(selectorSet, discSel.Selector)
}
@ -176,7 +177,7 @@ func createSharePointCmd(cmd *cobra.Command, args []string) error {
r,
"SharePoint", "site",
selectorSet,
nil) // TODO: prepopulate ids,names
ins)
}
func validateSharePointBackupCreateFlags(sites, weburls, cats []string) error {
@ -202,44 +203,28 @@ func validateSharePointBackupCreateFlags(sites, weburls, cats []string) error {
// TODO: users might specify a data type, this only supports AllData().
func sharePointBackupCreateSelectors(
ctx context.Context,
ins common.IDNameSwapper,
sites, weburls, cats []string,
gc *connector.GraphConnector,
) (*selectors.SharePointBackup, error) {
if len(sites) == 0 && len(weburls) == 0 {
return selectors.NewSharePointBackup(selectors.None()), nil
}
for _, site := range sites {
if site == utils.Wildcard {
return includeAllSitesWithCategories(cats), nil
}
if filters.PathContains(sites).Compare(utils.Wildcard) {
return includeAllSitesWithCategories(ins, cats), nil
}
for _, wURL := range weburls {
if wURL == utils.Wildcard {
return includeAllSitesWithCategories(cats), nil
}
if filters.PathContains(weburls).Compare(utils.Wildcard) {
return includeAllSitesWithCategories(ins, cats), nil
}
// TODO: log/print recoverable errors
errs := fault.New(false)
union, err := gc.UnionSiteIDsAndWebURLs(ctx, sites, weburls, errs)
if err != nil {
return nil, err
}
sel := selectors.NewSharePointBackup(union)
sel := selectors.NewSharePointBackup(append(slices.Clone(sites), weburls...))
return addCategories(sel, cats), nil
}
func includeAllSitesWithCategories(categories []string) *selectors.SharePointBackup {
sel := addCategories(
selectors.NewSharePointBackup(selectors.Any()),
categories)
return sel
func includeAllSitesWithCategories(ins common.IDNameSwapper, categories []string) *selectors.SharePointBackup {
return addCategories(selectors.NewSharePointBackup(ins.IDs()), categories)
}
func addCategories(sel *selectors.SharePointBackup, cats []string) *selectors.SharePointBackup {

View File

@ -11,7 +11,7 @@ import (
"github.com/alcionai/corso/src/cli/utils"
"github.com/alcionai/corso/src/cli/utils/testdata"
"github.com/alcionai/corso/src/internal/connector"
"github.com/alcionai/corso/src/internal/common"
"github.com/alcionai/corso/src/internal/tester"
"github.com/alcionai/corso/src/pkg/selectors"
)
@ -108,13 +108,20 @@ func (suite *SharePointSuite) TestValidateSharePointBackupCreateFlags() {
}
func (suite *SharePointSuite) TestSharePointBackupCreateSelectors() {
comboString := []string{"id_1", "id_2"}
gc := &connector.GraphConnector{
Sites: map[string]string{
"url_1": "id_1",
"url_2": "id_2",
},
}
const (
id1 = "id_1"
id2 = "id_2"
url1 = "url_1/foo"
url2 = "url_2/bar"
)
var (
ins = common.IDsNames{
IDToName: map[string]string{id1: url1, id2: url2},
NameToID: map[string]string{url1: id1, url2: id2},
}
bothIDs = []string{id1, id2}
)
table := []struct {
name string
@ -137,73 +144,72 @@ func (suite *SharePointSuite) TestSharePointBackupCreateSelectors() {
{
name: "site wildcard",
site: []string{utils.Wildcard},
expect: selectors.Any(),
expect: bothIDs,
expectScopesLen: 2,
},
{
name: "url wildcard",
weburl: []string{utils.Wildcard},
expect: selectors.Any(),
expect: bothIDs,
expectScopesLen: 2,
},
{
name: "sites",
site: []string{"id_1", "id_2"},
expect: []string{"id_1", "id_2"},
site: []string{id1, id2},
expect: []string{id1, id2},
expectScopesLen: 2,
},
{
name: "urls",
weburl: []string{"url_1", "url_2"},
expect: []string{"id_1", "id_2"},
weburl: []string{url1, url2},
expect: []string{url1, url2},
expectScopesLen: 2,
},
{
name: "mix sites and urls",
site: []string{"id_1"},
weburl: []string{"url_2"},
expect: []string{"id_1", "id_2"},
site: []string{id1},
weburl: []string{url2},
expect: []string{id1, url2},
expectScopesLen: 2,
},
{
name: "duplicate sites and urls",
site: []string{"id_1", "id_2"},
weburl: []string{"url_1", "url_2"},
expect: comboString,
site: []string{id1, id2},
weburl: []string{url1, url2},
expect: []string{id1, id2, url1, url2},
expectScopesLen: 2,
},
{
name: "unnecessary site wildcard",
site: []string{"id_1", utils.Wildcard},
weburl: []string{"url_1", "url_2"},
expect: selectors.Any(),
site: []string{id1, utils.Wildcard},
weburl: []string{url1, url2},
expect: bothIDs,
expectScopesLen: 2,
},
{
name: "unnecessary url wildcard",
site: comboString,
weburl: []string{"url_1", utils.Wildcard},
expect: selectors.Any(),
site: []string{id1},
weburl: []string{url1, utils.Wildcard},
expect: bothIDs,
expectScopesLen: 2,
},
{
name: "Pages",
site: comboString,
site: bothIDs,
data: []string{dataPages},
expect: comboString,
expect: bothIDs,
expectScopesLen: 1,
},
}
for _, test := range table {
suite.Run(test.name, func() {
t := suite.T()
ctx, flush := tester.NewContext()
defer flush()
sel, err := sharePointBackupCreateSelectors(ctx, test.site, test.weburl, test.data, gc)
require.NoError(t, err, clues.ToCore(err))
t := suite.T()
sel, err := sharePointBackupCreateSelectors(ctx, ins, test.site, test.weburl, test.data)
require.NoError(t, err, clues.ToCore(err))
assert.ElementsMatch(t, test.expect, sel.DiscreteResourceOwners())
})
}

View File

@ -76,7 +76,7 @@ func Only(ctx context.Context, e error) error {
// if s is nil, prints nothing.
// Prepends the message with "Error: "
func Err(ctx context.Context, s ...any) {
out(getRootCmd(ctx).ErrOrStderr())
out(getRootCmd(ctx).ErrOrStderr(), s...)
}
// Errf prints the params to cobra's error writer (stdErr by default)

View File

@ -43,3 +43,10 @@ func OrNow(t *time.Time) time.Time {
return *t
}
// To generates a pointer from any value. Primarily useful
// for generating pointers to strings and other primitives
// without needing to store a second variable.
func To[T any](t T) *T {
return &t
}

View File

@ -47,7 +47,7 @@ func (gc *GraphConnector) ProduceBackupCollections(
diagnostics.Index("service", sels.Service.String()))
defer end()
err := verifyBackupInputs(sels, gc.GetSiteIDs())
err := verifyBackupInputs(sels, gc.IDNameLookup.IDs())
if err != nil {
return nil, nil, clues.Stack(err).WithClues(ctx)
}

View File

@ -335,9 +335,14 @@ func (suite *ConnectorCreateSharePointCollectionIntegrationSuite) TestCreateShar
siteIDs = []string{siteID}
)
id, name, err := gc.PopulateOwnerIDAndNamesFrom(siteID, nil)
require.NoError(t, err, clues.ToCore(err))
sel := selectors.NewSharePointBackup(siteIDs)
sel.Include(sel.LibraryFolders([]string{"foo"}, selectors.PrefixMatch()))
sel.SetDiscreteOwnerIDName(id, name)
cols, excludes, err := gc.ProduceBackupCollections(
ctx,
sel.Selector,
@ -374,9 +379,14 @@ func (suite *ConnectorCreateSharePointCollectionIntegrationSuite) TestCreateShar
siteIDs = []string{siteID}
)
id, name, err := gc.PopulateOwnerIDAndNamesFrom(siteID, nil)
require.NoError(t, err, clues.ToCore(err))
sel := selectors.NewSharePointBackup(siteIDs)
sel.Include(sel.Lists(selectors.Any()))
sel.SetDiscreteOwnerIDName(id, name)
cols, excludes, err := gc.ProduceBackupCollections(
ctx,
sel.Selector,

View File

@ -0,0 +1,142 @@
package api
import (
"context"
"fmt"
"strings"
"github.com/alcionai/clues"
msgraphgocore "github.com/microsoftgraph/msgraph-sdk-go-core"
"github.com/microsoftgraph/msgraph-sdk-go/models"
"github.com/pkg/errors"
"github.com/alcionai/corso/src/internal/common/ptr"
"github.com/alcionai/corso/src/internal/connector/graph"
"github.com/alcionai/corso/src/pkg/fault"
)
// ---------------------------------------------------------------------------
// controller
// ---------------------------------------------------------------------------
func (c Client) Sites() Sites {
return Sites{c}
}
// Sites is an interface-compliant provider of the client.
type Sites struct {
Client
}
// ---------------------------------------------------------------------------
// methods
// ---------------------------------------------------------------------------
// GetAll retrieves all sites.
func (c Sites) GetAll(ctx context.Context, errs *fault.Bus) ([]models.Siteable, error) {
service, err := c.service()
if err != nil {
return nil, err
}
resp, err := service.Client().Sites().Get(ctx, nil)
if err != nil {
return nil, graph.Wrap(ctx, err, "getting all sites")
}
iter, err := msgraphgocore.NewPageIterator(
resp,
service.Adapter(),
models.CreateSiteCollectionResponseFromDiscriminatorValue)
if err != nil {
return nil, graph.Wrap(ctx, err, "creating sites iterator")
}
var (
us = make([]models.Siteable, 0)
el = errs.Local()
)
iterator := func(item any) bool {
if el.Failure() != nil {
return false
}
s, err := validateSite(item)
if errors.Is(err, errKnownSkippableCase) {
// safe to no-op
return true
}
if err != nil {
el.AddRecoverable(graph.Wrap(ctx, err, "validating site"))
return true
}
us = append(us, s)
return true
}
if err := iter.Iterate(ctx, iterator); err != nil {
return nil, graph.Wrap(ctx, err, "enumerating sites")
}
return us, el.Failure()
}
func (c Sites) GetByID(ctx context.Context, id string) (models.Siteable, error) {
resp, err := c.stable.Client().SitesById(id).Get(ctx, nil)
if err != nil {
return nil, graph.Wrap(ctx, err, "getting site")
}
return resp, err
}
// ---------------------------------------------------------------------------
// helpers
// ---------------------------------------------------------------------------
var errKnownSkippableCase = clues.New("case is known and skippable")
const personalSitePath = "sharepoint.com/personal/"
// validateSite ensures the item is a Siteable, and contains the necessary
// identifiers that we handle with all users.
// returns the item as a Siteable model.
func validateSite(item any) (models.Siteable, error) {
m, ok := item.(models.Siteable)
if !ok {
return nil, clues.New(fmt.Sprintf("unexpected model: %T", item))
}
id := ptr.Val(m.GetId())
if len(id) == 0 {
return nil, clues.New("missing ID")
}
url := ptr.Val(m.GetWebUrl())
if len(url) == 0 {
return nil, clues.New("missing webURL").With("site_id", id) // TODO: pii
}
// personal (ie: oneDrive) sites have to be filtered out server-side.
if strings.Contains(url, personalSitePath) {
return nil, clues.Stack(errKnownSkippableCase).
With("site_id", id, "site_url", url) // TODO: pii
}
name := ptr.Val(m.GetDisplayName())
if len(name) == 0 {
// the built-in site at "https://{tenant-domain}/search" never has a name.
if strings.HasSuffix(url, "/search") {
return nil, clues.Stack(errKnownSkippableCase).
With("site_id", id, "site_url", url) // TODO: pii
}
return nil, clues.New("missing site display name").With("site_id", id)
}
return m, nil
}

View File

@ -0,0 +1,155 @@
package api
import (
"testing"
"github.com/alcionai/clues"
"github.com/microsoftgraph/msgraph-sdk-go/models"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/stretchr/testify/suite"
"github.com/alcionai/corso/src/internal/common/ptr"
"github.com/alcionai/corso/src/internal/tester"
"github.com/alcionai/corso/src/pkg/account"
"github.com/alcionai/corso/src/pkg/fault"
)
type SitesUnitSuite struct {
tester.Suite
}
func TestSitesUnitSuite(t *testing.T) {
suite.Run(t, &SitesUnitSuite{Suite: tester.NewUnitSuite(t)})
}
func (suite *SitesUnitSuite) TestValidateSite() {
site := models.NewSite()
site.SetWebUrl(ptr.To("sharepoint.com/sites/foo"))
site.SetDisplayName(ptr.To("testsite"))
site.SetId(ptr.To("testID"))
tests := []struct {
name string
args any
want models.Siteable
errCheck assert.ErrorAssertionFunc
errIsSkippable bool
}{
{
name: "Invalid type",
args: string("invalid type"),
errCheck: assert.Error,
},
{
name: "No ID",
args: models.NewSite(),
errCheck: assert.Error,
},
{
name: "No WebURL",
args: func() *models.Site {
s := models.NewSite()
s.SetId(ptr.To("id"))
return s
}(),
errCheck: assert.Error,
},
{
name: "No name",
args: func() *models.Site {
s := models.NewSite()
s.SetId(ptr.To("id"))
s.SetWebUrl(ptr.To("sharepoint.com/sites/foo"))
return s
}(),
errCheck: assert.Error,
},
{
name: "Search site",
args: func() *models.Site {
s := models.NewSite()
s.SetId(ptr.To("id"))
s.SetWebUrl(ptr.To("sharepoint.com/search"))
return s
}(),
errCheck: assert.Error,
errIsSkippable: true,
},
{
name: "Personal OneDrive",
args: func() *models.Site {
s := models.NewSite()
s.SetId(ptr.To("id"))
s.SetWebUrl(ptr.To("https://" + personalSitePath + "/someone's/onedrive"))
return s
}(),
errCheck: assert.Error,
errIsSkippable: true,
},
{
name: "Valid Site",
args: site,
want: site,
errCheck: assert.NoError,
},
}
for _, test := range tests {
suite.Run(test.name, func() {
t := suite.T()
got, err := validateSite(test.args)
test.errCheck(t, err, clues.ToCore(err))
if test.errIsSkippable {
assert.ErrorIs(t, err, errKnownSkippableCase)
}
assert.Equal(t, test.want, got)
})
}
}
type SitesIntgSuite struct {
tester.Suite
creds account.M365Config
}
func TestSitesIntgSuite(t *testing.T) {
suite.Run(t, &SitesIntgSuite{
Suite: tester.NewIntegrationSuite(
t,
[][]string{tester.M365AcctCredEnvs, tester.AWSStorageCredEnvs}),
})
}
func (suite *SitesIntgSuite) SetupSuite() {
var (
t = suite.T()
acct = tester.NewM365Account(t)
)
m365, err := acct.M365Config()
require.NoError(t, err, clues.ToCore(err))
suite.creds = m365
}
func (suite *SitesIntgSuite) TestGetAll() {
ctx, flush := tester.NewContext()
defer flush()
t := suite.T()
cli, err := NewClient(suite.creds)
require.NoError(t, err, clues.ToCore(err))
sites, err := cli.Sites().GetAll(ctx, fault.New(true))
require.NoError(t, err)
require.NotZero(t, len(sites), "must have at least one site")
for _, site := range sites {
assert.NotContains(t, ptr.Val(site.GetWebUrl()), personalSitePath, "must not return onedrive sites")
}
}

View File

@ -29,6 +29,24 @@ type getWithInfoer interface {
getInfoer
}
// ---------------------------------------------------------------------------
// helpers
// ---------------------------------------------------------------------------
func apiClient(ctx context.Context, acct account.Account) (api.Client, error) {
m365, err := acct.M365Config()
if err != nil {
return api.Client{}, clues.Wrap(err, "retrieving m365 account configuration").WithClues(ctx)
}
client, err := api.NewClient(m365)
if err != nil {
return api.Client{}, clues.Wrap(err, "creating api client").WithClues(ctx)
}
return client, nil
}
// ---------------------------------------------------------------------------
// api
// ---------------------------------------------------------------------------
@ -39,19 +57,15 @@ func Users(
acct account.Account,
errs *fault.Bus,
) ([]models.Userable, error) {
m365, err := acct.M365Config()
client, err := apiClient(ctx, acct)
if err != nil {
return nil, clues.Wrap(err, "retrieving m365 account configuration").WithClues(ctx)
}
client, err := api.NewClient(m365)
if err != nil {
return nil, clues.Wrap(err, "creating api client").WithClues(ctx)
return nil, err
}
return client.Users().GetAll(ctx, errs)
}
// User fetches a single user's data.
func User(ctx context.Context, gwi getWithInfoer, userID string) (models.Userable, *api.UserInfo, error) {
u, err := gwi.GetByID(ctx, userID)
if err != nil {
@ -69,3 +83,17 @@ func User(ctx context.Context, gwi getWithInfoer, userID string) (models.Userabl
return u, ui, nil
}
// Sites fetches all sharepoint sites in the tenant
func Sites(
ctx context.Context,
acct account.Account,
errs *fault.Bus,
) ([]models.Siteable, error) {
client, err := apiClient(ctx, acct)
if err != nil {
return nil, err
}
return client.Sites().GetAll(ctx, errs)
}

View File

@ -31,10 +31,11 @@ func (suite *DiscoveryIntegrationSuite) TestUsers() {
ctx, flush := tester.NewContext()
defer flush()
t := suite.T()
acct := tester.NewM365Account(t)
errs := fault.New(true)
var (
t = suite.T()
acct = tester.NewM365Account(t)
errs = fault.New(true)
)
users, err := discovery.Users(ctx, acct, errs)
assert.NoError(t, err, clues.ToCore(err))
@ -42,8 +43,7 @@ func (suite *DiscoveryIntegrationSuite) TestUsers() {
ferrs := errs.Errors()
assert.Nil(t, ferrs.Failure)
assert.Empty(t, ferrs.Recovered)
assert.Less(t, 0, len(users))
assert.NotEmpty(t, users)
}
func (suite *DiscoveryIntegrationSuite) TestUsers_InvalidCredentials() {
@ -84,16 +84,85 @@ func (suite *DiscoveryIntegrationSuite) TestUsers_InvalidCredentials() {
for _, test := range table {
suite.Run(test.name, func() {
t := suite.T()
var (
t = suite.T()
a = test.acct(t)
errs = fault.New(true)
)
a := test.acct(t)
errs := fault.New(true)
users, err := discovery.Users(ctx, a, errs)
assert.Empty(t, users, "returned some users")
assert.NotNil(t, err)
// TODO(ashmrtn): Uncomment when fault package is used in discovery API.
// assert.NotNil(t, errs.Err())
})
}
}
func (suite *DiscoveryIntegrationSuite) TestSites() {
ctx, flush := tester.NewContext()
defer flush()
var (
t = suite.T()
acct = tester.NewM365Account(t)
errs = fault.New(true)
)
sites, err := discovery.Sites(ctx, acct, errs)
assert.NoError(t, err, clues.ToCore(err))
ferrs := errs.Errors()
assert.Nil(t, ferrs.Failure)
assert.Empty(t, ferrs.Recovered)
assert.NotEmpty(t, sites)
}
func (suite *DiscoveryIntegrationSuite) TestSites_InvalidCredentials() {
ctx, flush := tester.NewContext()
defer flush()
table := []struct {
name string
acct func(t *testing.T) account.Account
}{
{
name: "Invalid Credentials",
acct: func(t *testing.T) account.Account {
a, err := account.NewAccount(
account.ProviderM365,
account.M365Config{
M365: credentials.M365{
AzureClientID: "Test",
AzureClientSecret: "without",
},
AzureTenantID: "data",
},
)
require.NoError(t, err, clues.ToCore(err))
return a
},
},
{
name: "Empty Credentials",
acct: func(t *testing.T) account.Account {
// intentionally swallowing the error here
a, _ := account.NewAccount(account.ProviderM365)
return a
},
},
}
for _, test := range table {
suite.Run(test.name, func() {
var (
t = suite.T()
a = test.acct(t)
errs = fault.New(true)
)
sites, err := discovery.Sites(ctx, a, errs)
assert.Empty(t, sites, "returned some sites")
assert.NotNil(t, err)
})
}
}

View File

@ -4,31 +4,21 @@ package connector
import (
"context"
"fmt"
"net/http"
"runtime/trace"
"strings"
"sync"
"github.com/alcionai/clues"
"github.com/microsoft/kiota-abstractions-go/serialization"
msgraphgocore "github.com/microsoftgraph/msgraph-sdk-go-core"
"github.com/microsoftgraph/msgraph-sdk-go/models"
"github.com/pkg/errors"
"golang.org/x/exp/maps"
"github.com/alcionai/corso/src/internal/common"
"github.com/alcionai/corso/src/internal/common/ptr"
"github.com/alcionai/corso/src/internal/connector/discovery/api"
"github.com/alcionai/corso/src/internal/connector/graph"
"github.com/alcionai/corso/src/internal/connector/sharepoint"
"github.com/alcionai/corso/src/internal/connector/support"
"github.com/alcionai/corso/src/internal/data"
"github.com/alcionai/corso/src/internal/diagnostics"
"github.com/alcionai/corso/src/internal/operations/inject"
"github.com/alcionai/corso/src/pkg/account"
"github.com/alcionai/corso/src/pkg/fault"
"github.com/alcionai/corso/src/pkg/filters"
)
// ---------------------------------------------------------------------------
@ -52,10 +42,7 @@ type GraphConnector struct {
tenant string
credentials account.M365Config
// TODO: remove in favor of the maps below.
Sites map[string]string // webURL -> siteID and siteID -> webURL
// lookup for resource owner ids to names, and names to ids.
// maps of resource owner ids to names, and names to ids.
// not guaranteed to be populated, only here as a post-population
// reference for processes that choose to populate the values.
IDNameLookup common.IDNameSwapper
@ -91,10 +78,11 @@ func NewGraphConnector(
}
gc := GraphConnector{
itemClient: itemClient,
tenant: m365.AzureTenantID,
wg: &sync.WaitGroup{},
credentials: m365,
itemClient: itemClient,
tenant: m365.AzureTenantID,
wg: &sync.WaitGroup{},
credentials: m365,
IDNameLookup: common.IDsNames{},
}
gc.Service, err = gc.createService()
@ -107,12 +95,6 @@ func NewGraphConnector(
return nil, clues.Wrap(err, "creating api client").WithClues(ctx)
}
if r == AllResources || r == Sites {
if err = gc.setTenantSites(ctx, errs); err != nil {
return nil, clues.Wrap(err, "retrieveing tenant site list")
}
}
return &gc, nil
}
@ -186,114 +168,6 @@ func (gc *GraphConnector) createService() (*graph.Service, error) {
return graph.NewService(adapter), nil
}
// setTenantSites queries the M365 to identify the sites in the
// workspace. The sites field is updated during this method
// iff the returned error is nil.
func (gc *GraphConnector) setTenantSites(ctx context.Context, errs *fault.Bus) error {
gc.Sites = map[string]string{}
ctx, end := diagnostics.Span(ctx, "gc:setTenantSites")
defer end()
sites, err := getResources(
ctx,
gc.Service,
gc.tenant,
sharepoint.GetAllSitesForTenant,
models.CreateSiteCollectionResponseFromDiscriminatorValue,
identifySite,
errs)
if err != nil {
return err
}
gc.Sites = sites
return nil
}
var errKnownSkippableCase = clues.New("case is known and skippable")
const personalSitePath = "sharepoint.com/personal/"
// Transforms an interface{} into a key,value pair representing
// siteName:siteID.
func identifySite(item any) (string, string, error) {
m, ok := item.(models.Siteable)
if !ok {
return "", "", clues.New("non-Siteable item").With("item_type", fmt.Sprintf("%T", item))
}
id := ptr.Val(m.GetId())
url, ok := ptr.ValOK(m.GetWebUrl())
if m.GetName() == nil {
// the built-in site at "https://{tenant-domain}/search" never has a name.
if ok && strings.HasSuffix(url, "/search") {
return "", "", clues.Stack(errKnownSkippableCase).With("site_id", clues.Hide(id))
}
return "", "", clues.New("site has no name").With("site_id", clues.Hide(id))
}
// personal (ie: oneDrive) sites have to be filtered out server-side.
if ok && strings.Contains(url, personalSitePath) {
return "", "", clues.Stack(errKnownSkippableCase).With("site_id", clues.Hide(id))
}
return url, id, nil
}
// GetSiteWebURLs returns the WebURLs of sharepoint sites within the tenant.
func (gc *GraphConnector) GetSiteWebURLs() []string {
return maps.Keys(gc.Sites)
}
// GetSiteIds returns the canonical site IDs in the tenant
func (gc *GraphConnector) GetSiteIDs() []string {
return maps.Values(gc.Sites)
}
// UnionSiteIDsAndWebURLs reduces the id and url slices into a single slice of site IDs.
// WebURLs will run as a path-suffix style matcher. Callers may provide partial urls, though
// each element in the url must fully match. Ex: the webURL value "foo" will match "www.ex.com/foo",
// but not match "www.ex.com/foobar".
// The returned IDs are reduced to a set of unique values.
func (gc *GraphConnector) UnionSiteIDsAndWebURLs(
ctx context.Context,
ids, urls []string,
errs *fault.Bus,
) ([]string, error) {
if len(gc.Sites) == 0 {
if err := gc.setTenantSites(ctx, errs); err != nil {
return nil, err
}
}
idm := map[string]struct{}{}
for _, id := range ids {
idm[id] = struct{}{}
}
match := filters.PathSuffix(urls)
for url, id := range gc.Sites {
if !match.Compare(url) {
continue
}
idm[id] = struct{}{}
}
idsl := make([]string, 0, len(idm))
for id := range idm {
idsl = append(idsl, id)
}
return idsl, nil
}
// AwaitStatus waits for all gc tasks to complete and then returns status
func (gc *GraphConnector) Wait() *data.CollectionStats {
defer func() {
@ -349,59 +223,3 @@ func (gc *GraphConnector) incrementAwaitingMessages() {
func (gc *GraphConnector) incrementMessagesBy(num int) {
gc.wg.Add(num)
}
// ---------------------------------------------------------------------------
// Helper Funcs
// ---------------------------------------------------------------------------
func getResources(
ctx context.Context,
gs graph.Servicer,
tenantID string,
query func(context.Context, graph.Servicer) (serialization.Parsable, error),
parser func(parseNode serialization.ParseNode) (serialization.Parsable, error),
identify func(any) (string, string, error),
errs *fault.Bus,
) (map[string]string, error) {
resources := map[string]string{}
response, err := query(ctx, gs)
if err != nil {
return nil, graph.Wrap(ctx, err, "retrieving tenant's resources")
}
iter, err := msgraphgocore.NewPageIterator(response, gs.Adapter(), parser)
if err != nil {
return nil, graph.Stack(ctx, err)
}
el := errs.Local()
callbackFunc := func(item any) bool {
if el.Failure() != nil {
return false
}
k, v, err := identify(item)
if err != nil {
if !errors.Is(err, errKnownSkippableCase) {
el.AddRecoverable(clues.Stack(err).
WithClues(ctx).
With("query_url", gs.Adapter().GetBaseUrl()))
}
return true
}
resources[k] = v
resources[v] = k
return true
}
if err := iter.Iterate(ctx, callbackFunc); err != nil {
return nil, graph.Stack(ctx, err)
}
return resources, el.Failure()
}

View File

@ -6,15 +6,10 @@ import (
"github.com/alcionai/clues"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/stretchr/testify/suite"
"github.com/alcionai/corso/src/internal/connector/graph"
"github.com/alcionai/corso/src/internal/connector/support"
"github.com/alcionai/corso/src/internal/tester"
"github.com/alcionai/corso/src/pkg/account"
"github.com/alcionai/corso/src/pkg/credentials"
"github.com/alcionai/corso/src/pkg/fault"
"github.com/alcionai/corso/src/pkg/selectors"
)
@ -33,57 +28,6 @@ func TestDisconnectedGraphSuite(t *testing.T) {
suite.Run(t, s)
}
func (suite *DisconnectedGraphConnectorSuite) TestBadConnection() {
ctx, flush := tester.NewContext()
defer flush()
table := []struct {
name string
acct func(t *testing.T) account.Account
}{
{
name: "Invalid Credentials",
acct: func(t *testing.T) account.Account {
a, err := account.NewAccount(
account.ProviderM365,
account.M365Config{
M365: credentials.M365{
AzureClientID: "Test",
AzureClientSecret: "without",
},
AzureTenantID: "data",
},
)
require.NoError(t, err, clues.ToCore(err))
return a
},
},
{
name: "Empty Credentials",
acct: func(t *testing.T) account.Account {
// intentionally swallowing the error here
a, _ := account.NewAccount(account.ProviderM365)
return a
},
},
}
for _, test := range table {
suite.Run(test.name, func() {
t := suite.T()
gc, err := NewGraphConnector(
ctx,
graph.HTTPClient(graph.NoTimeout()),
test.acct(t),
Sites,
fault.New(true))
assert.Nil(t, gc, test.name+" failed")
assert.NotNil(t, err, test.name+" failed")
})
}
}
func statusTestTask(gc *GraphConnector, objects, success, folder int) {
ctx, flush := tester.NewContext()
defer flush()

View File

@ -39,103 +39,6 @@ func TestGraphConnectorUnitSuite(t *testing.T) {
suite.Run(t, &GraphConnectorUnitSuite{Suite: tester.NewUnitSuite(t)})
}
func (suite *GraphConnectorUnitSuite) TestUnionSiteIDsAndWebURLs() {
const (
url1 = "www.foo.com/bar"
url2 = "www.fnords.com/smarf"
path1 = "bar"
path2 = "/smarf"
id1 = "site-id-1"
id2 = "site-id-2"
)
gc := &GraphConnector{
// must be populated, else the func will try to make a graph call
// to retrieve site data.
Sites: map[string]string{
url1: id1,
url2: id2,
},
}
table := []struct {
name string
ids []string
urls []string
expect []string
}{
{
name: "nil",
},
{
name: "empty",
ids: []string{},
urls: []string{},
expect: []string{},
},
{
name: "ids only",
ids: []string{id1, id2},
urls: []string{},
expect: []string{id1, id2},
},
{
name: "urls only",
ids: []string{},
urls: []string{url1, url2},
expect: []string{id1, id2},
},
{
name: "url suffix only",
ids: []string{},
urls: []string{path1, path2},
expect: []string{id1, id2},
},
{
name: "url and suffix overlap",
ids: []string{},
urls: []string{url1, url2, path1, path2},
expect: []string{id1, id2},
},
{
name: "ids and urls, no overlap",
ids: []string{id1},
urls: []string{url2},
expect: []string{id1, id2},
},
{
name: "ids and urls, overlap",
ids: []string{id1, id2},
urls: []string{url1, url2},
expect: []string{id1, id2},
},
{
name: "partial non-match on path",
ids: []string{},
urls: []string{path1[2:], path2[2:]},
expect: []string{},
},
{
name: "partial non-match on url",
ids: []string{},
urls: []string{url1[5:], url2[5:]},
expect: []string{},
},
}
for _, test := range table {
suite.Run(test.name, func() {
ctx, flush := tester.NewContext()
defer flush()
t := suite.T()
result, err := gc.UnionSiteIDsAndWebURLs(ctx, test.ids, test.urls, fault.New(true))
assert.NoError(t, err, clues.ToCore(err))
assert.ElementsMatch(t, test.expect, result)
})
}
}
func (suite *GraphConnectorUnitSuite) TestPopulateOwnerIDAndNamesFrom() {
const (
ownerID = "owner-id"
@ -320,35 +223,6 @@ func (suite *GraphConnectorIntegrationSuite) SetupSuite() {
tester.LogTimeOfTest(suite.T())
}
// TestSetTenantSites verifies GraphConnector's ability to query
// the sites associated with the credentials
func (suite *GraphConnectorIntegrationSuite) TestSetTenantSites() {
newConnector := GraphConnector{
tenant: "test_tenant",
Sites: make(map[string]string, 0),
credentials: suite.connector.credentials,
}
ctx, flush := tester.NewContext()
defer flush()
t := suite.T()
service, err := newConnector.createService()
require.NoError(t, err, clues.ToCore(err))
newConnector.Service = service
assert.Equal(t, 0, len(newConnector.Sites))
err = newConnector.setTenantSites(ctx, fault.New(true))
assert.NoError(t, err, clues.ToCore(err))
assert.Less(t, 0, len(newConnector.Sites))
for _, site := range newConnector.Sites {
assert.NotContains(t, "sharepoint.com/personal/", site)
}
}
func (suite *GraphConnectorIntegrationSuite) TestRestoreFailsBadService() {
ctx, flush := tester.NewContext()
defer flush()
@ -564,16 +438,26 @@ func runBackupAndCompare(
cats[c.category] = struct{}{}
}
expectedDests := make([]destAndCats, 0, len(config.resourceOwners))
var (
expectedDests = make([]destAndCats, 0, len(config.resourceOwners))
idToName = map[string]string{}
nameToID = map[string]string{}
)
for _, ro := range config.resourceOwners {
expectedDests = append(expectedDests, destAndCats{
resourceOwner: ro,
dest: config.dest.ContainerName,
cats: cats,
})
idToName[ro] = ro
nameToID[ro] = ro
}
backupGC := loadConnector(ctx, t, graph.HTTPClient(graph.NoTimeout()), config.resource)
backupGC.IDNameLookup = common.IDsNames{IDToName: idToName, NameToID: nameToID}
backupSel := backupSelectorForExpected(t, config.service, expectedDests)
t.Logf("Selective backup of %s\n", backupSel)
@ -1271,6 +1155,11 @@ func (suite *GraphConnectorIntegrationSuite) TestBackup_CreatesPrefixCollections
start = time.Now()
)
id, name, err := backupGC.PopulateOwnerIDAndNamesFrom(backupSel.DiscreteOwner, nil)
require.NoError(t, err, clues.ToCore(err))
backupSel.SetDiscreteOwnerIDName(id, name)
dcs, excludes, err := backupGC.ProduceBackupCollections(
ctx,
backupSel,

View File

@ -123,6 +123,11 @@ func prepNewTestBackupOp(
t.FailNow()
}
id, name, err := gc.PopulateOwnerIDAndNamesFrom(sel.DiscreteOwner, nil)
require.NoError(t, err, clues.ToCore(err))
sel.SetDiscreteOwnerIDName(id, name)
bo := newTestBackupOp(t, ctx, kw, ms, gc, acct, sel, bus, featureToggles, closer)
return bo, acct, kw, ms, gc, closer

View File

@ -278,6 +278,9 @@ func setupExchangeBackup(
fault.New(true))
require.NoError(t, err, clues.ToCore(err))
id, name, err := gc.PopulateOwnerIDAndNamesFrom(owner, nil)
require.NoError(t, err, clues.ToCore(err))
bsel.DiscreteOwner = owner
bsel.Include(
bsel.MailFolders([]string{exchange.DefaultMailFolder}, selectors.PrefixMatch()),
@ -285,6 +288,8 @@ func setupExchangeBackup(
bsel.EventCalendars([]string{exchange.DefaultCalendar}, selectors.PrefixMatch()),
)
bsel.SetDiscreteOwnerIDName(id, name)
bo, err := NewBackupOperation(
ctx,
control.Options{},
@ -335,6 +340,9 @@ func setupSharePointBackup(
fault.New(true))
require.NoError(t, err, clues.ToCore(err))
id, name, err := gc.PopulateOwnerIDAndNamesFrom(owner, nil)
require.NoError(t, err, clues.ToCore(err))
spsel.DiscreteOwner = owner
// assume a folder name "test" exists in the drive.
// this is brittle, and requires us to backfill anytime
@ -342,6 +350,8 @@ func setupSharePointBackup(
// growth from re-backup/restore of restored files.
spsel.Include(spsel.LibraryFolders([]string{"test"}, selectors.PrefixMatch()))
spsel.SetDiscreteOwnerIDName(id, name)
bo, err := NewBackupOperation(
ctx,
control.Options{},

View File

@ -9,9 +9,7 @@ import (
"github.com/alcionai/corso/src/internal/common"
"github.com/alcionai/corso/src/internal/common/ptr"
"github.com/alcionai/corso/src/internal/connector"
"github.com/alcionai/corso/src/internal/connector/discovery"
"github.com/alcionai/corso/src/internal/connector/graph"
"github.com/alcionai/corso/src/pkg/account"
"github.com/alcionai/corso/src/pkg/fault"
)
@ -58,6 +56,22 @@ func Users(ctx context.Context, acct account.Account, errs *fault.Bus) ([]*User,
return ret, nil
}
// parseUser extracts information from `models.Userable` we care about
func parseUser(item models.Userable) (*User, error) {
if item.GetUserPrincipalName() == nil {
return nil, clues.New("user missing principal name").
With("user_id", *item.GetId()) // TODO: pii
}
u := &User{
PrincipalName: ptr.Val(item.GetUserPrincipalName()),
ID: ptr.Val(item.GetId()),
Name: ptr.Val(item.GetDisplayName()),
}
return u, nil
}
// UsersMap retrieves all users in the tenant, and returns two maps: one id-to-principalName,
// and one principalName-to-id.
func UsersMap(
@ -96,59 +110,66 @@ type Site struct {
// ID is of the format: <site collection hostname>.<site collection unique id>.<site unique id>
// for example: contoso.sharepoint.com,abcdeab3-0ccc-4ce1-80ae-b32912c9468d,xyzud296-9f7c-44e1-af81-3c06d0d43007
ID string
// DisplayName is the human-readable name of the site. Normally the plaintext name that the
// user provided when they created the site, though it can be changed across time.
// Ex: webUrl: https://host.com/sites/TestingSite, displayName: "Testing Site"
DisplayName string
}
// Sites returns a list of Sites in a specified M365 tenant
func Sites(ctx context.Context, acct account.Account, errs *fault.Bus) ([]*Site, error) {
gc, err := connector.NewGraphConnector(ctx, graph.HTTPClient(graph.NoTimeout()), acct, connector.Sites, errs)
sites, err := discovery.Sites(ctx, acct, errs)
if err != nil {
return nil, clues.Wrap(err, "initializing M365 graph connection")
}
// gc.Sites is a map with keys: SiteURL, values: ID
ret := make([]*Site, 0, len(gc.Sites))
for k, v := range gc.Sites {
ret = append(ret, &Site{
WebURL: k,
ID: v,
})
ret := make([]*Site, 0, len(sites))
for _, s := range sites {
ps, err := parseSite(s)
if err != nil {
return nil, clues.Wrap(err, "parsing siteable")
}
ret = append(ret, ps)
}
return ret, nil
}
// SiteURLs returns a list of SharePoint site WebURLs in the specified M365 tenant
func SiteURLs(ctx context.Context, acct account.Account, errs *fault.Bus) ([]string, error) {
gc, err := connector.NewGraphConnector(ctx, graph.HTTPClient(graph.NoTimeout()), acct, connector.Sites, errs)
// parseSite extracts the information from `models.Siteable` we care about
func parseSite(item models.Siteable) (*Site, error) {
s := &Site{
ID: ptr.Val(item.GetId()),
WebURL: ptr.Val(item.GetWebUrl()),
DisplayName: ptr.Val(item.GetDisplayName()),
}
return s, nil
}
// SitesMap retrieves all sites in the tenant, and returns two maps: one id-to-webURL,
// and one webURL-to-id.
func SitesMap(
ctx context.Context,
acct account.Account,
errs *fault.Bus,
) (common.IDsNames, error) {
sites, err := Sites(ctx, acct, errs)
if err != nil {
return nil, clues.Wrap(err, "initializing M365 graph connection")
return common.IDsNames{}, err
}
return gc.GetSiteWebURLs(), nil
}
// SiteIDs returns a list of SharePoint sites IDs in the specified M365 tenant
func SiteIDs(ctx context.Context, acct account.Account, errs *fault.Bus) ([]string, error) {
gc, err := connector.NewGraphConnector(ctx, graph.HTTPClient(graph.NoTimeout()), acct, connector.Sites, errs)
if err != nil {
return nil, clues.Wrap(err, "initializing graph connection")
}
return gc.GetSiteIDs(), nil
}
// parseUser extracts information from `models.Userable` we care about
func parseUser(item models.Userable) (*User, error) {
if item.GetUserPrincipalName() == nil {
return nil, clues.New("user missing principal name").
With("user_id", clues.Hide(ptr.Val(item.GetId())))
}
u := &User{PrincipalName: *item.GetUserPrincipalName(), ID: *item.GetId()}
if item.GetDisplayName() != nil {
u.Name = *item.GetDisplayName()
}
return u, nil
ins := common.IDsNames{
IDToName: make(map[string]string, len(sites)),
NameToID: make(map[string]string, len(sites)),
}
for _, s := range sites {
ins.IDToName[s.ID] = s.WebURL
ins.NameToID[s.WebURL] = s.ID
}
return ins, nil
}

View File

@ -66,6 +66,7 @@ func (suite *M365IntegrationSuite) TestSites() {
t := suite.T()
assert.NotEmpty(t, s.WebURL)
assert.NotEmpty(t, s.ID)
assert.NotEmpty(t, s.DisplayName)
})
}
}