diff --git a/CHANGELOG.md b/CHANGELOG.md index 0e8c12dfd..5e4fca312 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Reduce backup runtime for OneDrive and SharePoint incremental backups that have no file changes. - Increase Exchange backup performance by lazily fetching data only for items whose content changed. - Added `--backups` flag to delete multiple backups in `corso backup delete` command. +- Backup now includes all sites that belong to a team, not just the root site. ## Fixed - Teams Channels that cannot support delta tokens (those without messages) fall back to non-delta enumeration and no longer fail a backup. diff --git a/src/cli/flags/testdata/backup_list.go b/src/cli/flags/testdata/backup_list.go index 911a6b450..82b08646f 100644 --- a/src/cli/flags/testdata/backup_list.go +++ b/src/cli/flags/testdata/backup_list.go @@ -3,9 +3,10 @@ package testdata import ( "testing" - "github.com/alcionai/corso/src/cli/flags" "github.com/spf13/cobra" "gotest.tools/v3/assert" + + "github.com/alcionai/corso/src/cli/flags" ) func PreparedBackupListFlags() []string { diff --git a/src/internal/m365/graph/errors.go b/src/internal/m365/graph/errors.go index f5c7824ab..6a758977e 100644 --- a/src/internal/m365/graph/errors.go +++ b/src/internal/m365/graph/errors.go @@ -70,6 +70,7 @@ const ( NoSPLicense errorMessage = "Tenant does not have a SPO license" parameterDeltaTokenNotSupported errorMessage = "Parameter 'DeltaToken' not supported for this request" usersCannotBeResolved errorMessage = "One or more users could not be resolved" + requestedSiteCouldNotBeFound errorMessage = "Requested site could not be found" ) const ( @@ -259,6 +260,10 @@ func IsErrUsersCannotBeResolved(err error) bool { return hasErrorCode(err, noResolvedUsers) || hasErrorMessage(err, usersCannotBeResolved) } +func IsErrSiteNotFound(err error) bool { return hasErrorMessage(err, 
requestedSiteCouldNotBeFound) +} + // --------------------------------------------------------------------------- // error parsers // --------------------------------------------------------------------------- diff --git a/src/internal/m365/graph/errors_test.go b/src/internal/m365/graph/errors_test.go index cf9f2f99d..cd0057fda 100644 --- a/src/internal/m365/graph/errors_test.go +++ b/src/internal/m365/graph/errors_test.go @@ -628,6 +628,51 @@ func (suite *GraphErrorsUnitSuite) TestIsErrUsersCannotBeResolved() { } } +func (suite *GraphErrorsUnitSuite) TestIsErrSiteCouldNotBeFound() { + table := []struct { + name string + err error + expect assert.BoolAssertionFunc + }{ + { + name: "nil", + err: nil, + expect: assert.False, + }, + { + name: "non-matching", + err: assert.AnError, + expect: assert.False, + }, + { + name: "non-matching oDataErr", + err: odErrMsg("InvalidRequest", "cant resolve sites"), + expect: assert.False, + }, + { + name: "matching oDataErr msg", + err: odErrMsg("InvalidRequest", string(requestedSiteCouldNotBeFound)), + expect: assert.True, + }, + // next two tests are to make sure the checks are case insensitive + { + name: "oDataErr uppercase", + err: odErrMsg("InvalidRequest", strings.ToUpper(string(requestedSiteCouldNotBeFound))), + expect: assert.True, + }, + { + name: "oDataErr lowercase", + err: odErrMsg("InvalidRequest", strings.ToLower(string(requestedSiteCouldNotBeFound))), + expect: assert.True, + }, + } + for _, test := range table { + suite.Run(test.name, func() { + test.expect(suite.T(), IsErrSiteNotFound(test.err)) + }) + } +} + func (suite *GraphErrorsUnitSuite) TestIsErrCannotOpenFileAttachment() { table := []struct { name string diff --git a/src/internal/m365/restore.go b/src/internal/m365/restore.go index 616fd6f2b..d6237c072 100644 --- a/src/internal/m365/restore.go +++ b/src/internal/m365/restore.go @@ -84,6 +84,7 @@ func (ctrl *Controller) ConsumeRestoreCollections( rcc, ctrl.AC, ctrl.backupDriveIDNames, + 
ctrl.backupSiteIDWebURL, dcs, deets, errs, diff --git a/src/internal/m365/service/groups/backup.go b/src/internal/m365/service/groups/backup.go index 7dbbf8e13..1943b8fb4 100644 --- a/src/internal/m365/service/groups/backup.go +++ b/src/internal/m365/service/groups/backup.go @@ -79,10 +79,7 @@ func ProduceBackupCollections( switch scope.Category().PathType() { case path.LibrariesCategory: - // TODO(meain): Private channels get a separate SharePoint - // site. We should also back those up and not just the - // default one. - resp, err := ac.Groups().GetRootSite(ctx, bpc.ProtectedResource.ID()) + sites, err := ac.Groups().GetAllSites(ctx, bpc.ProtectedResource.ID(), errs) if err != nil { return nil, nil, false, err } @@ -95,39 +92,47 @@ func ProduceBackupCollections( siteMetadataCollection[siteID] = append(siteMetadataCollection[siteID], c) } - pr := idname.NewProvider(ptr.Val(resp.GetId()), ptr.Val(resp.GetName())) - sbpc := inject.BackupProducerConfig{ - LastBackupVersion: bpc.LastBackupVersion, - Options: bpc.Options, - ProtectedResource: pr, - Selector: bpc.Selector, - MetadataCollections: siteMetadataCollection[ptr.Val(resp.GetId())], - } + for _, s := range sites { + pr := idname.NewProvider(ptr.Val(s.GetId()), ptr.Val(s.GetName())) + sbpc := inject.BackupProducerConfig{ + LastBackupVersion: bpc.LastBackupVersion, + Options: bpc.Options, + ProtectedResource: pr, + Selector: bpc.Selector, + MetadataCollections: siteMetadataCollection[ptr.Val(s.GetId())], + } - bh := drive.NewGroupBackupHandler( - bpc.ProtectedResource.ID(), - ptr.Val(resp.GetId()), - ac.Drives(), - scope) + bh := drive.NewGroupBackupHandler( + bpc.ProtectedResource.ID(), + ptr.Val(s.GetId()), + ac.Drives(), + scope) - cp, err := bh.SitePathPrefix(creds.AzureTenantID) - if err != nil { - return nil, nil, false, clues.Wrap(err, "getting canonical path") - } + cp, err := bh.SitePathPrefix(creds.AzureTenantID) + if err != nil { + return nil, nil, false, clues.Wrap(err, "getting canonical path") + } 
- sitesPreviousPaths[ptr.Val(resp.GetId())] = cp.String() + sitesPreviousPaths[ptr.Val(s.GetId())] = cp.String() - dbcs, canUsePreviousBackup, err = site.CollectLibraries( - ctx, - sbpc, - bh, - creds.AzureTenantID, - ssmb, - su, - errs) - if err != nil { - el.AddRecoverable(ctx, err) - continue + cs, cupb, err := site.CollectLibraries( + ctx, + sbpc, + bh, + creds.AzureTenantID, + ssmb, + su, + errs) + if err != nil { + el.AddRecoverable(ctx, err) + continue + } + + dbcs = append(dbcs, cs...) + + // FIXME(meain): This can cause incorrect backup + // https://github.com/alcionai/corso/issues/4371 + canUsePreviousBackup = canUsePreviousBackup || cupb } case path.ChannelMessagesCategory: diff --git a/src/internal/m365/service/groups/restore.go b/src/internal/m365/service/groups/restore.go index 9a94a921b..fc09088e4 100644 --- a/src/internal/m365/service/groups/restore.go +++ b/src/internal/m365/service/groups/restore.go @@ -12,6 +12,7 @@ import ( "github.com/alcionai/corso/src/internal/common/ptr" "github.com/alcionai/corso/src/internal/data" "github.com/alcionai/corso/src/internal/m365/collection/drive" + "github.com/alcionai/corso/src/internal/m365/graph" "github.com/alcionai/corso/src/internal/m365/support" "github.com/alcionai/corso/src/internal/operations/inject" "github.com/alcionai/corso/src/pkg/backup/details" @@ -29,24 +30,20 @@ func ConsumeRestoreCollections( rcc inject.RestoreConsumerConfig, ac api.Client, backupDriveIDNames idname.Cacher, + backupSiteIDWebURL idname.Cacher, dcs []data.RestoreCollection, deets *details.Builder, errs *fault.Bus, ctr *count.Bus, ) (*support.ControllerOperationStatus, error) { var ( - restoreMetrics support.CollectionMetrics - caches = drive.NewRestoreCaches(backupDriveIDNames) - lrh = drive.NewLibraryRestoreHandler(ac, rcc.Selector.PathService()) - el = errs.Local() + restoreMetrics support.CollectionMetrics + caches = drive.NewRestoreCaches(backupDriveIDNames) + lrh = drive.NewLibraryRestoreHandler(ac, 
rcc.Selector.PathService()) + el = errs.Local() + webURLToSiteNames = map[string]string{} ) - // TODO: uncomment when a handler is available - // err := caches.Populate(ctx, lrh, rcc.ProtectedResource.ID()) - // if err != nil { - // return nil, clues.Wrap(err, "initializing restore caches") - // } - // Reorder collections so that the parents directories are created // before the child directories; a requirement for permissions. data.SortRestoreCollections(dcs) @@ -59,7 +56,7 @@ func ConsumeRestoreCollections( var ( err error - resp models.Siteable + siteName string category = dc.FullPath().Category() metrics support.CollectionMetrics ictx = clues.Add(ctx, @@ -71,16 +68,25 @@ switch dc.FullPath().Category() { case path.LibrariesCategory: - // TODO(meain): As of now we only restore the root site - // and that too to whatever is currently the root site of the - // group and not the original one. Not sure if the - // original can be changed. - resp, err = ac.Groups().GetRootSite(ctx, rcc.ProtectedResource.ID()) - if err != nil { - return nil, err + siteID := dc.FullPath().Folders()[1] + + webURL, ok := backupSiteIDWebURL.NameOf(siteID) + if !ok { + // This should not happen, but just in case + logger.Ctx(ctx).With("site_id", siteID).Info("site weburl not found, using site id") } - pr := idname.NewProvider(ptr.Val(resp.GetId()), ptr.Val(resp.GetName())) + siteName, err = getSiteName(ctx, siteID, webURL, ac.Sites(), webURLToSiteNames) + if err != nil { + el.AddRecoverable(ctx, clues.Wrap(err, "getting site"). + With("web_url", webURL, "site_id", siteID)) + } else if len(siteName) == 0 { + // Site was deleted in between backup and restore and is not + // available anymore. 
+ continue + } + + pr := idname.NewProvider(siteID, siteName) srcc := inject.RestoreConsumerConfig{ BackupVersion: rcc.BackupVersion, Options: rcc.Options, @@ -133,3 +139,38 @@ func ConsumeRestoreCollections( return status, el.Failure() } + +func getSiteName( + ctx context.Context, + siteID string, + webURL string, + ac api.GetByIDer[models.Siteable], + webURLToSiteNames map[string]string, +) (string, error) { + siteName, ok := webURLToSiteNames[webURL] + if ok { + return siteName, nil + } + + site, err := ac.GetByID(ctx, siteID, api.CallConfig{}) + if err != nil { + webURLToSiteNames[webURL] = "" + + if graph.IsErrSiteNotFound(err) { + // TODO(meain): Should we surface this to the user somehow? + // In case a site that we had previously backed up was + // deleted, skip that site with a warning. + logger.Ctx(ctx).With("web_url", webURL, "site_id", siteID). + Info("Site does not exist, skipping restore.") + + return "", nil + } + + return "", err + } + + siteName = ptr.Val(site.GetDisplayName()) + webURLToSiteNames[webURL] = siteName + + return siteName, nil +} diff --git a/src/internal/m365/service/groups/restore_test.go b/src/internal/m365/service/groups/restore_test.go index d87000fc5..262bc3159 100644 --- a/src/internal/m365/service/groups/restore_test.go +++ b/src/internal/m365/service/groups/restore_test.go @@ -7,12 +7,17 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" + "golang.org/x/exp/slices" "github.com/alcionai/corso/src/internal/common/idname" + "github.com/alcionai/corso/src/internal/common/ptr" "github.com/alcionai/corso/src/internal/data" "github.com/alcionai/corso/src/internal/data/mock" + "github.com/alcionai/corso/src/internal/m365/graph" "github.com/alcionai/corso/src/internal/operations/inject" "github.com/alcionai/corso/src/internal/tester" + "github.com/alcionai/corso/src/internal/tester/tconfig" + "github.com/alcionai/corso/src/pkg/control" 
"github.com/alcionai/corso/src/pkg/fault" "github.com/alcionai/corso/src/pkg/path" "github.com/alcionai/corso/src/pkg/services/m365/api" @@ -52,9 +57,118 @@ func (suite *GroupsUnitSuite) TestConsumeRestoreCollections_noErrorOnGroups() { rcc, api.Client{}, idname.NewCache(map[string]string{}), + idname.NewCache(map[string]string{}), dcs, nil, fault.New(false), nil) assert.NoError(t, err, "Groups Channels restore") } + +type groupsIntegrationSuite struct { + tester.Suite + resource string + tenantID string + ac api.Client +} + +func TestGroupsIntegrationSuite(t *testing.T) { + suite.Run(t, &groupsIntegrationSuite{ + Suite: tester.NewIntegrationSuite( + t, + [][]string{tconfig.M365AcctCredEnvs}), + }) +} + +func (suite *groupsIntegrationSuite) SetupSuite() { + t := suite.T() + + ctx, flush := tester.NewContext(t) + defer flush() + + graph.InitializeConcurrencyLimiter(ctx, true, 4) + + suite.resource = tconfig.M365TeamID(t) + + acct := tconfig.NewM365Account(t) + creds, err := acct.M365Config() + require.NoError(t, err, clues.ToCore(err)) + + suite.ac, err = api.NewClient(creds, control.DefaultOptions()) + require.NoError(t, err, clues.ToCore(err)) + + suite.tenantID = creds.AzureTenantID +} + +// test for getSiteName +func (suite *groupsIntegrationSuite) TestGetSiteName() { + t := suite.T() + + ctx, flush := tester.NewContext(t) + defer flush() + + rootSite, err := suite.ac.Groups().GetRootSite(ctx, suite.resource) + require.NoError(t, err, clues.ToCore(err)) + + // Generate a fake site ID that appears valid to graph API but doesn't actually exist. 
+ // This "could" be flaky, but highly unlikely + unavailableSiteID := []rune(ptr.Val(rootSite.GetId())) + firstIDChar := slices.Index(unavailableSiteID, ',') + 1 + + if unavailableSiteID[firstIDChar] != '2' { + unavailableSiteID[firstIDChar] = '2' + } else { + unavailableSiteID[firstIDChar] = '1' + } + + tests := []struct { + name string + siteID string + webURL string + siteName string + webURLToSiteNames map[string]string + expectErr assert.ErrorAssertionFunc + }{ + { + name: "valid", + siteID: ptr.Val(rootSite.GetId()), + webURL: ptr.Val(rootSite.GetWebUrl()), + siteName: *rootSite.GetDisplayName(), + webURLToSiteNames: map[string]string{}, + expectErr: assert.NoError, + }, + { + name: "unavailable", + siteID: string(unavailableSiteID), + webURL: "https://does-not-matter", + siteName: "", + webURLToSiteNames: map[string]string{}, + expectErr: assert.NoError, + }, + { + name: "previously found", + siteID: "random-id", + webURL: "https://random-url", + siteName: "random-name", + webURLToSiteNames: map[string]string{"https://random-url": "random-name"}, + expectErr: assert.NoError, + }, + } + + for _, test := range tests { + suite.Run(test.name, func() { + t := suite.T() + + siteName, err := getSiteName( + ctx, + test.siteID, + test.webURL, + suite.ac.Sites(), + test.webURLToSiteNames) + require.NoError(t, err, clues.ToCore(err)) + + test.expectErr(t, err) + assert.Equal(t, test.siteName, siteName) + }) + } +} diff --git a/src/internal/operations/test/onedrive_test.go b/src/internal/operations/test/onedrive_test.go index 8b4ac9b81..6e53566c9 100644 --- a/src/internal/operations/test/onedrive_test.go +++ b/src/internal/operations/test/onedrive_test.go @@ -762,11 +762,10 @@ func runDriveIncrementalTest( true) // do some additional checks to ensure the incremental dealt with fewer items. - // +2 on read/writes to account for metadata: 1 delta and 1 path. 
var ( - expectWrites = test.itemsWritten + 2 + expectWrites = test.itemsWritten expectNonMetaWrites = test.nonMetaItemsWritten - expectReads = test.itemsRead + 2 + expectReads = test.itemsRead assertReadWrite = assert.Equal ) @@ -775,6 +774,17 @@ // /libraries/sites/previouspath expectWrites++ expectReads++ + + // +2 on read/writes to account for metadata: 1 delta and 1 path (for each site) + sites, err := ac.Groups().GetAllSites(ctx, owner, fault.New(true)) + require.NoError(t, err, clues.ToCore(err)) + + expectWrites += len(sites) * 2 + expectReads += len(sites) * 2 + } else { + // +2 on read/writes to account for metadata: 1 delta and 1 path. + expectWrites += 2 + expectReads += 2 } // Sharepoint can produce a superset of permissions by nature of diff --git a/src/pkg/services/m365/api/groups.go b/src/pkg/services/m365/api/groups.go index 2aacdedf0..b6223dbdd 100644 --- a/src/pkg/services/m365/api/groups.go +++ b/src/pkg/services/m365/api/groups.go @@ -3,6 +3,8 @@ package api import ( "context" "fmt" + "net/url" + "strings" "github.com/alcionai/clues" msgraphgocore "github.com/microsoftgraph/msgraph-sdk-go-core" @@ -154,6 +156,88 @@ return group, nil } +// GetAllSites gets all the sites that belong to a group. This is +// necessary as private and shared channels get their own individual +// sites. All the other channels make use of the root site. +func (c Groups) GetAllSites( + ctx context.Context, + identifier string, + errs *fault.Bus, +) ([]models.Siteable, error) { + el := errs.Local() + + root, err := c.GetRootSite(ctx, identifier) + if err != nil { + return nil, clues.Wrap(err, "getting root site"). 
+ With("group_id", identifier) + } + + sites := []models.Siteable{root} + + channels, err := Channels(c).GetChannels(ctx, identifier) + if err != nil { + return nil, clues.Wrap(err, "getting channels") + } + + service, err := c.Service() + if err != nil { + return nil, graph.Stack(ctx, err) + } + + for _, ch := range channels { + if ptr.Val(ch.GetMembershipType()) == models.STANDARD_CHANNELMEMBERSHIPTYPE { + // Standard channels use root site + continue + } + + ictx := clues.Add( + ctx, + "channel_id", + ptr.Val(ch.GetId()), + "channel_name", + clues.Hide(ptr.Val(ch.GetDisplayName()))) + + resp, err := service. + Client(). + Teams(). + ByTeamId(identifier). + Channels(). + ByChannelId(ptr.Val(ch.GetId())). + FilesFolder(). + Get(ictx, nil) + if err != nil { + return nil, clues.Wrap(err, "getting files folder for channel"). + WithClues(ictx) + } + + // WebURL returned here is the url to the documents folder, we + // have to trim that out to get the actual site's webURL + // https://example.sharepoint.com/sites/<site-name>/Shared%20Documents/ + documentWebURL := ptr.Val(resp.GetWebUrl()) + + u, err := url.Parse(documentWebURL) + if err != nil { + return nil, clues.Wrap(err, "parsing document web url"). + WithClues(ictx) + } + + pathSegments := strings.Split(u.Path, "/") // pathSegments[0] == "" + siteWebURL := fmt.Sprintf("%s://%s/%s/%s", u.Scheme, u.Host, pathSegments[1], pathSegments[2]) + + ictx = clues.Add(ictx, "document_web_url", documentWebURL, "site_web_url", siteWebURL) + + site, err := Sites(c).GetByID(ictx, siteWebURL, CallConfig{}) + if err != nil { + el.AddRecoverable(ctx, clues.Wrap(err, "getting site")) + continue + } + + sites = append(sites, site) + } + + return sites, el.Failure() +} + func (c Groups) GetRootSite( ctx context.Context, identifier string, @@ -171,7 +255,7 @@ BySiteId("root"). 
Get(ctx, nil) if err != nil { - return nil, clues.Wrap(err, "getting root site for group") + return nil, graph.Stack(ctx, err) } return resp, graph.Stack(ctx, err).OrNil() } diff --git a/src/pkg/services/m365/api/groups_test.go b/src/pkg/services/m365/api/groups_test.go index b60240cff..213bb5d81 100644 --- a/src/pkg/services/m365/api/groups_test.go +++ b/src/pkg/services/m365/api/groups_test.go @@ -110,6 +110,33 @@ func (suite *GroupsIntgSuite) TestGetAll() { require.NotZero(t, len(groups), "must have at least one group") } +func (suite *GroupsIntgSuite) TestGetAllSites() { + t := suite.T() + + ctx, flush := tester.NewContext(t) + defer flush() + + channels, err := suite.its.ac. + Channels().GetChannels(ctx, suite.its.group.id) + require.NoError(t, err, "getting channels") + require.NotZero(t, len(channels), "must have at least one channel") + + siteCount := 1 + + for _, c := range channels { + if ptr.Val(c.GetMembershipType()) != models.STANDARD_CHANNELMEMBERSHIPTYPE { + siteCount++ + } + } + + sites, err := suite.its.ac. + Groups(). + GetAllSites(ctx, suite.its.group.id, fault.New(true)) + require.NoError(t, err) + require.NotZero(t, len(sites), "must have at least one site") + require.Equal(t, siteCount, len(sites), "incorrect number of sites") +} + func (suite *GroupsIntgSuite) TestGroups_GetByID() { t := suite.T() diff --git a/src/pkg/services/m365/api/sites.go b/src/pkg/services/m365/api/sites.go index 0865a4f47..813f1c1fa 100644 --- a/src/pkg/services/m365/api/sites.go +++ b/src/pkg/services/m365/api/sites.go @@ -142,6 +142,8 @@ func (c Sites) GetByID( options.QueryParameters.Expand = cc.Expand } + // NOTE: `/sites` sends `displayName` as name, but + // `/sites/<id>` sends base of `webURL` as name resp, err = c.Stable. Client(). Sites().