Merge branch 'main' into beta-library-switch

Danny 2023-01-26 17:17:00 -05:00 committed by GitHub
commit 57ce0be82a
43 changed files with 1092 additions and 283 deletions

View File

@ -17,6 +17,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Handle case where user's drive has not been initialized
- Inline attachments (e.g. copy/paste) are discovered and backed up correctly ([#2163](https://github.com/alcionai/corso/issues/2163))
- Guest and External users (for cloud accounts) and non-on-premise users (for systems that use on-prem AD syncs) are now excluded from backup and restore operations.
- Remove the M365 license GUID check in OneDrive backup, which wasn't reliable.
## [v0.1.0] (alpha) - 2023-01-13

View File

@ -13,6 +13,7 @@ import (
. "github.com/alcionai/corso/src/cli/print"
"github.com/alcionai/corso/src/cli/utils"
"github.com/alcionai/corso/src/internal/connector"
"github.com/alcionai/corso/src/internal/connector/graph"
"github.com/alcionai/corso/src/internal/kopia"
"github.com/alcionai/corso/src/internal/model"
"github.com/alcionai/corso/src/pkg/backup"
@ -209,7 +210,7 @@ func createSharePointCmd(cmd *cobra.Command, args []string) error {
defer utils.CloseRepo(ctx, r)
gc, err := connector.NewGraphConnector(ctx, acct, connector.Sites)
gc, err := connector.NewGraphConnector(ctx, graph.LargeItemClient(), acct, connector.Sites)
if err != nil {
return Only(ctx, errors.Wrap(err, "Failed to connect to Microsoft APIs"))
}

View File

@ -39,7 +39,7 @@ var corsoCmd = &cobra.Command{
func handleCorsoCmd(cmd *cobra.Command, args []string) error {
v, _ := cmd.Flags().GetBool("version")
if v {
print.Outf(cmd.Context(), "Corso\nversion: "+version.Version)
print.Outf(cmd.Context(), "Corso version: "+version.Version)
return nil
}

View File

@ -12,6 +12,7 @@ import (
. "github.com/alcionai/corso/src/cli/print"
"github.com/alcionai/corso/src/internal/common"
"github.com/alcionai/corso/src/internal/connector"
"github.com/alcionai/corso/src/internal/connector/graph"
"github.com/alcionai/corso/src/internal/connector/mockconnector"
"github.com/alcionai/corso/src/internal/data"
"github.com/alcionai/corso/src/pkg/account"
@ -111,7 +112,7 @@ func getGCAndVerifyUser(ctx context.Context, userID string) (*connector.GraphCon
}
// build a graph connector
gc, err := connector.NewGraphConnector(ctx, acct, connector.Users)
gc, err := connector.NewGraphConnector(ctx, graph.LargeItemClient(), acct, connector.Users)
if err != nil {
return nil, account.Account{}, errors.Wrap(err, "connecting to graph api")
}

View File

@ -96,7 +96,7 @@ func runDisplayM365JSON(
var (
bs []byte
err error
cat = graph.StringToPathCategory(category)
cat = path.ToCategoryType(category)
sw = kw.NewJsonSerializationWriter()
)
@ -178,7 +178,7 @@ func getGC(ctx context.Context) (*connector.GraphConnector, account.M365Config,
return nil, m365Cfg, Only(ctx, errors.Wrap(err, "finding m365 account details"))
}
gc, err := connector.NewGraphConnector(ctx, acct, connector.Users)
gc, err := connector.NewGraphConnector(ctx, graph.LargeItemClient(), acct, connector.Users)
if err != nil {
return nil, m365Cfg, Only(ctx, errors.Wrap(err, "connecting to graph API"))
}

View File

@ -255,7 +255,7 @@ func getGC(ctx context.Context) (*connector.GraphConnector, error) {
}
// build a graph connector
gc, err := connector.NewGraphConnector(ctx, acct, connector.Users)
gc, err := connector.NewGraphConnector(ctx, graph.LargeItemClient(), acct, connector.Users)
if err != nil {
return nil, Only(ctx, errors.Wrap(err, "connecting to graph api"))
}

View File

@ -4,7 +4,8 @@ go 1.19
require (
github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.2.0
github.com/aws/aws-sdk-go v1.44.184
github.com/alcionai/clues v0.0.0-20230120231953-1cf61dbafc40
github.com/aws/aws-sdk-go v1.44.187
github.com/aws/aws-xray-sdk-go v1.8.0
github.com/google/uuid v1.3.0
github.com/hashicorp/go-multierror v1.1.1

View File

@ -52,6 +52,8 @@ github.com/VividCortex/ewma v1.2.0 h1:f58SaIzcDXrSy3kWaHNvuJgJ3Nmz59Zji6XoJR/q1o
github.com/VividCortex/ewma v1.2.0/go.mod h1:nz4BbCtbLyFDeC9SUHbtcT5644juEuWfUAUnGx7j5l4=
github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d h1:licZJFw2RwpHMqeKTCYkitsPqHNxTmd4SNR5r94FGM8=
github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d/go.mod h1:asat636LX7Bqt5lYEZ27JNDcqxfjdBQuJ/MM4CN/Lzo=
github.com/alcionai/clues v0.0.0-20230120231953-1cf61dbafc40 h1:bvAwz0dcJeIyRjudVyzmmawOvc4SqlSerKd0B4dh0yw=
github.com/alcionai/clues v0.0.0-20230120231953-1cf61dbafc40/go.mod h1:UlAs8jkWIpsOMakiC8NxPgQQVQRdvyf1hYMszlYYLb4=
github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
@ -60,8 +62,8 @@ github.com/alecthomas/units v0.0.0-20190924025748-f65c72e2690d/go.mod h1:rBZYJk5
github.com/alessio/shellescape v1.4.1 h1:V7yhSDDn8LP4lc4jS8pFkt0zCnzVJlG5JXy9BVKJUX0=
github.com/andybalholm/brotli v1.0.4 h1:V7DdXeJtZscaqfNuAdSRuRFzuiKlHSC/Zh3zl9qY3JY=
github.com/andybalholm/brotli v1.0.4/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig=
github.com/aws/aws-sdk-go v1.44.184 h1:/MggyE66rOImXJKl1HqhLQITvWvqIV7w1Q4MaG6FHUo=
github.com/aws/aws-sdk-go v1.44.184/go.mod h1:aVsgQcEevwlmQ7qHE9I3h+dtQgpqhFB+i8Phjh7fkwI=
github.com/aws/aws-sdk-go v1.44.187 h1:D5CsRomPnlwDHJCanL2mtaLIcbhjiWxNh5j8zvaWdJA=
github.com/aws/aws-sdk-go v1.44.187/go.mod h1:aVsgQcEevwlmQ7qHE9I3h+dtQgpqhFB+i8Phjh7fkwI=
github.com/aws/aws-xray-sdk-go v1.8.0 h1:0xncHZ588wB/geLjbM/esoW3FOEThWy2TJyb4VXfLFY=
github.com/aws/aws-xray-sdk-go v1.8.0/go.mod h1:7LKe47H+j3evfvS1+q0wzpoaGXGrF3mUsfM+thqVO+A=
github.com/benbjohnson/clock v1.1.0 h1:Q92kusRqC1XV2MjkWETPvjJVqKetz1OzxZB7mHJLju8=

View File

@ -87,6 +87,7 @@ func (gc *GraphConnector) DataCollections(
case selectors.ServiceSharePoint:
colls, err := sharepoint.DataCollections(
ctx,
gc.itemClient,
sels,
gc.credentials.AzureTenantID,
gc.Service,
@ -198,6 +199,7 @@ func (gc *GraphConnector) OneDriveDataCollections(
logger.Ctx(ctx).With("user", user).Debug("Creating OneDrive collections")
odcs, err := onedrive.NewCollections(
gc.itemClient,
gc.credentials.AzureTenantID,
user,
onedrive.OneDriveSource,

View File

@ -10,6 +10,7 @@ import (
"github.com/stretchr/testify/suite"
"github.com/alcionai/corso/src/internal/connector/exchange"
"github.com/alcionai/corso/src/internal/connector/graph"
"github.com/alcionai/corso/src/internal/connector/sharepoint"
"github.com/alcionai/corso/src/internal/tester"
"github.com/alcionai/corso/src/pkg/control"
@ -43,7 +44,7 @@ func (suite *ConnectorDataCollectionIntegrationSuite) SetupSuite() {
tester.MustGetEnvVars(suite.T(), tester.M365AcctCredEnvs...)
suite.connector = loadConnector(ctx, suite.T(), AllResources)
suite.connector = loadConnector(ctx, suite.T(), graph.LargeItemClient(), AllResources)
suite.user = tester.M365UserID(suite.T())
suite.site = tester.M365SiteID(suite.T())
@ -62,7 +63,7 @@ func (suite *ConnectorDataCollectionIntegrationSuite) TestExchangeDataCollection
selUsers := []string{suite.user}
connector := loadConnector(ctx, suite.T(), Users)
connector := loadConnector(ctx, suite.T(), graph.LargeItemClient(), Users)
tests := []struct {
name string
getSelector func(t *testing.T) selectors.Selector
@ -138,7 +139,7 @@ func (suite *ConnectorDataCollectionIntegrationSuite) TestDataCollections_invali
owners := []string{"snuffleupagus"}
connector := loadConnector(ctx, suite.T(), Users)
connector := loadConnector(ctx, suite.T(), graph.LargeItemClient(), Users)
tests := []struct {
name string
getSelector func(t *testing.T) selectors.Selector
@ -214,7 +215,7 @@ func (suite *ConnectorDataCollectionIntegrationSuite) TestSharePointDataCollecti
selSites := []string{suite.site}
connector := loadConnector(ctx, suite.T(), Sites)
connector := loadConnector(ctx, suite.T(), graph.LargeItemClient(), Sites)
tests := []struct {
name string
expected int
@ -243,6 +244,7 @@ func (suite *ConnectorDataCollectionIntegrationSuite) TestSharePointDataCollecti
suite.T().Run(test.name, func(t *testing.T) {
collections, err := sharepoint.DataCollections(
ctx,
graph.LargeItemClient(),
test.getSelector(),
connector.credentials.AzureTenantID,
connector.Service,
@ -298,7 +300,7 @@ func (suite *ConnectorCreateSharePointCollectionIntegrationSuite) SetupSuite() {
tester.MustGetEnvSets(suite.T(), tester.M365AcctCredEnvs)
suite.connector = loadConnector(ctx, suite.T(), Sites)
suite.connector = loadConnector(ctx, suite.T(), graph.LargeItemClient(), Sites)
suite.user = tester.M365UserID(suite.T())
tester.LogTimeOfTest(suite.T())
@ -311,7 +313,7 @@ func (suite *ConnectorCreateSharePointCollectionIntegrationSuite) TestCreateShar
var (
t = suite.T()
siteID = tester.M365SiteID(t)
gc = loadConnector(ctx, t, Sites)
gc = loadConnector(ctx, t, graph.LargeItemClient(), Sites)
siteIDs = []string{siteID}
)
@ -335,7 +337,7 @@ func (suite *ConnectorCreateSharePointCollectionIntegrationSuite) TestCreateShar
var (
t = suite.T()
siteID = tester.M365SiteID(t)
gc = loadConnector(ctx, t, Sites)
gc = loadConnector(ctx, t, graph.LargeItemClient(), Sites)
siteIDs = []string{siteID}
)

View File

@ -4,7 +4,6 @@ import (
"net/http"
"net/http/httputil"
"os"
"strings"
"time"
az "github.com/Azure/azure-sdk-for-go/sdk/azidentity"
@ -15,7 +14,6 @@ import (
"github.com/pkg/errors"
"github.com/alcionai/corso/src/pkg/logger"
"github.com/alcionai/corso/src/pkg/path"
)
const (
@ -57,6 +55,23 @@ func CreateHTTPClient() *http.Client {
return httpClient
}
// LargeItemClient generates a client that's configured to handle
// large file downloads. This client isn't suitable for other queries
// because of its loose timeout restrictions.
//
// Reuse of HTTP clients is critical; otherwise we leak OS resources
// and consume an effectively unbounded number of socket connections.
// It is important to centralize this client and pass it downstream,
// where API calls can use it on a per-download basis.
//
// TODO: this should get owned by an API client layer, not the GC itself.
func LargeItemClient() *http.Client {
httpClient := CreateHTTPClient()
httpClient.Timeout = 0 // infinite timeout for pulling large files
return httpClient
}
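
To make the reuse point concrete, here is a minimal, hypothetical sketch (an editor's illustration, not part of this commit) of several downloads sharing one long-lived client. The URLs and helper name are placeholders; in corso the shared client would travel through NewGraphConnector to the collections.

package main

import (
	"fmt"
	"io"
	"net/http"
)

// newLargeItemClient mirrors LargeItemClient above: one shared client with
// no timeout, so large files can finish streaming.
func newLargeItemClient() *http.Client {
	return &http.Client{Timeout: 0}
}

func main() {
	hc := newLargeItemClient()

	// Every download reuses hc, and therefore its transport and connection
	// pool, instead of opening fresh sockets per item.
	for _, url := range []string{"https://example.com/a", "https://example.com/b"} {
		resp, err := hc.Get(url)
		if err != nil {
			fmt.Println("download failed:", err)
			continue
		}

		_, _ = io.Copy(io.Discard, resp.Body)
		resp.Body.Close()
	}
}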
// ---------------------------------------------------------------------------
// Logging Middleware
// ---------------------------------------------------------------------------
@ -87,6 +102,10 @@ func (handler *LoggingMiddleware) Intercept(
logger.Ctx(ctx).Infow("graph api throttling", "method", req.Method, "url", req.URL)
}
if resp.StatusCode != http.StatusTooManyRequests && (resp.StatusCode/100) != 2 {
logger.Ctx(ctx).Infow("graph api error", "method", req.Method, "url", req.URL)
}
if logger.DebugAPI || os.Getenv(logGraphRequestsEnvKey) != "" {
respDump, _ := httputil.DumpResponse(resp, true)
@ -104,26 +123,3 @@ func (handler *LoggingMiddleware) Intercept(
return resp, err
}
// ---------------------------------------------------------------------------
// Other Helpers
// ---------------------------------------------------------------------------
func StringToPathCategory(input string) path.CategoryType {
param := strings.ToLower(input)
switch param {
case "email":
return path.EmailCategory
case "contacts":
return path.ContactsCategory
case "events":
return path.EventsCategory
case "files":
return path.FilesCategory
case "libraries":
return path.LibrariesCategory
default:
return path.UnknownCategory
}
}

View File

@ -4,6 +4,7 @@ package connector
import (
"context"
"net/http"
"runtime/trace"
"strings"
"sync"
@ -38,8 +39,9 @@ import (
// GraphRequestAdapter from the msgraph-sdk-go. Additional fields are for
// bookkeeping and interfacing with other components.
type GraphConnector struct {
Service graph.Servicer
Owners api.Client
Service graph.Servicer
Owners api.Client
itemClient *http.Client // configured to handle large item downloads
tenant string
Users map[string]string // key<email> value<id>
@ -64,13 +66,19 @@ const (
Sites
)
func NewGraphConnector(ctx context.Context, acct account.Account, r resource) (*GraphConnector, error) {
func NewGraphConnector(
ctx context.Context,
itemClient *http.Client,
acct account.Account,
r resource,
) (*GraphConnector, error) {
m365, err := acct.M365Config()
if err != nil {
return nil, errors.Wrap(err, "retrieving m365 account configuration")
}
gc := GraphConnector{
itemClient: itemClient,
tenant: m365.AzureTenantID,
Users: make(map[string]string, 0),
wg: &sync.WaitGroup{},

View File

@ -9,6 +9,7 @@ import (
"github.com/stretchr/testify/require"
"github.com/stretchr/testify/suite"
"github.com/alcionai/corso/src/internal/connector/graph"
"github.com/alcionai/corso/src/internal/connector/support"
"github.com/alcionai/corso/src/internal/tester"
"github.com/alcionai/corso/src/pkg/account"
@ -65,7 +66,7 @@ func (suite *DisconnectedGraphConnectorSuite) TestBadConnection() {
for _, test := range table {
suite.T().Run(test.name, func(t *testing.T) {
gc, err := NewGraphConnector(ctx, test.acct(t), Users)
gc, err := NewGraphConnector(ctx, graph.LargeItemClient(), test.acct(t), Users)
assert.Nil(t, gc, test.name+" failed")
assert.NotNil(t, err, test.name+" failed")
})

View File

@ -3,6 +3,7 @@ package connector
import (
"context"
"io"
"net/http"
"reflect"
"testing"
@ -978,9 +979,9 @@ func getSelectorWith(
}
}
func loadConnector(ctx context.Context, t *testing.T, r resource) *GraphConnector {
func loadConnector(ctx context.Context, t *testing.T, itemClient *http.Client, r resource) *GraphConnector {
a := tester.NewM365Account(t)
connector, err := NewGraphConnector(ctx, a, r)
connector, err := NewGraphConnector(ctx, itemClient, a, r)
require.NoError(t, err)
return connector

View File

@ -156,7 +156,7 @@ func (suite *GraphConnectorIntegrationSuite) SetupSuite() {
tester.MustGetEnvSets(suite.T(), tester.M365AcctCredEnvs)
suite.connector = loadConnector(ctx, suite.T(), Users)
suite.connector = loadConnector(ctx, suite.T(), graph.LargeItemClient(), Users)
suite.user = tester.M365UserID(suite.T())
suite.acct = tester.NewM365Account(suite.T())
@ -380,7 +380,7 @@ func runRestoreBackupTest(
start := time.Now()
restoreGC := loadConnector(ctx, t, test.resource)
restoreGC := loadConnector(ctx, t, graph.LargeItemClient(), test.resource)
restoreSel := getSelectorWith(t, test.service, resourceOwners, true)
deets, err := restoreGC.RestoreDataCollections(
ctx,
@ -419,7 +419,7 @@ func runRestoreBackupTest(
})
}
backupGC := loadConnector(ctx, t, test.resource)
backupGC := loadConnector(ctx, t, graph.LargeItemClient(), test.resource)
backupSel := backupSelectorForExpected(t, test.service, expectedDests)
t.Logf("Selective backup of %s\n", backupSel)
@ -870,7 +870,7 @@ func (suite *GraphConnectorIntegrationSuite) TestMultiFolderBackupDifferentNames
dest.ContainerName,
)
restoreGC := loadConnector(ctx, t, test.resource)
restoreGC := loadConnector(ctx, t, graph.LargeItemClient(), test.resource)
deets, err := restoreGC.RestoreDataCollections(ctx, suite.acct, restoreSel, dest, collections)
require.NoError(t, err)
require.NotNil(t, deets)
@ -888,7 +888,7 @@ func (suite *GraphConnectorIntegrationSuite) TestMultiFolderBackupDifferentNames
// Run a backup and compare its output with what we put in.
backupGC := loadConnector(ctx, t, test.resource)
backupGC := loadConnector(ctx, t, graph.LargeItemClient(), test.resource)
backupSel := backupSelectorForExpected(t, test.service, expectedDests)
t.Log("Selective backup of", backupSel)

View File

@ -4,11 +4,13 @@ package onedrive
import (
"context"
"io"
"net/http"
"sync"
"sync/atomic"
"time"
"github.com/microsoftgraph/msgraph-sdk-go/models"
"github.com/pkg/errors"
"github.com/spatialcurrent/go-lazy/pkg/lazy"
"github.com/alcionai/corso/src/internal/connector/graph"
@ -43,6 +45,9 @@ var (
// Collection represents a set of OneDrive objects retrieved from M365
type Collection struct {
// configured to handle large item downloads
itemClient *http.Client
// data is used to share data streams with the collection consumer
data chan data.Stream
// folderPath indicates what level in the hierarchy this collection
@ -64,12 +69,13 @@ type Collection struct {
// itemReadFunc returns a reader for the specified item
type itemReaderFunc func(
ctx context.Context,
hc *http.Client,
item models.DriveItemable,
) (itemInfo details.ItemInfo, itemData io.ReadCloser, err error)
// NewCollection creates a Collection
func NewCollection(
itemClient *http.Client,
folderPath path.Path,
driveID string,
service graph.Servicer,
@ -78,6 +84,7 @@ func NewCollection(
ctrlOpts control.Options,
) *Collection {
c := &Collection{
itemClient: itemClient,
folderPath: folderPath,
driveItems: map[string]models.DriveItemable{},
driveID: driveID,
@ -198,11 +205,16 @@ func (oc *Collection) populateItems(ctx context.Context) {
m.Unlock()
}
for _, item := range oc.driveItems {
for id, item := range oc.driveItems {
if oc.ctrl.FailFast && errs != nil {
break
}
if item == nil {
errUpdater(id, errors.New("nil item"))
continue
}
semaphoreCh <- struct{}{}
wg.Add(1)
@ -219,10 +231,9 @@ func (oc *Collection) populateItems(ctx context.Context) {
)
for i := 1; i <= maxRetries; i++ {
itemInfo, itemData, err = oc.itemReader(ctx, item)
// retry on Timeout type errors, break otherwise.
itemInfo, itemData, err = oc.itemReader(oc.itemClient, item)
if err == nil || graph.IsErrTimeout(err) == nil {
// retry on Timeout type errors, break otherwise.
break
}
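
The retry loop above only retries timeout-style failures. A compact sketch of that pattern, using a hypothetical isTimeout predicate in place of graph.IsErrTimeout:

// readWithRetry retries read only when the failure is a timeout; success or
// any other error exits immediately, matching the loop above.
func readWithRetry(read func() error, isTimeout func(error) bool, maxRetries int) error {
	var err error

	for i := 1; i <= maxRetries; i++ {
		err = read()
		if err == nil || !isTimeout(err) {
			break
		}
	}

	return err
}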

View File

@ -2,9 +2,8 @@ package onedrive
import (
"bytes"
"context"
"errors"
"io"
"net/http"
"sync"
"testing"
"time"
@ -15,6 +14,7 @@ import (
"github.com/stretchr/testify/require"
"github.com/stretchr/testify/suite"
"github.com/alcionai/corso/src/internal/connector/graph"
"github.com/alcionai/corso/src/internal/connector/support"
"github.com/alcionai/corso/src/internal/data"
"github.com/alcionai/corso/src/pkg/backup/details"
@ -73,7 +73,7 @@ func (suite *CollectionUnitTestSuite) TestCollection() {
name: "oneDrive, no duplicates",
numInstances: 1,
source: OneDriveSource,
itemReader: func(context.Context, models.DriveItemable) (details.ItemInfo, io.ReadCloser, error) {
itemReader: func(*http.Client, models.DriveItemable) (details.ItemInfo, io.ReadCloser, error) {
return details.ItemInfo{OneDrive: &details.OneDriveInfo{ItemName: testItemName, Modified: now}},
io.NopCloser(bytes.NewReader(testItemData)),
nil
@ -87,7 +87,7 @@ func (suite *CollectionUnitTestSuite) TestCollection() {
name: "oneDrive, duplicates",
numInstances: 3,
source: OneDriveSource,
itemReader: func(context.Context, models.DriveItemable) (details.ItemInfo, io.ReadCloser, error) {
itemReader: func(*http.Client, models.DriveItemable) (details.ItemInfo, io.ReadCloser, error) {
return details.ItemInfo{OneDrive: &details.OneDriveInfo{ItemName: testItemName, Modified: now}},
io.NopCloser(bytes.NewReader(testItemData)),
nil
@ -101,7 +101,7 @@ func (suite *CollectionUnitTestSuite) TestCollection() {
name: "sharePoint, no duplicates",
numInstances: 1,
source: SharePointSource,
itemReader: func(context.Context, models.DriveItemable) (details.ItemInfo, io.ReadCloser, error) {
itemReader: func(*http.Client, models.DriveItemable) (details.ItemInfo, io.ReadCloser, error) {
return details.ItemInfo{SharePoint: &details.SharePointInfo{ItemName: testItemName, Modified: now}},
io.NopCloser(bytes.NewReader(testItemData)),
nil
@ -115,7 +115,7 @@ func (suite *CollectionUnitTestSuite) TestCollection() {
name: "sharePoint, duplicates",
numInstances: 3,
source: SharePointSource,
itemReader: func(context.Context, models.DriveItemable) (details.ItemInfo, io.ReadCloser, error) {
itemReader: func(*http.Client, models.DriveItemable) (details.ItemInfo, io.ReadCloser, error) {
return details.ItemInfo{SharePoint: &details.SharePointInfo{ItemName: testItemName, Modified: now}},
io.NopCloser(bytes.NewReader(testItemData)),
nil
@ -140,6 +140,7 @@ func (suite *CollectionUnitTestSuite) TestCollection() {
require.NoError(t, err)
coll := NewCollection(
graph.LargeItemClient(),
folderPath,
"drive-id",
suite,
@ -224,6 +225,7 @@ func (suite *CollectionUnitTestSuite) TestCollectionReadError() {
require.NoError(t, err)
coll := NewCollection(
graph.LargeItemClient(),
folderPath,
"fakeDriveID",
suite,
@ -235,10 +237,8 @@ func (suite *CollectionUnitTestSuite) TestCollectionReadError() {
mockItem.SetId(&testItemID)
coll.Add(mockItem)
readError := errors.New("Test error")
coll.itemReader = func(context.Context, models.DriveItemable) (details.ItemInfo, io.ReadCloser, error) {
return details.ItemInfo{}, nil, readError
coll.itemReader = func(*http.Client, models.DriveItemable) (details.ItemInfo, io.ReadCloser, error) {
return details.ItemInfo{}, nil, assert.AnError
}
coll.Items()

View File

@ -3,10 +3,12 @@ package onedrive
import (
"context"
"fmt"
"net/http"
"strings"
"github.com/microsoftgraph/msgraph-sdk-go/models"
"github.com/pkg/errors"
"golang.org/x/exp/maps"
"github.com/alcionai/corso/src/internal/connector/graph"
"github.com/alcionai/corso/src/internal/connector/support"
@ -45,6 +47,9 @@ type folderMatcher interface {
// Collections is used to retrieve drive data for a
// resource owner, which can be either a user or a sharepoint site.
type Collections struct {
// configured to handle large item downloads
itemClient *http.Client
tenant string
resourceOwner string
source driveSource
@ -65,6 +70,7 @@ type Collections struct {
}
func NewCollections(
itemClient *http.Client,
tenant string,
resourceOwner string,
source driveSource,
@ -74,6 +80,7 @@ func NewCollections(
ctrlOpts control.Options,
) *Collections {
return &Collections{
itemClient: itemClient,
tenant: tenant,
resourceOwner: resourceOwner,
source: source,
@ -98,6 +105,12 @@ func (c *Collections) Get(ctx context.Context) ([]data.Collection, error) {
deltaURLs = map[string]string{}
// Drive ID -> folder ID -> folder path
folderPaths = map[string]map[string]string{}
// Items that should be excluded when sourcing data from the base backup.
// TODO(ashmrtn): This list contains the M365 IDs of deleted items. While
// it's technically safe to pass them all the way through to kopia (files
// are unlikely to be named their M365 ID), we should wait to do that until
// we've switched to using those IDs for file names in kopia.
excludedItems = map[string]struct{}{}
)
// Update the collection map with items from each drive
@ -105,7 +118,13 @@ func (c *Collections) Get(ctx context.Context) ([]data.Collection, error) {
driveID := *d.GetId()
driveName := *d.GetName()
delta, paths, err := collectItems(ctx, c.service, driveID, driveName, c.UpdateCollections)
delta, paths, excluded, err := collectItems(
ctx,
c.service,
driveID,
driveName,
c.UpdateCollections,
)
if err != nil {
return nil, err
}
@ -121,6 +140,8 @@ func (c *Collections) Get(ctx context.Context) ([]data.Collection, error) {
folderPaths[driveID][id] = p
}
}
maps.Copy(excludedItems, excluded)
}
observe.Message(ctx, fmt.Sprintf("Discovered %d items to backup", c.NumItems))
@ -171,6 +192,7 @@ func (c *Collections) UpdateCollections(
items []models.DriveItemable,
oldPaths map[string]string,
newPaths map[string]string,
excluded map[string]struct{},
) error {
for _, item := range items {
if item.GetRoot() != nil {
@ -231,18 +253,31 @@ func (c *Collections) UpdateCollections(
updatePath(newPaths, *item.GetId(), folderPath.String())
case item.GetFile() != nil:
if item.GetDeleted() != nil {
excluded[*item.GetId()] = struct{}{}
// Exchange counts items streamed through it, which includes deletions,
// so add that here too.
c.NumFiles++
c.NumItems++
continue
}
// TODO(ashmrtn): Figure out what to do when an item was moved (maybe) and
// add it to the exclude list.
col, found := c.CollectionMap[collectionPath.String()]
if !found {
// TODO(ashmrtn): Compare old and new path and set collection state
// accordingly.
col = NewCollection(
c.itemClient,
collectionPath,
driveID,
c.service,
c.statusUpdater,
c.source,
c.ctrl,
)
c.ctrl)
c.CollectionMap[collectionPath.String()] = col
c.NumContainers++
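
To summarize the two changes above in one place: deleted files are recorded into the per-drive excluded set, and Get folds each drive's set into one map. A small editor's sketch of that accumulation, with hypothetical helper names:

// markDeleted records a deleted file's M365 ID so the item can be excluded
// when merging against the base backup.
func markDeleted(itemID string, excluded map[string]struct{}) {
	excluded[itemID] = struct{}{}
}

// mergeExcludes folds per-drive exclude sets into one global set, as Get
// does with maps.Copy for each drive it visits.
func mergeExcludes(perDrive []map[string]struct{}) map[string]struct{} {
	all := map[string]struct{}{}

	for _, ex := range perDrive {
		for id := range ex {
			all[id] = struct{}{}
		}
	}

	return all
}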

View File

@ -10,6 +10,7 @@ import (
"github.com/stretchr/testify/suite"
"golang.org/x/exp/maps"
"github.com/alcionai/corso/src/internal/connector/graph"
"github.com/alcionai/corso/src/internal/tester"
"github.com/alcionai/corso/src/pkg/control"
"github.com/alcionai/corso/src/pkg/selectors"
@ -105,6 +106,7 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() {
expectedContainerCount int
expectedFileCount int
expectedMetadataPaths map[string]string
expectedExcludes map[string]struct{}
}{
{
testCase: "Invalid item",
@ -115,6 +117,7 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() {
scope: anyFolder,
expect: assert.Error,
expectedMetadataPaths: map[string]string{},
expectedExcludes: map[string]struct{}{},
},
{
testCase: "Single File",
@ -135,6 +138,7 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() {
expectedContainerCount: 1,
// Root folder is skipped since it's always present.
expectedMetadataPaths: map[string]string{},
expectedExcludes: map[string]struct{}{},
},
{
testCase: "Single Folder",
@ -153,6 +157,7 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() {
testBaseDrivePath+"/folder",
)[0],
},
expectedExcludes: map[string]struct{}{},
},
{
testCase: "Single Package",
@ -171,6 +176,7 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() {
testBaseDrivePath+"/package",
)[0],
},
expectedExcludes: map[string]struct{}{},
},
{
testCase: "1 root file, 1 folder, 1 package, 2 files, 3 collections",
@ -209,6 +215,7 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() {
testBaseDrivePath+"/package",
)[0],
},
expectedExcludes: map[string]struct{}{},
},
{
testCase: "contains folder selector",
@ -258,6 +265,7 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() {
testBaseDrivePath+"/folder/subfolder/folder",
)[0],
},
expectedExcludes: map[string]struct{}{},
},
{
testCase: "prefix subfolder selector",
@ -292,6 +300,7 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() {
testBaseDrivePath+"/folder/subfolder/folder",
)[0],
},
expectedExcludes: map[string]struct{}{},
},
{
testCase: "match subfolder selector",
@ -318,6 +327,7 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() {
expectedContainerCount: 1,
// No child folders for subfolder so nothing here.
expectedMetadataPaths: map[string]string{},
expectedExcludes: map[string]struct{}{},
},
{
testCase: "not moved folder tree",
@ -358,6 +368,7 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() {
testBaseDrivePath+"/folder/subfolder",
)[0],
},
expectedExcludes: map[string]struct{}{},
},
{
testCase: "moved folder tree",
@ -398,6 +409,7 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() {
testBaseDrivePath+"/folder/subfolder",
)[0],
},
expectedExcludes: map[string]struct{}{},
},
{
testCase: "moved folder tree and subfolder 1",
@ -439,6 +451,7 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() {
testBaseDrivePath+"/subfolder",
)[0],
},
expectedExcludes: map[string]struct{}{},
},
{
testCase: "moved folder tree and subfolder 2",
@ -480,6 +493,7 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() {
testBaseDrivePath+"/subfolder",
)[0],
},
expectedExcludes: map[string]struct{}{},
},
{
testCase: "deleted folder and package",
@ -508,6 +522,7 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() {
expectedFileCount: 0,
expectedContainerCount: 0,
expectedMetadataPaths: map[string]string{},
expectedExcludes: map[string]struct{}{},
},
{
testCase: "delete folder tree move subfolder",
@ -543,6 +558,24 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() {
testBaseDrivePath+"/subfolder",
)[0],
},
expectedExcludes: map[string]struct{}{},
},
{
testCase: "delete file",
items: []models.DriveItemable{
delItem("item", testBaseDrivePath, true, false, false),
},
inputFolderMap: map[string]string{},
scope: anyFolder,
expect: assert.NoError,
expectedCollectionPaths: []string{},
expectedItemCount: 1,
expectedFileCount: 1,
expectedContainerCount: 0,
expectedMetadataPaths: map[string]string{},
expectedExcludes: map[string]struct{}{
"item": {},
},
},
}
@ -551,9 +584,11 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() {
ctx, flush := tester.NewContext()
defer flush()
excludes := map[string]struct{}{}
outputFolderMap := map[string]string{}
maps.Copy(outputFolderMap, tt.inputFolderMap)
c := NewCollections(
graph.LargeItemClient(),
tenant,
user,
OneDriveSource,
@ -562,7 +597,15 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() {
nil,
control.Options{})
err := c.UpdateCollections(ctx, "driveID", "General", tt.items, tt.inputFolderMap, outputFolderMap)
err := c.UpdateCollections(
ctx,
"driveID",
"General",
tt.items,
tt.inputFolderMap,
outputFolderMap,
excludes,
)
tt.expect(t, err)
assert.Equal(t, len(tt.expectedCollectionPaths), len(c.CollectionMap), "collection paths")
assert.Equal(t, tt.expectedItemCount, c.NumItems, "item count")
@ -573,6 +616,7 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() {
}
assert.Equal(t, tt.expectedMetadataPaths, outputFolderMap)
assert.Equal(t, tt.expectedExcludes, excludes)
})
}
}

View File

@ -6,7 +6,6 @@ import (
"strings"
"time"
msgraphgocore "github.com/microsoftgraph/msgraph-sdk-go-core"
msdrive "github.com/microsoftgraph/msgraph-sdk-go/drive"
msdrives "github.com/microsoftgraph/msgraph-sdk-go/drives"
"github.com/microsoftgraph/msgraph-sdk-go/models"
@ -20,55 +19,7 @@ import (
"github.com/alcionai/corso/src/pkg/logger"
)
var (
errFolderNotFound = errors.New("folder not found")
// nolint:lll
// OneDrive associated SKUs located at:
// https://learn.microsoft.com/en-us/azure/active-directory/enterprise-users/licensing-service-plan-reference
skuIDs = []string{
// Microsoft 365 Apps for Business 0365
"cdd28e44-67e3-425e-be4c-737fab2899d3",
// Microsoft 365 Apps for Business SMB_Business
"b214fe43-f5a3-4703-beeb-fa97188220fc",
// Microsoft 365 Apps for enterprise
"c2273bd0-dff7-4215-9ef5-2c7bcfb06425",
// Microsoft 365 Apps for Faculty
"12b8c807-2e20-48fc-b453-542b6ee9d171",
// Microsoft 365 Apps for Students
"c32f9321-a627-406d-a114-1f9c81aaafac",
// OneDrive for Business (Plan 1)
"e6778190-713e-4e4f-9119-8b8238de25df",
// OneDrive for Business (Plan 2)
"ed01faf2-1d88-4947-ae91-45ca18703a96",
// Visio Plan 1
"ca7f3140-d88c-455b-9a1c-7f0679e31a76",
// Visio Plan 2
"38b434d2-a15e-4cde-9a98-e737c75623e1",
// Visio Online Plan 1
"4b244418-9658-4451-a2b8-b5e2b364e9bd",
// Visio Online Plan 2
"c5928f49-12ba-48f7-ada3-0d743a3601d5",
// Visio Plan 2 for GCC
"4ae99959-6b0f-43b0-b1ce-68146001bdba",
// ONEDRIVEENTERPRISE
"afcafa6a-d966-4462-918c-ec0b4e0fe642",
// Microsoft 365 E5 Developer
"c42b9cae-ea4f-4ab7-9717-81576235ccac",
// Microsoft 365 E5
"06ebc4ee-1bb5-47dd-8120-11324bc54e06",
// Office 365 E4
"1392051d-0cb9-4b7a-88d5-621fee5e8711",
// Microsoft 365 E3
"05e9a617-0261-4cee-bb44-138d3ef5d965",
// Microsoft 365 Business Premium
"cbdc14ab-d96c-4c30-b9f4-6ada7cdc1d46",
// Microsoft 365 Business Standard
"f245ecc8-75af-4f8e-b61f-27d8114de5f3",
// Microsoft 365 Business Basic
"3b555118-da6a-4418-894f-7df1e2096870",
}
)
var errFolderNotFound = errors.New("folder not found")
const (
// nextLinkKey is used to find the next link in a paged
@ -116,21 +67,11 @@ func siteDrives(ctx context.Context, service graph.Servicer, site string) ([]mod
func userDrives(ctx context.Context, service graph.Servicer, user string) ([]models.Driveable, error) {
var (
hasDrive bool
numberOfRetries = 3
r models.DriveCollectionResponseable
err error
)
hasDrive, err := hasDriveLicense(ctx, service, user)
if err != nil {
return nil, errors.Wrap(err, user)
}
if !hasDrive {
logger.Ctx(ctx).Debugf("User %s does not have a license for OneDrive", user)
return make([]models.Driveable, 0), nil // no license
}
// Retry Loop for Drive retrieval. Request can timeout
for i := 0; i <= numberOfRetries; i++ {
r, err = service.Client().UsersById(user).Drives().Get(ctx, nil)
@ -138,7 +79,7 @@ func userDrives(ctx context.Context, service graph.Servicer, user string) ([]mod
detailedError := support.ConnectorStackErrorTrace(err)
if strings.Contains(detailedError, userMysiteURLNotFound) ||
strings.Contains(detailedError, userMysiteNotFound) {
logger.Ctx(ctx).Debugf("User %s does not have a drive", user)
logger.Ctx(ctx).Infof("User %s does not have a drive", user)
return make([]models.Driveable, 0), nil // no drive
}
@ -170,6 +111,7 @@ type itemCollector func(
driveItems []models.DriveItemable,
oldPaths map[string]string,
newPaths map[string]string,
excluded map[string]struct{},
) error
// collectItems will enumerate all items in the specified drive and hand them to the
@ -179,13 +121,14 @@ func collectItems(
service graph.Servicer,
driveID, driveName string,
collector itemCollector,
) (string, map[string]string, error) {
) (string, map[string]string, map[string]struct{}, error) {
var (
newDeltaURL = ""
// TODO(ashmrtn): Eventually this should probably be a parameter so we can
// take in previous paths.
oldPaths = map[string]string{}
newPaths = map[string]string{}
excluded = map[string]struct{}{}
)
maps.Copy(newPaths, oldPaths)
@ -219,16 +162,16 @@ func collectItems(
for {
r, err := builder.Get(ctx, requestConfig)
if err != nil {
return "", nil, errors.Wrapf(
return "", nil, nil, errors.Wrapf(
err,
"failed to query drive items. details: %s",
support.ConnectorStackErrorTrace(err),
)
}
err = collector(ctx, driveID, driveName, r.GetValue(), oldPaths, newPaths)
err = collector(ctx, driveID, driveName, r.GetValue(), oldPaths, newPaths, excluded)
if err != nil {
return "", nil, err
return "", nil, nil, err
}
if r.GetOdataDeltaLink() != nil && len(*r.GetOdataDeltaLink()) > 0 {
@ -245,7 +188,7 @@ func collectItems(
builder = msdrives.NewItemRootDeltaRequestBuilder(*nextLink, service.Adapter())
}
return newDeltaURL, newPaths, nil
return newDeltaURL, newPaths, excluded, nil
}
// getFolder will lookup the specified folder name under `parentFolderID`
@ -352,7 +295,7 @@ func GetAllFolders(
folders := map[string]*Displayable{}
for _, d := range drives {
_, _, err = collectItems(
_, _, _, err = collectItems(
ctx,
gs,
*d.GetId(),
@ -363,6 +306,7 @@ func GetAllFolders(
items []models.DriveItemable,
oldPaths map[string]string,
newPaths map[string]string,
excluded map[string]struct{},
) error {
for _, item := range items {
// Skip the root item.
@ -419,56 +363,3 @@ func DeleteItem(
return nil
}
// hasDriveLicense is a utility function that queries the M365 server
// to check whether the user's licenses include access to OneDrive.
func hasDriveLicense(
ctx context.Context,
service graph.Servicer,
user string,
) (bool, error) {
var hasDrive bool
resp, err := service.Client().UsersById(user).LicenseDetails().Get(ctx, nil)
if err != nil {
return false,
errors.Wrap(err, "failure obtaining license details for user")
}
iter, err := msgraphgocore.NewPageIterator(
resp, service.Adapter(),
models.CreateLicenseDetailsCollectionResponseFromDiscriminatorValue,
)
if err != nil {
return false, err
}
cb := func(pageItem any) bool {
entry, ok := pageItem.(models.LicenseDetailsable)
if !ok {
err = errors.New("casting item to models.LicenseDetailsable")
return false
}
sku := entry.GetSkuId()
if sku == nil {
return true
}
for _, license := range skuIDs {
if sku.String() == license {
hasDrive = true
return false
}
}
return true
}
if err := iter.Iterate(ctx, cb); err != nil {
return false,
errors.Wrap(err, support.ConnectorStackErrorTrace(err))
}
return hasDrive, nil
}
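
Since collectItems now threads the excluded set through every page of results, each itemCollector must accept the extra parameter. A minimal no-op collector matching the updated signature (mirroring the anonymous collector in GetAllFolders above; assumes the file's existing context and models imports):

collector := func(
	ctx context.Context,
	driveID, driveName string,
	items []models.DriveItemable,
	oldPaths map[string]string,
	newPaths map[string]string,
	excluded map[string]struct{},
) error {
	// Inspect or skip items here; excluded receives deleted-item IDs.
	return nil
}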

View File

@ -9,6 +9,7 @@ import (
"github.com/stretchr/testify/suite"
"github.com/alcionai/corso/src/internal/common"
"github.com/alcionai/corso/src/internal/connector/graph"
"github.com/alcionai/corso/src/internal/tester"
"github.com/alcionai/corso/src/pkg/control"
"github.com/alcionai/corso/src/pkg/logger"
@ -146,6 +147,7 @@ func (suite *OneDriveSuite) TestOneDriveNewCollections() {
NewOneDriveBackup([]string{test.user}).
AllData()[0]
odcs, err := NewCollections(
graph.LargeItemClient(),
creds.AzureTenantID,
test.user,
OneDriveSource,

View File

@ -4,6 +4,7 @@ import (
"context"
"fmt"
"io"
"net/http"
"strings"
msdrives "github.com/microsoftgraph/msgraph-sdk-go/drives"
@ -13,6 +14,7 @@ import (
"github.com/alcionai/corso/src/internal/connector/graph"
"github.com/alcionai/corso/src/internal/connector/support"
"github.com/alcionai/corso/src/internal/connector/uploadsession"
"github.com/alcionai/corso/src/internal/version"
"github.com/alcionai/corso/src/pkg/backup/details"
"github.com/alcionai/corso/src/pkg/logger"
)
@ -27,7 +29,7 @@ const (
// It crafts this by querying M365 for a download URL for the item
// and using a http client to initialize a reader
func sharePointItemReader(
ctx context.Context,
hc *http.Client,
item models.DriveItemable,
) (details.ItemInfo, io.ReadCloser, error) {
url, ok := item.GetAdditionalData()[downloadURLKey].(*string)
@ -35,7 +37,7 @@ func sharePointItemReader(
return details.ItemInfo{}, nil, fmt.Errorf("failed to get url for %s", *item.GetName())
}
rc, err := driveItemReader(ctx, *url)
resp, err := hc.Get(*url)
if err != nil {
return details.ItemInfo{}, nil, err
}
@ -44,14 +46,14 @@ func sharePointItemReader(
SharePoint: sharePointItemInfo(item, *item.GetSize()),
}
return dii, rc, nil
return dii, resp.Body, nil
}
// oneDriveItemReader will return a io.ReadCloser for the specified item
// It crafts this by querying M365 for a download URL for the item
// and using a http client to initialize a reader
func oneDriveItemReader(
ctx context.Context,
hc *http.Client,
item models.DriveItemable,
) (details.ItemInfo, io.ReadCloser, error) {
url, ok := item.GetAdditionalData()[downloadURLKey].(*string)
@ -59,7 +61,17 @@ func oneDriveItemReader(
return details.ItemInfo{}, nil, fmt.Errorf("failed to get url for %s", *item.GetName())
}
rc, err := driveItemReader(ctx, *url)
req, err := http.NewRequest(http.MethodGet, *url, nil)
if err != nil {
return details.ItemInfo{}, nil, err
}
// Decorate the traffic
//nolint:lll
// See https://learn.microsoft.com/en-us/sharepoint/dev/general-development/how-to-avoid-getting-throttled-or-blocked-in-sharepoint-online#how-to-decorate-your-http-traffic
req.Header.Set("User-Agent", "ISV|Alcion|Corso/"+version.Version)
resp, err := hc.Do(req)
if err != nil {
return details.ItemInfo{}, nil, err
}
@ -68,25 +80,7 @@ func oneDriveItemReader(
OneDrive: oneDriveItemInfo(item, *item.GetSize()),
}
return dii, rc, nil
}
// driveItemReader will return a io.ReadCloser for the specified item
// It crafts this by querying M365 for a download URL for the item
// and using a http client to initialize a reader
func driveItemReader(
ctx context.Context,
url string,
) (io.ReadCloser, error) {
httpClient := graph.CreateHTTPClient()
httpClient.Timeout = 0 // infinite timeout for pulling large files
resp, err := httpClient.Get(url)
if err != nil {
return nil, errors.Wrapf(err, "failed to download file from %s", url)
}
return resp.Body, nil
return dii, resp.Body, nil
}
// oneDriveItemInfo will populate a details.OneDriveInfo struct
@ -97,7 +91,7 @@ func driveItemReader(
func oneDriveItemInfo(di models.DriveItemable, itemSize int64) *details.OneDriveInfo {
var email, parent string
if di.GetCreatedBy().GetUser() != nil {
if di.GetCreatedBy() != nil && di.GetCreatedBy().GetUser() != nil {
// User is sometimes not available when created via some
// external applications (like backup/restore solutions)
ed, ok := di.GetCreatedBy().GetUser().GetAdditionalData()["email"]
@ -106,11 +100,9 @@ func oneDriveItemInfo(di models.DriveItemable, itemSize int64) *details.OneDrive
}
}
if di.GetParentReference() != nil {
if di.GetParentReference().GetName() != nil {
// EndPoint is not always populated from external apps
parent = *di.GetParentReference().GetName()
}
if di.GetParentReference() != nil && di.GetParentReference().GetName() != nil {
// EndPoint is not always populated from external apps
parent = *di.GetParentReference().GetName()
}
return &details.OneDriveInfo{

View File

@ -101,6 +101,7 @@ func (suite *ItemIntegrationSuite) TestItemReader_oneDrive() {
items []models.DriveItemable,
oldPaths map[string]string,
newPaths map[string]string,
excluded map[string]struct{},
) error {
for _, item := range items {
if item.GetFile() != nil {
@ -111,7 +112,7 @@ func (suite *ItemIntegrationSuite) TestItemReader_oneDrive() {
return nil
}
_, _, err := collectItems(ctx, suite, suite.userDriveID, "General", itemCollector)
_, _, _, err := collectItems(ctx, suite, suite.userDriveID, "General", itemCollector)
require.NoError(suite.T(), err)
// Test Requirement 2: Need a file
@ -125,7 +126,7 @@ func (suite *ItemIntegrationSuite) TestItemReader_oneDrive() {
// Read data for the file
itemInfo, itemData, err := oneDriveItemReader(ctx, driveItem)
itemInfo, itemData, err := oneDriveItemReader(graph.LargeItemClient(), driveItem)
require.NoError(suite.T(), err)
require.NotNil(suite.T(), itemInfo.OneDrive)
require.NotEmpty(suite.T(), itemInfo.OneDrive.ItemName)

View File

@ -2,6 +2,7 @@ package sharepoint
import (
"context"
"net/http"
"github.com/pkg/errors"
@ -24,6 +25,7 @@ type statusUpdater interface {
// for the specified user
func DataCollections(
ctx context.Context,
itemClient *http.Client,
selector selectors.Selector,
tenantID string,
serv graph.Servicer,
@ -66,6 +68,7 @@ func DataCollections(
case path.LibrariesCategory:
spcs, err = collectLibraries(
ctx,
itemClient,
serv,
tenantID,
site,
@ -124,6 +127,7 @@ func collectLists(
// all the drives associated with the site.
func collectLibraries(
ctx context.Context,
itemClient *http.Client,
serv graph.Servicer,
tenantID, siteID string,
scope selectors.SharePointScope,
@ -138,6 +142,7 @@ func collectLibraries(
logger.Ctx(ctx).With("site", siteID).Debug("Creating SharePoint Library collections")
colls := onedrive.NewCollections(
itemClient,
tenantID,
siteID,
onedrive.SharePointSource,

View File

@ -7,6 +7,7 @@ import (
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/suite"
"github.com/alcionai/corso/src/internal/connector/graph"
"github.com/alcionai/corso/src/internal/connector/onedrive"
"github.com/alcionai/corso/src/internal/tester"
"github.com/alcionai/corso/src/pkg/control"
@ -89,7 +90,9 @@ func (suite *SharePointLibrariesSuite) TestUpdateCollections() {
paths := map[string]string{}
newPaths := map[string]string{}
excluded := map[string]struct{}{}
c := onedrive.NewCollections(
graph.LargeItemClient(),
tenant,
site,
onedrive.SharePointSource,
@ -97,7 +100,7 @@ func (suite *SharePointLibrariesSuite) TestUpdateCollections() {
&MockGraphService{},
nil,
control.Options{})
err := c.UpdateCollections(ctx, "driveID", "General", test.items, paths, newPaths)
err := c.UpdateCollections(ctx, "driveID", "General", test.items, paths, newPaths, excluded)
test.expect(t, err)
assert.Equal(t, len(test.expectedCollectionPaths), len(c.CollectionMap), "collection paths")
assert.Equal(t, test.expectedItemCount, c.NumItems, "item count")

View File

@ -4,6 +4,7 @@ import (
"context"
"fmt"
multierror "github.com/hashicorp/go-multierror"
bytesize "github.com/inhies/go-bytesize"
"github.com/alcionai/corso/src/pkg/logger"
@ -21,6 +22,7 @@ type ConnectorOperationStatus struct {
FolderCount int
Successful int
ErrorCount int
Err error
incomplete bool
incompleteReason string
additionalDetails string
@ -70,6 +72,7 @@ func CreateStatus(
FolderCount: folders,
Successful: cm.Successes,
ErrorCount: numErr,
Err: err,
incomplete: hasErrors,
incompleteReason: reason,
bytes: cm.TotalBytes,
@ -115,6 +118,7 @@ func MergeStatus(one, two ConnectorOperationStatus) ConnectorOperationStatus {
FolderCount: one.FolderCount + two.FolderCount,
Successful: one.Successful + two.Successful,
ErrorCount: one.ErrorCount + two.ErrorCount,
Err: multierror.Append(one.Err, two.Err).ErrorOrNil(),
bytes: one.bytes + two.bytes,
incomplete: hasErrors,
incompleteReason: one.incompleteReason + ", " + two.incompleteReason,
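
The Err merge relies on two multierror behaviors: Append skips nil inputs, and ErrorOrNil collapses an empty result back to nil, so merging two error-free statuses still yields a nil Err. A tiny sketch with a hypothetical helper:

package support

import multierror "github.com/hashicorp/go-multierror"

// mergeErrs mirrors the Err field handling in MergeStatus: nil inputs are
// tolerated, and an all-nil result collapses back to nil.
func mergeErrs(a, b error) error {
	return multierror.Append(a, b).ErrorOrNil()
}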

View File

@ -345,6 +345,7 @@ func streamBaseEntries(
prevPath path.Path,
dir fs.Directory,
encodedSeen map[string]struct{},
globalExcludeSet map[string]struct{},
progress *corsoProgress,
) error {
if dir == nil {
@ -373,6 +374,12 @@ func streamBaseEntries(
return errors.Wrapf(err, "unable to decode entry name %s", entry.Name())
}
// This entry was marked as deleted by a service that can't tell us the
// previous path of deleted items, only the item ID.
if _, ok := globalExcludeSet[entName]; ok {
return nil
}
// For now assuming that item IDs don't need escaping.
itemPath, err := curPath.Append(entName, true)
if err != nil {
@ -421,6 +428,7 @@ func getStreamItemFunc(
staticEnts []fs.Entry,
streamedEnts data.Collection,
baseDir fs.Directory,
globalExcludeSet map[string]struct{},
progress *corsoProgress,
) func(context.Context, func(context.Context, fs.Entry) error) error {
return func(ctx context.Context, cb func(context.Context, fs.Entry) error) error {
@ -443,6 +451,7 @@ func getStreamItemFunc(
prevPath,
baseDir,
seen,
globalExcludeSet,
progress,
); err != nil {
errs = multierror.Append(
@ -457,21 +466,22 @@ func getStreamItemFunc(
// buildKopiaDirs recursively builds a directory hierarchy from the roots up.
// Returned directories are virtualfs.StreamingDirectory.
func buildKopiaDirs(dirName string, dir *treeMap, progress *corsoProgress) (fs.Directory, error) {
func buildKopiaDirs(
dirName string,
dir *treeMap,
globalExcludeSet map[string]struct{},
progress *corsoProgress,
) (fs.Directory, error) {
// Reuse kopia directories directly if the subtree rooted at them is
// unchanged.
//
// TODO(ashmrtn): This will need to be updated when we have OneDrive backups
// where items have been deleted, because we can't determine which directory
// used to have the item.
//
// TODO(ashmrtn): We could possibly also use this optimization if we know that
// the collection has no items in it. In that case though, we may need to take
// extra care to ensure the name of the directory is properly represented. For
// example, a directory that has been renamed but with no additional items may
// not be able to directly use kopia's version of the directory due to the
// rename.
if dir.collection == nil && len(dir.childDirs) == 0 && dir.baseDir != nil {
if dir.collection == nil && len(dir.childDirs) == 0 && dir.baseDir != nil && len(globalExcludeSet) == 0 {
return dir.baseDir, nil
}
@ -480,7 +490,7 @@ func buildKopiaDirs(dirName string, dir *treeMap, progress *corsoProgress) (fs.D
var childDirs []fs.Entry
for childName, childDir := range dir.childDirs {
child, err := buildKopiaDirs(childName, childDir, progress)
child, err := buildKopiaDirs(childName, childDir, globalExcludeSet, progress)
if err != nil {
return nil, err
}
@ -496,6 +506,7 @@ func buildKopiaDirs(dirName string, dir *treeMap, progress *corsoProgress) (fs.D
childDirs,
dir.collection,
dir.baseDir,
globalExcludeSet,
progress,
),
), nil
@ -879,11 +890,19 @@ func inflateBaseTree(
// virtualfs.StreamingDirectory with the given DataCollections if there is one
// for that node. Tags can be used in future backups to fetch old snapshots for
// caching reasons.
//
// globalExcludeSet represents a set of items, identified by file name, to
// exclude from base directories when uploading the snapshot. Because items
// from *all* bases are checked against every base directory, this assumes
// that item names are unique across bases. Deletions of directories or
// subtrees should be represented as changes in the status of a Collection,
// not as entries in the globalExcludeSet.
func inflateDirTree(
ctx context.Context,
loader snapshotLoader,
baseSnaps []IncrementalBase,
collections []data.Collection,
globalExcludeSet map[string]struct{},
progress *corsoProgress,
) (fs.Directory, error) {
roots, updatedPaths, err := inflateCollectionTree(ctx, collections)
@ -915,7 +934,7 @@ func inflateDirTree(
var res fs.Directory
for dirName, dir := range roots {
tmp, err := buildKopiaDirs(dirName, dir, progress)
tmp, err := buildKopiaDirs(dirName, dir, globalExcludeSet, progress)
if err != nil {
return nil, err
}
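
A short sketch of the per-entry exclusion check streamBaseEntries performs (hypothetical helper; the real code also decodes the entry name first):

// keepEntry reports whether a base-snapshot entry should be carried forward
// into the new snapshot, given the decoded entry name.
func keepEntry(entryName string, globalExcludeSet map[string]struct{}) bool {
	_, excluded := globalExcludeSet[entryName]
	return !excluded
}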

View File

@ -705,7 +705,7 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTree() {
// - emails
// - Inbox
// - 42 separate files
dirTree, err := inflateDirTree(ctx, nil, nil, collections, progress)
dirTree, err := inflateDirTree(ctx, nil, nil, collections, nil, progress)
require.NoError(t, err)
assert.Equal(t, encodeAsPath(testTenant), dirTree.Name())
@ -793,7 +793,7 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTree_MixedDirectory()
suite.T().Run(test.name, func(t *testing.T) {
progress := &corsoProgress{pending: map[string]*itemDetails{}}
dirTree, err := inflateDirTree(ctx, nil, nil, test.layout, progress)
dirTree, err := inflateDirTree(ctx, nil, nil, test.layout, nil, progress)
require.NoError(t, err)
assert.Equal(t, encodeAsPath(testTenant), dirTree.Name())
@ -889,7 +889,7 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTree_Fails() {
defer flush()
suite.T().Run(test.name, func(t *testing.T) {
_, err := inflateDirTree(ctx, nil, nil, test.layout, nil)
_, err := inflateDirTree(ctx, nil, nil, test.layout, nil, nil)
assert.Error(t, err)
})
}
@ -992,7 +992,7 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTreeErrors() {
cols = append(cols, mc)
}
_, err := inflateDirTree(ctx, nil, nil, cols, progress)
_, err := inflateDirTree(ctx, nil, nil, cols, nil, progress)
require.Error(t, err)
})
}
@ -1261,6 +1261,7 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTreeSingleSubtree() {
mockIncrementalBase("", testTenant, testUser, path.ExchangeService, path.EmailCategory),
},
test.inputCollections(),
nil,
progress,
)
require.NoError(t, err)
@ -1281,7 +1282,7 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTreeMultipleSubdirecto
[]string{testTenant, service, testUser, category, testInboxDir},
false,
)
inboxFileName1 := testFileName4
inboxFileName1 := testFileName
inboxFileData1 := testFileData4
inboxFileName2 := testFileName5
inboxFileData2 := testFileData5
@ -1291,7 +1292,7 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTreeMultipleSubdirecto
append(inboxPath.Elements(), personalDir),
false,
)
personalFileName1 := testFileName
personalFileName1 := inboxFileName1
personalFileName2 := testFileName2
workPath := makePath(
@ -1312,7 +1313,7 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTreeMultipleSubdirecto
// - user1
// - email
// - Inbox
// - file4
// - file1
// - personal
// - file1
// - file2
@ -1369,8 +1370,51 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTreeMultipleSubdirecto
table := []struct {
name string
inputCollections func(t *testing.T) []data.Collection
inputExcludes map[string]struct{}
expected *expectedNode
}{
{
name: "GlobalExcludeSet",
inputCollections: func(t *testing.T) []data.Collection {
return nil
},
inputExcludes: map[string]struct{}{
inboxFileName1: {},
},
expected: expectedTreeWithChildren(
[]string{
testTenant,
service,
testUser,
category,
},
[]*expectedNode{
{
name: testInboxDir,
children: []*expectedNode{
{
name: personalDir,
children: []*expectedNode{
{
name: personalFileName2,
children: []*expectedNode{},
},
},
},
{
name: workDir,
children: []*expectedNode{
{
name: workFileName1,
children: []*expectedNode{},
},
},
},
},
},
},
),
},
{
name: "MovesSubtree",
inputCollections: func(t *testing.T) []data.Collection {
@ -1919,6 +1963,7 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTreeMultipleSubdirecto
mockIncrementalBase("", testTenant, testUser, path.ExchangeService, path.EmailCategory),
},
test.inputCollections(t),
test.inputExcludes,
progress,
)
require.NoError(t, err)
@ -2079,6 +2124,7 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTreeSkipsDeletedSubtre
mockIncrementalBase("", testTenant, testUser, path.ExchangeService, path.EmailCategory),
},
collections,
nil,
progress,
)
require.NoError(t, err)
@ -2325,6 +2371,7 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTreeSelectsCorrectSubt
mockIncrementalBase("id2", testTenant, testUser, path.ExchangeService, path.EmailCategory),
},
collections,
nil,
progress,
)
require.NoError(t, err)

View File

@ -129,7 +129,11 @@ func (w Wrapper) BackupCollections(
ctx, end := D.Span(ctx, "kopia:backupCollections")
defer end()
if len(collections) == 0 {
// TODO(ashmrtn): Make this a parameter when actually enabling the global
// exclude set.
var globalExcludeSet map[string]struct{}
if len(collections) == 0 && len(globalExcludeSet) == 0 {
return &BackupStats{}, &details.Builder{}, nil, nil
}
@ -147,7 +151,14 @@ func (w Wrapper) BackupCollections(
base = previousSnapshots
}
dirTree, err := inflateDirTree(ctx, w.c, base, collections, progress)
dirTree, err := inflateDirTree(
ctx,
w.c,
base,
collections,
globalExcludeSet,
progress,
)
if err != nil {
return nil, nil, nil, errors.Wrap(err, "building kopia directories")
}

View File

@ -4,6 +4,7 @@ import (
"context"
"time"
"github.com/alcionai/clues"
"github.com/google/uuid"
multierror "github.com/hashicorp/go-multierror"
"github.com/pkg/errors"
@ -119,6 +120,14 @@ func (op *BackupOperation) Run(ctx context.Context) (err error) {
op.Results.BackupID = model.StableID(uuid.NewString())
ctx = clues.AddAll(
ctx,
"tenant_id", tenantID, // TODO: pii
"resource_owner", op.ResourceOwner, // TODO: pii
"backup_id", op.Results.BackupID,
"service", op.Selectors.Service,
"incremental", uib)
op.bus.Event(
ctx,
events.BackupStart,
@ -174,6 +183,8 @@ func (op *BackupOperation) Run(ctx context.Context) (err error) {
return opStats.readErr
}
ctx = clues.Add(ctx, "collections", len(cs))
opStats.k, backupDetails, toMerge, err = consumeBackupDataCollections(
ctx,
op.kopia,
@ -208,13 +219,11 @@ func (op *BackupOperation) Run(ctx context.Context) (err error) {
opStats.gc = gc.AwaitStatus()
if opStats.gc.ErrorCount > 0 {
opStats.writeErr = multierror.Append(nil, opStats.writeErr, errors.Errorf(
"%v errors reported while fetching item data",
opStats.gc.ErrorCount,
)).ErrorOrNil()
merr := multierror.Append(opStats.readErr, errors.Wrap(opStats.gc.Err, "retrieving data"))
opStats.readErr = merr.ErrorOrNil()
// Need to exit before we set started to true else we'll report no errors.
return opStats.writeErr
return opStats.readErr
}
// should always be 1, since backups are 1:1 with resourceOwners.
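
The clues calls above decorate the context once so downstream logging and telemetry can surface the values without extra plumbing. A hedged sketch of the pattern, using the same AddAll/Add calls the diff introduces (helper name is hypothetical):

import (
	"context"

	"github.com/alcionai/clues"
)

// annotate attaches operation identifiers to the context up front, the way
// Run() does with tenant, resource owner, backup ID, service, and counts.
func annotate(ctx context.Context, backupID, service string, collections int) context.Context {
	ctx = clues.AddAll(ctx,
		"backup_id", backupID,
		"service", service)

	return clues.Add(ctx, "collections", collections)
}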

View File

@ -655,7 +655,7 @@ func (suite *BackupOpIntegrationSuite) TestBackup_Run_exchangeIncrementals() {
m365, err := acct.M365Config()
require.NoError(t, err)
gc, err := connector.NewGraphConnector(ctx, acct, connector.Users)
gc, err := connector.NewGraphConnector(ctx, graph.LargeItemClient(), acct, connector.Users)
require.NoError(t, err)
ac, err := api.NewClient(m365)

View File

@ -7,6 +7,7 @@ import (
"github.com/pkg/errors"
"github.com/alcionai/corso/src/internal/connector"
"github.com/alcionai/corso/src/internal/connector/graph"
"github.com/alcionai/corso/src/internal/events"
"github.com/alcionai/corso/src/internal/kopia"
"github.com/alcionai/corso/src/internal/observe"
@ -107,7 +108,7 @@ func connectToM365(
resource = connector.Sites
}
gc, err := connector.NewGraphConnector(ctx, acct, resource)
gc, err := connector.NewGraphConnector(ctx, graph.LargeItemClient(), acct, resource)
if err != nil {
return nil, err
}

View File

@ -5,6 +5,7 @@ import (
"fmt"
"time"
"github.com/alcionai/clues"
"github.com/google/uuid"
multierror "github.com/hashicorp/go-multierror"
"github.com/pkg/errors"
@ -129,6 +130,12 @@ func (op *RestoreOperation) Run(ctx context.Context) (restoreDetails *details.De
detailsStore := streamstore.New(op.kopia, op.account.ID(), op.Selectors.PathService())
ctx = clues.AddAll(
ctx,
"tenant_id", op.account.ID(), // TODO: pii
"backup_id", op.BackupID,
"service", op.Selectors.Service)
bup, deets, err := getBackupAndDetailsFromID(
ctx,
op.BackupID,
@ -142,6 +149,8 @@ func (op *RestoreOperation) Run(ctx context.Context) (restoreDetails *details.De
return nil, err
}
ctx = clues.Add(ctx, "resource_owner", bup.Selector.DiscreteOwner)
op.bus.Event(
ctx,
events.RestoreStart,
@ -159,6 +168,8 @@ func (op *RestoreOperation) Run(ctx context.Context) (restoreDetails *details.De
return nil, err
}
ctx = clues.Add(ctx, "details_paths", len(paths))
observe.Message(ctx, fmt.Sprintf("Discovered %d items in backup %s to restore", len(paths), op.BackupID))
kopiaComplete, closer := observe.MessageWithCompletion(ctx, "Enumerating items in repository")
@ -174,6 +185,8 @@ func (op *RestoreOperation) Run(ctx context.Context) (restoreDetails *details.De
}
kopiaComplete <- struct{}{}
ctx = clues.Add(ctx, "collections", len(dcs))
opStats.cs = dcs
opStats.resourceCount = len(data.ResourceOwnerSet(dcs))

View File

@ -0,0 +1,291 @@
package fault_test
import (
"fmt"
"github.com/pkg/errors"
"github.com/alcionai/corso/src/pkg/fault"
)
// ---------------------------------------------------------------------------
// mock helpers
// ---------------------------------------------------------------------------
var (
ctrl any
items = []string{}
)
type mockController struct {
errors any
}
func connectClient() error { return nil }
func dependencyCall() error { return nil }
func getIthItem(i string) error { return nil }
func getData() ([]string, error) { return nil, nil }
func storeData([]string, *fault.Errors) {}
type mockOper struct {
Errors *fault.Errors
}
func newOperation() mockOper { return mockOper{fault.New(true)} }
func (m mockOper) Run() *fault.Errors { return m.Errors }
// ---------------------------------------------------------------------------
// examples
// ---------------------------------------------------------------------------
// Example_new highlights assumptions and best practices
// for generating Errors structs.
func Example_new() {
// Errors should only be generated during the construction of
// another controller, such as a new Backup or Restore operation.
// Configurations like failFast are set during construction.
//
// Generating new fault.Errors structs outside of an operation
// controller is a smell, and should be avoided. If you need
// to aggregate errors, you should accept an interface and pass
// an Errors instance into it.
ctrl = mockController{
errors: fault.New(false),
}
}
// Example_errors_Fail describes the assumptions and best practices
// for setting the Failure error.
func Example_errors_Fail() {
errs := fault.New(false)
// Fail() should be used to record any error that highlights a
// non-recoverable failure in a process.
//
// Fail() should only get called in the last step before returning
// a fault.Errors from a controller. In all other cases, you
// should simply return an error and expect the upstream controller
// to call Fail() for you.
if err := connectClient(); err != nil {
// normally, you'd want to
// return errs.Fail(err)
errs.Fail(err)
}
// Only the topmost handler of the error should set the Fail() err.
// This will normally be the operation controller itself.
// IE: Fail() is not Wrap(). In lower levels, errors should get
// wrapped and returned like normal, and only handled by errors
// at the end.
lowLevelCall := func() error {
if err := dependencyCall(); err != nil {
// wrap here, deeper into the stack
return errors.Wrap(err, "dependency")
}
return nil
}
if err := lowLevelCall(); err != nil {
// fail here, at the top of the stack
errs.Fail(err)
}
}
// Example_errors_Add describes the assumptions and best practices
// for aggregating iterable or recoverable errors.
func Example_errors_Add() {
errs := fault.New(false)
// Add() should be used to record any error in a recoverable
// part of processing.
//
// Add() should only get called in the last step in handling an
// error within a loop or stream that does not otherwise return
// an error. In all other cases, you should simply return an error
// and expect the upstream point of iteration to call Add() for you.
for _, i := range items {
if err := getIthItem(i); err != nil {
errs.Add(err)
}
}
// In case of failFast behavior, iteration should exit as soon
// as an error occurs. Errors does not expose the failFast flag
// directly. Instead, iterators should check the value of Err().
// If it is non-nil, then the loop should break.
for _, i := range items {
if errs.Err() != nil {
break
}
errs.Add(getIthItem(i))
}
// Only the topmost handler of the error should Add() the err.
// This will normally be the iteration loop itself.
// IE: Add() is not Wrap(). In lower levels, errors should get
// wrapped and returned like normal, and only added to the
// errors at the end.
clientBasedGetter := func(s string) error {
if err := dependencyCall(); err != nil {
// wrap here, deeper into the stack
return errors.Wrap(err, "dependency")
}
return nil
}
for _, i := range items {
if err := clientBasedGetter(i); err != nil {
// add here, within the iteration loop
errs.Add(err)
}
}
}
// Example_errors_Err describes retrieving the non-recoverable error.
func Example_errors_Err() {
errs := fault.New(false)
errs.Fail(errors.New("catastrophe"))
// Err() gets the primary failure error.
err := errs.Err()
fmt.Println(err)
// if multiple Failures occur, each one after the first gets
// added to the Errs slice.
errs.Fail(errors.New("another catastrophe"))
errSl := errs.Errs()
for _, e := range errSl {
fmt.Println(e)
}
// If Err() is nil, then you can assume the operation completed.
// A complete operation is not necessarily an error-free operation.
//
// Even if Err() is nil, Errs() can be non-empty.
// Make sure you check both.
errs = fault.New(true)
// If failFast is set to true, then the first error Add()ed gets
// promoted to the Err() position.
errs.Add(errors.New("not catastrophic, but still becomes the Err()"))
err = errs.Err()
fmt.Println(err)
// Output: catastrophe
// another catastrophe
// not catastrophic, but still becomes the Err()
}
// Example_errors_Errs describes retrieving individual errors.
func Example_errors_Errs() {
errs := fault.New(false)
errs.Add(errors.New("not catastrophic"))
errs.Add(errors.New("something unwanted"))
// Errs() gets the slice errors that were recorded, but were
// considered recoverable.
errSl := errs.Errs()
for _, err := range errSl {
fmt.Println(err)
}
// Errs() only needs to be investigated by the end user at the
// conclusion of an operation. Checking Errs() within lower-
// layer code is a smell. Funcs should return an error if they
// need upstream handlers to recognize failure states.
//
// If Errs() is nil, then you can assume that no recoverable or
// iteration-based errors occurred. But that does not necessarily
// mean the operation was able to complete.
//
// Even if Errs() contains zero items, Err() can be non-nil.
// Make sure you check both.
// Output: not catastrophic
// something unwanted
}
// Example_errors_e2e showcases a more complex integration.
func Example_errors_e2e() {
oper := newOperation()
// imagine that we're a user, calling into corso SDK.
// (fake funcs used here to minimize example bloat)
//
// The operation is our controller, we expect it to
// generate a new fault.Errors when constructed, and
// to return that struct when we call Run()
errs := oper.Run()
// Let's investigate what went on inside. Since we're at
// the top of our controller, and returning a fault.Errors,
// all the error handlers set the Fail() case.
/* Run() */
func() *fault.Errors {
if err := connectClient(); err != nil {
// Fail() here; we're top level in the controller
// and this is a non-recoverable issue
return oper.Errors.Fail(err)
}
data, err := getData()
if err != nil {
return oper.Errors.Fail(err)
}
// storeData will aggregate iterated errors into
// oper.Errors.
storeData(data, oper.Errors)
// return oper.Errors here, in part to ensure it's
// non-nil, and because we don't know if we've
// aggregated any iterated errors yet.
return oper.Errors
}()
// What about the lower level handling? storeData didn't
// return an error, so what's happening there?
/* storeData */
func(data []any, errs *fault.Errors) {
// this is downstream in our code somewhere
storer := func(a any) error {
if err := dependencyCall(); err != nil {
// we're not passing in or calling fault.Errors here,
// because this isn't the iteration handler, it's just
// a regular error.
return errors.Wrap(err, "dependency")
}
return nil
}
for _, d := range data {
if errs.Err() != nil {
break
}
if err := storer(d); err != nil {
// Since we're at the top of the iteration, we need
// to add each error to the fault.Errors struct.
errs.Add(err)
}
}
}(nil, nil)
// then at the end of the oper.Run, we investigate the results.
if errs.Err() != nil {
// handle the primary error
fmt.Println("err occurred", errs.Err())
}
for _, err := range errs.Errs() {
// handle each recoverable error
fmt.Println("recoverable err occurred", err)
}
}

src/pkg/fault/fault.go Normal file
View File

@ -0,0 +1,127 @@
package fault
import (
"sync"
"golang.org/x/exp/slices"
)
type Errors struct {
mu *sync.Mutex
// err identifies non-recoverable errors. This includes
// non-start cases (ex: cannot connect to client), hard-
// stop issues (ex: credentials expired) or conscious exit
// cases (ex: iteration error + failFast config).
err error
// errs is the accumulation of recoverable or iterated
// errors. Eg: if a process is retrieving N items, and
// 1 of the items fails to be retrieved, but the rest of
// them succeed, we'd expect to see 1 error added to this
// slice.
errs []error
// if failFast is true, the first errs addition will
// get promoted to the err value. This signifies a
// non-recoverable processing state, causing any running
// processes to exit.
failFast bool
}
// ErrorsData provides the errors data alone, without sync
// controls, allowing the data to be persisted.
type ErrorsData struct {
Err error `json:"err"`
Errs []error `json:"errs"`
FailFast bool `json:"failFast"`
}
// New constructs a new Errors struct with default values in place.
func New(failFast bool) *Errors {
return &Errors{
mu: &sync.Mutex{},
errs: []error{},
failFast: failFast,
}
}
// Err returns the primary error. If not nil, this
// indicates the operation exited prior to completion.
func (e *Errors) Err() error {
return e.err
}
// Errs returns the slice of recoverable and
// iterated errors.
func (e *Errors) Errs() []error {
return e.errs
}
// Data returns the plain set of error data
// without any sync properties.
func (e *Errors) Data() ErrorsData {
return ErrorsData{
Err: e.err,
Errs: slices.Clone(e.errs),
FailFast: e.failFast,
}
}
// TODO: introduce Failer interface
// Fail sets the non-recoverable error (ie: errors.err)
// in the errors struct. If a non-recoverable error is
// already present, the error gets added to the errs slice.
func (e *Errors) Fail(err error) *Errors {
if err == nil {
return e
}
e.mu.Lock()
defer e.mu.Unlock()
return e.setErr(err)
}
// setErr handles setting errors.err. Sync locking gets
// handled upstream of this call.
func (e *Errors) setErr(err error) *Errors {
if e.err != nil {
return e.addErr(err)
}
e.err = err
return e
}
// TODO: introduce Adder interface
// Add appends the error to the slice of recoverable and
// iterated errors (ie: errors.errs). If failFast is true,
// the first Added error will get copied to errors.err,
// causing the errors struct to identify as non-recoverably
// failed.
func (e *Errors) Add(err error) *Errors {
if err == nil {
return e
}
e.mu.Lock()
defer e.mu.Unlock()
return e.addErr(err)
}
// addErr handles adding errors to errors.errs. Sync locking
// gets handled upstream of this call.
func (e *Errors) addErr(err error) *Errors {
if e.err == nil && e.failFast {
e.setErr(err)
}
e.errs = append(e.errs, err)
return e
}
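
Since Fail() and Add() acquire the struct's mutex, a single *Errors can be shared across concurrent workers; reads like Err() and Errs() are then safe once the workers are joined. A sketch under those assumptions (processItem and the loop are illustrative, not part of the package):

package main

import (
	"fmt"
	"sync"

	"github.com/pkg/errors"

	"github.com/alcionai/corso/src/pkg/fault"
)

func processItem(id int) error {
	if id%2 == 0 {
		return errors.Errorf("item %d failed", id)
	}

	return nil
}

func main() {
	var (
		errs = fault.New(false)
		wg   sync.WaitGroup
	)

	for i := 0; i < 4; i++ {
		wg.Add(1)

		go func(id int) {
			defer wg.Done()

			if err := processItem(id); err != nil {
				// Add locks internally, so concurrent calls are safe.
				errs.Add(err)
			}
		}(i)
	}

	wg.Wait()

	// After joining, inspect both the failure and recoverable errors.
	fmt.Println(errs.Err(), len(errs.Errs()))
}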

src/pkg/fault/fault_test.go Normal file
View File

@ -0,0 +1,202 @@
package fault_test
import (
"errors"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/stretchr/testify/suite"
"github.com/alcionai/corso/src/pkg/fault"
)
type FaultErrorsUnitSuite struct {
suite.Suite
}
func TestFaultErrorsUnitSuite(t *testing.T) {
suite.Run(t, new(FaultErrorsUnitSuite))
}
func (suite *FaultErrorsUnitSuite) TestNew() {
t := suite.T()
n := fault.New(false)
assert.NotNil(t, n)
n = fault.New(true)
assert.NotNil(t, n)
}
func (suite *FaultErrorsUnitSuite) TestErr() {
table := []struct {
name string
failFast bool
fail error
add error
expect assert.ErrorAssertionFunc
}{
{
name: "nil",
expect: assert.NoError,
},
{
name: "nil, failFast",
failFast: true,
expect: assert.NoError,
},
{
name: "failed",
fail: assert.AnError,
expect: assert.Error,
},
{
name: "failed, failFast",
fail: assert.AnError,
failFast: true,
expect: assert.Error,
},
{
name: "added",
add: assert.AnError,
expect: assert.NoError,
},
{
name: "added, failFast",
add: assert.AnError,
failFast: true,
expect: assert.Error,
},
}
for _, test := range table {
suite.T().Run(test.name, func(t *testing.T) {
n := fault.New(test.failFast)
require.NotNil(t, n)
e := n.Fail(test.fail)
require.NotNil(t, e)
e = n.Add(test.add)
require.NotNil(t, e)
test.expect(t, n.Err())
})
}
}
func (suite *FaultErrorsUnitSuite) TestFail() {
t := suite.T()
n := fault.New(false)
require.NotNil(t, n)
n.Fail(assert.AnError)
assert.Error(t, n.Err())
assert.Empty(t, n.Errs())
n.Fail(assert.AnError)
assert.Error(t, n.Err())
assert.NotEmpty(t, n.Errs())
}
func (suite *FaultErrorsUnitSuite) TestErrs() {
table := []struct {
name string
failFast bool
fail error
add error
expect assert.ValueAssertionFunc
}{
{
name: "nil",
expect: assert.Empty,
},
{
name: "nil, failFast",
failFast: true,
expect: assert.Empty,
},
{
name: "failed",
fail: assert.AnError,
expect: assert.Empty,
},
{
name: "failed, failFast",
fail: assert.AnError,
failFast: true,
expect: assert.Empty,
},
{
name: "added",
add: assert.AnError,
expect: assert.NotEmpty,
},
{
name: "added, failFast",
add: assert.AnError,
failFast: true,
expect: assert.NotEmpty,
},
}
for _, test := range table {
suite.T().Run(test.name, func(t *testing.T) {
n := fault.New(test.failFast)
require.NotNil(t, n)
e := n.Fail(test.fail)
require.NotNil(t, e)
e = n.Add(test.add)
require.NotNil(t, e)
test.expect(t, n.Errs())
})
}
}
func (suite *FaultErrorsUnitSuite) TestAdd() {
t := suite.T()
n := fault.New(true)
require.NotNil(t, n)
n.Add(assert.AnError)
assert.Error(t, n.Err())
assert.Len(t, n.Errs(), 1)
n.Add(assert.AnError)
assert.Error(t, n.Err())
assert.Len(t, n.Errs(), 2)
}
func (suite *FaultErrorsUnitSuite) TestData() {
t := suite.T()
// not fail-fast
n := fault.New(false)
require.NotNil(t, n)
n.Fail(errors.New("fail"))
n.Add(errors.New("1"))
n.Add(errors.New("2"))
d := n.Data()
assert.Equal(t, n.Err(), d.Err)
assert.ElementsMatch(t, n.Errs(), d.Errs)
assert.False(t, d.FailFast)
// fail-fast
n = fault.New(true)
require.NotNil(t, n)
n.Fail(errors.New("fail"))
n.Add(errors.New("1"))
n.Add(errors.New("2"))
d = n.Data()
assert.Equal(t, n.Err(), d.Err)
assert.ElementsMatch(t, n.Errs(), d.Errs)
assert.True(t, d.FailFast)
}

View File

@ -1,6 +1,8 @@
package path
import (
"strings"
"github.com/pkg/errors"
)
@ -30,18 +32,20 @@ const (
)
func toServiceType(service string) ServiceType {
switch service {
case ExchangeService.String():
s := strings.ToLower(service)
switch s {
case strings.ToLower(ExchangeService.String()):
return ExchangeService
case OneDriveService.String():
case strings.ToLower(OneDriveService.String()):
return OneDriveService
case SharePointService.String():
case strings.ToLower(SharePointService.String()):
return SharePointService
case ExchangeMetadataService.String():
case strings.ToLower(ExchangeMetadataService.String()):
return ExchangeMetadataService
case OneDriveMetadataService.String():
case strings.ToLower(OneDriveMetadataService.String()):
return OneDriveMetadataService
case SharePointMetadataService.String():
case strings.ToLower(SharePointMetadataService.String()):
return SharePointMetadataService
default:
return UnknownService
@ -70,22 +74,24 @@ const (
)
func ToCategoryType(category string) CategoryType {
switch category {
case EmailCategory.String():
cat := strings.ToLower(category)
switch cat {
case strings.ToLower(EmailCategory.String()):
return EmailCategory
case ContactsCategory.String():
case strings.ToLower(ContactsCategory.String()):
return ContactsCategory
case EventsCategory.String():
case strings.ToLower(EventsCategory.String()):
return EventsCategory
case FilesCategory.String():
case strings.ToLower(FilesCategory.String()):
return FilesCategory
case LibrariesCategory.String():
case strings.ToLower(LibrariesCategory.String()):
return LibrariesCategory
case ListsCategory.String():
case strings.ToLower(ListsCategory.String()):
return ListsCategory
case PagesCategory.String():
case strings.ToLower(PagesCategory.String()):
return PagesCategory
case DetailsCategory.String():
case strings.ToLower(DetailsCategory.String()):
return DetailsCategory
default:
return UnknownCategory
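
With both sides of the comparison lowercased, lookups are now case-insensitive. A small sketch of the resulting behavior, mirroring the tests below (only exported helpers are used):

package main

import (
	"fmt"
	"strings"

	"github.com/alcionai/corso/src/pkg/path"
)

func main() {
	// Uppercased input resolves to the same category as the original.
	in := strings.ToUpper(path.EmailCategory.String())
	fmt.Println(path.ToCategoryType(in) == path.EmailCategory) // true

	// Unrecognized values still fall through to UnknownCategory.
	fmt.Println(path.ToCategoryType("not-a-category") == path.UnknownCategory) // true
}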

View File

@ -1,6 +1,7 @@
package path
import (
"strings"
"testing"
"github.com/stretchr/testify/assert"
@ -128,3 +129,51 @@ func (suite *ServiceCategoryUnitSuite) TestValidateServiceAndCategory() {
})
}
}
func (suite *ServiceCategoryUnitSuite) TestToServiceType() {
table := []struct {
name string
service string
expected ServiceType
}{
{
name: "SameCase",
service: ExchangeMetadataService.String(),
expected: ExchangeMetadataService,
},
{
name: "DifferentCase",
service: strings.ToUpper(ExchangeMetadataService.String()),
expected: ExchangeMetadataService,
},
}
for _, test := range table {
suite.T().Run(test.name, func(t *testing.T) {
assert.Equal(t, test.expected, toServiceType(test.service))
})
}
}
func (suite *ServiceCategoryUnitSuite) TestToCategoryType() {
table := []struct {
name string
category string
expected CategoryType
}{
{
name: "SameCase",
category: EmailCategory.String(),
expected: EmailCategory,
},
{
name: "DifferentCase",
category: strings.ToUpper(EmailCategory.String()),
expected: EmailCategory,
},
}
for _, test := range table {
suite.T().Run(test.name, func(t *testing.T) {
assert.Equal(t, test.expected, ToCategoryType(test.category))
})
}
}

View File

@ -224,7 +224,7 @@ func (s *sharePoint) AllData() []SharePointScope {
scopes,
makeScope[SharePointScope](SharePointLibrary, Any()),
makeScope[SharePointScope](SharePointList, Any()),
makeScope[SharePointScope](SharePointPage, Any()),
makeScope[SharePointScope](SharePointPageFolder, Any()),
)
return scopes
@ -323,7 +323,7 @@ func (s *sharePoint) PageItems(pages, items []string, opts ...option) []SharePoi
scopes = append(
scopes,
makeScope[SharePointScope](SharePointPage, items).
set(SharePointPage, pages, opts...),
set(SharePointPageFolder, pages, opts...),
)
return scopes
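
With folders keyed as SharePointPageFolder, a Pages() scope now matches against the folder portion of a page's repo ref. A sketch of the selector construction, mirroring the test below ("sid" and the folder path are illustrative values; the import path is an assumption):

package main

import (
	"github.com/alcionai/corso/src/pkg/selectors" // assumed import path
)

func main() {
	sel := selectors.NewSharePointRestore([]string{"sid"})

	// Pages() produces SharePointPageFolder scopes for the listed
	// folders, so items under folderG/folderH get selected.
	sel.Include(sel.Pages([]string{"folderG/folderH"}))
}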

View File

@ -193,9 +193,13 @@ func (suite *SharePointSelectorSuite) TestToSharePointRestore() {
func (suite *SharePointSelectorSuite) TestSharePointRestore_Reduce() {
var (
item = stubRepoRef(path.SharePointService, path.LibrariesCategory, "sid", "folderA/folderB", "item")
item2 = stubRepoRef(path.SharePointService, path.LibrariesCategory, "sid", "folderA/folderC", "item2")
item3 = stubRepoRef(path.SharePointService, path.LibrariesCategory, "sid", "folderD/folderE", "item3")
pairAC = "folderA/folderC"
pairGH = "folderG/folderH"
item = stubRepoRef(path.SharePointService, path.LibrariesCategory, "sid", "folderA/folderB", "item")
item2 = stubRepoRef(path.SharePointService, path.LibrariesCategory, "sid", pairAC, "item2")
item3 = stubRepoRef(path.SharePointService, path.LibrariesCategory, "sid", "folderD/folderE", "item3")
item4 = stubRepoRef(path.SharePointService, path.PagesCategory, "sid", pairGH, "item4")
item5 = stubRepoRef(path.SharePointService, path.PagesCategory, "sid", pairGH, "item5")
)
deets := &details.Details{
@ -225,6 +229,22 @@ func (suite *SharePointSelectorSuite) TestSharePointRestore_Reduce() {
},
},
},
{
RepoRef: item4,
ItemInfo: details.ItemInfo{
SharePoint: &details.SharePointInfo{
ItemType: details.SharePointItem,
},
},
},
{
RepoRef: item5,
ItemInfo: details.ItemInfo{
SharePoint: &details.SharePointInfo{
ItemType: details.SharePointItem,
},
},
},
},
},
}
@ -247,7 +267,7 @@ func (suite *SharePointSelectorSuite) TestSharePointRestore_Reduce() {
odr.Include(odr.AllData())
return odr
},
expect: arr(item, item2, item3),
expect: arr(item, item2, item3, item4, item5),
},
{
name: "only match item",
@ -264,11 +284,21 @@ func (suite *SharePointSelectorSuite) TestSharePointRestore_Reduce() {
deets: deets,
makeSelector: func() *SharePointRestore {
odr := NewSharePointRestore([]string{"sid"})
odr.Include(odr.Libraries([]string{"folderA/folderB", "folderA/folderC"}))
odr.Include(odr.Libraries([]string{"folderA/folderB", pairAC}))
return odr
},
expect: arr(item, item2),
},
{
name: "pages match folder",
deets: deets,
makeSelector: func() *SharePointRestore {
odr := NewSharePointRestore([]string{"sid"})
odr.Include(odr.Pages([]string{pairGH, pairAC}))
return odr
},
expect: arr(item4, item5),
},
}
for _, test := range table {
suite.T().Run(test.name, func(t *testing.T) {

View File

@ -8,6 +8,7 @@ import (
"github.com/alcionai/corso/src/internal/connector"
"github.com/alcionai/corso/src/internal/connector/discovery"
"github.com/alcionai/corso/src/internal/connector/graph"
"github.com/alcionai/corso/src/pkg/account"
)
@ -20,7 +21,7 @@ type User struct {
// Users returns a list of users in the specified M365 tenant
// TODO: Implement paging support
func Users(ctx context.Context, m365Account account.Account) ([]*User, error) {
gc, err := connector.NewGraphConnector(ctx, m365Account, connector.Users)
gc, err := connector.NewGraphConnector(ctx, graph.LargeItemClient(), m365Account, connector.Users)
if err != nil {
return nil, errors.Wrap(err, "could not initialize M365 graph connection")
}
@ -76,7 +77,7 @@ func UserPNs(ctx context.Context, m365Account account.Account) ([]string, error)
// SiteURLs returns a list of SharePoint site WebURLs in the specified M365 tenant
func SiteURLs(ctx context.Context, m365Account account.Account) ([]string, error) {
gc, err := connector.NewGraphConnector(ctx, m365Account, connector.Sites)
gc, err := connector.NewGraphConnector(ctx, graph.LargeItemClient(), m365Account, connector.Sites)
if err != nil {
return nil, errors.Wrap(err, "could not initialize M365 graph connection")
}
@ -86,7 +87,7 @@ func SiteURLs(ctx context.Context, m365Account account.Account) ([]string, error
// SiteIDs returns a list of SharePoint site IDs in the specified M365 tenant
func SiteIDs(ctx context.Context, m365Account account.Account) ([]string, error) {
gc, err := connector.NewGraphConnector(ctx, m365Account, connector.Sites)
gc, err := connector.NewGraphConnector(ctx, graph.LargeItemClient(), m365Account, connector.Sites)
if err != nil {
return nil, errors.Wrap(err, "could not initialize M365 graph connection")
}

View File

@ -47,7 +47,7 @@ const config = {
sidebarPath: require.resolve('./sidebars.js'),
remarkPlugins: [require('mdx-mermaid')],
editUrl:
'https://github.com/alcionai/corso/tree/main/docs',
'https://github.com/alcionai/corso/tree/main/website',
},
blog: {
showReadingTime: true,

View File

@ -13220,9 +13220,9 @@
}
},
"node_modules/ua-parser-js": {
"version": "0.7.32",
"resolved": "https://registry.npmjs.org/ua-parser-js/-/ua-parser-js-0.7.32.tgz",
"integrity": "sha512-f9BESNVhzlhEFf2CHMSj40NWOjYPl1YKYbrvIr/hFTDEmLq7SRbWvm7FcdcpCYT95zrOhC7gZSxjdnnTpBcwVw==",
"version": "0.7.33",
"resolved": "https://registry.npmjs.org/ua-parser-js/-/ua-parser-js-0.7.33.tgz",
"integrity": "sha512-s8ax/CeZdK9R/56Sui0WM6y9OFREJarMRHqLB2EwkovemBxNQ+Bqu8GAsUnVcXKgphb++ghr/B2BZx4mahujPw==",
"funding": [
{
"type": "opencollective",
@ -13233,7 +13233,6 @@
"url": "https://paypal.me/faisalman"
}
],
"license": "MIT",
"engines": {
"node": "*"
}
@ -23418,9 +23417,9 @@
"peer": true
},
"ua-parser-js": {
"version": "0.7.32",
"resolved": "https://registry.npmjs.org/ua-parser-js/-/ua-parser-js-0.7.32.tgz",
"integrity": "sha512-f9BESNVhzlhEFf2CHMSj40NWOjYPl1YKYbrvIr/hFTDEmLq7SRbWvm7FcdcpCYT95zrOhC7gZSxjdnnTpBcwVw=="
"version": "0.7.33",
"resolved": "https://registry.npmjs.org/ua-parser-js/-/ua-parser-js-0.7.33.tgz",
"integrity": "sha512-s8ax/CeZdK9R/56Sui0WM6y9OFREJarMRHqLB2EwkovemBxNQ+Bqu8GAsUnVcXKgphb++ghr/B2BZx4mahujPw=="
},
"unherit": {
"version": "1.1.3",