From b8bc85deba3b0c9e386b6e98545b087809a04025 Mon Sep 17 00:00:00 2001 From: Abin Simon Date: Fri, 3 Feb 2023 08:55:51 +0530 Subject: [PATCH] Metadata backup for OneDrive (#2148) ## Description This PR adds an option to back up and restore additional metadata (currently permissions) for OneDrive. **This PR also adds the ability to restore empty folders for OneDrive along with their permissions.** Breaking change: older backups can no longer be restored, since OneDrive backups are now expected to contain both `.data` and `.meta`/`.dirmeta` files. ## Does this PR need a docs update or release note? *Changelog added; docs pending.* - [x] :white_check_mark: Yes, it's included - [x] :clock1: Yes, but in a later PR - [ ] :no_entry: No ## Type of change - [x] :sunflower: Feature - [ ] :bug: Bugfix - [ ] :world_map: Documentation - [ ] :robot: Test - [ ] :computer: CI/Deployment - [ ] :broom: Tech Debt/Cleanup ## Issue(s) * https://github.com/alcionai/corso/issues/1774 ## Test Plan - [ ] :muscle: Manual - [x] :zap: Unit test - [x] :green_heart: E2E --- CHANGELOG.md | 8 + src/cli/backup/onedrive.go | 1 + src/cli/options/options.go | 45 +- src/cli/restore/onedrive.go | 6 + src/cmd/factory/impl/common.go | 5 +- src/cmd/factory/impl/exchange.go | 4 + src/internal/connector/graph_connector.go | 3 +- .../connector/graph_connector_helper_test.go | 62 ++- .../connector/graph_connector_test.go | 510 +++++++++++++++++- src/internal/connector/onedrive/collection.go | 275 +++++++--- .../connector/onedrive/collection_test.go | 163 +++++- .../connector/onedrive/collections.go | 15 +- .../connector/onedrive/collections_test.go | 177 +++--- src/internal/connector/onedrive/item.go | 80 ++- src/internal/connector/onedrive/item_test.go | 2 +- src/internal/connector/onedrive/restore.go | 415 ++++++++++++-- .../sharepoint/data_collections_test.go | 2 +- src/internal/connector/sharepoint/restore.go | 8 +- src/internal/connector/support/status.go | 1 + .../operations/backup_integration_test.go | 2 +- src/internal/operations/restore.go | 16 +- src/pkg/control/options.go | 14 +- 22 files changed, 1540 insertions(+), 274 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 50a5f3270..33485d926 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Document Corso's fault-tolerance and restartability features - Add retries on timeouts and status code 500 for Exchange - Increase page size preference for delta requests for Exchange to reduce number of roundtrips +- OneDrive file/folder permissions can now be backed up and restored +- Add `--restore-permissions` flag to toggle restoration of OneDrive permissions + +### Known Issues + +- When the same user has permissions on both a file and its containing + folder, only the folder-level permission is restored for that user; no + separate file-only permission is restored.
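For reviewers, a minimal sketch of what the new sidecar files hold. Every OneDrive item is now persisted as a `<name>.data` stream plus a JSON sidecar (`<name>.meta` for files, `<folder>.dirmeta` for folders); the structs below mirror the `UserPermission`/`Metadata` types this patch adds in `src/internal/connector/onedrive/collection.go`, while the sample values are made up:

```go
package main

import (
	"encoding/json"
	"fmt"
	"time"
)

// Mirrors the types added in src/internal/connector/onedrive/collection.go.
type UserPermission struct {
	ID         string     `json:"id,omitempty"`
	Roles      []string   `json:"role,omitempty"`
	Email      string     `json:"email,omitempty"`
	Expiration *time.Time `json:"expiration,omitempty"`
}

type Metadata struct {
	Permissions []UserPermission `json:"permissions,omitempty"`
}

func main() {
	// What a "test-file.txt.meta" sidecar looks like after backup
	// (a folder's ".dirmeta" has the same shape):
	meta := Metadata{Permissions: []UserPermission{
		{ID: "perm-id", Roles: []string{"write"}, Email: "user@example.com"},
	}}

	out, _ := json.Marshal(meta)
	fmt.Println(string(out))
	// {"permissions":[{"id":"perm-id","role":["write"],"email":"user@example.com"}]}
}
```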
## [v0.2.0] (alpha) - 2023-1-29 diff --git a/src/cli/backup/onedrive.go b/src/cli/backup/onedrive.go index 60a055dce..f74f98916 100644 --- a/src/cli/backup/onedrive.go +++ b/src/cli/backup/onedrive.go @@ -79,6 +79,7 @@ func addOneDriveCommands(cmd *cobra.Command) *cobra.Command { switch cmd.Use { case createCommand: c, fs = utils.AddCommand(cmd, oneDriveCreateCmd()) + options.AddFeatureToggle(cmd, options.DisablePermissionsBackup()) c.Use = c.Use + " " + oneDriveServiceCommandCreateUseSuffix c.Example = oneDriveServiceCommandCreateExamples diff --git a/src/cli/options/options.go b/src/cli/options/options.go index 4988c29ca..32defc5bb 100644 --- a/src/cli/options/options.go +++ b/src/cli/options/options.go @@ -11,17 +11,11 @@ import ( func Control() control.Options { opt := control.Defaults() - if fastFail { - opt.FailFast = true - } - - if noStats { - opt.DisableMetrics = true - } - - if disableIncrementals { - opt.ToggleFeatures.DisableIncrementals = true - } + opt.FailFast = fastFail + opt.DisableMetrics = noStats + opt.RestorePermissions = restorePermissions + opt.ToggleFeatures.DisableIncrementals = disableIncrementals + opt.ToggleFeatures.DisablePermissionsBackup = disablePermissionsBackup return opt } @@ -31,8 +25,9 @@ func Control() control.Options { // --------------------------------------------------------------------------- var ( - fastFail bool - noStats bool + fastFail bool + noStats bool + restorePermissions bool ) // AddOperationFlags adds command-local operation flags @@ -49,11 +44,20 @@ func AddGlobalOperationFlags(cmd *cobra.Command) { fs.BoolVar(&noStats, "no-stats", false, "disable anonymous usage statistics gathering") } +// AddRestorePermissionsFlag adds OneDrive flag for restoring permissions +func AddRestorePermissionsFlag(cmd *cobra.Command) { + fs := cmd.Flags() + fs.BoolVar(&restorePermissions, "restore-permissions", false, "Restore permissions for files and folders") +} + // --------------------------------------------------------------------------- // Feature Flags // --------------------------------------------------------------------------- -var disableIncrementals bool +var ( + disableIncrementals bool + disablePermissionsBackup bool +) type exposeFeatureFlag func(*pflag.FlagSet) @@ -78,3 +82,16 @@ func DisableIncrementals() func(*pflag.FlagSet) { cobra.CheckErr(fs.MarkHidden("disable-incrementals")) } } + +// Adds the hidden '--disable-permissions-backup' cli flag which, when +// set, disables backing up permissions. 
+func DisablePermissionsBackup() func(*pflag.FlagSet) { + return func(fs *pflag.FlagSet) { + fs.BoolVar( + &disablePermissionsBackup, + "disable-permissions-backup", + false, + "Disable backing up item permissions for OneDrive") + cobra.CheckErr(fs.MarkHidden("disable-permissions-backup")) + } +} diff --git a/src/cli/restore/onedrive.go b/src/cli/restore/onedrive.go index 526db414b..bd8dc7816 100644 --- a/src/cli/restore/onedrive.go +++ b/src/cli/restore/onedrive.go @@ -63,6 +63,9 @@ func addOneDriveCommands(cmd *cobra.Command) *cobra.Command { utils.FileFN, nil, "Restore items by file name or ID") + // permissions restore flag + options.AddRestorePermissionsFlag(c) + // onedrive info flags fs.StringVar( @@ -97,6 +100,9 @@ const ( oneDriveServiceCommandRestoreExamples = `# Restore file with ID 98765abcdef corso restore onedrive --backup 1234abcd-12ab-cd34-56de-1234abcd --file 98765abcdef +# Restore file with ID 98765abcdef along with its associated permissions +corso restore onedrive --backup 1234abcd-12ab-cd34-56de-1234abcd --file 98765abcdef --restore-permissions + # Restore Alice's file named "FY2021 Planning.xlsx in "Documents/Finance Reports" from a specific backup corso restore onedrive --backup 1234abcd-12ab-cd34-56de-1234abcd \ --user alice@example.com --file "FY2021 Planning.xlsx" --folder "Documents/Finance Reports" diff --git a/src/cmd/factory/impl/common.go b/src/cmd/factory/impl/common.go index 0ea6835dd..3ed3831fc 100644 --- a/src/cmd/factory/impl/common.go +++ b/src/cmd/factory/impl/common.go @@ -50,6 +50,7 @@ func generateAndRestoreItems( tenantID, userID, destFldr string, howMany int, dbf dataBuilderFunc, + opts control.Options, ) (*details.Details, error) { items := make([]item, 0, howMany) @@ -74,7 +75,7 @@ func generateAndRestoreItems( items: items, }} - // TODO: fit the desination to the containers + // TODO: fit the destination to the containers dest := control.DefaultRestoreDestination(common.SimpleTimeTesting) dest.ContainerName = destFldr @@ -90,7 +91,7 @@ func generateAndRestoreItems( Infof(ctx, "Generating %d %s items in %s\n", howMany, cat, Destination) - return gc.RestoreDataCollections(ctx, acct, sel, dest, dataColls) + return gc.RestoreDataCollections(ctx, acct, sel, dest, opts, dataColls) } // ------------------------------------------------------------------------------------------ diff --git a/src/cmd/factory/impl/exchange.go b/src/cmd/factory/impl/exchange.go index 26f7eef09..39e3c13a1 100644 --- a/src/cmd/factory/impl/exchange.go +++ b/src/cmd/factory/impl/exchange.go @@ -6,6 +6,7 @@ import ( . 
"github.com/alcionai/corso/src/cli/print" "github.com/alcionai/corso/src/cli/utils" "github.com/alcionai/corso/src/internal/connector/mockconnector" + "github.com/alcionai/corso/src/pkg/control" "github.com/alcionai/corso/src/pkg/path" "github.com/alcionai/corso/src/pkg/selectors" ) @@ -67,6 +68,7 @@ func handleExchangeEmailFactory(cmd *cobra.Command, args []string) error { subject, body, body, now, now, now, now) }, + control.Options{}, ) if err != nil { return Only(ctx, err) @@ -107,6 +109,7 @@ func handleExchangeCalendarEventFactory(cmd *cobra.Command, args []string) error User, subject, body, body, now, now, false) }, + control.Options{}, ) if err != nil { return Only(ctx, err) @@ -152,6 +155,7 @@ func handleExchangeContactFactory(cmd *cobra.Command, args []string) error { "123-456-7890", ) }, + control.Options{}, ) if err != nil { return Only(ctx, err) diff --git a/src/internal/connector/graph_connector.go b/src/internal/connector/graph_connector.go index 3dbc0e60c..370948639 100644 --- a/src/internal/connector/graph_connector.go +++ b/src/internal/connector/graph_connector.go @@ -269,6 +269,7 @@ func (gc *GraphConnector) RestoreDataCollections( acct account.Account, selector selectors.Selector, dest control.RestoreDestination, + opts control.Options, dcs []data.Collection, ) (*details.Details, error) { ctx, end := D.Span(ctx, "connector:restore") @@ -289,7 +290,7 @@ func (gc *GraphConnector) RestoreDataCollections( case selectors.ServiceExchange: status, err = exchange.RestoreExchangeDataCollections(ctx, creds, gc.Service, dest, dcs, deets) case selectors.ServiceOneDrive: - status, err = onedrive.RestoreCollections(ctx, gc.Service, dest, dcs, deets) + status, err = onedrive.RestoreCollections(ctx, gc.Service, dest, opts, dcs, deets) case selectors.ServiceSharePoint: status, err = sharepoint.RestoreCollections(ctx, gc.Service, dest, dcs, deets) default: diff --git a/src/internal/connector/graph_connector_helper_test.go b/src/internal/connector/graph_connector_helper_test.go index 698ee8527..8a0e22a26 100644 --- a/src/internal/connector/graph_connector_helper_test.go +++ b/src/internal/connector/graph_connector_helper_test.go @@ -2,9 +2,11 @@ package connector import ( "context" + "encoding/json" "io" "net/http" "reflect" + "strings" "testing" "github.com/microsoftgraph/msgraph-sdk-go/models" @@ -14,6 +16,7 @@ import ( "golang.org/x/exp/maps" "github.com/alcionai/corso/src/internal/connector/mockconnector" + "github.com/alcionai/corso/src/internal/connector/onedrive" "github.com/alcionai/corso/src/internal/connector/support" "github.com/alcionai/corso/src/internal/data" "github.com/alcionai/corso/src/internal/tester" @@ -645,21 +648,52 @@ func compareOneDriveItem( t *testing.T, expected map[string][]byte, item data.Stream, + restorePermissions bool, ) { + name := item.UUID() + expectedData := expected[item.UUID()] if !assert.NotNil(t, expectedData, "unexpected file with name %s", item.UUID) { return } - // OneDrive items are just byte buffers of the data. Nothing special to - // interpret. May need to do chunked comparisons in the future if we test - // large item equality. buf, err := io.ReadAll(item.ToReader()) if !assert.NoError(t, err) { return } - assert.Equal(t, expectedData, buf) + if !strings.HasSuffix(name, onedrive.MetaFileSuffix) && !strings.HasSuffix(name, onedrive.DirMetaFileSuffix) { + // OneDrive data items are just byte buffers of the data. Nothing special to + // interpret. May need to do chunked comparisons in the future if we test + // large item equality. 
+ assert.Equal(t, expectedData, buf) + return + } + + var ( + itemMeta onedrive.Metadata + expectedMeta onedrive.Metadata + ) + + err = json.Unmarshal(buf, &itemMeta) + assert.Nil(t, err) + + err = json.Unmarshal(expectedData, &expectedMeta) + assert.Nil(t, err) + + if !restorePermissions { + assert.Equal(t, 0, len(itemMeta.Permissions)) + return + } + + assert.Equal(t, len(expectedMeta.Permissions), len(itemMeta.Permissions), "number of permissions after restore") + + // FIXME(meain): The permissions before and after might not be in the same order. + for i, p := range expectedMeta.Permissions { + assert.Equal(t, p.Email, itemMeta.Permissions[i].Email) + assert.Equal(t, p.Roles, itemMeta.Permissions[i].Roles) + assert.Equal(t, p.Expiration, itemMeta.Permissions[i].Expiration) + } } func compareItem( @@ -668,6 +702,7 @@ func compareItem( service path.ServiceType, category path.CategoryType, item data.Stream, + restorePermissions bool, ) { if mt, ok := item.(data.StreamModTime); ok { assert.NotZero(t, mt.ModTime()) @@ -687,7 +722,7 @@ func compareItem( } case path.OneDriveService: - compareOneDriveItem(t, expected, item) + compareOneDriveItem(t, expected, item, restorePermissions) default: assert.FailNowf(t, "unexpected service: %s", service.String()) @@ -720,6 +755,7 @@ func checkCollections( expectedItems int, expected map[string]map[string][]byte, got []data.Collection, + restorePermissions bool, ) int { collectionsWithItems := []data.Collection{} @@ -754,7 +790,7 @@ func checkCollections( continue } - compareItem(t, expectedColData, service, category, item) + compareItem(t, expectedColData, service, category, item, restorePermissions) } if gotItems != startingItems { @@ -906,10 +942,11 @@ func collectionsForInfo( tenant, user string, dest control.RestoreDestination, allInfo []colInfo, -) (int, []data.Collection, map[string]map[string][]byte) { +) (int, int, []data.Collection, map[string]map[string][]byte) { collections := make([]data.Collection, 0, len(allInfo)) expectedData := make(map[string]map[string][]byte, len(allInfo)) totalItems := 0 + kopiaEntries := 0 for _, info := range allInfo { pth := mustToDataLayerPath( @@ -935,13 +972,20 @@ func collectionsForInfo( c.Data[i] = info.items[i].data baseExpected[info.items[i].lookupKey] = info.items[i].data + + // We do not count metadata files against item count + if service != path.OneDriveService || + (service == path.OneDriveService && + strings.HasSuffix(info.items[i].name, onedrive.DataFileSuffix)) { + totalItems++ + } } collections = append(collections, c) - totalItems += len(info.items) + kopiaEntries += len(info.items) } - return totalItems, collections, expectedData + return totalItems, kopiaEntries, collections, expectedData } //nolint:deadcode diff --git a/src/internal/connector/graph_connector_test.go b/src/internal/connector/graph_connector_test.go index be1439c35..2c2280e37 100644 --- a/src/internal/connector/graph_connector_test.go +++ b/src/internal/connector/graph_connector_test.go @@ -2,6 +2,8 @@ package connector import ( "context" + "encoding/base64" + "encoding/json" "strings" "testing" "time" @@ -15,6 +17,7 @@ import ( "github.com/alcionai/corso/src/internal/connector/discovery/api" "github.com/alcionai/corso/src/internal/connector/graph" "github.com/alcionai/corso/src/internal/connector/mockconnector" + "github.com/alcionai/corso/src/internal/connector/onedrive" "github.com/alcionai/corso/src/internal/connector/support" "github.com/alcionai/corso/src/internal/data" "github.com/alcionai/corso/src/internal/tester" @@ 
-135,9 +138,10 @@ func (suite *GraphConnectorUnitSuite) TestUnionSiteIDsAndWebURLs() { type GraphConnectorIntegrationSuite struct { suite.Suite - connector *GraphConnector - user string - acct account.Account + connector *GraphConnector + user string + secondaryUser string + acct account.Account } func TestGraphConnectorIntegrationSuite(t *testing.T) { @@ -158,6 +162,7 @@ func (suite *GraphConnectorIntegrationSuite) SetupSuite() { suite.connector = loadConnector(ctx, suite.T(), graph.HTTPClient(graph.NoTimeout()), Users) suite.user = tester.M365UserID(suite.T()) + suite.secondaryUser = tester.SecondaryM365UserID(suite.T()) suite.acct = tester.NewM365Account(suite.T()) tester.LogTimeOfTest(suite.T()) @@ -226,7 +231,7 @@ func (suite *GraphConnectorIntegrationSuite) TestRestoreFailsBadService() { } ) - deets, err := suite.connector.RestoreDataCollections(ctx, acct, sel, dest, nil) + deets, err := suite.connector.RestoreDataCollections(ctx, acct, sel, dest, control.Options{}, nil) assert.Error(t, err) assert.NotNil(t, deets) @@ -297,7 +302,9 @@ func (suite *GraphConnectorIntegrationSuite) TestEmptyCollections() { suite.acct, test.sel, dest, - test.col) + control.Options{RestorePermissions: true}, + test.col, + ) require.NoError(t, err) assert.NotNil(t, deets) @@ -344,11 +351,13 @@ func runRestoreBackupTest( test restoreBackupInfo, tenant string, resourceOwners []string, + opts control.Options, ) { var ( - collections []data.Collection - expectedData = map[string]map[string][]byte{} - totalItems = 0 + collections []data.Collection + expectedData = map[string]map[string][]byte{} + totalItems = 0 + totalKopiaItems = 0 // Get a dest per test so they're independent. dest = tester.DefaultTestRestoreDestination() ) @@ -357,7 +366,7 @@ func runRestoreBackupTest( defer flush() for _, owner := range resourceOwners { - numItems, ownerCollections, userExpectedData := collectionsForInfo( + numItems, kopiaItems, ownerCollections, userExpectedData := collectionsForInfo( t, test.service, tenant, @@ -368,6 +377,7 @@ func runRestoreBackupTest( collections = append(collections, ownerCollections...) totalItems += numItems + totalKopiaItems += kopiaItems maps.Copy(expectedData, userExpectedData) } @@ -386,7 +396,9 @@ func runRestoreBackupTest( acct, restoreSel, dest, - collections) + opts, + collections, + ) require.NoError(t, err) assert.NotNil(t, deets) @@ -425,7 +437,7 @@ func runRestoreBackupTest( t.Logf("Selective backup of %s\n", backupSel) start = time.Now() - dcs, excludes, err := backupGC.DataCollections(ctx, backupSel, nil, control.Options{}) + dcs, excludes, err := backupGC.DataCollections(ctx, backupSel, nil, control.Options{RestorePermissions: true}) require.NoError(t, err) // No excludes yet because this isn't an incremental backup. assert.Empty(t, excludes) @@ -434,7 +446,7 @@ func runRestoreBackupTest( // Pull the data prior to waiting for the status as otherwise it will // deadlock. 
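// ----------------------------------------------------------------------------
// Illustrative sketch (not part of the patch): with metadata sidecars, the
// tests above track two counts — "items" (what backup status reports: only
// OneDrive entries ending in DataFileSuffix, i.e. real files) and kopia
// entries (every persisted stream, including .meta/.dirmeta). A condensed
// restatement of the rule collectionsForInfo applies, using a hypothetical
// countBackupItems helper (assumes the strings, path, and onedrive imports):
//
//	func countBackupItems(service path.ServiceType, names []string) (items, kopiaEntries int) {
//		for _, n := range names {
//			kopiaEntries++ // every stream, data or metadata, lands in kopia
//			if service != path.OneDriveService || strings.HasSuffix(n, onedrive.DataFileSuffix) {
//				items++ // only data files count toward reported item totals
//			}
//		}
//		return items, kopiaEntries
//	}
// ----------------------------------------------------------------------------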
- skipped := checkCollections(t, totalItems, expectedData, dcs) + skipped := checkCollections(t, totalKopiaItems, expectedData, dcs, opts.RestorePermissions) status = backupGC.AwaitStatus() @@ -446,6 +458,20 @@ func runRestoreBackupTest( "backup status.Successful; wanted %d items + %d skipped", totalItems, skipped) } +func getTestMetaJSON(t *testing.T, user string, roles []string) []byte { + id := base64.StdEncoding.EncodeToString([]byte(user + strings.Join(roles, "+"))) + testMeta := onedrive.Metadata{Permissions: []onedrive.UserPermission{ + {ID: id, Roles: roles, Email: user}, + }} + + testMetaJSON, err := json.Marshal(testMeta) + if err != nil { + t.Fatal("unable to marshall test permissions", err) + } + + return testMetaJSON +} + func (suite *GraphConnectorIntegrationSuite) TestRestoreAndBackup() { bodyText := "This email has some text. However, all the text is on the same line." subjectText := "Test message for restore" @@ -564,7 +590,7 @@ func (suite *GraphConnectorIntegrationSuite) TestRestoreAndBackup() { }, }, { - name: "MultipleContactsMutlipleFolders", + name: "MultipleContactsMultipleFolders", service: path.ExchangeService, resource: Users, collections: []colInfo{ @@ -691,9 +717,24 @@ func (suite *GraphConnectorIntegrationSuite) TestRestoreAndBackup() { category: path.FilesCategory, items: []itemInfo{ { - name: "test-file.txt", + name: "test-file.txt" + onedrive.DataFileSuffix, data: []byte(strings.Repeat("a", 33)), - lookupKey: "test-file.txt", + lookupKey: "test-file.txt" + onedrive.DataFileSuffix, + }, + { + name: "test-file.txt" + onedrive.MetaFileSuffix, + data: []byte("{}"), + lookupKey: "test-file.txt" + onedrive.MetaFileSuffix, + }, + { + name: "folder-a" + onedrive.DirMetaFileSuffix, + data: []byte("{}"), + lookupKey: "folder-a" + onedrive.DirMetaFileSuffix, + }, + { + name: "b" + onedrive.DirMetaFileSuffix, + data: []byte("{}"), + lookupKey: "b" + onedrive.DirMetaFileSuffix, }, }, }, @@ -707,9 +748,19 @@ func (suite *GraphConnectorIntegrationSuite) TestRestoreAndBackup() { category: path.FilesCategory, items: []itemInfo{ { - name: "test-file.txt", + name: "test-file.txt" + onedrive.DataFileSuffix, data: []byte(strings.Repeat("b", 65)), - lookupKey: "test-file.txt", + lookupKey: "test-file.txt" + onedrive.DataFileSuffix, + }, + { + name: "test-file.txt" + onedrive.MetaFileSuffix, + data: []byte("{}"), + lookupKey: "test-file.txt" + onedrive.MetaFileSuffix, + }, + { + name: "b" + onedrive.DirMetaFileSuffix, + data: []byte("{}"), + lookupKey: "b" + onedrive.DirMetaFileSuffix, }, }, }, @@ -724,9 +775,19 @@ func (suite *GraphConnectorIntegrationSuite) TestRestoreAndBackup() { category: path.FilesCategory, items: []itemInfo{ { - name: "test-file.txt", + name: "test-file.txt" + onedrive.DataFileSuffix, data: []byte(strings.Repeat("c", 129)), - lookupKey: "test-file.txt", + lookupKey: "test-file.txt" + onedrive.DataFileSuffix, + }, + { + name: "test-file.txt" + onedrive.MetaFileSuffix, + data: []byte("{}"), + lookupKey: "test-file.txt" + onedrive.MetaFileSuffix, + }, + { + name: "folder-a" + onedrive.DirMetaFileSuffix, + data: []byte("{}"), + lookupKey: "folder-a" + onedrive.DirMetaFileSuffix, }, }, }, @@ -742,9 +803,14 @@ func (suite *GraphConnectorIntegrationSuite) TestRestoreAndBackup() { category: path.FilesCategory, items: []itemInfo{ { - name: "test-file.txt", + name: "test-file.txt" + onedrive.DataFileSuffix, data: []byte(strings.Repeat("d", 257)), - lookupKey: "test-file.txt", + lookupKey: "test-file.txt" + onedrive.DataFileSuffix, + }, + { + name: "test-file.txt" + 
onedrive.MetaFileSuffix, + data: []byte("{}"), + lookupKey: "test-file.txt" + onedrive.MetaFileSuffix, }, }, }, @@ -758,9 +824,67 @@ func (suite *GraphConnectorIntegrationSuite) TestRestoreAndBackup() { category: path.FilesCategory, items: []itemInfo{ { - name: "test-file.txt", + name: "test-file.txt" + onedrive.DataFileSuffix, data: []byte(strings.Repeat("e", 257)), - lookupKey: "test-file.txt", + lookupKey: "test-file.txt" + onedrive.DataFileSuffix, + }, + { + name: "test-file.txt" + onedrive.MetaFileSuffix, + data: []byte("{}"), + lookupKey: "test-file.txt" + onedrive.MetaFileSuffix, + }, + }, + }, + }, + }, + { + name: "OneDriveFoldersAndFilesWithMetadata", + service: path.OneDriveService, + resource: Users, + collections: []colInfo{ + { + pathElements: []string{ + "drives", + driveID, + "root:", + }, + category: path.FilesCategory, + items: []itemInfo{ + { + name: "test-file.txt" + onedrive.DataFileSuffix, + data: []byte(strings.Repeat("a", 33)), + lookupKey: "test-file.txt" + onedrive.DataFileSuffix, + }, + { + name: "test-file.txt" + onedrive.MetaFileSuffix, + data: getTestMetaJSON(suite.T(), suite.secondaryUser, []string{"write"}), + lookupKey: "test-file.txt" + onedrive.MetaFileSuffix, + }, + { + name: "b" + onedrive.DirMetaFileSuffix, + data: getTestMetaJSON(suite.T(), suite.secondaryUser, []string{"read"}), + lookupKey: "b" + onedrive.DirMetaFileSuffix, + }, + }, + }, + { + pathElements: []string{ + "drives", + driveID, + "root:", + "b", + }, + category: path.FilesCategory, + items: []itemInfo{ + { + name: "test-file.txt" + onedrive.DataFileSuffix, + data: []byte(strings.Repeat("e", 66)), + lookupKey: "test-file.txt" + onedrive.DataFileSuffix, + }, + { + name: "test-file.txt" + onedrive.MetaFileSuffix, + data: getTestMetaJSON(suite.T(), suite.secondaryUser, []string{"read"}), + lookupKey: "test-file.txt" + onedrive.MetaFileSuffix, }, }, }, @@ -770,7 +894,14 @@ func (suite *GraphConnectorIntegrationSuite) TestRestoreAndBackup() { for _, test := range table { suite.T().Run(test.name, func(t *testing.T) { - runRestoreBackupTest(t, suite.acct, test, suite.connector.tenant, []string{suite.user}) + runRestoreBackupTest( + t, + suite.acct, + test, + suite.connector.tenant, + []string{suite.user}, + control.Options{RestorePermissions: true}, + ) }) } } @@ -857,7 +988,7 @@ func (suite *GraphConnectorIntegrationSuite) TestMultiFolderBackupDifferentNames }, }) - totalItems, collections, expectedData := collectionsForInfo( + totalItems, _, collections, expectedData := collectionsForInfo( t, test.service, suite.connector.tenant, @@ -879,7 +1010,14 @@ func (suite *GraphConnectorIntegrationSuite) TestMultiFolderBackupDifferentNames ) restoreGC := loadConnector(ctx, t, graph.HTTPClient(graph.NoTimeout()), test.resource) - deets, err := restoreGC.RestoreDataCollections(ctx, suite.acct, restoreSel, dest, collections) + deets, err := restoreGC.RestoreDataCollections( + ctx, + suite.acct, + restoreSel, + dest, + control.Options{RestorePermissions: true}, + collections, + ) require.NoError(t, err) require.NotNil(t, deets) @@ -900,7 +1038,7 @@ func (suite *GraphConnectorIntegrationSuite) TestMultiFolderBackupDifferentNames backupSel := backupSelectorForExpected(t, test.service, expectedDests) t.Log("Selective backup of", backupSel) - dcs, excludes, err := backupGC.DataCollections(ctx, backupSel, nil, control.Options{}) + dcs, excludes, err := backupGC.DataCollections(ctx, backupSel, nil, control.Options{RestorePermissions: true}) require.NoError(t, err) // No excludes yet because this isn't an 
incremental backup. assert.Empty(t, excludes) @@ -909,7 +1047,7 @@ func (suite *GraphConnectorIntegrationSuite) TestMultiFolderBackupDifferentNames // Pull the data prior to waiting for the status as otherwise it will // deadlock. - skipped := checkCollections(t, allItems, allExpectedData, dcs) + skipped := checkCollections(t, allItems, allExpectedData, dcs, true) status := backupGC.AwaitStatus() assert.Equal(t, allItems+skipped, status.ObjectCount, "status.ObjectCount") @@ -918,6 +1056,313 @@ func (suite *GraphConnectorIntegrationSuite) TestMultiFolderBackupDifferentNames } } +func (suite *GraphConnectorIntegrationSuite) TestPermissionsRestoreAndBackup() { + ctx, flush := tester.NewContext() + defer flush() + + // Get the default drive ID for the test user. + driveID := mustGetDefaultDriveID( + suite.T(), + ctx, + suite.connector.Service, + suite.user, + ) + + table := []restoreBackupInfo{ + { + name: "FilePermissionsRestore", + service: path.OneDriveService, + resource: Users, + collections: []colInfo{ + { + pathElements: []string{ + "drives", + driveID, + "root:", + }, + category: path.FilesCategory, + items: []itemInfo{ + { + name: "test-file.txt" + onedrive.DataFileSuffix, + data: []byte(strings.Repeat("a", 33)), + lookupKey: "test-file.txt" + onedrive.DataFileSuffix, + }, + { + name: "test-file.txt" + onedrive.MetaFileSuffix, + data: getTestMetaJSON(suite.T(), suite.secondaryUser, []string{"write"}), + lookupKey: "test-file.txt" + onedrive.MetaFileSuffix, + }, + }, + }, + }, + }, + + { + name: "FileInsideFolderPermissionsRestore", + service: path.OneDriveService, + resource: Users, + collections: []colInfo{ + { + pathElements: []string{ + "drives", + driveID, + "root:", + }, + category: path.FilesCategory, + items: []itemInfo{ + { + name: "test-file.txt" + onedrive.DataFileSuffix, + data: []byte(strings.Repeat("a", 33)), + lookupKey: "test-file.txt" + onedrive.DataFileSuffix, + }, + { + name: "test-file.txt" + onedrive.MetaFileSuffix, + data: []byte("{}"), + lookupKey: "test-file.txt" + onedrive.MetaFileSuffix, + }, + { + name: "b" + onedrive.DirMetaFileSuffix, + data: []byte("{}"), + lookupKey: "b" + onedrive.DirMetaFileSuffix, + }, + }, + }, + { + pathElements: []string{ + "drives", + driveID, + "root:", + "b", + }, + category: path.FilesCategory, + items: []itemInfo{ + { + name: "test-file.txt" + onedrive.DataFileSuffix, + data: []byte(strings.Repeat("e", 66)), + lookupKey: "test-file.txt" + onedrive.DataFileSuffix, + }, + { + name: "test-file.txt" + onedrive.MetaFileSuffix, + data: getTestMetaJSON(suite.T(), suite.secondaryUser, []string{"read"}), + lookupKey: "test-file.txt" + onedrive.MetaFileSuffix, + }, + }, + }, + }, + }, + + { + name: "FileAndFolderPermissionsRestore", + service: path.OneDriveService, + resource: Users, + collections: []colInfo{ + { + pathElements: []string{ + "drives", + driveID, + "root:", + }, + category: path.FilesCategory, + items: []itemInfo{ + { + name: "test-file.txt" + onedrive.DataFileSuffix, + data: []byte(strings.Repeat("a", 33)), + lookupKey: "test-file.txt" + onedrive.DataFileSuffix, + }, + { + name: "test-file.txt" + onedrive.MetaFileSuffix, + data: getTestMetaJSON(suite.T(), suite.secondaryUser, []string{"write"}), + lookupKey: "test-file.txt" + onedrive.MetaFileSuffix, + }, + { + name: "b" + onedrive.DirMetaFileSuffix, + data: getTestMetaJSON(suite.T(), suite.secondaryUser, []string{"read"}), + lookupKey: "b" + onedrive.DirMetaFileSuffix, + }, + }, + }, + { + pathElements: []string{ + "drives", + driveID, + "root:", + "b", + }, + category: 
path.FilesCategory, + items: []itemInfo{ + { + name: "test-file.txt" + onedrive.DataFileSuffix, + data: []byte(strings.Repeat("e", 66)), + lookupKey: "test-file.txt" + onedrive.DataFileSuffix, + }, + { + name: "test-file.txt" + onedrive.MetaFileSuffix, + data: getTestMetaJSON(suite.T(), suite.secondaryUser, []string{"read"}), + lookupKey: "test-file.txt" + onedrive.MetaFileSuffix, + }, + }, + }, + }, + }, + + { + name: "FileAndFolderSeparatePermissionsRestore", + service: path.OneDriveService, + resource: Users, + collections: []colInfo{ + { + pathElements: []string{ + "drives", + driveID, + "root:", + }, + category: path.FilesCategory, + items: []itemInfo{ + { + name: "b" + onedrive.DirMetaFileSuffix, + data: getTestMetaJSON(suite.T(), suite.secondaryUser, []string{"read"}), + lookupKey: "b" + onedrive.DirMetaFileSuffix, + }, + }, + }, + { + pathElements: []string{ + "drives", + driveID, + "root:", + "b", + }, + category: path.FilesCategory, + items: []itemInfo{ + { + name: "test-file.txt" + onedrive.DataFileSuffix, + data: []byte(strings.Repeat("e", 66)), + lookupKey: "test-file.txt" + onedrive.DataFileSuffix, + }, + { + name: "test-file.txt" + onedrive.MetaFileSuffix, + data: getTestMetaJSON(suite.T(), suite.secondaryUser, []string{"write"}), + lookupKey: "test-file.txt" + onedrive.MetaFileSuffix, + }, + }, + }, + }, + }, + + { + name: "FolderAndNoChildPermissionsRestore", + service: path.OneDriveService, + resource: Users, + collections: []colInfo{ + { + pathElements: []string{ + "drives", + driveID, + "root:", + }, + category: path.FilesCategory, + items: []itemInfo{ + { + name: "b" + onedrive.DirMetaFileSuffix, + data: getTestMetaJSON(suite.T(), suite.secondaryUser, []string{"read"}), + lookupKey: "b" + onedrive.DirMetaFileSuffix, + }, + }, + }, + { + pathElements: []string{ + "drives", + driveID, + "root:", + "b", + }, + category: path.FilesCategory, + items: []itemInfo{ + { + name: "test-file.txt" + onedrive.DataFileSuffix, + data: []byte(strings.Repeat("e", 66)), + lookupKey: "test-file.txt" + onedrive.DataFileSuffix, + }, + { + name: "test-file.txt" + onedrive.MetaFileSuffix, + data: []byte("{}"), + lookupKey: "test-file.txt" + onedrive.MetaFileSuffix, + }, + }, + }, + }, + }, + } + + for _, test := range table { + suite.T().Run(test.name, func(t *testing.T) { + runRestoreBackupTest(t, + suite.acct, + test, + suite.connector.tenant, + []string{suite.user}, + control.Options{RestorePermissions: true}, + ) + }) + } +} + +func (suite *GraphConnectorIntegrationSuite) TestPermissionsBackupAndNoRestore() { + ctx, flush := tester.NewContext() + defer flush() + + // Get the default drive ID for the test user. 
+ driveID := mustGetDefaultDriveID( + suite.T(), + ctx, + suite.connector.Service, + suite.user, + ) + + table := []restoreBackupInfo{ + { + name: "FilePermissionsRestore", + service: path.OneDriveService, + resource: Users, + collections: []colInfo{ + { + pathElements: []string{ + "drives", + driveID, + "root:", + }, + category: path.FilesCategory, + items: []itemInfo{ + { + name: "test-file.txt" + onedrive.DataFileSuffix, + data: []byte(strings.Repeat("a", 33)), + lookupKey: "test-file.txt" + onedrive.DataFileSuffix, + }, + { + name: "test-file.txt" + onedrive.MetaFileSuffix, + data: getTestMetaJSON(suite.T(), suite.secondaryUser, []string{"write"}), + lookupKey: "test-file.txt" + onedrive.MetaFileSuffix, + }, + }, + }, + }, + }, + } + + for _, test := range table { + suite.T().Run(test.name, func(t *testing.T) { + runRestoreBackupTest( + t, + suite.acct, + test, + suite.connector.tenant, + []string{suite.user}, + control.Options{RestorePermissions: false}, + ) + }) + } +} + // TODO: this should only be run during smoke tests, not part of the standard CI. // That's why it's set aside instead of being included in the other test set. func (suite *GraphConnectorIntegrationSuite) TestRestoreAndBackup_largeMailAttachment() { @@ -942,5 +1387,12 @@ func (suite *GraphConnectorIntegrationSuite) TestRestoreAndBackup_largeMailAttac }, } - runRestoreBackupTest(suite.T(), suite.acct, test, suite.connector.tenant, []string{suite.user}) + runRestoreBackupTest( + suite.T(), + suite.acct, + test, + suite.connector.tenant, + []string{suite.user}, + control.Options{RestorePermissions: true}, + ) } diff --git a/src/internal/connector/onedrive/collection.go b/src/internal/connector/onedrive/collection.go index c4e1825bd..3b8ff5cbb 100644 --- a/src/internal/connector/onedrive/collection.go +++ b/src/internal/connector/onedrive/collection.go @@ -5,6 +5,7 @@ import ( "context" "io" "net/http" + "strings" "sync" "sync/atomic" "time" @@ -34,6 +35,10 @@ const ( // Max number of retries to get doc from M365 // Seems to timeout at times because of multiple requests maxRetries = 4 // 1 + 3 retries + + MetaFileSuffix = ".meta" + DirMetaFileSuffix = ".dirmeta" + DataFileSuffix = ".data" ) var ( @@ -56,12 +61,13 @@ type Collection struct { // M365 IDs of file items within this collection driveItems map[string]models.DriveItemable // M365 ID of the drive this collection was created from - driveID string - source driveSource - service graph.Servicer - statusUpdater support.StatusUpdater - itemReader itemReaderFunc - ctrl control.Options + driveID string + source driveSource + service graph.Servicer + statusUpdater support.StatusUpdater + itemReader itemReaderFunc + itemMetaReader itemMetaReaderFunc + ctrl control.Options // should only be true if the old delta token expired doNotMergeItems bool @@ -73,6 +79,15 @@ type itemReaderFunc func( item models.DriveItemable, ) (itemInfo details.ItemInfo, itemData io.ReadCloser, err error) +// itemMetaReaderFunc returns a reader for the metadata of the +// specified item +type itemMetaReaderFunc func( + ctx context.Context, + service graph.Servicer, + driveID string, + item models.DriveItemable, +) (io.ReadCloser, int, error) + // NewCollection creates a Collection func NewCollection( itemClient *http.Client, @@ -101,6 +116,7 @@ func NewCollection( c.itemReader = sharePointItemReader default: c.itemReader = oneDriveItemReader + c.itemMetaReader = oneDriveItemMetaReader } return c @@ -138,6 +154,21 @@ func (oc Collection) DoNotMergeItems() bool { return oc.doNotMergeItems } +// 
UserPermission stores the permissions a specific user has on a +// OneDrive item. +type UserPermission struct { + ID string `json:"id,omitempty"` + Roles []string `json:"role,omitempty"` + Email string `json:"email,omitempty"` + Expiration *time.Time `json:"expiration,omitempty"` +} + +// Metadata contains metadata about the Item. It gets stored in a +// separate file in kopia +type Metadata struct { + Permissions []UserPermission `json:"permissions,omitempty"` +} + // Item represents a single item retrieved from OneDrive type Item struct { id string @@ -173,18 +204,21 @@ func (od *Item) ModTime() time.Time { // and uses the collection `itemReader` to read the item func (oc *Collection) populateItems(ctx context.Context) { var ( - errs error - byteCount int64 - itemsRead int64 - wg sync.WaitGroup - m sync.Mutex + errs error + byteCount int64 + itemsRead int64 + dirsRead int64 + itemsFound int64 + dirsFound int64 + wg sync.WaitGroup + m sync.Mutex ) // Retrieve the OneDrive folder path to set later in // `details.OneDriveInfo` parentPathString, err := path.GetDriveFolderPath(oc.folderPath) if err != nil { - oc.reportAsCompleted(ctx, 0, 0, err) + oc.reportAsCompleted(ctx, 0, 0, 0, err) return } @@ -205,16 +239,11 @@ m.Unlock() } - for id, item := range oc.driveItems { + for _, item := range oc.driveItems { if oc.ctrl.FailFast && errs != nil { break } - if item == nil { - errUpdater(id, errors.New("nil item")) - continue - } - semaphoreCh <- struct{}{} wg.Add(1) @@ -223,13 +252,61 @@ defer wg.Done() defer func() { <-semaphoreCh }() + // Read the item var ( - itemID = *item.GetId() - itemName = *item.GetName() - itemSize = *item.GetSize() - itemInfo details.ItemInfo + itemID = *item.GetId() + itemName = *item.GetName() + itemSize = *item.GetSize() + itemInfo details.ItemInfo + itemMeta io.ReadCloser + itemMetaSize int + metaSuffix string + err error ) + isFile := item.GetFile() != nil + + if isFile { + atomic.AddInt64(&itemsFound, 1) + + metaSuffix = MetaFileSuffix + } else { + atomic.AddInt64(&dirsFound, 1) + + metaSuffix = DirMetaFileSuffix + } + + if oc.source == OneDriveSource { + // Fetch metadata for the item + for i := 1; i <= maxRetries; i++ { + if oc.ctrl.ToggleFeatures.DisablePermissionsBackup { + // We still write the metadata file, but with empty + // permissions, since we don't know at backup time how + // the restore will be invoked. + itemMeta = io.NopCloser(strings.NewReader("{}")) + itemMetaSize = 2 + + break + } + + itemMeta, itemMetaSize, err = oc.itemMetaReader(ctx, oc.service, oc.driveID, item) + + // retry on timeout-type errors; break otherwise + if err == nil || !graph.IsErrTimeout(err) { + break + } + + if i < maxRetries { + time.Sleep(1 * time.Second) + } + } + + if err != nil { + errUpdater(*item.GetId(), err) + return + } + } + switch oc.source { case SharePointSource: itemInfo.SharePoint = sharePointItemInfo(item, itemSize) @@ -239,101 +316,127 @@ itemInfo.OneDrive.ParentPath = parentPathString } - // Construct a new lazy readCloser to feed to the collection consumer. - // This ensures that downloads won't be attempted unless that consumer - // attempts to read bytes. Assumption is that kopia will check things - // like file modtimes before attempting to read. 
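// ----------------------------------------------------------------------------
// Illustrative sketch (not part of the patch): the metadata fetch above uses
// the same bounded-retry shape as the item download below — retry only on
// timeout-style errors (other error classes are already retried by the default
// graph middleware), sleep between attempts, give up after maxRetries. In
// isolation, with a hypothetical fetch/isTimeout pair standing in for
// oc.itemMetaReader and graph.IsErrTimeout (assumes the io and time imports):
//
//	func fetchWithRetry(
//		fetch func() (io.ReadCloser, int, error),
//		isTimeout func(error) bool,
//		maxRetries int,
//	) (rc io.ReadCloser, size int, err error) {
//		for i := 1; i <= maxRetries; i++ {
//			rc, size, err = fetch()
//			if err == nil || !isTimeout(err) {
//				break // success, or an error that retrying won't help
//			}
//			if i < maxRetries {
//				time.Sleep(1 * time.Second)
//			}
//		}
//		return rc, size, err
//	}
// ----------------------------------------------------------------------------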
- itemReader := lazy.NewLazyReadCloser(func() (io.ReadCloser, error) { - // Read the item - var ( - itemData io.ReadCloser - err error - ) + if isFile { + dataSuffix := "" + if oc.source == OneDriveSource { + dataSuffix = DataFileSuffix + } - for i := 1; i <= maxRetries; i++ { - _, itemData, err = oc.itemReader(oc.itemClient, item) - if err == nil { - break - } + // Construct a new lazy readCloser to feed to the collection consumer. + // This ensures that downloads won't be attempted unless that consumer + // attempts to read bytes. Assumption is that kopia will check things + // like file modtimes before attempting to read. + itemReader := lazy.NewLazyReadCloser(func() (io.ReadCloser, error) { + // Read the item + var ( + itemData io.ReadCloser + err error + ) - if graph.IsErrUnauthorized(err) { - // assume unauthorized requests are a sign of an expired - // jwt token, and that we've overrun the available window - // to download the actual file. Re-downloading the item - // will refresh that download url. - di, diErr := getDriveItem(ctx, oc.service, oc.driveID, itemID) - if diErr != nil { - err = errors.Wrap(diErr, "retrieving expired item") + for i := 1; i <= maxRetries; i++ { + _, itemData, err = oc.itemReader(oc.itemClient, item) + if err == nil { break } - item = di + if graph.IsErrUnauthorized(err) { + // assume unauthorized requests are a sign of an expired + // jwt token, and that we've overrun the available window + // to download the actual file. Re-downloading the item + // will refresh that download url. + di, diErr := getDriveItem(ctx, oc.service, oc.driveID, itemID) + if diErr != nil { + err = errors.Wrap(diErr, "retrieving expired item") + break + } - continue + item = di - } else if !graph.IsErrTimeout(err) && - !graph.IsInternalServerError(err) { - // Don't retry for non-timeout, on-unauth, as - // we are already retrying it in the default - // retry middleware - break + continue + + } else if !graph.IsErrTimeout(err) && + !graph.IsInternalServerError(err) { + // Don't retry for non-timeout, on-unauth, as + // we are already retrying it in the default + // retry middleware + break + } + + if i < maxRetries { + time.Sleep(1 * time.Second) + } } - if i < maxRetries { - time.Sleep(1 * time.Second) + // check for errors following retries + if err != nil { + errUpdater(itemID, err) + return nil, err } + + // display/log the item download + progReader, closer := observe.ItemProgress( + ctx, + itemData, + observe.ItemBackupMsg, + observe.PII(itemName+dataSuffix), + itemSize, + ) + go closer() + + return progReader, nil + }) + + oc.data <- &Item{ + id: itemName + dataSuffix, + data: itemReader, + info: itemInfo, } + } - // check for errors following retries - if err != nil { - errUpdater(itemID, err) - return nil, err + if oc.source == OneDriveSource { + metaReader := lazy.NewLazyReadCloser(func() (io.ReadCloser, error) { + progReader, closer := observe.ItemProgress( + ctx, itemMeta, observe.ItemBackupMsg, + observe.PII(itemName+metaSuffix), int64(itemMetaSize)) + go closer() + return progReader, nil + }) + + oc.data <- &Item{ + id: itemName + metaSuffix, + data: metaReader, + info: itemInfo, } + } - // display/log the item download - progReader, closer := observe.ItemProgress(ctx, itemData, observe.ItemBackupMsg, observe.PII(itemName), itemSize) - go closer() - - return progReader, nil - }) - - // This can cause inaccurate counts. Right now it counts all the items - // we intend to read. 
Errors within the lazy readCloser will create a - // conflict: an item is both successful and erroneous. But the async - // control to fix that is more error-prone than helpful. - // - // TODO: transform this into a stats bus so that async control of stats - // aggregation is handled at the backup level, not at the item iteration - // level. - // // Item read successfully, add to collection - atomic.AddInt64(&itemsRead, 1) + if isFile { + atomic.AddInt64(&itemsRead, 1) + } else { + atomic.AddInt64(&dirsRead, 1) + } + // byteCount iteration atomic.AddInt64(&byteCount, itemSize) - oc.data <- &Item{ - id: itemName, - data: itemReader, - info: itemInfo, - } folderProgress <- struct{}{} }(item) } wg.Wait() - oc.reportAsCompleted(ctx, int(itemsRead), byteCount, errs) + oc.reportAsCompleted(ctx, int(itemsFound), int(itemsRead), byteCount, errs) } -func (oc *Collection) reportAsCompleted(ctx context.Context, itemsRead int, byteCount int64, errs error) { +func (oc *Collection) reportAsCompleted(ctx context.Context, itemsFound, itemsRead int, byteCount int64, errs error) { close(oc.data) status := support.CreateStatus(ctx, support.Backup, 1, // num folders (always 1) support.CollectionMetrics{ - Objects: len(oc.driveItems), // items to read, - Successes: itemsRead, // items read successfully, - TotalBytes: byteCount, // Number of bytes read in the operation, + Objects: itemsFound, // items to read, + Successes: itemsRead, // items read successfully, + TotalBytes: byteCount, // Number of bytes read in the operation, }, errs, oc.folderPath.Folder(), // Additional details diff --git a/src/internal/connector/onedrive/collection_test.go b/src/internal/connector/onedrive/collection_test.go index b608e9068..b8e5fe446 100644 --- a/src/internal/connector/onedrive/collection_test.go +++ b/src/internal/connector/onedrive/collection_test.go @@ -2,8 +2,11 @@ package onedrive import ( "bytes" + "context" + "encoding/json" "io" "net/http" + "strings" "sync" "testing" "time" @@ -60,6 +63,14 @@ func (suite *CollectionUnitTestSuite) TestCollection() { testItemName = "itemName" testItemData = []byte("testdata") now = time.Now() + testItemMeta = Metadata{Permissions: []UserPermission{ + { + ID: "testMetaID", + Roles: []string{"read", "write"}, + Email: "email@provider.com", + Expiration: &now, + }, + }} ) type nst struct { @@ -164,6 +175,7 @@ func (suite *CollectionUnitTestSuite) TestCollection() { // Set a item reader, add an item and validate we get the item back mockItem := models.NewDriveItem() mockItem.SetId(&testItemID) + mockItem.SetFile(models.NewFile()) mockItem.SetName(&test.itemDeets.name) mockItem.SetSize(&test.itemDeets.size) mockItem.SetCreatedDateTime(&test.itemDeets.time) @@ -174,6 +186,18 @@ func (suite *CollectionUnitTestSuite) TestCollection() { } coll.itemReader = test.itemReader + coll.itemMetaReader = func(_ context.Context, + _ graph.Servicer, + _ string, + _ models.DriveItemable, + ) (io.ReadCloser, int, error) { + metaJSON, err := json.Marshal(testItemMeta) + if err != nil { + return nil, 0, err + } + + return io.NopCloser(bytes.NewReader(metaJSON)), len(metaJSON), nil + } // Read items from the collection wg.Add(1) @@ -184,28 +208,54 @@ func (suite *CollectionUnitTestSuite) TestCollection() { wg.Wait() + if test.source == OneDriveSource { + require.Len(t, readItems, 2) // .data and .meta + } else { + require.Len(t, readItems, 1) + } + + // Expect only 1 item + require.Equal(t, 1, collStatus.ObjectCount) + require.Equal(t, 1, collStatus.Successful) + // Validate item info and data readItem := 
readItems[0] readItemInfo := readItem.(data.StreamInfo) - readData, err := io.ReadAll(readItem.ToReader()) - require.NoError(t, err) - assert.Equal(t, testItemData, readData) - - // Expect only 1 item - require.Len(t, readItems, 1) - require.Equal(t, 1, collStatus.ObjectCount, "items iterated") - require.Equal(t, 1, collStatus.Successful, "items successful") - - assert.Equal(t, testItemName, readItem.UUID()) + if test.source == OneDriveSource { + assert.Equal(t, testItemName+DataFileSuffix, readItem.UUID()) + } else { + assert.Equal(t, testItemName, readItem.UUID()) + } require.Implements(t, (*data.StreamModTime)(nil), readItem) mt := readItem.(data.StreamModTime) assert.Equal(t, now, mt.ModTime()) + readData, err := io.ReadAll(readItem.ToReader()) + require.NoError(t, err) + name, parentPath := test.infoFrom(t, readItemInfo.Info()) + + assert.Equal(t, testItemData, readData) assert.Equal(t, testItemName, name) assert.Equal(t, driveFolderPath, parentPath) + + if test.source == OneDriveSource { + readItemMeta := readItems[1] + + assert.Equal(t, testItemName+MetaFileSuffix, readItemMeta.UUID()) + + readMetaData, err := io.ReadAll(readItemMeta.ToReader()) + require.NoError(t, err) + + tm, err := json.Marshal(testItemMeta) + if err != nil { + t.Fatal("unable to marshall test permissions", err) + } + + assert.Equal(t, tm, readMetaData) + } }) } } @@ -255,6 +305,7 @@ func (suite *CollectionUnitTestSuite) TestCollectionReadError() { mockItem := models.NewDriveItem() mockItem.SetId(&testItemID) + mockItem.SetFile(models.NewFile()) mockItem.SetName(&name) mockItem.SetSize(&size) mockItem.SetCreatedDateTime(&now) @@ -265,6 +316,14 @@ func (suite *CollectionUnitTestSuite) TestCollectionReadError() { return details.ItemInfo{}, nil, assert.AnError } + coll.itemMetaReader = func(_ context.Context, + _ graph.Servicer, + _ string, + _ models.DriveItemable, + ) (io.ReadCloser, int, error) { + return io.NopCloser(strings.NewReader(`{}`)), 2, nil + } + collItem, ok := <-coll.Items() assert.True(t, ok) @@ -279,3 +338,87 @@ func (suite *CollectionUnitTestSuite) TestCollectionReadError() { }) } } + +func (suite *CollectionUnitTestSuite) TestCollectionDisablePermissionsBackup() { + table := []struct { + name string + source driveSource + }{ + { + name: "oneDrive", + source: OneDriveSource, + }, + } + for _, test := range table { + suite.T().Run(test.name, func(t *testing.T) { + var ( + testItemID = "fakeItemID" + testItemName = "Fake Item" + testItemSize = int64(10) + + collStatus = support.ConnectorOperationStatus{} + wg = sync.WaitGroup{} + ) + + wg.Add(1) + + folderPath, err := GetCanonicalPath("drive/driveID1/root:/folderPath", "a-tenant", "a-user", test.source) + require.NoError(t, err) + + coll := NewCollection( + graph.HTTPClient(graph.NoTimeout()), + folderPath, + "fakeDriveID", + suite, + suite.testStatusUpdater(&wg, &collStatus), + test.source, + control.Options{ToggleFeatures: control.Toggles{DisablePermissionsBackup: true}}) + + now := time.Now() + mockItem := models.NewDriveItem() + mockItem.SetFile(models.NewFile()) + mockItem.SetId(&testItemID) + mockItem.SetName(&testItemName) + mockItem.SetSize(&testItemSize) + mockItem.SetCreatedDateTime(&now) + mockItem.SetLastModifiedDateTime(&now) + coll.Add(mockItem) + + coll.itemReader = func( + *http.Client, + models.DriveItemable, + ) (details.ItemInfo, io.ReadCloser, error) { + return details.ItemInfo{OneDrive: &details.OneDriveInfo{ItemName: "fakeName", Modified: time.Now()}}, + io.NopCloser(strings.NewReader("Fake Data!")), + nil + } + + 
coll.itemMetaReader = func(_ context.Context, + _ graph.Servicer, + _ string, + _ models.DriveItemable, + ) (io.ReadCloser, int, error) { + return io.NopCloser(strings.NewReader(`{"key": "value"}`)), 16, nil + } + + readItems := []data.Stream{} + for item := range coll.Items() { + readItems = append(readItems, item) + } + + wg.Wait() + + // Only the file counts as an object; the meta entry isn't counted + require.Equal(t, 1, collStatus.ObjectCount) + require.Equal(t, 1, collStatus.Successful) + + for _, i := range readItems { + if strings.HasSuffix(i.UUID(), MetaFileSuffix) { + content, err := io.ReadAll(i.ToReader()) + require.NoError(t, err) + require.Equal(t, []byte("{}"), content) + } + } + }) + } +} diff --git a/src/internal/connector/onedrive/collections.go b/src/internal/connector/onedrive/collections.go index 200e51e23..50c5323d9 100644 --- a/src/internal/connector/onedrive/collections.go +++ b/src/internal/connector/onedrive/collections.go @@ -430,6 +430,12 @@ func (c *Collections) UpdateCollections( // already created and partially populated. updatePath(newPaths, *item.GetId(), folderPath.String()) + if c.source != OneDriveSource { + continue + } + + fallthrough + case item.GetFile() != nil: if item.GetDeleted() != nil { excluded[*item.GetId()] = struct{}{} @@ -445,6 +451,7 @@ // the exclude list. col, found := c.CollectionMap[collectionPath.String()] + if !found { // TODO(ashmrtn): Compare old and new path and set collection state // accordingly. @@ -459,13 +466,17 @@ c.CollectionMap[collectionPath.String()] = col c.NumContainers++ - c.NumItems++ } collection := col.(*Collection) collection.Add(item) - c.NumFiles++ + c.NumItems++ + if item.GetFile() != nil { + // This is necessary as we have a fallthrough for + // folders and packages + c.NumFiles++ + } default: return errors.Errorf("item type not supported. item name : %s", *item.GetName()) diff --git a/src/internal/connector/onedrive/collections_test.go b/src/internal/connector/onedrive/collections_test.go index 21dae9549..3316a10c5 100644 --- a/src/internal/connector/onedrive/collections_test.go +++ b/src/internal/connector/onedrive/collections_test.go @@ -139,7 +139,7 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() { user, testBaseDrivePath, ), - expectedItemCount: 2, + expectedItemCount: 1, expectedFileCount: 1, expectedContainerCount: 1, // Root folder is skipped since it's always present. 
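For context, the UpdateCollections change above hinges on Go's fallthrough: for OneDrive, folder and package items now fall into the file branch so they get added to a collection (and later emit a .dirmeta entry), while SharePoint folders are still skipped. A minimal, self-contained sketch of that control flow, with simplified stand-in types (the real code dispatches on models.DriveItemable and uses continue inside its item loop):

	package main

	import "fmt"

	type driveItem struct {
		name     string
		isFolder bool // also covers packages in the real code
		isFile   bool
	}

	func addToCollections(item driveItem, isOneDrive bool) {
		switch {
		case item.isFolder:
			// Folder path bookkeeping happens here first.
			if !isOneDrive {
				// SharePoint folders carry no metadata sidecar; stop here.
				return
			}
			// OneDrive folders fall into the file branch below so the
			// collection later emits their .dirmeta entry.
			fallthrough
		case item.isFile:
			fmt.Printf("adding %s to a collection\n", item.name)
		}
	}

	func main() {
		addToCollections(driveItem{name: "folder-a", isFolder: true}, true)  // collected
		addToCollections(driveItem{name: "folder-a", isFolder: true}, false) // skipped
		addToCollections(driveItem{name: "file.txt", isFile: true}, true)    // collected
	}
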
@@ -151,10 +151,15 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() { items: []models.DriveItemable{ driveItem("folder", "folder", testBaseDrivePath, false, true, false), }, - inputFolderMap: map[string]string{}, - scope: anyFolder, - expect: assert.NoError, - expectedCollectionPaths: []string{}, + inputFolderMap: map[string]string{}, + scope: anyFolder, + expect: assert.NoError, + expectedCollectionPaths: expectedPathAsSlice( + suite.T(), + tenant, + user, + testBaseDrivePath, + ), expectedMetadataPaths: map[string]string{ "folder": expectedPathAsSlice( suite.T(), @@ -163,17 +168,24 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() { testBaseDrivePath+"/folder", )[0], }, - expectedExcludes: map[string]struct{}{}, + expectedItemCount: 1, + expectedContainerCount: 1, + expectedExcludes: map[string]struct{}{}, }, { testCase: "Single Package", items: []models.DriveItemable{ driveItem("package", "package", testBaseDrivePath, false, false, true), }, - inputFolderMap: map[string]string{}, - scope: anyFolder, - expect: assert.NoError, - expectedCollectionPaths: []string{}, + inputFolderMap: map[string]string{}, + scope: anyFolder, + expect: assert.NoError, + expectedCollectionPaths: expectedPathAsSlice( + suite.T(), + tenant, + user, + testBaseDrivePath, + ), expectedMetadataPaths: map[string]string{ "package": expectedPathAsSlice( suite.T(), @@ -182,7 +194,9 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() { testBaseDrivePath+"/package", )[0], }, - expectedExcludes: map[string]struct{}{}, + expectedItemCount: 1, + expectedContainerCount: 1, + expectedExcludes: map[string]struct{}{}, }, { testCase: "1 root file, 1 folder, 1 package, 2 files, 3 collections", @@ -204,7 +218,7 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() { testBaseDrivePath+folder, testBaseDrivePath+pkg, ), - expectedItemCount: 6, + expectedItemCount: 5, expectedFileCount: 3, expectedContainerCount: 3, expectedMetadataPaths: map[string]string{ @@ -238,23 +252,17 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() { inputFolderMap: map[string]string{}, scope: (&selectors.OneDriveBackup{}).Folders([]string{"folder"})[0], expect: assert.NoError, - expectedCollectionPaths: append( - expectedPathAsSlice( - suite.T(), - tenant, - user, - testBaseDrivePath+"/folder", - ), - expectedPathAsSlice( - suite.T(), - tenant, - user, - testBaseDrivePath+folderSub+folder, - )..., + expectedCollectionPaths: expectedPathAsSlice( + suite.T(), + tenant, + user, + testBaseDrivePath+"/folder", + testBaseDrivePath+folderSub, + testBaseDrivePath+folderSub+folder, ), expectedItemCount: 4, expectedFileCount: 2, - expectedContainerCount: 2, + expectedContainerCount: 3, // just "folder" isn't added here because the include check is done on the // parent path since we only check later if something is a folder or not. 
expectedMetadataPaths: map[string]string{ @@ -293,11 +301,12 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() { suite.T(), tenant, user, + testBaseDrivePath+folderSub, testBaseDrivePath+folderSub+folder, ), expectedItemCount: 2, expectedFileCount: 1, - expectedContainerCount: 1, + expectedContainerCount: 2, expectedMetadataPaths: map[string]string{ "folder2": expectedPathAsSlice( suite.T(), @@ -328,7 +337,7 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() { user, testBaseDrivePath+folderSub, ), - expectedItemCount: 2, + expectedItemCount: 1, expectedFileCount: 1, expectedContainerCount: 1, // No child folders for subfolder so nothing here. @@ -354,12 +363,17 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() { testBaseDrivePath+"/folder/subfolder", )[0], }, - scope: anyFolder, - expect: assert.NoError, - expectedCollectionPaths: []string{}, - expectedItemCount: 0, - expectedFileCount: 0, - expectedContainerCount: 0, + scope: anyFolder, + expect: assert.NoError, + expectedCollectionPaths: expectedPathAsSlice( + suite.T(), + tenant, + user, + testBaseDrivePath, + ), + expectedItemCount: 1, + expectedFileCount: 0, + expectedContainerCount: 1, expectedMetadataPaths: map[string]string{ "folder": expectedPathAsSlice( suite.T(), @@ -395,12 +409,17 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() { testBaseDrivePath+"/a-folder/subfolder", )[0], }, - scope: anyFolder, - expect: assert.NoError, - expectedCollectionPaths: []string{}, - expectedItemCount: 0, - expectedFileCount: 0, - expectedContainerCount: 0, + scope: anyFolder, + expect: assert.NoError, + expectedCollectionPaths: expectedPathAsSlice( + suite.T(), + tenant, + user, + testBaseDrivePath, + ), + expectedItemCount: 1, + expectedFileCount: 0, + expectedContainerCount: 1, expectedMetadataPaths: map[string]string{ "folder": expectedPathAsSlice( suite.T(), @@ -437,12 +456,17 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() { testBaseDrivePath+"/a-folder/subfolder", )[0], }, - scope: anyFolder, - expect: assert.NoError, - expectedCollectionPaths: []string{}, - expectedItemCount: 0, - expectedFileCount: 0, - expectedContainerCount: 0, + scope: anyFolder, + expect: assert.NoError, + expectedCollectionPaths: expectedPathAsSlice( + suite.T(), + tenant, + user, + testBaseDrivePath, + ), + expectedItemCount: 2, + expectedFileCount: 0, + expectedContainerCount: 1, expectedMetadataPaths: map[string]string{ "folder": expectedPathAsSlice( suite.T(), @@ -479,12 +503,17 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() { testBaseDrivePath+"/a-folder/subfolder", )[0], }, - scope: anyFolder, - expect: assert.NoError, - expectedCollectionPaths: []string{}, - expectedItemCount: 0, - expectedFileCount: 0, - expectedContainerCount: 0, + scope: anyFolder, + expect: assert.NoError, + expectedCollectionPaths: expectedPathAsSlice( + suite.T(), + tenant, + user, + testBaseDrivePath, + ), + expectedItemCount: 2, + expectedFileCount: 0, + expectedContainerCount: 1, expectedMetadataPaths: map[string]string{ "folder": expectedPathAsSlice( suite.T(), @@ -550,12 +579,17 @@ func (suite *OneDriveCollectionsSuite) TestUpdateCollections() { testBaseDrivePath+"/folder/subfolder", )[0], }, - scope: anyFolder, - expect: assert.NoError, - expectedCollectionPaths: []string{}, - expectedItemCount: 0, - expectedFileCount: 0, - expectedContainerCount: 0, + scope: anyFolder, + expect: assert.NoError, + expectedCollectionPaths: expectedPathAsSlice( + suite.T(), + tenant, + user, + 
testBaseDrivePath, + ), + expectedItemCount: 1, + expectedFileCount: 0, + expectedContainerCount: 1, expectedMetadataPaths: map[string]string{ "subfolder": expectedPathAsSlice( suite.T(), @@ -1043,6 +1077,12 @@ func (suite *OneDriveCollectionsSuite) TestGet() { ) require.NoError(suite.T(), err, "making metadata path") + rootFolderPath := expectedPathAsSlice( + suite.T(), + tenant, + user, + testBaseDrivePath, + )[0] folderPath := expectedPathAsSlice( suite.T(), tenant, @@ -1067,6 +1107,12 @@ func (suite *OneDriveCollectionsSuite) TestGet() { driveBasePath2 := "drive/driveID2/root:" + rootFolderPath2 := expectedPathAsSlice( + suite.T(), + tenant, + user, + driveBasePath2, + )[0] folderPath2 := expectedPathAsSlice( suite.T(), tenant, @@ -1161,7 +1207,8 @@ func (suite *OneDriveCollectionsSuite) TestGet() { }, errCheck: assert.NoError, expectedCollections: map[string][]string{ - folderPath: {"file"}, + folderPath: {"file"}, + rootFolderPath: {"folder"}, }, expectedDeltaURLs: map[string]string{ driveID1: delta, @@ -1189,7 +1236,8 @@ func (suite *OneDriveCollectionsSuite) TestGet() { }, errCheck: assert.NoError, expectedCollections: map[string][]string{ - folderPath: {"file"}, + folderPath: {"file"}, + rootFolderPath: {"folder"}, }, expectedDeltaURLs: map[string]string{}, expectedFolderPaths: map[string]map[string]string{}, @@ -1218,7 +1266,8 @@ func (suite *OneDriveCollectionsSuite) TestGet() { }, errCheck: assert.NoError, expectedCollections: map[string][]string{ - folderPath: {"file", "file2"}, + folderPath: {"file", "file2"}, + rootFolderPath: {"folder"}, }, expectedDeltaURLs: map[string]string{ driveID1: delta, @@ -1258,8 +1307,10 @@ func (suite *OneDriveCollectionsSuite) TestGet() { }, errCheck: assert.NoError, expectedCollections: map[string][]string{ - folderPath: {"file"}, - folderPath2: {"file"}, + folderPath: {"file"}, + folderPath2: {"file"}, + rootFolderPath: {"folder"}, + rootFolderPath2: {"folder"}, }, expectedDeltaURLs: map[string]string{ driveID1: delta, diff --git a/src/internal/connector/onedrive/item.go b/src/internal/connector/onedrive/item.go index b1027de9d..1526f1401 100644 --- a/src/internal/connector/onedrive/item.go +++ b/src/internal/connector/onedrive/item.go @@ -1,7 +1,9 @@ package onedrive import ( + "bytes" "context" + "encoding/json" "fmt" "io" "net/http" @@ -37,6 +39,7 @@ func getDriveItem( // sharePointItemReader will return a io.ReadCloser for the specified item // It crafts this by querying M365 for a download URL for the item // and using a http client to initialize a reader +// TODO: Add metadata fetching to SharePoint func sharePointItemReader( hc *http.Client, item models.DriveItemable, @@ -53,6 +56,25 @@ func sharePointItemReader( return dii, resp.Body, nil } +func oneDriveItemMetaReader( + ctx context.Context, + service graph.Servicer, + driveID string, + item models.DriveItemable, +) (io.ReadCloser, int, error) { + meta, err := oneDriveItemMetaInfo(ctx, service, driveID, item) + if err != nil { + return nil, 0, err + } + + metaJSON, err := json.Marshal(meta) + if err != nil { + return nil, 0, err + } + + return io.NopCloser(bytes.NewReader(metaJSON)), len(metaJSON), nil +} + // oneDriveItemReader will return a io.ReadCloser for the specified item // It crafts this by querying M365 for a download URL for the item // and using a http client to initialize a reader @@ -60,16 +82,25 @@ func oneDriveItemReader( hc *http.Client, item models.DriveItemable, ) (details.ItemInfo, io.ReadCloser, error) { - resp, err := downloadItem(hc, item) - if err != nil { - 
return details.ItemInfo{}, nil, errors.Wrap(err, "downloading item")
+	var (
+		rc     io.ReadCloser
+		isFile = item.GetFile() != nil
+	)
+
+	if isFile {
+		resp, err := downloadItem(hc, item)
+		if err != nil {
+			return details.ItemInfo{}, nil, errors.Wrap(err, "downloading item")
+		}
+
+		rc = resp.Body
 	}
 
 	dii := details.ItemInfo{
 		OneDrive: oneDriveItemInfo(item, *item.GetSize()),
 	}
 
-	return dii, resp.Body, nil
+	return dii, rc, nil
 }
 
 func downloadItem(hc *http.Client, item models.DriveItemable) (*http.Response, error) {
@@ -149,6 +180,47 @@ func oneDriveItemInfo(di models.DriveItemable, itemSize int64) *details.OneDrive
 	}
 }
 
+// oneDriveItemMetaInfo will fetch the meta information for a drive
+// item. As of now, it only includes the permissions applicable to a
+// OneDrive item.
+func oneDriveItemMetaInfo(
+	ctx context.Context, service graph.Servicer,
+	driveID string, di models.DriveItemable,
+) (Metadata, error) {
+	itemID := di.GetId()
+
+	perm, err := service.Client().DrivesById(driveID).ItemsById(*itemID).Permissions().Get(ctx, nil)
+	if err != nil {
+		return Metadata{}, errors.Wrapf(err, "failed to get item permissions %s", *itemID)
+	}
+
+	up := []UserPermission{}
+
+	for _, p := range perm.GetValue() {
+		roles := []string{}
+
+		for _, r := range p.GetRoles() {
+			// Skip the owner role; permissions left with no other
+			// roles are dropped below
+			if r != "owner" {
+				roles = append(roles, r)
+			}
+		}
+
+		if len(roles) == 0 {
+			continue
+		}
+
+		up = append(up, UserPermission{
+			ID:         *p.GetId(),
+			Roles:      roles,
+			Email:      *p.GetGrantedToV2().GetUser().GetAdditionalData()["email"].(*string),
+			Expiration: p.GetExpirationDateTime(),
+		})
+	}
+
+	return Metadata{Permissions: up}, nil
+}
+
 // sharePointItemInfo will populate a details.SharePointInfo struct
 // with properties from the drive item. ItemSize is specified
 // separately for restore processes because the local itemable
diff --git a/src/internal/connector/onedrive/item_test.go b/src/internal/connector/onedrive/item_test.go
index 6a8894ebf..b0e42943a 100644
--- a/src/internal/connector/onedrive/item_test.go
+++ b/src/internal/connector/onedrive/item_test.go
@@ -138,8 +138,8 @@ func (suite *ItemIntegrationSuite) TestItemReader_oneDrive() {
 	)
 
 	// Read data for the file
 	itemInfo, itemData, err := oneDriveItemReader(graph.HTTPClient(graph.NoTimeout()), driveItem)
-
+	require.NoError(suite.T(), err)
 	require.NotNil(suite.T(), itemInfo.OneDrive)
 	require.NotEmpty(suite.T(), itemInfo.OneDrive.ItemName)
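The `.meta` payload produced by `oneDriveItemMetaReader` is just a `Metadata` value marshaled as JSON. A minimal standalone sketch of that shape follows; the struct definitions mirror how `Metadata` and `UserPermission` are used in `oneDriveItemMetaInfo` above, but the actual type definitions and JSON tags are not part of this diff, so the field names below are illustrative only:

```go
package main

import (
	"encoding/json"
	"fmt"
	"time"
)

// Mirrors the PR's UserPermission usage: ID, Roles, Email, Expiration.
// JSON tags are assumed for illustration.
type UserPermission struct {
	ID         string     `json:"id"`
	Roles      []string   `json:"roles"`
	Email      string     `json:"email"`
	Expiration *time.Time `json:"expiration,omitempty"`
}

// Mirrors the PR's Metadata usage: a permissions list per item.
type Metadata struct {
	Permissions []UserPermission `json:"permissions"`
}

func main() {
	meta := Metadata{
		Permissions: []UserPermission{{
			ID:    "perm-1", // hypothetical permission ID
			Roles: []string{"write"},
			Email: "user@example.com",
		}},
	}

	// oneDriveItemMetaReader wraps these bytes in an io.NopCloser and
	// reports len(metaJSON) as the payload size.
	metaJSON, err := json.Marshal(meta)
	if err != nil {
		panic(err)
	}

	fmt.Println(string(metaJSON))
}
```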
diff --git a/src/internal/connector/onedrive/restore.go b/src/internal/connector/onedrive/restore.go
index 00ed855b7..af591cd86 100644
--- a/src/internal/connector/onedrive/restore.go
+++ b/src/internal/connector/onedrive/restore.go
@@ -2,9 +2,15 @@ package onedrive
 
 import (
 	"context"
+	"encoding/json"
+	"fmt"
 	"io"
 	"runtime/trace"
+	"sort"
+	"strings"
 
+	msdrive "github.com/microsoftgraph/msgraph-sdk-go/drive"
+	"github.com/microsoftgraph/msgraph-sdk-go/models"
 	"github.com/pkg/errors"
 
 	"github.com/alcionai/corso/src/internal/connector/graph"
@@ -25,28 +31,92 @@ const (
 	copyBufferSize = 5 * 1024 * 1024
 )
 
+func getParentPermissions(
+	parentPath path.Path,
+	parentPermissions map[string][]UserPermission,
+) ([]UserPermission, error) {
+	parentPerms, ok := parentPermissions[parentPath.String()]
+	if !ok {
+		onedrivePath, err := path.ToOneDrivePath(parentPath)
+		if err != nil {
+			return nil, errors.Wrap(err, "invalid restore path")
+		}
+
+		if len(onedrivePath.Folders) != 0 {
+			return nil, errors.New("unable to compute item permissions")
+		}
+
+		parentPerms = []UserPermission{}
+	}
+
+	return parentPerms, nil
+}
+
 // RestoreCollections will restore the specified data collections into OneDrive
 func RestoreCollections(
 	ctx context.Context,
 	service graph.Servicer,
 	dest control.RestoreDestination,
+	opts control.Options,
 	dcs []data.Collection,
 	deets *details.Builder,
 ) (*support.ConnectorOperationStatus, error) {
 	var (
 		restoreMetrics support.CollectionMetrics
 		restoreErrors  error
+		metrics        support.CollectionMetrics
+		folderPerms    map[string][]UserPermission
+		canceled       bool
+
+		// permissionIDMappings maps between the old and new IDs of
+		// permissions as we restore them
+		permissionIDMappings = map[string]string{}
 	)
 
 	errUpdater := func(id string, err error) {
 		restoreErrors = support.WrapAndAppend(id, err, restoreErrors)
 	}
 
+	// Reorder collections so that parent directories are created
+	// before their child directories
+	sort.Slice(dcs, func(i, j int) bool {
+		return dcs[i].FullPath().String() < dcs[j].FullPath().String()
+	})
+
+	parentPermissions := map[string][]UserPermission{}
+
 	// Iterate through the data collections and restore the contents of each
 	for _, dc := range dcs {
-		temp, canceled := RestoreCollection(ctx, service, dc, OneDriveSource, dest.ContainerName, deets, errUpdater)
+		var (
+			parentPerms []UserPermission
+			err         error
+		)
 
-		restoreMetrics.Combine(temp)
+		if opts.RestorePermissions {
+			parentPerms, err = getParentPermissions(dc.FullPath(), parentPermissions)
+			if err != nil {
+				errUpdater(dc.FullPath().String(), err)
+			}
+		}
+
+		metrics, folderPerms, permissionIDMappings, canceled = RestoreCollection(
+			ctx,
+			service,
+			dc,
+			parentPerms,
+			OneDriveSource,
+			dest.ContainerName,
+			deets,
+			errUpdater,
+			permissionIDMappings,
+			opts.RestorePermissions,
+		)
+
+		for k, v := range folderPerms {
+			parentPermissions[k] = v
+		}
+
+		restoreMetrics.Combine(metrics)
 
 		if canceled {
 			break
@@ -66,29 +136,36 @@ func RestoreCollections(
 // RestoreCollection handles restoration of an individual collection.
// returns: // - the collection's item and byte count metrics -// - the context cancellation state (true if the context is cancelled) +// - the context cancellation state (true if the context is canceled) func RestoreCollection( ctx context.Context, service graph.Servicer, dc data.Collection, + parentPerms []UserPermission, source driveSource, restoreContainerName string, deets *details.Builder, errUpdater func(string, error), -) (support.CollectionMetrics, bool) { + permissionIDMappings map[string]string, + restorePerms bool, +) (support.CollectionMetrics, map[string][]UserPermission, map[string]string, bool) { ctx, end := D.Span(ctx, "gc:oneDrive:restoreCollection", D.Label("path", dc.FullPath())) defer end() var ( - metrics = support.CollectionMetrics{} - copyBuffer = make([]byte, copyBufferSize) - directory = dc.FullPath() + metrics = support.CollectionMetrics{} + copyBuffer = make([]byte, copyBufferSize) + directory = dc.FullPath() + restoredIDs = map[string]string{} + itemInfo details.ItemInfo + itemID string + folderPerms = map[string][]UserPermission{} ) drivePath, err := path.ToOneDrivePath(directory) if err != nil { errUpdater(directory.String(), err) - return metrics, false + return metrics, folderPerms, permissionIDMappings, false } // Assemble folder hierarchy we're going to restore into (we recreate the folder hierarchy @@ -108,7 +185,7 @@ func RestoreCollection( restoreFolderID, err := CreateRestoreFolders(ctx, service, drivePath.DriveID, restoreFolderElements) if err != nil { errUpdater(directory.String(), errors.Wrapf(err, "failed to create folders %v", restoreFolderElements)) - return metrics, false + return metrics, folderPerms, permissionIDMappings, false } // Restore items from the collection @@ -118,50 +195,175 @@ func RestoreCollection( select { case <-ctx.Done(): errUpdater("context canceled", ctx.Err()) - return metrics, true + return metrics, folderPerms, permissionIDMappings, true case itemData, ok := <-items: if !ok { - return metrics, false - } - metrics.Objects++ - - metrics.TotalBytes += int64(len(copyBuffer)) - - itemInfo, err := restoreItem(ctx, - service, - itemData, - drivePath.DriveID, - restoreFolderID, - copyBuffer, - source) - if err != nil { - errUpdater(itemData.UUID(), err) - continue + return metrics, folderPerms, permissionIDMappings, false } itemPath, err := dc.FullPath().Append(itemData.UUID(), true) if err != nil { logger.Ctx(ctx).DPanicw("transforming item to full path", "error", err) + errUpdater(itemData.UUID(), err) continue } - deets.Add( - itemPath.String(), - itemPath.ShortRef(), - "", - true, - itemInfo) + if source == OneDriveSource { + name := itemData.UUID() + if strings.HasSuffix(name, DataFileSuffix) { + metrics.Objects++ + metrics.TotalBytes += int64(len(copyBuffer)) + trimmedName := strings.TrimSuffix(name, DataFileSuffix) - metrics.Successes++ + itemID, itemInfo, err = restoreData(ctx, service, trimmedName, itemData, + drivePath.DriveID, restoreFolderID, copyBuffer, source) + if err != nil { + errUpdater(itemData.UUID(), err) + continue + } + + restoredIDs[trimmedName] = itemID + + deets.Add(itemPath.String(), itemPath.ShortRef(), "", true, itemInfo) + + // Mark it as success without processing .meta + // file if we are not restoring permissions + if !restorePerms { + metrics.Successes++ + } + } else if strings.HasSuffix(name, MetaFileSuffix) { + if !restorePerms { + continue + } + + meta, err := getMetadata(itemData.ToReader()) + if err != nil { + errUpdater(itemData.UUID(), err) + continue + } + + trimmedName := 
strings.TrimSuffix(name, MetaFileSuffix)
+				restoreID, ok := restoredIDs[trimmedName]
+				if !ok {
+					errUpdater(itemData.UUID(), fmt.Errorf("item not available to restore permissions"))
+					continue
+				}
+
+				permissionIDMappings, err = restorePermissions(
+					ctx,
+					service,
+					drivePath.DriveID,
+					restoreID,
+					parentPerms,
+					meta.Permissions,
+					permissionIDMappings,
+				)
+				if err != nil {
+					errUpdater(itemData.UUID(), err)
+					continue
+				}
+
+				// The objects count is incremented when we restore the
+				// .data file; the success count is incremented here,
+				// when we restore the .meta file, since every data
+				// file should have an associated meta file
+				metrics.Successes++
+			} else if strings.HasSuffix(name, DirMetaFileSuffix) {
+				trimmedName := strings.TrimSuffix(name, DirMetaFileSuffix)
+				folderID, err := createRestoreFolder(
+					ctx,
+					service,
+					drivePath.DriveID,
+					trimmedName,
+					restoreFolderID,
+				)
+				if err != nil {
+					errUpdater(itemData.UUID(), err)
+					continue
+				}
+
+				if !restorePerms {
+					continue
+				}
+
+				meta, err := getMetadata(itemData.ToReader())
+				if err != nil {
+					errUpdater(itemData.UUID(), err)
+					continue
+				}
+
+				permissionIDMappings, err = restorePermissions(
+					ctx,
+					service,
+					drivePath.DriveID,
+					folderID,
+					parentPerms,
+					meta.Permissions,
+					permissionIDMappings,
+				)
+				if err != nil {
+					errUpdater(itemData.UUID(), err)
+					continue
+				}
+
+				trimmedPath := strings.TrimSuffix(itemPath.String(), DirMetaFileSuffix)
+				folderPerms[trimmedPath] = meta.Permissions
+			} else {
+				// Unknown suffix: .data/.meta/.dirmeta are the only
+				// formats written by this backup version
+				errUpdater(itemData.UUID(), fmt.Errorf("invalid backup format, you might be using an old backup"))
+				continue
+			}
+		} else {
+			metrics.Objects++
+			metrics.TotalBytes += int64(len(copyBuffer))
+
+			// No permissions stored at the moment for SharePoint
+			_, itemInfo, err = restoreData(ctx,
+				service,
+				itemData.UUID(),
+				itemData,
+				drivePath.DriveID,
+				restoreFolderID,
+				copyBuffer,
+				source)
+			if err != nil {
+				errUpdater(itemData.UUID(), err)
+				continue
+			}
+
+			deets.Add(itemPath.String(), itemPath.ShortRef(), "", true, itemInfo)
+			metrics.Successes++
+		}
 		}
 	}
 }
 
-// createRestoreFolders creates the restore folder hieararchy in the specified drive and returns the folder ID
-// of the last folder entry in the hiearchy
+// createRestoreFolder creates an individual restore folder; its
+// permissions are restored separately, when its .dirmeta item is
+// processed
+func createRestoreFolder(
+	ctx context.Context,
+	service graph.Servicer,
+	driveID, folder, parentFolderID string,
+) (string, error) {
+	folderItem, err := createItem(ctx, service, driveID, parentFolderID, newItem(folder, true))
+	if err != nil {
+		return "", errors.Wrapf(
+			err,
+			"failed to create folder %s/%s. details: %s", parentFolderID, folder,
+			support.ConnectorStackErrorTrace(err),
+		)
+	}
+
+	logger.Ctx(ctx).Debugf("Resolved %s in %s to %s", folder, parentFolderID, *folderItem.GetId())
+
+	return *folderItem.GetId(), nil
+}
+
+// createRestoreFolders creates the restore folder hierarchy in the specified drive and returns the folder ID
+// of the last folder entry in the hierarchy
 func CreateRestoreFolders(ctx context.Context, service graph.Servicer, driveID string,
 	restoreFolders []string,
 ) (string, error) {
 	driveRoot, err := service.Client().DrivesById(driveID).Root().Get(ctx, nil)
@@ -209,15 +411,16 @@ func CreateRestoreFolders(ctx context.Context, service graph.Servicer, driveID s
 	return parentFolderID, nil
 }
 
-// restoreItem will create a new item in the specified `parentFolderID` and upload the data.Stream
-func restoreItem(
+// restoreData will create a new item in the specified `parentFolderID` and upload the data.Stream
+func restoreData(
 	ctx context.Context,
 	service graph.Servicer,
+	name string,
 	itemData data.Stream,
 	driveID, parentFolderID string,
 	copyBuffer []byte,
 	source driveSource,
-) (details.ItemInfo, error) {
+) (string, details.ItemInfo, error) {
 	ctx, end := D.Span(ctx, "gc:oneDrive:restoreItem", D.Label("item_uuid", itemData.UUID()))
 	defer end()
 
@@ -227,19 +430,19 @@ func restoreItem(
 	// Get the stream size (needed to create the upload session)
 	ss, ok := itemData.(data.StreamSize)
 	if !ok {
-		return details.ItemInfo{}, errors.Errorf("item %q does not implement DataStreamInfo", itemName)
+		return "", details.ItemInfo{}, errors.Errorf("item %q does not implement DataStreamInfo", itemName)
 	}
 
 	// Create Item
-	newItem, err := createItem(ctx, service, driveID, parentFolderID, newItem(itemData.UUID(), false))
+	newItem, err := createItem(ctx, service, driveID, parentFolderID, newItem(name, false))
 	if err != nil {
-		return details.ItemInfo{}, errors.Wrapf(err, "failed to create item %s", itemName)
+		return "", details.ItemInfo{}, errors.Wrapf(err, "failed to create item %s", itemName)
 	}
 
 	// Get a drive item writer
 	w, err := driveItemWriter(ctx, service, driveID, *newItem.GetId(), ss.Size())
 	if err != nil {
-		return details.ItemInfo{}, errors.Wrapf(err, "failed to create item upload session %s", itemName)
+		return "", details.ItemInfo{}, errors.Wrapf(err, "failed to create item upload session %s", itemName)
 	}
 
 	iReader := itemData.ToReader()
@@ -250,7 +453,7 @@ func restoreItem(
 	// Upload the stream data
 	written, err := io.CopyBuffer(w, progReader, copyBuffer)
 	if err != nil {
-		return details.ItemInfo{}, errors.Wrapf(err, "failed to upload data: item %s", itemName)
+		return "", details.ItemInfo{}, errors.Wrapf(err, "failed to upload data: item %s", itemName)
 	}
 
 	dii := details.ItemInfo{}
 
@@ -262,5 +465,129 @@ func restoreItem(
 		dii.OneDrive = oneDriveItemInfo(newItem, written)
 	}
 
-	return dii, nil
+	return *newItem.GetId(), dii, nil
+}
+
+// getMetadata reads and parses the metadata info for an item
+func getMetadata(metar io.ReadCloser) (Metadata, error) {
+	var meta Metadata
+	// `metar` will be nil for the top level container folder
+	if metar != nil {
+		metaraw, err := io.ReadAll(metar)
+		if err != nil {
+			return Metadata{}, err
+		}
+
+		err = json.Unmarshal(metaraw, &meta)
+		if err != nil {
+			return Metadata{}, err
+		}
+	}
+
+	return meta, nil
+}
+
+// getChildPermissions filters out permissions present on the parent
+// from the ones available on the child, returning the permissions
+// added and removed on the child. This is necessary because we store
+// the inherited permissions on the child as well; we cannot avoid
+// storing them, since a file inside a folder can itself remove an
+// inherited permission that would otherwise apply to it.
+func getChildPermissions(childPermissions, parentPermissions []UserPermission) ([]UserPermission, []UserPermission) {
+	addedPermissions := []UserPermission{}
+	removedPermissions := []UserPermission{}
+
+	for _, cp := range childPermissions {
+		found := false
+
+		for _, pp := range parentPermissions {
+			if cp.ID == pp.ID {
+				found = true
+				break
+			}
+		}
+
+		if !found {
+			addedPermissions = append(addedPermissions, cp)
+		}
+	}
+
+	for _, pp := range parentPermissions {
+		found := false
+
+		for _, cp := range childPermissions {
+			if pp.ID == cp.ID {
+				found = true
+				break
+			}
+		}
+
+		if !found {
+			removedPermissions = append(removedPermissions, pp)
+		}
+	}
+
+	return addedPermissions, removedPermissions
+}
+
+// restorePermissions takes the added permissions and the removed ones
+// (present on the parent but not on the child) and adds/removes the
+// corresponding permissions on the OneDrive items.
+func restorePermissions(
+	ctx context.Context,
+	service graph.Servicer,
+	driveID string,
+	itemID string,
+	parentPerms []UserPermission,
+	childPerms []UserPermission,
+	permissionIDMappings map[string]string,
+) (map[string]string, error) {
+	permAdded, permRemoved := getChildPermissions(childPerms, parentPerms)
+
+	for _, p := range permRemoved {
+		err := service.Client().DrivesById(driveID).ItemsById(itemID).
+			PermissionsById(permissionIDMappings[p.ID]).Delete(ctx, nil)
+		if err != nil {
+			return permissionIDMappings, errors.Wrapf(
+				err,
+				"failed to remove permission for item %s. details: %s",
+				itemID,
+				support.ConnectorStackErrorTrace(err),
+			)
+		}
+	}
+
+	for _, p := range permAdded {
+		pbody := msdrive.NewItemsItemInvitePostRequestBody()
+		pbody.SetRoles(p.Roles)
+
+		if p.Expiration != nil {
+			expiry := p.Expiration.String()
+			pbody.SetExpirationDateTime(&expiry)
+		}
+
+		si := false
+		pbody.SetSendInvitation(&si)
+
+		rs := true
+		pbody.SetRequireSignIn(&rs)
+
+		rec := models.NewDriveRecipient()
+		rec.SetEmail(&p.Email)
+		pbody.SetRecipients([]models.DriveRecipientable{rec})
+
+		np, err := service.Client().DrivesById(driveID).ItemsById(itemID).Invite().Post(ctx, pbody, nil)
+		if err != nil {
+			return permissionIDMappings, errors.Wrapf(
+				err,
+				"failed to set permission for item %s. details: %s",
+				itemID,
+				support.ConnectorStackErrorTrace(err),
+			)
+		}
+
+		permissionIDMappings[p.ID] = *np.GetValue()[0].GetId()
+	}
+
+	return permissionIDMappings, nil
 }
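To make the add/remove rule concrete: a permission present on the child but not the parent is invited onto the restored item, while one present on the parent but not the child is deleted from it (through its remapped ID). A minimal standalone sketch of the same diffing rule, condensed to map lookups instead of the nested loops above; the IDs are hypothetical:

```go
package main

import "fmt"

// Stand-in for the PR's UserPermission; only the ID matters for diffing.
type UserPermission struct {
	ID string
}

// Same semantics as getChildPermissions: returns the permissions added
// on the child and the ones removed relative to the parent.
func diffPermissions(child, parent []UserPermission) (added, removed []UserPermission) {
	inParent := make(map[string]bool, len(parent))
	for _, pp := range parent {
		inParent[pp.ID] = true
	}

	inChild := make(map[string]bool, len(child))
	for _, cp := range child {
		inChild[cp.ID] = true

		if !inParent[cp.ID] {
			added = append(added, cp)
		}
	}

	for _, pp := range parent {
		if !inChild[pp.ID] {
			removed = append(removed, pp)
		}
	}

	return added, removed
}

func main() {
	parent := []UserPermission{{ID: "alice"}, {ID: "bob"}}
	child := []UserPermission{{ID: "bob"}, {ID: "carol"}}

	added, removed := diffPermissions(child, parent)
	fmt.Println(added)   // [{carol}] -> Invite() on the restored item
	fmt.Println(removed) // [{alice}] -> PermissionsById(...).Delete()
}
```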
diff --git a/src/internal/connector/sharepoint/data_collections_test.go b/src/internal/connector/sharepoint/data_collections_test.go
index 87aaa5c84..11d05156c 100644
--- a/src/internal/connector/sharepoint/data_collections_test.go
+++ b/src/internal/connector/sharepoint/data_collections_test.go
@@ -77,7 +77,7 @@ func (suite *SharePointLibrariesSuite) TestUpdateCollections() {
 				site,
 				testBaseDrivePath,
 			),
-			expectedItemCount:      2,
+			expectedItemCount:      1,
 			expectedFileCount:      1,
 			expectedContainerCount: 1,
 		},
diff --git a/src/internal/connector/sharepoint/restore.go b/src/internal/connector/sharepoint/restore.go
index ef2b940bb..4784ed209 100644
--- a/src/internal/connector/sharepoint/restore.go
+++ b/src/internal/connector/sharepoint/restore.go
@@ -59,14 +59,18 @@ func RestoreCollections(
 
 		switch dc.FullPath().Category() {
 		case path.LibrariesCategory:
-			metrics, canceled = onedrive.RestoreCollection(
+			metrics, _, _, canceled = onedrive.RestoreCollection(
 				ctx,
 				service,
 				dc,
+				[]onedrive.UserPermission{}, // Currently permission data is not stored for SharePoint
 				onedrive.OneDriveSource,
 				dest.ContainerName,
 				deets,
-				errUpdater)
+				errUpdater,
+				map[string]string{},
+				false,
+			)
 		case path.ListsCategory:
 			metrics, canceled = RestoreCollection(
 				ctx,
diff --git a/src/internal/connector/support/status.go b/src/internal/connector/support/status.go
index 7e38758d3..dcf5f32c5 100644
--- a/src/internal/connector/support/status.go
+++ b/src/internal/connector/support/status.go
@@ -66,6 +66,7 @@ func CreateStatus(
 	hasErrors := err != nil
 	numErr := GetNumberOfErrors(err)
+
 	status := ConnectorOperationStatus{
 		lastOperation:    op,
 		ObjectCount:      cm.Objects,
diff --git a/src/internal/operations/backup_integration_test.go b/src/internal/operations/backup_integration_test.go
index 21d4009e0..a57a9d2be 100644
--- a/src/internal/operations/backup_integration_test.go
+++ b/src/internal/operations/backup_integration_test.go
@@ -339,7 +339,7 @@ func generateContainerOfItems(
 		dest,
 		collections)
 
-	deets, err := gc.RestoreDataCollections(ctx, acct, sel, dest, dataColls)
+	deets, err := gc.RestoreDataCollections(ctx, acct, sel, dest, control.Options{RestorePermissions: true}, dataColls)
 	require.NoError(t, err)
 
 	return deets
diff --git a/src/internal/operations/restore.go b/src/internal/operations/restore.go
index aa9229336..206eb8026 100644
--- a/src/internal/operations/restore.go
+++ b/src/internal/operations/restore.go
@@ -4,6 +4,7 @@ import (
 	"context"
 	"fmt"
 	"runtime/debug"
+	"sort"
 	"time"
 
 	"github.com/alcionai/clues"
@@ -221,7 +222,9 @@ func (op *RestoreOperation) do(ctx context.Context) (restoreDetails *details.Det
 		op.account,
 		op.Selectors,
 		op.Destination,
-		dcs)
+		op.Options,
+		dcs,
+	)
 	if err != nil {
 		opStats.writeErr = errors.Wrap(err, "restoring service data")
 		return nil, opStats.writeErr
@@ -327,6 +330,17 @@ func formatDetailsForRestoration(
 		paths[i] = p
 	}
 
+	// TODO(meain): Move this to a OneDrive-specific component; as of
+	// now the paths can technically come from multiple services
+
+	// This sort is done primarily to order `.meta` files after `.data`
+	// files. This is only necessary for OneDrive, since we store
+	// metadata for files/folders in separate meta files and we need
+	// the data to be restored before we can restore the metadata.
+	sort.Slice(paths, func(i, j int) bool {
+		return paths[i].String() < paths[j].String()
+	})
+
 	if errs != nil {
 		return nil, errs
 	}
diff --git a/src/pkg/control/options.go b/src/pkg/control/options.go
index 9cc5a334a..6f53839ca 100644
--- a/src/pkg/control/options.go
+++ b/src/pkg/control/options.go
@@ -6,10 +6,11 @@ import (
 
 // Options holds the optional configurations for a process
 type Options struct {
-	Collision      CollisionPolicy `json:"-"`
-	DisableMetrics bool            `json:"disableMetrics"`
-	FailFast       bool            `json:"failFast"`
-	ToggleFeatures Toggles         `json:"ToggleFeatures"`
+	Collision          CollisionPolicy `json:"-"`
+	DisableMetrics     bool            `json:"disableMetrics"`
+	FailFast           bool            `json:"failFast"`
+	RestorePermissions bool            `json:"restorePermissions"`
+	ToggleFeatures     Toggles         `json:"ToggleFeatures"`
 }
 
 // Defaults provides an Options with the default values set.
@@ -74,4 +75,9 @@ type Toggles struct {
 	// DisableIncrementals prevents backups from using incremental lookups,
 	// forcing a new, complete backup of all data regardless of prior state.
 	DisableIncrementals bool `json:"exchangeIncrementals,omitempty"`
+
+	// DisablePermissionsBackup disables backups of item permissions.
+	// Backing up permission metadata increases the Graph API call
+	// count, so disabling it when permissions aren't needed is advised.
+	DisablePermissionsBackup bool `json:"disablePermissionsBackup,omitempty"`
 }
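Both `sort.Slice` calls in this patch (collections in `RestoreCollections`, paths in `formatDetailsForRestoration`) lean on the same invariant: plain lexicographic comparison orders parent entries before their children and `.data` entries before their `.meta`/`.dirmeta` counterparts. A small standalone demonstration with hypothetical paths:

```go
package main

import (
	"fmt"
	"sort"
)

func main() {
	// Hypothetical restore paths; the suffixes follow the PR's
	// DataFileSuffix/MetaFileSuffix/DirMetaFileSuffix convention.
	paths := []string{
		"drives/d1/root:/folder/subfolder/report.meta",
		"drives/d1/root:/folder/subfolder/report.data",
		"drives/d1/root:/folder/subfolder.dirmeta",
		"drives/d1/root:/folder.dirmeta",
	}

	// Same comparison used by the patch's sort.Slice calls.
	sort.Slice(paths, func(i, j int) bool {
		return paths[i] < paths[j]
	})

	// Prints parents before children and .data before .meta:
	//   drives/d1/root:/folder.dirmeta
	//   drives/d1/root:/folder/subfolder.dirmeta
	//   drives/d1/root:/folder/subfolder/report.data
	//   drives/d1/root:/folder/subfolder/report.meta
	for _, p := range paths {
		fmt.Println(p)
	}
}
```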