diff --git a/.github/workflows/sanity-test.yaml b/.github/workflows/sanity-test.yaml index 2bce1ca57..3ff954b81 100644 --- a/.github/workflows/sanity-test.yaml +++ b/.github/workflows/sanity-test.yaml @@ -97,15 +97,32 @@ jobs: exit 1 fi + # generate new entries to roll into the next load test + # only runs if the test was successful + - name: New Data Creation + working-directory: ./src/cmd/factory + env: + AZURE_CLIENT_ID: ${{ secrets.CLIENT_ID }} + AZURE_CLIENT_SECRET: ${{ secrets.CLIENT_SECRET }} + AZURE_TENANT_ID: ${{ secrets.TENANT_ID }} + CORSO_M365_LOAD_TEST_USER_ID: ${{ secrets.CORSO_M365_LOAD_TEST_USER_ID }} + run: | + go run . exchange emails \ + --user ${{ env.CORSO_M365_TEST_USER_ID }} \ + --tenant ${{ env.AZURE_TENANT_ID }} \ + --destination Corso_Restore_st_${{ steps.repo-init.outputs.result }} \ + --count 4 + # run the tests - name: Backup exchange test id: exchange-test run: | ./corso backup create exchange \ --user "${CORSO_M365_TEST_USER_ID}" \ - --hide-progress \ - --json \ - 2>&1 | tee $TEST_RESULT/backup_exchange.txt + --hide-progress \ + --data 'email' \ + --json \ + 2>&1 | tee $TEST_RESULT/backup_exchange.txt resultjson=$(sed -e '1,/Completed Backups/d' $TEST_RESULT/backup_exchange.txt ) @@ -152,6 +169,7 @@ jobs: run: | set -euo pipefail ./corso restore exchange \ + --email-folder Corso_Restore_st_${{ steps.repo-init.outputs.result }} \ --hide-progress \ --backup "${{ steps.exchange-test.outputs.result }}" \ 2>&1 | tee $TEST_RESULT/exchange-restore-test.txt @@ -161,6 +179,7 @@ jobs: env: SANITY_RESTORE_FOLDER: ${{ steps.exchange-restore-test.outputs.result }} SANITY_RESTORE_SERVICE: "exchange" + TEST_DATA: Corso_Restore_st_${{ steps.repo-init.outputs.result }} run: | set -euo pipefail ./sanityCheck @@ -193,6 +212,7 @@ jobs: ./corso restore exchange \ --hide-progress \ --backup "${{ steps.exchange-incremental-test.outputs.result }}" \ + --email-folder Corso_Restore_st_${{ steps.repo-init.outputs.result }} \ 2>&1 | tee $TEST_RESULT/exchange-incremantal-restore-test.txt echo result=$(grep -i -e 'Restoring to folder ' $TEST_RESULT/exchange-incremantal-restore-test.txt | sed "s/Restoring to folder//" ) >> $GITHUB_OUTPUT @@ -200,6 +220,8 @@ jobs: env: SANITY_RESTORE_FOLDER: ${{ steps.exchange-incremantal-restore-test.outputs.result }} SANITY_RESTORE_SERVICE: "exchange" + TEST_DATA: Corso_Restore_st_${{ steps.repo-init.outputs.result }} + BASE_BACKUP: ${{ steps.exchange-restore-test.outputs.result }} run: | set -euo pipefail ./sanityCheck @@ -263,6 +285,7 @@ jobs: run: | set -euo pipefail ./corso restore onedrive \ + --restore-permissions \ --hide-progress \ --backup "${{ steps.onedrive-test.outputs.result }}" \ 2>&1 | tee $TEST_RESULT/onedrive-restore-test.txt @@ -283,7 +306,7 @@ jobs: set -euo pipefail ./corso backup create onedrive \ --hide-progress \ - --user "${CORSO_M365_TEST_USER_ID}"\ + --user "${CORSO_M365_TEST_USER_ID}" \ --json \ 2>&1 | tee $TEST_RESULT/backup_onedrive_incremental.txt @@ -303,6 +326,7 @@ jobs: run: | set -euo pipefail ./corso restore onedrive \ + --restore-permissions \ --hide-progress \ --backup "${{ steps.onedrive-incremental-test.outputs.result }}" \ 2>&1 | tee $TEST_RESULT/onedrive-incremental-restore-test.txt diff --git a/CHANGELOG.md b/CHANGELOG.md index b65e0ccbf..8608857df 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] (beta) +### Added +- Permissions backup for OneDrive is now out of experimental (By default, only newly backed 
up items will have their permissions backed up; run a full backup to ensure permissions are captured for all items.)
+
 ### Fixed
 - Fixed permissions restore in latest backup version.
 - Incremental OneDrive backups could panic if the delta token expired and a folder was seen and deleted in the course of item enumeration for the backup.
@@ -16,6 +19,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Enable compression for all data uploaded by kopia.
 - SharePoint --folder selectors correctly return items.
 - Fix Exchange cli args for filtering items
+- Skip OneNote items larger than 2GB (the Graph API prevents us from downloading them)
+- ParentPath in the JSON output for Exchange calendars now shows names instead of IDs.
 
 ## [v0.6.1] (beta) - 2023-03-21
 
diff --git a/src/.golangci.yml b/src/.golangci.yml
index 15f93b7b4..f0ce30498 100644
--- a/src/.golangci.yml
+++ b/src/.golangci.yml
@@ -29,7 +29,7 @@ linters-settings:
     forbid:
       # Don't allow creating contexts without logging in tests. Use an ignore
       # lower down to ensure usages of this outside of tests aren't reported.
-      - 'context\.(Background|TODO)(# tests should use tester\.NewContext )?'
+      - 'context\.(Background|TODO)(# tests should use tester\.NewContext)?'
       # Don't allow use of path as it hardcodes separator to `/`.
       # Use filepath instead.
       - '\bpath\.(Ext|Base|Dir|Join)'
@@ -38,10 +38,12 @@ linters-settings:
       # Don't allow use of testify suite directly. Use one of the wrappers from
       # tester/suite.go instead. Use an ignore lower down to exclude packages
       # that result in import cycles if they try to use the wrapper.
-      - 'suite\.Suite(# tests should use one of the Suite wrappers in tester package )?'
+      - 'suite\.Suite(# tests should use one of the Suite wrappers in tester package)?'
       # All errors should be constructed and wrapped with the clues package.
       # String formatting should be avoided in favor of structured errors (ie: err.With(k, v)).
       - '(errors|fmt)\.(New|Stack|Wrap|Error)f?\((# error handling should use clues pkg)?'
+      # Avoid Warn-level logging in favor of Info or Error.
+      - 'Warn[wf]?\((# logging should use Info or Error)?'
   lll:
     line-length: 120
   revive:
diff --git a/src/Makefile b/src/Makefile
index 6f75be16b..fff36d78c 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -79,4 +79,7 @@ load-test:
 	-mutexprofile=mutex.prof \
 	-trace=trace.out \
 	-outputdir=test_results \
-	./pkg/repository/loadtest/repository_load_test.go
\ No newline at end of file
+	./pkg/repository/loadtest/repository_load_test.go
+
+getM365:
+	go build -o getM365 cmd/getM365/main.go
\ No newline at end of file
diff --git a/src/cli/backup/backup.go b/src/cli/backup/backup.go
index f407f084f..c55e49792 100644
--- a/src/cli/backup/backup.go
+++ b/src/cli/backup/backup.go
@@ -13,8 +13,8 @@ import (
 	"github.com/alcionai/corso/src/cli/options"
 	. 
"github.com/alcionai/corso/src/cli/print" "github.com/alcionai/corso/src/cli/utils" + "github.com/alcionai/corso/src/internal/common" "github.com/alcionai/corso/src/internal/data" - "github.com/alcionai/corso/src/internal/model" "github.com/alcionai/corso/src/pkg/account" "github.com/alcionai/corso/src/pkg/backup" "github.com/alcionai/corso/src/pkg/logger" @@ -195,35 +195,36 @@ func runBackups( r repository.Repository, serviceName, resourceOwnerType string, selectorSet []selectors.Selector, + ins common.IDNameSwapper, ) error { var ( - bIDs []model.StableID + bIDs []string errs = []error{} ) for _, discSel := range selectorSet { var ( owner = discSel.DiscreteOwner - bctx = clues.Add(ctx, "resource_owner", owner) + ictx = clues.Add(ctx, "resource_owner", owner) ) - bo, err := r.NewBackup(bctx, discSel) + bo, err := r.NewBackupWithLookup(ictx, discSel, ins) if err != nil { - errs = append(errs, clues.Wrap(err, owner).WithClues(bctx)) - Errf(bctx, "%v\n", err) + errs = append(errs, clues.Wrap(err, owner).WithClues(ictx)) + Errf(ictx, "%v\n", err) continue } - err = bo.Run(bctx) + err = bo.Run(ictx) if err != nil { - errs = append(errs, clues.Wrap(err, owner).WithClues(bctx)) - Errf(bctx, "%v\n", err) + errs = append(errs, clues.Wrap(err, owner).WithClues(ictx)) + Errf(ictx, "%v\n", err) continue } - bIDs = append(bIDs, bo.Results.BackupID) + bIDs = append(bIDs, string(bo.Results.BackupID)) Infof(ctx, "Done - ID: %v\n", bo.Results.BackupID) } @@ -265,7 +266,7 @@ func genericDeleteCommand(cmd *cobra.Command, bID, designation string, args []st defer utils.CloseRepo(ctx, r) - if err := r.DeleteBackup(ctx, model.StableID(bID)); err != nil { + if err := r.DeleteBackup(ctx, bID); err != nil { return Only(ctx, clues.Wrap(err, "Deleting backup "+bID)) } diff --git a/src/cli/backup/exchange.go b/src/cli/backup/exchange.go index 07acce3cd..680d6ed83 100644 --- a/src/cli/backup/exchange.go +++ b/src/cli/backup/exchange.go @@ -164,14 +164,14 @@ func createExchangeCmd(cmd *cobra.Command, args []string) error { // TODO: log/print recoverable errors errs := fault.New(false) - users, err := m365.UserPNs(ctx, *acct, errs) + ins, err := m365.UsersMap(ctx, *acct, errs) if err != nil { - return Only(ctx, clues.Wrap(err, "Failed to retrieve M365 user(s)")) + return Only(ctx, clues.Wrap(err, "Failed to retrieve M365 users")) } selectorSet := []selectors.Selector{} - for _, discSel := range sel.SplitByResourceOwner(users) { + for _, discSel := range sel.SplitByResourceOwner(ins.IDs()) { selectorSet = append(selectorSet, discSel.Selector) } @@ -180,7 +180,7 @@ func createExchangeCmd(cmd *cobra.Command, args []string) error { r, "Exchange", "user", selectorSet, - ) + ins) } func exchangeBackupCreateSelectors(userIDs, cats []string) *selectors.ExchangeBackup { diff --git a/src/cli/backup/exchange_e2e_test.go b/src/cli/backup/exchange_e2e_test.go index f25e0ecfd..96fa0ce35 100644 --- a/src/cli/backup/exchange_e2e_test.go +++ b/src/cli/backup/exchange_e2e_test.go @@ -16,6 +16,7 @@ import ( "github.com/alcionai/corso/src/cli/config" "github.com/alcionai/corso/src/cli/print" "github.com/alcionai/corso/src/cli/utils" + "github.com/alcionai/corso/src/internal/common" "github.com/alcionai/corso/src/internal/connector/exchange" "github.com/alcionai/corso/src/internal/operations" "github.com/alcionai/corso/src/internal/tester" @@ -300,7 +301,15 @@ func (suite *PreparedBackupExchangeE2ESuite) SetupSuite() { suite.backupOps = make(map[path.CategoryType]string) - users := []string{suite.m365UserID} + var ( + users = 
[]string{suite.m365UserID} + idToName = map[string]string{suite.m365UserID: "todo-name-" + suite.m365UserID} + nameToID = map[string]string{"todo-name-" + suite.m365UserID: suite.m365UserID} + ins = common.IDsNames{ + IDToName: idToName, + NameToID: nameToID, + } + ) for _, set := range backupDataSets { var ( @@ -321,7 +330,7 @@ func (suite *PreparedBackupExchangeE2ESuite) SetupSuite() { sel.Include(scopes) - bop, err := suite.repo.NewBackup(ctx, sel.Selector) + bop, err := suite.repo.NewBackupWithLookup(ctx, sel.Selector, ins) require.NoError(t, err, clues.ToCore(err)) err = bop.Run(ctx) @@ -330,7 +339,7 @@ func (suite *PreparedBackupExchangeE2ESuite) SetupSuite() { bIDs := string(bop.Results.BackupID) // sanity check, ensure we can find the backup and its details immediately - b, err := suite.repo.Backup(ctx, bop.Results.BackupID) + b, err := suite.repo.Backup(ctx, string(bop.Results.BackupID)) require.NoError(t, err, "retrieving recent backup by ID") require.Equal(t, bIDs, string(b.ID), "repo backup matches results id") _, b, errs := suite.repo.GetBackupDetails(ctx, bIDs) diff --git a/src/cli/backup/onedrive.go b/src/cli/backup/onedrive.go index 9429e4ea5..31cffae7e 100644 --- a/src/cli/backup/onedrive.go +++ b/src/cli/backup/onedrive.go @@ -68,7 +68,7 @@ func addOneDriveCommands(cmd *cobra.Command) *cobra.Command { c, fs = utils.AddCommand(cmd, oneDriveCreateCmd()) fs.SortFlags = false - options.AddFeatureToggle(cmd, options.EnablePermissionsBackup()) + options.AddFeatureToggle(cmd) c.Use = c.Use + " " + oneDriveServiceCommandCreateUseSuffix c.Example = oneDriveServiceCommandCreateExamples @@ -148,14 +148,14 @@ func createOneDriveCmd(cmd *cobra.Command, args []string) error { // TODO: log/print recoverable errors errs := fault.New(false) - users, err := m365.UserPNs(ctx, *acct, errs) + ins, err := m365.UsersMap(ctx, *acct, errs) if err != nil { return Only(ctx, clues.Wrap(err, "Failed to retrieve M365 users")) } selectorSet := []selectors.Selector{} - for _, discSel := range sel.SplitByResourceOwner(users) { + for _, discSel := range sel.SplitByResourceOwner(ins.IDs()) { selectorSet = append(selectorSet, discSel.Selector) } @@ -164,7 +164,7 @@ func createOneDriveCmd(cmd *cobra.Command, args []string) error { r, "OneDrive", "user", selectorSet, - ) + ins) } func validateOneDriveBackupCreateFlags(users []string) error { diff --git a/src/cli/backup/onedrive_e2e_test.go b/src/cli/backup/onedrive_e2e_test.go index 515002f31..19555e91a 100644 --- a/src/cli/backup/onedrive_e2e_test.go +++ b/src/cli/backup/onedrive_e2e_test.go @@ -16,6 +16,7 @@ import ( "github.com/alcionai/corso/src/cli/config" "github.com/alcionai/corso/src/cli/print" "github.com/alcionai/corso/src/cli/utils" + "github.com/alcionai/corso/src/internal/common" "github.com/alcionai/corso/src/internal/operations" "github.com/alcionai/corso/src/internal/tester" "github.com/alcionai/corso/src/pkg/account" @@ -80,7 +81,7 @@ func (suite *NoBackupOneDriveE2ESuite) SetupSuite() { suite.acct, suite.st, control.Options{ - ToggleFeatures: control.Toggles{EnablePermissionsBackup: true}, + ToggleFeatures: control.Toggles{}, }) require.NoError(t, err, clues.ToCore(err)) } @@ -201,18 +202,26 @@ func (suite *BackupDeleteOneDriveE2ESuite) SetupSuite() { suite.acct, suite.st, control.Options{ - ToggleFeatures: control.Toggles{EnablePermissionsBackup: true}, + ToggleFeatures: control.Toggles{}, }) require.NoError(t, err, clues.ToCore(err)) - m365UserID := tester.M365UserID(t) - users := []string{m365UserID} + var ( + m365UserID = 
tester.M365UserID(t) + users = []string{m365UserID} + idToName = map[string]string{m365UserID: "todo-name-" + m365UserID} + nameToID = map[string]string{"todo-name-" + m365UserID: m365UserID} + ins = common.IDsNames{ + IDToName: idToName, + NameToID: nameToID, + } + ) // some tests require an existing backup sel := selectors.NewOneDriveBackup(users) sel.Include(sel.Folders(selectors.Any())) - suite.backupOp, err = suite.repo.NewBackup(ctx, sel.Selector) + suite.backupOp, err = suite.repo.NewBackupWithLookup(ctx, sel.Selector, ins) require.NoError(t, err, clues.ToCore(err)) err = suite.backupOp.Run(ctx) diff --git a/src/cli/backup/sharepoint.go b/src/cli/backup/sharepoint.go index 76fd80f66..93c2c6be2 100644 --- a/src/cli/backup/sharepoint.go +++ b/src/cli/backup/sharepoint.go @@ -7,18 +7,20 @@ import ( "github.com/pkg/errors" "github.com/spf13/cobra" "github.com/spf13/pflag" + "golang.org/x/exp/slices" "github.com/alcionai/corso/src/cli/options" . "github.com/alcionai/corso/src/cli/print" "github.com/alcionai/corso/src/cli/utils" - "github.com/alcionai/corso/src/internal/connector" - "github.com/alcionai/corso/src/internal/connector/graph" + "github.com/alcionai/corso/src/internal/common" "github.com/alcionai/corso/src/internal/data" "github.com/alcionai/corso/src/pkg/backup/details" "github.com/alcionai/corso/src/pkg/fault" + "github.com/alcionai/corso/src/pkg/filters" "github.com/alcionai/corso/src/pkg/path" "github.com/alcionai/corso/src/pkg/repository" "github.com/alcionai/corso/src/pkg/selectors" + "github.com/alcionai/corso/src/pkg/services/m365" ) // ------------------------------------------------------------------------------------------------ @@ -154,19 +156,19 @@ func createSharePointCmd(cmd *cobra.Command, args []string) error { // TODO: log/print recoverable errors errs := fault.New(false) - gc, err := connector.NewGraphConnector(ctx, graph.HTTPClient(graph.NoTimeout()), *acct, connector.Sites, errs) + ins, err := m365.SitesMap(ctx, *acct, errs) if err != nil { - return Only(ctx, clues.Wrap(err, "Failed to connect to Microsoft APIs")) + return Only(ctx, clues.Wrap(err, "Failed to retrieve M365 sites")) } - sel, err := sharePointBackupCreateSelectors(ctx, utils.SiteIDFV, utils.WebURLFV, utils.CategoryDataFV, gc) + sel, err := sharePointBackupCreateSelectors(ctx, ins, utils.SiteIDFV, utils.WebURLFV, utils.CategoryDataFV) if err != nil { return Only(ctx, clues.Wrap(err, "Retrieving up sharepoint sites by ID and URL")) } selectorSet := []selectors.Selector{} - for _, discSel := range sel.SplitByResourceOwner(gc.GetSiteIDs()) { + for _, discSel := range sel.SplitByResourceOwner(ins.IDs()) { selectorSet = append(selectorSet, discSel.Selector) } @@ -175,7 +177,7 @@ func createSharePointCmd(cmd *cobra.Command, args []string) error { r, "SharePoint", "site", selectorSet, - ) + ins) } func validateSharePointBackupCreateFlags(sites, weburls, cats []string) error { @@ -201,44 +203,28 @@ func validateSharePointBackupCreateFlags(sites, weburls, cats []string) error { // TODO: users might specify a data type, this only supports AllData(). 
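As an aside on the `ins` value threaded through these commands: it is the ID/name cache produced by `m365.UsersMap`/`m365.SitesMap`, and it resolves identifiers in both directions. A minimal, hypothetical sketch follows (the maps mirror the test fixtures above, not real tenant data):

```go
package main

import (
	"fmt"

	"github.com/alcionai/corso/src/internal/common"
)

func main() {
	// a two-map cache, shaped like the ones constructed in the e2e tests above
	ins := common.IDsNames{
		IDToName: map[string]string{"id_1": "url_1/foo"},
		NameToID: map[string]string{"url_1/foo": "id_1"},
	}

	if name, ok := ins.NameOf("id_1"); ok {
		fmt.Println("name:", name) // url_1/foo
	}

	if id, ok := ins.IDOf("url_1/foo"); ok {
		fmt.Println("id:", id) // id_1
	}

	// IDs() is what SplitByResourceOwner receives in the commands above
	fmt.Println("ids:", ins.IDs())
}
```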
func sharePointBackupCreateSelectors( ctx context.Context, + ins common.IDNameSwapper, sites, weburls, cats []string, - gc *connector.GraphConnector, ) (*selectors.SharePointBackup, error) { if len(sites) == 0 && len(weburls) == 0 { return selectors.NewSharePointBackup(selectors.None()), nil } - for _, site := range sites { - if site == utils.Wildcard { - return includeAllSitesWithCategories(cats), nil - } + if filters.PathContains(sites).Compare(utils.Wildcard) { + return includeAllSitesWithCategories(ins, cats), nil } - for _, wURL := range weburls { - if wURL == utils.Wildcard { - return includeAllSitesWithCategories(cats), nil - } + if filters.PathContains(weburls).Compare(utils.Wildcard) { + return includeAllSitesWithCategories(ins, cats), nil } - // TODO: log/print recoverable errors - errs := fault.New(false) - - union, err := gc.UnionSiteIDsAndWebURLs(ctx, sites, weburls, errs) - if err != nil { - return nil, err - } - - sel := selectors.NewSharePointBackup(union) + sel := selectors.NewSharePointBackup(append(slices.Clone(sites), weburls...)) return addCategories(sel, cats), nil } -func includeAllSitesWithCategories(categories []string) *selectors.SharePointBackup { - sel := addCategories( - selectors.NewSharePointBackup(selectors.Any()), - categories) - - return sel +func includeAllSitesWithCategories(ins common.IDNameSwapper, categories []string) *selectors.SharePointBackup { + return addCategories(selectors.NewSharePointBackup(ins.IDs()), categories) } func addCategories(sel *selectors.SharePointBackup, cats []string) *selectors.SharePointBackup { diff --git a/src/cli/backup/sharepoint_e2e_test.go b/src/cli/backup/sharepoint_e2e_test.go index 94289a7d5..42116760d 100644 --- a/src/cli/backup/sharepoint_e2e_test.go +++ b/src/cli/backup/sharepoint_e2e_test.go @@ -16,12 +16,14 @@ import ( "github.com/alcionai/corso/src/cli/config" "github.com/alcionai/corso/src/cli/print" "github.com/alcionai/corso/src/cli/utils" + "github.com/alcionai/corso/src/internal/common" "github.com/alcionai/corso/src/internal/operations" "github.com/alcionai/corso/src/internal/tester" "github.com/alcionai/corso/src/pkg/account" "github.com/alcionai/corso/src/pkg/control" "github.com/alcionai/corso/src/pkg/repository" "github.com/alcionai/corso/src/pkg/selectors" + "github.com/alcionai/corso/src/pkg/selectors/testdata" "github.com/alcionai/corso/src/pkg/storage" ) @@ -156,14 +158,22 @@ func (suite *BackupDeleteSharePointE2ESuite) SetupSuite() { suite.repo, err = repository.Initialize(ctx, suite.acct, suite.st, control.Options{}) require.NoError(t, err, clues.ToCore(err)) - m365SiteID := tester.M365SiteID(t) - sites := []string{m365SiteID} + var ( + m365SiteID = tester.M365SiteID(t) + sites = []string{m365SiteID} + idToName = map[string]string{m365SiteID: "todo-name-" + m365SiteID} + nameToID = map[string]string{"todo-name-" + m365SiteID: m365SiteID} + ins = common.IDsNames{ + IDToName: idToName, + NameToID: nameToID, + } + ) // some tests require an existing backup sel := selectors.NewSharePointBackup(sites) - sel.Include(sel.LibraryFolders(selectors.Any())) + sel.Include(testdata.SharePointBackupFolderScope(sel)) - suite.backupOp, err = suite.repo.NewBackup(ctx, sel.Selector) + suite.backupOp, err = suite.repo.NewBackupWithLookup(ctx, sel.Selector, ins) require.NoError(t, err, clues.ToCore(err)) err = suite.backupOp.Run(ctx) diff --git a/src/cli/backup/sharepoint_test.go b/src/cli/backup/sharepoint_test.go index 917977cd5..578c60d99 100644 --- a/src/cli/backup/sharepoint_test.go +++ 
b/src/cli/backup/sharepoint_test.go @@ -11,7 +11,7 @@ import ( "github.com/alcionai/corso/src/cli/utils" "github.com/alcionai/corso/src/cli/utils/testdata" - "github.com/alcionai/corso/src/internal/connector" + "github.com/alcionai/corso/src/internal/common" "github.com/alcionai/corso/src/internal/tester" "github.com/alcionai/corso/src/pkg/selectors" ) @@ -108,13 +108,20 @@ func (suite *SharePointSuite) TestValidateSharePointBackupCreateFlags() { } func (suite *SharePointSuite) TestSharePointBackupCreateSelectors() { - comboString := []string{"id_1", "id_2"} - gc := &connector.GraphConnector{ - Sites: map[string]string{ - "url_1": "id_1", - "url_2": "id_2", - }, - } + const ( + id1 = "id_1" + id2 = "id_2" + url1 = "url_1/foo" + url2 = "url_2/bar" + ) + + var ( + ins = common.IDsNames{ + IDToName: map[string]string{id1: url1, id2: url2}, + NameToID: map[string]string{url1: id1, url2: id2}, + } + bothIDs = []string{id1, id2} + ) table := []struct { name string @@ -137,73 +144,72 @@ func (suite *SharePointSuite) TestSharePointBackupCreateSelectors() { { name: "site wildcard", site: []string{utils.Wildcard}, - expect: selectors.Any(), + expect: bothIDs, expectScopesLen: 2, }, { name: "url wildcard", weburl: []string{utils.Wildcard}, - expect: selectors.Any(), + expect: bothIDs, expectScopesLen: 2, }, { name: "sites", - site: []string{"id_1", "id_2"}, - expect: []string{"id_1", "id_2"}, + site: []string{id1, id2}, + expect: []string{id1, id2}, expectScopesLen: 2, }, { name: "urls", - weburl: []string{"url_1", "url_2"}, - expect: []string{"id_1", "id_2"}, + weburl: []string{url1, url2}, + expect: []string{url1, url2}, expectScopesLen: 2, }, { name: "mix sites and urls", - site: []string{"id_1"}, - weburl: []string{"url_2"}, - expect: []string{"id_1", "id_2"}, + site: []string{id1}, + weburl: []string{url2}, + expect: []string{id1, url2}, expectScopesLen: 2, }, { name: "duplicate sites and urls", - site: []string{"id_1", "id_2"}, - weburl: []string{"url_1", "url_2"}, - expect: comboString, + site: []string{id1, id2}, + weburl: []string{url1, url2}, + expect: []string{id1, id2, url1, url2}, expectScopesLen: 2, }, { name: "unnecessary site wildcard", - site: []string{"id_1", utils.Wildcard}, - weburl: []string{"url_1", "url_2"}, - expect: selectors.Any(), + site: []string{id1, utils.Wildcard}, + weburl: []string{url1, url2}, + expect: bothIDs, expectScopesLen: 2, }, { name: "unnecessary url wildcard", - site: comboString, - weburl: []string{"url_1", utils.Wildcard}, - expect: selectors.Any(), + site: []string{id1}, + weburl: []string{url1, utils.Wildcard}, + expect: bothIDs, expectScopesLen: 2, }, { name: "Pages", - site: comboString, + site: bothIDs, data: []string{dataPages}, - expect: comboString, + expect: bothIDs, expectScopesLen: 1, }, } for _, test := range table { suite.Run(test.name, func() { - t := suite.T() - ctx, flush := tester.NewContext() defer flush() - sel, err := sharePointBackupCreateSelectors(ctx, test.site, test.weburl, test.data, gc) - require.NoError(t, err, clues.ToCore(err)) + t := suite.T() + sel, err := sharePointBackupCreateSelectors(ctx, ins, test.site, test.weburl, test.data) + require.NoError(t, err, clues.ToCore(err)) assert.ElementsMatch(t, test.expect, sel.DiscreteResourceOwners()) }) } diff --git a/src/cli/options/options.go b/src/cli/options/options.go index d9bdd08c1..56a43dc49 100644 --- a/src/cli/options/options.go +++ b/src/cli/options/options.go @@ -11,12 +11,14 @@ import ( func Control() control.Options { opt := control.Defaults() - opt.FailFast = fastFail 
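+	// fail-fast is now expressed as a FailureHandling policy rather than a
+	// standalone boolean; control.FailFast is presumably one of several
+	// policy values (see the assignment below).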
+ if fastFail { + opt.FailureHandling = control.FailFast + } + opt.DisableMetrics = noStats opt.RestorePermissions = restorePermissions opt.SkipReduce = skipReduce opt.ToggleFeatures.DisableIncrementals = disableIncrementals - opt.ToggleFeatures.EnablePermissionsBackup = enablePermissionsBackup opt.ItemFetchParallelism = fetchParallelism return opt @@ -52,8 +54,6 @@ func AddGlobalOperationFlags(cmd *cobra.Command) { func AddRestorePermissionsFlag(cmd *cobra.Command) { fs := cmd.Flags() fs.BoolVar(&restorePermissions, "restore-permissions", false, "Restore permissions for files and folders") - // TODO: reveal this flag once backing up permissions becomes default - cobra.CheckErr(fs.MarkHidden("restore-permissions")) } // AddSkipReduceFlag adds a hidden flag that allows callers to skip the selector @@ -78,10 +78,7 @@ func AddFetchParallelismFlag(cmd *cobra.Command) { // Feature Flags // --------------------------------------------------------------------------- -var ( - disableIncrementals bool - enablePermissionsBackup bool -) +var disableIncrementals bool type exposeFeatureFlag func(*pflag.FlagSet) @@ -106,16 +103,3 @@ func DisableIncrementals() func(*pflag.FlagSet) { cobra.CheckErr(fs.MarkHidden("disable-incrementals")) } } - -// Adds the hidden '--enable-permissions-backup' cli flag which, when -// set, enables backing up permissions. -func EnablePermissionsBackup() func(*pflag.FlagSet) { - return func(fs *pflag.FlagSet) { - fs.BoolVar( - &enablePermissionsBackup, - "enable-permissions-backup", - false, - "Enable backing up item permissions for OneDrive") - cobra.CheckErr(fs.MarkHidden("enable-permissions-backup")) - } -} diff --git a/src/cli/print/print.go b/src/cli/print/print.go index 98c774015..5ab61acca 100644 --- a/src/cli/print/print.go +++ b/src/cli/print/print.go @@ -62,7 +62,7 @@ func StderrWriter(ctx context.Context) io.Writer { } // --------------------------------------------------------------------------------------------------------- -// Helper funcs +// Exported interface // --------------------------------------------------------------------------------------------------------- // Only tells the CLI to only display this error, preventing the usage @@ -76,7 +76,7 @@ func Only(ctx context.Context, e error) error { // if s is nil, prints nothing. // Prepends the message with "Error: " func Err(ctx context.Context, s ...any) { - out(getRootCmd(ctx).ErrOrStderr()) + out(getRootCmd(ctx).ErrOrStderr(), s...) } // Errf prints the params to cobra's error writer (stdErr by default) @@ -110,6 +110,15 @@ func Infof(ctx context.Context, t string, s ...any) { outf(getRootCmd(ctx).ErrOrStderr(), t, s...) } +// PrettyJSON prettifies and prints the value. 
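+//
+// A hypothetical usage sketch (the argument name is illustrative, not part
+// of this patch):
+//
+//	PrettyJSON(ctx, backupResult) // emits backupResult.MinimumPrintable() as JSON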
+func PrettyJSON(ctx context.Context, p minimumPrintabler) {
+	if p == nil {
+		Err(ctx, "")
+		return
+	}
+
+	outputJSON(getRootCmd(ctx).ErrOrStderr(), p, outputAsJSONDebug)
+}
+
 // out is the testable core of exported print funcs
 func out(w io.Writer, s ...any) {
 	if len(s) == 0 {
@@ -135,8 +144,7 @@ func outf(w io.Writer, t string, s ...any) {
 // ---------------------------------------------------------------------------------------------------------
 
 type Printable interface {
-	// reduces the struct to a minimized format for easier human consumption
-	MinimumPrintable() any
+	minimumPrintabler
 	// should list the property names of the values surfaced in Values()
 	Headers() []string
 	// list of values for tabular or csv formatting
@@ -145,6 +153,11 @@
 	Values() []string
 }
 
+type minimumPrintabler interface {
+	// reduces the struct to a minimized format for easier human consumption
+	MinimumPrintable() any
+}
+
 // Item prints the printable, according to the caller's requested format.
 func Item(ctx context.Context, p Printable) {
 	printItem(getRootCmd(ctx).OutOrStdout(), p)
@@ -216,13 +229,13 @@ func outputTable(w io.Writer, ps []Printable) {
 // JSON
 // ------------------------------------------------------------------------------------------
 
-func outputJSON(w io.Writer, p Printable, debug bool) {
+func outputJSON(w io.Writer, p minimumPrintabler, debug bool) {
 	if debug {
 		printJSON(w, p)
 		return
 	}
 
 	printJSON(w, p.MinimumPrintable())
 }
 
 func outputJSONArr(w io.Writer, ps []Printable, debug bool) {
diff --git a/src/cli/restore/exchange_e2e_test.go b/src/cli/restore/exchange_e2e_test.go
index 2c03fd6bb..23fb7dc9d 100644
--- a/src/cli/restore/exchange_e2e_test.go
+++ b/src/cli/restore/exchange_e2e_test.go
@@ -12,6 +12,7 @@ import (
 	"github.com/alcionai/corso/src/cli"
 	"github.com/alcionai/corso/src/cli/config"
 	"github.com/alcionai/corso/src/cli/utils"
+	"github.com/alcionai/corso/src/internal/common"
 	"github.com/alcionai/corso/src/internal/connector/exchange"
 	"github.com/alcionai/corso/src/internal/operations"
 	"github.com/alcionai/corso/src/internal/tester"
@@ -73,7 +74,16 @@ func (suite *RestoreExchangeE2ESuite) SetupSuite() {
 	suite.vpr, suite.cfgFP = tester.MakeTempTestConfigClone(t, force)
 
 	suite.m365UserID = tester.M365UserID(t)
-	users := []string{suite.m365UserID}
+
+	var (
+		users    = []string{suite.m365UserID}
+		idToName = map[string]string{suite.m365UserID: "todo-name-" + suite.m365UserID}
+		nameToID = map[string]string{"todo-name-" + suite.m365UserID: suite.m365UserID}
+		ins      = common.IDsNames{
+			IDToName: idToName,
+			NameToID: nameToID,
+		}
+	)
 
 	// init the repo first
 	suite.repo, err = repository.Initialize(ctx, suite.acct, suite.st, control.Options{})
@@ -100,7 +110,7 @@
 	sel.Include(scopes)
 
-	bop, err := suite.repo.NewBackup(ctx, sel.Selector)
+	bop, err := suite.repo.NewBackupWithLookup(ctx, sel.Selector, ins)
 	require.NoError(t, err, clues.ToCore(err))
 
 	err = bop.Run(ctx)
@@ -109,7 +119,7 @@
 	suite.backupOps[set] = bop
 
 	// sanity check, ensure we can find the backup and its details immediately
-	_, err = suite.repo.Backup(ctx, bop.Results.BackupID)
+	_, err = suite.repo.Backup(ctx, string(bop.Results.BackupID))
 	require.NoError(t, err, "retrieving recent backup by ID", clues.ToCore(err))
 
 	_, _, errs := suite.repo.GetBackupDetails(ctx, string(bop.Results.BackupID))
diff --git 
a/src/cli/utils/testdata/opts.go b/src/cli/utils/testdata/opts.go index 413be011b..d3ce9915c 100644 --- a/src/cli/utils/testdata/opts.go +++ b/src/cli/utils/testdata/opts.go @@ -8,7 +8,6 @@ import ( "github.com/alcionai/corso/src/cli/utils" "github.com/alcionai/corso/src/internal/common" - "github.com/alcionai/corso/src/internal/model" "github.com/alcionai/corso/src/pkg/backup" "github.com/alcionai/corso/src/pkg/backup/details" "github.com/alcionai/corso/src/pkg/backup/details/testdata" @@ -559,14 +558,14 @@ type MockBackupGetter struct { func (MockBackupGetter) Backup( context.Context, - model.StableID, + string, ) (*backup.Backup, error) { return nil, clues.New("unexpected call to mock") } func (MockBackupGetter) Backups( context.Context, - []model.StableID, + []string, ) ([]*backup.Backup, *fault.Bus) { return nil, fault.New(false).Fail(clues.New("unexpected call to mock")) } diff --git a/src/cli/utils/utils.go b/src/cli/utils/utils.go index 9e829bf05..f22583c80 100644 --- a/src/cli/utils/utils.go +++ b/src/cli/utils/utils.go @@ -144,7 +144,7 @@ func SendStartCorsoEvent( ) { bus, err := events.NewBus(ctx, s, tenID, opts) if err != nil { - logger.Ctx(ctx).Infow("analytics event failure", "err", err) + logger.CtxErr(ctx, err).Info("sending start event") } bus.SetRepoID(repoID) diff --git a/src/cmd/factory/impl/common.go b/src/cmd/factory/impl/common.go index 6215b58d8..17dd0b922 100644 --- a/src/cmd/factory/impl/common.go +++ b/src/cmd/factory/impl/common.go @@ -88,15 +88,14 @@ func generateAndRestoreItems( service, tenantID, userID, dest, - collections, - ) + collections) if err != nil { return nil, err } print.Infof(ctx, "Generating %d %s items in %s\n", howMany, cat, Destination) - return gc.RestoreDataCollections(ctx, version.Backup, acct, sel, dest, opts, dataColls, errs) + return gc.ConsumeRestoreCollections(ctx, version.Backup, acct, sel, dest, opts, dataColls, errs) } // ------------------------------------------------------------------------------------------ @@ -121,21 +120,18 @@ func getGCAndVerifyUser(ctx context.Context, userID string) (*connector.GraphCon return nil, account.Account{}, clues.Wrap(err, "finding m365 account details") } - // build a graph connector // TODO: log/print recoverable errors errs := fault.New(false) - normUsers := map[string]struct{}{} - users, err := m365.UserPNs(ctx, acct, errs) + ins, err := m365.UsersMap(ctx, acct, errs) if err != nil { return nil, account.Account{}, clues.Wrap(err, "getting tenant users") } - for _, k := range users { - normUsers[strings.ToLower(k)] = struct{}{} - } + _, idOK := ins.NameOf(strings.ToLower(userID)) + _, nameOK := ins.IDOf(strings.ToLower(userID)) - if _, ok := normUsers[strings.ToLower(User)]; !ok { + if !idOK && !nameOK { return nil, account.Account{}, clues.New("user not found within tenant") } diff --git a/src/cmd/getM365/getItem.go b/src/cmd/getM365/exchange/get_item.go similarity index 50% rename from src/cmd/getM365/getItem.go rename to src/cmd/getM365/exchange/get_item.go index 829c70b1a..6e7f9022a 100644 --- a/src/cmd/getM365/getItem.go +++ b/src/cmd/getM365/exchange/get_item.go @@ -1,8 +1,8 @@ -// getItem.go is a source file designed to retrieve an m365 object from an +// get_item.go is a source file designed to retrieve an m365 object from an // existing M365 account. Data displayed is representative of the current // serialization abstraction versioning used by Microsoft Graph and stored by Corso. 
-package main +package exchange import ( "context" @@ -14,76 +14,65 @@ import ( kw "github.com/microsoft/kiota-serialization-json-go" "github.com/spf13/cobra" - . "github.com/alcionai/corso/src/cli/print" "github.com/alcionai/corso/src/cli/utils" "github.com/alcionai/corso/src/internal/common" - "github.com/alcionai/corso/src/internal/connector" "github.com/alcionai/corso/src/internal/connector/exchange/api" - "github.com/alcionai/corso/src/internal/connector/graph" "github.com/alcionai/corso/src/pkg/account" "github.com/alcionai/corso/src/pkg/backup/details" "github.com/alcionai/corso/src/pkg/credentials" "github.com/alcionai/corso/src/pkg/fault" - "github.com/alcionai/corso/src/pkg/logger" "github.com/alcionai/corso/src/pkg/path" ) -var getCmd = &cobra.Command{ - Use: "get", - Short: "Get a M365ID item JSON", - RunE: handleGetCommand, -} - // Required inputs from user for command execution var ( - tenant, user, m365ID, category string + user, tenant, m365ID, category string ) -// main function will produce the JSON String for a given m365 object of a -// user. Displayed Objects can be used as inputs for Mockable data -// Supports: -// - exchange (contacts, email, and events) -// Input: go run ./getItem.go --user -// -// --m365ID --category -func main() { - ctx, _ := logger.SeedLevel(context.Background(), logger.Development) - ctx = SetRootCmd(ctx, getCmd) - - defer logger.Flush(ctx) - - fs := getCmd.PersistentFlags() - fs.StringVar(&user, "user", "", "m365 user id of M365 user") - fs.StringVar(&tenant, "tenant", "", - "m365 Tenant: m365 identifier for the tenant, not required if active in OS Environment") - fs.StringVar(&m365ID, "m365ID", "", "m365 identifier for object to be created") - fs.StringVar(&category, "category", "", "type of M365 data (contacts, email, events or files)") // files not supported - - cobra.CheckErr(getCmd.MarkPersistentFlagRequired("user")) - cobra.CheckErr(getCmd.MarkPersistentFlagRequired("m365ID")) - cobra.CheckErr(getCmd.MarkPersistentFlagRequired("category")) - - if err := getCmd.ExecuteContext(ctx); err != nil { - logger.Flush(ctx) - os.Exit(1) +func AddCommands(parent *cobra.Command) { + exCmd := &cobra.Command{ + Use: "exchange", + Short: "Get an M365ID item JSON", + RunE: handleExchangeCmd, } + + fs := exCmd.PersistentFlags() + fs.StringVar(&m365ID, "id", "", "m365 identifier for object") + fs.StringVar(&category, "category", "", "type of M365 data (contacts, email, events)") + fs.StringVar(&user, "user", "", "m365 user id of M365 user") + fs.StringVar(&tenant, "tenant", "", "m365 identifier for the tenant") + + cobra.CheckErr(exCmd.MarkPersistentFlagRequired("user")) + cobra.CheckErr(exCmd.MarkPersistentFlagRequired("id")) + cobra.CheckErr(exCmd.MarkPersistentFlagRequired("category")) + + parent.AddCommand(exCmd) } -func handleGetCommand(cmd *cobra.Command, args []string) error { - ctx := cmd.Context() - +func handleExchangeCmd(cmd *cobra.Command, args []string) error { if utils.HasNoFlagsAndShownHelp(cmd) { return nil } - _, creds, err := getGC(ctx) - if err != nil { - return err + tid := common.First(tenant, os.Getenv(account.AzureTenantID)) + + ctx := clues.Add( + cmd.Context(), + "item_id", m365ID, + "resource_owner", user, + "tenant", tid) + + creds := account.M365Config{ + M365: credentials.GetM365(), + AzureTenantID: tid, } - err = runDisplayM365JSON(ctx, creds, user, m365ID, fault.New(true)) + err := runDisplayM365JSON(ctx, creds, user, m365ID, fault.New(true)) if err != nil { - return Only(ctx, clues.Wrap(err, "Error displaying item: "+m365ID)) 
+ cmd.SilenceUsage = true + cmd.SilenceErrors = true + + return clues.Wrap(err, "getting item") } return nil @@ -165,30 +154,3 @@ func getItem( return itm.Serialize(ctx, sp, user, itemID) } - -//------------------------------------------------------------------------------- -// Helpers -//------------------------------------------------------------------------------- - -func getGC(ctx context.Context) (*connector.GraphConnector, account.M365Config, error) { - // get account info - m365Cfg := account.M365Config{ - M365: credentials.GetM365(), - AzureTenantID: common.First(tenant, os.Getenv(account.AzureTenantID)), - } - - acct, err := account.NewAccount(account.ProviderM365, m365Cfg) - if err != nil { - return nil, m365Cfg, Only(ctx, clues.Wrap(err, "finding m365 account details")) - } - - // TODO: log/print recoverable errors - errs := fault.New(false) - - gc, err := connector.NewGraphConnector(ctx, graph.HTTPClient(graph.NoTimeout()), acct, connector.Users, errs) - if err != nil { - return nil, m365Cfg, Only(ctx, clues.Wrap(err, "connecting to graph API")) - } - - return gc, m365Cfg, nil -} diff --git a/src/cmd/getM365/main.go b/src/cmd/getM365/main.go new file mode 100644 index 000000000..17aa71d78 --- /dev/null +++ b/src/cmd/getM365/main.go @@ -0,0 +1,32 @@ +package main + +import ( + "context" + "os" + + "github.com/spf13/cobra" + + . "github.com/alcionai/corso/src/cli/print" + "github.com/alcionai/corso/src/cmd/getM365/exchange" + "github.com/alcionai/corso/src/cmd/getM365/onedrive" + "github.com/alcionai/corso/src/pkg/logger" +) + +var rootCmd = &cobra.Command{ + Use: "getM365", +} + +func main() { + ctx, _ := logger.SeedLevel(context.Background(), logger.Development) + + ctx = SetRootCmd(ctx, rootCmd) + defer logger.Flush(ctx) + + exchange.AddCommands(rootCmd) + onedrive.AddCommands(rootCmd) + + if err := rootCmd.Execute(); err != nil { + Err(ctx, err) + os.Exit(1) + } +} diff --git a/src/cmd/getM365/onedrive/get_item.go b/src/cmd/getM365/onedrive/get_item.go new file mode 100644 index 000000000..8794fbb03 --- /dev/null +++ b/src/cmd/getM365/onedrive/get_item.go @@ -0,0 +1,207 @@ +// get_item.go is a source file designed to retrieve an m365 object from an +// existing M365 account. Data displayed is representative of the current +// serialization abstraction versioning used by Microsoft Graph and stored by Corso. + +package onedrive + +import ( + "context" + "encoding/json" + "io" + "net/http" + "os" + + "github.com/alcionai/clues" + "github.com/microsoft/kiota-abstractions-go/serialization" + kjson "github.com/microsoft/kiota-serialization-json-go" + "github.com/microsoftgraph/msgraph-sdk-go/models" + "github.com/spf13/cobra" + + . 
"github.com/alcionai/corso/src/cli/print" + "github.com/alcionai/corso/src/cli/utils" + "github.com/alcionai/corso/src/internal/common" + "github.com/alcionai/corso/src/internal/common/ptr" + "github.com/alcionai/corso/src/internal/connector/graph" + "github.com/alcionai/corso/src/internal/connector/onedrive/api" + "github.com/alcionai/corso/src/pkg/account" + "github.com/alcionai/corso/src/pkg/credentials" +) + +const downloadURLKey = "@microsoft.graph.downloadUrl" + +// Required inputs from user for command execution +var ( + user, tenant, m365ID string +) + +func AddCommands(parent *cobra.Command) { + exCmd := &cobra.Command{ + Use: "onedrive", + Short: "Get an M365ID item", + RunE: handleOneDriveCmd, + } + + fs := exCmd.PersistentFlags() + fs.StringVar(&m365ID, "id", "", "m365 identifier for object") + fs.StringVar(&user, "user", "", "m365 user id of M365 user") + fs.StringVar(&tenant, "tenant", "", "m365 identifier for the tenant") + + cobra.CheckErr(exCmd.MarkPersistentFlagRequired("user")) + cobra.CheckErr(exCmd.MarkPersistentFlagRequired("id")) + + parent.AddCommand(exCmd) +} + +func handleOneDriveCmd(cmd *cobra.Command, args []string) error { + if utils.HasNoFlagsAndShownHelp(cmd) { + return nil + } + + tid := common.First(tenant, os.Getenv(account.AzureTenantID)) + + ctx := clues.Add( + cmd.Context(), + "item_id", m365ID, + "resource_owner", user, + "tenant", tid) + + // get account info + creds := account.M365Config{ + M365: credentials.GetM365(), + AzureTenantID: tid, + } + + // todo: swap to drive api client, when finished. + adpt, err := graph.CreateAdapter(tid, creds.AzureClientID, creds.AzureClientSecret) + if err != nil { + return Only(ctx, clues.Wrap(err, "creating graph adapter")) + } + + err = runDisplayM365JSON(ctx, graph.NewService(adpt), creds, user, m365ID) + if err != nil { + cmd.SilenceUsage = true + cmd.SilenceErrors = true + + return Only(ctx, clues.Wrap(err, "getting item")) + } + + return nil +} + +type itemData struct { + Size int `json:"size"` +} + +type itemPrintable struct { + Info json.RawMessage `json:"info"` + Permissions json.RawMessage `json:"permissions"` + Data itemData `json:"data"` +} + +func (i itemPrintable) MinimumPrintable() any { + return i +} + +func runDisplayM365JSON( + ctx context.Context, + srv graph.Servicer, + creds account.M365Config, + user, itemID string, +) error { + drive, err := api.GetDriveByID(ctx, srv, user) + if err != nil { + return err + } + + driveID := ptr.Val(drive.GetId()) + + it := itemPrintable{} + + item, err := api.GetDriveItem(ctx, srv, driveID, itemID) + if err != nil { + return err + } + + if item != nil { + content, err := getDriveItemContent(item) + if err != nil { + return err + } + + // We could get size from item.GetSize(), but the + // getDriveItemContent call is to ensure that we are able to + // download the file. 
+			it.Data.Size = len(content)
+	}
+
+	sInfo, err := serializeObject(item)
+	if err != nil {
+		return err
+	}
+
+	err = json.Unmarshal([]byte(sInfo), &it.Info)
+	if err != nil {
+		return err
+	}
+
+	perms, err := api.GetItemPermission(ctx, srv, driveID, itemID)
+	if err != nil {
+		return err
+	}
+
+	sPerms, err := serializeObject(perms)
+	if err != nil {
+		return err
+	}
+
+	err = json.Unmarshal([]byte(sPerms), &it.Permissions)
+	if err != nil {
+		return err
+	}
+
+	PrettyJSON(ctx, it)
+
+	return nil
+}
+
+func serializeObject(data serialization.Parsable) (string, error) {
+	sw := kjson.NewJsonSerializationWriter()
+
+	err := sw.WriteObjectValue("", data)
+	if err != nil {
+		return "", clues.Wrap(err, "writing serialized value")
+	}
+
+	content, err := sw.GetSerializedContent()
+	if err != nil {
+		return "", clues.Wrap(err, "getting serialized content")
+	}
+
+	return string(content), nil
+}
+
+func getDriveItemContent(item models.DriveItemable) ([]byte, error) {
+	url, ok := item.GetAdditionalData()[downloadURLKey].(*string)
+	if !ok {
+		return nil, clues.New("getting download url")
+	}
+
+	req, err := http.NewRequest(http.MethodGet, *url, nil)
+	if err != nil {
+		return nil, clues.Wrap(err, "creating download request")
+	}
+
+	hc := graph.HTTPClient(graph.NoTimeout())
+
+	resp, err := hc.Do(req)
+	if err != nil {
+		return nil, clues.Wrap(err, "downloading item")
+	}
+
+	defer resp.Body.Close()
+
+	content, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return nil, clues.Wrap(err, "reading downloaded item")
+	}
+
+	return content, nil
+}
diff --git a/src/cmd/graph_pwsh/Auth-Graph.ps1 b/src/cmd/graph_pwsh/Auth-Graph.ps1
new file mode 100644
index 000000000..e22d2dbe0
--- /dev/null
+++ b/src/cmd/graph_pwsh/Auth-Graph.ps1
@@ -0,0 +1,39 @@
+$tenantId = $ENV:AZURE_TENANT_ID
+$clientId = $ENV:AZURE_CLIENT_ID
+$clientSecret = $ENV:AZURE_CLIENT_SECRET
+$useBeta = ($ENV:MSGRAPH_USE_BETA -eq 1) -or ($ENV:MSGRAPH_USE_BETA -eq "1") -or ($ENV:MSGRAPH_USE_BETA -eq "true")
+
+# This version of Graph PowerShell does not support app secret auth yet, so roll our own
+$body = @{
+    Grant_Type    = "client_credentials"
+    Scope         = "https://graph.microsoft.com/.default"
+    Client_Id     = $clientId
+    Client_Secret = $clientSecret
+}
+
+$ConnectionRequest = @{
+    Uri    = "https://login.microsoftonline.com/$tenantId/oauth2/v2.0/token"
+    Method = "POST"
+    Body   = $body
+}
+
+$connection = Invoke-RestMethod @ConnectionRequest
+
+Write-Host "Authenticating with tenantId: $tenantId ..."
+try {
+    Connect-MgGraph -AccessToken $connection.access_token
+    Write-Host "Successfully authenticated with tenantId: $tenantId ..."
+}
+catch {
+    Write-Host "Authentication failed..."
+    Write-Output $_
+}
+
+if ($useBeta) {
+    Write-Host "Switching to Beta Graph API..."
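+    # Select-MgProfile switches the Microsoft.Graph module's command set to
+    # the beta endpoint; v1.0 remains the default otherwise.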
+    Select-MgProfile -Name "beta"
+}
+
+
+
+
diff --git a/src/cmd/graph_pwsh/Dockerfile b/src/cmd/graph_pwsh/Dockerfile
new file mode 100644
index 000000000..24eabd647
--- /dev/null
+++ b/src/cmd/graph_pwsh/Dockerfile
@@ -0,0 +1,9 @@
+FROM m365pnp/powershell:2.1.1-alpine-3.14
+
+RUN Install-Module PowerShellGet -Force
+RUN Install-Module Microsoft.Graph -Force -RequiredVersion 1.25.0 -Scope AllUsers
+
+COPY ./Auth-Graph.ps1 /tmp/Auth-Graph.ps1
+RUN Move-Item -Path /tmp/Auth-Graph.ps1 -Destination $PROFILE.AllUsersAllHosts
+
+WORKDIR /usr/pwsh
\ No newline at end of file
diff --git a/src/cmd/graph_pwsh/README.md b/src/cmd/graph_pwsh/README.md
new file mode 100644
index 000000000..276353d35
--- /dev/null
+++ b/src/cmd/graph_pwsh/README.md
@@ -0,0 +1,112 @@
+# Graph SDK PowerShell Troubleshooter
+
+In certain cases, troubleshooting would be significantly simplified if a Corso
+user had a simple mechanism to execute targeted MS Graph API commands against
+their environment.
+
+One convenient mechanism to accomplish this without going down to the level of
+wrapping individual Graph API calls is to use the
+[Microsoft Graph PowerShell](https://learn.microsoft.com/en-us/powershell/microsoftgraph/overview?view=graph-powershell-1.0) module.
+It provides a convenient wrapper and great coverage of the API surface.
+
+## Build container
+
+Before using the tool, build the container that packages it.
+
+```sh
+docker build -t corso/graph_pwsh:latest .
+```
+
+## Prerequisites
+
+### Docker
+
+You need to have Docker installed on your system.
+
+### Azure AD app credentials
+
+The tool uses your existing Corso app to make Graph calls, and for authentication
+it expects `AZURE_TENANT_ID`, `AZURE_CLIENT_ID`, and `AZURE_CLIENT_SECRET` to be
+set as environment variables. You can read more about this [here](https://corsobackup.io/docs/setup/m365-access/).
+You will then pass these into the container run so that authentication can be completed.
+
+## Using the tool
+
+### Interactive use
+
+This is suitable if you would like to issue a number of MS Graph API commands from an
+interactive shell in the container.
+
+```sh
+docker run --rm -it -v $(pwd):/usr/pwsh -e AZURE_TENANT_ID -e AZURE_CLIENT_ID -e AZURE_CLIENT_SECRET corso/graph_pwsh pwsh
+```
+
+Alternatively, you can use an environment variable file `env_names` that lists the names of the required environment variables.
+
+```sh
+docker run --rm -it -v $(pwd):/usr/pwsh --env-file env_names corso/graph_pwsh pwsh
+```
+
+Before you run any command, authenticate with Graph using the bundled convenience script,
+which creates a connection using the default permissions granted to the app.
+
+```powershell
+PS> ./Auth-Graph.ps1
+```
+
+If you know what you are doing, feel free to use `Connect-MgGraph` directly.
+
+### Specific command use
+
+Suitable when you want to run just a single command. This essentially runs `Auth-Graph.ps1`
+before the actual command you want to run.
+
+```sh
+docker run --rm -it -v $(pwd):/usr/pwsh --env-file env_names corso/graph_pwsh \
+  pwsh -c "<command>"
+```
+
+Here is a complete example to get all users:
+
+```sh
+# This is the equivalent of GET https://graph.microsoft.com/v1.0/users
+docker run --rm -it -v $(pwd):/usr/pwsh --env-file env_names corso/graph_pwsh \
+  pwsh -c "Get-MgUser -All"
+```
+
+Another example to retrieve an email message for a given user by ID.
+
+```sh
+# This is the equivalent of GET https://graph.microsoft.com/v1.0/users/<user-id>/messages/<message-id>
+docker run --rm -it -v $(pwd):/usr/pwsh --env-file env_names corso/graph_pwsh \
+  pwsh -c "Get-MgUserMessage -UserId <user-id> -MessageId <message-id>"
+```
+
+## Debug output
+
+To see the requests and responses made by the specific Graph PowerShell commands, add `-Debug` to your command,
+similar to the example below.
+
+```sh
+# This is the equivalent of GET https://graph.microsoft.com/v1.0/users
+docker run --rm -it -v $(pwd):/usr/pwsh --env-file env_names corso/graph_pwsh \
+  pwsh -c "Get-MgUser -All -Debug"
+```
+
+## Using Beta API calls
+
+In order to use the Beta Graph API, make sure you have done `export MSGRAPH_USE_BETA=1`
+before running the container and pass the environment variable in.
+
+Alternatively, you can do the following:
+
+```sh
+# This is the equivalent of GET https://graph.microsoft.com/beta/users
+docker run --rm -it -v $(pwd):/usr/pwsh --env-file env_names corso/graph_pwsh \
+  pwsh -c 'Select-MgProfile -Name "beta"; Get-MgUser -All'
+```
+
+## Graph PowerShell reference
+
+To learn about specific commands, see the
+[Graph PowerShell Reference](https://learn.microsoft.com/en-us/powershell/microsoftgraph/get-started?view=graph-powershell-1.0)
diff --git a/src/cmd/graph_pwsh/env_names b/src/cmd/graph_pwsh/env_names
new file mode 100644
index 000000000..4941e765d
--- /dev/null
+++ b/src/cmd/graph_pwsh/env_names
@@ -0,0 +1,4 @@
+AZURE_TENANT_ID
+AZURE_CLIENT_ID
+AZURE_CLIENT_SECRET
+MSGRAPH_USE_BETA
diff --git a/src/cmd/purge/scripts/exchangePurge.ps1 b/src/cmd/purge/scripts/exchangePurge.ps1
index 63775d90a..7ba036522 100644
--- a/src/cmd/purge/scripts/exchangePurge.ps1
+++ b/src/cmd/purge/scripts/exchangePurge.ps1
@@ -129,7 +129,7 @@ function Get-TimestampFromName {
 
     try {
         # Assumes that the timestamp is at the end and starts with yyyy-mm-ddT and is ISO8601
-        if ($name -imatch "(\d{4}}-\d{2}-\d{2}T.*)") {
+        if ($name -imatch "(\d{4}-\d{2}-\d{2}T[\S]*)") {
             $timestamp = [System.Convert]::ToDatetime($Matches.0)
         }
 
@@ -226,21 +226,31 @@ function Get-FoldersToPurge {
         | Select-Object -ExpandProperty Value
         | Get-Date
 
-        $IsNameMatchParams = @{
-            'FolderName'          = $folderName;
-            'FolderNamePurgeList' = $FolderNamePurgeList
-        }
+        if ($FolderNamePurgeList.count -gt 0) {
+            $IsNameMatchParams = @{
+                'FolderName'          = $folderName;
+                'FolderNamePurgeList' = $FolderNamePurgeList
+            }
 
-        $IsPrefixAndAgeMatchParams = @{
-            'FolderName'            = $folderName;
-            'FolderCreateTime'      = $folderCreateTime;
-            'FolderPrefixPurgeList' = $FolderPrefixPurgeList;
-            'PurgeBeforeTimestamp'  = $PurgeBeforeTimestamp;
+            if ((IsNameMatch @IsNameMatchParams)) {
+                Write-Host "• Found name match: $folderName ($folderCreateTime)"
+                $foldersToDelete += $folder
+                continue
+            }
         }
 
-        if ((IsNameMatch @IsNameMatchParams) -or (IsPrefixAndAgeMatch @IsPrefixAndAgeMatchParams)) {
-            Write-Host "`nFound desired folder to purge: $folderName ($folderCreateTime)"
-            $foldersToDelete += $folder
+        if ($FolderPrefixPurgeList.count -gt 0) {
+            $IsPrefixAndAgeMatchParams = @{
+                'FolderName'            = $folderName;
+                'FolderCreateTime'      = $folderCreateTime;
+                'FolderPrefixPurgeList' = $FolderPrefixPurgeList;
+                'PurgeBeforeTimestamp'  = $PurgeBeforeTimestamp;
+            }
+
+            if ((IsPrefixAndAgeMatch @IsPrefixAndAgeMatchParams)) {
+                Write-Host "• Found prefix match: $folderName ($folderCreateTime)"
+                $foldersToDelete += $folder
+            }
         }
     }
 
@@ -273,7 +283,13 @@ function Empty-Folder {
     }
 
     if ($PSCmdlet.ShouldProcess("Emptying $foldersToEmptyCount folders ($WellKnownRootList $FolderNameList)", 
"$foldersToEmptyCount folders ($WellKnownRootList $FolderNameList)", "Empty folders")) { - Write-Host "`nEmptying $foldersToEmptyCount folders ($WellKnownRootList $FolderNameList)" + Write-Host "`nEmptying $foldersToEmptyCount folders..." + foreach ($folder in $FolderNameList) { + Write-Host "• $folder" + } + foreach ($folder in $WellKnownRootList) { + Write-Host "• $folder" + } # DeleteType = HardDelete, MoveToDeletedItems, or SoftDelete $body = @" @@ -308,6 +324,9 @@ function Delete-Folder { if ($PSCmdlet.ShouldProcess("Removing $foldersToRemoveCount folders ($FolderNameList)", "$foldersToRemoveCount folders ($FolderNameList)", "Delete folders")) { Write-Host "`nRemoving $foldersToRemoveCount folders ($FolderNameList)" + foreach ($folder in $FolderNameList) { + Write-Host "• $folder" + } # DeleteType = HardDelete, MoveToDeletedItems, or SoftDelete $body = @" @@ -353,7 +372,10 @@ function Purge-Folders { } if ($FolderPrefixPurgeList.count -gt 0 -and $PurgeBeforeTimestamp -ne $null) { - Write-Host "Folders older than $PurgeBeforeTimestamp with prefix: $FolderPrefixPurgeList" + Write-Host "Folders older than $PurgeBeforeTimestamp with prefix:" + foreach ($folder in $FolderPrefixPurgeList) { + Write-Host "• $folder" + } } $foldersToDeleteParams = @{ @@ -387,6 +409,8 @@ function Purge-Folders { } function Create-Contact { + [CmdletBinding(SupportsShouldProcess)] + $now = (Get-Date (Get-Date).ToUniversalTime() -Format "o") #used to create a recent seed contact that will be shielded from cleanup. CI tests rely on this $body = @" @@ -407,14 +431,16 @@ function Create-Contact { 2000-01-01T11:59:00Z Tester - Plate "@ - $createContactMsg = Initialize-SOAPMessage -User $User -Body $body - $response = Invoke-SOAPRequest -Token $Token -Message $createContactMsg + if ($PSCmdlet.ShouldProcess("Creating seed contact...", "", "Create contact")) { + Write-Host "`nCreating seed contact..." 
+ $createContactMsg = Initialize-SOAPMessage -User $User -Body $body + $response = Invoke-SOAPRequest -Token $Token -Message $createContactMsg + } } function Get-ItemsToPurge { @@ -422,11 +448,33 @@ function Get-ItemsToPurge { [Parameter(Mandatory = $True, HelpMessage = "Folder under which to look for items matching removal criteria")] [String]$WellKnownRoot, + [Parameter(Mandatory = $False, HelpMessage = "Immediate subfolder within well known folder")] + [String]$SubFolderName = $null, + [Parameter(Mandatory = $True, HelpMessage = "Purge items before this date time (UTC)")] [datetime]$PurgeBeforeTimestamp ) $itemsToDelete = @() + $foldersToSearchBody = "" + + if (![String]::IsNullOrEmpty($SubFolderName)) { + $subFolders, $moreToList = Get-FoldersToPurge -WellKnownRoot $WellKnownRoot -FolderNamePurgeList $SubFolderName -PurgeBeforeTimestamp $PurgeBeforeTimestamp + + if ($subFolders.count -gt 0 ) { + $foldersToSearchBody = "" + foreach ($sub in $subFolders) { + $subName = $sub.DisplayName + $subId = $sub.FolderId.Id + Write-Host "Found subfolder from which to purge items: $subName" + $foldersToSearchBody = "`n" + } + } + else { + Write-Host "Requested subfolder $SubFolderName in folder $WellKnownRoot was not found" + return + } + } # SOAP message for getting the folder id $body = @" @@ -438,12 +486,12 @@ function Get-ItemsToPurge { - + $FoldersToSearchBody "@ - Write-Host "`nLooking for items under well-known folder: $WellKnownRoot older than $PurgeBeforeTimestamp for user: $User" + Write-Host "`nLooking for items under well-known folder: $WellKnownRoot($SubFolderName) older than $PurgeBeforeTimestamp for user: $User" $getItemsMsg = Initialize-SOAPMessage -User $User -Body $body $response = Invoke-SOAPRequest -Token $Token -Message $getItemsMsg @@ -456,15 +504,24 @@ function Get-ItemsToPurge { Select-Object -ExpandProperty Node $moreToList = ![System.Convert]::ToBoolean($rootFolder.IncludesLastItemInRange) + Write-Host "Total items under $WellKnownRoot/$SubFolderName"$rootFolder.TotalItemsInView + foreach ($item in $items) { $itemId = $item.ItemId.Id $changeKey = $item.ItemId.Changekey - $itemName = $item.DisplayName + $itemName = "" $itemCreateTime = $item.ExtendedProperty | Where-Object { $_.ExtendedFieldURI.PropertyTag -eq "0x3007" } | Select-Object -ExpandProperty Value | Get-Date + # can be improved to pass the field to use as a name as a parameter but this is good for now + switch -casesensitive ($WellKnownRoot) { + "calendar" { $itemName = $item.Subject } + "contacts" { $itemName = $item.DisplayName } + Default { $itemName = $item.DisplayName } + } + if ([String]::IsNullOrEmpty($itemId) -or [String]::IsNullOrEmpty($changeKey)) { continue } @@ -479,33 +536,51 @@ function Get-ItemsToPurge { $itemsToDelete += $item } + if ($WhatIfPreference) { + # not actually deleting items so only do a single iteration + $moreToList = $false + } + return $itemsToDelete, $moreToList } -function Purge-Contacts { +function Purge-Items { [CmdletBinding(SupportsShouldProcess)] Param( [Parameter(Mandatory = $True, HelpMessage = "Purge items before this date time (UTC)")] - [datetime]$PurgeBeforeTimestamp + [datetime]$PurgeBeforeTimestamp, + + [Parameter(Mandatory = $True, HelpMessage = "Items folder")] + [string]$ItemsFolder, + + [Parameter(Mandatory = $False, HelpMessage = "Items sub-folder")] + [string]$ItemsSubFolder = $null + ) - Write-Host "`nCleaning up contacts older than $PurgeBeforeTimestamp" - Write-Host "-------------------------------------------------------" + $additionalAttributes = 
"SendMeetingCancellations='SendToNone'" - # Create one seed contact which will have recent create date and will not be sweapt - # This is needed since tests rely on some contact data being present - Write-Host "`nCreating seed contact" - Create-Contact + Write-Host "`nCleaning up items from folder $ItemsFolder($ItemsSubFolder) older than $PurgeBeforeTimestamp" + Write-Host "-----------------------------------------------------------------------------" + + if ($ItemsFolder -eq "contacts") { + $ItemsSubFolder = $null + $additionalAttributes = "" + + # Create one seed contact which will have recent create date and will not be sweapt + # This is needed since tests rely on some contact data being present + Create-Contact + } $moreToList = $True # only get max of 1000 results so we may need to iterate over eligible contacts while ($moreToList) { - $itemsToDelete, $moreToList = Get-ItemsToPurge -WellKnownRoot "contacts" -PurgeBeforeTimestamp $PurgeBeforeTimestamp + $itemsToDelete, $moreToList = Get-ItemsToPurge -WellKnownRoot $ItemsFolder -SubFolderName $ItemsSubFolder -PurgeBeforeTimestamp $PurgeBeforeTimestamp $itemsToDeleteCount = $itemsToDelete.count $itemsToDeleteBody = "" if ($itemsToDeleteCount -eq 0) { - Write-Host "`nNo more contacts to delete matching criteria" + Write-Host "`nNo more items to delete matching criteria" break } @@ -519,21 +594,23 @@ function Purge-Contacts { # Do the actual deletion in a batch request # DeleteType = HardDelete, MoveToDeletedItems, or SoftDelete $body = @" - + $itemsToDeleteBody "@ - + if ($PSCmdlet.ShouldProcess("Deleting $itemsToDeleteCount items...", "$itemsToDeleteCount items", "Delete items")) { Write-Host "`nDeleting $itemsToDeleteCount items..." $emptyFolderMsg = Initialize-SOAPMessage -User $User -Body $body $response = Invoke-SOAPRequest -Token $Token -Message $emptyFolderMsg + + Write-Verbose "Delete response:`n" + Write-Verbose $response.OuterXml Write-Host "`nDeleted $itemsToDeleteCount items..." - } } } @@ -552,7 +629,10 @@ $purgeFolderParams = @{ Purge-Folders @purgeFolderParams #purge older contacts -Purge-Contacts -PurgeBeforeTimestamp $PurgeBeforeTimestamp +Purge-Items -ItemsFolder "contacts" -PurgeBeforeTimestamp $PurgeBeforeTimestamp + +#purge older contact birthday events +Purge-Items -ItemsFolder "calendar" -ItemsSubFolder "Birthdays" -PurgeBeforeTimestamp $PurgeBeforeTimestamp # Empty Deleted Items and then purge all recoverable items. 
Deletes the following # -/Recoverable Items/Audits diff --git a/src/cmd/sanity_test/sanity_tests.go b/src/cmd/sanity_test/sanity_tests.go index 82f0f1002..40c4acbfc 100644 --- a/src/cmd/sanity_test/sanity_tests.go +++ b/src/cmd/sanity_test/sanity_tests.go @@ -13,15 +13,21 @@ import ( msgraphsdk "github.com/microsoftgraph/msgraph-sdk-go" "github.com/microsoftgraph/msgraph-sdk-go/models" "github.com/microsoftgraph/msgraph-sdk-go/users" - "golang.org/x/exp/maps" "golang.org/x/exp/slices" "github.com/alcionai/corso/src/internal/common" "github.com/alcionai/corso/src/internal/common/ptr" "github.com/alcionai/corso/src/internal/connector/graph" + "github.com/alcionai/corso/src/internal/tester" + "github.com/alcionai/corso/src/pkg/filters" "github.com/alcionai/corso/src/pkg/logger" ) +type permissionInfo struct { + entityID string + roles []string +} + func main() { ctx, log := logger.Seed(context.Background(), "info", logger.GetLogFile("")) defer func() { @@ -29,7 +35,7 @@ func main() { }() adapter, err := graph.CreateAdapter( - os.Getenv("AZURE_TENANT_ID"), + tester.GetM365TenantID(ctx), os.Getenv("AZURE_CLIENT_ID"), os.Getenv("AZURE_CLIENT_SECRET")) if err != nil { @@ -37,11 +43,13 @@ func main() { } var ( - client = msgraphsdk.NewGraphServiceClient(adapter) - testUser = os.Getenv("CORSO_M365_TEST_USER_ID") - testService = os.Getenv("SANITY_RESTORE_SERVICE") - folder = strings.TrimSpace(os.Getenv("SANITY_RESTORE_FOLDER")) - startTime, _ = mustGetTimeFromName(ctx, folder) + client = msgraphsdk.NewGraphServiceClient(adapter) + testUser = tester.GetM365UserID(ctx) + testService = os.Getenv("SANITY_RESTORE_SERVICE") + folder = strings.TrimSpace(os.Getenv("SANITY_RESTORE_FOLDER")) + startTime, _ = mustGetTimeFromName(ctx, folder) + dataFolder = os.Getenv("TEST_DATA") + baseBackupFolder = os.Getenv("BASE_BACKUP") ) ctx = clues.Add( @@ -55,7 +63,7 @@ func main() { switch testService { case "exchange": - checkEmailRestoration(ctx, client, testUser, folder, startTime) + checkEmailRestoration(ctx, client, testUser, folder, dataFolder, baseBackupFolder, startTime) case "onedrive": checkOnedriveRestoration(ctx, client, testUser, folder, startTime) default: @@ -68,13 +76,14 @@ func main() { func checkEmailRestoration( ctx context.Context, client *msgraphsdk.GraphServiceClient, - testUser, folderName string, + testUser, folderName, dataFolder, baseBackupFolder string, startTime time.Time, ) { var ( - itemCount = make(map[string]int32) - restoreFolder models.MailFolderable - builder = client.UsersById(testUser).MailFolders() + restoreFolder models.MailFolderable + itemCount = make(map[string]int32) + restoreItemCount = make(map[string]int32) + builder = client.UsersById(testUser).MailFolders() ) for { @@ -85,29 +94,20 @@ func checkEmailRestoration( values := result.GetValue() - // recursive restore folder discovery before proceeding with tests for _, v := range values { - var ( - itemID = ptr.Val(v.GetId()) - itemName = ptr.Val(v.GetDisplayName()) - ictx = clues.Add(ctx, "item_id", itemID, "item_name", itemName) - folderTime, hasTime = mustGetTimeFromName(ctx, itemName) - ) + itemName := ptr.Val(v.GetDisplayName()) - if !isWithinTimeBound(ictx, startTime, folderTime, hasTime) { - continue - } - - // if we found the folder to testt against, back out of this loop. if itemName == folderName { restoreFolder = v continue } - // otherwise, recursively aggregate all child folders. 
- getAllSubFolder(ctx, client, testUser, v, itemName, itemCount) + if itemName == dataFolder || itemName == baseBackupFolder { + // otherwise, recursively aggregate all child folders. + getAllSubFolder(ctx, client, testUser, v, itemName, dataFolder, itemCount) - itemCount[itemName] = ptr.Val(v.GetTotalItemCount()) + itemCount[itemName] = ptr.Val(v.GetTotalItemCount()) + } } link, ok := ptr.ValOK(result.GetOdataNextLink()) @@ -135,28 +135,36 @@ func checkEmailRestoration( } for _, fld := range childFolder.GetValue() { - var ( - fldID = ptr.Val(fld.GetId()) - fldName = ptr.Val(fld.GetDisplayName()) - count = ptr.Val(fld.GetTotalItemCount()) - ictx = clues.Add( - ctx, - "child_folder_id", fldID, - "child_folder_name", fldName, - "expected_count", itemCount[fldName], - "actual_count", count) - ) + restoreDisplayName := ptr.Val(fld.GetDisplayName()) + + // check if folder is the data folder we loaded or the base backup to verify + // the incremental backup worked fine + if strings.EqualFold(restoreDisplayName, dataFolder) || strings.EqualFold(restoreDisplayName, baseBackupFolder) { + count, _ := ptr.ValOK(fld.GetTotalItemCount()) + + restoreItemCount[restoreDisplayName] = count + checkAllSubFolder(ctx, client, fld, testUser, restoreDisplayName, dataFolder, restoreItemCount) + } + } + + verifyEmailData(ctx, restoreItemCount, itemCount) +} + +func verifyEmailData(ctx context.Context, restoreMessageCount, messageCount map[string]int32) { + for fldName, emailCount := range messageCount { + if restoreMessageCount[fldName] != emailCount { + logger.Ctx(ctx).Errorw( + "test failure: Restore item counts do not match", + "expected:", emailCount, + "actual:", restoreMessageCount[fldName]) + + fmt.Println( + "test failure: Restore item counts do not match", + "* expected:", emailCount, + "* actual:", restoreMessageCount[fldName]) - if itemCount[fldName] != count { - logger.Ctx(ictx).Error("test failure: Restore item counts do not match") - fmt.Println("Restore item counts do not match:") - fmt.Println("* expected:", itemCount[fldName]) - fmt.Println("* actual:", count) - fmt.Println("Folder:", fldName, ptr.Val(fld.GetId())) os.Exit(1) } - - checkAllSubFolder(ctx, client, testUser, fld, fldName, itemCount) } } @@ -167,7 +175,8 @@ func getAllSubFolder( client *msgraphsdk.GraphServiceClient, testUser string, r models.MailFolderable, - parentFolder string, + parentFolder, + dataFolder string, messageCount map[string]int32, ) { var ( @@ -195,16 +204,18 @@ func getAllSubFolder( var ( childDisplayName = ptr.Val(child.GetDisplayName()) childFolderCount = ptr.Val(child.GetChildFolderCount()) - fullFolderName = parentFolder + "/" + childDisplayName + //nolint:forbidigo + fullFolderName = path.Join(parentFolder, childDisplayName) ) - messageCount[fullFolderName], _ = ptr.ValOK(child.GetTotalItemCount()) + if filters.PathContains([]string{dataFolder}).Compare(fullFolderName) { + messageCount[fullFolderName] = ptr.Val(child.GetTotalItemCount()) + // recursively check for subfolders + if childFolderCount > 0 { + parentFolder := fullFolderName - // recursively check for subfolders - if childFolderCount > 0 { - parentFolder := fullFolderName - - getAllSubFolder(ctx, client, testUser, child, parentFolder, messageCount) + getAllSubFolder(ctx, client, testUser, child, parentFolder, dataFolder, messageCount) + } } } } @@ -214,10 +225,11 @@ func getAllSubFolder( func checkAllSubFolder( ctx context.Context, client *msgraphsdk.GraphServiceClient, - testUser string, r models.MailFolderable, - parentFolder string, - messageCount 
map[string]int32, + testUser, + parentFolder, + dataFolder string, + restoreMessageCount map[string]int32, ) { var ( folderID = ptr.Val(r.GetId()) @@ -241,23 +253,20 @@ func checkAllSubFolder( for _, child := range childFolder.GetValue() { var ( childDisplayName = ptr.Val(child.GetDisplayName()) - childTotalCount = ptr.Val(child.GetTotalItemCount()) //nolint:forbidigo fullFolderName = path.Join(parentFolder, childDisplayName) ) - if messageCount[fullFolderName] != childTotalCount { - fmt.Println("Message count doesn't match:") - fmt.Println("* expected:", messageCount[fullFolderName]) - fmt.Println("* actual:", childTotalCount) - fmt.Println("Item:", fullFolderName, folderID) - os.Exit(1) + if filters.PathContains([]string{dataFolder}).Compare(fullFolderName) { + childTotalCount, _ := ptr.ValOK(child.GetTotalItemCount()) + restoreMessageCount[fullFolderName] = childTotalCount } childFolderCount := ptr.Val(child.GetChildFolderCount()) if childFolderCount > 0 { - checkAllSubFolder(ctx, client, testUser, child, fullFolderName, messageCount) + parentFolder := fullFolderName + checkAllSubFolder(ctx, client, child, testUser, parentFolder, dataFolder, restoreMessageCount) } } } @@ -265,14 +274,17 @@ func checkAllSubFolder( func checkOnedriveRestoration( ctx context.Context, client *msgraphsdk.GraphServiceClient, - testUser, folderName string, + testUser, + folderName string, startTime time.Time, ) { var ( // map itemID -> item size fileSizes = make(map[string]int64) // map itemID -> permission id -> []permission roles - folderPermission = make(map[string]map[string][]string) + folderPermission = make(map[string][]permissionInfo) + restoreFile = make(map[string]int64) + restoreFolderPermission = make(map[string][]permissionInfo) ) drive, err := client. @@ -313,7 +325,6 @@ func checkOnedriveRestoration( } folderTime, hasTime := mustGetTimeFromName(ictx, itemName) - if !isWithinTimeBound(ctx, startTime, folderTime, hasTime) { continue } @@ -323,21 +334,185 @@ func checkOnedriveRestoration( fileSizes[itemName] = ptr.Val(driveItem.GetSize()) } - folderPermission[itemID] = permissionsIn(ctx, client, driveID, itemID, folderPermission[itemID]) + if driveItem.GetFolder() == nil && driveItem.GetPackage() == nil { + continue + } + + // currently we don't restore blank folders. 
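+		// (so an empty source folder never has a restored counterpart to compare against)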
+		// skip permission check for empty folders
+		if driveItem.GetFolder() != nil && ptr.Val(driveItem.GetFolder().GetChildCount()) == 0 {
+			logger.Ctx(ctx).Info("skipped empty folder: ", itemName)
+			fmt.Println("skipped empty folder: ", itemName)
+
+			continue
+		}
+
+		permissionIn(ctx, client, driveID, itemID, itemName, folderPermission)
+		getOneDriveChildFolder(ctx, client, driveID, itemID, itemName, fileSizes, folderPermission, startTime)
	}

-	checkFileData(ctx, client, driveID, restoreFolderID, fileSizes, folderPermission)
+	getRestoreData(ctx, client, driveID, restoreFolderID, restoreFile, restoreFolderPermission, startTime)
+
+	for folderName, permissions := range folderPermission {
+		logger.Ctx(ctx).Infof("checking for folder: %s", folderName)
+		fmt.Printf("checking for folder: %s \n", folderName)
+
+		restoreFolderPerm := restoreFolderPermission[folderName]
+
+		if len(permissions) < 1 {
+			logger.Ctx(ctx).Info("no permissions found for folder: ", folderName)
+			fmt.Println("no permissions found for folder: ", folderName)
+
+			continue
+		}
+
+		if len(restoreFolderPerm) < 1 {
+			logger.Ctx(ctx).Info("permission roles are not equal for: ",
+				"Item:", folderName,
+				"* Permission found: ", permissions,
+				"* blank permission found in restore.")
+
+			fmt.Println("permission roles are not equal for:")
+			fmt.Println("Item:", folderName)
+			fmt.Println("* Permission found: ", permissions)
+			fmt.Println("blank permission found in restore.")
+
+			os.Exit(1)
+		}
+
+		for i, originalPerm := range permissions {
+			restorePerm := restoreFolderPerm[i]
+
+			if originalPerm.entityID != restorePerm.entityID ||
+				!slices.Equal(originalPerm.roles, restorePerm.roles) {
+				logger.Ctx(ctx).Info("permission roles are not equal for: ",
+					"Item:", folderName,
+					"* Original permission: ", originalPerm.entityID,
+					"* Restored permission: ", restorePerm.entityID)
+
+				fmt.Println("permission roles are not equal for:")
+				fmt.Println("Item:", folderName)
+				fmt.Println("* Original permission: ", originalPerm.entityID)
+				fmt.Println("* Restored permission: ", restorePerm.entityID)
+				os.Exit(1)
+			}
+		}
+	}
+
+	for fileName, fileSize := range fileSizes {
+		if fileSize != restoreFile[fileName] {
+			logger.Ctx(ctx).Info("File size does not match for:",
+				"Item:", fileName,
+				"* expected:", fileSize,
+				"* actual:", restoreFile[fileName])
+
+			fmt.Println("File size does not match for:")
+			fmt.Println("item:", fileName)
+			fmt.Println("* expected:", fileSize)
+			fmt.Println("* actual:", restoreFile[fileName])
+			os.Exit(1)
+		}
+	}

	fmt.Println("Success")
}

-func checkFileData(
+func getOneDriveChildFolder(
	ctx context.Context,
	client *msgraphsdk.GraphServiceClient,
-	driveID,
-	restoreFolderID string,
+	driveID, itemID, parentName string,
	fileSizes map[string]int64,
-	folderPermission map[string]map[string][]string,
+	folderPermission map[string][]permissionInfo,
+	startTime time.Time,
+) {
+	response, err := client.DrivesById(driveID).ItemsById(itemID).Children().Get(ctx, nil)
+	if err != nil {
+		fatal(ctx, "getting child folder", err)
+	}
+
+	for _, driveItem := range response.GetValue() {
+		var (
+			itemID   = ptr.Val(driveItem.GetId())
+			itemName = ptr.Val(driveItem.GetName())
+			fullName = parentName + "/" + itemName
+		)
+
+		folderTime, hasTime := mustGetTimeFromName(ctx, itemName)
+		if !isWithinTimeBound(ctx, startTime, folderTime, hasTime) {
+			continue
+		}
+
+		// if it's a file check the size
+		if driveItem.GetFile() != nil {
+			fileSizes[fullName] = ptr.Val(driveItem.GetSize())
+		}
+
+		if driveItem.GetFolder() == nil && driveItem.GetPackage() == nil {
+			continue
+		}
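+		// anything left at this point is a folder or a package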
+
+		// currently we don't restore blank folders.
+		// skip permission check for empty folders
+		if driveItem.GetFolder() != nil && ptr.Val(driveItem.GetFolder().GetChildCount()) == 0 {
+			logger.Ctx(ctx).Info("skipped empty folder: ", fullName)
+			fmt.Println("skipped empty folder: ", fullName)
+
+			continue
+		}
+
+		permissionIn(ctx, client, driveID, itemID, fullName, folderPermission)
+		getOneDriveChildFolder(ctx, client, driveID, itemID, fullName, fileSizes, folderPermission, startTime)
+	}
+}
+
+func permissionIn(
+	ctx context.Context,
+	client *msgraphsdk.GraphServiceClient,
+	driveID, itemID, folderName string,
+	permMap map[string][]permissionInfo,
+) {
+	permMap[folderName] = []permissionInfo{}
+
+	pcr, err := client.
+		DrivesById(driveID).
+		ItemsById(itemID).
+		Permissions().
+		Get(ctx, nil)
+	if err != nil {
+		fatal(ctx, "getting permission", err)
+	}
+
+	for _, perm := range pcr.GetValue() {
+		if perm.GetGrantedToV2() == nil {
+			continue
+		}
+
+		var (
+			gv2     = perm.GetGrantedToV2()
+			perInfo = permissionInfo{}
+		)
+
+		if gv2.GetUser() != nil {
+			perInfo.entityID = ptr.Val(gv2.GetUser().GetId())
+		} else if gv2.GetGroup() != nil {
+			perInfo.entityID = ptr.Val(gv2.GetGroup().GetId())
+		}
+
+		perInfo.roles = perm.GetRoles()
+
+		slices.Sort(perInfo.roles)
+
+		permMap[folderName] = append(permMap[folderName], perInfo)
+	}
+}
+
+func getRestoreData(
+	ctx context.Context,
+	client *msgraphsdk.GraphServiceClient,
+	driveID, restoreFolderID string,
+	restoreFile map[string]int64,
+	restoreFolder map[string][]permissionInfo,
+	startTime time.Time,
 ) {
 	restored, err := client.
 		DrivesById(driveID).
@@ -356,14 +531,7 @@
 	)

 	if item.GetFile() != nil {
-		if itemSize != fileSizes[itemName] {
-			fmt.Println("File size does not match:")
-			fmt.Println("* expected:", fileSizes[itemName])
-			fmt.Println("* actual:", itemSize)
-			fmt.Println("Item:", itemName, itemID)
-			os.Exit(1)
-		}
-
+		restoreFile[itemName] = itemSize
 		continue
 	}

@@ -371,23 +539,8 @@
 		continue
 	}

-	var (
-		expectItem = folderPermission[itemID]
-		results    = permissionsIn(ctx, client, driveID, itemID, nil)
-	)
-
-	for pid, result := range results {
-		expect := expectItem[pid]
-
-		if !slices.Equal(expect, result) {
-			fmt.Println("permissions are not equal")
-			fmt.Println("* expected: ", expect)
-			fmt.Println("* actual: ", result)
-			fmt.Println("Item:", itemName, itemID)
-			fmt.Println("Permission:", pid)
-			os.Exit(1)
-		}
-	}
+	permissionIn(ctx, client, driveID, itemID, itemName, restoreFolder)
+	getOneDriveChildFolder(ctx, client, driveID, itemID, itemName, restoreFile, restoreFolder, startTime)
 }
}

@@ -401,41 +554,6 @@ func fatal(ctx context.Context, msg string, err error) {
 	os.Exit(1)
}

-func permissionsIn(
-	ctx context.Context,
-	client *msgraphsdk.GraphServiceClient,
-	driveID, itemID string,
-	init map[string][]string,
-) map[string][]string {
-	result := map[string][]string{}
-
-	pcr, err := client.
-		DrivesById(driveID).
-		ItemsById(itemID).
-		Permissions().
- Get(ctx, nil) - if err != nil { - fatal(ctx, "getting permission", err) - } - - if len(init) > 0 { - maps.Copy(result, init) - } - - for _, p := range pcr.GetValue() { - var ( - pid = ptr.Val(p.GetId()) - roles = p.GetRoles() - ) - - slices.Sort(roles) - - result[pid] = roles - } - - return result -} - func mustGetTimeFromName(ctx context.Context, name string) (time.Time, bool) { t, err := common.ExtractTime(name) if err != nil && !errors.Is(err, common.ErrNoTimeString) { @@ -445,17 +563,15 @@ func mustGetTimeFromName(ctx context.Context, name string) (time.Time, bool) { return t, !errors.Is(err, common.ErrNoTimeString) } -func isWithinTimeBound(ctx context.Context, bound, check time.Time, skip bool) bool { - if skip { - return true - } +func isWithinTimeBound(ctx context.Context, bound, check time.Time, hasTime bool) bool { + if hasTime { + if bound.Before(check) { + logger.Ctx(ctx). + With("boundary_time", bound, "check_time", check). + Info("skipping restore folder: not older than time bound") - if bound.Before(check) { - logger.Ctx(ctx). - With("boundary_time", bound, "check_time", check). - Info("skipping restore folder: not older than time bound") - - return false + return false + } } return true diff --git a/src/go.mod b/src/go.mod index 9e5c73ac4..c7ca4d78c 100644 --- a/src/go.mod +++ b/src/go.mod @@ -2,11 +2,13 @@ module github.com/alcionai/corso/src go 1.19 +replace github.com/kopia/kopia => github.com/alcionai/kopia v0.12.2-0.20230403174648-98bfae225045 + require ( github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.2.0 - github.com/alcionai/clues v0.0.0-20230327232656-5b9b43a79836 - github.com/armon/go-metrics v0.4.0 - github.com/aws/aws-sdk-go v1.44.220 + github.com/alcionai/clues v0.0.0-20230331202049-339059c90c6e + github.com/armon/go-metrics v0.4.1 + github.com/aws/aws-sdk-go v1.44.237 github.com/aws/aws-xray-sdk-go v1.8.1 github.com/cenkalti/backoff/v4 v4.2.0 github.com/google/uuid v1.3.0 @@ -69,13 +71,13 @@ require ( github.com/go-logr/logr v1.2.3 // indirect github.com/go-logr/stdr v1.2.2 // indirect github.com/golang-jwt/jwt/v4 v4.5.0 // indirect - github.com/golang/protobuf v1.5.2 // indirect + github.com/golang/protobuf v1.5.3 // indirect github.com/hashicorp/golang-lru v0.5.4 // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect github.com/jmespath/go-jmespath v0.4.0 // indirect github.com/json-iterator/go v1.1.12 // indirect github.com/klauspost/compress v1.16.3 // indirect - github.com/klauspost/cpuid/v2 v2.2.3 // indirect + github.com/klauspost/cpuid/v2 v2.2.4 // indirect github.com/klauspost/pgzip v1.2.5 // indirect github.com/klauspost/reedsolomon v1.11.7 // indirect github.com/kylelemons/godebug v1.1.0 // indirect @@ -86,7 +88,7 @@ require ( github.com/mgutz/ansi v0.0.0-20200706080929-d51e80ef957d // indirect github.com/microsoft/kiota-serialization-text-go v0.7.0 github.com/minio/md5-simd v1.1.2 // indirect - github.com/minio/minio-go/v7 v7.0.49 // indirect + github.com/minio/minio-go/v7 v7.0.50 // indirect github.com/minio/sha256-simd v1.0.0 // indirect github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect github.com/modern-go/reflect2 v1.0.2 // indirect @@ -110,16 +112,16 @@ require ( go.opentelemetry.io/otel v1.14.0 // indirect go.opentelemetry.io/otel/trace v1.14.0 // indirect go.uber.org/atomic v1.10.0 // indirect - go.uber.org/multierr v1.9.0 // indirect + go.uber.org/multierr v1.11.0 // indirect golang.org/x/crypto v0.7.0 // indirect golang.org/x/mod v0.9.0 // indirect golang.org/x/net v0.8.0 // indirect 
golang.org/x/sync v0.1.0 // indirect golang.org/x/sys v0.6.0 // indirect golang.org/x/text v0.8.0 // indirect - google.golang.org/genproto v0.0.0-20230303212802-e74f57abe488 // indirect - google.golang.org/grpc v1.53.0 // indirect - google.golang.org/protobuf v1.29.1 // indirect + google.golang.org/genproto v0.0.0-20230320184635-7606e756e683 // indirect + google.golang.org/grpc v1.54.0 // indirect + google.golang.org/protobuf v1.30.0 // indirect gopkg.in/ini.v1 v1.67.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/src/go.sum b/src/go.sum index 138a3768b..cab7a39ae 100644 --- a/src/go.sum +++ b/src/go.sum @@ -53,8 +53,10 @@ github.com/VividCortex/ewma v1.2.0 h1:f58SaIzcDXrSy3kWaHNvuJgJ3Nmz59Zji6XoJR/q1o github.com/VividCortex/ewma v1.2.0/go.mod h1:nz4BbCtbLyFDeC9SUHbtcT5644juEuWfUAUnGx7j5l4= github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d h1:licZJFw2RwpHMqeKTCYkitsPqHNxTmd4SNR5r94FGM8= github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d/go.mod h1:asat636LX7Bqt5lYEZ27JNDcqxfjdBQuJ/MM4CN/Lzo= -github.com/alcionai/clues v0.0.0-20230327232656-5b9b43a79836 h1:239Dcnoe7y4kLeWS6XbdtvFwYOKT9Q28wqSZpwwqtbY= -github.com/alcionai/clues v0.0.0-20230327232656-5b9b43a79836/go.mod h1:DeaMbAwDvYM6ZfPMR/GUl3hceqI5C8jIQ1lstjB2IW8= +github.com/alcionai/clues v0.0.0-20230331202049-339059c90c6e h1:3M/ND3HBj5U2N0q2l7sMbkKTagPMbCnp7Lk6i5bVX4Q= +github.com/alcionai/clues v0.0.0-20230331202049-339059c90c6e/go.mod h1:DeaMbAwDvYM6ZfPMR/GUl3hceqI5C8jIQ1lstjB2IW8= +github.com/alcionai/kopia v0.12.2-0.20230403174648-98bfae225045 h1:KalMY/JU+3t/3IosvP8yLdUWqcy+mAupTjFeV7I+wHg= +github.com/alcionai/kopia v0.12.2-0.20230403174648-98bfae225045/go.mod h1:WGFVh9/5R3bi6vgGw7pPR65I32cyKJjb854467Goz0w= github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= @@ -62,10 +64,10 @@ github.com/alecthomas/units v0.0.0-20190717042225-c3de453c63f4/go.mod h1:ybxpYRF github.com/alessio/shellescape v1.4.1 h1:V7yhSDDn8LP4lc4jS8pFkt0zCnzVJlG5JXy9BVKJUX0= github.com/andybalholm/brotli v1.0.4 h1:V7DdXeJtZscaqfNuAdSRuRFzuiKlHSC/Zh3zl9qY3JY= github.com/andybalholm/brotli v1.0.4/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig= -github.com/armon/go-metrics v0.4.0 h1:yCQqn7dwca4ITXb+CbubHmedzaQYHhNhrEXLYUeEe8Q= -github.com/armon/go-metrics v0.4.0/go.mod h1:E6amYzXo6aW1tqzoZGT755KkbgrJsSdpwZ+3JqfkOG4= -github.com/aws/aws-sdk-go v1.44.220 h1:yAj99qAt0Htjle9Up3DglgHfOP77lmFPrElA4jKnrBo= -github.com/aws/aws-sdk-go v1.44.220/go.mod h1:aVsgQcEevwlmQ7qHE9I3h+dtQgpqhFB+i8Phjh7fkwI= +github.com/armon/go-metrics v0.4.1 h1:hR91U9KYmb6bLBYLQjyM+3j+rcd/UhE+G78SFnF8gJA= +github.com/armon/go-metrics v0.4.1/go.mod h1:E6amYzXo6aW1tqzoZGT755KkbgrJsSdpwZ+3JqfkOG4= +github.com/aws/aws-sdk-go v1.44.237 h1:gsmVP8eTB6id4tmEsBPcjLlYi1sXtKA047bSn7kJZAI= +github.com/aws/aws-sdk-go v1.44.237/go.mod h1:aVsgQcEevwlmQ7qHE9I3h+dtQgpqhFB+i8Phjh7fkwI= github.com/aws/aws-xray-sdk-go v1.8.1 h1:O4pXV+hnCskaamGsZnFpzHyAmgPGusBMN6i7nnsy0Fo= github.com/aws/aws-xray-sdk-go v1.8.1/go.mod h1:wMmVYzej3sykAttNBkXQHK/+clAPWTOrPiajEk7Cp3A= github.com/benbjohnson/clock v1.1.0 h1:Q92kusRqC1XV2MjkWETPvjJVqKetz1OzxZB7mHJLju8= @@ -158,8 +160,8 @@ github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QD 
github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= github.com/golang/protobuf v1.4.3/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= -github.com/golang/protobuf v1.5.2 h1:ROPKBNFfQgOUMifHyP+KYbvpjbdoFNs+aK7DXlji0Tw= -github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= +github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg= +github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= @@ -233,16 +235,14 @@ github.com/klauspost/compress v1.16.3/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQs github.com/klauspost/cpuid/v2 v2.0.1/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= github.com/klauspost/cpuid/v2 v2.0.4/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= github.com/klauspost/cpuid/v2 v2.0.12/go.mod h1:g2LTdtYhdyuGPqyWyv7qRAmj1WBqxuObKfj5c0PQa7c= -github.com/klauspost/cpuid/v2 v2.2.3 h1:sxCkb+qR91z4vsqw4vGGZlDgPz3G7gjaLyK3V8y70BU= -github.com/klauspost/cpuid/v2 v2.2.3/go.mod h1:RVVoqg1df56z8g3pUjL/3lE5UfnlrJX8tyFgg4nqhuY= +github.com/klauspost/cpuid/v2 v2.2.4 h1:acbojRNwl3o09bUq+yDCtZFc1aiwaAAxtcn8YkZXnvk= +github.com/klauspost/cpuid/v2 v2.2.4/go.mod h1:RVVoqg1df56z8g3pUjL/3lE5UfnlrJX8tyFgg4nqhuY= github.com/klauspost/pgzip v1.2.5 h1:qnWYvvKqedOF2ulHpMG72XQol4ILEJ8k2wwRl/Km8oE= github.com/klauspost/pgzip v1.2.5/go.mod h1:Ch1tH69qFZu15pkjo5kYi6mth2Zzwzt50oCQKQE9RUs= github.com/klauspost/reedsolomon v1.11.7 h1:9uaHU0slncktTEEg4+7Vl7q7XUNMBUOK4R9gnKhMjAU= github.com/klauspost/reedsolomon v1.11.7/go.mod h1:4bXRN+cVzMdml6ti7qLouuYi32KHJ5MGv0Qd8a47h6A= github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= github.com/kopia/htmluibuild v0.0.0-20230326183719-f482ef17e2c9 h1:s5Wa89s8RlPjuwqd8K8kuf+T9Kz4+NsbKwR/pJ3PAT0= -github.com/kopia/kopia v0.12.2-0.20230327171220-747baeebdab1 h1:C4Z3JlYWxg/o3EQCjlLcHv9atJXL9j8J1m0scNzjNDQ= -github.com/kopia/kopia v0.12.2-0.20230327171220-747baeebdab1/go.mod h1:D1k/M4+8zCL4ExSawl10G5qKhcky9MNuMwYAtH8jR4c= github.com/kr/fs v0.1.0/go.mod h1:FFnZGqtBN9Gxj7eW1uZ42v5BccTP0vu6NEaFoC2HwRg= github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= @@ -287,8 +287,8 @@ github.com/microsoftgraph/msgraph-sdk-go-core v0.33.0 h1:cDL3ov/IZ2ZarUJdGGPsdR+ github.com/microsoftgraph/msgraph-sdk-go-core v0.33.0/go.mod h1:d0mU3PQAWnN/C4CwPJEZz2QhesrnR5UDnqRu2ODWPkI= github.com/minio/md5-simd v1.1.2 h1:Gdi1DZK69+ZVMoNHRXJyNcxrMA4dSxoYHZSQbirFg34= github.com/minio/md5-simd v1.1.2/go.mod h1:MzdKDxYpY2BT9XQFocsiZf/NKVtR7nkE4RoEpN+20RM= -github.com/minio/minio-go/v7 v7.0.49 h1:dE5DfOtnXMXCjr/HWI6zN9vCrY6Sv666qhhiwUMvGV4= -github.com/minio/minio-go/v7 v7.0.49/go.mod h1:UI34MvQEiob3Cf/gGExGMmzugkM/tNgbFypNDy5LMVc= +github.com/minio/minio-go/v7 v7.0.50 h1:4IL4V8m/kI90ZL6GupCARZVrBv8/XrcKcJhaJ3iz68k= +github.com/minio/minio-go/v7 v7.0.50/go.mod h1:IbbodHyjUAguneyucUaahv+VMNs/EOTV9du7A7/Z3HU= github.com/minio/sha256-simd v1.0.0 h1:v1ta+49hkWZyvaKwrQB8elexRqm6Y0aMLjCNsrYxo6g= 
github.com/minio/sha256-simd v1.0.0/go.mod h1:OuYzVNI5vcoYIAmbIvHPl3N3jUzVedXbKy5RFepssQM= github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY= @@ -432,8 +432,8 @@ go.opentelemetry.io/otel/trace v1.14.0/go.mod h1:8avnQLK+CG77yNLUae4ea2JDQ6iT+go go.uber.org/atomic v1.10.0 h1:9qC72Qh0+3MqyJbAn8YU5xVq1frD8bn3JtD2oXtafVQ= go.uber.org/atomic v1.10.0/go.mod h1:LUxbIzbOniOlMKjJjyPfpl4v+PKK2cNJn91OQbhoJI0= go.uber.org/goleak v1.1.11 h1:wy28qYRKZgnJTxGxvye5/wgWr1EKjmUDGYox5mGlRlI= -go.uber.org/multierr v1.9.0 h1:7fIwc/ZtS0q++VgcfqFDxSBZVv/Xo49/SYnDFupUwlI= -go.uber.org/multierr v1.9.0/go.mod h1:X2jQV1h+kxSjClGpnseKVIxpmcjrj7MNnI0bnlfKTVQ= +go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= +go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= go.uber.org/zap v1.24.0 h1:FiJd5l1UOLj0wCgbSE0rwwXHzEdAZS6hiiSnxJN/D60= go.uber.org/zap v1.24.0/go.mod h1:2kMP+WWQ8aoFoedH3T2sq6iJ2yDWpHbP0f6MQbS9Gkg= golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= @@ -735,8 +735,8 @@ google.golang.org/genproto v0.0.0-20201210142538-e3217bee35cc/go.mod h1:FWY/as6D google.golang.org/genproto v0.0.0-20201214200347-8c77b98c765d/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= google.golang.org/genproto v0.0.0-20210108203827-ffc7fda8c3d7/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= google.golang.org/genproto v0.0.0-20210226172003-ab064af71705/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= -google.golang.org/genproto v0.0.0-20230303212802-e74f57abe488 h1:QQF+HdiI4iocoxUjjpLgvTYDHKm99C/VtTBFnfiCJos= -google.golang.org/genproto v0.0.0-20230303212802-e74f57abe488/go.mod h1:TvhZT5f700eVlTNwND1xoEZQeWTB2RY/65kplwl/bFA= +google.golang.org/genproto v0.0.0-20230320184635-7606e756e683 h1:khxVcsk/FhnzxMKOyD+TDGwjbEOpcPuIpmafPGFmhMA= +google.golang.org/genproto v0.0.0-20230320184635-7606e756e683/go.mod h1:NWraEVixdDnqcqQ30jipen1STv2r/n24Wb7twVTGR4s= google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= google.golang.org/grpc v1.20.1/go.mod h1:10oTOabMzJvdu6/UiuZezV6QK5dSlG84ov/aaiqXj38= google.golang.org/grpc v1.21.1/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM= @@ -753,8 +753,8 @@ google.golang.org/grpc v1.31.1/go.mod h1:N36X2cJ7JwdamYAgDz+s+rVMFjt3numwzf/HckM google.golang.org/grpc v1.33.2/go.mod h1:JMHMWHQWaTccqQQlmk3MJZS+GWXOdAesneDmEnv2fbc= google.golang.org/grpc v1.34.0/go.mod h1:WotjhfgOW/POjDeRt8vscBtXq+2VjORFy659qA51WJ8= google.golang.org/grpc v1.35.0/go.mod h1:qjiiYl8FncCW8feJPdyg3v6XW24KsRHe+dy9BAGRRjU= -google.golang.org/grpc v1.53.0 h1:LAv2ds7cmFV/XTS3XG1NneeENYrXGmorPxsBbptIjNc= -google.golang.org/grpc v1.53.0/go.mod h1:OnIrk0ipVdj4N5d9IUoFUx72/VlD7+jUsHwZgwSMQpw= +google.golang.org/grpc v1.54.0 h1:EhTqbhiYeixwWQtAEZAxmV9MGqcjEU2mFx52xCzNyag= +google.golang.org/grpc v1.54.0/go.mod h1:PUSEXI6iWghWaB6lXM4knEgpJNu2qUcKfDtNci3EC2g= google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= @@ -767,8 +767,8 @@ google.golang.org/protobuf v1.24.0/go.mod h1:r/3tXBNzIEhYS9I1OUVjXDlt8tc493IdKGj google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c= google.golang.org/protobuf v1.26.0-rc.1/go.mod 
h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= -google.golang.org/protobuf v1.29.1 h1:7QBf+IK2gx70Ap/hDsOmam3GE0v9HicjfEdAxE62UoM= -google.golang.org/protobuf v1.29.1/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= +google.golang.org/protobuf v1.30.0 h1:kPPoIgf3TsEvrm0PFe15JQ+570QVxYzEvvHqChK+cng= +google.golang.org/protobuf v1.30.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= diff --git a/src/internal/common/idname.go b/src/internal/common/idname.go new file mode 100644 index 000000000..efee8493f --- /dev/null +++ b/src/internal/common/idname.go @@ -0,0 +1,47 @@ +package common + +import "golang.org/x/exp/maps" + +type IDNamer interface { + // the canonical id of the thing, generated and usable + // by whichever system has ownership of it. + ID() string + // the human-readable name of the thing. + Name() string +} + +type IDNameSwapper interface { + IDOf(name string) (string, bool) + NameOf(id string) (string, bool) + IDs() []string + Names() []string +} + +var _ IDNameSwapper = &IDsNames{} + +type IDsNames struct { + IDToName map[string]string + NameToID map[string]string +} + +// IDOf returns the id associated with the given name. +func (in IDsNames) IDOf(name string) (string, bool) { + id, ok := in.NameToID[name] + return id, ok +} + +// NameOf returns the name associated with the given id. +func (in IDsNames) NameOf(id string) (string, bool) { + name, ok := in.IDToName[id] + return name, ok +} + +// IDs returns all known ids. +func (in IDsNames) IDs() []string { + return maps.Keys(in.IDToName) +} + +// Names returns all known names. +func (in IDsNames) Names() []string { + return maps.Keys(in.NameToID) +} diff --git a/src/internal/common/pii/pii.go b/src/internal/common/pii/pii.go new file mode 100644 index 000000000..102d782d1 --- /dev/null +++ b/src/internal/common/pii/pii.go @@ -0,0 +1,18 @@ +package pii + +import "strings" + +// MapWithPlurls places the toLower value of each string +// into a map[string]struct{}, along with a copy of the that +// string as a plural (ex: FoO => foo, foos). +func MapWithPlurals(ss ...string) map[string]struct{} { + mss := make(map[string]struct{}, len(ss)*2) + + for _, s := range ss { + tl := strings.ToLower(s) + mss[tl] = struct{}{} + mss[tl+"s"] = struct{}{} + } + + return mss +} diff --git a/src/internal/common/pii/url.go b/src/internal/common/pii/url.go new file mode 100644 index 000000000..34707a360 --- /dev/null +++ b/src/internal/common/pii/url.go @@ -0,0 +1,96 @@ +package pii + +import ( + "fmt" + "net/url" + "strings" + + "github.com/alcionai/clues" + "golang.org/x/exp/maps" + "golang.org/x/exp/slices" +) + +// SafeURL complies with the clues.Concealer and fmt.Stringer +// interfaces to produce a safely loggable version of the URL. +// Path elements that equal a SafePathWords entry will show in +// plain text. All other path elements will get hashed by clues. +// Query parameters that match a key in SafeQueryParams will have +// their values displayed in plain text. All other query parames +// will get hashed by clues. 
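+// The safe-word sets can be built by hand or via MapWithPlurals.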
+type SafeURL struct {
+	// the original URL
+	URL string
+	// path elements that do not need to be hidden
+	// keys should be lower-cased
+	SafePathElems map[string]struct{}
+	// query parameters that do not need to be hidden
+	// keys should be lower-cased
+	SafeQueryKeys map[string]struct{}
+}
+
+var _ clues.Concealer = &SafeURL{}
+
+// Conceal produces a string of the url with the sensitive info
+// obscured (hashed or replaced).
+func (u SafeURL) Conceal() string {
+	if len(u.URL) == 0 {
+		return ""
+	}
+
+	p, err := url.Parse(u.URL)
+	if err != nil {
+		return "malformed-URL"
+	}
+
+	elems := slices.Clone(strings.Split(p.EscapedPath(), "/"))
+
+	// conceal any non-safe path elem
+	for i := range elems {
+		e := elems[i]
+
+		if _, ok := u.SafePathElems[strings.ToLower(e)]; !ok {
+			elems[i] = clues.Conceal(e)
+		}
+	}
+
+	qry := maps.Clone(p.Query())
+
+	// conceal any non-safe query param values
+	for k, v := range p.Query() {
+		if _, ok := u.SafeQueryKeys[strings.ToLower(k)]; ok {
+			continue
+		}
+
+		for i := range v {
+			v[i] = clues.Conceal(v[i])
+		}
+
+		qry[k] = v
+	}
+
+	je := strings.Join(elems, "/")
+	esc := p.Scheme + "://" + p.Hostname() + je
+
+	if len(qry) > 0 {
+		esc += "?" + qry.Encode()
+	}
+
+	unesc, err := url.QueryUnescape(esc)
+	if err != nil {
+		return esc
+	}
+
+	return unesc
+}
+
+// Format ensures the SafeURL will output the Conceal() version
+// even when used in a Printf.
+func (u SafeURL) Format(fs fmt.State, _ rune) {
+	fmt.Fprint(fs, u.Conceal())
+}
+
+// String complies with Stringer to ensure the Conceal() version
+// of the url is printed anytime it gets transformed to a string.
+func (u SafeURL) String() string {
+	return u.Conceal()
+}
diff --git a/src/internal/common/pii/url_test.go b/src/internal/common/pii/url_test.go
new file mode 100644
index 000000000..a89fd2d26
--- /dev/null
+++ b/src/internal/common/pii/url_test.go
@@ -0,0 +1,123 @@
+package pii_test
+
+import (
+	"fmt"
+	"testing"
+
+	"github.com/alcionai/clues"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/suite"
+
+	"github.com/alcionai/corso/src/internal/common/pii"
+	"github.com/alcionai/corso/src/internal/tester"
+)
+
+type URLUnitSuite struct {
+	tester.Suite
+}
+
+func TestURLUnitSuite(t *testing.T) {
+	suite.Run(t, &URLUnitSuite{Suite: tester.NewUnitSuite(t)})
+}
+
+// set the clues hashing to mask for the span of this suite
+func (suite *URLUnitSuite) SetupSuite() {
+	clues.SetHasher(clues.HashCfg{HashAlg: clues.Flatmask})
+}
+
+// revert clues hashing to plaintext for all other tests
+func (suite *URLUnitSuite) TearDownSuite() {
+	clues.SetHasher(clues.NoHash())
+}
+
+func (suite *URLUnitSuite) TestDoesThings() {
+	stubURL := "https://host.com/foo/bar/baz/qux?fnords=smarfs&fnords=brunhilda&beaux=regard"
+
+	table := []struct {
+		name      string
+		input     string
+		expect    string
+		safePath  map[string]struct{}
+		safeQuery map[string]struct{}
+	}{
+		{
+			name:   "no safety",
+			input:  stubURL,
+			expect: "https://host.com/***/***/***/***?beaux=***&fnords=***&fnords=***",
+		},
+		{
+			name:     "safe paths",
+			input:    stubURL,
+			expect:   "https://host.com/foo/***/baz/***?beaux=***&fnords=***&fnords=***",
+			safePath: map[string]struct{}{"foo": {}, "baz": {}},
+		},
+		{
+			name:      "safe query",
+			input:     stubURL,
+			expect:    "https://host.com/***/***/***/***?beaux=regard&fnords=***&fnords=***",
+			safeQuery: map[string]struct{}{"beaux": {}},
+		},
+		{
+			name:   "safe path and query",
+			input:  stubURL,
+			expect: "https://host.com/foo/***/baz/***?beaux=regard&fnords=***&fnords=***",
+			safePath:
map[string]struct{}{"foo": {}, "baz": {}}, + safeQuery: map[string]struct{}{"beaux": {}}, + }, + { + name: "empty elements", + input: "https://host.com/foo//baz/?fnords=&beaux=", + expect: "https://host.com/foo//baz/?beaux=&fnords=", + safePath: map[string]struct{}{"foo": {}, "baz": {}}, + }, + { + name: "no path", + input: "https://host.com/", + expect: "https://host.com/", + }, + { + name: "no path with query", + input: "https://host.com/?fnords=smarfs&fnords=brunhilda&beaux=regard", + expect: "https://host.com/?beaux=***&fnords=***&fnords=***", + }, + { + name: "relative path", + input: "/foo/bar/baz/qux?fnords=smarfs&fnords=brunhilda&beaux=regard", + expect: ":///***/***/***/***?beaux=***&fnords=***&fnords=***", + }, + { + name: "malformed url", + input: "i am not a url", + expect: "://***", + }, + { + name: "empty url", + input: "", + expect: "", + }, + } + for _, test := range table { + suite.Run(test.name, func() { + var ( + t = suite.T() + su = pii.SafeURL{ + URL: test.input, + SafePathElems: test.safePath, + SafeQueryKeys: test.safeQuery, + } + ) + + result := su.Conceal() + assert.Equal(t, test.expect, result, "Conceal()") + + result = su.String() + assert.Equal(t, test.expect, result, "String()") + + result = fmt.Sprintf("%s", su) + assert.Equal(t, test.expect, result, "fmt %%s") + + result = fmt.Sprintf("%+v", su) + assert.Equal(t, test.expect, result, "fmt %%+v") + }) + } +} diff --git a/src/internal/common/ptr/pointer.go b/src/internal/common/ptr/pointer.go index a8f9a02b9..fa9d3f606 100644 --- a/src/internal/common/ptr/pointer.go +++ b/src/internal/common/ptr/pointer.go @@ -43,3 +43,10 @@ func OrNow(t *time.Time) time.Time { return *t } + +// To generates a pointer from any value. Primarily useful +// for generating pointers to strings and other primitives +// without needing to store a second variable. +func To[T any](t T) *T { + return &t +} diff --git a/src/internal/common/time.go b/src/internal/common/time.go index 9a39a2a02..23db15b77 100644 --- a/src/internal/common/time.go +++ b/src/internal/common/time.go @@ -53,6 +53,7 @@ var ( dateOnlyRE = regexp.MustCompile(`.*(\d{4}-\d{2}-\d{2}).*`) legacyTimeRE = regexp.MustCompile( `.*(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}?([Zz]|[a-zA-Z]{2}|([\+|\-]([01]\d|2[0-3])))).*`) + simpleTimeTestingRE = regexp.MustCompile(`.*(\d{2}-[a-zA-Z]{3}-\d{4}_\d{2}-\d{2}-\d{2}.\d{6}).*`) simpleDateTimeRE = regexp.MustCompile(`.*(\d{2}-[a-zA-Z]{3}-\d{4}_\d{2}:\d{2}:\d{2}).*`) simpleDateTimeOneDriveRE = regexp.MustCompile(`.*(\d{2}-[a-zA-Z]{3}-\d{4}_\d{2}-\d{2}-\d{2}).*`) standardTimeRE = regexp.MustCompile( @@ -65,6 +66,7 @@ var ( // get eagerly chosen as the parsable format, slicing out some data. 
formats = []TimeFormat{ StandardTime, + SimpleTimeTesting, SimpleDateTime, SimpleDateTimeOneDrive, LegacyTime, @@ -75,6 +77,7 @@ var ( } regexes = []*regexp.Regexp{ standardTimeRE, + simpleTimeTestingRE, simpleDateTimeRE, simpleDateTimeOneDriveRE, legacyTimeRE, diff --git a/src/internal/connector/data_collections.go b/src/internal/connector/data_collections.go index 728e3b52f..dac0d6f29 100644 --- a/src/internal/connector/data_collections.go +++ b/src/internal/connector/data_collections.go @@ -6,6 +6,7 @@ import ( "github.com/alcionai/clues" + "github.com/alcionai/corso/src/internal/common" "github.com/alcionai/corso/src/internal/connector/discovery" "github.com/alcionai/corso/src/internal/connector/discovery/api" "github.com/alcionai/corso/src/internal/connector/exchange" @@ -27,22 +28,26 @@ import ( // Data Collections // --------------------------------------------------------------------------- -// DataCollections utility function to launch backup operations for exchange and -// onedrive. metadataCols contains any collections with metadata files that may -// be useful for the current backup. Metadata can include things like delta -// tokens or the previous backup's folder hierarchy. The absence of metadataCols -// results in all data being pulled. -func (gc *GraphConnector) DataCollections( +// ProduceBackupCollections generates a slice of data.BackupCollections for the service +// specified in the selectors. +// The metadata field can include things like delta tokens or the previous backup's +// folder hierarchy. The absence of metadata causes the collection creation to ignore +// prior history (ie, incrementals) and run a full backup. +func (gc *GraphConnector) ProduceBackupCollections( ctx context.Context, + owner common.IDNamer, sels selectors.Selector, metadata []data.RestoreCollection, ctrlOpts control.Options, errs *fault.Bus, ) ([]data.BackupCollection, map[string]map[string]struct{}, error) { - ctx, end := diagnostics.Span(ctx, "gc:dataCollections", diagnostics.Index("service", sels.Service.String())) + ctx, end := diagnostics.Span( + ctx, + "gc:produceBackupCollections", + diagnostics.Index("service", sels.Service.String())) defer end() - err := verifyBackupInputs(sels, gc.GetSiteIDs()) + err := verifyBackupInputs(sels, gc.IDNameLookup.IDs()) if err != nil { return nil, nil, clues.Stack(err).WithClues(ctx) } @@ -188,10 +193,10 @@ func checkServiceEnabled( return true, nil } -// RestoreDataCollections restores data from the specified collections +// ConsumeRestoreCollections restores data from the specified collections // into M365 using the GraphAPI. 
// SideEffect: gc.status is updated at the completion of operation -func (gc *GraphConnector) RestoreDataCollections( +func (gc *GraphConnector) ConsumeRestoreCollections( ctx context.Context, backupVersion int, acct account.Account, diff --git a/src/internal/connector/data_collections_test.go b/src/internal/connector/data_collections_test.go index 44cffd192..a50ea0106 100644 --- a/src/internal/connector/data_collections_test.go +++ b/src/internal/connector/data_collections_test.go @@ -18,6 +18,7 @@ import ( "github.com/alcionai/corso/src/pkg/fault" "github.com/alcionai/corso/src/pkg/path" "github.com/alcionai/corso/src/pkg/selectors" + "github.com/alcionai/corso/src/pkg/selectors/testdata" ) // --------------------------------------------------------------------------- @@ -129,8 +130,8 @@ func (suite *ConnectorDataCollectionIntegrationSuite) TestExchangeDataCollection } } - status := connector.AwaitStatus() - assert.NotZero(t, status.Metrics.Successes) + status := connector.Wait() + assert.NotZero(t, status.Successes) t.Log(status.String()) }) } @@ -168,7 +169,7 @@ func (suite *ConnectorDataCollectionIntegrationSuite) TestDataCollections_invali name: "Invalid sharepoint backup site", getSelector: func(t *testing.T) selectors.Selector { sel := selectors.NewSharePointBackup(owners) - sel.Include(sel.LibraryFolders(selectors.Any())) + sel.Include(testdata.SharePointBackupFolderScope(sel)) return sel.Selector }, }, @@ -194,7 +195,7 @@ func (suite *ConnectorDataCollectionIntegrationSuite) TestDataCollections_invali name: "missing sharepoint backup site", getSelector: func(t *testing.T) selectors.Selector { sel := selectors.NewSharePointBackup(owners) - sel.Include(sel.LibraryFolders(selectors.Any())) + sel.Include(testdata.SharePointBackupFolderScope(sel)) sel.DiscreteOwner = "" return sel.Selector }, @@ -205,9 +206,10 @@ func (suite *ConnectorDataCollectionIntegrationSuite) TestDataCollections_invali suite.Run(test.name, func() { t := suite.T() - collections, excludes, err := connector.DataCollections( + collections, excludes, err := connector.ProduceBackupCollections( ctx, test.getSelector(t), + test.getSelector(t), nil, control.Options{}, fault.New(true)) @@ -237,7 +239,7 @@ func (suite *ConnectorDataCollectionIntegrationSuite) TestSharePointDataCollecti name: "Libraries", getSelector: func() selectors.Selector { sel := selectors.NewSharePointBackup(selSites) - sel.Include(sel.LibraryFolders(selectors.Any())) + sel.Include(testdata.SharePointBackupFolderScope(sel)) return sel.Selector }, }, @@ -286,8 +288,8 @@ func (suite *ConnectorDataCollectionIntegrationSuite) TestSharePointDataCollecti } } - status := connector.AwaitStatus() - assert.NotZero(t, status.Metrics.Successes) + status := connector.Wait() + assert.NotZero(t, status.Successes) t.Log(status.String()) }) } @@ -333,12 +335,18 @@ func (suite *ConnectorCreateSharePointCollectionIntegrationSuite) TestCreateShar siteIDs = []string{siteID} ) + id, name, err := gc.PopulateOwnerIDAndNamesFrom(siteID, nil) + require.NoError(t, err, clues.ToCore(err)) + sel := selectors.NewSharePointBackup(siteIDs) sel.Include(sel.LibraryFolders([]string{"foo"}, selectors.PrefixMatch())) - cols, excludes, err := gc.DataCollections( + sel.SetDiscreteOwnerIDName(id, name) + + cols, excludes, err := gc.ProduceBackupCollections( ctx, sel.Selector, + sel.Selector, nil, control.Options{}, fault.New(true)) @@ -371,12 +379,18 @@ func (suite *ConnectorCreateSharePointCollectionIntegrationSuite) TestCreateShar siteIDs = []string{siteID} ) - sel := 
selectors.NewSharePointBackup(siteIDs) - sel.Include(sel.Lists(selectors.Any(), selectors.PrefixMatch())) + id, name, err := gc.PopulateOwnerIDAndNamesFrom(siteID, nil) + require.NoError(t, err, clues.ToCore(err)) - cols, excludes, err := gc.DataCollections( + sel := selectors.NewSharePointBackup(siteIDs) + sel.Include(sel.Lists(selectors.Any())) + + sel.SetDiscreteOwnerIDName(id, name) + + cols, excludes, err := gc.ProduceBackupCollections( ctx, sel.Selector, + sel.Selector, nil, control.Options{}, fault.New(true)) diff --git a/src/internal/connector/discovery/api/sites.go b/src/internal/connector/discovery/api/sites.go new file mode 100644 index 000000000..5d64bb778 --- /dev/null +++ b/src/internal/connector/discovery/api/sites.go @@ -0,0 +1,142 @@ +package api + +import ( + "context" + "fmt" + "strings" + + "github.com/alcionai/clues" + msgraphgocore "github.com/microsoftgraph/msgraph-sdk-go-core" + "github.com/microsoftgraph/msgraph-sdk-go/models" + "github.com/pkg/errors" + + "github.com/alcionai/corso/src/internal/common/ptr" + "github.com/alcionai/corso/src/internal/connector/graph" + "github.com/alcionai/corso/src/pkg/fault" +) + +// --------------------------------------------------------------------------- +// controller +// --------------------------------------------------------------------------- + +func (c Client) Sites() Sites { + return Sites{c} +} + +// Sites is an interface-compliant provider of the client. +type Sites struct { + Client +} + +// --------------------------------------------------------------------------- +// methods +// --------------------------------------------------------------------------- + +// GetAll retrieves all sites. +func (c Sites) GetAll(ctx context.Context, errs *fault.Bus) ([]models.Siteable, error) { + service, err := c.service() + if err != nil { + return nil, err + } + + resp, err := service.Client().Sites().Get(ctx, nil) + if err != nil { + return nil, graph.Wrap(ctx, err, "getting all sites") + } + + iter, err := msgraphgocore.NewPageIterator( + resp, + service.Adapter(), + models.CreateSiteCollectionResponseFromDiscriminatorValue) + if err != nil { + return nil, graph.Wrap(ctx, err, "creating sites iterator") + } + + var ( + us = make([]models.Siteable, 0) + el = errs.Local() + ) + + iterator := func(item any) bool { + if el.Failure() != nil { + return false + } + + s, err := validateSite(item) + if errors.Is(err, errKnownSkippableCase) { + // safe to no-op + return true + } + + if err != nil { + el.AddRecoverable(graph.Wrap(ctx, err, "validating site")) + return true + } + + us = append(us, s) + + return true + } + + if err := iter.Iterate(ctx, iterator); err != nil { + return nil, graph.Wrap(ctx, err, "enumerating sites") + } + + return us, el.Failure() +} + +func (c Sites) GetByID(ctx context.Context, id string) (models.Siteable, error) { + resp, err := c.stable.Client().SitesById(id).Get(ctx, nil) + if err != nil { + return nil, graph.Wrap(ctx, err, "getting site") + } + + return resp, err +} + +// --------------------------------------------------------------------------- +// helpers +// --------------------------------------------------------------------------- + +var errKnownSkippableCase = clues.New("case is known and skippable") + +const personalSitePath = "sharepoint.com/personal/" + +// validateSite ensures the item is a Siteable, and contains the necessary +// identifiers that we handle with all users. +// returns the item as a Siteable model. 
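+// Personal (OneDrive) sites and the built-in /search site are flagged with errKnownSkippableCase.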
+func validateSite(item any) (models.Siteable, error) { + m, ok := item.(models.Siteable) + if !ok { + return nil, clues.New(fmt.Sprintf("unexpected model: %T", item)) + } + + id := ptr.Val(m.GetId()) + if len(id) == 0 { + return nil, clues.New("missing ID") + } + + url := ptr.Val(m.GetWebUrl()) + if len(url) == 0 { + return nil, clues.New("missing webURL").With("site_id", id) // TODO: pii + } + + // personal (ie: oneDrive) sites have to be filtered out server-side. + if strings.Contains(url, personalSitePath) { + return nil, clues.Stack(errKnownSkippableCase). + With("site_id", id, "site_url", url) // TODO: pii + } + + name := ptr.Val(m.GetDisplayName()) + if len(name) == 0 { + // the built-in site at "https://{tenant-domain}/search" never has a name. + if strings.HasSuffix(url, "/search") { + return nil, clues.Stack(errKnownSkippableCase). + With("site_id", id, "site_url", url) // TODO: pii + } + + return nil, clues.New("missing site display name").With("site_id", id) + } + + return m, nil +} diff --git a/src/internal/connector/discovery/api/sites_test.go b/src/internal/connector/discovery/api/sites_test.go new file mode 100644 index 000000000..b03b464b9 --- /dev/null +++ b/src/internal/connector/discovery/api/sites_test.go @@ -0,0 +1,155 @@ +package api + +import ( + "testing" + + "github.com/alcionai/clues" + "github.com/microsoftgraph/msgraph-sdk-go/models" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "github.com/stretchr/testify/suite" + + "github.com/alcionai/corso/src/internal/common/ptr" + "github.com/alcionai/corso/src/internal/tester" + "github.com/alcionai/corso/src/pkg/account" + "github.com/alcionai/corso/src/pkg/fault" +) + +type SitesUnitSuite struct { + tester.Suite +} + +func TestSitesUnitSuite(t *testing.T) { + suite.Run(t, &SitesUnitSuite{Suite: tester.NewUnitSuite(t)}) +} + +func (suite *SitesUnitSuite) TestValidateSite() { + site := models.NewSite() + site.SetWebUrl(ptr.To("sharepoint.com/sites/foo")) + site.SetDisplayName(ptr.To("testsite")) + site.SetId(ptr.To("testID")) + + tests := []struct { + name string + args any + want models.Siteable + errCheck assert.ErrorAssertionFunc + errIsSkippable bool + }{ + { + name: "Invalid type", + args: string("invalid type"), + errCheck: assert.Error, + }, + { + name: "No ID", + args: models.NewSite(), + errCheck: assert.Error, + }, + { + name: "No WebURL", + args: func() *models.Site { + s := models.NewSite() + s.SetId(ptr.To("id")) + return s + }(), + errCheck: assert.Error, + }, + { + name: "No name", + args: func() *models.Site { + s := models.NewSite() + s.SetId(ptr.To("id")) + s.SetWebUrl(ptr.To("sharepoint.com/sites/foo")) + return s + }(), + errCheck: assert.Error, + }, + { + name: "Search site", + args: func() *models.Site { + s := models.NewSite() + s.SetId(ptr.To("id")) + s.SetWebUrl(ptr.To("sharepoint.com/search")) + return s + }(), + errCheck: assert.Error, + errIsSkippable: true, + }, + { + name: "Personal OneDrive", + args: func() *models.Site { + s := models.NewSite() + s.SetId(ptr.To("id")) + s.SetWebUrl(ptr.To("https://" + personalSitePath + "/someone's/onedrive")) + return s + }(), + errCheck: assert.Error, + errIsSkippable: true, + }, + { + name: "Valid Site", + args: site, + want: site, + errCheck: assert.NoError, + }, + } + for _, test := range tests { + suite.Run(test.name, func() { + t := suite.T() + + got, err := validateSite(test.args) + test.errCheck(t, err, clues.ToCore(err)) + + if test.errIsSkippable { + assert.ErrorIs(t, err, errKnownSkippableCase) + } + + 
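+			// failure cases leave test.want nil, so the returned model must also be nil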
assert.Equal(t, test.want, got) + }) + } +} + +type SitesIntgSuite struct { + tester.Suite + + creds account.M365Config +} + +func TestSitesIntgSuite(t *testing.T) { + suite.Run(t, &SitesIntgSuite{ + Suite: tester.NewIntegrationSuite( + t, + [][]string{tester.M365AcctCredEnvs, tester.AWSStorageCredEnvs}), + }) +} + +func (suite *SitesIntgSuite) SetupSuite() { + var ( + t = suite.T() + acct = tester.NewM365Account(t) + ) + + m365, err := acct.M365Config() + require.NoError(t, err, clues.ToCore(err)) + + suite.creds = m365 +} + +func (suite *SitesIntgSuite) TestGetAll() { + ctx, flush := tester.NewContext() + defer flush() + + t := suite.T() + + cli, err := NewClient(suite.creds) + require.NoError(t, err, clues.ToCore(err)) + + sites, err := cli.Sites().GetAll(ctx, fault.New(true)) + require.NoError(t, err) + require.NotZero(t, len(sites), "must have at least one site") + + for _, site := range sites { + assert.NotContains(t, ptr.Val(site.GetWebUrl()), personalSitePath, "must not return onedrive sites") + } +} diff --git a/src/internal/connector/discovery/discovery.go b/src/internal/connector/discovery/discovery.go index 0090c51ea..e955803c0 100644 --- a/src/internal/connector/discovery/discovery.go +++ b/src/internal/connector/discovery/discovery.go @@ -29,6 +29,24 @@ type getWithInfoer interface { getInfoer } +// --------------------------------------------------------------------------- +// helpers +// --------------------------------------------------------------------------- + +func apiClient(ctx context.Context, acct account.Account) (api.Client, error) { + m365, err := acct.M365Config() + if err != nil { + return api.Client{}, clues.Wrap(err, "retrieving m365 account configuration").WithClues(ctx) + } + + client, err := api.NewClient(m365) + if err != nil { + return api.Client{}, clues.Wrap(err, "creating api client").WithClues(ctx) + } + + return client, nil +} + // --------------------------------------------------------------------------- // api // --------------------------------------------------------------------------- @@ -39,19 +57,15 @@ func Users( acct account.Account, errs *fault.Bus, ) ([]models.Userable, error) { - m365, err := acct.M365Config() + client, err := apiClient(ctx, acct) if err != nil { - return nil, clues.Wrap(err, "retrieving m365 account configuration").WithClues(ctx) - } - - client, err := api.NewClient(m365) - if err != nil { - return nil, clues.Wrap(err, "creating api client").WithClues(ctx) + return nil, err } return client.Users().GetAll(ctx, errs) } +// User fetches a single user's data. 
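+// It returns the user model along with the user's api.UserInfo.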
func User(ctx context.Context, gwi getWithInfoer, userID string) (models.Userable, *api.UserInfo, error) { u, err := gwi.GetByID(ctx, userID) if err != nil { @@ -69,3 +83,17 @@ func User(ctx context.Context, gwi getWithInfoer, userID string) (models.Userabl return u, ui, nil } + +// Sites fetches all sharepoint sites in the tenant +func Sites( + ctx context.Context, + acct account.Account, + errs *fault.Bus, +) ([]models.Siteable, error) { + client, err := apiClient(ctx, acct) + if err != nil { + return nil, err + } + + return client.Sites().GetAll(ctx, errs) +} diff --git a/src/internal/connector/discovery/discovery_test.go b/src/internal/connector/discovery/discovery_test.go index c9a2c3f48..8191b7b3f 100644 --- a/src/internal/connector/discovery/discovery_test.go +++ b/src/internal/connector/discovery/discovery_test.go @@ -31,10 +31,11 @@ func (suite *DiscoveryIntegrationSuite) TestUsers() { ctx, flush := tester.NewContext() defer flush() - t := suite.T() - - acct := tester.NewM365Account(t) - errs := fault.New(true) + var ( + t = suite.T() + acct = tester.NewM365Account(t) + errs = fault.New(true) + ) users, err := discovery.Users(ctx, acct, errs) assert.NoError(t, err, clues.ToCore(err)) @@ -42,8 +43,7 @@ func (suite *DiscoveryIntegrationSuite) TestUsers() { ferrs := errs.Errors() assert.Nil(t, ferrs.Failure) assert.Empty(t, ferrs.Recovered) - - assert.Less(t, 0, len(users)) + assert.NotEmpty(t, users) } func (suite *DiscoveryIntegrationSuite) TestUsers_InvalidCredentials() { @@ -84,16 +84,85 @@ func (suite *DiscoveryIntegrationSuite) TestUsers_InvalidCredentials() { for _, test := range table { suite.Run(test.name, func() { - t := suite.T() + var ( + t = suite.T() + a = test.acct(t) + errs = fault.New(true) + ) - a := test.acct(t) - errs := fault.New(true) users, err := discovery.Users(ctx, a, errs) - assert.Empty(t, users, "returned some users") assert.NotNil(t, err) - // TODO(ashmrtn): Uncomment when fault package is used in discovery API. 
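For orientation, here is how a caller might drive the new `discovery.Sites` entry point, patterned on the integration test in this hunk. It assumes the corso package paths used elsewhere in this patch and is a usage sketch, not part of the change.

```go
package example

import (
	"context"
	"fmt"

	"github.com/alcionai/corso/src/internal/common/ptr"
	"github.com/alcionai/corso/src/internal/connector/discovery"
	"github.com/alcionai/corso/src/pkg/account"
	"github.com/alcionai/corso/src/pkg/fault"
)

// listSites enumerates the tenant's SharePoint sites; personal OneDrive
// sites are already filtered out by validateSite.
func listSites(ctx context.Context, acct account.Account) error {
	// fail-fast bus: the first recoverable error aborts the enumeration
	errs := fault.New(true)

	sites, err := discovery.Sites(ctx, acct, errs)
	if err != nil {
		return err
	}

	for _, s := range sites {
		fmt.Println(ptr.Val(s.GetId()), ptr.Val(s.GetWebUrl()))
	}

	return errs.Failure()
}
```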
- // assert.NotNil(t, errs.Err()) + }) + } +} + +func (suite *DiscoveryIntegrationSuite) TestSites() { + ctx, flush := tester.NewContext() + defer flush() + + var ( + t = suite.T() + acct = tester.NewM365Account(t) + errs = fault.New(true) + ) + + sites, err := discovery.Sites(ctx, acct, errs) + assert.NoError(t, err, clues.ToCore(err)) + + ferrs := errs.Errors() + assert.Nil(t, ferrs.Failure) + assert.Empty(t, ferrs.Recovered) + assert.NotEmpty(t, sites) +} + +func (suite *DiscoveryIntegrationSuite) TestSites_InvalidCredentials() { + ctx, flush := tester.NewContext() + defer flush() + + table := []struct { + name string + acct func(t *testing.T) account.Account + }{ + { + name: "Invalid Credentials", + acct: func(t *testing.T) account.Account { + a, err := account.NewAccount( + account.ProviderM365, + account.M365Config{ + M365: credentials.M365{ + AzureClientID: "Test", + AzureClientSecret: "without", + }, + AzureTenantID: "data", + }, + ) + require.NoError(t, err, clues.ToCore(err)) + + return a + }, + }, + { + name: "Empty Credentials", + acct: func(t *testing.T) account.Account { + // intentionally swallowing the error here + a, _ := account.NewAccount(account.ProviderM365) + return a + }, + }, + } + + for _, test := range table { + suite.Run(test.name, func() { + var ( + t = suite.T() + a = test.acct(t) + errs = fault.New(true) + ) + + sites, err := discovery.Sites(ctx, a, errs) + assert.Empty(t, sites, "returned some sites") + assert.NotNil(t, err) }) } } diff --git a/src/internal/connector/exchange/api/contacts.go b/src/internal/connector/exchange/api/contacts.go index 90af25bac..f4f768519 100644 --- a/src/internal/connector/exchange/api/contacts.go +++ b/src/internal/connector/exchange/api/contacts.go @@ -258,11 +258,10 @@ func (c Contacts) GetAddedAndRemovedItemIDs( if len(os.Getenv("CORSO_URL_LOGGING")) > 0 { gri, err := builder.ToGetRequestInformation(ctx, options) if err != nil { - logger.Ctx(ctx).Errorw("getting builder info", "error", err) + logger.CtxErr(ctx, err).Error("getting builder info") } else { logger.Ctx(ctx). - With("user", user, "container", directoryID). - Warnw("builder path-parameters", "path_parameters", gri.PathParameters) + Infow("builder path-parameters", "path_parameters", gri.PathParameters) } } diff --git a/src/internal/connector/exchange/api/events.go b/src/internal/connector/exchange/api/events.go index 421e4a66b..dfa4d8541 100644 --- a/src/internal/connector/exchange/api/events.go +++ b/src/internal/connector/exchange/api/events.go @@ -292,11 +292,10 @@ func (c Events) GetAddedAndRemovedItemIDs( if len(os.Getenv("CORSO_URL_LOGGING")) > 0 { gri, err := builder.ToGetRequestInformation(ctx, nil) if err != nil { - logger.Ctx(ctx).Errorw("getting builder info", "error", err) + logger.CtxErr(ctx, err).Error("getting builder info") } else { logger.Ctx(ctx). - With("user", user, "container", calendarID). 
- Warnw("builder path-parameters", "path_parameters", gri.PathParameters) + Infow("builder path-parameters", "path_parameters", gri.PathParameters) } } diff --git a/src/internal/connector/exchange/api/mail.go b/src/internal/connector/exchange/api/mail.go index 095ab1525..03c302461 100644 --- a/src/internal/connector/exchange/api/mail.go +++ b/src/internal/connector/exchange/api/mail.go @@ -303,11 +303,10 @@ func (c Mail) GetAddedAndRemovedItemIDs( if len(os.Getenv("CORSO_URL_LOGGING")) > 0 { gri, err := builder.ToGetRequestInformation(ctx, options) if err != nil { - logger.Ctx(ctx).Errorw("getting builder info", "error", err) + logger.CtxErr(ctx, err).Error("getting builder info") } else { logger.Ctx(ctx). - With("user", user, "container", directoryID). - Warnw("builder path-parameters", "path_parameters", gri.PathParameters) + Infow("builder path-parameters", "path_parameters", gri.PathParameters) } } diff --git a/src/internal/connector/exchange/attachment.go b/src/internal/connector/exchange/attachment.go index 019699e77..4c6c99d13 100644 --- a/src/internal/connector/exchange/attachment.go +++ b/src/internal/connector/exchange/attachment.go @@ -9,6 +9,7 @@ import ( "github.com/microsoftgraph/msgraph-sdk-go/models" "github.com/alcionai/corso/src/internal/common/ptr" + "github.com/alcionai/corso/src/internal/connector/graph" "github.com/alcionai/corso/src/internal/connector/support" "github.com/alcionai/corso/src/internal/connector/uploadsession" "github.com/alcionai/corso/src/pkg/logger" @@ -52,7 +53,7 @@ func uploadAttachment( ctx, "attachment_size", ptr.Val(attachment.GetSize()), "attachment_id", ptr.Val(attachment.GetId()), - "attachment_name", ptr.Val(attachment.GetName()), // TODO: pii + "attachment_name", clues.Hide(ptr.Val(attachment.GetName())), "attachment_type", attachmentType, "internal_item_type", getItemAttachmentItemType(attachment), "uploader_item_id", uploader.getItemID()) @@ -104,7 +105,7 @@ func uploadLargeAttachment( url := ptr.Val(session.GetUploadUrl()) aw := uploadsession.NewWriter(uploader.getItemID(), url, size) - logger.Ctx(ctx).Debugw("uploading large attachment", "attachment_url", url) // TODO: url pii + logger.Ctx(ctx).Debugw("uploading large attachment", "attachment_url", graph.LoggableURL(url)) // Upload the stream data copyBuffer := make([]byte, attachmentChunkSize) diff --git a/src/internal/connector/exchange/data_collections.go b/src/internal/connector/exchange/data_collections.go index 3391b96f4..87a0842b8 100644 --- a/src/internal/connector/exchange/data_collections.go +++ b/src/internal/connector/exchange/data_collections.go @@ -279,7 +279,7 @@ func createCollections( foldersComplete, closer := observe.MessageWithCompletion( ctx, - observe.Bulletf("%s", observe.Safe(qp.Category.String()))) + observe.Bulletf("%s", qp.Category)) defer closer() defer close(foldersComplete) diff --git a/src/internal/connector/exchange/exchange_data_collection.go b/src/internal/connector/exchange/exchange_data_collection.go index 1b935cfca..2f8f9d4f2 100644 --- a/src/internal/connector/exchange/exchange_data_collection.go +++ b/src/internal/connector/exchange/exchange_data_collection.go @@ -7,7 +7,6 @@ import ( "bytes" "context" "io" - "strings" "sync" "sync/atomic" "time" @@ -83,8 +82,7 @@ type Collection struct { // LocationPath contains the path with human-readable display names. // IE: "/Inbox/Important" instead of "/abcdxyz123/algha=lgkhal=t" - // Currently only implemented for Exchange Calendars. 
- locationPath path.Path + locationPath *path.Builder state data.CollectionState @@ -100,7 +98,8 @@ type Collection struct { // or notMoved (if they match). func NewCollection( user string, - curr, prev, location path.Path, + curr, prev path.Path, + location *path.Builder, category path.CategoryType, items itemer, statusUpdater support.StatusUpdater, @@ -140,7 +139,7 @@ func (col *Collection) FullPath() path.Path { // LocationPath produces the Collection's full path, but with display names // instead of IDs in the folders. Only populated for Calendars. -func (col *Collection) LocationPath() path.Path { +func (col *Collection) LocationPath() *path.Builder { return col.locationPath } @@ -186,7 +185,8 @@ func (col *Collection) streamItems(ctx context.Context, errs *fault.Bus) { colProgress, closer = observe.CollectionProgress( ctx, col.fullPath.Category().String(), - observe.PII(col.fullPath.Folder(false))) + // TODO(keepers): conceal compliance in path, drop Hide() + clues.Hide(col.fullPath.Folder(false))) go closer() @@ -252,11 +252,10 @@ func (col *Collection) streamItems(ctx context.Context, errs *fault.Bus) { defer wg.Done() defer func() { <-semaphoreCh }() - item, info, err := getItemWithRetries( + item, info, err := col.items.GetItem( ctx, user, id, - col.items, fault.New(true)) // temporary way to force a failFast error if err != nil { // Don't report errors for deleted items as there's no way for us to @@ -280,7 +279,7 @@ func (col *Collection) streamItems(ctx context.Context, errs *fault.Bus) { } info.Size = int64(len(data)) - info.ParentPath = strings.Join(col.fullPath.Folders(), "/") + info.ParentPath = col.locationPath.String() col.data <- &Stream{ id: id, @@ -301,21 +300,6 @@ func (col *Collection) streamItems(ctx context.Context, errs *fault.Bus) { wg.Wait() } -// get an item while handling retry and backoff. -func getItemWithRetries( - ctx context.Context, - userID, itemID string, - items itemer, - errs *fault.Bus, -) (serialization.Parsable, *details.ExchangeInfo, error) { - item, info, err := items.GetItem(ctx, userID, itemID, errs) - if err != nil { - return nil, nil, err - } - - return item, info, nil -} - // terminatePopulateSequence is a utility function used to close a Collection's data channel // and to send the status update through the channel. 
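The collection above now tracks its human-readable location as a `*path.Builder` rather than a full `path.Path`, and item details derive `ParentPath` from it instead of from the ID-based storage path. A stand-in sketch showing why the two representations diverge (stable IDs for storage, display names for details); this `pathBuilder` is illustrative, not corso's `path.Builder`.

```go
package main

import (
	"fmt"
	"strings"
)

type pathBuilder struct{ elems []string }

// Append returns a new builder so partially built paths can be shared safely.
func (p pathBuilder) Append(elems ...string) pathBuilder {
	next := make([]string, 0, len(p.elems)+len(elems))
	next = append(next, p.elems...)
	next = append(next, elems...)

	return pathBuilder{elems: next}
}

func (p pathBuilder) String() string { return strings.Join(p.elems, "/") }

func main() {
	// ID-based storage path vs. the human-readable location recorded
	// for details.ParentPath
	ids := pathBuilder{}.Append("abcdxyz123", "algha=lgkhal=t")
	loc := pathBuilder{}.Append("Inbox", "Important")

	fmt.Println(ids) // abcdxyz123/algha=lgkhal=t
	fmt.Println(loc) // Inbox/Important
}
```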
func (col *Collection) finishPopulation( diff --git a/src/internal/connector/exchange/exchange_data_collection_test.go b/src/internal/connector/exchange/exchange_data_collection_test.go index 4df0bbb87..d78911b92 100644 --- a/src/internal/connector/exchange/exchange_data_collection_test.go +++ b/src/internal/connector/exchange/exchange_data_collection_test.go @@ -133,34 +133,34 @@ func (suite *ExchangeDataCollectionSuite) TestNewCollection_state() { require.NoError(suite.T(), err, clues.ToCore(err)) barP, err := path.Build("t", "u", path.ExchangeService, path.EmailCategory, false, "bar") require.NoError(suite.T(), err, clues.ToCore(err)) - locP, err := path.Build("t", "u", path.ExchangeService, path.EmailCategory, false, "human-readable") - require.NoError(suite.T(), err, clues.ToCore(err)) + + locPB := path.Builder{}.Append("human-readable") table := []struct { name string prev path.Path curr path.Path - loc path.Path + loc *path.Builder expect data.CollectionState }{ { name: "new", curr: fooP, - loc: locP, + loc: locPB, expect: data.NewState, }, { name: "not moved", prev: fooP, curr: fooP, - loc: locP, + loc: locPB, expect: data.NotMovedState, }, { name: "moved", prev: fooP, curr: barP, - loc: locP, + loc: locPB, expect: data.MovedState, }, { @@ -228,7 +228,7 @@ func (suite *ExchangeDataCollectionSuite) TestGetItemWithRetries() { defer flush() // itemer is mocked, so only the errors are configured atm. - _, _, err := getItemWithRetries(ctx, "userID", "itemID", test.items, fault.New(true)) + _, _, err := test.items.GetItem(ctx, "userID", "itemID", fault.New(true)) test.expectErr(suite.T(), err) }) } diff --git a/src/internal/connector/exchange/service_functions.go b/src/internal/connector/exchange/service_functions.go index 0827fbb05..5fd1a0845 100644 --- a/src/internal/connector/exchange/service_functions.go +++ b/src/internal/connector/exchange/service_functions.go @@ -95,7 +95,7 @@ func includeContainer( qp graph.QueryParams, c graph.CachedContainer, scope selectors.ExchangeScope, -) (path.Path, path.Path, bool) { +) (path.Path, *path.Builder, bool) { var ( directory string locPath path.Path @@ -154,5 +154,5 @@ func includeContainer( return nil, nil, false } - return pathRes, locPath, ok + return pathRes, loc, ok } diff --git a/src/internal/connector/exchange/service_iterators.go b/src/internal/connector/exchange/service_iterators.go index a014f0797..29551b21b 100644 --- a/src/internal/connector/exchange/service_iterators.go +++ b/src/internal/connector/exchange/service_iterators.go @@ -115,10 +115,6 @@ func filterContainersAndFillCollections( deltaURLs[cID] = newDelta.URL } - if qp.Category != path.EventsCategory { - locPath = nil - } - edc := NewCollection( qp.ResourceOwner, currPath, diff --git a/src/internal/connector/exchange/service_iterators_test.go b/src/internal/connector/exchange/service_iterators_test.go index f14601b8c..9d72dc181 100644 --- a/src/internal/connector/exchange/service_iterators_test.go +++ b/src/internal/connector/exchange/service_iterators_test.go @@ -162,7 +162,7 @@ func (suite *ServiceIteratorsSuite) TestFilterContainersAndFillCollections() { getter mockGetter resolver graph.ContainerResolver scope selectors.ExchangeScope - failFast bool + failFast control.FailureBehavior expectErr assert.ErrorAssertionFunc expectNewColls int expectMetadataColls int @@ -271,7 +271,7 @@ func (suite *ServiceIteratorsSuite) TestFilterContainersAndFillCollections() { }, resolver: newMockResolver(container1, container2), scope: allScope, - failFast: true, + failFast: 
control.FailFast, expectErr: assert.NoError, expectNewColls: 2, expectMetadataColls: 1, @@ -285,7 +285,7 @@ func (suite *ServiceIteratorsSuite) TestFilterContainersAndFillCollections() { }, resolver: newMockResolver(container1, container2), scope: allScope, - failFast: true, + failFast: control.FailFast, expectErr: assert.Error, expectNewColls: 0, expectMetadataColls: 0, @@ -309,8 +309,8 @@ func (suite *ServiceIteratorsSuite) TestFilterContainersAndFillCollections() { test.resolver, test.scope, dps, - control.Options{FailFast: test.failFast}, - fault.New(test.failFast)) + control.Options{FailureHandling: test.failFast}, + fault.New(test.failFast == control.FailFast)) test.expectErr(t, err, clues.ToCore(err)) // collection assertions @@ -465,7 +465,7 @@ func (suite *ServiceIteratorsSuite) TestFilterContainersAndFillCollections_repea resolver, allScope, dps, - control.Options{FailFast: true}, + control.Options{FailureHandling: control.FailFast}, fault.New(true)) require.NoError(t, err, clues.ToCore(err)) diff --git a/src/internal/connector/exchange/service_restore.go b/src/internal/connector/exchange/service_restore.go index 3a0cdd68e..10808e7ed 100644 --- a/src/internal/connector/exchange/service_restore.go +++ b/src/internal/connector/exchange/service_restore.go @@ -314,7 +314,7 @@ func RestoreExchangeDataCollections( if len(dcs) > 0 { userID = dcs[0].FullPath().ResourceOwner() - ctx = clues.Add(ctx, "resource_owner", userID) // TODO: pii + ctx = clues.Add(ctx, "resource_owner", clues.Hide(userID)) } for _, dc := range dcs { @@ -390,7 +390,7 @@ func restoreCollection( colProgress, closer := observe.CollectionProgress( ctx, category.String(), - observe.PII(directory.Folder(false))) + clues.Hide(directory.Folder(false))) defer closer() defer close(colProgress) diff --git a/src/internal/connector/graph/service.go b/src/internal/connector/graph/service.go index 82f634d92..ab2d890fc 100644 --- a/src/internal/connector/graph/service.go +++ b/src/internal/connector/graph/service.go @@ -19,6 +19,7 @@ import ( msgraphgocore "github.com/microsoftgraph/msgraph-sdk-go-core" "golang.org/x/time/rate" + "github.com/alcionai/corso/src/internal/common/pii" "github.com/alcionai/corso/src/internal/events" "github.com/alcionai/corso/src/pkg/account" "github.com/alcionai/corso/src/pkg/logger" @@ -28,6 +29,7 @@ import ( const ( logGraphRequestsEnvKey = "LOG_GRAPH_REQUESTS" log2xxGraphRequestsEnvKey = "LOG_2XX_GRAPH_REQUESTS" + log2xxGraphResponseEnvKey = "LOG_2XX_GRAPH_RESPONSES" retryAttemptHeader = "Retry-Attempt" retryAfterHeader = "Retry-After" defaultMaxRetries = 3 @@ -271,20 +273,86 @@ type Servicer interface { // LoggingMiddleware can be used to log the http request sent by the graph client type LoggingMiddleware struct{} +// well-known path names used by graph api calls +// used to un-hide path elements in a pii.SafeURL +var safePathParams = pii.MapWithPlurals( + //nolint:misspell + "alltime", + "analytics", + "archive", + "beta", + "calendargroup", + "calendar", + "calendarview", + "channel", + "childfolder", + "children", + "clone", + "column", + "contactfolder", + "contact", + "contenttype", + "delta", + "drive", + "event", + "group", + "inbox", + "instance", + "invitation", + "item", + "joinedteam", + "label", + "list", + "mailfolder", + "member", + "message", + "notification", + "page", + "primarychannel", + "root", + "security", + "site", + "subscription", + "team", + "unarchive", + "user", + "v1.0") + +// well-known safe query parameters used by graph api calls +// +// used to un-hide query 
params in a pii.SafeURL +var safeQueryParams = map[string]struct{}{ + "deltatoken": {}, + "startdatetime": {}, + "enddatetime": {}, + "$count": {}, + "$expand": {}, + "$filter": {}, + "$select": {}, + "$top": {}, +} + +func LoggableURL(url string) pii.SafeURL { + return pii.SafeURL{ + URL: url, + SafePathElems: safePathParams, + SafeQueryKeys: safeQueryParams, + } +} + func (handler *LoggingMiddleware) Intercept( pipeline khttp.Pipeline, middlewareIndex int, req *http.Request, ) (*http.Response, error) { - var ( - ctx = clues.Add( - req.Context(), - "method", req.Method, - "url", req.URL, // TODO: pii - "request_len", req.ContentLength, - ) - resp, err = pipeline.Next(req, middlewareIndex) - ) + ctx := clues.Add( + req.Context(), + "method", req.Method, + "url", LoggableURL(req.URL.String()), + "request_len", req.ContentLength) + + // call the next middleware + resp, err := pipeline.Next(req, middlewareIndex) if strings.Contains(req.URL.String(), "users//") { logger.Ctx(ctx).Error("malformed request url: missing resource") @@ -301,7 +369,7 @@ func (handler *LoggingMiddleware) Intercept( // If api logging is toggled, log a body-less dump of the request/resp. if (resp.StatusCode / 100) == 2 { if logger.DebugAPI || os.Getenv(log2xxGraphRequestsEnvKey) != "" { - log.Debugw("2xx graph api resp", "response", getRespDump(ctx, resp, false)) + log.Debugw("2xx graph api resp", "response", getRespDump(ctx, resp, os.Getenv(log2xxGraphResponseEnvKey) != "")) } return resp, err @@ -319,13 +387,13 @@ func (handler *LoggingMiddleware) Intercept( msg := fmt.Sprintf("graph api error: %s", resp.Status) // special case for supportability: log all throttling cases. - if resp.StatusCode == http.StatusTooManyRequests || resp.StatusCode == http.StatusServiceUnavailable { + if resp.StatusCode == http.StatusTooManyRequests { log = log.With( "limit", resp.Header.Get(rateLimitHeader), "remaining", resp.Header.Get(rateRemainingHeader), "reset", resp.Header.Get(rateResetHeader), "retry-after", resp.Header.Get(retryAfterHeader)) - } else if resp.StatusCode/100 == 4 { + } else if resp.StatusCode/100 == 4 || resp.StatusCode == http.StatusServiceUnavailable { log = log.With("response", getRespDump(ctx, resp, true)) } diff --git a/src/internal/connector/graph_connector.go b/src/internal/connector/graph_connector.go index 2f8652f81..0180934a9 100644 --- a/src/internal/connector/graph_connector.go +++ b/src/internal/connector/graph_connector.go @@ -4,34 +4,33 @@ package connector import ( "context" - "fmt" "net/http" "runtime/trace" - "strings" "sync" "github.com/alcionai/clues" - "github.com/microsoft/kiota-abstractions-go/serialization" - msgraphgocore "github.com/microsoftgraph/msgraph-sdk-go-core" - "github.com/microsoftgraph/msgraph-sdk-go/models" "github.com/pkg/errors" - "golang.org/x/exp/maps" - "github.com/alcionai/corso/src/internal/common/ptr" + "github.com/alcionai/corso/src/internal/common" "github.com/alcionai/corso/src/internal/connector/discovery/api" "github.com/alcionai/corso/src/internal/connector/graph" - "github.com/alcionai/corso/src/internal/connector/sharepoint" "github.com/alcionai/corso/src/internal/connector/support" - "github.com/alcionai/corso/src/internal/diagnostics" + "github.com/alcionai/corso/src/internal/data" + "github.com/alcionai/corso/src/internal/operations/inject" "github.com/alcionai/corso/src/pkg/account" "github.com/alcionai/corso/src/pkg/fault" - "github.com/alcionai/corso/src/pkg/filters" ) // --------------------------------------------------------------------------- // Graph 
Connector // --------------------------------------------------------------------------- +// must comply with BackupProducer and RestoreConsumer +var ( + _ inject.BackupProducer = &GraphConnector{} + _ inject.RestoreConsumer = &GraphConnector{} +) + // GraphConnector is a struct used to wrap the GraphServiceClient and // GraphRequestAdapter from the msgraph-sdk-go. Additional fields are for // bookkeeping and interfacing with other component. @@ -41,9 +40,13 @@ type GraphConnector struct { itemClient *http.Client // configured to handle large item downloads tenant string - Sites map[string]string // webURL -> siteID and siteID -> webURL credentials account.M365Config + // maps of resource owner ids to names, and names to ids. + // not guaranteed to be populated, only here as a post-population + // reference for processes that choose to populate the values. + IDNameLookup common.IDNameSwapper + // wg is used to track completion of GC tasks wg *sync.WaitGroup region *trace.Region @@ -75,10 +78,11 @@ func NewGraphConnector( } gc := GraphConnector{ - itemClient: itemClient, - tenant: m365.AzureTenantID, - wg: &sync.WaitGroup{}, - credentials: m365, + itemClient: itemClient, + tenant: m365.AzureTenantID, + wg: &sync.WaitGroup{}, + credentials: m365, + IDNameLookup: common.IDsNames{}, } gc.Service, err = gc.createService() @@ -91,13 +95,64 @@ func NewGraphConnector( return nil, clues.Wrap(err, "creating api client").WithClues(ctx) } - if r == AllResources || r == Sites { - if err = gc.setTenantSites(ctx, errs); err != nil { - return nil, clues.Wrap(err, "retrieveing tenant site list") + return &gc, nil +} + +// PopulateOwnerIDAndNamesFrom takes the provided owner identifier and produces +// the owner's name and ID from that value. Returns an error if the owner is +// not recognized by the current tenant. +// +// The id-name swapper is optional. Some processes will look up all owners in +// the tenant before reaching this step. In that case, the data gets handed +// down for this func to consume instead of performing further queries. The +// maps get stored inside the gc instance for later re-use. +// +// TODO: If the maps are nil or empty, this func will perform a lookup on the given +// owner, and populate each map with that owner's id and name for downstream +// guarantees about that data being present. Optional performance enhancement +// idea: downstream from here, we should _only_ need the given user's id and name, +// and could store minimal map copies with that info instead of the whole tenant. +func (gc *GraphConnector) PopulateOwnerIDAndNamesFrom( + owner string, // input value, can be either id or name + ins common.IDNameSwapper, +) (string, string, error) { + // move this to GC method + id, name, err := getOwnerIDAndNameFrom(owner, ins) + if err != nil { + return "", "", errors.Wrap(err, "resolving resource owner details") + } + + gc.IDNameLookup = ins + + if ins == nil || (len(ins.IDs()) == 0 && len(ins.Names()) == 0) { + gc.IDNameLookup = common.IDsNames{ + IDToName: map[string]string{id: name}, + NameToID: map[string]string{name: id}, } } - return &gc, nil + return id, name, nil +} + +func getOwnerIDAndNameFrom( + owner string, + ins common.IDNameSwapper, +) (string, string, error) { + if ins == nil { + return owner, owner, nil + } + + if n, ok := ins.NameOf(owner); ok { + return owner, n, nil + } else if i, ok := ins.IDOf(owner); ok { + return i, owner, nil + } + + // TODO: look-up user by owner, either id or name, + // and populate with maps as a result. 
Only + // return owner, owner as a very last resort. + + return owner, owner, nil } // createService constructor for graphService component @@ -113,117 +168,8 @@ func (gc *GraphConnector) createService() (*graph.Service, error) { return graph.NewService(adapter), nil } -// setTenantSites queries the M365 to identify the sites in the -// workspace. The sites field is updated during this method -// iff the returned error is nil. -func (gc *GraphConnector) setTenantSites(ctx context.Context, errs *fault.Bus) error { - gc.Sites = map[string]string{} - - ctx, end := diagnostics.Span(ctx, "gc:setTenantSites") - defer end() - - sites, err := getResources( - ctx, - gc.Service, - gc.tenant, - sharepoint.GetAllSitesForTenant, - models.CreateSiteCollectionResponseFromDiscriminatorValue, - identifySite, - errs) - if err != nil { - return err - } - - gc.Sites = sites - - return nil -} - -var errKnownSkippableCase = clues.New("case is known and skippable") - -const personalSitePath = "sharepoint.com/personal/" - -// Transforms an interface{} into a key,value pair representing -// siteName:siteID. -func identifySite(item any) (string, string, error) { - m, ok := item.(models.Siteable) - if !ok { - return "", "", clues.New("non-Siteable item").With("item_type", fmt.Sprintf("%T", item)) - } - - id := ptr.Val(m.GetId()) - url, ok := ptr.ValOK(m.GetWebUrl()) - - if m.GetName() == nil { - // the built-in site at "https://{tenant-domain}/search" never has a name. - if ok && strings.HasSuffix(url, "/search") { - // TODO: pii siteID, on this and all following cases - return "", "", clues.Stack(errKnownSkippableCase).With("site_id", id) - } - - return "", "", clues.New("site has no name").With("site_id", id) - } - - // personal (ie: oneDrive) sites have to be filtered out server-side. - if ok && strings.Contains(url, personalSitePath) { - return "", "", clues.Stack(errKnownSkippableCase).With("site_id", id) - } - - return url, id, nil -} - -// GetSiteWebURLs returns the WebURLs of sharepoint sites within the tenant. -func (gc *GraphConnector) GetSiteWebURLs() []string { - return maps.Keys(gc.Sites) -} - -// GetSiteIds returns the canonical site IDs in the tenant -func (gc *GraphConnector) GetSiteIDs() []string { - return maps.Values(gc.Sites) -} - -// UnionSiteIDsAndWebURLs reduces the id and url slices into a single slice of site IDs. -// WebURLs will run as a path-suffix style matcher. Callers may provide partial urls, though -// each element in the url must fully match. Ex: the webURL value "foo" will match "www.ex.com/foo", -// but not match "www.ex.com/foobar". -// The returned IDs are reduced to a set of unique values. 
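`getOwnerIDAndNameFrom` above resolves a caller-supplied owner string that may be either an ID or a display name against optional two-way maps. A self-contained sketch of that resolution order, with `idsNames` standing in for `common.IDsNames`:

```go
package main

import "fmt"

type idsNames struct {
	idToName map[string]string
	nameToID map[string]string
}

func (in idsNames) NameOf(id string) (string, bool) { n, ok := in.idToName[id]; return n, ok }
func (in idsNames) IDOf(name string) (string, bool) { i, ok := in.nameToID[name]; return i, ok }

// ownerIDAndName mirrors the resolution rules above: treat the input as an
// ID first, then as a name, then fall back to echoing it for both values.
func ownerIDAndName(owner string, ins *idsNames) (id, name string) {
	if ins == nil {
		return owner, owner
	}

	if n, ok := ins.NameOf(owner); ok {
		return owner, n // owner was an id
	}

	if i, ok := ins.IDOf(owner); ok {
		return i, owner // owner was a name
	}

	return owner, owner // very last resort: unresolved
}

func main() {
	ins := &idsNames{
		idToName: map[string]string{"owner-id": "owner-name"},
		nameToID: map[string]string{"owner-name": "owner-id"},
	}

	fmt.Println(ownerIDAndName("owner-name", ins)) // owner-id owner-name
	fmt.Println(ownerIDAndName("unknown", ins))    // unknown unknown
}
```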
-func (gc *GraphConnector) UnionSiteIDsAndWebURLs( - ctx context.Context, - ids, urls []string, - errs *fault.Bus, -) ([]string, error) { - if len(gc.Sites) == 0 { - if err := gc.setTenantSites(ctx, errs); err != nil { - return nil, err - } - } - - idm := map[string]struct{}{} - - for _, id := range ids { - idm[id] = struct{}{} - } - - match := filters.PathSuffix(urls) - - for url, id := range gc.Sites { - if !match.Compare(url) { - continue - } - - idm[id] = struct{}{} - } - - idsl := make([]string, 0, len(idm)) - for id := range idm { - idsl = append(idsl, id) - } - - return idsl, nil -} - // AwaitStatus waits for all gc tasks to complete and then returns status -func (gc *GraphConnector) AwaitStatus() *support.ConnectorOperationStatus { +func (gc *GraphConnector) Wait() *data.CollectionStats { defer func() { if gc.region != nil { gc.region.End() @@ -233,12 +179,18 @@ func (gc *GraphConnector) AwaitStatus() *support.ConnectorOperationStatus { gc.wg.Wait() // clean up and reset statefulness - status := gc.status + dcs := data.CollectionStats{ + Folders: gc.status.Folders, + Objects: gc.status.Metrics.Objects, + Successes: gc.status.Metrics.Successes, + Bytes: gc.status.Metrics.Bytes, + Details: gc.status.String(), + } gc.wg = &sync.WaitGroup{} gc.status = support.ConnectorOperationStatus{} - return &status + return &dcs } // UpdateStatus is used by gc initiated tasks to indicate completion @@ -271,59 +223,3 @@ func (gc *GraphConnector) incrementAwaitingMessages() { func (gc *GraphConnector) incrementMessagesBy(num int) { gc.wg.Add(num) } - -// --------------------------------------------------------------------------- -// Helper Funcs -// --------------------------------------------------------------------------- - -func getResources( - ctx context.Context, - gs graph.Servicer, - tenantID string, - query func(context.Context, graph.Servicer) (serialization.Parsable, error), - parser func(parseNode serialization.ParseNode) (serialization.Parsable, error), - identify func(any) (string, string, error), - errs *fault.Bus, -) (map[string]string, error) { - resources := map[string]string{} - - response, err := query(ctx, gs) - if err != nil { - return nil, graph.Wrap(ctx, err, "retrieving tenant's resources") - } - - iter, err := msgraphgocore.NewPageIterator(response, gs.Adapter(), parser) - if err != nil { - return nil, graph.Stack(ctx, err) - } - - el := errs.Local() - - callbackFunc := func(item any) bool { - if el.Failure() != nil { - return false - } - - k, v, err := identify(item) - if err != nil { - if !errors.Is(err, errKnownSkippableCase) { - el.AddRecoverable(clues.Stack(err). - WithClues(ctx). 
- With("query_url", gs.Adapter().GetBaseUrl())) - } - - return true - } - - resources[k] = v - resources[v] = k - - return true - } - - if err := iter.Iterate(ctx, callbackFunc); err != nil { - return nil, graph.Stack(ctx, err) - } - - return resources, el.Failure() -} diff --git a/src/internal/connector/graph_connector_disconnected_test.go b/src/internal/connector/graph_connector_disconnected_test.go index d92a018e4..b95f75335 100644 --- a/src/internal/connector/graph_connector_disconnected_test.go +++ b/src/internal/connector/graph_connector_disconnected_test.go @@ -6,15 +6,10 @@ import ( "github.com/alcionai/clues" "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" - "github.com/alcionai/corso/src/internal/connector/graph" "github.com/alcionai/corso/src/internal/connector/support" "github.com/alcionai/corso/src/internal/tester" - "github.com/alcionai/corso/src/pkg/account" - "github.com/alcionai/corso/src/pkg/credentials" - "github.com/alcionai/corso/src/pkg/fault" "github.com/alcionai/corso/src/pkg/selectors" ) @@ -33,57 +28,6 @@ func TestDisconnectedGraphSuite(t *testing.T) { suite.Run(t, s) } -func (suite *DisconnectedGraphConnectorSuite) TestBadConnection() { - ctx, flush := tester.NewContext() - defer flush() - - table := []struct { - name string - acct func(t *testing.T) account.Account - }{ - { - name: "Invalid Credentials", - acct: func(t *testing.T) account.Account { - a, err := account.NewAccount( - account.ProviderM365, - account.M365Config{ - M365: credentials.M365{ - AzureClientID: "Test", - AzureClientSecret: "without", - }, - AzureTenantID: "data", - }, - ) - require.NoError(t, err, clues.ToCore(err)) - return a - }, - }, - { - name: "Empty Credentials", - acct: func(t *testing.T) account.Account { - // intentionally swallowing the error here - a, _ := account.NewAccount(account.ProviderM365) - return a - }, - }, - } - - for _, test := range table { - suite.Run(test.name, func() { - t := suite.T() - - gc, err := NewGraphConnector( - ctx, - graph.HTTPClient(graph.NoTimeout()), - test.acct(t), - Sites, - fault.New(true)) - assert.Nil(t, gc, test.name+" failed") - assert.NotNil(t, err, test.name+" failed") - }) - } -} - func statusTestTask(gc *GraphConnector, objects, success, folder int) { ctx, flush := tester.NewContext() defer flush() @@ -111,17 +55,16 @@ func (suite *DisconnectedGraphConnectorSuite) TestGraphConnector_Status() { go statusTestTask(&gc, 4, 1, 1) go statusTestTask(&gc, 4, 1, 1) - status := gc.AwaitStatus() - + stats := gc.Wait() t := suite.T() assert.NotEmpty(t, gc.PrintableStatus()) // Expect 8 objects - assert.Equal(t, 8, status.Metrics.Objects) + assert.Equal(t, 8, stats.Objects) // Expect 2 success - assert.Equal(t, 2, status.Metrics.Successes) + assert.Equal(t, 2, stats.Successes) // Expect 2 folders - assert.Equal(t, 2, status.Folders) + assert.Equal(t, 2, stats.Folders) } func (suite *DisconnectedGraphConnectorSuite) TestVerifyBackupInputs_allServices() { diff --git a/src/internal/connector/graph_connector_onedrive_test.go b/src/internal/connector/graph_connector_onedrive_test.go index 385055860..02b6f454e 100644 --- a/src/internal/connector/graph_connector_onedrive_test.go +++ b/src/internal/connector/graph_connector_onedrive_test.go @@ -526,143 +526,6 @@ func (suite *GraphConnectorOneDriveIntegrationSuite) TestPermissionsInheritanceR testPermissionsInheritanceRestoreAndBackup(suite, version.Backup) } -// TestPermissionsRestoreAndNoBackup checks that even if permissions exist -// not 
setting EnablePermissionsBackup results in empty permissions. This test -// only needs to run on the current version.Backup because it's about backup -// behavior not restore behavior (restore behavior is checked in other tests). -func (suite *GraphConnectorOneDriveIntegrationSuite) TestPermissionsRestoreAndNoBackup() { - ctx, flush := tester.NewContext() - defer flush() - - t := suite.T() - - secondaryUserName, secondaryUserID := suite.SecondaryUser() - - driveID := mustGetDefaultDriveID( - t, - ctx, - suite.BackupService(), - suite.Service(), - suite.BackupResourceOwner(), - ) - - secondaryUserRead := permData{ - user: secondaryUserName, - entityID: secondaryUserID, - roles: readPerm, - } - - secondaryUserWrite := permData{ - user: secondaryUserName, - entityID: secondaryUserID, - roles: writePerm, - } - - test := restoreBackupInfoMultiVersion{ - service: suite.BackupService(), - resource: suite.Resource(), - backupVersion: version.Backup, - collectionsPrevious: []colInfo{ - newOneDriveCollection( - suite.T(), - suite.BackupService(), - []string{ - "drives", - driveID, - "root:", - }, - version.Backup, - ). - withFile( - fileName, - fileAData, - secondaryUserWrite, - ). - withFolder( - folderBName, - secondaryUserRead, - ). - collection(), - newOneDriveCollection( - suite.T(), - suite.BackupService(), - []string{ - "drives", - driveID, - "root:", - folderBName, - }, - version.Backup, - ). - withFile( - fileName, - fileEData, - secondaryUserRead, - ). - withPermissions( - secondaryUserRead, - ). - collection(), - }, - collectionsLatest: []colInfo{ - newOneDriveCollection( - suite.T(), - suite.BackupService(), - []string{ - "drives", - driveID, - "root:", - }, - version.Backup, - ). - withFile( - fileName, - fileAData, - permData{}, - ). - withFolder( - folderBName, - permData{}, - ). - collection(), - newOneDriveCollection( - suite.T(), - suite.BackupService(), - []string{ - "drives", - driveID, - "root:", - folderBName, - }, - version.Backup, - ). - withFile( - fileName, - fileEData, - permData{}, - ). - // Call this to generate a meta file with the folder name that we can - // check. - withPermissions( - permData{}, - ). 
- collection(), - }, - } - - runRestoreBackupTestVersions( - t, - suite.Account(), - test, - suite.Tenant(), - []string{suite.BackupResourceOwner()}, - control.Options{ - RestorePermissions: true, - ToggleFeatures: control.Toggles{EnablePermissionsBackup: false}, - }, - ) -} - // --------------------------------------------------------------------------- // OneDrive regression // --------------------------------------------------------------------------- @@ -862,7 +725,7 @@ func testRestoreAndBackupMultipleFilesAndFoldersNoPermissions( []string{suite.BackupResourceOwner()}, control.Options{ RestorePermissions: true, - ToggleFeatures: control.Toggles{EnablePermissionsBackup: true}, + ToggleFeatures: control.Toggles{}, }, ) }) @@ -1073,7 +936,7 @@ func testPermissionsRestoreAndBackup(suite oneDriveSuite, startVersion int) { []string{suite.BackupResourceOwner()}, control.Options{ RestorePermissions: true, - ToggleFeatures: control.Toggles{EnablePermissionsBackup: true}, + ToggleFeatures: control.Toggles{}, }, ) }) @@ -1156,7 +1019,7 @@ func testPermissionsBackupAndNoRestore(suite oneDriveSuite, startVersion int) { []string{suite.BackupResourceOwner()}, control.Options{ RestorePermissions: false, - ToggleFeatures: control.Toggles{EnablePermissionsBackup: true}, + ToggleFeatures: control.Toggles{}, }, ) }) @@ -1308,7 +1171,7 @@ func testPermissionsInheritanceRestoreAndBackup(suite oneDriveSuite, startVersio []string{suite.BackupResourceOwner()}, control.Options{ RestorePermissions: true, - ToggleFeatures: control.Toggles{EnablePermissionsBackup: true}, + ToggleFeatures: control.Toggles{}, }, ) }) diff --git a/src/internal/connector/graph_connector_test.go b/src/internal/connector/graph_connector_test.go index dae0ed4cc..2b4525bc0 100644 --- a/src/internal/connector/graph_connector_test.go +++ b/src/internal/connector/graph_connector_test.go @@ -13,6 +13,7 @@ import ( "github.com/stretchr/testify/suite" "golang.org/x/exp/maps" + "github.com/alcionai/corso/src/internal/common" "github.com/alcionai/corso/src/internal/connector/graph" "github.com/alcionai/corso/src/internal/connector/mockconnector" "github.com/alcionai/corso/src/internal/connector/support" @@ -38,104 +39,127 @@ func TestGraphConnectorUnitSuite(t *testing.T) { suite.Run(t, &GraphConnectorUnitSuite{Suite: tester.NewUnitSuite(t)}) } -func (suite *GraphConnectorUnitSuite) TestUnionSiteIDsAndWebURLs() { +func (suite *GraphConnectorUnitSuite) TestPopulateOwnerIDAndNamesFrom() { const ( - url1 = "www.foo.com/bar" - url2 = "www.fnords.com/smarf" - path1 = "bar" - path2 = "/smarf" - id1 = "site-id-1" - id2 = "site-id-2" + ownerID = "owner-id" + ownerName = "owner-name" ) - gc := &GraphConnector{ - // must be populated, else the func will try to make a graph call - // to retrieve site data. 
- Sites: map[string]string{ - url1: id1, - url2: id2, - }, - } + var ( + itn = map[string]string{ownerID: ownerName} + nti = map[string]string{ownerName: ownerID} + ) table := []struct { - name string - ids []string - urls []string - expect []string + name string + owner string + ins common.IDsNames + expectID string + expectName string }{ { - name: "nil", + name: "nil ins", + owner: ownerID, + expectID: ownerID, + expectName: ownerID, }, { - name: "empty", - ids: []string{}, - urls: []string{}, - expect: []string{}, + name: "only id map with owner id", + owner: ownerID, + ins: common.IDsNames{ + IDToName: itn, + NameToID: nil, + }, + expectID: ownerID, + expectName: ownerName, }, { - name: "ids only", - ids: []string{id1, id2}, - urls: []string{}, - expect: []string{id1, id2}, + name: "only name map with owner id", + owner: ownerID, + ins: common.IDsNames{ + IDToName: nil, + NameToID: nti, + }, + expectID: ownerID, + expectName: ownerID, }, { - name: "urls only", - ids: []string{}, - urls: []string{url1, url2}, - expect: []string{id1, id2}, + name: "only id map with owner name", + owner: ownerName, + ins: common.IDsNames{ + IDToName: itn, + NameToID: nil, + }, + expectID: ownerName, + expectName: ownerName, }, { - name: "url suffix only", - ids: []string{}, - urls: []string{path1, path2}, - expect: []string{id1, id2}, + name: "only name map with owner name", + owner: ownerName, + ins: common.IDsNames{ + IDToName: nil, + NameToID: nti, + }, + expectID: ownerID, + expectName: ownerName, }, { - name: "url and suffix overlap", - ids: []string{}, - urls: []string{url1, url2, path1, path2}, - expect: []string{id1, id2}, + name: "both maps with owner id", + owner: ownerID, + ins: common.IDsNames{ + IDToName: itn, + NameToID: nti, + }, + expectID: ownerID, + expectName: ownerName, }, { - name: "ids and urls, no overlap", - ids: []string{id1}, - urls: []string{url2}, - expect: []string{id1, id2}, + name: "both maps with owner name", + owner: ownerName, + ins: common.IDsNames{ + IDToName: itn, + NameToID: nti, + }, + expectID: ownerID, + expectName: ownerName, }, { - name: "ids and urls, overlap", - ids: []string{id1, id2}, - urls: []string{url1, url2}, - expect: []string{id1, id2}, + name: "non-matching maps with owner id", + owner: ownerID, + ins: common.IDsNames{ + IDToName: map[string]string{"foo": "bar"}, + NameToID: map[string]string{"fnords": "smarf"}, + }, + expectID: ownerID, + expectName: ownerID, }, { - name: "partial non-match on path", - ids: []string{}, - urls: []string{path1[2:], path2[2:]}, - expect: []string{}, - }, - { - name: "partial non-match on url", - ids: []string{}, - urls: []string{url1[5:], url2[5:]}, - expect: []string{}, + name: "non-matching with owner name", + owner: ownerName, + ins: common.IDsNames{ + IDToName: map[string]string{"foo": "bar"}, + NameToID: map[string]string{"fnords": "smarf"}, + }, + expectID: ownerName, + expectName: ownerName, }, } for _, test := range table { suite.Run(test.name, func() { - t := suite.T() + var ( + t = suite.T() + gc = &GraphConnector{} + ) - ctx, flush := tester.NewContext() - defer flush() - - result, err := gc.UnionSiteIDsAndWebURLs(ctx, test.ids, test.urls, fault.New(true)) - assert.NoError(t, err, clues.ToCore(err)) - assert.ElementsMatch(t, test.expect, result) + id, name, err := gc.PopulateOwnerIDAndNamesFrom(test.owner, test.ins) + require.NoError(t, err, clues.ToCore(err)) + assert.Equal(t, test.expectID, id) + assert.Equal(t, test.expectName, name) }) } } -func (suite *GraphConnectorUnitSuite) 
TestGraphConnector_AwaitStatus() { +func (suite *GraphConnectorUnitSuite) TestGraphConnector_Wait() { ctx, flush := tester.NewContext() defer flush() @@ -156,14 +180,14 @@ func (suite *GraphConnectorUnitSuite) TestGraphConnector_AwaitStatus() { gc.wg.Add(1) gc.UpdateStatus(status) - result := gc.AwaitStatus() + result := gc.Wait() require.NotNil(t, result) assert.Nil(t, gc.region, "region") assert.Empty(t, gc.status, "status") assert.Equal(t, 1, result.Folders) - assert.Equal(t, 2, result.Metrics.Objects) - assert.Equal(t, 3, result.Metrics.Successes) - assert.Equal(t, int64(4), result.Metrics.Bytes) + assert.Equal(t, 2, result.Objects) + assert.Equal(t, 3, result.Successes) + assert.Equal(t, int64(4), result.Bytes) } // --------------------------------------------------------------------------- @@ -199,35 +223,6 @@ func (suite *GraphConnectorIntegrationSuite) SetupSuite() { tester.LogTimeOfTest(suite.T()) } -// TestSetTenantSites verifies GraphConnector's ability to query -// the sites associated with the credentials -func (suite *GraphConnectorIntegrationSuite) TestSetTenantSites() { - newConnector := GraphConnector{ - tenant: "test_tenant", - Sites: make(map[string]string, 0), - credentials: suite.connector.credentials, - } - - ctx, flush := tester.NewContext() - defer flush() - - t := suite.T() - - service, err := newConnector.createService() - require.NoError(t, err, clues.ToCore(err)) - - newConnector.Service = service - assert.Equal(t, 0, len(newConnector.Sites)) - - err = newConnector.setTenantSites(ctx, fault.New(true)) - assert.NoError(t, err, clues.ToCore(err)) - assert.Less(t, 0, len(newConnector.Sites)) - - for _, site := range newConnector.Sites { - assert.NotContains(t, "sharepoint.com/personal/", site) - } -} - func (suite *GraphConnectorIntegrationSuite) TestRestoreFailsBadService() { ctx, flush := tester.NewContext() defer flush() @@ -241,7 +236,7 @@ func (suite *GraphConnectorIntegrationSuite) TestRestoreFailsBadService() { } ) - deets, err := suite.connector.RestoreDataCollections( + deets, err := suite.connector.ConsumeRestoreCollections( ctx, version.Backup, acct, @@ -249,17 +244,17 @@ func (suite *GraphConnectorIntegrationSuite) TestRestoreFailsBadService() { dest, control.Options{ RestorePermissions: true, - ToggleFeatures: control.Toggles{EnablePermissionsBackup: true}, + ToggleFeatures: control.Toggles{}, }, nil, fault.New(true)) assert.Error(t, err, clues.ToCore(err)) assert.NotNil(t, deets) - status := suite.connector.AwaitStatus() - assert.Equal(t, 0, status.Metrics.Objects) + status := suite.connector.Wait() + assert.Equal(t, 0, status.Objects) assert.Equal(t, 0, status.Folders) - assert.Equal(t, 0, status.Metrics.Successes) + assert.Equal(t, 0, status.Successes) } func (suite *GraphConnectorIntegrationSuite) TestEmptyCollections() { @@ -320,7 +315,7 @@ func (suite *GraphConnectorIntegrationSuite) TestEmptyCollections() { ctx, flush := tester.NewContext() defer flush() - deets, err := suite.connector.RestoreDataCollections( + deets, err := suite.connector.ConsumeRestoreCollections( ctx, version.Backup, suite.acct, @@ -328,17 +323,17 @@ func (suite *GraphConnectorIntegrationSuite) TestEmptyCollections() { dest, control.Options{ RestorePermissions: true, - ToggleFeatures: control.Toggles{EnablePermissionsBackup: true}, + ToggleFeatures: control.Toggles{}, }, test.col, fault.New(true)) require.NoError(t, err, clues.ToCore(err)) assert.NotNil(t, deets) - stats := suite.connector.AwaitStatus() - assert.Zero(t, stats.Metrics.Objects) + stats := 
suite.connector.Wait() + assert.Zero(t, stats.Objects) assert.Zero(t, stats.Folders) - assert.Zero(t, stats.Metrics.Successes) + assert.Zero(t, stats.Successes) }) } } @@ -400,7 +395,7 @@ func runRestore( restoreGC := loadConnector(ctx, t, graph.HTTPClient(graph.NoTimeout()), config.resource) restoreSel := getSelectorWith(t, config.service, config.resourceOwners, true) - deets, err := restoreGC.RestoreDataCollections( + deets, err := restoreGC.ConsumeRestoreCollections( ctx, backupVersion, config.acct, @@ -412,11 +407,11 @@ func runRestore( require.NoError(t, err, clues.ToCore(err)) assert.NotNil(t, deets) - status := restoreGC.AwaitStatus() + status := restoreGC.Wait() runTime := time.Since(start) - assert.Equal(t, numRestoreItems, status.Metrics.Objects, "restored status.Metrics.Objects") - assert.Equal(t, numRestoreItems, status.Metrics.Successes, "restored status.Metrics.Successes") + assert.Equal(t, numRestoreItems, status.Objects, "restored status.Objects") + assert.Equal(t, numRestoreItems, status.Successes, "restored status.Successes") assert.Len( t, deets.Entries, @@ -443,23 +438,34 @@ func runBackupAndCompare( cats[c.category] = struct{}{} } - expectedDests := make([]destAndCats, 0, len(config.resourceOwners)) + var ( + expectedDests = make([]destAndCats, 0, len(config.resourceOwners)) + idToName = map[string]string{} + nameToID = map[string]string{} + ) + for _, ro := range config.resourceOwners { expectedDests = append(expectedDests, destAndCats{ resourceOwner: ro, dest: config.dest.ContainerName, cats: cats, }) + + idToName[ro] = ro + nameToID[ro] = ro } backupGC := loadConnector(ctx, t, graph.HTTPClient(graph.NoTimeout()), config.resource) + backupGC.IDNameLookup = common.IDsNames{IDToName: idToName, NameToID: nameToID} + backupSel := backupSelectorForExpected(t, config.service, expectedDests) t.Logf("Selective backup of %s\n", backupSel) start := time.Now() - dcs, excludes, err := backupGC.DataCollections( + dcs, excludes, err := backupGC.ProduceBackupCollections( ctx, backupSel, + backupSel, nil, config.opts, fault.New(true)) @@ -480,12 +486,12 @@ func runBackupAndCompare( config.dest, config.opts.RestorePermissions) - status := backupGC.AwaitStatus() + status := backupGC.Wait() - assert.Equalf(t, totalItems+skipped, status.Metrics.Objects, - "backup status.Metrics.Objects; wanted %d items + %d skipped", totalItems, skipped) - assert.Equalf(t, totalItems+skipped, status.Metrics.Successes, - "backup status.Metrics.Successes; wanted %d items + %d skipped", totalItems, skipped) + assert.Equalf(t, totalItems+skipped, status.Objects, + "backup status.Objects; wanted %d items + %d skipped", totalItems, skipped) + assert.Equalf(t, totalItems+skipped, status.Successes, + "backup status.Successes; wanted %d items + %d skipped", totalItems, skipped) } func runRestoreBackupTest( @@ -850,7 +856,7 @@ func (suite *GraphConnectorIntegrationSuite) TestRestoreAndBackup() { []string{suite.user}, control.Options{ RestorePermissions: true, - ToggleFeatures: control.Toggles{EnablePermissionsBackup: true}, + ToggleFeatures: control.Toggles{}, }, ) }) @@ -964,7 +970,7 @@ func (suite *GraphConnectorIntegrationSuite) TestMultiFolderBackupDifferentNames ) restoreGC := loadConnector(ctx, t, graph.HTTPClient(graph.NoTimeout()), test.resource) - deets, err := restoreGC.RestoreDataCollections( + deets, err := restoreGC.ConsumeRestoreCollections( ctx, version.Backup, suite.acct, @@ -972,19 +978,19 @@ func (suite *GraphConnectorIntegrationSuite) TestMultiFolderBackupDifferentNames dest, control.Options{ 
RestorePermissions: true, - ToggleFeatures: control.Toggles{EnablePermissionsBackup: true}, + ToggleFeatures: control.Toggles{}, }, collections, fault.New(true)) require.NoError(t, err, clues.ToCore(err)) require.NotNil(t, deets) - status := restoreGC.AwaitStatus() + status := restoreGC.Wait() // Always just 1 because it's just 1 collection. - assert.Equal(t, totalItems, status.Metrics.Objects, "status.Metrics.Objects") - assert.Equal(t, totalItems, status.Metrics.Successes, "status.Metrics.Successes") - assert.Len( - t, deets.Entries, totalItems, + assert.Equal(t, totalItems, status.Objects, "status.Objects") + assert.Equal(t, totalItems, status.Successes, "status.Successes") + assert.Equal( + t, totalItems, len(deets.Entries), "details entries contains same item count as total successful items restored") t.Log("Restore complete") @@ -996,13 +1002,14 @@ func (suite *GraphConnectorIntegrationSuite) TestMultiFolderBackupDifferentNames backupSel := backupSelectorForExpected(t, test.service, expectedDests) t.Log("Selective backup of", backupSel) - dcs, excludes, err := backupGC.DataCollections( + dcs, excludes, err := backupGC.ProduceBackupCollections( ctx, backupSel, + backupSel, nil, control.Options{ RestorePermissions: true, - ToggleFeatures: control.Toggles{EnablePermissionsBackup: true}, + ToggleFeatures: control.Toggles{}, }, fault.New(true)) require.NoError(t, err, clues.ToCore(err)) @@ -1023,9 +1030,9 @@ func (suite *GraphConnectorIntegrationSuite) TestMultiFolderBackupDifferentNames control.RestoreDestination{}, true) - status := backupGC.AwaitStatus() - assert.Equal(t, allItems+skipped, status.Metrics.Objects, "status.Metrics.Objects") - assert.Equal(t, allItems+skipped, status.Metrics.Successes, "status.Metrics.Successes") + status := backupGC.Wait() + assert.Equal(t, allItems+skipped, status.Objects, "status.Objects") + assert.Equal(t, allItems+skipped, status.Successes, "status.Successes") }) } } @@ -1062,7 +1069,7 @@ func (suite *GraphConnectorIntegrationSuite) TestRestoreAndBackup_largeMailAttac []string{suite.user}, control.Options{ RestorePermissions: true, - ToggleFeatures: control.Toggles{EnablePermissionsBackup: true}, + ToggleFeatures: control.Toggles{}, }, ) } @@ -1111,27 +1118,28 @@ func (suite *GraphConnectorIntegrationSuite) TestBackup_CreatesPrefixCollections path.FilesCategory.String(), }, }, - // SharePoint lists and pages don't seem to check selectors as expected. 
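The SharePoint case re-enabled just below builds a selector that includes the libraries category but matches zero items, so the backup should emit only corso's prefix and metadata collections. A small sketch of that construction, using the selectors API as it appears in this patch (`siteID` is a placeholder):

```go
import "github.com/alcionai/corso/src/pkg/selectors"

// noneSharePointSelector includes the libraries category while matching no
// items within it, which is the shape the prefix-collection test asserts on.
func noneSharePointSelector(siteID string) selectors.Selector {
	sel := selectors.NewSharePointBackup([]string{siteID})
	sel.Include(
		// NoneTgt: declare the category, match zero items
		sel.LibraryFolders([]string{selectors.NoneTgt}),
	)

	return sel.Selector
}
```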
- //{ - // name: "SharePoint", - // resource: Sites, - // selectorFunc: func(t *testing.T) selectors.Selector { - // sel := selectors.NewSharePointBackup([]string{tester.M365SiteID(t)}) - // sel.Include( - // sel.Pages([]string{selectors.NoneTgt}), - // sel.Lists([]string{selectors.NoneTgt}), - // sel.Libraries([]string{selectors.NoneTgt}), - // ) + { + name: "SharePoint", + resource: Sites, + selectorFunc: func(t *testing.T) selectors.Selector { + sel := selectors.NewSharePointBackup([]string{tester.M365SiteID(t)}) + sel.Include( + sel.LibraryFolders([]string{selectors.NoneTgt}), + // not yet in use + // sel.Pages([]string{selectors.NoneTgt}), + // sel.Lists([]string{selectors.NoneTgt}), + ) - // return sel.Selector - // }, - // service: path.SharePointService, - // categories: []string{ - // path.PagesCategory.String(), - // path.ListsCategory.String(), - // path.LibrariesCategory.String(), - // }, - //}, + return sel.Selector + }, + service: path.SharePointService, + categories: []string{ + path.LibrariesCategory.String(), + // not yet in use + // path.PagesCategory.String(), + // path.ListsCategory.String(), + }, + }, } for _, test := range table { @@ -1147,13 +1155,19 @@ func (suite *GraphConnectorIntegrationSuite) TestBackup_CreatesPrefixCollections start = time.Now() ) - dcs, excludes, err := backupGC.DataCollections( + id, name, err := backupGC.PopulateOwnerIDAndNamesFrom(backupSel.DiscreteOwner, nil) + require.NoError(t, err, clues.ToCore(err)) + + backupSel.SetDiscreteOwnerIDName(id, name) + + dcs, excludes, err := backupGC.ProduceBackupCollections( ctx, backupSel, + backupSel, nil, control.Options{ RestorePermissions: false, - ToggleFeatures: control.Toggles{EnablePermissionsBackup: false}, + ToggleFeatures: control.Toggles{}, }, fault.New(true)) require.NoError(t, err) @@ -1191,7 +1205,7 @@ func (suite *GraphConnectorIntegrationSuite) TestBackup_CreatesPrefixCollections assert.ElementsMatch(t, test.categories, foundCategories) - backupGC.AwaitStatus() + backupGC.Wait() assert.NoError(t, errs.Failure()) }) diff --git a/src/internal/connector/mockconnector/mock_data_connector.go b/src/internal/connector/mockconnector/mock_data_connector.go new file mode 100644 index 000000000..6c5850bdb --- /dev/null +++ b/src/internal/connector/mockconnector/mock_data_connector.go @@ -0,0 +1,56 @@ +package mockconnector + +import ( + "context" + + "github.com/alcionai/corso/src/internal/common" + "github.com/alcionai/corso/src/internal/data" + "github.com/alcionai/corso/src/pkg/account" + "github.com/alcionai/corso/src/pkg/backup/details" + "github.com/alcionai/corso/src/pkg/control" + "github.com/alcionai/corso/src/pkg/fault" + "github.com/alcionai/corso/src/pkg/selectors" +) + +type GraphConnector struct { + Collections []data.BackupCollection + Exclude map[string]map[string]struct{} + + Deets *details.Details + + Err error + + Stats data.CollectionStats +} + +func (gc GraphConnector) ProduceBackupCollections( + _ context.Context, + _ common.IDNamer, + _ selectors.Selector, + _ []data.RestoreCollection, + _ control.Options, + _ *fault.Bus, +) ( + []data.BackupCollection, + map[string]map[string]struct{}, + error, +) { + return gc.Collections, gc.Exclude, gc.Err +} + +func (gc GraphConnector) Wait() *data.CollectionStats { + return &gc.Stats +} + +func (gc GraphConnector) ConsumeRestoreCollections( + _ context.Context, + _ int, + _ account.Account, + _ selectors.Selector, + _ control.RestoreDestination, + _ control.Options, + _ []data.RestoreCollection, + _ *fault.Bus, +) (*details.Details, 
error) { + return gc.Deets, gc.Err +} diff --git a/src/internal/connector/onedrive/api/drive.go b/src/internal/connector/onedrive/api/drive.go index 9133d6c03..f72cdf10f 100644 --- a/src/internal/connector/onedrive/api/drive.go +++ b/src/internal/connector/onedrive/api/drive.go @@ -203,10 +203,6 @@ func (p *siteDrivePager) ValuesIn(l api.PageLinker) ([]models.Driveable, error) return getValues[models.Driveable](l) } -// --------------------------------------------------------------------------- -// Drive Paging -// --------------------------------------------------------------------------- - // DrivePager pages through different types of drive owners type DrivePager interface { GetPage(context.Context) (api.PageLinker, error) @@ -275,3 +271,55 @@ func GetAllDrives( return ds, nil } + +// generic drive item getter +func GetDriveItem( + ctx context.Context, + srv graph.Servicer, + driveID, itemID string, +) (models.DriveItemable, error) { + di, err := srv.Client(). + DrivesById(driveID). + ItemsById(itemID). + Get(ctx, nil) + if err != nil { + return nil, graph.Wrap(ctx, err, "getting item") + } + + return di, nil +} + +func GetItemPermission( + ctx context.Context, + service graph.Servicer, + driveID, itemID string, +) (models.PermissionCollectionResponseable, error) { + perm, err := service. + Client(). + DrivesById(driveID). + ItemsById(itemID). + Permissions(). + Get(ctx, nil) + if err != nil { + return nil, graph.Wrap(ctx, err, "getting item metadata").With("item_id", itemID) + } + + return perm, nil +} + +func GetDriveByID( + ctx context.Context, + srv graph.Servicer, + userID string, +) (models.Driveable, error) { + //revive:enable:context-as-argument + d, err := srv.Client(). + UsersById(userID). + Drive(). + Get(ctx, nil) + if err != nil { + return nil, graph.Wrap(ctx, err, "getting drive") + } + + return d, nil +} diff --git a/src/internal/connector/onedrive/collection.go b/src/internal/connector/onedrive/collection.go index 5ac7a14dd..6f45eb9bf 100644 --- a/src/internal/connector/onedrive/collection.go +++ b/src/internal/connector/onedrive/collection.go @@ -16,6 +16,7 @@ import ( "github.com/alcionai/corso/src/internal/common/ptr" "github.com/alcionai/corso/src/internal/connector/graph" + "github.com/alcionai/corso/src/internal/connector/onedrive/api" "github.com/alcionai/corso/src/internal/connector/support" "github.com/alcionai/corso/src/internal/data" "github.com/alcionai/corso/src/internal/observe" @@ -41,6 +42,9 @@ const ( MetaFileSuffix = ".meta" DirMetaFileSuffix = ".dirmeta" DataFileSuffix = ".data" + + // Used to compare in case of OneNote files + MaxOneNoteFileSize = 2 * 1024 * 1024 * 1024 ) func IsMetaFile(name string) bool { @@ -96,6 +100,14 @@ type Collection struct { // Specifies if it new, moved/rename or deleted state data.CollectionState + // scope specifies what scope the items in a collection belongs + // to. This is primarily useful when dealing with a "package", + // like in the case of a OneNote file. A OneNote file is a + // collection with a package scope and multiple files in it. Most + // other collections have a scope of folder to indicate that the + // files within them belong to a folder. 
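The new `onedrive/api` getters above are deliberately thin wrappers that attach context to Graph errors. A hypothetical call site combining two of them; this assumes the package layout from this patch and is a sketch, not part of the change.

```go
package example

import (
	"context"

	"github.com/microsoftgraph/msgraph-sdk-go/models"

	"github.com/alcionai/corso/src/internal/connector/graph"
	"github.com/alcionai/corso/src/internal/connector/onedrive/api"
)

// itemWithPermissions fetches a drive item and its permissions together,
// letting each helper wrap its own failure with graph context.
func itemWithPermissions(
	ctx context.Context,
	srv graph.Servicer,
	driveID, itemID string,
) (models.DriveItemable, models.PermissionCollectionResponseable, error) {
	item, err := api.GetDriveItem(ctx, srv, driveID, itemID)
	if err != nil {
		return nil, nil, err
	}

	perms, err := api.GetItemPermission(ctx, srv, driveID, itemID)
	if err != nil {
		return nil, nil, err
	}

	return item, perms, nil
}
```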
+ scope collectionScope + // should only be true if the old delta token expired doNotMergeItems bool } @@ -121,7 +133,6 @@ type itemMetaReaderFunc func( service graph.Servicer, driveID string, item models.DriveItemable, - fetchPermissions bool, ) (io.ReadCloser, int, error) // NewCollection creates a Collection @@ -134,6 +145,7 @@ func NewCollection( statusUpdater support.StatusUpdater, source driveSource, ctrlOpts control.Options, + colScope collectionScope, doNotMergeItems bool, ) *Collection { c := &Collection{ @@ -148,17 +160,18 @@ func NewCollection( statusUpdater: statusUpdater, ctrl: ctrlOpts, state: data.StateOf(prevPath, folderPath), + scope: colScope, doNotMergeItems: doNotMergeItems, } // Allows tests to set a mock populator switch source { case SharePointSource: - c.itemGetter = getDriveItem + c.itemGetter = api.GetDriveItem c.itemReader = sharePointItemReader c.itemMetaReader = sharePointItemMetaReader default: - c.itemGetter = getDriveItem + c.itemGetter = api.GetDriveItem c.itemReader = oneDriveItemReader c.itemMetaReader = oneDriveItemMetaReader } @@ -345,12 +358,27 @@ func (oc *Collection) getDriveItemContent( } if clues.HasLabel(err, graph.LabelStatus(http.StatusNotFound)) || graph.IsErrDeletedInFlight(err) { - logger.CtxErr(ctx, err).With("skipped_reason", fault.SkipNotFound).Error("item not found") + logger.CtxErr(ctx, err).With("skipped_reason", fault.SkipNotFound).Info("item not found") el.AddSkip(fault.FileSkip(fault.SkipNotFound, itemID, itemName, graph.ItemInfo(item))) return nil, clues.Wrap(err, "downloading item").Label(graph.LabelsSkippable) } + // Skip big OneNote files as they can't be downloaded + if clues.HasLabel(err, graph.LabelStatus(http.StatusServiceUnavailable)) && + oc.scope == CollectionScopePackage && *item.GetSize() >= MaxOneNoteFileSize { + // FIXME: It is possible that, in the case of a OneNote file, we + // will end up just backing up the `onetoc2` file without + // the `.one` file, which is the important part of the OneNote + // "item". This will have to be handled during the + // restore, or we have to handle it separately by somehow + // deleting the entire collection.
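Restated for review, the guard on this skip branch as a standalone predicate; a sketch only, assuming this file's existing clues, graph, and http imports (the name shouldSkipOneNote is hypothetical):

func shouldSkipOneNote(err error, scope collectionScope, size int64) bool {
	// All three must hold: Graph answered the download with a 503,
	// the collection is a OneNote package, and the file sits at or
	// above the 2GB download ceiling.
	return clues.HasLabel(err, graph.LabelStatus(http.StatusServiceUnavailable)) &&
		scope == CollectionScopePackage &&
		size >= MaxOneNoteFileSize
}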
+ logger.CtxErr(ctx, err).With("skipped_reason", fault.SkipBigOneNote).Info("max OneNote file size exceeded") + el.AddSkip(fault.FileSkip(fault.SkipBigOneNote, itemID, itemName, graph.ItemInfo(item))) + + return nil, clues.Wrap(err, "downloading item").Label(graph.LabelsSkippable) + } + logger.CtxErr(ctx, err).Error("downloading item") el.AddRecoverable(clues.Stack(err).WithClues(ctx).Label(fault.LabelForceNoBackupCreation)) @@ -391,7 +419,8 @@ func (oc *Collection) populateItems(ctx context.Context, errs *fault.Bus) { folderProgress, colCloser := observe.ProgressWithCount( ctx, observe.ItemQueueMsg, - observe.PII(queuedPath), + // TODO(keepers): conceal compliance in path, drop Hide() + clues.Hide(queuedPath), int64(len(oc.driveItems))) defer colCloser() defer close(folderProgress) @@ -452,8 +481,7 @@ func (oc *Collection) populateItems(ctx context.Context, errs *fault.Bus) { ctx, oc.service, oc.driveID, - item, - oc.ctrl.ToggleFeatures.EnablePermissionsBackup) + item) if err != nil { el.AddRecoverable(clues.Wrap(err, "getting item metadata").Label(fault.LabelForceNoBackupCreation)) @@ -489,7 +517,7 @@ func (oc *Collection) populateItems(ctx context.Context, errs *fault.Bus) { ctx, itemData, observe.ItemBackupMsg, - observe.PII(itemID+dataSuffix), + clues.Hide(itemID+dataSuffix), itemSize) go closer() @@ -505,15 +533,20 @@ func (oc *Collection) populateItems(ctx context.Context, errs *fault.Bus) { metaReader := lazy.NewLazyReadCloser(func() (io.ReadCloser, error) { progReader, closer := observe.ItemProgress( - ctx, itemMeta, observe.ItemBackupMsg, - observe.PII(metaFileName+metaSuffix), int64(itemMetaSize)) + ctx, + itemMeta, + observe.ItemBackupMsg, + clues.Hide(metaFileName+metaSuffix), + int64(itemMetaSize)) go closer() return progReader, nil }) oc.data <- &MetadataItem{ - id: metaFileName + metaSuffix, - data: metaReader, + id: metaFileName + metaSuffix, + data: metaReader, + // Metadata file should always use the latest time as + // permissions change does not update mod time. 
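+ // Rationale: kopia decides whether to re-read an entry from its
+ // cached metadata, so a stale mod time here could let a
+ // permissions-only change be served from the prior snapshot
+ // instead of being re-uploaded.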
modTime: time.Now(), } diff --git a/src/internal/connector/onedrive/collection_test.go b/src/internal/connector/onedrive/collection_test.go index dbf5d76a3..3afa31984 100644 --- a/src/internal/connector/onedrive/collection_test.go +++ b/src/internal/connector/onedrive/collection_test.go @@ -213,7 +213,8 @@ func (suite *CollectionUnitTestSuite) TestCollection() { suite, suite.testStatusUpdater(&wg, &collStatus), test.source, - control.Options{ToggleFeatures: control.Toggles{EnablePermissionsBackup: true}}, + control.Options{ToggleFeatures: control.Toggles{}}, + CollectionScopeFolder, true) require.NotNil(t, coll) assert.Equal(t, folderPath, coll.FullPath()) @@ -236,7 +237,6 @@ func (suite *CollectionUnitTestSuite) TestCollection() { _ graph.Servicer, _ string, _ models.DriveItemable, - _ bool, ) (io.ReadCloser, int, error) { metaJSON, err := json.Marshal(testItemMeta) if err != nil { @@ -352,7 +352,8 @@ func (suite *CollectionUnitTestSuite) TestCollectionReadError() { suite, suite.testStatusUpdater(&wg, &collStatus), test.source, - control.Options{ToggleFeatures: control.Toggles{EnablePermissionsBackup: true}}, + control.Options{ToggleFeatures: control.Toggles{}}, + CollectionScopeFolder, true) mockItem := models.NewDriveItem() @@ -376,7 +377,6 @@ func (suite *CollectionUnitTestSuite) TestCollectionReadError() { _ graph.Servicer, _ string, _ models.DriveItemable, - _ bool, ) (io.ReadCloser, int, error) { return io.NopCloser(strings.NewReader(`{}`)), 2, nil } @@ -441,7 +441,8 @@ func (suite *CollectionUnitTestSuite) TestCollectionReadUnauthorizedErrorRetry() suite, suite.testStatusUpdater(&wg, &collStatus), test.source, - control.Options{ToggleFeatures: control.Toggles{EnablePermissionsBackup: true}}, + control.Options{ToggleFeatures: control.Toggles{}}, + CollectionScopeFolder, true) mockItem := models.NewDriveItem() @@ -481,7 +482,6 @@ func (suite *CollectionUnitTestSuite) TestCollectionReadUnauthorizedErrorRetry() _ graph.Servicer, _ string, _ models.DriveItemable, - _ bool, ) (io.ReadCloser, int, error) { return io.NopCloser(strings.NewReader(`{}`)), 2, nil } @@ -501,7 +501,7 @@ func (suite *CollectionUnitTestSuite) TestCollectionReadUnauthorizedErrorRetry() } } -// TODO(meain): Remove this test once we start always backing up permissions +// Ensure metadata file always uses latest time for mod time func (suite *CollectionUnitTestSuite) TestCollectionPermissionBackupLatestModTime() { table := []struct { name string @@ -540,7 +540,8 @@ func (suite *CollectionUnitTestSuite) TestCollectionPermissionBackupLatestModTim suite, suite.testStatusUpdater(&wg, &collStatus), test.source, - control.Options{ToggleFeatures: control.Toggles{EnablePermissionsBackup: true}}, + control.Options{ToggleFeatures: control.Toggles{}}, + CollectionScopeFolder, true) mtime := time.Now().AddDate(0, -1, 0) @@ -567,7 +568,6 @@ func (suite *CollectionUnitTestSuite) TestCollectionPermissionBackupLatestModTim _ graph.Servicer, _ string, _ models.DriveItemable, - _ bool, ) (io.ReadCloser, int, error) { return io.NopCloser(strings.NewReader(`{}`)), 16, nil } @@ -597,3 +597,123 @@ func (suite *CollectionUnitTestSuite) TestCollectionPermissionBackupLatestModTim }) } } + +type GetDriveItemUnitTestSuite struct { + tester.Suite +} + +func TestGetDriveItemUnitTestSuite(t *testing.T) { + suite.Run(t, &GetDriveItemUnitTestSuite{Suite: tester.NewUnitSuite(t)}) +} + +func (suite *GetDriveItemUnitTestSuite) TestGetDriveItemError() { + strval := "not-important" + + table := []struct { + name string + colScope collectionScope + 
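+ // itemSize feeds the 2GB OneNote ceiling check; only rows pairing
+ // CollectionScopePackage with a size at or above MaxOneNoteFileSize
+ // should take the skip path.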
itemSize int64 + labels []string + err error + }{ + { + name: "Simple item fetch no error", + colScope: CollectionScopeFolder, + itemSize: 10, + err: nil, + }, + { + name: "Simple item fetch error", + colScope: CollectionScopeFolder, + itemSize: 10, + err: assert.AnError, + }, + { + name: "malware error", + colScope: CollectionScopeFolder, + itemSize: 10, + err: clues.New("test error").Label(graph.LabelsMalware), + labels: []string{graph.LabelsMalware, graph.LabelsSkippable}, + }, + { + name: "file not found error", + colScope: CollectionScopeFolder, + itemSize: 10, + err: clues.New("test error").Label(graph.LabelStatus(http.StatusNotFound)), + labels: []string{graph.LabelStatus(http.StatusNotFound), graph.LabelsSkippable}, + }, + { + // This should create an error that stops the backup + name: "small OneNote file", + colScope: CollectionScopePackage, + itemSize: 10, + err: clues.New("test error").Label(graph.LabelStatus(http.StatusServiceUnavailable)), + labels: []string{graph.LabelStatus(http.StatusServiceUnavailable)}, + }, + { + name: "big OneNote file", + colScope: CollectionScopePackage, + itemSize: MaxOneNoteFileSize, + err: clues.New("test error").Label(graph.LabelStatus(http.StatusServiceUnavailable)), + labels: []string{graph.LabelStatus(http.StatusServiceUnavailable), graph.LabelsSkippable}, + }, + { + // This should block the backup; only big OneNote files should be skipped + name: "big file", + colScope: CollectionScopeFolder, + itemSize: MaxOneNoteFileSize, + err: clues.New("test error").Label(graph.LabelStatus(http.StatusServiceUnavailable)), + labels: []string{graph.LabelStatus(http.StatusServiceUnavailable)}, + }, + } + + for _, test := range table { + suite.Run(test.name, func() { + ctx, flush := tester.NewContext() + defer flush() + + var ( + t = suite.T() + errs = fault.New(false) + item = models.NewDriveItem() + col = &Collection{scope: test.colScope} + ) + + item.SetId(&strval) + item.SetName(&strval) + item.SetSize(&test.itemSize) + + col.itemReader = func( + ctx context.Context, + hc *http.Client, + item models.DriveItemable, + ) (details.ItemInfo, io.ReadCloser, error) { + return details.ItemInfo{}, nil, test.err + } + + col.itemGetter = func( + ctx context.Context, + srv graph.Servicer, + driveID, itemID string, + ) (models.DriveItemable, error) { + // the getter's error path is not under test here + return item, nil + } + + _, err := col.getDriveItemContent(ctx, item, errs) + if test.err == nil { + assert.NoError(t, err, "no error") + return + } + + assert.EqualError(t, err, clues.Wrap(test.err, "downloading item").Error(), "error") + + labelsMap := map[string]struct{}{} + for _, l := range test.labels { + labelsMap[l] = struct{}{} + } + + assert.Equal(t, labelsMap, clues.Labels(err)) + }) + } +} diff --git a/src/internal/connector/onedrive/collections.go b/src/internal/connector/onedrive/collections.go index acab9e573..4ab890552 100644 --- a/src/internal/connector/onedrive/collections.go +++ b/src/internal/connector/onedrive/collections.go @@ -33,6 +33,20 @@ const ( SharePointSource ) +type collectionScope int + +const ( + // CollectionScopeUnknown is used when we don't know and don't need + // to know the kind, like in the case of deletes + CollectionScopeUnknown collectionScope = iota + + // CollectionScopeFolder is used for regular folder collections + CollectionScopeFolder + + // CollectionScopePackage is used to represent OneNote items + CollectionScopePackage +) + const ( restrictedDirectory = "Site Pages" rootDrivePattern = "/drives/%s/root:" @@ -411,13 +425,14 @@ func 
(c *Collections) Get( c.statusUpdater, c.source, c.ctrl, + CollectionScopeUnknown, true) c.CollectionMap[driveID][fldID] = col } } - observe.Message(ctx, observe.Safe(fmt.Sprintf("Discovered %d items to backup", c.NumItems))) + observe.Message(ctx, fmt.Sprintf("Discovered %d items to backup", c.NumItems)) // Add an extra for the metadata collection. collections := []data.BackupCollection{} @@ -572,6 +587,7 @@ func (c *Collections) handleDelete( c.statusUpdater, c.source, c.ctrl, + CollectionScopeUnknown, // DoNotMerge is not checked for deleted items. false) @@ -744,6 +760,11 @@ func (c *Collections) UpdateCollections( continue } + colScope := CollectionScopeFolder + if item.GetPackage() != nil { + colScope = CollectionScopePackage + } + col := NewCollection( c.itemClient, collectionPath, @@ -753,6 +774,7 @@ func (c *Collections) UpdateCollections( c.statusUpdater, c.source, c.ctrl, + colScope, invalidPrevDelta, ) col.driveName = driveName diff --git a/src/internal/connector/onedrive/collections_test.go b/src/internal/connector/onedrive/collections_test.go index ffe42f777..2f823ce60 100644 --- a/src/internal/connector/onedrive/collections_test.go +++ b/src/internal/connector/onedrive/collections_test.go @@ -786,7 +786,7 @@ func (suite *OneDriveCollectionsUnitSuite) TestUpdateCollections() { testFolderMatcher{tt.scope}, &MockGraphService{}, nil, - control.Options{ToggleFeatures: control.Toggles{EnablePermissionsBackup: true}}) + control.Options{ToggleFeatures: control.Toggles{}}) c.CollectionMap[driveID] = map[string]*Collection{} @@ -2237,7 +2237,7 @@ func (suite *OneDriveCollectionsUnitSuite) TestGet() { testFolderMatcher{anyFolder}, &MockGraphService{}, func(*support.ConnectorOperationStatus) {}, - control.Options{ToggleFeatures: control.Toggles{EnablePermissionsBackup: true}}, + control.Options{ToggleFeatures: control.Toggles{}}, ) c.drivePagerFunc = drivePagerFunc c.itemPagerFunc = itemPagerFunc diff --git a/src/internal/connector/onedrive/drive.go b/src/internal/connector/onedrive/drive.go index f443cb8c7..117e8f288 100644 --- a/src/internal/connector/onedrive/drive.go +++ b/src/internal/connector/onedrive/drive.go @@ -303,7 +303,7 @@ func GetAllFolders( name = ptr.Val(d.GetName()) ) - ictx := clues.Add(ctx, "drive_id", id, "drive_name", name) // TODO: pii + ictx := clues.Add(ctx, "drive_id", id, "drive_name", clues.Hide(name)) collector := func( _ context.Context, _, _ string, diff --git a/src/internal/connector/onedrive/drive_test.go b/src/internal/connector/onedrive/drive_test.go index 1bbc28779..26f8c5c85 100644 --- a/src/internal/connector/onedrive/drive_test.go +++ b/src/internal/connector/onedrive/drive_test.go @@ -299,11 +299,13 @@ func (suite *OneDriveSuite) TestCreateGetDeleteFolder() { ctx, flush := tester.NewContext() defer flush() - t := suite.T() - folderIDs := []string{} - folderName1 := "Corso_Folder_Test_" + common.FormatNow(common.SimpleTimeTesting) - folderElements := []string{folderName1} - gs := loadTestService(t) + var ( + t = suite.T() + folderIDs = []string{} + folderName1 = "Corso_Folder_Test_" + common.FormatNow(common.SimpleTimeTesting) + folderElements = []string{folderName1} + gs = loadTestService(t) + ) pager, err := PagerForSource(OneDriveSource, gs, suite.userID, nil) require.NoError(t, err, clues.ToCore(err)) @@ -317,11 +319,13 @@ func (suite *OneDriveSuite) TestCreateGetDeleteFolder() { defer func() { for _, id := range folderIDs { + ictx := clues.Add(ctx, "folder_id", id) + // deletes require unique http clients // 
https://github.com/alcionai/corso/issues/2707 - err := DeleteItem(ctx, loadTestService(t), driveID, id) + err := DeleteItem(ictx, loadTestService(t), driveID, id) if err != nil { - logger.Ctx(ctx).Warnw("deleting folder", "id", id, "error", err) + logger.CtxErr(ictx, err).Errorw("deleting folder") } } }() @@ -430,7 +434,7 @@ func (suite *OneDriveSuite) TestOneDriveNewCollections() { service, service.updateStatus, control.Options{ - ToggleFeatures: control.Toggles{EnablePermissionsBackup: true}, + ToggleFeatures: control.Toggles{}, }) odcs, excludes, err := colls.Get(ctx, nil, fault.New(true)) diff --git a/src/internal/connector/onedrive/item.go b/src/internal/connector/onedrive/item.go index f508ce506..209cdce15 100644 --- a/src/internal/connector/onedrive/item.go +++ b/src/internal/connector/onedrive/item.go @@ -14,6 +14,7 @@ import ( "github.com/alcionai/corso/src/internal/common/ptr" "github.com/alcionai/corso/src/internal/connector/graph" + "github.com/alcionai/corso/src/internal/connector/onedrive/api" "github.com/alcionai/corso/src/internal/connector/uploadsession" "github.com/alcionai/corso/src/internal/version" "github.com/alcionai/corso/src/pkg/backup/details" @@ -26,20 +27,6 @@ const ( downloadURLKey = "@microsoft.graph.downloadUrl" ) -// generic drive item getter -func getDriveItem( - ctx context.Context, - srv graph.Servicer, - driveID, itemID string, -) (models.DriveItemable, error) { - di, err := srv.Client().DrivesById(driveID).ItemsById(itemID).Get(ctx, nil) - if err != nil { - return nil, graph.Wrap(ctx, err, "getting item") - } - - return di, nil -} - // sharePointItemReader will return a io.ReadCloser for the specified item // It crafts this by querying M365 for a download URL for the item // and using a http client to initialize a reader @@ -66,9 +53,8 @@ func oneDriveItemMetaReader( service graph.Servicer, driveID string, item models.DriveItemable, - fetchPermissions bool, ) (io.ReadCloser, int, error) { - return baseItemMetaReader(ctx, service, driveID, item, fetchPermissions) + return baseItemMetaReader(ctx, service, driveID, item) } func sharePointItemMetaReader( @@ -76,10 +62,9 @@ func sharePointItemMetaReader( service graph.Servicer, driveID string, item models.DriveItemable, - fetchPermissions bool, ) (io.ReadCloser, int, error) { // TODO: include permissions - return baseItemMetaReader(ctx, service, driveID, item, false) + return baseItemMetaReader(ctx, service, driveID, item) } func baseItemMetaReader( @@ -87,7 +72,6 @@ func baseItemMetaReader( service graph.Servicer, driveID string, item models.DriveItemable, - fetchPermissions bool, ) (io.ReadCloser, int, error) { var ( perms []UserPermission @@ -101,7 +85,7 @@ func baseItemMetaReader( meta.SharingMode = SharingModeCustom } - if meta.SharingMode == SharingModeCustom && fetchPermissions { + if meta.SharingMode == SharingModeCustom { perms, err = driveItemPermissionInfo(ctx, service, driveID, ptr.Val(item.GetId())) if err != nil { return nil, 0, err @@ -232,14 +216,9 @@ func driveItemPermissionInfo( driveID string, itemID string, ) ([]UserPermission, error) { - perm, err := service. - Client(). - DrivesById(driveID). - ItemsById(itemID). - Permissions(). 
- Get(ctx, nil) + perm, err := api.GetItemPermission(ctx, service, driveID, itemID) if err != nil { - return nil, graph.Wrap(ctx, err, "fetching item permissions").With("item_id", itemID) + return nil, err } uperms := filterUserPermissions(ctx, perm.GetValue()) @@ -283,7 +262,7 @@ func filterUserPermissions(ctx context.Context, perms []models.Permissionable) [ if gv2.GetDevice() != nil { logm.With("application_id", ptr.Val(gv2.GetDevice().GetId())) } - logm.Warn("untracked permission") + logm.Info("untracked permission") } // Technically GrantedToV2 can also contain devices, but the diff --git a/src/internal/connector/onedrive/restore.go b/src/internal/connector/onedrive/restore.go index dc1c870a3..765bc38b9 100644 --- a/src/internal/connector/onedrive/restore.go +++ b/src/internal/connector/onedrive/restore.go @@ -75,9 +75,9 @@ func RestoreCollections( err error ictx = clues.Add( ctx, - "resource_owner", dc.FullPath().ResourceOwner(), // TODO: pii + "resource_owner", clues.Hide(dc.FullPath().ResourceOwner()), "category", dc.FullPath().Category(), - "path", dc.FullPath()) // TODO: pii + "path", dc.FullPath()) // TODO: pii, path needs concealer compliance ) metrics, folderMetas, err = RestoreCollection( @@ -598,7 +598,12 @@ func restoreData( } iReader := itemData.ToReader() - progReader, closer := observe.ItemProgress(ctx, iReader, observe.ItemRestoreMsg, observe.PII(itemName), ss.Size()) + progReader, closer := observe.ItemProgress( + ctx, + iReader, + observe.ItemRestoreMsg, + clues.Hide(itemName), + ss.Size()) go closer() diff --git a/src/internal/connector/sharepoint/collection.go b/src/internal/connector/sharepoint/collection.go index 1db542e9b..0350601e7 100644 --- a/src/internal/connector/sharepoint/collection.go +++ b/src/internal/connector/sharepoint/collection.go @@ -186,7 +186,8 @@ func (sc *Collection) runPopulate(ctx context.Context, errs *fault.Bus) (support colProgress, closer := observe.CollectionProgress( ctx, sc.fullPath.Category().String(), - observe.PII(sc.fullPath.Folder(false))) + // TODO(keepers): conceal compliance in path, drop Hide() + clues.Hide(sc.fullPath.Folder(false))) go closer() defer func() { diff --git a/src/internal/connector/sharepoint/data_collections.go b/src/internal/connector/sharepoint/data_collections.go index 02da46e3b..4c3895bc3 100644 --- a/src/internal/connector/sharepoint/data_collections.go +++ b/src/internal/connector/sharepoint/data_collections.go @@ -56,7 +56,7 @@ func DataCollections( foldersComplete, closer := observe.MessageWithCompletion( ctx, - observe.Bulletf("%s", observe.Safe(scope.Category().PathType().String()))) + observe.Bulletf("%s", scope.Category().PathType())) defer closer() defer close(foldersComplete) diff --git a/src/internal/connector/sharepoint/restore.go b/src/internal/connector/sharepoint/restore.go index afa7a2dd8..1f77b7b7a 100644 --- a/src/internal/connector/sharepoint/restore.go +++ b/src/internal/connector/sharepoint/restore.go @@ -61,8 +61,8 @@ func RestoreCollections( metrics support.CollectionMetrics ictx = clues.Add(ctx, "category", category, - "destination", dest.ContainerName, // TODO: pii - "resource_owner", dc.FullPath().ResourceOwner()) // TODO: pii + "destination", clues.Hide(dest.ContainerName), + "resource_owner", clues.Hide(dc.FullPath().ResourceOwner())) ) switch dc.FullPath().Category() { diff --git a/src/internal/connector/uploadsession/uploadsession.go b/src/internal/connector/uploadsession/uploadsession.go index 60f84f6ef..210abe018 100644 --- 
a/src/internal/connector/uploadsession/uploadsession.go +++ b/src/internal/connector/uploadsession/uploadsession.go @@ -40,8 +40,9 @@ func NewWriter(id, url string, size int64) *writer { // https://docs.microsoft.com/en-us/graph/api/driveitem-createuploadsession func (iw *writer) Write(p []byte) (int, error) { rangeLength := len(p) - logger.Ctx(context.Background()).Debugf("WRITE for %s. Size:%d, Offset: %d, TotalSize: %d", - iw.id, rangeLength, iw.lastWrittenOffset, iw.contentLength) + logger.Ctx(context.Background()). + Debugf("WRITE for %s. Size:%d, Offset: %d, TotalSize: %d", + iw.id, rangeLength, iw.lastWrittenOffset, iw.contentLength) endOffset := iw.lastWrittenOffset + int64(rangeLength) @@ -49,13 +50,15 @@ func (iw *writer) Write(p []byte) (int, error) { // data in the current request _, err := iw.client.R(). SetHeaders(map[string]string{ - contentRangeHeaderKey: fmt.Sprintf(contentRangeHeaderValueFmt, + contentRangeHeaderKey: fmt.Sprintf( + contentRangeHeaderValueFmt, iw.lastWrittenOffset, endOffset-1, iw.contentLength), contentLengthHeaderKey: fmt.Sprintf("%d", rangeLength), }). - SetBody(bytes.NewReader(p)).Put(iw.url) + SetBody(bytes.NewReader(p)). + Put(iw.url) if err != nil { return 0, clues.Wrap(err, "uploading item").With( "upload_id", iw.id, diff --git a/src/internal/data/data_collection.go b/src/internal/data/data_collection.go index 23f402083..d0dc1cd07 100644 --- a/src/internal/data/data_collection.go +++ b/src/internal/data/data_collection.go @@ -101,7 +101,7 @@ type Stream interface { // LocationPather provides a LocationPath describing the path with Display Names // instead of canonical IDs type LocationPather interface { - LocationPath() path.Path + LocationPath() *path.Builder } // StreamInfo is used to provide service specific diff --git a/src/internal/data/metrics.go b/src/internal/data/metrics.go new file mode 100644 index 000000000..f34d20a16 --- /dev/null +++ b/src/internal/data/metrics.go @@ -0,0 +1,17 @@ +package data + +type CollectionStats struct { + Folders, + Objects, + Successes int + Bytes int64 + Details string +} + +func (cs CollectionStats) IsZero() bool { + return cs.Folders+cs.Objects+cs.Successes+int(cs.Bytes) == 0 +} + +func (cs CollectionStats) String() string { + return cs.Details +} diff --git a/src/internal/events/events.go b/src/internal/events/events.go index ed4cc5b05..da7f8cfe3 100644 --- a/src/internal/events/events.go +++ b/src/internal/events/events.go @@ -138,7 +138,7 @@ func (b Bus) Event(ctx context.Context, key string, data map[string]any) { Set(tenantID, b.tenant), }) if err != nil { - logger.Ctx(ctx).Debugw("analytics event failure", "err", err) + logger.CtxErr(ctx, err).Debug("analytics event failure: repo identity") } } @@ -149,7 +149,7 @@ func (b Bus) Event(ctx context.Context, key string, data map[string]any) { Properties: props, }) if err != nil { - logger.Ctx(ctx).Info("analytics event failure", "err", err) + logger.CtxErr(ctx, err).Info("analytics event failure: tracking event") } } diff --git a/src/internal/events/events_signal_windows.go b/src/internal/events/events_signal_windows.go index 86ae519d8..d5119e476 100644 --- a/src/internal/events/events_signal_windows.go +++ b/src/internal/events/events_signal_windows.go @@ -7,5 +7,5 @@ import ( ) func signalDump(ctx context.Context) { - logger.Ctx(ctx).Warn("cannot send signal on Windows") + logger.Ctx(ctx).Error("cannot send signal on Windows") } diff --git a/src/internal/kopia/upload.go b/src/internal/kopia/upload.go index b0a363727..08859b4ee 100644 --- 
a/src/internal/kopia/upload.go +++ b/src/internal/kopia/upload.go @@ -127,7 +127,7 @@ type itemDetails struct { info *details.ItemInfo repoPath path.Path prevPath path.Path - locationPath path.Path + locationPath *path.Builder cached bool } @@ -205,20 +205,11 @@ func (cp *corsoProgress) FinishedFile(relativePath string, err error) { var ( locationFolders string - locPB *path.Builder parent = d.repoPath.ToBuilder().Dir() ) if d.locationPath != nil { - locationFolders = d.locationPath.Folder(true) - - locPB = d.locationPath.ToBuilder() - - // folderEntriesForPath assumes the location will - // not have an item element appended - if len(d.locationPath.Item()) > 0 { - locPB = locPB.Dir() - } + locationFolders = d.locationPath.String() } err = cp.deets.Add( @@ -239,7 +230,7 @@ func (cp *corsoProgress) FinishedFile(relativePath string, err error) { return } - folders := details.FolderEntriesForPath(parent, locPB) + folders := details.FolderEntriesForPath(parent, d.locationPath) cp.deets.AddFoldersForItem( folders, *d.info, @@ -328,7 +319,7 @@ func collectionEntries( } var ( - locationPath path.Path + locationPath *path.Builder // Track which items have already been seen so we can skip them if we see // them again in the data from the base snapshot. seen = map[string]struct{}{} @@ -431,7 +422,7 @@ func streamBaseEntries( cb func(context.Context, fs.Entry) error, curPath path.Path, prevPath path.Path, - locationPath path.Path, + locationPath *path.Builder, dir fs.Directory, encodedSeen map[string]struct{}, globalExcludeSet map[string]map[string]struct{}, @@ -556,7 +547,7 @@ func getStreamItemFunc( } } - var locationPath path.Path + var locationPath *path.Builder if lp, ok := streamedEnts.(data.LocationPather); ok { locationPath = lp.LocationPath() diff --git a/src/internal/kopia/upload_test.go b/src/internal/kopia/upload_test.go index 4c78df606..30f722e2b 100644 --- a/src/internal/kopia/upload_test.go +++ b/src/internal/kopia/upload_test.go @@ -345,6 +345,7 @@ func (suite *VersionReadersUnitSuite) TestWriteHandlesShortReads() { type CorsoProgressUnitSuite struct { tester.Suite targetFilePath path.Path + targetFileLoc *path.Builder targetFileName string } @@ -363,6 +364,7 @@ func (suite *CorsoProgressUnitSuite) SetupSuite() { require.NoError(suite.T(), err, clues.ToCore(err)) suite.targetFilePath = p + suite.targetFileLoc = path.Builder{}.Append(testInboxDir) suite.targetFileName = suite.targetFilePath.ToBuilder().Dir().String() } @@ -596,7 +598,7 @@ func (suite *CorsoProgressUnitSuite) TestFinishedFileBaseItemDoesntBuildHierarch expectedToMerge := map[string]PrevRefs{ prevPath.ShortRef(): { Repo: suite.targetFilePath, - Location: suite.targetFilePath, + Location: suite.targetFileLoc, }, } @@ -614,7 +616,7 @@ func (suite *CorsoProgressUnitSuite) TestFinishedFileBaseItemDoesntBuildHierarch info: nil, repoPath: suite.targetFilePath, prevPath: prevPath, - locationPath: suite.targetFilePath, + locationPath: suite.targetFileLoc, } cp.put(suite.targetFileName, deets) diff --git a/src/internal/kopia/wrapper.go b/src/internal/kopia/wrapper.go index 89258ec78..ba6bb38a5 100644 --- a/src/internal/kopia/wrapper.go +++ b/src/internal/kopia/wrapper.go @@ -93,6 +93,13 @@ func NewWrapper(c *conn) (*Wrapper, error) { return &Wrapper{c}, nil } +// FIXME: Circular references. 
+// must comply with restore producer and backup consumer +// var ( +// _ inject.BackupConsumer = &Wrapper{} +// _ inject.RestoreProducer = &Wrapper{} +// ) + type Wrapper struct { c *conn } @@ -121,16 +128,16 @@ type IncrementalBase struct { // that need to be merged in from prior snapshots. type PrevRefs struct { Repo path.Path - Location path.Path + Location *path.Builder } -// BackupCollections takes a set of collections and creates a kopia snapshot +// ConsumeBackupCollections takes a set of collections and creates a kopia snapshot // with the data that they contain. previousSnapshots is used for incremental // backups and should represent the base snapshot from which metadata is sourced, // as well as any incomplete snapshot checkpoints that may contain more // recent data than the base snapshot. The absence of previousSnapshots causes a // complete backup of all data. -func (w Wrapper) BackupCollections( +func (w Wrapper) ConsumeBackupCollections( ctx context.Context, previousSnapshots []IncrementalBase, collections []data.BackupCollection, @@ -143,7 +150,7 @@ func (w Wrapper) ConsumeBackupCollections( return nil, nil, nil, clues.Stack(errNotConnected).WithClues(ctx) } - ctx, end := diagnostics.Span(ctx, "kopia:backupCollections") + ctx, end := diagnostics.Span(ctx, "kopia:consumeBackupCollections") defer end() if len(collections) == 0 && len(globalExcludeSet) == 0 { @@ -382,21 +389,21 @@ type ByteCounter interface { Count(numBytes int64) } -// RestoreMultipleItems looks up all paths- assuming each is an item declaration, +// ProduceRestoreCollections looks up all paths, assuming each is an item declaration, // not a directory, in the snapshot with id snapshotID. The path should be the // full path of the item from the root. Returns the results as a slice of single- // item DataCollections, where the DataCollection.FullPath() matches the path. // If the item does not exist in kopia or is not a file an error is returned. // The UUID of the returned DataStreams will be the name of the kopia file the // data is sourced from. 
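Since this rename touches every caller, a short sketch of the new call shape; restoreOne is a hypothetical helper, and the nil ByteCounter follows the tests elsewhere in this diff, which pass nil when byte counts aren't needed:

func restoreOne(
	ctx context.Context,
	w *kopia.Wrapper,
	snapshotID string,
	itemPath path.Path,
) ([]data.RestoreCollection, error) {
	// Each returned collection holds a single item whose FullPath
	// matches the requested path.
	return w.ProduceRestoreCollections(
		ctx,
		snapshotID,
		[]path.Path{itemPath},
		nil, // ByteCounter; the tests in this diff pass nil
		fault.New(true))
}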
-func (w Wrapper) RestoreMultipleItems( +func (w Wrapper) ProduceRestoreCollections( ctx context.Context, snapshotID string, paths []path.Path, bcounter ByteCounter, errs *fault.Bus, ) ([]data.RestoreCollection, error) { - ctx, end := diagnostics.Span(ctx, "kopia:restoreMultipleItems") + ctx, end := diagnostics.Span(ctx, "kopia:produceRestoreCollections") defer end() if len(paths) == 0 { diff --git a/src/internal/kopia/wrapper_test.go b/src/internal/kopia/wrapper_test.go index 2f03a1fb2..bd5d0d724 100644 --- a/src/internal/kopia/wrapper_test.go +++ b/src/internal/kopia/wrapper_test.go @@ -276,7 +276,7 @@ func (suite *KopiaIntegrationSuite) TestBackupCollections() { suite.Run(test.name, func() { t := suite.T() - stats, deets, _, err := suite.w.BackupCollections( + stats, deets, _, err := suite.w.ConsumeBackupCollections( suite.ctx, prevSnaps, collections, @@ -423,7 +423,7 @@ func (suite *KopiaIntegrationSuite) TestBackupCollections_NoDetailsForMeta() { t := suite.T() collections := test.cols() - stats, deets, prevShortRefs, err := suite.w.BackupCollections( + stats, deets, prevShortRefs, err := suite.w.ConsumeBackupCollections( suite.ctx, prevSnaps, collections, @@ -525,7 +525,7 @@ func (suite *KopiaIntegrationSuite) TestRestoreAfterCompressionChange() { fp2, err := suite.storePath2.Append(dc2.Names[0], true) require.NoError(t, err, clues.ToCore(err)) - stats, _, _, err := w.BackupCollections( + stats, _, _, err := w.ConsumeBackupCollections( ctx, nil, []data.BackupCollection{dc1, dc2}, @@ -543,7 +543,7 @@ func (suite *KopiaIntegrationSuite) TestRestoreAfterCompressionChange() { fp2.String(): dc2.Data[0], } - result, err := w.RestoreMultipleItems( + result, err := w.ProduceRestoreCollections( ctx, string(stats.SnapshotID), []path.Path{ @@ -644,7 +644,7 @@ func (suite *KopiaIntegrationSuite) TestBackupCollections_ReaderError() { }, } - stats, deets, _, err := suite.w.BackupCollections( + stats, deets, _, err := suite.w.ConsumeBackupCollections( suite.ctx, nil, collections, @@ -666,7 +666,7 @@ func (suite *KopiaIntegrationSuite) TestBackupCollections_ReaderError() { ic := i64counter{} - _, err = suite.w.RestoreMultipleItems( + _, err = suite.w.ProduceRestoreCollections( suite.ctx, string(stats.SnapshotID), []path.Path{failedPath}, @@ -706,7 +706,7 @@ func (suite *KopiaIntegrationSuite) TestBackupCollectionsHandlesNoCollections() ctx, flush := tester.NewContext() defer flush() - s, d, _, err := suite.w.BackupCollections( + s, d, _, err := suite.w.ConsumeBackupCollections( ctx, nil, test.collections, @@ -866,7 +866,7 @@ func (suite *KopiaSimpleRepoIntegrationSuite) SetupTest() { tags[k] = "" } - stats, deets, _, err := suite.w.BackupCollections( + stats, deets, _, err := suite.w.ConsumeBackupCollections( suite.ctx, nil, collections, @@ -1018,7 +1018,7 @@ func (suite *KopiaSimpleRepoIntegrationSuite) TestBackupExcludeItem() { } } - stats, _, _, err := suite.w.BackupCollections( + stats, _, _, err := suite.w.ConsumeBackupCollections( suite.ctx, []IncrementalBase{ { @@ -1045,7 +1045,7 @@ func (suite *KopiaSimpleRepoIntegrationSuite) TestBackupExcludeItem() { ic := i64counter{} - _, err = suite.w.RestoreMultipleItems( + _, err = suite.w.ProduceRestoreCollections( suite.ctx, string(stats.SnapshotID), []path.Path{ @@ -1058,7 +1058,7 @@ func (suite *KopiaSimpleRepoIntegrationSuite) TestBackupExcludeItem() { } } -func (suite *KopiaSimpleRepoIntegrationSuite) TestRestoreMultipleItems() { +func (suite *KopiaSimpleRepoIntegrationSuite) TestProduceRestoreCollections() { doesntExist, err := path.Build( 
testTenant, testUser, @@ -1148,7 +1148,7 @@ func (suite *KopiaSimpleRepoIntegrationSuite) TestRestoreMultipleItems() { ic := i64counter{} - result, err := suite.w.RestoreMultipleItems( + result, err := suite.w.ProduceRestoreCollections( suite.ctx, string(suite.snapshotID), test.inputPaths, @@ -1167,7 +1167,7 @@ func (suite *KopiaSimpleRepoIntegrationSuite) TestRestoreMultipleItems() { } } -func (suite *KopiaSimpleRepoIntegrationSuite) TestRestoreMultipleItems_Errors() { +func (suite *KopiaSimpleRepoIntegrationSuite) TestProduceRestoreCollections_Errors() { itemPath, err := suite.testPath1.Append(testFileName, true) require.NoError(suite.T(), err, clues.ToCore(err)) @@ -1197,7 +1197,7 @@ func (suite *KopiaSimpleRepoIntegrationSuite) TestRestoreMultipleItems_Errors() suite.Run(test.name, func() { t := suite.T() - c, err := suite.w.RestoreMultipleItems( + c, err := suite.w.ProduceRestoreCollections( suite.ctx, test.snapshotID, test.paths, @@ -1219,7 +1219,7 @@ func (suite *KopiaSimpleRepoIntegrationSuite) TestDeleteSnapshot() { itemPath := suite.files[suite.testPath1.String()][0].itemPath ic := i64counter{} - c, err := suite.w.RestoreMultipleItems( + c, err := suite.w.ProduceRestoreCollections( suite.ctx, string(suite.snapshotID), []path.Path{itemPath}, diff --git a/src/internal/observe/observe.go b/src/internal/observe/observe.go index 50e7411b1..67db8b7a2 100644 --- a/src/internal/observe/observe.go +++ b/src/internal/observe/observe.go @@ -8,6 +8,7 @@ import ( "strings" "sync" + "github.com/alcionai/clues" "github.com/dustin/go-humanize" "github.com/spf13/cobra" "github.com/spf13/pflag" @@ -23,11 +24,6 @@ const ( progressBarWidth = 32 ) -// styling -const bullet = "∙" - -const Bullet = Safe(bullet) - var ( wg sync.WaitGroup // TODO: Revisit this being a global nd make it a parameter to the progress methods @@ -143,19 +139,19 @@ const ( // Progress Updates // Message is used to display a progress message -func Message(ctx context.Context, msgs ...cleanable) { - var ( - cleaned = make([]string, len(msgs)) - msg = make([]string, len(msgs)) - ) +func Message(ctx context.Context, msgs ...any) { + plainSl := make([]string, 0, len(msgs)) + loggableSl := make([]string, 0, len(msgs)) - for i := range msgs { - cleaned[i] = msgs[i].clean() - msg[i] = msgs[i].String() + for _, m := range msgs { + plainSl = append(plainSl, plainString(m)) + loggableSl = append(loggableSl, fmt.Sprintf("%v", m)) } - logger.Ctx(ctx).Info(strings.Join(cleaned, " ")) - message := strings.Join(msg, " ") + plain := strings.Join(plainSl, " ") + loggable := strings.Join(loggableSl, " ") + + logger.Ctx(ctx).Info(loggable) if cfg.hidden() { return @@ -167,9 +163,9 @@ func Message(ctx context.Context, msgs ...cleanable) { -1, mpb.NopStyle(), mpb.PrependDecorators(decor.Name( - message, + plain, decor.WC{ - W: len(message) + 1, + W: len(plain) + 1, C: decor.DidentRight, }))) @@ -183,19 +179,19 @@ func Message(ctx context.Context, msgs ...cleanable) { // that switches to "done" when the completion channel is signalled func MessageWithCompletion( ctx context.Context, - msg cleanable, + msg any, ) (chan<- struct{}, func()) { var ( - clean = msg.clean() - message = msg.String() - log = logger.Ctx(ctx) - ch = make(chan struct{}, 1) + plain = plainString(msg) + loggable = fmt.Sprintf("%v", msg) + log = logger.Ctx(ctx) + ch = make(chan struct{}, 1) ) - log.Info(clean) + log.Info(loggable) if cfg.hidden() { - return ch, func() { log.Info("done - " + clean) } + return ch, func() { log.Info("done - " + loggable) } } wg.Add(1) @@ -206,7 
+202,7 @@ func MessageWithCompletion( -1, mpb.SpinnerStyle(frames...).PositionLeft(), mpb.PrependDecorators( - decor.Name(message+":"), + decor.Name(plain+":"), decor.Elapsed(decor.ET_STYLE_GO, decor.WC{W: 8})), mpb.BarFillerOnComplete("done")) @@ -224,7 +220,7 @@ func MessageWithCompletion( }) wacb := waitAndCloseBar(bar, func() { - log.Info("done - " + clean) + log.Info("done - " + loggable) }) return ch, wacb @@ -241,11 +237,12 @@ func ItemProgress( ctx context.Context, rc io.ReadCloser, header string, - iname cleanable, + iname any, totalBytes int64, ) (io.ReadCloser, func()) { + plain := plainString(iname) log := logger.Ctx(ctx).With( - "item", iname.clean(), + "item", iname, "size", humanize.Bytes(uint64(totalBytes))) log.Debug(header) @@ -258,7 +255,7 @@ func ItemProgress( barOpts := []mpb.BarOption{ mpb.PrependDecorators( decor.Name(header, decor.WCSyncSpaceR), - decor.Name(iname.String(), decor.WCSyncSpaceR), + decor.Name(plain, decor.WCSyncSpaceR), decor.CountersKibiByte(" %.1f/%.1f ", decor.WC{W: 8}), decor.NewPercentage("%d ", decor.WC{W: 4})), } @@ -284,20 +281,21 @@ func ItemProgress( func ProgressWithCount( ctx context.Context, header string, - message cleanable, + msg any, count int64, ) (chan<- struct{}, func()) { var ( - log = logger.Ctx(ctx) - lmsg = fmt.Sprintf("%s %s - %d", header, message.clean(), count) - ch = make(chan struct{}) + plain = plainString(msg) + loggable = fmt.Sprintf("%s %v - %d", header, msg, count) + log = logger.Ctx(ctx) + ch = make(chan struct{}) ) - log.Info(lmsg) + log.Info(loggable) if cfg.hidden() { go listen(ctx, ch, nop, nop) - return ch, func() { log.Info("done - " + lmsg) } + return ch, func() { log.Info("done - " + loggable) } } wg.Add(1) @@ -305,7 +303,7 @@ func ProgressWithCount( barOpts := []mpb.BarOption{ mpb.PrependDecorators( decor.Name(header, decor.WCSyncSpaceR), - decor.Name(message.String()), + decor.Name(plain), decor.Counters(0, " %d/%d ")), } @@ -322,7 +320,7 @@ func ProgressWithCount( bar.Increment) wacb := waitAndCloseBar(bar, func() { - log.Info("done - " + lmsg) + log.Info("done - " + loggable) }) return ch, wacb @@ -366,14 +364,15 @@ func makeSpinFrames(barWidth int) { func CollectionProgress( ctx context.Context, category string, - dirName cleanable, + dirName any, ) (chan<- struct{}, func()) { var ( counted int + plain = plainString(dirName) ch = make(chan struct{}) log = logger.Ctx(ctx).With( "category", category, - "dir", dirName.clean()) + "dir", dirName) message = "Collecting Directory" ) @@ -387,7 +386,7 @@ func CollectionProgress( } } - if cfg.hidden() || len(dirName.String()) == 0 { + if cfg.hidden() || len(plain) == 0 { go listen(ctx, ch, nop, incCount) return ch, func() { log.Infow("done - "+message, "count", counted) } } @@ -398,7 +397,7 @@ func CollectionProgress( mpb.PrependDecorators(decor.Name(string(category))), mpb.AppendDecorators( decor.CurrentNoUnit("%d - ", decor.WCSyncSpace), - decor.Name(dirName.String()), + decor.Name(plain), ), mpb.BarFillerOnComplete(spinFrames[0]), } @@ -466,62 +465,45 @@ func listen(ctx context.Context, ch <-chan struct{}, onEnd, onInc func()) { } // --------------------------------------------------------------------------- -// PII redaction +// Styling // --------------------------------------------------------------------------- -type cleanable interface { - clean() string - String() string -} +const Bullet = "∙" -type PII string - -func (p PII) clean() string { - return "***" -} - -func (p PII) String() string { - return string(p) -} - -type Safe string - -func (s Safe) 
clean() string { - return string(s) -} - -func (s Safe) String() string { - return string(s) -} - -type bulletPII struct { +type bulletf struct { tmpl string - vars []cleanable + vs []any } -func Bulletf(template string, vs ...cleanable) bulletPII { - return bulletPII{ - tmpl: "∙ " + template, - vars: vs, - } +func Bulletf(template string, vs ...any) bulletf { + return bulletf{template, vs} } -func (b bulletPII) clean() string { - vs := make([]any, 0, len(b.vars)) - - for _, v := range b.vars { - vs = append(vs, v.clean()) +func (b bulletf) PlainString() string { + ps := make([]any, 0, len(b.vs)) + for _, v := range b.vs { + ps = append(ps, plainString(v)) } - return fmt.Sprintf(b.tmpl, vs...) + return fmt.Sprintf("∙ "+b.tmpl, ps...) } -func (b bulletPII) String() string { - vs := make([]any, 0, len(b.vars)) +func (b bulletf) String() string { + return fmt.Sprintf("∙ "+b.tmpl, b.vs...) +} - for _, v := range b.vars { - vs = append(vs, v.String()) +// plainString attempts to cast v to a PlainStringer +// interface, and retrieve the un-altered value. If +// v is not compliant with PlainStringer, returns the +// %v fmt of v. +// +// This should only be used to display the value in the +// observe progress bar. Logged values should only use +// the fmt %v to ensure Concealers hide PII. +func plainString(v any) string { + if ps, ok := v.(clues.PlainStringer); ok { + return ps.PlainString() } - return fmt.Sprintf(b.tmpl, vs...) + return fmt.Sprintf("%v", v) } diff --git a/src/internal/observe/observe_test.go b/src/internal/observe/observe_test.go index 31c818e05..89361304e 100644 --- a/src/internal/observe/observe_test.go +++ b/src/internal/observe/observe_test.go @@ -29,9 +29,9 @@ func TestObserveProgressUnitSuite(t *testing.T) { } var ( - tst = Safe("test") - testcat = Safe("testcat") - testertons = Safe("testertons") + tst = "test" + testcat = "testcat" + testertons = "testertons" ) func (suite *ObserveProgressUnitSuite) TestItemProgress() { @@ -105,7 +105,7 @@ func (suite *ObserveProgressUnitSuite) TestCollectionProgress_unblockOnCtxCancel SeedWriter(context.Background(), nil, nil) }() - progCh, closer := CollectionProgress(ctx, testcat.clean(), testertons) + progCh, closer := CollectionProgress(ctx, testcat, testertons) require.NotNil(t, progCh) require.NotNil(t, closer) @@ -140,7 +140,7 @@ func (suite *ObserveProgressUnitSuite) TestCollectionProgress_unblockOnChannelCl SeedWriter(context.Background(), nil, nil) }() - progCh, closer := CollectionProgress(ctx, testcat.clean(), testertons) + progCh, closer := CollectionProgress(ctx, testcat, testertons) require.NotNil(t, progCh) require.NotNil(t, closer) @@ -172,7 +172,7 @@ func (suite *ObserveProgressUnitSuite) TestObserveProgress() { message := "Test Message" - Message(ctx, Safe(message)) + Message(ctx, message) Complete() require.NotEmpty(suite.T(), recorder.String()) require.Contains(suite.T(), recorder.String(), message) @@ -193,7 +193,7 @@ func (suite *ObserveProgressUnitSuite) TestObserveProgressWithCompletion() { message := "Test Message" - ch, closer := MessageWithCompletion(ctx, Safe(message)) + ch, closer := MessageWithCompletion(ctx, message) // Trigger completion ch <- struct{}{} @@ -223,7 +223,7 @@ func (suite *ObserveProgressUnitSuite) TestObserveProgressWithChannelClosed() { message := "Test Message" - ch, closer := MessageWithCompletion(ctx, Safe(message)) + ch, closer := MessageWithCompletion(ctx, message) // Close channel without completing close(ch) @@ -255,7 +255,7 @@ func (suite *ObserveProgressUnitSuite) 
TestObserveProgressWithContextCancelled() message := "Test Message" - _, closer := MessageWithCompletion(ctx, Safe(message)) + _, closer := MessageWithCompletion(ctx, message) // cancel context cancel() @@ -286,7 +286,7 @@ func (suite *ObserveProgressUnitSuite) TestObserveProgressWithCount() { message := "Test Message" count := 3 - ch, closer := ProgressWithCount(ctx, header, Safe(message), int64(count)) + ch, closer := ProgressWithCount(ctx, header, message, int64(count)) for i := 0; i < count; i++ { ch <- struct{}{} @@ -319,7 +319,7 @@ func (suite *ObserveProgressUnitSuite) TestrogressWithCountChannelClosed() { message := "Test Message" count := 3 - ch, closer := ProgressWithCount(ctx, header, Safe(message), int64(count)) + ch, closer := ProgressWithCount(ctx, header, message, int64(count)) close(ch) diff --git a/src/internal/operations/backup.go b/src/internal/operations/backup.go index 89b223cd2..1c59b75dd 100644 --- a/src/internal/operations/backup.go +++ b/src/internal/operations/backup.go @@ -9,14 +9,13 @@ import ( "github.com/alcionai/corso/src/internal/common" "github.com/alcionai/corso/src/internal/common/crash" - "github.com/alcionai/corso/src/internal/connector" - "github.com/alcionai/corso/src/internal/connector/support" "github.com/alcionai/corso/src/internal/data" "github.com/alcionai/corso/src/internal/diagnostics" "github.com/alcionai/corso/src/internal/events" "github.com/alcionai/corso/src/internal/kopia" "github.com/alcionai/corso/src/internal/model" "github.com/alcionai/corso/src/internal/observe" + "github.com/alcionai/corso/src/internal/operations/inject" "github.com/alcionai/corso/src/internal/stats" "github.com/alcionai/corso/src/internal/streamstore" "github.com/alcionai/corso/src/pkg/account" @@ -34,14 +33,14 @@ import ( type BackupOperation struct { operation - ResourceOwner string `json:"resourceOwner"` - ResourceOwnerName string `json:"resourceOwnerName"` + ResourceOwner common.IDNamer Results BackupResults `json:"results"` Selectors selectors.Selector `json:"selectors"` Version string `json:"version"` account account.Account + bp inject.BackupProducer // when true, this allows for incremental backups instead of full data pulls incremental bool @@ -60,24 +59,20 @@ func NewBackupOperation( opts control.Options, kw *kopia.Wrapper, sw *store.Wrapper, - gc *connector.GraphConnector, + bp inject.BackupProducer, acct account.Account, selector selectors.Selector, - ownerName string, + owner common.IDNamer, bus events.Eventer, ) (BackupOperation, error) { op := BackupOperation{ - operation: newOperation(opts, bus, kw, sw, gc), - ResourceOwner: selector.DiscreteOwner, - ResourceOwnerName: ownerName, - Selectors: selector, - Version: "v0", - account: acct, - incremental: useIncrementalBackup(selector, opts), - } - - if len(ownerName) == 0 { - op.ResourceOwnerName = op.ResourceOwner + operation: newOperation(opts, bus, kw, sw), + ResourceOwner: owner, + Selectors: selector, + Version: "v0", + account: acct, + incremental: useIncrementalBackup(selector, opts), + bp: bp, } if err := op.validate(); err != nil { @@ -88,10 +83,18 @@ func NewBackupOperation( } func (op BackupOperation) validate() error { - if len(op.ResourceOwner) == 0 { + if op.ResourceOwner == nil { return clues.New("backup requires a resource owner") } + if len(op.ResourceOwner.ID()) == 0 { + return clues.New("backup requires a resource owner with a populated ID") + } + + if op.bp == nil { + return clues.New("missing backup producer") + } + return op.operation.validate() } @@ -101,7 +104,7 @@ func (op 
BackupOperation) validate() error { // get populated asynchronously. type backupStats struct { k *kopia.BackupStats - gc *support.ConnectorOperationStatus + gc *data.CollectionStats resourceCount int } @@ -141,8 +144,8 @@ func (op *BackupOperation) Run(ctx context.Context) (err error) { ctx = clues.Add( ctx, - "tenant_id", op.account.ID(), // TODO: pii - "resource_owner", op.ResourceOwner, // TODO: pii + "tenant_id", clues.Hide(op.account.ID()), + "resource_owner", clues.Hide(op.ResourceOwner), "backup_id", op.Results.BackupID, "service", op.Selectors.Service, "incremental", op.incremental) @@ -160,7 +163,7 @@ func (op *BackupOperation) Run(ctx context.Context) (err error) { // Execution // ----- - observe.Message(ctx, observe.Safe("Backing Up"), observe.Bullet, observe.PII(op.ResourceOwner)) + observe.Message(ctx, "Backing Up", observe.Bullet, clues.Hide(op.ResourceOwner.Name())) deets, err := op.do( ctx, @@ -175,6 +178,7 @@ func (op *BackupOperation) Run(ctx context.Context) (err error) { op.Errors.Fail(clues.Wrap(err, "running backup")) } + finalizeErrorHandling(ctx, op.Options, op.Errors, "running backup") LogFaultErrors(ctx, op.Errors.Errors(), "running backup") // ----- @@ -243,14 +247,21 @@ func (op *BackupOperation) do( return nil, clues.Wrap(err, "producing manifests and metadata") } - cs, excludes, err := produceBackupDataCollections(ctx, op.gc, op.Selectors, mdColls, op.Options, op.Errors) + cs, excludes, err := produceBackupDataCollections( + ctx, + op.bp, + op.ResourceOwner, + op.Selectors, + mdColls, + op.Options, + op.Errors) if err != nil { return nil, clues.Wrap(err, "producing backup data collections") } ctx = clues.Add(ctx, "coll_count", len(cs)) - writeStats, deets, toMerge, err := consumeBackupDataCollections( + writeStats, deets, toMerge, err := consumeBackupCollections( ctx, op.kopia, op.account.ID(), @@ -279,9 +290,9 @@ func (op *BackupOperation) do( return nil, clues.Wrap(err, "merging details") } - opStats.gc = op.gc.AwaitStatus() + opStats.gc = op.bp.Wait() - logger.Ctx(ctx).Debug(op.gc.PrintableStatus()) + logger.Ctx(ctx).Debug(opStats.gc) return deets, nil } @@ -291,18 +302,12 @@ func (op *BackupOperation) do( func useIncrementalBackup(sel selectors.Selector, opts control.Options) bool { enabled := !opts.ToggleFeatures.DisableIncrementals - switch sel.Service { - case selectors.ServiceExchange: + if sel.Service == selectors.ServiceExchange || + sel.Service == selectors.ServiceOneDrive { return enabled - - case selectors.ServiceOneDrive: - // TODO(ashmrtn): Remove the && part once we support permissions and - // incrementals. 
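The simplified gating below could be pinned with a table test along these lines; a hypothetical sketch, not part of this change (it assumes a selectors.ServiceSharePoint constant and uses a bare testing.T for brevity):

func TestUseIncrementalBackup(t *testing.T) {
	table := []struct {
		name    string
		sel     selectors.Selector
		disable bool
		expect  bool
	}{
		{"exchange", selectors.Selector{Service: selectors.ServiceExchange}, false, true},
		{"onedrive", selectors.Selector{Service: selectors.ServiceOneDrive}, false, true},
		{"onedrive toggled off", selectors.Selector{Service: selectors.ServiceOneDrive}, true, false},
		{"sharepoint unsupported", selectors.Selector{Service: selectors.ServiceSharePoint}, false, false},
	}

	for _, test := range table {
		t.Run(test.name, func(t *testing.T) {
			opts := control.Options{
				// the toggle should win over service support
				ToggleFeatures: control.Toggles{DisableIncrementals: test.disable},
			}
			assert.Equal(t, test.expect, useIncrementalBackup(test.sel, opts))
		})
	}
}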
- return enabled && !opts.ToggleFeatures.EnablePermissionsBackup - - default: - return false } + + return false } // --------------------------------------------------------------------------- @@ -312,38 +317,27 @@ func useIncrementalBackup(sel selectors.Selector, opts control.Options) bool { // calls the producer to generate collections of data to backup func produceBackupDataCollections( ctx context.Context, - gc *connector.GraphConnector, + bp inject.BackupProducer, + resourceOwner common.IDNamer, sel selectors.Selector, metadata []data.RestoreCollection, ctrlOpts control.Options, errs *fault.Bus, ) ([]data.BackupCollection, map[string]map[string]struct{}, error) { - complete, closer := observe.MessageWithCompletion(ctx, observe.Safe("Discovering items to backup")) + complete, closer := observe.MessageWithCompletion(ctx, "Discovering items to backup") defer func() { complete <- struct{}{} close(complete) closer() }() - return gc.DataCollections(ctx, sel, metadata, ctrlOpts, errs) + return bp.ProduceBackupCollections(ctx, resourceOwner, sel, metadata, ctrlOpts, errs) } // --------------------------------------------------------------------------- // Consumer funcs // --------------------------------------------------------------------------- -type backuper interface { - BackupCollections( - ctx context.Context, - bases []kopia.IncrementalBase, - cs []data.BackupCollection, - excluded map[string]map[string]struct{}, - tags map[string]string, - buildTreeWithBase bool, - errs *fault.Bus, - ) (*kopia.BackupStats, *details.Builder, map[string]kopia.PrevRefs, error) -} - func selectorToReasons(sel selectors.Selector) []kopia.Reason { service := sel.PathService() reasons := []kopia.Reason{} @@ -389,9 +383,9 @@ func builderFromReason(ctx context.Context, tenant string, r kopia.Reason) (*pat } // calls kopia to backup the collections of data -func consumeBackupDataCollections( +func consumeBackupCollections( ctx context.Context, - bu backuper, + bc inject.BackupConsumer, tenantID string, reasons []kopia.Reason, mans []*kopia.ManifestEntry, @@ -401,7 +395,7 @@ func consumeBackupDataCollections( isIncremental bool, errs *fault.Bus, ) (*kopia.BackupStats, *details.Builder, map[string]kopia.PrevRefs, error) { - complete, closer := observe.MessageWithCompletion(ctx, observe.Safe("Backing up data")) + complete, closer := observe.MessageWithCompletion(ctx, "Backing up data") defer func() { complete <- struct{}{} close(complete) @@ -465,7 +459,7 @@ func consumeBackupDataCollections( "base_backup_id", mbID) } - kopiaStats, deets, itemsSourcedFromBase, err := bu.BackupCollections( + kopiaStats, deets, itemsSourcedFromBase, err := bc.ConsumeBackupCollections( ctx, bases, cs, @@ -555,7 +549,7 @@ func mergeDetails( if err != nil { return clues.New("parsing base item info path"). WithClues(mctx). - With("repo_ref", entry.RepoRef) // todo: pii + With("repo_ref", entry.RepoRef) // todo: pii, path needs concealer compliance } // Although this base has an entry it may not be the most recent. 
Check @@ -589,12 +583,10 @@ func mergeDetails( var ( itemUpdated = newPath.String() != rr.String() newLocStr string - locBuilder *path.Builder ) if newLoc != nil { - locBuilder = newLoc.ToBuilder() - newLocStr = newLoc.Folder(true) + newLocStr = newLoc.String() itemUpdated = itemUpdated || newLocStr != entry.LocationRef } @@ -609,7 +601,7 @@ func mergeDetails( return clues.Wrap(err, "adding item to details") } - folders := details.FolderEntriesForPath(newPath.ToBuilder().Dir(), locBuilder) + folders := details.FolderEntriesForPath(newPath.ToBuilder().Dir(), newLoc) deets.AddFoldersForItem(folders, item, itemUpdated) // Track how many entries we added so that we know if we got them all when @@ -663,11 +655,11 @@ func (op *BackupOperation) persistResults( return clues.New("backup population never completed") } - if op.Status != Failed && opStats.gc.Metrics.Successes == 0 { + if op.Status != Failed && opStats.gc.IsZero() { op.Status = NoData } - op.Results.ItemsRead = opStats.gc.Metrics.Successes + op.Results.ItemsRead = opStats.gc.Successes return op.Errors.Failure() } @@ -714,8 +706,8 @@ func (op *BackupOperation) createBackupModels( op.Status.String(), backupID, op.Selectors, - op.ResourceOwner, - op.ResourceOwnerName, + op.ResourceOwner.ID(), + op.ResourceOwner.Name(), op.Results.ReadWrites, op.Results.StartAndEndTime, op.Errors.Errors()) diff --git a/src/internal/operations/backup_integration_test.go b/src/internal/operations/backup_integration_test.go index 3ae3d8a72..a88641ae1 100644 --- a/src/internal/operations/backup_integration_test.go +++ b/src/internal/operations/backup_integration_test.go @@ -30,6 +30,7 @@ import ( evmock "github.com/alcionai/corso/src/internal/events/mock" "github.com/alcionai/corso/src/internal/kopia" "github.com/alcionai/corso/src/internal/model" + "github.com/alcionai/corso/src/internal/operations/inject" "github.com/alcionai/corso/src/internal/tester" "github.com/alcionai/corso/src/internal/version" "github.com/alcionai/corso/src/pkg/account" @@ -39,6 +40,7 @@ import ( "github.com/alcionai/corso/src/pkg/fault" "github.com/alcionai/corso/src/pkg/path" "github.com/alcionai/corso/src/pkg/selectors" + "github.com/alcionai/corso/src/pkg/selectors/testdata" "github.com/alcionai/corso/src/pkg/store" ) @@ -121,6 +123,11 @@ func prepNewTestBackupOp( t.FailNow() } + id, name, err := gc.PopulateOwnerIDAndNamesFrom(sel.DiscreteOwner, nil) + require.NoError(t, err, clues.ToCore(err)) + + sel.SetDiscreteOwnerIDName(id, name) + bo := newTestBackupOp(t, ctx, kw, ms, gc, acct, sel, bus, featureToggles, closer) return bo, acct, kw, ms, gc, closer @@ -152,7 +159,7 @@ func newTestBackupOp( opts.ToggleFeatures = featureToggles - bo, err := NewBackupOperation(ctx, opts, kw, sw, gc, acct, sel, sel.DiscreteOwner, bus) + bo, err := NewBackupOperation(ctx, opts, kw, sw, gc, acct, sel, sel, bus) if !assert.NoError(t, err, clues.ToCore(err)) { closer() t.FailNow() @@ -288,7 +295,7 @@ func checkMetadataFilesExist( pathsByRef[dir.ShortRef()] = append(pathsByRef[dir.ShortRef()], fName) } - cols, err := kw.RestoreMultipleItems(ctx, bup.SnapshotID, paths, nil, fault.New(true)) + cols, err := kw.ProduceRestoreCollections(ctx, bup.SnapshotID, paths, nil, fault.New(true)) assert.NoError(t, err, clues.ToCore(err)) for _, col := range cols { @@ -383,7 +390,7 @@ func generateContainerOfItems( dest, collections) - deets, err := gc.RestoreDataCollections( + deets, err := gc.ConsumeRestoreCollections( ctx, backupVersion, acct, @@ -394,7 +401,9 @@ func generateContainerOfItems( fault.New(true)) 
require.NoError(t, err, clues.ToCore(err)) - gc.AwaitStatus() + // have to wait here, both to ensure the process + // finishes, and also to clean up the gc status + gc.Wait() return deets } @@ -539,7 +548,7 @@ func (suite *BackupOpIntegrationSuite) SetupSuite() { func (suite *BackupOpIntegrationSuite) TestNewBackupOperation() { kw := &kopia.Wrapper{} sw := &store.Wrapper{} - gc := &connector.GraphConnector{} + gc := &mockconnector.GraphConnector{} acct := tester.NewM365Account(suite.T()) table := []struct { @@ -547,7 +556,7 @@ func (suite *BackupOpIntegrationSuite) TestNewBackupOperation() { opts control.Options kw *kopia.Wrapper sw *store.Wrapper - gc *connector.GraphConnector + bp inject.BackupProducer acct account.Account targets []string errCheck assert.ErrorAssertionFunc @@ -555,22 +564,24 @@ func (suite *BackupOpIntegrationSuite) TestNewBackupOperation() { {"good", control.Options{}, kw, sw, gc, acct, nil, assert.NoError}, {"missing kopia", control.Options{}, nil, sw, gc, acct, nil, assert.Error}, {"missing modelstore", control.Options{}, kw, nil, gc, acct, nil, assert.Error}, - {"missing graphconnector", control.Options{}, kw, sw, nil, acct, nil, assert.Error}, + {"missing backup producer", control.Options{}, kw, sw, nil, acct, nil, assert.Error}, } for _, test := range table { suite.Run(test.name, func() { ctx, flush := tester.NewContext() defer flush() + sel := selectors.Selector{DiscreteOwner: "test"} + _, err := NewBackupOperation( ctx, test.opts, test.kw, test.sw, - test.gc, + test.bp, test.acct, - selectors.Selector{DiscreteOwner: "test"}, - "test-name", + sel, + sel, evmock.NewBus()) test.errCheck(suite.T(), err, clues.ToCore(err)) }) @@ -1095,7 +1106,6 @@ func (suite *BackupOpIntegrationSuite) TestBackup_Run_exchangeIncrementals() { } for _, test := range table { suite.Run(test.name, func() { - fmt.Printf("\n-----\ntest %+v\n-----\n", test.name) var ( t = suite.T() incMB = evmock.NewBus() @@ -1150,7 +1160,7 @@ func (suite *BackupOpIntegrationSuite) TestBackup_Run_oneDrive() { sel.Include(sel.AllData()) - bo, _, _, _, _, closer := prepNewTestBackupOp(t, ctx, mb, sel.Selector, control.Toggles{EnablePermissionsBackup: true}) + bo, _, _, _, _, closer := prepNewTestBackupOp(t, ctx, mb, sel.Selector, control.Toggles{}) defer closer() runAndCheckBackup(t, ctx, &bo, mb, false) @@ -1606,7 +1616,7 @@ func (suite *BackupOpIntegrationSuite) TestBackup_Run_sharePoint() { sel = selectors.NewSharePointBackup([]string{suite.site}) ) - sel.Include(sel.LibraryFolders(selectors.Any())) + sel.Include(testdata.SharePointBackupFolderScope(sel)) bo, _, kw, _, _, closer := prepNewTestBackupOp(t, ctx, mb, sel.Selector, control.Toggles{}) defer closer() diff --git a/src/internal/operations/backup_test.go b/src/internal/operations/backup_test.go index 74641704a..a1f68c2a3 100644 --- a/src/internal/operations/backup_test.go +++ b/src/internal/operations/backup_test.go @@ -14,8 +14,7 @@ import ( "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" - "github.com/alcionai/corso/src/internal/connector" - "github.com/alcionai/corso/src/internal/connector/support" + "github.com/alcionai/corso/src/internal/connector/mockconnector" "github.com/alcionai/corso/src/internal/data" evmock "github.com/alcionai/corso/src/internal/events/mock" "github.com/alcionai/corso/src/internal/kopia" @@ -38,7 +37,7 @@ import ( // ----- restore producer -type mockRestorer struct { +type mockRestoreProducer struct { gotPaths []path.Path colls []data.RestoreCollection collsByID 
map[string][]data.RestoreCollection // snapshotID: []RestoreCollection @@ -48,7 +47,7 @@ type mockRestorer struct { type restoreFunc func(id string, ps []path.Path) ([]data.RestoreCollection, error) -func (mr *mockRestorer) buildRestoreFunc( +func (mr *mockRestoreProducer) buildRestoreFunc( t *testing.T, oid string, ops []path.Path, @@ -61,7 +60,7 @@ func (mr *mockRestorer) buildRestoreFunc( } } -func (mr *mockRestorer) RestoreMultipleItems( +func (mr *mockRestoreProducer) ProduceRestoreCollections( ctx context.Context, snapshotID string, paths []path.Path, @@ -85,9 +84,9 @@ func checkPaths(t *testing.T, expected, got []path.Path) { assert.ElementsMatch(t, expected, got) } -// ----- backup producer +// ----- backup consumer -type mockBackuper struct { +type mockBackupConsumer struct { checkFunc func( bases []kopia.IncrementalBase, cs []data.BackupCollection, @@ -95,7 +94,7 @@ type mockBackuper struct { buildTreeWithBase bool) } -func (mbu mockBackuper) BackupCollections( +func (mbu mockBackupConsumer) ConsumeBackupCollections( ctx context.Context, bases []kopia.IncrementalBase, cs []data.BackupCollection, @@ -266,7 +265,7 @@ func makePath(t *testing.T, elements []string, isItem bool) path.Path { func makeDetailsEntry( t *testing.T, p path.Path, - l path.Path, + l *path.Builder, size int, updated bool, ) *details.DetailsEntry { @@ -274,7 +273,7 @@ func makeDetailsEntry( var lr string if l != nil { - lr = l.PopFront().PopFront().PopFront().PopFront().Dir().String() + lr = l.String() } res := &details.DetailsEntry{ @@ -299,7 +298,7 @@ func makeDetailsEntry( res.Exchange = &details.ExchangeInfo{ ItemType: details.ExchangeMail, Size: int64(size), - ParentPath: l.Folder(false), + ParentPath: l.String(), } case path.OneDriveService: @@ -360,7 +359,7 @@ func (suite *BackupOpUnitSuite) TestBackupOperation_PersistResults() { var ( kw = &kopia.Wrapper{} sw = &store.Wrapper{} - gc = &connector.GraphConnector{} + gc = &mockconnector.GraphConnector{} acct = account.Account{} now = time.Now() ) @@ -381,9 +380,7 @@ func (suite *BackupOpUnitSuite) TestBackupOperation_PersistResults() { TotalHashedBytes: 1, TotalUploadedBytes: 1, }, - gc: &support.ConnectorOperationStatus{ - Metrics: support.CollectionMetrics{Successes: 1}, - }, + gc: &data.CollectionStats{Successes: 1}, }, }, { @@ -392,7 +389,7 @@ func (suite *BackupOpUnitSuite) TestBackupOperation_PersistResults() { fail: assert.AnError, stats: backupStats{ k: &kopia.BackupStats{}, - gc: &support.ConnectorOperationStatus{}, + gc: &data.CollectionStats{}, }, }, { @@ -400,7 +397,7 @@ func (suite *BackupOpUnitSuite) TestBackupOperation_PersistResults() { expectErr: assert.NoError, stats: backupStats{ k: &kopia.BackupStats{}, - gc: &support.ConnectorOperationStatus{}, + gc: &data.CollectionStats{}, }, }, } @@ -418,7 +415,7 @@ func (suite *BackupOpUnitSuite) TestBackupOperation_PersistResults() { gc, acct, sel, - sel.DiscreteOwner, + sel, evmock.NewBus()) require.NoError(t, err, clues.ToCore(err)) @@ -427,7 +424,7 @@ func (suite *BackupOpUnitSuite) TestBackupOperation_PersistResults() { test.expectErr(t, op.persistResults(now, &test.stats)) assert.Equal(t, test.expectStatus.String(), op.Status.String(), "status") - assert.Equal(t, test.stats.gc.Metrics.Successes, op.Results.ItemsRead, "items read") + assert.Equal(t, test.stats.gc.Successes, op.Results.ItemsRead, "items read") assert.Equal(t, test.stats.k.TotalFileCount, op.Results.ItemsWritten, "items written") assert.Equal(t, test.stats.k.TotalHashedBytes, op.Results.BytesRead, "bytes read") assert.Equal(t, 
test.stats.k.TotalUploadedBytes, op.Results.BytesUploaded, "bytes written") @@ -564,7 +561,7 @@ func (suite *BackupOpUnitSuite) TestBackupOperation_ConsumeBackupDataCollections ctx, flush := tester.NewContext() defer flush() - mbu := &mockBackuper{ + mbu := &mockBackupConsumer{ checkFunc: func( bases []kopia.IncrementalBase, cs []data.BackupCollection, @@ -576,7 +573,7 @@ func (suite *BackupOpUnitSuite) TestBackupOperation_ConsumeBackupDataCollections } //nolint:errcheck - consumeBackupDataCollections( + consumeBackupCollections( ctx, mbu, tenant, @@ -611,22 +608,8 @@ func (suite *BackupOpUnitSuite) TestBackupOperation_MergeBackupDetails_AddsItems }, true, ) - locationPath1 = makePath( - suite.T(), - []string{ - tenant, - path.OneDriveService.String(), - ro, - path.FilesCategory.String(), - "drives", - "drive-id", - "root:", - "work-display-name", - "item1", - }, - true, - ) - itemPath2 = makePath( + locationPath1 = path.Builder{}.Append("root:", "work-display-name") + itemPath2 = makePath( suite.T(), []string{ tenant, @@ -641,22 +624,8 @@ func (suite *BackupOpUnitSuite) TestBackupOperation_MergeBackupDetails_AddsItems }, true, ) - locationPath2 = makePath( - suite.T(), - []string{ - tenant, - path.OneDriveService.String(), - ro, - path.FilesCategory.String(), - "drives", - "drive-id", - "root:", - "personal-display-name", - "item2", - }, - true, - ) - itemPath3 = makePath( + locationPath2 = path.Builder{}.Append("root:", "personal-display-name") + itemPath3 = makePath( suite.T(), []string{ tenant, @@ -668,18 +637,7 @@ func (suite *BackupOpUnitSuite) TestBackupOperation_MergeBackupDetails_AddsItems }, true, ) - locationPath3 = makePath( - suite.T(), - []string{ - tenant, - path.ExchangeService.String(), - ro, - path.EmailCategory.String(), - "personal-display-name", - "item3", - }, - true, - ) + locationPath3 = path.Builder{}.Append("personal-display-name") backup1 = backup.Backup{ BaseModel: model.BaseModel{ @@ -804,7 +762,7 @@ func (suite *BackupOpUnitSuite) TestBackupOperation_MergeBackupDetails_AddsItems backup1.DetailsID: { DetailsModel: details.DetailsModel{ Entries: []details.DetailsEntry{ - *makeDetailsEntry(suite.T(), itemPath1, itemPath1, 42, false), + *makeDetailsEntry(suite.T(), itemPath1, locationPath1, 42, false), }, }, }, @@ -840,7 +798,7 @@ func (suite *BackupOpUnitSuite) TestBackupOperation_MergeBackupDetails_AddsItems backup1.DetailsID: { DetailsModel: details.DetailsModel{ Entries: []details.DetailsEntry{ - *makeDetailsEntry(suite.T(), itemPath1, itemPath1, 42, false), + *makeDetailsEntry(suite.T(), itemPath1, locationPath1, 42, false), }, }, }, @@ -929,7 +887,7 @@ func (suite *BackupOpUnitSuite) TestBackupOperation_MergeBackupDetails_AddsItems backup1.DetailsID: { DetailsModel: details.DetailsModel{ Entries: []details.DetailsEntry{ - *makeDetailsEntry(suite.T(), itemPath1, itemPath1, 42, false), + *makeDetailsEntry(suite.T(), itemPath1, locationPath1, 42, false), }, }, }, @@ -1006,7 +964,7 @@ func (suite *BackupOpUnitSuite) TestBackupOperation_MergeBackupDetails_AddsItems inputShortRefsFromPrevBackup: map[string]kopia.PrevRefs{ itemPath1.ShortRef(): { Repo: itemPath1, - Location: itemPath1, + Location: locationPath1, }, }, inputMans: []*kopia.ManifestEntry{ @@ -1024,14 +982,14 @@ func (suite *BackupOpUnitSuite) TestBackupOperation_MergeBackupDetails_AddsItems backup1.DetailsID: { DetailsModel: details.DetailsModel{ Entries: []details.DetailsEntry{ - *makeDetailsEntry(suite.T(), itemPath1, itemPath1, 42, false), + *makeDetailsEntry(suite.T(), itemPath1, locationPath1, 
42, false), }, }, }, }, errCheck: assert.NoError, expectedEntries: []*details.DetailsEntry{ - makeDetailsEntry(suite.T(), itemPath1, itemPath1, 42, false), + makeDetailsEntry(suite.T(), itemPath1, locationPath1, 42, false), }, }, { @@ -1257,10 +1215,7 @@ func (suite *BackupOpUnitSuite) TestBackupOperation_MergeBackupDetails_AddsFolde pathElems, true) - locPath1 = makePath( - t, - pathElems[:len(pathElems)-1], - false) + locPath1 = path.Builder{}.Append(pathElems[:len(pathElems)-1]...) backup1 = backup.Backup{ BaseModel: model.BaseModel{ @@ -1300,7 +1255,7 @@ func (suite *BackupOpUnitSuite) TestBackupOperation_MergeBackupDetails_AddsFolde // later = now.Add(42 * time.Minute) ) - itemDetails := makeDetailsEntry(t, itemPath1, itemPath1, itemSize, false) + itemDetails := makeDetailsEntry(t, itemPath1, locPath1, itemSize, false) // itemDetails.Exchange.Modified = now populatedDetails := map[string]*details.Details{ diff --git a/src/internal/operations/logging.go b/src/internal/operations/helpers.go similarity index 57% rename from src/internal/operations/logging.go rename to src/internal/operations/helpers.go index e0e0cd51d..8cfbe66a0 100644 --- a/src/internal/operations/logging.go +++ b/src/internal/operations/helpers.go @@ -2,11 +2,45 @@ package operations import ( "context" + "fmt" + "github.com/alcionai/clues" + + "github.com/alcionai/corso/src/pkg/control" "github.com/alcionai/corso/src/pkg/fault" "github.com/alcionai/corso/src/pkg/logger" ) +// finalizeErrorHandling ensures the operation follows the options' +// failure behavior requirements. +func finalizeErrorHandling( + ctx context.Context, + opts control.Options, + errs *fault.Bus, + prefix string, +) { + rcvd := errs.Recovered() + + // under certain conditions, there's nothing else left to do + if opts.FailureHandling == control.BestEffort || + errs.Failure() != nil || + len(rcvd) == 0 { + return + } + + if opts.FailureHandling == control.FailAfterRecovery { + msg := fmt.Sprintf("%s: partial success: %d errors occurred", prefix, len(rcvd)) + logger.Ctx(ctx).Error(msg) + + if len(rcvd) == 1 { + errs.Fail(rcvd[0]) + return + } + + errs.Fail(clues.New(msg)) + } +} + // LogFaultErrors is a helper function that logs all entries in the Errors struct.
func LogFaultErrors(ctx context.Context, fe *fault.Errors, prefix string) { if fe == nil { diff --git a/src/internal/operations/helpers_test.go b/src/internal/operations/helpers_test.go new file mode 100644 index 000000000..c02f2131c --- /dev/null +++ b/src/internal/operations/helpers_test.go @@ -0,0 +1,102 @@ +package operations + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/suite" + + "github.com/alcionai/corso/src/internal/tester" + "github.com/alcionai/corso/src/pkg/control" + "github.com/alcionai/corso/src/pkg/fault" +) + +type HelpersUnitSuite struct { + tester.Suite +} + +func TestHelpersUnitSuite(t *testing.T) { + suite.Run(t, &HelpersUnitSuite{Suite: tester.NewUnitSuite(t)}) +} + +func (suite *HelpersUnitSuite) TestFinalizeErrorHandling() { + table := []struct { + name string + errs func() *fault.Bus + opts control.Options + expectErr assert.ErrorAssertionFunc + }{ + { + name: "no errors", + errs: func() *fault.Bus { + return fault.New(false) + }, + opts: control.Options{ + FailureHandling: control.FailAfterRecovery, + }, + expectErr: assert.NoError, + }, + { + name: "already failed", + errs: func() *fault.Bus { + fn := fault.New(false) + fn.Fail(assert.AnError) + return fn + }, + opts: control.Options{ + FailureHandling: control.FailAfterRecovery, + }, + expectErr: assert.Error, + }, + { + name: "best effort", + errs: func() *fault.Bus { + fn := fault.New(false) + fn.AddRecoverable(assert.AnError) + return fn + }, + opts: control.Options{ + FailureHandling: control.BestEffort, + }, + expectErr: assert.NoError, + }, + { + name: "recoverable errors produce hard fail", + errs: func() *fault.Bus { + fn := fault.New(false) + fn.AddRecoverable(assert.AnError) + return fn + }, + opts: control.Options{ + FailureHandling: control.FailAfterRecovery, + }, + expectErr: assert.Error, + }, + { + name: "multiple recoverable errors produce hard fail", + errs: func() *fault.Bus { + fn := fault.New(false) + fn.AddRecoverable(assert.AnError) + fn.AddRecoverable(assert.AnError) + fn.AddRecoverable(assert.AnError) + return fn + }, + opts: control.Options{ + FailureHandling: control.FailAfterRecovery, + }, + expectErr: assert.Error, + }, + } + for _, test := range table { + suite.Run(test.name, func() { + ctx, flush := tester.NewContext() + defer flush() + + t := suite.T() + errs := test.errs() + + finalizeErrorHandling(ctx, test.opts, errs, "test") + test.expectErr(t, errs.Failure()) + }) + } +} diff --git a/src/internal/operations/inject/inject.go b/src/internal/operations/inject/inject.go new file mode 100644 index 000000000..fa9339f50 --- /dev/null +++ b/src/internal/operations/inject/inject.go @@ -0,0 +1,67 @@ +package inject + +import ( + "context" + + "github.com/alcionai/corso/src/internal/common" + "github.com/alcionai/corso/src/internal/data" + "github.com/alcionai/corso/src/internal/kopia" + "github.com/alcionai/corso/src/pkg/account" + "github.com/alcionai/corso/src/pkg/backup/details" + "github.com/alcionai/corso/src/pkg/control" + "github.com/alcionai/corso/src/pkg/fault" + "github.com/alcionai/corso/src/pkg/path" + "github.com/alcionai/corso/src/pkg/selectors" +) + +type ( + BackupProducer interface { + ProduceBackupCollections( + ctx context.Context, + resourceOwner common.IDNamer, + sels selectors.Selector, + metadata []data.RestoreCollection, + ctrlOpts control.Options, + errs *fault.Bus, + ) ([]data.BackupCollection, map[string]map[string]struct{}, error) + + Wait() *data.CollectionStats + } + + BackupConsumer interface { + 
ConsumeBackupCollections( + ctx context.Context, + bases []kopia.IncrementalBase, + cs []data.BackupCollection, + excluded map[string]map[string]struct{}, + tags map[string]string, + buildTreeWithBase bool, + errs *fault.Bus, + ) (*kopia.BackupStats, *details.Builder, map[string]kopia.PrevRefs, error) + } + + RestoreProducer interface { + ProduceRestoreCollections( + ctx context.Context, + snapshotID string, + paths []path.Path, + bc kopia.ByteCounter, + errs *fault.Bus, + ) ([]data.RestoreCollection, error) + } + + RestoreConsumer interface { + ConsumeRestoreCollections( + ctx context.Context, + backupVersion int, + acct account.Account, + selector selectors.Selector, + dest control.RestoreDestination, + opts control.Options, + dcs []data.RestoreCollection, + errs *fault.Bus, + ) (*details.Details, error) + + Wait() *data.CollectionStats + } +) diff --git a/src/internal/operations/manifests.go b/src/internal/operations/manifests.go index b4505c915..f8f2c5041 100644 --- a/src/internal/operations/manifests.go +++ b/src/internal/operations/manifests.go @@ -11,6 +11,7 @@ import ( "github.com/alcionai/corso/src/internal/data" "github.com/alcionai/corso/src/internal/kopia" "github.com/alcionai/corso/src/internal/model" + "github.com/alcionai/corso/src/internal/operations/inject" "github.com/alcionai/corso/src/pkg/backup" "github.com/alcionai/corso/src/pkg/fault" "github.com/alcionai/corso/src/pkg/logger" @@ -27,7 +28,7 @@ type manifestFetcher interface { type manifestRestorer interface { manifestFetcher - restorer + inject.RestoreProducer } type getBackuper interface { @@ -173,7 +174,7 @@ func verifyDistinctBases(ctx context.Context, mans []*kopia.ManifestEntry) error // collectMetadata retrieves all metadata files associated with the manifest. func collectMetadata( ctx context.Context, - r restorer, + r inject.RestoreProducer, man *kopia.ManifestEntry, fileNames []string, tenantID string, @@ -201,7 +202,7 @@ func collectMetadata( } } - dcs, err := r.RestoreMultipleItems(ctx, string(man.ID), paths, nil, errs) + dcs, err := r.ProduceRestoreCollections(ctx, string(man.ID), paths, nil, errs) if err != nil { // Restore is best-effort and we want to keep it that way since we want to // return as much metadata as we can to reduce the work we'll need to do. 
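For orientation, a minimal sketch (not part of the diff) of how the inject interfaces above compose in a backup run. The ProduceBackupCollections and ConsumeBackupCollections signatures are taken from inject/inject.go; the wrapper function, its name, and the zero-value arguments are illustrative assumptions, and error handling is pared down to the bare minimum.

package sketch

import (
	"context"

	"github.com/alcionai/corso/src/internal/common"
	"github.com/alcionai/corso/src/internal/operations/inject"
	"github.com/alcionai/corso/src/pkg/control"
	"github.com/alcionai/corso/src/pkg/fault"
	"github.com/alcionai/corso/src/pkg/selectors"
)

// runBackupSketch is a hypothetical helper showing the produce/consume
// hand-off: a BackupProducer (e.g. the graph connector) enumerates the
// collections and exclusions, and a BackupConsumer (e.g. the kopia
// wrapper) persists them.
func runBackupSketch(
	ctx context.Context,
	bp inject.BackupProducer,
	bc inject.BackupConsumer,
	owner common.IDNamer,
	sel selectors.Selector,
	errs *fault.Bus,
) error {
	// nil metadata and default options: a full, non-incremental pass
	cols, excluded, err := bp.ProduceBackupCollections(
		ctx, owner, sel, nil, control.Options{}, errs)
	if err != nil {
		return err
	}

	// nil bases and buildTreeWithBase=false skip incremental assembly
	_, _, _, err = bc.ConsumeBackupCollections(
		ctx, nil, cols, excluded, map[string]string{}, false, errs)

	return err
}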
diff --git a/src/internal/operations/manifests_test.go b/src/internal/operations/manifests_test.go index df0c54632..9f2a3b409 100644 --- a/src/internal/operations/manifests_test.go +++ b/src/internal/operations/manifests_test.go @@ -24,9 +24,9 @@ import ( // --------------------------------------------------------------------------- type mockManifestRestorer struct { - mockRestorer + mockRestoreProducer mans []*kopia.ManifestEntry - mrErr error // err varname already claimed by mockRestorer + mrErr error // err varname already claimed by mockRestoreProducer } func (mmr mockManifestRestorer) FetchPrevSnapshotManifests( @@ -225,7 +225,7 @@ func (suite *OperationsManifestsUnitSuite) TestCollectMetadata() { paths := test.expectPaths(t, test.fileNames) - mr := mockRestorer{err: test.expectErr} + mr := mockRestoreProducer{err: test.expectErr} mr.buildRestoreFunc(t, test.manID, paths) man := &kopia.ManifestEntry{ @@ -447,8 +447,8 @@ func (suite *OperationsManifestsUnitSuite) TestProduceManifestsAndMetadata() { { name: "don't get metadata, no mans", mr: mockManifestRestorer{ - mockRestorer: mockRestorer{}, - mans: []*kopia.ManifestEntry{}, + mockRestoreProducer: mockRestoreProducer{}, + mans: []*kopia.ManifestEntry{}, }, gb: mockGetBackuper{detailsID: did}, reasons: []kopia.Reason{}, @@ -460,8 +460,8 @@ func (suite *OperationsManifestsUnitSuite) TestProduceManifestsAndMetadata() { { name: "don't get metadata", mr: mockManifestRestorer{ - mockRestorer: mockRestorer{}, - mans: []*kopia.ManifestEntry{makeMan(path.EmailCategory, "", "", "")}, + mockRestoreProducer: mockRestoreProducer{}, + mans: []*kopia.ManifestEntry{makeMan(path.EmailCategory, "", "", "")}, }, gb: mockGetBackuper{detailsID: did}, reasons: []kopia.Reason{}, @@ -473,8 +473,8 @@ func (suite *OperationsManifestsUnitSuite) TestProduceManifestsAndMetadata() { { name: "don't get metadata, incomplete manifest", mr: mockManifestRestorer{ - mockRestorer: mockRestorer{}, - mans: []*kopia.ManifestEntry{makeMan(path.EmailCategory, "", "ir", "")}, + mockRestoreProducer: mockRestoreProducer{}, + mans: []*kopia.ManifestEntry{makeMan(path.EmailCategory, "", "ir", "")}, }, gb: mockGetBackuper{detailsID: did}, reasons: []kopia.Reason{}, @@ -486,8 +486,8 @@ func (suite *OperationsManifestsUnitSuite) TestProduceManifestsAndMetadata() { { name: "fetch manifests errors", mr: mockManifestRestorer{ - mockRestorer: mockRestorer{}, - mrErr: assert.AnError, + mockRestoreProducer: mockRestoreProducer{}, + mrErr: assert.AnError, }, gb: mockGetBackuper{detailsID: did}, reasons: []kopia.Reason{}, @@ -499,7 +499,7 @@ func (suite *OperationsManifestsUnitSuite) TestProduceManifestsAndMetadata() { { name: "verify distinct bases fails", mr: mockManifestRestorer{ - mockRestorer: mockRestorer{}, + mockRestoreProducer: mockRestoreProducer{}, mans: []*kopia.ManifestEntry{ makeMan(path.EmailCategory, "", "", ""), makeMan(path.EmailCategory, "", "", ""), @@ -515,8 +515,8 @@ func (suite *OperationsManifestsUnitSuite) TestProduceManifestsAndMetadata() { { name: "no manifests", mr: mockManifestRestorer{ - mockRestorer: mockRestorer{}, - mans: []*kopia.ManifestEntry{}, + mockRestoreProducer: mockRestoreProducer{}, + mans: []*kopia.ManifestEntry{}, }, gb: mockGetBackuper{detailsID: did}, reasons: []kopia.Reason{}, @@ -528,7 +528,7 @@ func (suite *OperationsManifestsUnitSuite) TestProduceManifestsAndMetadata() { { name: "only incomplete manifests", mr: mockManifestRestorer{ - mockRestorer: mockRestorer{}, + mockRestoreProducer: mockRestoreProducer{}, mans: []*kopia.ManifestEntry{ 
makeMan(path.EmailCategory, "", "ir", ""), makeMan(path.ContactsCategory, "", "ir", ""), @@ -544,9 +544,11 @@ func (suite *OperationsManifestsUnitSuite) TestProduceManifestsAndMetadata() { { name: "man missing backup id", mr: mockManifestRestorer{ - mockRestorer: mockRestorer{collsByID: map[string][]data.RestoreCollection{ - "id": {data.NotFoundRestoreCollection{Collection: mockColl{id: "id_coll"}}}, - }}, + mockRestoreProducer: mockRestoreProducer{ + collsByID: map[string][]data.RestoreCollection{ + "id": {data.NotFoundRestoreCollection{Collection: mockColl{id: "id_coll"}}}, + }, + }, mans: []*kopia.ManifestEntry{makeMan(path.EmailCategory, "id", "", "")}, }, gb: mockGetBackuper{detailsID: did}, @@ -559,8 +561,8 @@ func (suite *OperationsManifestsUnitSuite) TestProduceManifestsAndMetadata() { { name: "backup missing details id", mr: mockManifestRestorer{ - mockRestorer: mockRestorer{}, - mans: []*kopia.ManifestEntry{makeMan(path.EmailCategory, "", "", "bid")}, + mockRestoreProducer: mockRestoreProducer{}, + mans: []*kopia.ManifestEntry{makeMan(path.EmailCategory, "", "", "bid")}, }, gb: mockGetBackuper{}, reasons: []kopia.Reason{}, @@ -571,10 +573,12 @@ func (suite *OperationsManifestsUnitSuite) TestProduceManifestsAndMetadata() { { name: "one complete, one incomplete", mr: mockManifestRestorer{ - mockRestorer: mockRestorer{collsByID: map[string][]data.RestoreCollection{ - "id": {data.NotFoundRestoreCollection{Collection: mockColl{id: "id_coll"}}}, - "incmpl_id": {data.NotFoundRestoreCollection{Collection: mockColl{id: "incmpl_id_coll"}}}, - }}, + mockRestoreProducer: mockRestoreProducer{ + collsByID: map[string][]data.RestoreCollection{ + "id": {data.NotFoundRestoreCollection{Collection: mockColl{id: "id_coll"}}}, + "incmpl_id": {data.NotFoundRestoreCollection{Collection: mockColl{id: "incmpl_id_coll"}}}, + }, + }, mans: []*kopia.ManifestEntry{ makeMan(path.EmailCategory, "id", "", "bid"), makeMan(path.EmailCategory, "incmpl_id", "ir", ""), @@ -590,9 +594,11 @@ func (suite *OperationsManifestsUnitSuite) TestProduceManifestsAndMetadata() { { name: "single valid man", mr: mockManifestRestorer{ - mockRestorer: mockRestorer{collsByID: map[string][]data.RestoreCollection{ - "id": {data.NotFoundRestoreCollection{Collection: mockColl{id: "id_coll"}}}, - }}, + mockRestoreProducer: mockRestoreProducer{ + collsByID: map[string][]data.RestoreCollection{ + "id": {data.NotFoundRestoreCollection{Collection: mockColl{id: "id_coll"}}}, + }, + }, mans: []*kopia.ManifestEntry{makeMan(path.EmailCategory, "id", "", "bid")}, }, gb: mockGetBackuper{detailsID: did}, @@ -605,10 +611,12 @@ func (suite *OperationsManifestsUnitSuite) TestProduceManifestsAndMetadata() { { name: "multiple valid mans", mr: mockManifestRestorer{ - mockRestorer: mockRestorer{collsByID: map[string][]data.RestoreCollection{ - "mail": {data.NotFoundRestoreCollection{Collection: mockColl{id: "mail_coll"}}}, - "contact": {data.NotFoundRestoreCollection{Collection: mockColl{id: "contact_coll"}}}, - }}, + mockRestoreProducer: mockRestoreProducer{ + collsByID: map[string][]data.RestoreCollection{ + "mail": {data.NotFoundRestoreCollection{Collection: mockColl{id: "mail_coll"}}}, + "contact": {data.NotFoundRestoreCollection{Collection: mockColl{id: "contact_coll"}}}, + }, + }, mans: []*kopia.ManifestEntry{ makeMan(path.EmailCategory, "mail", "", "bid"), makeMan(path.ContactsCategory, "contact", "", "bid"), @@ -627,8 +635,8 @@ func (suite *OperationsManifestsUnitSuite) TestProduceManifestsAndMetadata() { { name: "error collecting metadata", mr: 
mockManifestRestorer{ - mockRestorer: mockRestorer{err: assert.AnError}, - mans: []*kopia.ManifestEntry{makeMan(path.EmailCategory, "", "", "bid")}, + mockRestoreProducer: mockRestoreProducer{err: assert.AnError}, + mans: []*kopia.ManifestEntry{makeMan(path.EmailCategory, "", "", "bid")}, }, gb: mockGetBackuper{detailsID: did}, reasons: []kopia.Reason{}, @@ -961,7 +969,7 @@ func (suite *BackupManifestUnitSuite) TestBackupOperation_CollectMetadata() { ctx, flush := tester.NewContext() defer flush() - mr := &mockRestorer{} + mr := &mockRestoreProducer{} _, err := collectMetadata(ctx, mr, test.inputMan, test.inputFiles, tenant, fault.New(true)) assert.NoError(t, err, clues.ToCore(err)) diff --git a/src/internal/operations/operation.go b/src/internal/operations/operation.go index 8dc27c87a..5144c4abb 100644 --- a/src/internal/operations/operation.go +++ b/src/internal/operations/operation.go @@ -5,7 +5,6 @@ import ( "github.com/alcionai/clues" - "github.com/alcionai/corso/src/internal/connector" "github.com/alcionai/corso/src/internal/events" "github.com/alcionai/corso/src/internal/kopia" "github.com/alcionai/corso/src/pkg/control" @@ -57,7 +56,6 @@ type operation struct { bus events.Eventer kopia *kopia.Wrapper store *store.Wrapper - gc *connector.GraphConnector } func newOperation( @@ -65,17 +63,15 @@ func newOperation( bus events.Eventer, kw *kopia.Wrapper, sw *store.Wrapper, - gc *connector.GraphConnector, ) operation { return operation{ CreatedAt: time.Now(), - Errors: fault.New(opts.FailFast), + Errors: fault.New(opts.FailureHandling == control.FailFast), Options: opts, bus: bus, kopia: kw, store: sw, - gc: gc, Status: InProgress, } @@ -90,9 +86,5 @@ func (op operation) validate() error { return clues.New("missing modelstore") } - if op.gc == nil { - return clues.New("missing graph connector") - } - return nil } diff --git a/src/internal/operations/operation_test.go b/src/internal/operations/operation_test.go index 5d0425022..27cf6185f 100644 --- a/src/internal/operations/operation_test.go +++ b/src/internal/operations/operation_test.go @@ -8,7 +8,6 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/suite" - "github.com/alcionai/corso/src/internal/connector" "github.com/alcionai/corso/src/internal/events" "github.com/alcionai/corso/src/internal/kopia" "github.com/alcionai/corso/src/internal/tester" @@ -26,30 +25,27 @@ func TestOperationSuite(t *testing.T) { func (suite *OperationSuite) TestNewOperation() { t := suite.T() - op := newOperation(control.Options{}, events.Bus{}, nil, nil, nil) + op := newOperation(control.Options{}, events.Bus{}, nil, nil) assert.Greater(t, op.CreatedAt, time.Time{}) } func (suite *OperationSuite) TestOperation_Validate() { kwStub := &kopia.Wrapper{} swStub := &store.Wrapper{} - gcStub := &connector.GraphConnector{} table := []struct { name string kw *kopia.Wrapper sw *store.Wrapper - gc *connector.GraphConnector errCheck assert.ErrorAssertionFunc }{ - {"good", kwStub, swStub, gcStub, assert.NoError}, - {"missing kopia wrapper", nil, swStub, gcStub, assert.Error}, - {"missing store wrapper", kwStub, nil, gcStub, assert.Error}, - {"missing graph connector", kwStub, swStub, nil, assert.Error}, + {"good", kwStub, swStub, assert.NoError}, + {"missing kopia wrapper", nil, swStub, assert.Error}, + {"missing store wrapper", kwStub, nil, assert.Error}, } for _, test := range table { suite.Run(test.name, func() { - err := newOperation(control.Options{}, events.Bus{}, test.kw, test.sw, test.gc).validate() + err := 
newOperation(control.Options{}, events.Bus{}, test.kw, test.sw).validate() test.errCheck(suite.T(), err, clues.ToCore(err)) }) } diff --git a/src/internal/operations/restore.go b/src/internal/operations/restore.go index 0365765af..f11b3e56b 100644 --- a/src/internal/operations/restore.go +++ b/src/internal/operations/restore.go @@ -10,15 +10,14 @@ import ( "github.com/alcionai/corso/src/internal/common" "github.com/alcionai/corso/src/internal/common/crash" - "github.com/alcionai/corso/src/internal/connector" "github.com/alcionai/corso/src/internal/connector/onedrive" - "github.com/alcionai/corso/src/internal/connector/support" "github.com/alcionai/corso/src/internal/data" "github.com/alcionai/corso/src/internal/diagnostics" "github.com/alcionai/corso/src/internal/events" "github.com/alcionai/corso/src/internal/kopia" "github.com/alcionai/corso/src/internal/model" "github.com/alcionai/corso/src/internal/observe" + "github.com/alcionai/corso/src/internal/operations/inject" "github.com/alcionai/corso/src/internal/stats" "github.com/alcionai/corso/src/internal/streamstore" "github.com/alcionai/corso/src/pkg/account" @@ -42,6 +41,7 @@ type RestoreOperation struct { Version string `json:"version"` account account.Account + rc inject.RestoreConsumer } // RestoreResults aggregate the details of the results of the operation. @@ -56,7 +56,7 @@ func NewRestoreOperation( opts control.Options, kw *kopia.Wrapper, sw *store.Wrapper, - gc *connector.GraphConnector, + rc inject.RestoreConsumer, acct account.Account, backupID model.StableID, sel selectors.Selector, @@ -64,12 +64,13 @@ func NewRestoreOperation( bus events.Eventer, ) (RestoreOperation, error) { op := RestoreOperation{ - operation: newOperation(opts, bus, kw, sw, gc), + operation: newOperation(opts, bus, kw, sw), BackupID: backupID, Selectors: sel, Destination: dest, Version: "v0", account: acct, + rc: rc, } if err := op.validate(); err != nil { return RestoreOperation{}, err @@ -79,6 +80,10 @@ func NewRestoreOperation( } func (op RestoreOperation) validate() error { + if op.rc == nil { + return clues.New("missing restore consumer") + } + return op.operation.validate() } @@ -88,7 +93,7 @@ func (op RestoreOperation) validate() error { // get populated asynchronously. type restoreStats struct { cs []data.RestoreCollection - gc *support.ConnectorOperationStatus + gc *data.CollectionStats bytesRead *stats.ByteCounter resourceCount int @@ -96,16 +101,6 @@ type restoreStats struct { restoreID string } -type restorer interface { - RestoreMultipleItems( - ctx context.Context, - snapshotID string, - paths []path.Path, - bc kopia.ByteCounter, - errs *fault.Bus, - ) ([]data.RestoreCollection, error) -} - // Run begins a synchronous restore operation. 
func (op *RestoreOperation) Run(ctx context.Context) (restoreDetails *details.Details, err error) { defer func() { @@ -139,10 +134,10 @@ func (op *RestoreOperation) Run(ctx context.Context) (restoreDetails *details.De ctx = clues.Add( ctx, - "tenant_id", op.account.ID(), // TODO: pii + "tenant_id", clues.Hide(op.account.ID()), "backup_id", op.BackupID, "service", op.Selectors.Service, - "destination_container", op.Destination.ContainerName) + "destination_container", clues.Hide(op.Destination.ContainerName)) // ----- // Execution @@ -157,6 +152,7 @@ func (op *RestoreOperation) Run(ctx context.Context) (restoreDetails *details.De op.Errors.Fail(clues.Wrap(err, "running restore")) } + finalizeErrorHandling(ctx, op.Options, op.Errors, "running restore") LogFaultErrors(ctx, op.Errors.Errors(), "running restore") // ----- @@ -190,7 +186,7 @@ func (op *RestoreOperation) do( return nil, clues.Wrap(err, "getting backup and details") } - observe.Message(ctx, observe.Safe("Restoring"), observe.Bullet, observe.PII(bup.Selector.DiscreteOwner)) + observe.Message(ctx, "Restoring", observe.Bullet, clues.Hide(bup.Selector.DiscreteOwner)) paths, err := formatDetailsForRestoration(ctx, bup.Version, op.Selectors, deets, op.Errors) if err != nil { @@ -215,14 +211,14 @@ func (op *RestoreOperation) do( events.RestoreID: opStats.restoreID, }) - observe.Message(ctx, observe.Safe(fmt.Sprintf("Discovered %d items in backup %s to restore", len(paths), op.BackupID))) + observe.Message(ctx, fmt.Sprintf("Discovered %d items in backup %s to restore", len(paths), op.BackupID)) logger.Ctx(ctx).With("selectors", op.Selectors).Info("restoring selection") - kopiaComplete, closer := observe.MessageWithCompletion(ctx, observe.Safe("Enumerating items in repository")) + kopiaComplete, closer := observe.MessageWithCompletion(ctx, "Enumerating items in repository") defer closer() defer close(kopiaComplete) - dcs, err := op.kopia.RestoreMultipleItems(ctx, bup.SnapshotID, paths, opStats.bytesRead, op.Errors) + dcs, err := op.kopia.ProduceRestoreCollections(ctx, bup.SnapshotID, paths, opStats.bytesRead, op.Errors) if err != nil { return nil, clues.Wrap(err, "producing collections to restore") } @@ -235,12 +231,9 @@ func (op *RestoreOperation) do( opStats.resourceCount = 1 opStats.cs = dcs - restoreComplete, closer := observe.MessageWithCompletion(ctx, observe.Safe("Restoring data")) - defer closer() - defer close(restoreComplete) - - restoreDetails, err := op.gc.RestoreDataCollections( + deets, err = consumeRestoreCollections( ctx, + op.rc, bup.Version, op.account, op.Selectors, @@ -252,13 +245,11 @@ func (op *RestoreOperation) do( return nil, clues.Wrap(err, "restoring collections") } - restoreComplete <- struct{}{} + opStats.gc = op.rc.Wait() - opStats.gc = op.gc.AwaitStatus() + logger.Ctx(ctx).Debug(opStats.gc) - logger.Ctx(ctx).Debug(op.gc.PrintableStatus()) - - return restoreDetails, nil + return deets, nil } // persists details and statistics about the restore operation. 
@@ -285,11 +276,11 @@ func (op *RestoreOperation) persistResults( return clues.New("restoration never completed") } - if op.Status != Failed && opStats.gc.Metrics.Successes == 0 { + if op.Status != Failed && opStats.gc.IsZero() { op.Status = NoData } - op.Results.ItemsWritten = opStats.gc.Metrics.Successes + op.Results.ItemsWritten = opStats.gc.Successes op.bus.Event( ctx, @@ -312,6 +303,44 @@ func (op *RestoreOperation) persistResults( return op.Errors.Failure() } +// --------------------------------------------------------------------------- +// Restorer funcs +// --------------------------------------------------------------------------- + +func consumeRestoreCollections( + ctx context.Context, + rc inject.RestoreConsumer, + backupVersion int, + acct account.Account, + sel selectors.Selector, + dest control.RestoreDestination, + opts control.Options, + dcs []data.RestoreCollection, + errs *fault.Bus, +) (*details.Details, error) { + complete, closer := observe.MessageWithCompletion(ctx, "Restoring data") + defer func() { + complete <- struct{}{} + close(complete) + closer() + }() + + deets, err := rc.ConsumeRestoreCollections( + ctx, + backupVersion, + acct, + sel, + dest, + opts, + dcs, + errs) + if err != nil { + return nil, clues.Wrap(err, "restoring collections") + } + + return deets, nil +} + // formatDetailsForRestoration reduces the provided detail entries according to the // selector specifications. func formatDetailsForRestoration( diff --git a/src/internal/operations/restore_test.go b/src/internal/operations/restore_test.go index 0ffbf587e..649bbe140 100644 --- a/src/internal/operations/restore_test.go +++ b/src/internal/operations/restore_test.go @@ -16,12 +16,12 @@ import ( "github.com/alcionai/corso/src/internal/connector/graph" "github.com/alcionai/corso/src/internal/connector/mockconnector" "github.com/alcionai/corso/src/internal/connector/onedrive/api" - "github.com/alcionai/corso/src/internal/connector/support" "github.com/alcionai/corso/src/internal/data" "github.com/alcionai/corso/src/internal/events" evmock "github.com/alcionai/corso/src/internal/events/mock" "github.com/alcionai/corso/src/internal/kopia" "github.com/alcionai/corso/src/internal/model" + "github.com/alcionai/corso/src/internal/operations/inject" "github.com/alcionai/corso/src/internal/stats" "github.com/alcionai/corso/src/internal/tester" "github.com/alcionai/corso/src/pkg/account" @@ -50,7 +50,7 @@ func (suite *RestoreOpSuite) TestRestoreOperation_PersistResults() { var ( kw = &kopia.Wrapper{} sw = &store.Wrapper{} - gc = &connector.GraphConnector{} + gc = &mockconnector.GraphConnector{} acct = account.Account{} now = time.Now() dest = tester.DefaultTestRestoreDestination() @@ -75,11 +75,9 @@ func (suite *RestoreOpSuite) TestRestoreOperation_PersistResults() { Collection: &mockconnector.MockExchangeDataCollection{}, }, }, - gc: &support.ConnectorOperationStatus{ - Metrics: support.CollectionMetrics{ - Objects: 1, - Successes: 1, - }, + gc: &data.CollectionStats{ + Objects: 1, + Successes: 1, }, }, }, @@ -89,7 +87,7 @@ func (suite *RestoreOpSuite) TestRestoreOperation_PersistResults() { fail: assert.AnError, stats: restoreStats{ bytesRead: &stats.ByteCounter{}, - gc: &support.ConnectorOperationStatus{}, + gc: &data.CollectionStats{}, }, }, { @@ -98,7 +96,7 @@ func (suite *RestoreOpSuite) TestRestoreOperation_PersistResults() { stats: restoreStats{ bytesRead: &stats.ByteCounter{}, cs: []data.RestoreCollection{}, - gc: &support.ConnectorOperationStatus{}, + gc: &data.CollectionStats{}, }, }, } @@ -126,7 
+124,7 @@ func (suite *RestoreOpSuite) TestRestoreOperation_PersistResults() { assert.Equal(t, test.expectStatus.String(), op.Status.String(), "status") assert.Equal(t, len(test.stats.cs), op.Results.ItemsRead, "items read") - assert.Equal(t, test.stats.gc.Metrics.Successes, op.Results.ItemsWritten, "items written") + assert.Equal(t, test.stats.gc.Successes, op.Results.ItemsWritten, "items written") assert.Equal(t, test.stats.bytesRead.NumBytes, op.Results.BytesRead, "resource owners") assert.Equal(t, test.stats.resourceCount, op.Results.ResourceOwners, "resource owners") assert.Equal(t, now, op.Results.StartedAt, "started at") @@ -217,7 +215,7 @@ func (suite *RestoreOpIntegrationSuite) TearDownSuite() { func (suite *RestoreOpIntegrationSuite) TestNewRestoreOperation() { kw := &kopia.Wrapper{} sw := &store.Wrapper{} - gc := &connector.GraphConnector{} + gc := &mockconnector.GraphConnector{} acct := tester.NewM365Account(suite.T()) dest := tester.DefaultTestRestoreDestination() @@ -226,7 +224,7 @@ func (suite *RestoreOpIntegrationSuite) TestNewRestoreOperation() { opts control.Options kw *kopia.Wrapper sw *store.Wrapper - gc *connector.GraphConnector + rc inject.RestoreConsumer acct account.Account targets []string errCheck assert.ErrorAssertionFunc @@ -234,7 +232,7 @@ func (suite *RestoreOpIntegrationSuite) TestNewRestoreOperation() { {"good", control.Options{}, kw, sw, gc, acct, nil, assert.NoError}, {"missing kopia", control.Options{}, nil, sw, gc, acct, nil, assert.Error}, {"missing modelstore", control.Options{}, kw, nil, gc, acct, nil, assert.Error}, - {"missing graphConnector", control.Options{}, kw, sw, nil, acct, nil, assert.Error}, + {"missing restore consumer", control.Options{}, kw, sw, nil, acct, nil, assert.Error}, } for _, test := range table { suite.Run(test.name, func() { @@ -246,7 +244,7 @@ func (suite *RestoreOpIntegrationSuite) TestNewRestoreOperation() { test.opts, test.kw, test.sw, - test.gc, + test.rc, test.acct, "backup-id", selectors.Selector{DiscreteOwner: "test"}, @@ -280,6 +278,9 @@ func setupExchangeBackup( fault.New(true)) require.NoError(t, err, clues.ToCore(err)) + id, name, err := gc.PopulateOwnerIDAndNamesFrom(owner, nil) + require.NoError(t, err, clues.ToCore(err)) + bsel.DiscreteOwner = owner bsel.Include( bsel.MailFolders([]string{exchange.DefaultMailFolder}, selectors.PrefixMatch()), @@ -287,6 +288,8 @@ func setupExchangeBackup( bsel.EventCalendars([]string{exchange.DefaultCalendar}, selectors.PrefixMatch()), ) + bsel.SetDiscreteOwnerIDName(id, name) + bo, err := NewBackupOperation( ctx, control.Options{}, @@ -295,7 +298,7 @@ func setupExchangeBackup( gc, acct, bsel.Selector, - bsel.Selector.DiscreteOwner, + bsel.Selector, evmock.NewBus()) require.NoError(t, err, clues.ToCore(err)) @@ -337,6 +340,9 @@ func setupSharePointBackup( fault.New(true)) require.NoError(t, err, clues.ToCore(err)) + id, name, err := gc.PopulateOwnerIDAndNamesFrom(owner, nil) + require.NoError(t, err, clues.ToCore(err)) + spsel.DiscreteOwner = owner // assume a folder name "test" exists in the drive. // this is brittle, and requires us to backfill anytime @@ -344,6 +350,8 @@ func setupSharePointBackup( // growth from re-backup/restore of restored files. 
spsel.Include(spsel.LibraryFolders([]string{"test"}, selectors.PrefixMatch())) + spsel.SetDiscreteOwnerIDName(id, name) + bo, err := NewBackupOperation( ctx, control.Options{}, @@ -352,7 +360,7 @@ func setupSharePointBackup( gc, acct, spsel.Selector, - spsel.Selector.DiscreteOwner, + spsel.Selector, evmock.NewBus()) require.NoError(t, err, clues.ToCore(err)) @@ -439,7 +447,7 @@ func (suite *RestoreOpIntegrationSuite) TestRestore_Run() { ro, err := NewRestoreOperation( ctx, - control.Options{FailFast: true}, + control.Options{FailureHandling: control.FailFast}, suite.kw, suite.sw, bup.gc, diff --git a/src/internal/stats/stats.go b/src/internal/stats/stats.go index 32402221d..c061e67bc 100644 --- a/src/internal/stats/stats.go +++ b/src/internal/stats/stats.go @@ -30,7 +30,8 @@ func (bc *ByteCounter) Count(i int64) { } type SkippedCounts struct { - TotalSkippedItems int `json:"totalSkippedItems"` - SkippedMalware int `json:"skippedMalware"` - SkippedNotFound int `json:"skippedNotFound"` + TotalSkippedItems int `json:"totalSkippedItems"` + SkippedMalware int `json:"skippedMalware"` + SkippedNotFound int `json:"skippedNotFound"` + SkippedInvalidOneNoteFile int `json:"skippedInvalidOneNoteFile"` } diff --git a/src/internal/streamstore/streamstore.go b/src/internal/streamstore/streamstore.go index ec728df67..57fe5b8f1 100644 --- a/src/internal/streamstore/streamstore.go +++ b/src/internal/streamstore/streamstore.go @@ -11,8 +11,8 @@ import ( "github.com/alcionai/corso/src/internal/data" "github.com/alcionai/corso/src/internal/kopia" + "github.com/alcionai/corso/src/internal/operations/inject" "github.com/alcionai/corso/src/internal/stats" - "github.com/alcionai/corso/src/pkg/backup/details" "github.com/alcionai/corso/src/pkg/fault" "github.com/alcionai/corso/src/pkg/path" ) @@ -221,26 +221,14 @@ func collect( return &dc, nil } -type backuper interface { - BackupCollections( - ctx context.Context, - bases []kopia.IncrementalBase, - cs []data.BackupCollection, - globalExcludeSet map[string]map[string]struct{}, - tags map[string]string, - buildTreeWithBase bool, - errs *fault.Bus, - ) (*kopia.BackupStats, *details.Builder, map[string]kopia.PrevRefs, error) -} - // write persists bytes to the store func write( ctx context.Context, - bup backuper, + bup inject.BackupConsumer, dbcs []data.BackupCollection, errs *fault.Bus, ) (string, error) { - backupStats, _, _, err := bup.BackupCollections( + backupStats, _, _, err := bup.ConsumeBackupCollections( ctx, nil, dbcs, @@ -255,16 +243,6 @@ func write( return backupStats.SnapshotID, nil } -type restorer interface { - RestoreMultipleItems( - ctx context.Context, - snapshotID string, - paths []path.Path, - bc kopia.ByteCounter, - errs *fault.Bus, - ) ([]data.RestoreCollection, error) -} - // read retrieves an object from the store func read( ctx context.Context, @@ -272,7 +250,7 @@ func read( tenantID string, service path.ServiceType, col Collectable, - rer restorer, + rer inject.RestoreProducer, errs *fault.Bus, ) error { // construct the path of the container @@ -285,7 +263,7 @@ func read( ctx = clues.Add(ctx, "snapshot_id", snapshotID) - cs, err := rer.RestoreMultipleItems( + cs, err := rer.ProduceRestoreCollections( ctx, snapshotID, []path.Path{p}, diff --git a/src/internal/tester/resource_owners.go b/src/internal/tester/resource_owners.go index c36386b96..b5a1625a0 100644 --- a/src/internal/tester/resource_owners.go +++ b/src/internal/tester/resource_owners.go @@ -1,12 +1,15 @@ package tester import ( + "context" "os" "strings" "testing" 
"github.com/alcionai/clues" "github.com/stretchr/testify/require" + + "github.com/alcionai/corso/src/pkg/logger" ) // M365TenantID returns a tenantID string representing the azureTenantID described @@ -15,7 +18,20 @@ import ( // last-attempt fallback that will only work on alcion's testing org. func M365TenantID(t *testing.T) string { cfg, err := readTestConfig() - require.NoError(t, err, "retrieving m365 user id from test configuration", clues.ToCore(err)) + require.NoError(t, err, "retrieving m365 tenant ID from test configuration", clues.ToCore(err)) + + return cfg[TestCfgAzureTenantID] +} + +// M365TenantID returns a tenantID string representing the azureTenantID described +// by either the env var AZURE_TENANT_ID, the corso_test.toml config +// file or the default value (in that order of priority). The default is a +// last-attempt fallback that will only work on alcion's testing org. +func GetM365TenantID(ctx context.Context) string { + cfg, err := readTestConfig() + if err != nil { + logger.Ctx(ctx).Error(err, "retrieving m365 tenant ID from test configuration") + } return cfg[TestCfgAzureTenantID] } @@ -31,6 +47,19 @@ func M365UserID(t *testing.T) string { return cfg[TestCfgUserID] } +// GetM365UserID returns an userID string representing the m365UserID described +// by either the env var CORSO_M365_TEST_USER_ID, the corso_test.toml config +// file or the default value (in that order of priority). The default is a +// last-attempt fallback that will only work on alcion's testing org. +func GetM365UserID(ctx context.Context) string { + cfg, err := readTestConfig() + if err != nil { + logger.Ctx(ctx).Error(err, "retrieving m365 user id from test configuration") + } + + return cfg[TestCfgUserID] +} + // SecondaryM365UserID returns an userID string representing the m365UserID // described by either the env var CORSO_SECONDARY_M365_TEST_USER_ID, the // corso_test.toml config file or the default value (in that order of priority). 
diff --git a/src/pkg/backup/backup.go b/src/pkg/backup/backup.go index 24f02992a..d9b52c9d3 100644 --- a/src/pkg/backup/backup.go +++ b/src/pkg/backup/backup.go @@ -3,6 +3,7 @@ package backup import ( "context" "fmt" + "strings" "time" "github.com/alcionai/corso/src/cli/print" @@ -75,10 +76,12 @@ func New( } var ( - errCount = len(fe.Items) - skipCount = len(fe.Skipped) - failMsg string - malware, notFound, otherSkips int + errCount = len(fe.Items) + skipCount = len(fe.Skipped) + failMsg string + + malware, notFound, + invalidONFile, otherSkips int ) if fe.Failure != nil { @@ -92,6 +95,8 @@ func New( malware++ case s.HasCause(fault.SkipNotFound): notFound++ + case s.HasCause(fault.SkipBigOneNote): + invalidONFile++ default: otherSkips++ } @@ -105,6 +110,9 @@ func New( }, }, + ResourceOwnerID: ownerID, + ResourceOwnerName: ownerName, + Version: version.Backup, SnapshotID: snapshotID, StreamStoreID: streamStoreID, @@ -121,9 +129,10 @@ func New( ReadWrites: rw, StartAndEndTime: se, SkippedCounts: stats.SkippedCounts{ - TotalSkippedItems: skipCount, - SkippedMalware: malware, - SkippedNotFound: notFound, + TotalSkippedItems: skipCount, + SkippedMalware: malware, + SkippedNotFound: notFound, + SkippedInvalidOneNoteFile: invalidONFile, }, } } @@ -211,31 +220,45 @@ func (b Backup) Values() []string { if b.TotalSkippedItems > 0 { status += fmt.Sprintf("%d skipped", b.TotalSkippedItems) - if b.SkippedMalware+b.SkippedNotFound > 0 { + if b.SkippedMalware+b.SkippedNotFound+b.SkippedInvalidOneNoteFile > 0 { status += ": " } } - if b.SkippedMalware > 0 { - status += fmt.Sprintf("%d malware", b.SkippedMalware) + skipped := []string{} - if b.SkippedNotFound > 0 { - status += ", " - } + if b.SkippedMalware > 0 { + skipped = append(skipped, fmt.Sprintf("%d malware", b.SkippedMalware)) } if b.SkippedNotFound > 0 { - status += fmt.Sprintf("%d not found", b.SkippedNotFound) + skipped = append(skipped, fmt.Sprintf("%d not found", b.SkippedNotFound)) } + if b.SkippedInvalidOneNoteFile > 0 { + skipped = append(skipped, fmt.Sprintf("%d invalid OneNote file", b.SkippedInvalidOneNoteFile)) + } + + status += strings.Join(skipped, ", ") + if errCount+b.TotalSkippedItems > 0 { status += (")") } + name := b.ResourceOwnerName + + if len(name) == 0 { + name = b.ResourceOwnerID + } + + if len(name) == 0 { + name = b.Selector.DiscreteOwner + } + return []string{ common.FormatTabularDisplayTime(b.StartedAt), string(b.ID), status, - b.Selector.DiscreteOwner, + name, } } diff --git a/src/pkg/backup/backup_test.go b/src/pkg/backup/backup_test.go index 570bcf187..91bde1a17 100644 --- a/src/pkg/backup/backup_test.go +++ b/src/pkg/backup/backup_test.go @@ -24,7 +24,7 @@ func TestBackupUnitSuite(t *testing.T) { suite.Run(t, &BackupUnitSuite{Suite: tester.NewUnitSuite(t)}) } -func stubBackup(t time.Time) backup.Backup { +func stubBackup(t time.Time, ownerID, ownerName string) backup.Backup { sel := selectors.NewExchangeBackup([]string{"test"}) sel.Include(sel.AllData()) @@ -63,7 +63,7 @@ func (suite *BackupUnitSuite) TestBackup_HeadersValues() { var ( t = suite.T() now = time.Now() - b = stubBackup(now) + b = stubBackup(now, "id", "name") expectHs = []string{ "Started At", "ID", @@ -153,17 +153,30 @@ func (suite *BackupUnitSuite) TestBackup_Values_statusVariations() { expect: "test (42 errors, 1 skipped: 1 not found)", }, { - name: "errors, malware, notFound", + name: "errors and invalid OneNote", bup: backup.Backup{ Status: "test", ErrorCount: 42, SkippedCounts: stats.SkippedCounts{ - TotalSkippedItems: 1, - SkippedMalware: 1, - 
SkippedNotFound: 1, + TotalSkippedItems: 1, + SkippedInvalidOneNoteFile: 1, }, }, - expect: "test (42 errors, 1 skipped: 1 malware, 1 not found)", + expect: "test (42 errors, 1 skipped: 1 invalid OneNote file)", + }, + { + name: "errors, malware, notFound, invalid OneNote", + bup: backup.Backup{ + Status: "test", + ErrorCount: 42, + SkippedCounts: stats.SkippedCounts{ + TotalSkippedItems: 1, + SkippedMalware: 1, + SkippedNotFound: 1, + SkippedInvalidOneNoteFile: 1, + }, + }, + expect: "test (42 errors, 1 skipped: 1 malware, 1 not found, 1 invalid OneNote file)", }, } for _, test := range table { @@ -177,7 +190,7 @@ func (suite *BackupUnitSuite) TestBackup_Values_statusVariations() { func (suite *BackupUnitSuite) TestBackup_MinimumPrintable() { t := suite.T() now := time.Now() - b := stubBackup(now) + b := stubBackup(now, "id", "name") resultIface := b.MinimumPrintable() result, ok := resultIface.(backup.Printable) diff --git a/src/pkg/backup/details/details.go b/src/pkg/backup/details/details.go index 1af96edcf..6c8efbdfa 100644 --- a/src/pkg/backup/details/details.go +++ b/src/pkg/backup/details/details.go @@ -468,10 +468,10 @@ const ( FolderItem ItemType = 306 ) -func UpdateItem(item *ItemInfo, repoPath, locPath path.Path) error { +func UpdateItem(item *ItemInfo, repoPath path.Path, locPath *path.Builder) error { // Only OneDrive and SharePoint have information about parent folders // contained in them. - var updatePath func(repo path.Path, location path.Path) error + var updatePath func(repo path.Path, location *path.Builder) error switch item.infoType() { case ExchangeContact, ExchangeEvent, ExchangeMail: @@ -632,13 +632,13 @@ func (i ExchangeInfo) Values() []string { return []string{} } -func (i *ExchangeInfo) UpdateParentPath(_, locPath path.Path) error { +func (i *ExchangeInfo) UpdateParentPath(_ path.Path, locPath *path.Builder) error { // Not all data types have this set yet. 
if locPath == nil { return nil } - i.ParentPath = locPath.Folder(true) + i.ParentPath = locPath.String() return nil } @@ -677,7 +677,7 @@ func (i SharePointInfo) Values() []string { } } -func (i *SharePointInfo) UpdateParentPath(newPath, _ path.Path) error { +func (i *SharePointInfo) UpdateParentPath(newPath path.Path, _ *path.Builder) error { newParent, err := path.GetDriveFolderPath(newPath) if err != nil { return clues.Wrap(err, "making sharePoint path").With("path", newPath) @@ -721,7 +721,7 @@ func (i OneDriveInfo) Values() []string { } } -func (i *OneDriveInfo) UpdateParentPath(newPath, _ path.Path) error { +func (i *OneDriveInfo) UpdateParentPath(newPath path.Path, _ *path.Builder) error { newParent, err := path.GetDriveFolderPath(newPath) if err != nil { return clues.Wrap(err, "making oneDrive path").With("path", newPath) diff --git a/src/pkg/backup/details/details_test.go b/src/pkg/backup/details/details_test.go index 633fc0fdf..b268e39b8 100644 --- a/src/pkg/backup/details/details_test.go +++ b/src/pkg/backup/details/details_test.go @@ -880,17 +880,7 @@ func (suite *DetailsUnitSuite) TestUpdateItem() { item, }, ) - newExchangePath := makeItemPath( - suite.T(), - path.ExchangeService, - path.EmailCategory, - tenant, - resourceOwner, - []string{ - folder3, - item, - }, - ) + newExchangePB := path.Builder{}.Append(folder3) badOneDrivePath := makeItemPath( suite.T(), path.OneDriveService, @@ -904,7 +894,7 @@ func (suite *DetailsUnitSuite) TestUpdateItem() { name string input ItemInfo repoPath path.Path - locPath path.Path + locPath *path.Builder errCheck assert.ErrorAssertionFunc expectedItem ItemInfo }{ @@ -917,7 +907,7 @@ func (suite *DetailsUnitSuite) TestUpdateItem() { }, }, repoPath: newOneDrivePath, - locPath: newExchangePath, + locPath: newExchangePB, errCheck: assert.NoError, expectedItem: ItemInfo{ Exchange: &ExchangeInfo{ @@ -935,7 +925,7 @@ func (suite *DetailsUnitSuite) TestUpdateItem() { }, }, repoPath: newOneDrivePath, - locPath: newExchangePath, + locPath: newExchangePB, errCheck: assert.NoError, expectedItem: ItemInfo{ Exchange: &ExchangeInfo{ @@ -953,7 +943,7 @@ func (suite *DetailsUnitSuite) TestUpdateItem() { }, }, repoPath: newOneDrivePath, - locPath: newExchangePath, + locPath: newExchangePB, errCheck: assert.NoError, expectedItem: ItemInfo{ Exchange: &ExchangeInfo{ @@ -971,7 +961,7 @@ func (suite *DetailsUnitSuite) TestUpdateItem() { }, }, repoPath: newOneDrivePath, - locPath: newExchangePath, + locPath: newExchangePB, errCheck: assert.NoError, expectedItem: ItemInfo{ OneDrive: &OneDriveInfo{ @@ -989,7 +979,7 @@ func (suite *DetailsUnitSuite) TestUpdateItem() { }, }, repoPath: newOneDrivePath, - locPath: newExchangePath, + locPath: newExchangePB, errCheck: assert.NoError, expectedItem: ItemInfo{ SharePoint: &SharePointInfo{ @@ -1007,7 +997,7 @@ func (suite *DetailsUnitSuite) TestUpdateItem() { }, }, repoPath: badOneDrivePath, - locPath: newExchangePath, + locPath: newExchangePB, errCheck: assert.Error, }, { @@ -1019,7 +1009,7 @@ func (suite *DetailsUnitSuite) TestUpdateItem() { }, }, repoPath: badOneDrivePath, - locPath: newExchangePath, + locPath: newExchangePB, errCheck: assert.Error, }, } diff --git a/src/pkg/control/options.go b/src/pkg/control/options.go index f57ae492d..5f194f5ca 100644 --- a/src/pkg/control/options.go +++ b/src/pkg/control/options.go @@ -8,18 +8,29 @@ import ( type Options struct { Collision CollisionPolicy `json:"-"` DisableMetrics bool `json:"disableMetrics"` - FailFast bool `json:"failFast"` + FailureHandling FailureBehavior 
`json:"failureHandling"` + ItemFetchParallelism int `json:"itemFetchParallelism"` RestorePermissions bool `json:"restorePermissions"` SkipReduce bool `json:"skipReduce"` - ItemFetchParallelism int `json:"itemFetchParallelism"` ToggleFeatures Toggles `json:"ToggleFeatures"` } +type FailureBehavior string + +const ( + // fails and exits the run immediately + FailFast FailureBehavior = "fail-fast" + // recovers whenever possible, reports non-zero recoveries as a failure + FailAfterRecovery FailureBehavior = "fail-after-recovery" + // recovers whenever possible, does not report recovery as failure + BestEffort FailureBehavior = "best-effort" +) + // Defaults provides an Options with the default values set. func Defaults() Options { return Options{ - FailFast: true, - ToggleFeatures: Toggles{}, + FailureHandling: FailAfterRecovery, + ToggleFeatures: Toggles{}, } } @@ -77,9 +88,4 @@ type Toggles struct { // DisableIncrementals prevents backups from using incremental lookups, // forcing a new, complete backup of all data regardless of prior state. DisableIncrementals bool `json:"exchangeIncrementals,omitempty"` - - // EnablePermissionsBackup is used to enable backups of item - // permissions. Permission metadata increases graph api call count, - // so disabling their retrieval when not needed is advised. - EnablePermissionsBackup bool `json:"enablePermissionsBackup,omitempty"` } diff --git a/src/pkg/fault/item.go b/src/pkg/fault/item.go index 83bdf48c1..551fb01fb 100644 --- a/src/pkg/fault/item.go +++ b/src/pkg/fault/item.go @@ -156,6 +156,13 @@ const ( // SkipNotFound identifies that a file was skipped because we could // not find it when trying to download contents SkipNotFound skipCause = "file_not_found" + + // SkipBigOneNote identifies that a file was skipped because it + // was a big OneNote file, and we can only download OneNote files which + // are less than 2GB in size. +//nolint:lll + // https://support.microsoft.com/en-us/office/restrictions-and-limitations-in-onedrive-and-sharepoint-64883a5d-228e-48f5-b3d2-eb39e07630fa#onenotenotebooks + SkipBigOneNote skipCause = "big_one_note_file" ) var _ print.Printable = &Skipped{} diff --git a/src/pkg/filters/filters.go b/src/pkg/filters/filters.go index 8a2504f27..e05134592 100644 --- a/src/pkg/filters/filters.go +++ b/src/pkg/filters/filters.go @@ -444,7 +444,7 @@ func (f Filter) Compare(input string) bool { // true if t == i func equals(target, input string) bool { - return target == input + return strings.EqualFold(target, input) } // true if t > i @@ -531,7 +531,7 @@ func pathSuffix(target, input string) bool { // The input is assumed to be the complete path that may // match the target.
 func pathEquals(target, input string) bool {
-	return normPathElem(input) == target
+	return strings.EqualFold(normPathElem(input), target)
 }
 
 // ----------------------------------------------------------------------------------------------------
diff --git a/src/pkg/logger/example_logger_test.go b/src/pkg/logger/example_logger_test.go
index 6241f4b99..13b1ee417 100644
--- a/src/pkg/logger/example_logger_test.go
+++ b/src/pkg/logger/example_logger_test.go
@@ -5,7 +5,9 @@ import (
 
 	"github.com/alcionai/clues"
 
+	"github.com/alcionai/corso/src/internal/connector"
 	"github.com/alcionai/corso/src/pkg/logger"
+	"github.com/alcionai/corso/src/pkg/path"
 )
 
 // ---------------------------------------------------------------------------
@@ -18,7 +20,10 @@ const (
 	itemID = "item_id"
 )
 
-var err error
+var (
+	err         error
+	itemPath, _ = path.Build("tid", "own", path.ExchangeService, path.ContactsCategory, false, "foo")
+)
 
 // ---------------------------------------------------------------------------
 // examples
@@ -129,6 +134,36 @@ func Example_logger_clues_standards() {
 	// in the expected format.
 	logger.CtxErr(ctx, err).Error("getting item")
 
-	// TODO(keepers): PII
 	// 3. Protect pii in logs.
+	// When it comes to protecting sensitive information, we only want
+	// to hand values to loggers (and, by extension, to clues errors)
+	// using one of three approaches to securing those values.
+	//
+	// First: plain, unhidden data. This can only be logged if we are
+	// absolutely assured that this data does not expose sensitive
+	// information for a user. Eg: internal ids and enums are fine to
+	// log in plain text. Everything else must be considered wisely.
+	//
+	// Second: manually concealed values. Strings containing sensitive
+	// info, and structs from external packages containing sensitive info,
+	// can be logged by manually wrapping them with a clues.Hide() call.
+	// Ex: clues.Hide(userName). This will hash the value according to
+	// the user's hash algorithm configuration.
+	//
+	// Third: structs that comply with clues.Concealer. The Concealer
+	// interface requires a struct to comply with Conceal() (for cases
+	// where the struct is handed to a clues aggregator directly), and
+	// fmt's Format(state, verb), where the assumption is the standard
+	// format writer will be replaced with a Conceal() call (for cases
+	// where the struct is handed to some non-compliant formatter/printer).
+ // + // preferred + log.With( + // internal type, safe to log plainly + "resource_type", connector.Users, + // string containing sensitive info, wrap with Hide() + "user_name", clues.Hide("your_user_name@microsoft.example"), + // a concealer-compliant struct, safe to add plainly + "storage_path", itemPath, + ) } diff --git a/src/pkg/repository/loadtest/repository_load_test.go b/src/pkg/repository/loadtest/repository_load_test.go index 72093a27f..226b6e15d 100644 --- a/src/pkg/repository/loadtest/repository_load_test.go +++ b/src/pkg/repository/loadtest/repository_load_test.go @@ -94,8 +94,8 @@ func initM365Repo(t *testing.T) ( st := tester.NewPrefixedS3Storage(t) ac := tester.NewM365Account(t) opts := control.Options{ - DisableMetrics: true, - FailFast: true, + DisableMetrics: true, + FailureHandling: control.FailFast, } repo, err := repository.Initialize(ctx, ac, st, opts) @@ -447,8 +447,7 @@ func (suite *LoadExchangeSuite) TestExchange() { "all_users", "exchange", suite.usersUnderTest, sel, sel, // same selection for backup and restore - true, - ) + true) } // single user, lots of data @@ -500,8 +499,7 @@ func (suite *IndividualLoadExchangeSuite) TestExchange() { "single_user", "exchange", suite.usersUnderTest, sel, sel, // same selection for backup and restore - true, - ) + true) } // ------------------------------------------------------------------------------------------------ @@ -553,8 +551,7 @@ func (suite *LoadOneDriveSuite) TestOneDrive() { "all_users", "one_drive", suite.usersUnderTest, sel, sel, // same selection for backup and restore - false, - ) + false) } type IndividualLoadOneDriveSuite struct { @@ -601,8 +598,7 @@ func (suite *IndividualLoadOneDriveSuite) TestOneDrive() { "single_user", "one_drive", suite.usersUnderTest, sel, sel, // same selection for backup and restore - false, - ) + false) } // ------------------------------------------------------------------------------------------------ @@ -654,8 +650,7 @@ func (suite *LoadSharePointSuite) TestSharePoint() { "all_sites", "share_point", suite.sitesUnderTest, sel, sel, // same selection for backup and restore - false, - ) + false) } type IndividualLoadSharePointSuite struct { @@ -703,6 +698,5 @@ func (suite *IndividualLoadSharePointSuite) TestSharePoint() { "single_site", "share_point", suite.sitesUnderTest, sel, sel, // same selection for backup and restore - false, - ) + false) } diff --git a/src/pkg/repository/repository.go b/src/pkg/repository/repository.go index d2c6e3aaa..80686368d 100644 --- a/src/pkg/repository/repository.go +++ b/src/pkg/repository/repository.go @@ -8,10 +8,12 @@ import ( "github.com/google/uuid" "github.com/pkg/errors" + "github.com/alcionai/corso/src/internal/common" "github.com/alcionai/corso/src/internal/common/crash" "github.com/alcionai/corso/src/internal/connector" "github.com/alcionai/corso/src/internal/connector/graph" "github.com/alcionai/corso/src/internal/connector/onedrive" + "github.com/alcionai/corso/src/internal/data" "github.com/alcionai/corso/src/internal/events" "github.com/alcionai/corso/src/internal/kopia" "github.com/alcionai/corso/src/internal/model" @@ -30,13 +32,16 @@ import ( "github.com/alcionai/corso/src/pkg/store" ) -var ErrorRepoAlreadyExists = clues.New("a repository was already initialized with that configuration") +var ( + ErrorRepoAlreadyExists = clues.New("a repository was already initialized with that configuration") + ErrorBackupNotFound = clues.New("no backup exists with that id") +) // BackupGetter deals with retrieving metadata about backups from the 
// repository.
 type BackupGetter interface {
-	Backup(ctx context.Context, id model.StableID) (*backup.Backup, error)
-	Backups(ctx context.Context, ids []model.StableID) ([]*backup.Backup, *fault.Bus)
+	Backup(ctx context.Context, id string) (*backup.Backup, error)
+	Backups(ctx context.Context, ids []string) ([]*backup.Backup, *fault.Bus)
 	BackupsByTag(ctx context.Context, fs ...store.FilterOption) ([]*backup.Backup, error)
 	GetBackupDetails(
 		ctx context.Context,
@@ -55,13 +60,18 @@ type Repository interface {
 		ctx context.Context,
 		self selectors.Selector,
 	) (operations.BackupOperation, error)
+	NewBackupWithLookup(
+		ctx context.Context,
+		self selectors.Selector,
+		ins common.IDNameSwapper,
+	) (operations.BackupOperation, error)
 	NewRestore(
 		ctx context.Context,
 		backupID string,
 		sel selectors.Selector,
 		dest control.RestoreDestination,
 	) (operations.RestoreOperation, error)
-	DeleteBackup(ctx context.Context, id model.StableID) error
+	DeleteBackup(ctx context.Context, id string) error
 	BackupGetter
 }
@@ -101,7 +111,7 @@ func Initialize(
 	ctx = clues.Add(
 		ctx,
 		"acct_provider", acct.Provider.String(),
-		"acct_id", acct.ID(), // TODO: pii
+		"acct_id", clues.Hide(acct.ID()),
 		"storage_provider", s.Provider.String())
 
 	defer func() {
@@ -175,7 +185,7 @@ func Connect(
 	ctx = clues.Add(
 		ctx,
 		"acct_provider", acct.Provider.String(),
-		"acct_id", acct.ID(), // TODO: pii
+		"acct_id", clues.Hide(acct.ID()),
 		"storage_provider", s.Provider.String())
 
 	defer func() {
@@ -188,7 +198,7 @@
 	// their output getting clobbered (#1720)
 	defer observe.Complete()
 
-	complete, closer := observe.MessageWithCompletion(ctx, observe.Safe("Connecting to repository"))
+	complete, closer := observe.MessageWithCompletion(ctx, "Connecting to repository")
 	defer closer()
 	defer close(complete)
@@ -286,12 +296,31 @@ func (r *repository) Close(ctx context.Context) error {
 func (r repository) NewBackup(
 	ctx context.Context,
 	sel selectors.Selector,
+) (operations.BackupOperation, error) {
+	return r.NewBackupWithLookup(ctx, sel, nil)
+}
+
+// NewBackupWithLookup generates a BackupOperation runner.
+// ins is an optional, pre-populated cache of owner ids and names, in case
+// the caller has already generated those values.
+func (r repository) NewBackupWithLookup(
+	ctx context.Context,
+	sel selectors.Selector,
+	ins common.IDNameSwapper,
 ) (operations.BackupOperation, error) {
 	gc, err := connectToM365(ctx, sel, r.Account, fault.New(true))
 	if err != nil {
 		return operations.BackupOperation{}, errors.Wrap(err, "connecting to m365")
 	}
 
+	ownerID, ownerName, err := gc.PopulateOwnerIDAndNamesFrom(sel.DiscreteOwner, ins)
+	if err != nil {
+		return operations.BackupOperation{}, errors.Wrap(err, "resolving resource owner details")
+	}
+
+	// TODO: retrieve display name from gc
+	sel = sel.SetDiscreteOwnerIDName(ownerID, ownerName)
+
 	return operations.NewBackupOperation(
 		ctx,
 		r.Opts,
@@ -300,7 +329,7 @@
 		gc,
 		r.Account,
 		sel,
-		sel.DiscreteOwner,
+		sel,
 		r.Bus)
 }
@@ -329,15 +358,28 @@ func (r repository) NewRestore(
 		r.Bus)
 }
 
-// backups lists a backup by id
-func (r repository) Backup(ctx context.Context, id model.StableID) (*backup.Backup, error) {
-	sw := store.NewKopiaStore(r.modelStore)
-	return sw.GetBackup(ctx, id)
+// Backup retrieves a backup by id.
+func (r repository) Backup(ctx context.Context, id string) (*backup.Backup, error) {
+	return getBackup(ctx, id, store.NewKopiaStore(r.modelStore))
+}
+
+// getBackup handles the processing for Backup.
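+//
+// A minimal usage sketch (hypothetical backup id; ms is an existing model store):
+//
+//	b, err := getBackup(ctx, "backup-id-1234", store.NewKopiaStore(ms))
+//	if errors.Is(err, ErrorBackupNotFound) {
+//		// no backup model exists with that id
+//	}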
+func getBackup(
+	ctx context.Context,
+	id string,
+	sw store.BackupGetter,
+) (*backup.Backup, error) {
+	b, err := sw.GetBackup(ctx, model.StableID(id))
+	if err != nil {
+		return nil, errWrapper(err)
+	}
+
+	return b, nil
+}
 
-// BackupsByID lists backups by ID. Returns as many backups as possible with
+// Backups lists backups by ID. Returns as many backups as possible with
 // errors for the backups it was unable to retrieve.
-func (r repository) Backups(ctx context.Context, ids []model.StableID) ([]*backup.Backup, *fault.Bus) {
+func (r repository) Backups(ctx context.Context, ids []string) ([]*backup.Backup, *fault.Bus) {
 	var (
 		bups []*backup.Backup
 		errs = fault.New(false)
@@ -345,9 +387,11 @@ func (r repository) Backups(ctx context.Context, ids []model.StableID) ([]*backu
 	)
 
 	for _, id := range ids {
-		b, err := sw.GetBackup(ctx, id)
+		ictx := clues.Add(ctx, "backup_id", id)
+
+		b, err := sw.GetBackup(ictx, model.StableID(id))
 		if err != nil {
-			errs.AddRecoverable(clues.Stack(err).With("backup_id", id))
+			errs.AddRecoverable(errWrapper(err))
 		}
 
 		bups = append(bups, b)
@@ -385,12 +429,12 @@ func getBackupDetails(
 	ctx context.Context,
 	backupID, tenantID string,
 	kw *kopia.Wrapper,
-	sw *store.Wrapper,
+	sw store.BackupGetter,
 	errs *fault.Bus,
 ) (*details.Details, *backup.Backup, error) {
 	b, err := sw.GetBackup(ctx, model.StableID(backupID))
 	if err != nil {
-		return nil, nil, err
+		return nil, nil, errWrapper(err)
 	}
 
 	ssid := b.StreamStoreID
@@ -455,12 +499,12 @@ func getBackupErrors(
 	ctx context.Context,
 	backupID, tenantID string,
 	kw *kopia.Wrapper,
-	sw *store.Wrapper,
+	sw store.BackupGetter,
 	errs *fault.Bus,
 ) (*fault.Errors, *backup.Backup, error) {
 	b, err := sw.GetBackup(ctx, model.StableID(backupID))
 	if err != nil {
-		return nil, nil, err
+		return nil, nil, errWrapper(err)
 	}
 
 	ssid := b.StreamStoreID
@@ -485,32 +529,44 @@
 	return &fe, b, nil
 }
 
+type snapshotDeleter interface {
+	DeleteSnapshot(ctx context.Context, snapshotID string) error
+}
+
 // DeleteBackup removes the backup from both the model store and the backup storage.
-func (r repository) DeleteBackup(ctx context.Context, id model.StableID) error {
-	bu, err := r.Backup(ctx, id)
+func (r repository) DeleteBackup(ctx context.Context, id string) error {
+	return deleteBackup(ctx, id, r.dataLayer, store.NewKopiaStore(r.modelStore))
+}
+
+// deleteBackup handles the processing for DeleteBackup.
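+//
+// Note: the item snapshot and details snapshot are deleted before the backup
+// model itself, so a partially failed delete leaves the backup model in place
+// and the operation can be retried.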
+func deleteBackup( + ctx context.Context, + id string, + kw snapshotDeleter, + sw store.BackupGetterDeleter, +) error { + b, err := sw.GetBackup(ctx, model.StableID(id)) if err != nil { + return errWrapper(err) + } + + if err := kw.DeleteSnapshot(ctx, b.SnapshotID); err != nil { return err } - if err := r.dataLayer.DeleteSnapshot(ctx, bu.SnapshotID); err != nil { - return err - } - - if len(bu.SnapshotID) > 0 { - if err := r.dataLayer.DeleteSnapshot(ctx, bu.SnapshotID); err != nil { + if len(b.SnapshotID) > 0 { + if err := kw.DeleteSnapshot(ctx, b.SnapshotID); err != nil { return err } } - if len(bu.DetailsID) > 0 { - if err := r.dataLayer.DeleteSnapshot(ctx, bu.DetailsID); err != nil { + if len(b.DetailsID) > 0 { + if err := kw.DeleteSnapshot(ctx, b.DetailsID); err != nil { return err } } - sw := store.NewKopiaStore(r.modelStore) - - return sw.DeleteBackup(ctx, id) + return sw.DeleteBackup(ctx, model.StableID(id)) } // --------------------------------------------------------------------------- @@ -568,7 +624,7 @@ func connectToM365( acct account.Account, errs *fault.Bus, ) (*connector.GraphConnector, error) { - complete, closer := observe.MessageWithCompletion(ctx, observe.Safe("Connecting to M365")) + complete, closer := observe.MessageWithCompletion(ctx, "Connecting to M365") defer func() { complete <- struct{}{} close(complete) @@ -588,3 +644,11 @@ func connectToM365( return gc, nil } + +func errWrapper(err error) error { + if errors.Is(err, data.ErrNotFound) { + return clues.Stack(ErrorBackupNotFound, err) + } + + return err +} diff --git a/src/pkg/repository/repository_unexported_test.go b/src/pkg/repository/repository_unexported_test.go index 2837978c3..9f7416615 100644 --- a/src/pkg/repository/repository_unexported_test.go +++ b/src/pkg/repository/repository_unexported_test.go @@ -5,10 +5,12 @@ import ( "testing" "github.com/alcionai/clues" + "github.com/google/uuid" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" + "github.com/alcionai/corso/src/internal/data" "github.com/alcionai/corso/src/internal/kopia" "github.com/alcionai/corso/src/internal/model" "github.com/alcionai/corso/src/internal/operations" @@ -20,8 +22,172 @@ import ( "github.com/alcionai/corso/src/pkg/fault" "github.com/alcionai/corso/src/pkg/selectors" "github.com/alcionai/corso/src/pkg/store" + "github.com/alcionai/corso/src/pkg/store/mock" ) +// --------------------------------------------------------------------------- +// Unit +// --------------------------------------------------------------------------- + +type RepositoryBackupsUnitSuite struct { + tester.Suite +} + +func TestRepositoryBackupsUnitSuite(t *testing.T) { + suite.Run(t, &RepositoryBackupsUnitSuite{Suite: tester.NewUnitSuite(t)}) +} + +func (suite *RepositoryBackupsUnitSuite) TestGetBackup() { + bup := &backup.Backup{ + BaseModel: model.BaseModel{ + ID: model.StableID(uuid.NewString()), + }, + } + + table := []struct { + name string + sw mock.BackupWrapper + expectErr func(t *testing.T, result error) + expectID model.StableID + }{ + { + name: "no error", + sw: mock.BackupWrapper{ + Backup: bup, + GetErr: nil, + DeleteErr: nil, + }, + expectErr: func(t *testing.T, result error) { + assert.NoError(t, result, clues.ToCore(result)) + }, + expectID: bup.ID, + }, + { + name: "get error", + sw: mock.BackupWrapper{ + Backup: bup, + GetErr: data.ErrNotFound, + DeleteErr: nil, + }, + expectErr: func(t *testing.T, result error) { + assert.ErrorIs(t, result, data.ErrNotFound, 
clues.ToCore(result)) + assert.ErrorIs(t, result, ErrorBackupNotFound, clues.ToCore(result)) + }, + expectID: bup.ID, + }, + } + for _, test := range table { + suite.Run(test.name, func() { + ctx, flush := tester.NewContext() + defer flush() + + t := suite.T() + + b, err := getBackup(ctx, string(bup.ID), test.sw) + test.expectErr(t, err) + + if err != nil { + return + } + + assert.Equal(t, test.expectID, b.ID) + }) + } +} + +type mockSSDeleter struct { + err error +} + +func (sd mockSSDeleter) DeleteSnapshot(_ context.Context, _ string) error { + return sd.err +} + +func (suite *RepositoryBackupsUnitSuite) TestDeleteBackup() { + bup := &backup.Backup{ + BaseModel: model.BaseModel{ + ID: model.StableID(uuid.NewString()), + }, + } + + table := []struct { + name string + sw mock.BackupWrapper + kw mockSSDeleter + expectErr func(t *testing.T, result error) + expectID model.StableID + }{ + { + name: "no error", + sw: mock.BackupWrapper{ + Backup: bup, + GetErr: nil, + DeleteErr: nil, + }, + kw: mockSSDeleter{}, + expectErr: func(t *testing.T, result error) { + assert.NoError(t, result, clues.ToCore(result)) + }, + expectID: bup.ID, + }, + { + name: "get error", + sw: mock.BackupWrapper{ + Backup: bup, + GetErr: data.ErrNotFound, + DeleteErr: nil, + }, + kw: mockSSDeleter{}, + expectErr: func(t *testing.T, result error) { + assert.ErrorIs(t, result, data.ErrNotFound, clues.ToCore(result)) + assert.ErrorIs(t, result, ErrorBackupNotFound, clues.ToCore(result)) + }, + expectID: bup.ID, + }, + { + name: "delete error", + sw: mock.BackupWrapper{ + Backup: bup, + GetErr: nil, + DeleteErr: assert.AnError, + }, + kw: mockSSDeleter{}, + expectErr: func(t *testing.T, result error) { + assert.ErrorIs(t, result, assert.AnError, clues.ToCore(result)) + }, + expectID: bup.ID, + }, + { + name: "snapshot delete error", + sw: mock.BackupWrapper{ + Backup: bup, + GetErr: nil, + DeleteErr: nil, + }, + kw: mockSSDeleter{assert.AnError}, + expectErr: func(t *testing.T, result error) { + assert.ErrorIs(t, result, assert.AnError, clues.ToCore(result)) + }, + expectID: bup.ID, + }, + } + for _, test := range table { + suite.Run(test.name, func() { + ctx, flush := tester.NewContext() + defer flush() + + t := suite.T() + + err := deleteBackup(ctx, string(bup.ID), test.kw, test.sw) + test.expectErr(t, err) + }) + } +} + +// --------------------------------------------------------------------------- +// integration +// --------------------------------------------------------------------------- + type RepositoryModelIntgSuite struct { tester.Suite kw *kopia.Wrapper diff --git a/src/pkg/selectors/selectors.go b/src/pkg/selectors/selectors.go index caf1f5cb1..17247b39b 100644 --- a/src/pkg/selectors/selectors.go +++ b/src/pkg/selectors/selectors.go @@ -112,6 +112,8 @@ type Selector struct { // iterate over the results, where each one will populate this field // with a different owner. DiscreteOwner string `json:"discreteOwner,omitempty"` + // display name for the DiscreteOwner. + DiscreteOwnerName string `json:"discreteOwnerName,omitempty"` // A slice of exclusion scopes. Exclusions apply globally to all // inclusions/filters, with any-match behavior. @@ -146,6 +148,48 @@ func (s Selector) DiscreteResourceOwners() []string { return split(s.ResourceOwners.Target) } +// SetDiscreteOwnerIDName ensures the selector has the correct discrete owner +// id and name. Assumes that these values are sourced using the current +// s.DiscreteOwner as input. 
The reason for taking in both the id and name, and
+// not just the name, is so that constructors can input owner aliases in place
+// of ids, with the expectation that the two will get sorted and re-written
+// later on with this setter.
+//
+// If the id is empty, the original DiscreteOwner value is retained.
+// If the name is empty, the id is duplicated as the name.
+func (s Selector) SetDiscreteOwnerIDName(id, name string) Selector {
+	r := s
+
+	if len(id) == 0 {
+		// assume the discreteOwner is already set, and don't replace anything.
+		id = s.DiscreteOwner
+	}
+
+	r.DiscreteOwner = id
+	r.DiscreteOwnerName = name
+
+	if len(name) == 0 {
+		r.DiscreteOwnerName = id
+	}
+
+	return r
+}
+
+// ID returns s.discreteOwner, which is assumed to be a stable ID.
+func (s Selector) ID() string {
+	return s.DiscreteOwner
+}
+
+// Name returns s.discreteOwnerName. If that value is empty, it returns
+// s.DiscreteOwner instead.
+func (s Selector) Name() string {
+	if len(s.DiscreteOwnerName) == 0 {
+		return s.DiscreteOwner
+	}
+
+	return s.DiscreteOwnerName
+}
+
 // isAnyResourceOwner returns true if the selector includes all resource owners.
 func isAnyResourceOwner(s Selector) bool {
 	return s.ResourceOwners.Comparator == filters.Passes
@@ -336,7 +380,7 @@ func pathCategoriesIn[T scopeT, C categoryT](ss []scope) []path.CategoryType {
 }
 
 // ---------------------------------------------------------------------------
-// scope helpers
+// scope constructors
 // ---------------------------------------------------------------------------
 
 type scopeConfig struct {
diff --git a/src/pkg/selectors/selectors_test.go b/src/pkg/selectors/selectors_test.go
index 8651d1823..28ed90198 100644
--- a/src/pkg/selectors/selectors_test.go
+++ b/src/pkg/selectors/selectors_test.go
@@ -248,6 +248,49 @@ func (suite *SelectorSuite) TestSplitByResourceOnwer() {
 	}
 }
 
+func (suite *SelectorSuite) TestIDName() {
+	table := []struct {
+		title                string
+		id, name             string
+		expectID, expectName string
+	}{
+		{"empty", "", "", "", ""},
+		{"only id", "id", "", "id", "id"},
+		{"only name", "", "name", "", "name"},
+		{"both", "id", "name", "id", "name"},
+	}
+	for _, test := range table {
+		suite.Run(test.title, func() {
+			sel := Selector{DiscreteOwner: test.id, DiscreteOwnerName: test.name}
+			assert.Equal(suite.T(), test.expectID, sel.ID())
+			assert.Equal(suite.T(), test.expectName, sel.Name())
+		})
+	}
+}
+
+func (suite *SelectorSuite) TestSetDiscreteOwnerIDName() {
+	table := []struct {
+		title                string
+		initID, initName     string
+		id, name             string
+		expectID, expectName string
+	}{
+		{"empty", "", "", "", "", "", ""},
+		{"only id", "", "", "id", "", "id", "id"},
+		{"only name", "", "", "", "name", "", "name"},
+		{"both", "", "", "id", "name", "id", "name"},
+		{"name with existing id", "init-id", "", "", "name", "init-id", "name"},
+	}
+	for _, test := range table {
+		suite.Run(test.title, func() {
+			sel := Selector{DiscreteOwner: test.initID, DiscreteOwnerName: test.initName}
+			sel = sel.SetDiscreteOwnerIDName(test.id, test.name)
+			assert.Equal(suite.T(), test.expectID, sel.ID())
+			assert.Equal(suite.T(), test.expectName, sel.Name())
+		})
+	}
+}
+
 // TestPathCategories verifies that no scope produces a `path.UnknownCategory`
 func (suite *SelectorSuite) TestPathCategories_includes() {
 	users := []string{"someuser@onmicrosoft.com"}
diff --git a/src/pkg/selectors/testdata/sharepoint.go b/src/pkg/selectors/testdata/sharepoint.go
new file mode 100644
index 000000000..6051c2fda
--- /dev/null
+++ b/src/pkg/selectors/testdata/sharepoint.go
@@ -0,0 +1,9 @@
+package
testdata
+
+import "github.com/alcionai/corso/src/pkg/selectors"
+
+// SharePointBackupFolderScope is the standard folder scope that should be used
+// in integration backups with sharepoint.
+func SharePointBackupFolderScope(sel *selectors.SharePointBackup) []selectors.SharePointScope {
+	return sel.LibraryFolders([]string{"test"}, selectors.PrefixMatch())
+}
diff --git a/src/pkg/services/m365/m365.go b/src/pkg/services/m365/m365.go
index e078152cf..b2b8371cd 100644
--- a/src/pkg/services/m365/m365.go
+++ b/src/pkg/services/m365/m365.go
@@ -2,13 +2,14 @@ package m365
 
 import (
 	"context"
+	"strings"
 
 	"github.com/alcionai/clues"
 	"github.com/microsoftgraph/msgraph-sdk-go/models"
 
-	"github.com/alcionai/corso/src/internal/connector"
+	"github.com/alcionai/corso/src/internal/common"
+	"github.com/alcionai/corso/src/internal/common/ptr"
 	"github.com/alcionai/corso/src/internal/connector/discovery"
-	"github.com/alcionai/corso/src/internal/connector/graph"
 	"github.com/alcionai/corso/src/pkg/account"
 	"github.com/alcionai/corso/src/pkg/fault"
 )
@@ -55,34 +56,51 @@ func Users(ctx context.Context, acct account.Account, errs *fault.Bus) ([]*User,
 	return ret, nil
 }
 
-func UserIDs(ctx context.Context, acct account.Account, errs *fault.Bus) ([]string, error) {
-	users, err := Users(ctx, acct, errs)
-	if err != nil {
-		return nil, err
+// parseUser extracts the information from `models.Userable` that we care about
+func parseUser(item models.Userable) (*User, error) {
+	if item.GetUserPrincipalName() == nil {
+		return nil, clues.New("user missing principal name").
+			With("user_id", *item.GetId()) // TODO: pii
 	}
 
-	ret := make([]string, 0, len(users))
-	for _, u := range users {
-		ret = append(ret, u.ID)
+	u := &User{
+		PrincipalName: ptr.Val(item.GetUserPrincipalName()),
+		ID:            ptr.Val(item.GetId()),
+		Name:          ptr.Val(item.GetDisplayName()),
 	}
 
-	return ret, nil
+	return u, nil
 }
 
-// UserPNs retrieves all user principleNames in the tenant. Principle Names
-// can be used analogous userIDs in graph API queries.
-func UserPNs(ctx context.Context, acct account.Account, errs *fault.Bus) ([]string, error) {
+// UsersMap retrieves all users in the tenant, and returns two maps: one id-to-principalName,
+// and one principalName-to-id.
+func UsersMap(
+	ctx context.Context,
+	acct account.Account,
+	errs *fault.Bus,
+) (common.IDsNames, error) {
 	users, err := Users(ctx, acct, errs)
 	if err != nil {
-		return nil, err
+		return common.IDsNames{}, err
 	}
 
-	ret := make([]string, 0, len(users))
+	var (
+		idToName = make(map[string]string, len(users))
+		nameToID = make(map[string]string, len(users))
+	)
+
 	for _, u := range users {
-		ret = append(ret, u.PrincipalName)
+		id, name := strings.ToLower(u.ID), strings.ToLower(u.PrincipalName)
+		idToName[id] = name
+		nameToID[name] = id
 	}
 
-	return ret, nil
+	ins := common.IDsNames{
+		IDToName: idToName,
+		NameToID: nameToID,
+	}
+
+	return ins, nil
 }
 
 type Site struct {
@@ -92,59 +110,66 @@
 	// ID is of the format: <hostname>,<site collection ID>,<site ID>
 	// for example: contoso.sharepoint.com,abcdeab3-0ccc-4ce1-80ae-b32912c9468d,xyzud296-9f7c-44e1-af81-3c06d0d43007
 	ID string
+
+	// DisplayName is the human-readable name of the site. Normally the plaintext name that the
+	// user provided when they created the site, though it can be changed over time.
+ // Ex: webUrl: https://host.com/sites/TestingSite, displayName: "Testing Site" + DisplayName string } // Sites returns a list of Sites in a specified M365 tenant func Sites(ctx context.Context, acct account.Account, errs *fault.Bus) ([]*Site, error) { - gc, err := connector.NewGraphConnector(ctx, graph.HTTPClient(graph.NoTimeout()), acct, connector.Sites, errs) + sites, err := discovery.Sites(ctx, acct, errs) if err != nil { return nil, clues.Wrap(err, "initializing M365 graph connection") } - // gc.Sites is a map with keys: SiteURL, values: ID - ret := make([]*Site, 0, len(gc.Sites)) - for k, v := range gc.Sites { - ret = append(ret, &Site{ - WebURL: k, - ID: v, - }) + ret := make([]*Site, 0, len(sites)) + + for _, s := range sites { + ps, err := parseSite(s) + if err != nil { + return nil, clues.Wrap(err, "parsing siteable") + } + + ret = append(ret, ps) } return ret, nil } -// SiteURLs returns a list of SharePoint site WebURLs in the specified M365 tenant -func SiteURLs(ctx context.Context, acct account.Account, errs *fault.Bus) ([]string, error) { - gc, err := connector.NewGraphConnector(ctx, graph.HTTPClient(graph.NoTimeout()), acct, connector.Sites, errs) +// parseSite extracts the information from `models.Siteable` we care about +func parseSite(item models.Siteable) (*Site, error) { + s := &Site{ + ID: ptr.Val(item.GetId()), + WebURL: ptr.Val(item.GetWebUrl()), + DisplayName: ptr.Val(item.GetDisplayName()), + } + + return s, nil +} + +// SitesMap retrieves all sites in the tenant, and returns two maps: one id-to-webURL, +// and one webURL-to-id. +func SitesMap( + ctx context.Context, + acct account.Account, + errs *fault.Bus, +) (common.IDsNames, error) { + sites, err := Sites(ctx, acct, errs) if err != nil { - return nil, clues.Wrap(err, "initializing M365 graph connection") + return common.IDsNames{}, err } - return gc.GetSiteWebURLs(), nil -} - -// SiteIDs returns a list of SharePoint sites IDs in the specified M365 tenant -func SiteIDs(ctx context.Context, acct account.Account, errs *fault.Bus) ([]string, error) { - gc, err := connector.NewGraphConnector(ctx, graph.HTTPClient(graph.NoTimeout()), acct, connector.Sites, errs) - if err != nil { - return nil, clues.Wrap(err, "initializing graph connection") - } - - return gc.GetSiteIDs(), nil -} - -// parseUser extracts information from `models.Userable` we care about -func parseUser(item models.Userable) (*User, error) { - if item.GetUserPrincipalName() == nil { - return nil, clues.New("user missing principal name"). 
- With("user_id", *item.GetId()) // TODO: pii - } - - u := &User{PrincipalName: *item.GetUserPrincipalName(), ID: *item.GetId()} - - if item.GetDisplayName() != nil { - u.Name = *item.GetDisplayName() - } - - return u, nil + ins := common.IDsNames{ + IDToName: make(map[string]string, len(sites)), + NameToID: make(map[string]string, len(sites)), + } + + for _, s := range sites { + ins.IDToName[s.ID] = s.WebURL + ins.NameToID[s.WebURL] = s.ID + } + + return ins, nil } diff --git a/src/pkg/services/m365/m365_test.go b/src/pkg/services/m365/m365_test.go index b22f0a37b..9137bb066 100644 --- a/src/pkg/services/m365/m365_test.go +++ b/src/pkg/services/m365/m365_test.go @@ -66,6 +66,7 @@ func (suite *M365IntegrationSuite) TestSites() { t := suite.T() assert.NotEmpty(t, s.WebURL) assert.NotEmpty(t, s.ID) + assert.NotEmpty(t, s.DisplayName) }) } } diff --git a/src/pkg/store/backup.go b/src/pkg/store/backup.go index 85aeb2a8f..41ab97a1e 100644 --- a/src/pkg/store/backup.go +++ b/src/pkg/store/backup.go @@ -28,6 +28,29 @@ func (q *queryFilters) populate(qf ...FilterOption) { } } +type ( + BackupWrapper interface { + BackupGetterDeleter + GetBackups( + ctx context.Context, + filters ...FilterOption, + ) ([]*backup.Backup, error) + } + + BackupGetterDeleter interface { + BackupGetter + BackupDeleter + } + + BackupGetter interface { + GetBackup(ctx context.Context, backupID model.StableID) (*backup.Backup, error) + } + + BackupDeleter interface { + DeleteBackup(ctx context.Context, backupID model.StableID) error + } +) + // Service ensures the retrieved backups only match // the specified service. func Service(pst path.ServiceType) FilterOption { diff --git a/src/pkg/store/backup_test.go b/src/pkg/store/backup_test.go index c33cd8f58..9600ec67a 100644 --- a/src/pkg/store/backup_test.go +++ b/src/pkg/store/backup_test.go @@ -14,7 +14,7 @@ import ( "github.com/alcionai/corso/src/internal/tester" "github.com/alcionai/corso/src/pkg/backup" "github.com/alcionai/corso/src/pkg/store" - storeMock "github.com/alcionai/corso/src/pkg/store/mock" + "github.com/alcionai/corso/src/pkg/store/mock" ) // ------------------------------------------------------------ @@ -48,17 +48,17 @@ func (suite *StoreBackupUnitSuite) TestGetBackup() { table := []struct { name string - mock *storeMock.MockModelStore + mock *mock.ModelStore expect assert.ErrorAssertionFunc }{ { name: "gets backup", - mock: storeMock.NewMock(&bu, nil), + mock: mock.NewModelStoreMock(&bu, nil), expect: assert.NoError, }, { name: "errors", - mock: storeMock.NewMock(&bu, assert.AnError), + mock: mock.NewModelStoreMock(&bu, assert.AnError), expect: assert.Error, }, } @@ -85,17 +85,17 @@ func (suite *StoreBackupUnitSuite) TestGetBackups() { table := []struct { name string - mock *storeMock.MockModelStore + mock *mock.ModelStore expect assert.ErrorAssertionFunc }{ { name: "gets backups", - mock: storeMock.NewMock(&bu, nil), + mock: mock.NewModelStoreMock(&bu, nil), expect: assert.NoError, }, { name: "errors", - mock: storeMock.NewMock(&bu, assert.AnError), + mock: mock.NewModelStoreMock(&bu, assert.AnError), expect: assert.Error, }, } @@ -123,17 +123,17 @@ func (suite *StoreBackupUnitSuite) TestDeleteBackup() { table := []struct { name string - mock *storeMock.MockModelStore + mock *mock.ModelStore expect assert.ErrorAssertionFunc }{ { name: "deletes backup", - mock: storeMock.NewMock(&bu, nil), + mock: mock.NewModelStoreMock(&bu, nil), expect: assert.NoError, }, { name: "errors", - mock: storeMock.NewMock(&bu, assert.AnError), + mock: 
mock.NewModelStoreMock(&bu, assert.AnError), expect: assert.Error, }, } diff --git a/src/pkg/store/mock/store_mock.go b/src/pkg/store/mock/model_store.go similarity index 78% rename from src/pkg/store/mock/store_mock.go rename to src/pkg/store/mock/model_store.go index 1e20f0bb9..15e47a972 100644 --- a/src/pkg/store/mock/store_mock.go +++ b/src/pkg/store/mock/model_store.go @@ -14,13 +14,13 @@ import ( // model wrapper model store // ------------------------------------------------------------ -type MockModelStore struct { +type ModelStore struct { backup *backup.Backup err error } -func NewMock(b *backup.Backup, err error) *MockModelStore { - return &MockModelStore{ +func NewModelStoreMock(b *backup.Backup, err error) *ModelStore { + return &ModelStore{ backup: b, err: err, } @@ -30,11 +30,11 @@ func NewMock(b *backup.Backup, err error) *MockModelStore { // deleter iface // ------------------------------------------------------------ -func (mms *MockModelStore) Delete(ctx context.Context, s model.Schema, id model.StableID) error { +func (mms *ModelStore) Delete(ctx context.Context, s model.Schema, id model.StableID) error { return mms.err } -func (mms *MockModelStore) DeleteWithModelStoreID(ctx context.Context, id manifest.ID) error { +func (mms *ModelStore) DeleteWithModelStoreID(ctx context.Context, id manifest.ID) error { return mms.err } @@ -42,7 +42,7 @@ func (mms *MockModelStore) DeleteWithModelStoreID(ctx context.Context, id manife // getter iface // ------------------------------------------------------------ -func (mms *MockModelStore) Get( +func (mms *ModelStore) Get( ctx context.Context, s model.Schema, id model.StableID, @@ -64,7 +64,7 @@ func (mms *MockModelStore) Get( return nil } -func (mms *MockModelStore) GetIDsForType( +func (mms *ModelStore) GetIDsForType( ctx context.Context, s model.Schema, tags map[string]string, @@ -82,7 +82,7 @@ func (mms *MockModelStore) GetIDsForType( return nil, clues.New("schema not supported by mock GetIDsForType").With("schema", s) } -func (mms *MockModelStore) GetWithModelStoreID( +func (mms *ModelStore) GetWithModelStoreID( ctx context.Context, s model.Schema, id manifest.ID, @@ -108,7 +108,7 @@ func (mms *MockModelStore) GetWithModelStoreID( // updater iface // ------------------------------------------------------------ -func (mms *MockModelStore) Put(ctx context.Context, s model.Schema, m model.Model) error { +func (mms *ModelStore) Put(ctx context.Context, s model.Schema, m model.Model) error { switch s { case model.BackupSchema: bm := m.(*backup.Backup) @@ -121,7 +121,7 @@ func (mms *MockModelStore) Put(ctx context.Context, s model.Schema, m model.Mode return mms.err } -func (mms *MockModelStore) Update(ctx context.Context, s model.Schema, m model.Model) error { +func (mms *ModelStore) Update(ctx context.Context, s model.Schema, m model.Model) error { switch s { case model.BackupSchema: bm := m.(*backup.Backup) diff --git a/src/pkg/store/mock/wrapper.go b/src/pkg/store/mock/wrapper.go new file mode 100644 index 000000000..3112fbdff --- /dev/null +++ b/src/pkg/store/mock/wrapper.go @@ -0,0 +1,38 @@ +package mock + +import ( + "context" + + "github.com/alcionai/clues" + + "github.com/alcionai/corso/src/internal/model" + "github.com/alcionai/corso/src/pkg/backup" + "github.com/alcionai/corso/src/pkg/store" +) + +type BackupWrapper struct { + Backup *backup.Backup + GetErr error + DeleteErr error +} + +func (bw BackupWrapper) GetBackup( + ctx context.Context, + backupID model.StableID, +) (*backup.Backup, error) { + return bw.Backup, 
bw.GetErr
+}
+
+func (bw BackupWrapper) DeleteBackup(
+	ctx context.Context,
+	backupID model.StableID,
+) error {
+	return bw.DeleteErr
+}
+
+func (bw BackupWrapper) GetBackups(
+	ctx context.Context,
+	filters ...store.FilterOption,
+) ([]*backup.Backup, error) {
+	return nil, clues.New("GetBackups mock not implemented yet")
+}
diff --git a/website/blog/2023-04-08-malware.md b/website/blog/2023-04-08-malware.md
new file mode 100644
index 000000000..6f07fb9b1
--- /dev/null
+++ b/website/blog/2023-04-08-malware.md
@@ -0,0 +1,111 @@
+---
+slug: malware-detection
+title: "Microsoft 365, Malware 👾, and your responsibilities"
+description: "When we evaluate malware threats, we often think mainly of protecting our users.
+The biggest concern is always going to be lost availability and leaked data if
+malware affects our system. But like any threat with an infection model, part of
+the story is about your responsibilities as an operations engineer to keep
+others safe."
+authors: nica
+tags: [corso, microsoft 365, backups, security, malware]
+date: 2023-04-08
+image: ./images/invaders.png
+---
+
+![a clone of the game 'space invaders' Cover image By Lee Robinson - https://github.com/leerob/space-invaders, MIT, https://commons.wikimedia.org/w/index.php?curid=127314893](./images/invaders.png)
+
+When we evaluate malware threats, we often think mainly of protecting our users.
+The biggest concern is always going to be lost availability and leaked data if
+malware affects our system. But like any threat with an infection model, part of
+the story is about your responsibilities as an operations engineer to keep
+others safe.
+
+
+## Microsoft-hosted Malware
+
+Research earlier in 2023 showed that Microsoft OneDrive was
+[host to about 30% of all malware](https://www.cybertalk.org/2023/01/27/do-you-use-onedrive-or-google-drive-watch-out-for-this-malware/).
+OneDrive is a popular platform for hosting malware because the malicious actor
+can get a legitimate-looking URL that will increase the chance of their payload
+being downloaded or executed.
+
+This malware hosting is usually done on accounts created by malicious actors,
+but it’s even more effective if a compromised account within a legitimate
+organization can be used.
+
+The responsibility for addressing this issue lies more with administrators than
+with Microsoft.
+Data on OneDrive is customer data, and it would be intrusive and disruptive
+for Microsoft to automatically start taking down files.
+
+Anyone running OneDrive and SharePoint should take measures to detect and remove
+malware - to protect their own users and the broader community.
+
+If you accept that, as a Microsoft 365 and OneDrive user, you should ‘be
+part of the solution’, how can you take a stand against malware?
+
+## Scan for Malware
+
+Sophisticated malware is difficult to engineer. Threats like BazarLoader, which
+use a Trojan horse to create an ISO that waits for the user to open an
+innocent-looking ‘Documents’ folder, aren’t being developed from scratch every
+day. Therefore, it’s possible to scan for malware and find most threats before
+they affect large numbers of systems.
+
+While there are a number of tools to scan backups, attachments, and other file
+locations, I’m pleased to say that Corso has implemented
+[malware scanning for your backups as of V0.5.0](https://github.com/alcionai/corso/releases/tag/v0.5.0).
+Corso aims to prevent content already flagged as malware from making it into your
Since Corso is free and open-source, admins can take advantage of this +and take action (for example delete, extract for forensic analysis) against files +flagged by Corso. + +## See it in Action: Create a Malware-Free Backup with Corso + +We hope that the first time you use a tool like Corso to scan your backups, you will +have no malware detected. This however begs the question: how do we know it’s +working? + +Good news: there are long-standing resources to grab ‘known bad’ files that +should set off any malware or virus scanner. The European Institute for Computer +Anti-Virus Research (EICAR) have made such a file available. With this rather +choice paragraph about why a non-virus ‘known bad’ file is useful for security +practices: + +> Using real viruses for testing in the real world is rather like setting fire +> to the dustbin in your office to see whether the smoke detector is working. +> Such a test will give meaningful results, but with unappealing, unacceptable +> risks. + +Download the EICAR test file here. Any scanner worth its salt will alert on at +least the first two versions of the file (`eicar.com` and `eicar.com.txt`) and +*should* notice malware inside a .zip as well. When using Corso with any of +these files, the feedback is quite clear: + +![Corso giving feedback](./images/malware1.png) + +Any detected files will be listed as 'skipped' and the rest of the backup will complete as normal. + +## What to do when Corso Detects Malware + +Files that Corso detects as malware will be skipped from backups, but you should +take steps to delete these files and do some analysis of their source within +your OneDrive instance. When Corso detects malware, it will log the fact +(Corso’s log location is displayed when the CLI runs). + +![Image of Corso logging errors and exceptions, with one item of malware detected](./images/malware2.png) + +Lines for detected malware will show up marked as `malware detected` and will even have a `malware_description` parameter. + +## Monitor for new reports + +The landscape for malware is shifting, and it’s vital you stay on top of new reports. Three sources of updates I’d recommend: + +- [Microsoft’s Vulnerabilities Registry](https://msrc.microsoft.com/update-guide/vulnerability) +- [The Malwarebytes blog](https://www.malwarebytes.com/blog) +- [The Register’s tech news](https://www.theregister.com/), for a more industry-wide view of trends and major issues + + +If you keep these practices in place in your organization, not only are you less +likely to suffer from malware attacks, but the danger of your playing host to +malicious files and attacks on others will be greatly reduced! diff --git a/website/blog/2023-4-04-aws-storage-class.md b/website/blog/2023-4-04-aws-storage-class.md new file mode 100644 index 000000000..e9a5a4987 --- /dev/null +++ b/website/blog/2023-4-04-aws-storage-class.md @@ -0,0 +1,131 @@ +--- +slug: aws-storage-class +title: "Choosing the Best AWS S3 Storage Class for Corso Backups" +description: "Recently when writing about the storage options for Corso, I found myself going pretty far in the weeds on storage classes in S3. I thought I’d make a list of all the storage options and why they might, or might not, work for backups." +authors: nica +tags: [corso, microsoft 365, AWS, backups] +date: 2023-04-04 +image: ./images/box_sizes.jpeg +--- + +![multiple box sizes](./images/box_sizes.jpeg) +Recently when writing +about the storage options for Corso, I found myself going pretty far in the +weeds on storage classes in S3. 
I thought I’d make a list of all the storage
+options and why they might, or might not, work for backups.
+
+
+## First, some assumptions
+
+If we’re talking about backups, we’re assuming far more writes than reads, and
+that most objects that are written will never be read.
+
+The older an object gets, the greater the chance that it will never be read.
+
+And we can’t afford to lose anything! One-zone options like ONEZONE_IA, which
+carry a small chance of data loss, won’t work.
+
+Finally, there will be index data and metadata that may well be overwritten
+frequently. For more detail on this, and an exploration of non-AWS alternatives
+to S3, see our past article on
+[where to store your Corso data](https://corsobackup.io/blog/where-to-store-corso/).
+
+If your backup solution breaks one of these expectations, for example if you’re
+restoring from backups every single day, the advice in this article may not be
+applicable to you.
+
+## Best practices no matter your storage class
+
+Using a purpose-built backup tool rather than a naive file copy is the first step
+towards an efficient backup process. Before you drag that folder over to that
+network drive icon, consider the following requirements:
+
+- Compression - don’t use more network bandwidth than you have to
+- De-duplication - backing up a team’s email shouldn’t mean storing 50 identical copies of ‘Presentation_FINAL.pptx’
+- Incremental Backups - Ideally, your second backup should only include updated objects
+- Bundling - creating millions of 2KB objects each backup is going to add to costs and hurt performance
+
+## Storage Classes, considered
+
+The AWS Storage classes are STANDARD | REDUCED_REDUNDANCY | STANDARD_IA |
+ONEZONE_IA | INTELLIGENT_TIERING | GLACIER | DEEP_ARCHIVE | OUTPOSTS |
+GLACIER_IR,
+
+of which we won’t consider REDUCED_REDUNDANCY (it’s outdated and Standard is now
+cheaper) and OUTPOSTS (if you need on-prem S3, it’s not for cost or efficiency).
+
+### STANDARD
+
+The S3 Standard storage class should work for all backup implementations, as long
+as you’re not doing something that can’t really work with object storage and its
+network latency (for example, a backup application that tries to do fine-grained,
+low-latency, database-style queries using indices stored in S3).
+
+For Corso, Standard is a great place to start testing your setup, letting you
+perform regular backups, restores, and deletions. We also recommend storing all
+your non-blob data in Standard; how to do this automatically is covered at the
+end of this list.
+
+### STANDARD_IA and ONEZONE_IA
+
+These are the storage classes AWS recommends for backups! But it’s likely that
+Glacier Instant Retrieval will be cheaper. Also, Infrequent Access charges a
+minimum storage size of 128KB and a minimum storage time of 30 days. If your
+backups are creating many small objects, or if you have incremental backups
+constantly updating most objects, Infrequent Access may come out more expensive
+than Standard.
+
+For Corso, it’s not likely that this storage class will make the most sense.
+Maybe a case where periodic restores are expected with some frequency would
+benefit from this class, but that would have to be so frequent I’m not sure
+‘backup’ is the right term. If you find this is the best class for you, please
+join our Discord and tell us about it.
+
+### INTELLIGENT_TIERING
+
+Intelligent Tiering is the most appealing of AWS’s new S3 offerings for backups.
+
+As objects age they’ll move down to cheaper and cheaper tiers, finally dropping to the
+same storage costs per GB as Glacier Instant Retrieval.
+
+Two considerations should give you pause when using Intelligent Tiering for backups: first,
+there’s a small per-object monitoring cost to Intelligent Tiering, and second, you probably
+do know the usage pattern of these backups: almost all will never be touched.
+
+With Intelligent Tiering you’ll pay for your backups to be on a more expensive
+tier for 60 days before you get the pricing that you probably could have picked
+out for yourself in the first place.
+
+Intelligent Tiering probably only makes sense if you’re using backups in a
+nonstandard way, for example restoring from backups every morning. If you’re not sure
+*how* your data will be used, Intelligent Tiering is a safe bet.
+
+
+### GLACIER and DEEP_ARCHIVE
+
+Glacier (not Instant Retrieval, which is discussed below) is a great way to
+archive data, which is a slightly different idea from backups. If you have a
+reason to store and not touch data (for example, for compliance) and can tolerate
+extremely high latencies (hours to days) for recovery, you may want to use
+Glacier Archive. However, high-performance backup tools, like Corso, usually
+contain smart optimizations like incremental backups and backup indexes that
+won’t work if the latency for all requests is in minutes. Further, for cost and
+efficiency, deduplicating object stores will often compact data as the primary
+data source churns. Using default Glacier or Glacier Deep Archive is a poor fit
+for that workload.
+
+### GLACIER_IR
+
+Likely to be your best option for backups, Glacier IR is cheaper than any
+non-Glacier option for storage, with low request latency. Corso’s
+de-duplication, bundling, and compression will help ensure that you’re paying as
+little as possible for storage. (If you want to see how to target this class
+directly, there’s a short SDK sketch at the end of this post.)
+
+## Glacier Instant Retrieval is the best choice for Corso backups
+
+With these considerations, and with the best practices mentioned above, you
+should be able to build reliable backups with a minimal cost impact. If you’re
+ready to give Corso a try, check out our
+[Quickstart Guide](https://corsobackup.io/docs/quickstart/), or take a look at a
+recent article on backing up
+[large Exchange instances](https://corsobackup.io/blog/large-microsoft-365-exchange-backups/)
+with Corso.
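+
+## Bonus: targeting Glacier Instant Retrieval directly
+
+Corso writes its data through kopia, so you won’t normally hand-roll S3 calls
+yourself; the storage class is typically set through a bucket default or a
+lifecycle rule. Still, if you’re curious what pinning an object to Glacier
+Instant Retrieval looks like at the API level, here’s a minimal sketch using
+the AWS SDK for Go (the bucket, key, and file names below are made up):
+
+```go
+package main
+
+import (
+	"context"
+	"log"
+	"os"
+
+	"github.com/aws/aws-sdk-go-v2/aws"
+	"github.com/aws/aws-sdk-go-v2/config"
+	"github.com/aws/aws-sdk-go-v2/service/s3"
+	"github.com/aws/aws-sdk-go-v2/service/s3/types"
+)
+
+func main() {
+	ctx := context.Background()
+
+	// Pull credentials and region from the usual env/config chain.
+	cfg, err := config.LoadDefaultConfig(ctx)
+	if err != nil {
+		log.Fatal(err)
+	}
+
+	client := s3.NewFromConfig(cfg)
+
+	f, err := os.Open("backup.blob") // any local artifact
+	if err != nil {
+		log.Fatal(err)
+	}
+	defer f.Close()
+
+	// StorageClass pins the object to GLACIER_IR at write time, so it
+	// never spends its first weeks on a pricier tier.
+	_, err = client.PutObject(ctx, &s3.PutObjectInput{
+		Bucket:       aws.String("my-corso-backups"),
+		Key:          aws.String("blobs/backup.blob"),
+		Body:         f,
+		StorageClass: types.StorageClassGlacierIr,
+	})
+	if err != nil {
+		log.Fatal(err)
+	}
+}
+```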
diff --git a/website/blog/images/box_sizes.jpeg b/website/blog/images/box_sizes.jpeg new file mode 100644 index 000000000..fcd68e811 Binary files /dev/null and b/website/blog/images/box_sizes.jpeg differ diff --git a/website/blog/images/invaders.png b/website/blog/images/invaders.png new file mode 100644 index 000000000..b99f39ff1 Binary files /dev/null and b/website/blog/images/invaders.png differ diff --git a/website/blog/images/malware1.png b/website/blog/images/malware1.png new file mode 100644 index 000000000..9e9b596ef Binary files /dev/null and b/website/blog/images/malware1.png differ diff --git a/website/blog/images/malware2.png b/website/blog/images/malware2.png new file mode 100644 index 000000000..b1a662e9f Binary files /dev/null and b/website/blog/images/malware2.png differ diff --git a/website/package-lock.json b/website/package-lock.json index f899e192d..fd5097e31 100644 --- a/website/package-lock.json +++ b/website/package-lock.json @@ -20,7 +20,7 @@ "feather-icons": "^4.29.0", "jarallax": "^2.1.3", "mdx-mermaid": "^1.3.2", - "mermaid": "^10.0.2", + "mermaid": "^10.1.0", "prism-react-renderer": "^1.3.5", "react": "^17.0.2", "react-dom": "^17.0.2", @@ -34,7 +34,7 @@ "@iconify/react": "^4.1.0", "autoprefixer": "^10.4.14", "postcss": "^8.4.21", - "tailwindcss": "^3.3.0" + "tailwindcss": "^3.3.1" } }, "node_modules/@algolia/autocomplete-core": { @@ -5758,9 +5758,9 @@ "integrity": "sha512-dp3s92+uNI1hWIpPGH3jK2kxE2lMjdXdr+DH8ynZHpd6PUlH6x6cbuXnoMmiNumznqaNO31xu9e79F0uuZ0JFg==" }, "node_modules/d3": { - "version": "7.8.2", - "resolved": "https://registry.npmjs.org/d3/-/d3-7.8.2.tgz", - "integrity": "sha512-WXty7qOGSHb7HR7CfOzwN1Gw04MUOzN8qh9ZUsvwycIMb4DYMpY9xczZ6jUorGtO6bR9BPMPaueIKwiDxu9uiQ==", + "version": "7.8.4", + "resolved": "https://registry.npmjs.org/d3/-/d3-7.8.4.tgz", + "integrity": "sha512-q2WHStdhiBtD8DMmhDPyJmXUxr6VWRngKyiJ5EfXMxPw+tqT6BhNjhJZ4w3BHsNm3QoVfZLY8Orq/qPFczwKRA==", "dependencies": { "d3-array": "3", "d3-axis": "3", @@ -5798,9 +5798,9 @@ } }, "node_modules/d3-array": { - "version": "3.2.2", - "resolved": "https://registry.npmjs.org/d3-array/-/d3-array-3.2.2.tgz", - "integrity": "sha512-yEEyEAbDrF8C6Ob2myOBLjwBLck1Z89jMGFee0oPsn95GqjerpaOA4ch+vc2l0FNFFwMD5N7OCSEN5eAlsUbgQ==", + "version": "3.2.3", + "resolved": "https://registry.npmjs.org/d3-array/-/d3-array-3.2.3.tgz", + "integrity": "sha512-JRHwbQQ84XuAESWhvIPaUV4/1UYTBOLiOPGWqgFDHZS1D5QN9c57FbH3QpEnQMYiOXNzKUQyGTZf+EVO7RT5TQ==", "dependencies": { "internmap": "1 - 2" }, @@ -5862,9 +5862,9 @@ } }, "node_modules/d3-delaunay": { - "version": "6.0.2", - "resolved": "https://registry.npmjs.org/d3-delaunay/-/d3-delaunay-6.0.2.tgz", - "integrity": "sha512-IMLNldruDQScrcfT+MWnazhHbDJhcRJyOEBAJfwQnHle1RPh6WDuLvxNArUju2VSMSUuKlY5BGHRJ2cYyoFLQQ==", + "version": "6.0.4", + "resolved": "https://registry.npmjs.org/d3-delaunay/-/d3-delaunay-6.0.4.tgz", + "integrity": "sha512-mdjtIZ1XLAM8bm/hx3WwjfHt6Sggek7qH043O8KEjDXN40xi3vx/6pYSVTwLjEgiXQTbvaouWKynLBiUZ6SK6A==", "dependencies": { "delaunator": "5" }, @@ -6147,9 +6147,9 @@ } }, "node_modules/dagre-d3-es": { - "version": "7.0.9", - "resolved": "https://registry.npmjs.org/dagre-d3-es/-/dagre-d3-es-7.0.9.tgz", - "integrity": "sha512-rYR4QfVmy+sR44IBDvVtcAmOReGBvRCWDpO2QjYwqgh9yijw6eSHBqaPG/LIOEy7aBsniLvtMW6pg19qJhq60w==", + "version": "7.0.10", + "resolved": "https://registry.npmjs.org/dagre-d3-es/-/dagre-d3-es-7.0.10.tgz", + "integrity": "sha512-qTCQmEhcynucuaZgY5/+ti3X/rnszKZhEQH/ZdWdtP1tA/y3VoHJzcVrO9pjjJCNpigfscAtoUB5ONcd2wNn0A==", "dependencies": { "d3": "^7.8.2", 
"lodash-es": "^4.17.21" @@ -6530,9 +6530,9 @@ } }, "node_modules/dompurify": { - "version": "2.4.3", - "resolved": "https://registry.npmjs.org/dompurify/-/dompurify-2.4.3.tgz", - "integrity": "sha512-q6QaLcakcRjebxjg8/+NP+h0rPfatOgOzc46Fst9VAA3jF2ApfKBNKMzdP4DYTqtUMXSCd5pRS/8Po/OmoCHZQ==" + "version": "2.4.5", + "resolved": "https://registry.npmjs.org/dompurify/-/dompurify-2.4.5.tgz", + "integrity": "sha512-jggCCd+8Iqp4Tsz0nIvpcb22InKEBrGz5dw3EQJMs8HPJDsKbFIO3STYtAvCfDx26Muevn1MHVI0XxjgFfmiSA==" }, "node_modules/domutils": { "version": "2.8.0", @@ -9151,18 +9151,19 @@ } }, "node_modules/mermaid": { - "version": "10.0.2", - "resolved": "https://registry.npmjs.org/mermaid/-/mermaid-10.0.2.tgz", - "integrity": "sha512-slwoB9WdNUT+/W9VhxLYRLZ0Ey12fIE+cAZjm3FmHTD+0F1uoJETfsNbVS1POnvQZhFYzfT6/z6hJZXgecqVBA==", + "version": "10.1.0", + "resolved": "https://registry.npmjs.org/mermaid/-/mermaid-10.1.0.tgz", + "integrity": "sha512-LYekSMNJygI1VnMizAPUddY95hZxOjwZxr7pODczILInO0dhQKuhXeu4sargtnuTwCilSuLS7Uiq/Qn7HTVrmA==", "dependencies": { "@braintree/sanitize-url": "^6.0.0", + "@khanacademy/simple-markdown": "^0.8.6", "cytoscape": "^3.23.0", "cytoscape-cose-bilkent": "^4.1.0", "cytoscape-fcose": "^2.1.0", "d3": "^7.4.0", - "dagre-d3-es": "7.0.9", + "dagre-d3-es": "7.0.10", "dayjs": "^1.11.7", - "dompurify": "2.4.3", + "dompurify": "2.4.5", "elkjs": "^0.8.2", "khroma": "^2.0.0", "lodash-es": "^4.17.21", @@ -9173,6 +9174,57 @@ "web-worker": "^1.2.0" } }, + "node_modules/mermaid/node_modules/@khanacademy/simple-markdown": { + "version": "0.8.6", + "resolved": "https://registry.npmjs.org/@khanacademy/simple-markdown/-/simple-markdown-0.8.6.tgz", + "integrity": "sha512-mAUlR9lchzfqunR89pFvNI51jQKsMpJeWYsYWw0DQcUXczn/T/V6510utgvm7X0N3zN87j1SvuKk8cMbl9IAFw==", + "dependencies": { + "@types/react": ">=16.0.0" + }, + "peerDependencies": { + "react": "16.14.0", + "react-dom": "16.14.0" + } + }, + "node_modules/mermaid/node_modules/react": { + "version": "16.14.0", + "resolved": "https://registry.npmjs.org/react/-/react-16.14.0.tgz", + "integrity": "sha512-0X2CImDkJGApiAlcf0ODKIneSwBPhqJawOa5wCtKbu7ZECrmS26NvtSILynQ66cgkT/RJ4LidJOc3bUESwmU8g==", + "peer": true, + "dependencies": { + "loose-envify": "^1.1.0", + "object-assign": "^4.1.1", + "prop-types": "^15.6.2" + }, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/mermaid/node_modules/react-dom": { + "version": "16.14.0", + "resolved": "https://registry.npmjs.org/react-dom/-/react-dom-16.14.0.tgz", + "integrity": "sha512-1gCeQXDLoIqMgqD3IO2Ah9bnf0w9kzhwN5q4FGnHZ67hBm9yePzB5JJAIQCc8x3pFnNlwFq4RidZggNAAkzWWw==", + "peer": true, + "dependencies": { + "loose-envify": "^1.1.0", + "object-assign": "^4.1.1", + "prop-types": "^15.6.2", + "scheduler": "^0.19.1" + }, + "peerDependencies": { + "react": "^16.14.0" + } + }, + "node_modules/mermaid/node_modules/scheduler": { + "version": "0.19.1", + "resolved": "https://registry.npmjs.org/scheduler/-/scheduler-0.19.1.tgz", + "integrity": "sha512-n/zwRWRYSUj0/3g/otKDRPMh6qv2SYMWNq85IEa8iZyAv8od9zDYpGSnpBEjNgcMNq6Scbu5KfIPxNF72R/2EA==", + "peer": true, + "dependencies": { + "loose-envify": "^1.1.0", + "object-assign": "^4.1.1" + } + }, "node_modules/mermaid/node_modules/uuid": { "version": "9.0.0", "resolved": "https://registry.npmjs.org/uuid/-/uuid-9.0.0.tgz", @@ -12824,9 +12876,9 @@ } }, "node_modules/tailwindcss": { - "version": "3.3.0", - "resolved": "https://registry.npmjs.org/tailwindcss/-/tailwindcss-3.3.0.tgz", - "integrity": 
"sha512-hOXlFx+YcklJ8kXiCAfk/FMyr4Pm9ck477G0m/us2344Vuj355IpoEDB5UmGAsSpTBmr+4ZhjzW04JuFXkb/fw==", + "version": "3.3.1", + "resolved": "https://registry.npmjs.org/tailwindcss/-/tailwindcss-3.3.1.tgz", + "integrity": "sha512-Vkiouc41d4CEq0ujXl6oiGFQ7bA3WEhUZdTgXAhtKxSy49OmKs8rEfQmupsfF0IGW8fv2iQkp1EVUuapCFrZ9g==", "dev": true, "dependencies": { "arg": "^5.0.2", @@ -18617,9 +18669,9 @@ } }, "d3": { - "version": "7.8.2", - "resolved": "https://registry.npmjs.org/d3/-/d3-7.8.2.tgz", - "integrity": "sha512-WXty7qOGSHb7HR7CfOzwN1Gw04MUOzN8qh9ZUsvwycIMb4DYMpY9xczZ6jUorGtO6bR9BPMPaueIKwiDxu9uiQ==", + "version": "7.8.4", + "resolved": "https://registry.npmjs.org/d3/-/d3-7.8.4.tgz", + "integrity": "sha512-q2WHStdhiBtD8DMmhDPyJmXUxr6VWRngKyiJ5EfXMxPw+tqT6BhNjhJZ4w3BHsNm3QoVfZLY8Orq/qPFczwKRA==", "requires": { "d3-array": "3", "d3-axis": "3", @@ -18654,9 +18706,9 @@ } }, "d3-array": { - "version": "3.2.2", - "resolved": "https://registry.npmjs.org/d3-array/-/d3-array-3.2.2.tgz", - "integrity": "sha512-yEEyEAbDrF8C6Ob2myOBLjwBLck1Z89jMGFee0oPsn95GqjerpaOA4ch+vc2l0FNFFwMD5N7OCSEN5eAlsUbgQ==", + "version": "3.2.3", + "resolved": "https://registry.npmjs.org/d3-array/-/d3-array-3.2.3.tgz", + "integrity": "sha512-JRHwbQQ84XuAESWhvIPaUV4/1UYTBOLiOPGWqgFDHZS1D5QN9c57FbH3QpEnQMYiOXNzKUQyGTZf+EVO7RT5TQ==", "requires": { "internmap": "1 - 2" } @@ -18700,9 +18752,9 @@ } }, "d3-delaunay": { - "version": "6.0.2", - "resolved": "https://registry.npmjs.org/d3-delaunay/-/d3-delaunay-6.0.2.tgz", - "integrity": "sha512-IMLNldruDQScrcfT+MWnazhHbDJhcRJyOEBAJfwQnHle1RPh6WDuLvxNArUju2VSMSUuKlY5BGHRJ2cYyoFLQQ==", + "version": "6.0.4", + "resolved": "https://registry.npmjs.org/d3-delaunay/-/d3-delaunay-6.0.4.tgz", + "integrity": "sha512-mdjtIZ1XLAM8bm/hx3WwjfHt6Sggek7qH043O8KEjDXN40xi3vx/6pYSVTwLjEgiXQTbvaouWKynLBiUZ6SK6A==", "requires": { "delaunator": "5" } @@ -18895,9 +18947,9 @@ } }, "dagre-d3-es": { - "version": "7.0.9", - "resolved": "https://registry.npmjs.org/dagre-d3-es/-/dagre-d3-es-7.0.9.tgz", - "integrity": "sha512-rYR4QfVmy+sR44IBDvVtcAmOReGBvRCWDpO2QjYwqgh9yijw6eSHBqaPG/LIOEy7aBsniLvtMW6pg19qJhq60w==", + "version": "7.0.10", + "resolved": "https://registry.npmjs.org/dagre-d3-es/-/dagre-d3-es-7.0.10.tgz", + "integrity": "sha512-qTCQmEhcynucuaZgY5/+ti3X/rnszKZhEQH/ZdWdtP1tA/y3VoHJzcVrO9pjjJCNpigfscAtoUB5ONcd2wNn0A==", "requires": { "d3": "^7.8.2", "lodash-es": "^4.17.21" @@ -19160,9 +19212,9 @@ } }, "dompurify": { - "version": "2.4.3", - "resolved": "https://registry.npmjs.org/dompurify/-/dompurify-2.4.3.tgz", - "integrity": "sha512-q6QaLcakcRjebxjg8/+NP+h0rPfatOgOzc46Fst9VAA3jF2ApfKBNKMzdP4DYTqtUMXSCd5pRS/8Po/OmoCHZQ==" + "version": "2.4.5", + "resolved": "https://registry.npmjs.org/dompurify/-/dompurify-2.4.5.tgz", + "integrity": "sha512-jggCCd+8Iqp4Tsz0nIvpcb22InKEBrGz5dw3EQJMs8HPJDsKbFIO3STYtAvCfDx26Muevn1MHVI0XxjgFfmiSA==" }, "domutils": { "version": "2.8.0", @@ -20937,18 +20989,19 @@ "integrity": "sha512-8q7VEgMJW4J8tcfVPy8g09NcQwZdbwFEqhe/WZkoIzjn/3TGDwtOCYtXGxA3O8tPzpczCCDgv+P2P5y00ZJOOg==" }, "mermaid": { - "version": "10.0.2", - "resolved": "https://registry.npmjs.org/mermaid/-/mermaid-10.0.2.tgz", - "integrity": "sha512-slwoB9WdNUT+/W9VhxLYRLZ0Ey12fIE+cAZjm3FmHTD+0F1uoJETfsNbVS1POnvQZhFYzfT6/z6hJZXgecqVBA==", + "version": "10.1.0", + "resolved": "https://registry.npmjs.org/mermaid/-/mermaid-10.1.0.tgz", + "integrity": "sha512-LYekSMNJygI1VnMizAPUddY95hZxOjwZxr7pODczILInO0dhQKuhXeu4sargtnuTwCilSuLS7Uiq/Qn7HTVrmA==", "requires": { "@braintree/sanitize-url": "^6.0.0", + "@khanacademy/simple-markdown": 
"^0.8.6", "cytoscape": "^3.23.0", "cytoscape-cose-bilkent": "^4.1.0", "cytoscape-fcose": "^2.1.0", "d3": "^7.4.0", - "dagre-d3-es": "7.0.9", + "dagre-d3-es": "7.0.10", "dayjs": "^1.11.7", - "dompurify": "2.4.3", + "dompurify": "2.4.5", "elkjs": "^0.8.2", "khroma": "^2.0.0", "lodash-es": "^4.17.21", @@ -20959,6 +21012,47 @@ "web-worker": "^1.2.0" }, "dependencies": { + "@khanacademy/simple-markdown": { + "version": "0.8.6", + "resolved": "https://registry.npmjs.org/@khanacademy/simple-markdown/-/simple-markdown-0.8.6.tgz", + "integrity": "sha512-mAUlR9lchzfqunR89pFvNI51jQKsMpJeWYsYWw0DQcUXczn/T/V6510utgvm7X0N3zN87j1SvuKk8cMbl9IAFw==", + "requires": { + "@types/react": ">=16.0.0" + } + }, + "react": { + "version": "16.14.0", + "resolved": "https://registry.npmjs.org/react/-/react-16.14.0.tgz", + "integrity": "sha512-0X2CImDkJGApiAlcf0ODKIneSwBPhqJawOa5wCtKbu7ZECrmS26NvtSILynQ66cgkT/RJ4LidJOc3bUESwmU8g==", + "peer": true, + "requires": { + "loose-envify": "^1.1.0", + "object-assign": "^4.1.1", + "prop-types": "^15.6.2" + } + }, + "react-dom": { + "version": "16.14.0", + "resolved": "https://registry.npmjs.org/react-dom/-/react-dom-16.14.0.tgz", + "integrity": "sha512-1gCeQXDLoIqMgqD3IO2Ah9bnf0w9kzhwN5q4FGnHZ67hBm9yePzB5JJAIQCc8x3pFnNlwFq4RidZggNAAkzWWw==", + "peer": true, + "requires": { + "loose-envify": "^1.1.0", + "object-assign": "^4.1.1", + "prop-types": "^15.6.2", + "scheduler": "^0.19.1" + } + }, + "scheduler": { + "version": "0.19.1", + "resolved": "https://registry.npmjs.org/scheduler/-/scheduler-0.19.1.tgz", + "integrity": "sha512-n/zwRWRYSUj0/3g/otKDRPMh6qv2SYMWNq85IEa8iZyAv8od9zDYpGSnpBEjNgcMNq6Scbu5KfIPxNF72R/2EA==", + "peer": true, + "requires": { + "loose-envify": "^1.1.0", + "object-assign": "^4.1.1" + } + }, "uuid": { "version": "9.0.0", "resolved": "https://registry.npmjs.org/uuid/-/uuid-9.0.0.tgz", @@ -23358,9 +23452,9 @@ } }, "tailwindcss": { - "version": "3.3.0", - "resolved": "https://registry.npmjs.org/tailwindcss/-/tailwindcss-3.3.0.tgz", - "integrity": "sha512-hOXlFx+YcklJ8kXiCAfk/FMyr4Pm9ck477G0m/us2344Vuj355IpoEDB5UmGAsSpTBmr+4ZhjzW04JuFXkb/fw==", + "version": "3.3.1", + "resolved": "https://registry.npmjs.org/tailwindcss/-/tailwindcss-3.3.1.tgz", + "integrity": "sha512-Vkiouc41d4CEq0ujXl6oiGFQ7bA3WEhUZdTgXAhtKxSy49OmKs8rEfQmupsfF0IGW8fv2iQkp1EVUuapCFrZ9g==", "dev": true, "requires": { "arg": "^5.0.2", diff --git a/website/package.json b/website/package.json index 2550cca3b..b1bd397fd 100644 --- a/website/package.json +++ b/website/package.json @@ -26,7 +26,7 @@ "feather-icons": "^4.29.0", "jarallax": "^2.1.3", "mdx-mermaid": "^1.3.2", - "mermaid": "^10.0.2", + "mermaid": "^10.1.0", "prism-react-renderer": "^1.3.5", "react": "^17.0.2", "react-dom": "^17.0.2", @@ -40,7 +40,7 @@ "@iconify/react": "^4.1.0", "autoprefixer": "^10.4.14", "postcss": "^8.4.21", - "tailwindcss": "^3.3.0" + "tailwindcss": "^3.3.1" }, "browserslist": { "production": [ diff --git a/website/styles/Vocab/Base/accept.txt b/website/styles/Vocab/Base/accept.txt index 91dc74dc2..7f8d159c7 100644 --- a/website/styles/Vocab/Base/accept.txt +++ b/website/styles/Vocab/Base/accept.txt @@ -53,4 +53,5 @@ SMBs Demetrius Malbrough lockdowns -exfiltrate \ No newline at end of file +exfiltrate +deduplicating \ No newline at end of file