diff --git a/.github/workflows/weekly_cleanup.yml b/.github/workflows/weekly_cleanup.yml deleted file mode 100644 index befd7d1de..000000000 --- a/.github/workflows/weekly_cleanup.yml +++ /dev/null @@ -1,26 +0,0 @@ -name: Weekly S3 Test Bucket Cleanup -on: - schedule: - # every saturday at 23:59 (11:59pm) - - cron: "59 23 * * 6" - -permissions: - # required to retrieve AWS credentials - id-token: write - -jobs: - S3-Test-Cleanup: - runs-on: ubuntu-latest - environment: Testing - - steps: - - name: Configure AWS credentials from Test account - uses: aws-actions/configure-aws-credentials@v2 - with: - role-to-assume: ${{ secrets.AWS_IAM_ROLE }} - role-session-name: integration-testing - aws-region: us-east-1 - - - name: Delete all files in the test bucket - run: | - aws s3 rm s3://${{ secrets.CI_TESTS_S3_BUCKET }} --recursive --include "*" --exclude "longevity/*" \ No newline at end of file diff --git a/src/cli/backup/exchange.go b/src/cli/backup/exchange.go index 0f11bd6bd..d69ee3adf 100644 --- a/src/cli/backup/exchange.go +++ b/src/cli/backup/exchange.go @@ -91,6 +91,7 @@ func addExchangeCommands(cmd *cobra.Command) *cobra.Command { flags.AddFetchParallelismFlag(c) flags.AddFailFastFlag(c) flags.AddDisableIncrementalsFlag(c) + flags.AddForceItemDataDownloadFlag(c) flags.AddDisableDeltaFlag(c) flags.AddEnableImmutableIDFlag(c) flags.AddDisableConcurrencyLimiterFlag(c) diff --git a/src/cli/backup/onedrive.go b/src/cli/backup/onedrive.go index b9d94fc41..4e241323d 100644 --- a/src/cli/backup/onedrive.go +++ b/src/cli/backup/onedrive.go @@ -78,6 +78,7 @@ func addOneDriveCommands(cmd *cobra.Command) *cobra.Command { flags.AddFailFastFlag(c) flags.AddDisableIncrementalsFlag(c) + flags.AddForceItemDataDownloadFlag(c) case listCommand: c, fs = utils.AddCommand(cmd, oneDriveListCmd()) diff --git a/src/cli/backup/sharepoint.go b/src/cli/backup/sharepoint.go index 7fcf58163..567c0878c 100644 --- a/src/cli/backup/sharepoint.go +++ b/src/cli/backup/sharepoint.go @@ -93,6 +93,7 @@ func addSharePointCommands(cmd *cobra.Command) *cobra.Command { flags.AddDataFlag(c, []string{dataLibraries}, true) flags.AddFailFastFlag(c) flags.AddDisableIncrementalsFlag(c) + flags.AddForceItemDataDownloadFlag(c) case listCommand: c, fs = utils.AddCommand(cmd, sharePointListCmd()) diff --git a/src/cli/backup/sharepoint_test.go b/src/cli/backup/sharepoint_test.go index 648d3e8c4..8fadd064e 100644 --- a/src/cli/backup/sharepoint_test.go +++ b/src/cli/backup/sharepoint_test.go @@ -163,12 +163,11 @@ func (suite *SharePointUnitSuite) TestSharePointBackupCreateSelectors() { ) table := []struct { - name string - site []string - weburl []string - data []string - expect []string - expectScopesLen int + name string + site []string + weburl []string + data []string + expect []string }{ { name: "no sites or urls", @@ -181,63 +180,54 @@ func (suite *SharePointUnitSuite) TestSharePointBackupCreateSelectors() { expect: selectors.None(), }, { - name: "site wildcard", - site: []string{flags.Wildcard}, - expect: bothIDs, - expectScopesLen: 2, + name: "site wildcard", + site: []string{flags.Wildcard}, + expect: bothIDs, }, { - name: "url wildcard", - weburl: []string{flags.Wildcard}, - expect: bothIDs, - expectScopesLen: 2, + name: "url wildcard", + weburl: []string{flags.Wildcard}, + expect: bothIDs, }, { - name: "sites", - site: []string{id1, id2}, - expect: []string{id1, id2}, - expectScopesLen: 2, + name: "sites", + site: []string{id1, id2}, + expect: []string{id1, id2}, }, { - name: "urls", - weburl: []string{url1, url2}, - expect: 
[]string{url1, url2}, - expectScopesLen: 2, + name: "urls", + weburl: []string{url1, url2}, + expect: []string{url1, url2}, }, { - name: "mix sites and urls", - site: []string{id1}, - weburl: []string{url2}, - expect: []string{id1, url2}, - expectScopesLen: 2, + name: "mix sites and urls", + site: []string{id1}, + weburl: []string{url2}, + expect: []string{id1, url2}, }, { - name: "duplicate sites and urls", - site: []string{id1, id2}, - weburl: []string{url1, url2}, - expect: []string{id1, id2, url1, url2}, - expectScopesLen: 2, + name: "duplicate sites and urls", + site: []string{id1, id2}, + weburl: []string{url1, url2}, + expect: []string{id1, id2, url1, url2}, }, { - name: "unnecessary site wildcard", - site: []string{id1, flags.Wildcard}, - weburl: []string{url1, url2}, - expect: bothIDs, - expectScopesLen: 2, + name: "unnecessary site wildcard", + site: []string{id1, flags.Wildcard}, + weburl: []string{url1, url2}, + expect: bothIDs, }, { - name: "unnecessary url wildcard", - site: []string{id1}, - weburl: []string{url1, flags.Wildcard}, - expect: bothIDs, - expectScopesLen: 2, + name: "unnecessary url wildcard", + site: []string{id1}, + weburl: []string{url1, flags.Wildcard}, + expect: bothIDs, }, { - name: "Pages", - site: bothIDs, - data: []string{dataPages}, - expect: bothIDs, - expectScopesLen: 1, + name: "Pages", + site: bothIDs, + data: []string{dataPages}, + expect: bothIDs, }, } for _, test := range table { @@ -249,7 +239,7 @@ func (suite *SharePointUnitSuite) TestSharePointBackupCreateSelectors() { sel, err := sharePointBackupCreateSelectors(ctx, ins, test.site, test.weburl, test.data) require.NoError(t, err, clues.ToCore(err)) - assert.ElementsMatch(t, test.expect, sel.DiscreteResourceOwners()) + assert.ElementsMatch(t, test.expect, sel.ResourceOwners.Targets) }) } } diff --git a/src/cli/flags/options.go b/src/cli/flags/options.go index 81a893f93..b24c5a9c9 100644 --- a/src/cli/flags/options.go +++ b/src/cli/flags/options.go @@ -9,6 +9,7 @@ const ( DisableConcurrencyLimiterFN = "disable-concurrency-limiter" DisableDeltaFN = "disable-delta" DisableIncrementalsFN = "disable-incrementals" + ForceItemDataDownloadFN = "force-item-data-download" EnableImmutableIDFN = "enable-immutable-id" FailFastFN = "fail-fast" FailedItemsFN = "failed-items" @@ -26,6 +27,7 @@ var ( DisableConcurrencyLimiterFV bool DisableDeltaFV bool DisableIncrementalsFV bool + ForceItemDataDownloadFV bool EnableImmutableIDFV bool FailFastFV bool FetchParallelismFV int @@ -110,6 +112,19 @@ func AddDisableIncrementalsFlag(cmd *cobra.Command) { cobra.CheckErr(fs.MarkHidden(DisableIncrementalsFN)) } +// Adds the hidden '--force-item-data-download' cli flag which, when set, +// disables kopia-assisted incremental backups. +func AddForceItemDataDownloadFlag(cmd *cobra.Command) { + fs := cmd.Flags() + fs.BoolVar( + &ForceItemDataDownloadFV, + ForceItemDataDownloadFN, + false, + "Disable cached data checks in backups to force item redownloads for "+ + "items changed since the last successful backup.") + cobra.CheckErr(fs.MarkHidden(ForceItemDataDownloadFN)) +} + // Adds the hidden '--disable-delta' cli flag which, when set, disables // delta based backups. 
func AddDisableDeltaFlag(cmd *cobra.Command) { diff --git a/src/cli/utils/options.go b/src/cli/utils/options.go index 932c56b6b..fdb92aced 100644 --- a/src/cli/utils/options.go +++ b/src/cli/utils/options.go @@ -23,6 +23,7 @@ func Control() control.Options { opt.DisableMetrics = flags.NoStatsFV opt.SkipReduce = flags.SkipReduceFV opt.ToggleFeatures.DisableIncrementals = flags.DisableIncrementalsFV + opt.ToggleFeatures.ForceItemDataDownload = flags.ForceItemDataDownloadFV opt.ToggleFeatures.DisableDelta = flags.DisableDeltaFV opt.ToggleFeatures.ExchangeImmutableIDs = flags.EnableImmutableIDFV opt.ToggleFeatures.DisableConcurrencyLimiter = flags.DisableConcurrencyLimiterFV diff --git a/src/cli/utils/options_test.go b/src/cli/utils/options_test.go index 1a8f7ddcd..6cb12e9d1 100644 --- a/src/cli/utils/options_test.go +++ b/src/cli/utils/options_test.go @@ -29,6 +29,7 @@ func (suite *OptionsUnitSuite) TestAddExchangeCommands() { Run: func(cmd *cobra.Command, args []string) { assert.True(t, flags.FailFastFV, flags.FailFastFN) assert.True(t, flags.DisableIncrementalsFV, flags.DisableIncrementalsFN) + assert.True(t, flags.ForceItemDataDownloadFV, flags.ForceItemDataDownloadFN) assert.True(t, flags.DisableDeltaFV, flags.DisableDeltaFN) assert.True(t, flags.NoStatsFV, flags.NoStatsFN) assert.True(t, flags.RestorePermissionsFV, flags.RestorePermissionsFN) @@ -44,6 +45,7 @@ func (suite *OptionsUnitSuite) TestAddExchangeCommands() { flags.AddFailFastFlag(cmd) flags.AddDisableIncrementalsFlag(cmd) + flags.AddForceItemDataDownloadFlag(cmd) flags.AddDisableDeltaFlag(cmd) flags.AddRestorePermissionsFlag(cmd) flags.AddSkipReduceFlag(cmd) @@ -56,6 +58,7 @@ func (suite *OptionsUnitSuite) TestAddExchangeCommands() { "test", "--" + flags.FailFastFN, "--" + flags.DisableIncrementalsFN, + "--" + flags.ForceItemDataDownloadFN, "--" + flags.DisableDeltaFN, "--" + flags.NoStatsFN, "--" + flags.RestorePermissionsFN, diff --git a/src/cmd/factory/impl/common.go b/src/cmd/factory/impl/common.go index 5904e09d4..f6532828b 100644 --- a/src/cmd/factory/impl/common.go +++ b/src/cmd/factory/impl/common.go @@ -17,9 +17,9 @@ import ( "github.com/alcionai/corso/src/internal/common/str" "github.com/alcionai/corso/src/internal/data" "github.com/alcionai/corso/src/internal/m365" - exchMock "github.com/alcionai/corso/src/internal/m365/exchange/mock" - odStub "github.com/alcionai/corso/src/internal/m365/onedrive/stub" "github.com/alcionai/corso/src/internal/m365/resource" + exchMock "github.com/alcionai/corso/src/internal/m365/service/exchange/mock" + odStub "github.com/alcionai/corso/src/internal/m365/service/onedrive/stub" m365Stub "github.com/alcionai/corso/src/internal/m365/stub" "github.com/alcionai/corso/src/internal/operations/inject" "github.com/alcionai/corso/src/internal/tester" diff --git a/src/cmd/factory/impl/exchange.go b/src/cmd/factory/impl/exchange.go index b7ad4840d..d4513fe4a 100644 --- a/src/cmd/factory/impl/exchange.go +++ b/src/cmd/factory/impl/exchange.go @@ -5,8 +5,8 @@ import ( . 
"github.com/alcionai/corso/src/cli/print" "github.com/alcionai/corso/src/cli/utils" - exchMock "github.com/alcionai/corso/src/internal/m365/exchange/mock" "github.com/alcionai/corso/src/internal/m365/resource" + exchMock "github.com/alcionai/corso/src/internal/m365/service/exchange/mock" "github.com/alcionai/corso/src/pkg/control" "github.com/alcionai/corso/src/pkg/count" "github.com/alcionai/corso/src/pkg/fault" diff --git a/src/go.mod b/src/go.mod index f5d797f39..10f04090d 100644 --- a/src/go.mod +++ b/src/go.mod @@ -8,7 +8,7 @@ require ( github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.3.0 github.com/alcionai/clues v0.0.0-20230728164842-7dc4795a43e4 github.com/armon/go-metrics v0.4.1 - github.com/aws/aws-sdk-go v1.44.316 + github.com/aws/aws-sdk-go v1.44.320 github.com/aws/aws-xray-sdk-go v1.8.1 github.com/cenkalti/backoff/v4 v4.2.1 github.com/google/uuid v1.3.0 @@ -35,7 +35,7 @@ require ( go.uber.org/zap v1.25.0 golang.org/x/exp v0.0.0-20230801115018-d63ba01acd4b golang.org/x/time v0.3.0 - golang.org/x/tools v0.11.1 + golang.org/x/tools v0.12.0 ) require ( @@ -116,12 +116,12 @@ require ( go.opentelemetry.io/otel v1.16.0 // indirect go.opentelemetry.io/otel/trace v1.16.0 // indirect go.uber.org/multierr v1.11.0 // indirect - golang.org/x/crypto v0.11.0 // indirect + golang.org/x/crypto v0.12.0 // indirect golang.org/x/mod v0.12.0 // indirect - golang.org/x/net v0.13.0 + golang.org/x/net v0.14.0 golang.org/x/sync v0.3.0 // indirect - golang.org/x/sys v0.10.0 // indirect - golang.org/x/text v0.11.0 // indirect + golang.org/x/sys v0.11.0 // indirect + golang.org/x/text v0.12.0 // indirect google.golang.org/grpc v1.57.0 // indirect google.golang.org/protobuf v1.31.0 // indirect gopkg.in/ini.v1 v1.67.0 // indirect diff --git a/src/go.sum b/src/go.sum index 57519b404..a2cabe8ce 100644 --- a/src/go.sum +++ b/src/go.sum @@ -66,8 +66,8 @@ github.com/andybalholm/brotli v1.0.5 h1:8uQZIdzKmjc/iuPu7O2ioW48L81FgatrcpfFmiq/ github.com/andybalholm/brotli v1.0.5/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig= github.com/armon/go-metrics v0.4.1 h1:hR91U9KYmb6bLBYLQjyM+3j+rcd/UhE+G78SFnF8gJA= github.com/armon/go-metrics v0.4.1/go.mod h1:E6amYzXo6aW1tqzoZGT755KkbgrJsSdpwZ+3JqfkOG4= -github.com/aws/aws-sdk-go v1.44.316 h1:UC3alCEyzj2XU13ZFGIOHW3yjCNLGTIGVauyetl9fwE= -github.com/aws/aws-sdk-go v1.44.316/go.mod h1:aVsgQcEevwlmQ7qHE9I3h+dtQgpqhFB+i8Phjh7fkwI= +github.com/aws/aws-sdk-go v1.44.320 h1:o2cno15HVUYj+IAgZHJ5No6ifAxwa2HcluzahMEPfOw= +github.com/aws/aws-sdk-go v1.44.320/go.mod h1:aVsgQcEevwlmQ7qHE9I3h+dtQgpqhFB+i8Phjh7fkwI= github.com/aws/aws-xray-sdk-go v1.8.1 h1:O4pXV+hnCskaamGsZnFpzHyAmgPGusBMN6i7nnsy0Fo= github.com/aws/aws-xray-sdk-go v1.8.1/go.mod h1:wMmVYzej3sykAttNBkXQHK/+clAPWTOrPiajEk7Cp3A= github.com/benbjohnson/clock v1.3.0 h1:ip6w0uFQkncKQ979AypyG0ER7mqUSBdKLOgAle/AT8A= @@ -448,8 +448,8 @@ golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPh golang.org/x/crypto v0.0.0-20210421170649-83a5a9bb288b/go.mod h1:T9bdIzuCu7OtxOm1hfPfRQxPLYneinmdGuTeoZ9dtd4= golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= golang.org/x/crypto v0.0.0-20220722155217-630584e8d5aa/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= -golang.org/x/crypto v0.11.0 h1:6Ewdq3tDic1mg5xRO4milcWCfMVQhI4NkqWWvqejpuA= -golang.org/x/crypto v0.11.0/go.mod h1:xgJhtzW8F9jGdVFWZESrid1U1bjeNy4zgy5cRr/CIio= +golang.org/x/crypto v0.12.0 h1:tFM/ta59kqch6LlvYnPa0yx5a83cL2nHflFhYKvv9Yk= +golang.org/x/crypto v0.12.0/go.mod 
h1:NF0Gs7EO5K4qLn+Ylc+fih8BSTeIjAP05siRnAh98yw= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8= @@ -524,8 +524,8 @@ golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= golang.org/x/net v0.1.0/go.mod h1:Cx3nUiGt4eDBEyega/BKRp+/AlGL8hYe7U9odMt2Cco= -golang.org/x/net v0.13.0 h1:Nvo8UFsZ8X3BhAC9699Z1j7XQ3rsZnUUm7jfBEk1ueY= -golang.org/x/net v0.13.0/go.mod h1:zEVYFnQC7m/vmpQFELhcD1EWkZlX69l4oqgmer6hfKA= +golang.org/x/net v0.14.0 h1:BONx9s002vGdD9umnlX1Po8vOZmrgH34qlHcD1MfK14= +golang.org/x/net v0.14.0/go.mod h1:PpSgVXXLK0OxS0F31C1/tv6XNguvCrnXIDrFMspZIUI= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= @@ -597,8 +597,8 @@ golang.org/x/sys v0.0.0-20220908164124-27713097b956/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.10.0 h1:SqMFp9UcQJZa+pmYuAKjd9xq1f0j5rLcDIk0mj4qAsA= -golang.org/x/sys v0.10.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.11.0 h1:eG7RXZHdqOJ1i+0lgLgCpSXAp6M3LYlAo6osgSi0xOM= +golang.org/x/sys v0.11.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.1.0/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= @@ -611,8 +611,8 @@ golang.org/x/text v0.3.4/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/text v0.4.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= -golang.org/x/text v0.11.0 h1:LAntKIrcmeSKERyiOh0XMV39LXS8IE9UL2yP7+f5ij4= -golang.org/x/text v0.11.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= +golang.org/x/text v0.12.0 h1:k+n5B8goJNdU7hSvEtMUz3d1Q6D/XW4COJSJR6fN0mc= +golang.org/x/text v0.12.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= @@ -666,8 +666,8 @@ golang.org/x/tools v0.0.0-20210105154028-b0ab187a4818/go.mod h1:emZCQorbCU4vsT4f golang.org/x/tools v0.0.0-20210108195828-e2f9c7f1fc8e/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools 
v0.1.0/go.mod h1:xkSsbof2nBLbhDlRMhhhyNLN/zl3eTqcnHD5viDpcZ0= golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= -golang.org/x/tools v0.11.1 h1:ojD5zOW8+7dOGzdnNgersm8aPfcDjhMp12UfG93NIMc= -golang.org/x/tools v0.11.1/go.mod h1:anzJrxPjNtfgiYQYirP2CPGzGLxrH2u2QBhn6Bf3qY8= +golang.org/x/tools v0.12.0 h1:YW6HUoUmYBpwSgyaGaZq1fHjrBjX1rlpZ54T6mu2kss= +golang.org/x/tools v0.12.0/go.mod h1:Sc0INKfu04TlqNoRA1hgpFZbhYXHPr4V5DzpSBTPqQM= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= diff --git a/src/internal/common/readers/retry_handler.go b/src/internal/common/readers/retry_handler.go new file mode 100644 index 000000000..b52389f83 --- /dev/null +++ b/src/internal/common/readers/retry_handler.go @@ -0,0 +1,228 @@ +package readers + +import ( + "context" + "errors" + "fmt" + "io" + "syscall" + "time" + + "github.com/alcionai/clues" + + "github.com/alcionai/corso/src/pkg/logger" +) + +var _ io.ReadCloser = &resetRetryHandler{} + +const ( + minSleepTime = 3 + numMaxRetries = 3 + rangeHeaderKey = "Range" + // One-sided range like this is defined as starting at the given byte and + // extending to the end of the item. + rangeHeaderOneSidedValueTmpl = "bytes=%d-" +) + +// Could make this per wrapper instance if we need additional flexibility +// between callers. +var retryErrs = []error{ + syscall.ECONNRESET, +} + +type Getter interface { + // SupportsRange returns true if this Getter supports adding Range headers to + // the Get call. Otherwise returns false. + SupportsRange() bool + // Get attempts to get another reader for the data this reader is returning. + // headers denotes any additional headers that should be added to the request, + // like a Range header. + // + // Don't allow passing a URL to Get so that we can hide the fact that some + // components may need to dynamically refresh the fetch URL (i.e. OneDrive) + // from this wrapper. + // + // Get should encapsulate all error handling and status code checking required + // for the component. This function is called both during NewResetRetryHandler + // and Read so it's possible to discover errors with the item prior to + // informing other components about it if desired. + Get(ctx context.Context, headers map[string]string) (io.ReadCloser, error) +} + +// NewResetRetryHandler returns an io.ReadCloser with the reader initialized to +// the result of getter. The reader is eagerly initialized during this call so +// if callers of this function want to delay initialization they should wrap +// this reader in a lazy initializer. +// +// Selected errors that the reader hits during Read calls (e.x. +// syscall.ECONNRESET) will be automatically retried by the returned reader. +func NewResetRetryHandler( + ctx context.Context, + getter Getter, +) (*resetRetryHandler, error) { + rrh := &resetRetryHandler{ + ctx: ctx, + getter: getter, + } + + // Retry logic encapsulated in reconnect so no need for it here. 
+ _, err := rrh.reconnect(numMaxRetries) + + return rrh, clues.Wrap(err, "initializing reader").OrNil() +} + +//nolint:unused +type resetRetryHandler struct { + ctx context.Context + getter Getter + innerReader io.ReadCloser + offset int64 +} + +func isRetriable(err error) bool { + if err == nil { + return false + } + + for _, e := range retryErrs { + if errors.Is(err, e) { + return true + } + } + + return false +} + +func (rrh *resetRetryHandler) Read(p []byte) (int, error) { + if rrh.innerReader == nil { + return 0, clues.New("not initialized") + } + + var ( + // Use separate error variable just to make other assignments in the loop a + // bit cleaner. + finalErr error + read int + numRetries int + ) + + // Still need to check retry count in loop header so we don't go through one + // last time after failing to reconnect due to exhausting retries. + for numRetries < numMaxRetries { + n, err := rrh.innerReader.Read(p[read:]) + rrh.offset = rrh.offset + int64(n) + read = read + n + + // Catch short reads with no error and errors we don't know how to retry. + if !isRetriable(err) { + // Not everything knows how to handle a wrapped version of EOF (including + // io.ReadAll) so return the error itself here. + if errors.Is(err, io.EOF) { + // Log info about the error, but only if it's not directly an EOF. + // Otherwise this can be rather chatty and annoying to filter out. + if err != io.EOF { + logger.CtxErr(rrh.ctx, err).Debug("dropping wrapped io.EOF") + } + + return read, io.EOF + } + + return read, clues.Stack(err).WithClues(rrh.ctx).OrNil() + } + + logger.Ctx(rrh.ctx).Infow( + "restarting reader", + "supports_range", rrh.getter.SupportsRange(), + "restart_at_offset", rrh.offset, + "retries_remaining", numMaxRetries-numRetries, + "retriable_error", err) + + attempts, err := rrh.reconnect(numMaxRetries - numRetries) + numRetries = numRetries + attempts + finalErr = err + } + + // We couldn't read anything through all the retries but never had an error + // getting another reader. Report this as an error so we don't get stuck in an + // infinite loop. + if read == 0 && finalErr == nil && numRetries >= numMaxRetries { + finalErr = clues.Wrap(io.ErrNoProgress, "unable to read data") + } + + return read, clues.Stack(finalErr).OrNil() +} + +// reconnect attempts to get another instance of the underlying reader and set +// the reader to pick up where the previous reader left off. +// +// Since this function can be called by functions that also implement retries on +// read errors, pass an int in to denote how many times to attempt to reconnect. +// This avoids multiplicative retries when called from other functions. +func (rrh *resetRetryHandler) reconnect(maxRetries int) (int, error) { + var ( + attempts int + skip = rrh.offset + headers = map[string]string{} + // This is annoying but we want the equivalent of a do-while loop. + err = retryErrs[0] + ) + + // Only set the range header if we've already read data. Otherwise we could + // get 416 (range not satisfiable) if the file is empty. + if rrh.getter.SupportsRange() && rrh.offset > 0 { + headers[rangeHeaderKey] = fmt.Sprintf( + rangeHeaderOneSidedValueTmpl, + rrh.offset) + skip = 0 + } + + ctx := clues.Add( + rrh.ctx, + "supports_range", rrh.getter.SupportsRange(), + "restart_at_offset", rrh.offset) + + for attempts < maxRetries && isRetriable(err) { + // Attempts will be 0 the first time through so it won't sleep then.
+ time.Sleep(time.Duration(attempts*minSleepTime) * time.Second) + + attempts++ + + var r io.ReadCloser + + r, err = rrh.getter.Get(ctx, headers) + if err != nil { + err = clues.Wrap(err, "retrying connection"). + WithClues(ctx). + With("attempt_num", attempts) + + continue + } + + if rrh.innerReader != nil { + rrh.innerReader.Close() + } + + rrh.innerReader = r + + // If we can't request a specific range of content then read as many bytes + // as we've already processed into the equivalent of /dev/null so that the + // next read will get content we haven't seen before. + if skip > 0 { + _, err = io.CopyN(io.Discard, rrh.innerReader, skip) + if err != nil { + err = clues.Wrap(err, "seeking to correct offset"). + WithClues(ctx). + With("attempt_num", attempts) + } + } + } + + return attempts, err +} + +func (rrh *resetRetryHandler) Close() error { + err := rrh.innerReader.Close() + rrh.innerReader = nil + + return clues.Stack(err).OrNil() +} diff --git a/src/internal/common/readers/retry_handler_test.go b/src/internal/common/readers/retry_handler_test.go new file mode 100644 index 000000000..e6bca2585 --- /dev/null +++ b/src/internal/common/readers/retry_handler_test.go @@ -0,0 +1,477 @@ +package readers_test + +import ( + "bytes" + "context" + "io" + "syscall" + "testing" + + "github.com/alcionai/clues" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "github.com/stretchr/testify/suite" + + "github.com/alcionai/corso/src/internal/common/readers" + "github.com/alcionai/corso/src/internal/tester" +) + +type readResp struct { + read int + // sticky denotes whether the error should continue to be returned until reset + // is called. + sticky bool + err error +} + +type mockReader struct { + r io.Reader + data []byte + // Associate return values for Read with calls. Allows partial reads as well. + // If a value for a particular read call is not in the map that means + // completing the request completely with no errors (i.e. all bytes requested + // are returned or as many as possible and EOF). 
+ resps map[int]readResp + callCount int + stickyErr error +} + +func (mr *mockReader) Read(p []byte) (int, error) { + defer func() { + mr.callCount++ + }() + + if mr.r == nil { + mr.reset(0) + } + + if mr.stickyErr != nil { + return 0, clues.Wrap(mr.stickyErr, "sticky error") + } + + resp, ok := mr.resps[mr.callCount] + if !ok { + n, err := mr.r.Read(p) + return n, clues.Stack(err).OrNil() + } + + n, err := mr.r.Read(p[:resp.read]) + + if resp.err != nil { + if resp.sticky { + mr.stickyErr = resp.err + } + + return n, clues.Stack(resp.err) + } + + return n, clues.Stack(err).OrNil() +} + +func (mr *mockReader) reset(n int) { + mr.r = bytes.NewBuffer(mr.data[n:]) + mr.stickyErr = nil +} + +type getterResp struct { + offset int + err error +} + +type mockGetter struct { + t *testing.T + supportsRange bool + reader *mockReader + resps map[int]getterResp + expectHeaders map[int]map[string]string + callCount int +} + +func (mg *mockGetter) SupportsRange() bool { + return mg.supportsRange +} + +func (mg *mockGetter) Get( + ctx context.Context, + headers map[string]string, +) (io.ReadCloser, error) { + defer func() { + mg.callCount++ + }() + + expectHeaders := mg.expectHeaders[mg.callCount] + if expectHeaders == nil { + expectHeaders = map[string]string{} + } + + assert.Equal(mg.t, expectHeaders, headers) + + resp := mg.resps[mg.callCount] + + if resp.offset >= 0 { + mg.reader.reset(resp.offset) + } + + return io.NopCloser(mg.reader), clues.Stack(resp.err).OrNil() +} + +type ResetRetryHandlerUnitSuite struct { + tester.Suite +} + +func TestResetRetryHandlerUnitSuite(t *testing.T) { + suite.Run(t, &ResetRetryHandlerUnitSuite{Suite: tester.NewUnitSuite(t)}) +} + +func (suite *ResetRetryHandlerUnitSuite) TestResetRetryHandler() { + data := []byte("abcdefghijklmnopqrstuvwxyz") + // Pick a smaller read size so we can see how things will act if we have a + // "chunked" set of data. + readSize := 4 + + table := []struct { + name string + supportsRange bool + // 0th entry is the return data when trying to initialize the wrapper. + getterResps map[int]getterResp + // 0th entry is the return data when trying to initialize the wrapper. 
+ getterExpectHeaders map[int]map[string]string + readerResps map[int]readResp + expectData []byte + expectErr error + }{ + { + name: "OnlyFirstGetErrors NoRangeSupport", + getterResps: map[int]getterResp{ + 0: { + err: syscall.ECONNRESET, + }, + }, + expectData: data, + }, + { + name: "OnlyFirstReadErrors RangeSupport", + supportsRange: true, + getterResps: map[int]getterResp{ + 0: { + err: syscall.ECONNRESET, + }, + }, + expectData: data, + }, + { + name: "ErrorInMiddle NoRangeSupport", + readerResps: map[int]readResp{ + 3: { + read: 0, + err: syscall.ECONNRESET, + }, + }, + expectData: data, + }, + { + name: "ErrorInMiddle RangeSupport", + supportsRange: true, + getterResps: map[int]getterResp{ + 1: {offset: 12}, + }, + getterExpectHeaders: map[int]map[string]string{ + 1: {"Range": "bytes=12-"}, + }, + readerResps: map[int]readResp{ + 3: { + read: 0, + err: syscall.ECONNRESET, + }, + }, + expectData: data, + }, + { + name: "MultipleErrorsInMiddle NoRangeSupport", + readerResps: map[int]readResp{ + 3: { + read: 0, + err: syscall.ECONNRESET, + }, + 7: { + read: 0, + err: syscall.ECONNRESET, + }, + }, + expectData: data, + }, + { + name: "MultipleErrorsInMiddle RangeSupport", + supportsRange: true, + getterResps: map[int]getterResp{ + 1: {offset: 12}, + 2: {offset: 20}, + }, + getterExpectHeaders: map[int]map[string]string{ + 1: {"Range": "bytes=12-"}, + 2: {"Range": "bytes=20-"}, + }, + readerResps: map[int]readResp{ + 3: { + read: 0, + err: syscall.ECONNRESET, + }, + 6: { + read: 0, + err: syscall.ECONNRESET, + }, + }, + expectData: data, + }, + { + name: "ShortReadWithError NoRangeSupport", + readerResps: map[int]readResp{ + 3: { + read: readSize / 2, + err: syscall.ECONNRESET, + }, + }, + expectData: data, + }, + { + name: "ShortReadWithError RangeSupport", + supportsRange: true, + getterResps: map[int]getterResp{ + 1: {offset: 14}, + }, + getterExpectHeaders: map[int]map[string]string{ + 1: {"Range": "bytes=14-"}, + }, + readerResps: map[int]readResp{ + 3: { + read: readSize / 2, + err: syscall.ECONNRESET, + }, + }, + expectData: data, + }, + { + name: "ErrorAtEndOfRead NoRangeSupport", + readerResps: map[int]readResp{ + 3: { + read: readSize, + sticky: true, + err: syscall.ECONNRESET, + }, + }, + expectData: data, + }, + { + name: "ErrorAtEndOfRead RangeSupport", + supportsRange: true, + getterResps: map[int]getterResp{ + 1: {offset: 16}, + }, + getterExpectHeaders: map[int]map[string]string{ + 1: {"Range": "bytes=16-"}, + }, + readerResps: map[int]readResp{ + 3: { + read: readSize, + sticky: true, + err: syscall.ECONNRESET, + }, + }, + expectData: data, + }, + { + name: "UnexpectedError NoRangeSupport", + readerResps: map[int]readResp{ + 3: { + read: 0, + err: assert.AnError, + }, + }, + expectData: data[:12], + expectErr: assert.AnError, + }, + { + name: "UnexpectedError RangeSupport", + supportsRange: true, + getterResps: map[int]getterResp{ + 1: {offset: 12}, + }, + getterExpectHeaders: map[int]map[string]string{ + 1: {"Range": "bytes=12-"}, + }, + readerResps: map[int]readResp{ + 3: { + read: 0, + err: assert.AnError, + }, + }, + expectData: data[:12], + expectErr: assert.AnError, + }, + { + name: "ErrorWhileSeeking NoRangeSupport", + readerResps: map[int]readResp{ + 3: { + read: 0, + err: syscall.ECONNRESET, + }, + 4: { + read: 0, + err: syscall.ECONNRESET, + }, + }, + expectData: data, + }, + { + name: "ShortReadNoError NoRangeSupport", + readerResps: map[int]readResp{ + 3: { + read: readSize / 2, + }, + }, + expectData: data, + }, + { + name: "ShortReadNoError RangeSupport", 
+ supportsRange: true, + getterResps: map[int]getterResp{ + 1: {offset: 14}, + }, + getterExpectHeaders: map[int]map[string]string{ + 1: {"Range": "bytes=14-"}, + }, + readerResps: map[int]readResp{ + 3: { + read: readSize / 2, + }, + }, + expectData: data, + }, + { + name: "TooManyRetriesDuringRead NoRangeSupport", + // Fail the final reconnect attempt so we run out of retries. Otherwise we + // exit with a short read and successful reconnect. + getterResps: map[int]getterResp{ + 3: {err: syscall.ECONNRESET}, + }, + // Even numbered read requests are seeks to the proper offset. + readerResps: map[int]readResp{ + 3: { + read: 0, + err: syscall.ECONNRESET, + }, + 5: { + read: 1, + err: syscall.ECONNRESET, + }, + 7: { + read: 1, + err: syscall.ECONNRESET, + }, + }, + expectData: data[:14], + expectErr: syscall.ECONNRESET, + }, + { + name: "TooManyRetriesDuringRead RangeSupport", + supportsRange: true, + getterResps: map[int]getterResp{ + 1: {offset: 12}, + 2: {offset: 12}, + 3: {err: syscall.ECONNRESET}, + }, + getterExpectHeaders: map[int]map[string]string{ + 1: {"Range": "bytes=12-"}, + 2: {"Range": "bytes=13-"}, + 3: {"Range": "bytes=14-"}, + }, + readerResps: map[int]readResp{ + 3: { + read: 0, + err: syscall.ECONNRESET, + }, + 4: { + read: 1, + err: syscall.ECONNRESET, + }, + 5: { + read: 1, + err: syscall.ECONNRESET, + }, + }, + expectData: data[:14], + expectErr: syscall.ECONNRESET, + }, + { + name: "TooManyRetriesDuringRead AlwaysReturnError RangeSupport", + supportsRange: true, + getterResps: map[int]getterResp{ + 1: {offset: -1}, + 2: {offset: -1}, + 3: {offset: -1}, + 4: {offset: -1}, + 5: {offset: -1}, + }, + readerResps: map[int]readResp{ + 0: { + sticky: true, + err: syscall.ECONNRESET, + }, + }, + expectData: []byte{}, + expectErr: io.ErrNoProgress, + }, + } + + for _, test := range table { + suite.Run(test.name, func() { + t := suite.T() + + ctx, flush := tester.NewContext(t) + defer flush() + + reader := &mockReader{ + data: data, + resps: test.readerResps, + } + + getter := &mockGetter{ + t: t, + supportsRange: test.supportsRange, + reader: reader, + resps: test.getterResps, + expectHeaders: test.getterExpectHeaders, + } + + var ( + err error + n int + offset int + resData = make([]byte, len(data)) + ) + + rrh, err := readers.NewResetRetryHandler(ctx, getter) + require.NoError(t, err, "making reader wrapper: %v", clues.ToCore(err)) + + for err == nil && offset < len(data) { + end := offset + readSize + if end > len(data) { + end = len(data) + } + + n, err = rrh.Read(resData[offset:end]) + + offset = offset + n + } + + assert.Equal(t, test.expectData, data[:offset]) + + if test.expectErr == nil { + assert.NoError(t, err, clues.ToCore(err)) + return + } + + assert.ErrorIs(t, err, test.expectErr, clues.ToCore(err)) + }) + } +} diff --git a/src/internal/data/data_collection.go b/src/internal/data/data_collection.go index cec096783..3251179b8 100644 --- a/src/internal/data/data_collection.go +++ b/src/internal/data/data_collection.go @@ -138,6 +138,9 @@ type StreamSize interface { } // StreamModTime is used to provide the modified time of the stream's data. +// +// If an item implements StreamModTime and StreamInfo it should return the same +// value here as in item.Info().Modified(). 
type StreamModTime interface { ModTime() time.Time } diff --git a/src/internal/kopia/backup_bases.go b/src/internal/kopia/backup_bases.go index c0b8ecfaa..4c2c95fb7 100644 --- a/src/internal/kopia/backup_bases.go +++ b/src/internal/kopia/backup_bases.go @@ -8,6 +8,7 @@ import ( "golang.org/x/exp/slices" "github.com/alcionai/corso/src/internal/version" + "github.com/alcionai/corso/src/pkg/backup/identity" "github.com/alcionai/corso/src/pkg/logger" ) @@ -16,6 +17,7 @@ import ( type BackupBases interface { RemoveMergeBaseByManifestID(manifestID manifest.ID) Backups() []BackupEntry + AssistBackups() []BackupEntry MinBackupVersion() int MergeBases() []ManifestEntry ClearMergeBases() @@ -24,16 +26,17 @@ type BackupBases interface { MergeBackupBases( ctx context.Context, other BackupBases, - reasonToKey func(Reasoner) string, + reasonToKey func(identity.Reasoner) string, ) BackupBases } type backupBases struct { // backups and mergeBases should be modified together as they relate similar // data. - backups []BackupEntry - mergeBases []ManifestEntry - assistBases []ManifestEntry + backups []BackupEntry + mergeBases []ManifestEntry + assistBackups []BackupEntry + assistBases []ManifestEntry } func (bb *backupBases) RemoveMergeBaseByManifestID(manifestID manifest.ID) { @@ -71,6 +74,10 @@ func (bb backupBases) Backups() []BackupEntry { return slices.Clone(bb.backups) } +func (bb backupBases) AssistBackups() []BackupEntry { + return slices.Clone(bb.assistBackups) +} + func (bb *backupBases) MinBackupVersion() int { min := version.NoBackup @@ -116,16 +123,16 @@ func (bb *backupBases) ClearAssistBases() { // // Selection priority, for each reason key generated by reasonsToKey, follows // these rules: -// 1. If the called BackupBases has an entry for a given resaon, ignore the +// 1. If the called BackupBases has an entry for a given reason, ignore the // other BackupBases matching that reason. -// 2. If the the receiver BackupBases has only AssistBases, look for a matching -// MergeBase manifest in the passed in BackupBases. -// 3. If the called BackupBases has no entry for a reason, look for both -// AssistBases and MergeBases in the passed in BackupBases. +// 2. If the called BackupBases has only AssistBases, look for a matching +// MergeBase manifest in the other BackupBases. +// 3. If the called BackupBases has no entry for a reason, look for a matching +// MergeBase in the other BackupBases. func (bb *backupBases) MergeBackupBases( ctx context.Context, other BackupBases, - reasonToKey func(reason Reasoner) string, + reasonToKey func(reason identity.Reasoner) string, ) BackupBases { if other == nil || (len(other.MergeBases()) == 0 && len(other.AssistBases()) == 0) { return bb @@ -159,7 +166,7 @@ func (bb *backupBases) MergeBackupBases( // Calculate the set of mergeBases to pull from other into this one. for _, m := range other.MergeBases() { - useReasons := []Reasoner{} + useReasons := []identity.Reasoner{} for _, r := range m.Reasons { k := reasonToKey(r) @@ -183,6 +190,10 @@ func (bb *backupBases) MergeBackupBases( backups: bb.Backups(), mergeBases: bb.MergeBases(), assistBases: bb.AssistBases(), + // Note that assistBackups are a new feature and don't exist + // in prior versions where we were using UPN based reasons i.e. + // other won't have any assistBackups. + assistBackups: bb.AssistBackups(), } // Add new mergeBases and backups. 
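For context on the reasonToKey parameter in the MergeBackupBases change above: a minimal sketch of the kind of key function it expects, assuming only the identity.Reasoner accessors that appear in this diff (ProtectedResource, Service, Category). The unit tests further down key on service and category alone; also including the protected resource, as base_finder's reasonKey does, narrows matching to a single owner. Reasons that produce the same key are treated as covering the same data, so bases from the other BackupBases are only merged in for keys the receiver lacks.

func exampleReasonKey(r identity.Reasoner) string {
	// Hypothetical key function; the caller decides how coarsely reasons
	// are deduplicated when merging backup bases.
	return r.ProtectedResource() + r.Service().String() + r.Category().String()
}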
@@ -205,39 +216,11 @@ func (bb *backupBases) MergeBackupBases( res.backups = append(res.backups, bup) res.mergeBases = append(res.mergeBases, man) + // TODO(pandeyabs): Remove this once we remove overlap between + // between merge and assist bases as part of #3943. res.assistBases = append(res.assistBases, man) } - // Add assistBases from other to this one as needed. - for _, m := range other.AssistBases() { - useReasons := []Reasoner{} - - // Assume that all complete manifests in assist overlap with MergeBases. - if len(m.IncompleteReason) == 0 { - continue - } - - for _, r := range m.Reasons { - k := reasonToKey(r) - if _, ok := assist[k]; ok { - // This reason is already covered by either: - // * complete manifest in bb - // * incomplete manifest in bb - // - // If it was already in the assist set then it must be the case that - // it's newer than any complete manifests in other for the same reason. - continue - } - - useReasons = append(useReasons, r) - } - - if len(useReasons) > 0 { - m.Reasons = useReasons - res.assistBases = append(res.assistBases, m) - } - } - return res } @@ -326,12 +309,16 @@ func getBackupByID(backups []BackupEntry, bID string) (BackupEntry, bool) { // pull. On the other hand, *not* dropping them is unsafe as it will muck up // merging when we add stuff to kopia (possibly multiple entries for the same // item etc). +// +// TODO(pandeyabs): Refactor common code into a helper as part of #3943. func (bb *backupBases) fixupAndVerify(ctx context.Context) { toDrop := findNonUniqueManifests(ctx, bb.mergeBases) var ( - backupsToKeep []BackupEntry - mergeToKeep []ManifestEntry + backupsToKeep []BackupEntry + assistBackupsToKeep []BackupEntry + mergeToKeep []ManifestEntry + assistToKeep []ManifestEntry ) for _, man := range bb.mergeBases { @@ -346,7 +333,7 @@ func (bb *backupBases) fixupAndVerify(ctx context.Context) { toDrop[man.ID] = struct{}{} logger.Ctx(ctx).Info( - "dropping manifest due to missing backup", + "dropping merge base due to missing backup", "manifest_id", man.ID) continue @@ -361,7 +348,7 @@ func (bb *backupBases) fixupAndVerify(ctx context.Context) { toDrop[man.ID] = struct{}{} logger.Ctx(ctx).Info( - "dropping manifest due to invalid backup", + "dropping merge base due to invalid backup", "manifest_id", man.ID) continue @@ -371,9 +358,9 @@ func (bb *backupBases) fixupAndVerify(ctx context.Context) { mergeToKeep = append(mergeToKeep, man) } - var assistToKeep []ManifestEntry - - for _, man := range bb.assistBases { + // Every merge base is also a kopia assist base. + // TODO(pandeyabs): This should be removed as part of #3943. + for _, man := range bb.mergeBases { if _, ok := toDrop[man.ID]; ok { continue } @@ -381,7 +368,48 @@ func (bb *backupBases) fixupAndVerify(ctx context.Context) { assistToKeep = append(assistToKeep, man) } + // Drop assist snapshots with overlapping reasons. 
+ toDropAssists := findNonUniqueManifests(ctx, bb.assistBases) + + for _, man := range bb.assistBases { + if _, ok := toDropAssists[man.ID]; ok { + continue + } + + bID, _ := man.GetTag(TagBackupID) + + bup, ok := getBackupByID(bb.assistBackups, bID) + if !ok { + toDrop[man.ID] = struct{}{} + + logger.Ctx(ctx).Info( + "dropping assist base due to missing backup", + "manifest_id", man.ID) + + continue + } + + deetsID := bup.StreamStoreID + if len(deetsID) == 0 { + deetsID = bup.DetailsID + } + + if len(bup.SnapshotID) == 0 || len(deetsID) == 0 { + toDrop[man.ID] = struct{}{} + + logger.Ctx(ctx).Info( + "dropping assist base due to invalid backup", + "manifest_id", man.ID) + + continue + } + + assistBackupsToKeep = append(assistBackupsToKeep, bup) + assistToKeep = append(assistToKeep, man) + } + bb.backups = backupsToKeep bb.mergeBases = mergeToKeep bb.assistBases = assistToKeep + bb.assistBackups = assistBackupsToKeep } diff --git a/src/internal/kopia/backup_bases_test.go b/src/internal/kopia/backup_bases_test.go index 04afb5408..faa402162 100644 --- a/src/internal/kopia/backup_bases_test.go +++ b/src/internal/kopia/backup_bases_test.go @@ -13,10 +13,11 @@ import ( "github.com/alcionai/corso/src/internal/tester" "github.com/alcionai/corso/src/internal/version" "github.com/alcionai/corso/src/pkg/backup" + "github.com/alcionai/corso/src/pkg/backup/identity" "github.com/alcionai/corso/src/pkg/path" ) -func makeManifest(id, incmpl, bID string, reasons ...Reasoner) ManifestEntry { +func makeManifest(id, incmpl, bID string, reasons ...identity.Reasoner) ManifestEntry { bIDKey, _ := makeTagKV(TagBackupID) return ManifestEntry{ @@ -206,36 +207,25 @@ func (suite *BackupBasesUnitSuite) TestMergeBackupBases() { ro := "resource_owner" type testInput struct { - id int - incomplete bool - cat []path.CategoryType + id int + cat []path.CategoryType } // Make a function so tests can modify things without messing with each other. - makeBackupBases := func(ti []testInput) *backupBases { + makeBackupBases := func(mergeInputs []testInput, assistInputs []testInput) *backupBases { res := &backupBases{} - for _, i := range ti { + for _, i := range mergeInputs { baseID := fmt.Sprintf("id%d", i.id) - ir := "" - - if i.incomplete { - ir = "checkpoint" - } - - reasons := make([]Reasoner, 0, len(i.cat)) + reasons := make([]identity.Reasoner, 0, len(i.cat)) for _, c := range i.cat { reasons = append(reasons, NewReason("", ro, path.ExchangeService, c)) } - m := makeManifest(baseID, ir, "b"+baseID, reasons...) + m := makeManifest(baseID, "", "b"+baseID, reasons...) res.assistBases = append(res.assistBases, m) - if i.incomplete { - continue - } - b := BackupEntry{ Backup: &backup.Backup{ BaseModel: model.BaseModel{ID: model.StableID("b" + baseID)}, @@ -249,192 +239,217 @@ func (suite *BackupBasesUnitSuite) TestMergeBackupBases() { res.mergeBases = append(res.mergeBases, m) } + for _, i := range assistInputs { + baseID := fmt.Sprintf("id%d", i.id) + + reasons := make([]identity.Reasoner, 0, len(i.cat)) + + for _, c := range i.cat { + reasons = append(reasons, NewReason("", ro, path.ExchangeService, c)) + } + + m := makeManifest(baseID, "", "a"+baseID, reasons...) 
+ + b := BackupEntry{ + Backup: &backup.Backup{ + BaseModel: model.BaseModel{ + ID: model.StableID("a" + baseID), + Tags: map[string]string{model.BackupTypeTag: model.AssistBackup}, + }, + SnapshotID: baseID, + StreamStoreID: "ss" + baseID, + }, + Reasons: reasons, + } + + res.assistBackups = append(res.assistBackups, b) + res.assistBases = append(res.assistBases, m) + } + return res } table := []struct { - name string - bb []testInput - other []testInput - expect []testInput + name string + merge []testInput + assist []testInput + otherMerge []testInput + otherAssist []testInput + expect func() *backupBases }{ { name: "Other Empty", - bb: []testInput{ + merge: []testInput{ {cat: []path.CategoryType{path.EmailCategory}}, }, - expect: []testInput{ + assist: []testInput{ {cat: []path.CategoryType{path.EmailCategory}}, }, + expect: func() *backupBases { + bs := makeBackupBases([]testInput{ + {cat: []path.CategoryType{path.EmailCategory}}, + }, []testInput{ + {cat: []path.CategoryType{path.EmailCategory}}, + }) + + return bs + }, }, { - name: "BB Empty", - other: []testInput{ + name: "current Empty", + otherMerge: []testInput{ {cat: []path.CategoryType{path.EmailCategory}}, }, - expect: []testInput{ + otherAssist: []testInput{ {cat: []path.CategoryType{path.EmailCategory}}, }, + expect: func() *backupBases { + bs := makeBackupBases([]testInput{ + {cat: []path.CategoryType{path.EmailCategory}}, + }, []testInput{ + {cat: []path.CategoryType{path.EmailCategory}}, + }) + + return bs + }, }, { - name: "Other overlaps Complete And Incomplete", - bb: []testInput{ - {cat: []path.CategoryType{path.EmailCategory}}, + name: "Other overlaps merge and assist", + merge: []testInput{ { - id: 1, - cat: []path.CategoryType{path.EmailCategory}, - incomplete: true, + id: 1, + cat: []path.CategoryType{path.EmailCategory}, }, }, - other: []testInput{ + assist: []testInput{ + { + id: 4, + cat: []path.CategoryType{path.EmailCategory}, + }, + }, + otherMerge: []testInput{ { id: 2, cat: []path.CategoryType{path.EmailCategory}, }, { - id: 3, - cat: []path.CategoryType{path.EmailCategory}, - incomplete: true, + id: 3, + cat: []path.CategoryType{path.EmailCategory}, }, }, - expect: []testInput{ - {cat: []path.CategoryType{path.EmailCategory}}, + otherAssist: []testInput{ { - id: 1, - cat: []path.CategoryType{path.EmailCategory}, - incomplete: true, + id: 5, + cat: []path.CategoryType{path.EmailCategory}, }, }, + expect: func() *backupBases { + bs := makeBackupBases([]testInput{ + { + id: 1, + cat: []path.CategoryType{path.EmailCategory}, + }, + }, []testInput{ + { + id: 4, + cat: []path.CategoryType{path.EmailCategory}, + }, + }) + + return bs + }, }, { - name: "Other Overlaps Complete", - bb: []testInput{ - {cat: []path.CategoryType{path.EmailCategory}}, + name: "Other overlaps merge", + merge: []testInput{ + { + id: 1, + cat: []path.CategoryType{path.EmailCategory}, + }, }, - other: []testInput{ + otherMerge: []testInput{ { id: 2, cat: []path.CategoryType{path.EmailCategory}, }, }, - expect: []testInput{ - {cat: []path.CategoryType{path.EmailCategory}}, + expect: func() *backupBases { + bs := makeBackupBases([]testInput{ + { + id: 1, + cat: []path.CategoryType{path.EmailCategory}, + }, + }, nil) + + return bs }, }, { - name: "Other Overlaps Incomplete", - bb: []testInput{ + name: "Current assist overlaps with Other merge", + assist: []testInput{ { - id: 1, - cat: []path.CategoryType{path.EmailCategory}, - incomplete: true, + id: 3, + cat: []path.CategoryType{path.EmailCategory}, }, }, - other: []testInput{ + 
otherMerge: []testInput{ + { + id: 1, + cat: []path.CategoryType{path.EmailCategory}, + }, + }, + otherAssist: []testInput{ { id: 2, cat: []path.CategoryType{path.EmailCategory}, }, - { - id: 3, - cat: []path.CategoryType{path.EmailCategory}, - incomplete: true, - }, }, - expect: []testInput{ - { - id: 1, - cat: []path.CategoryType{path.EmailCategory}, - incomplete: true, - }, - { - id: 2, - cat: []path.CategoryType{path.EmailCategory}, - }, + + expect: func() *backupBases { + bs := makeBackupBases([]testInput{ + { + id: 1, + cat: []path.CategoryType{path.EmailCategory}, + }, + }, []testInput{ + { + id: 3, + cat: []path.CategoryType{path.EmailCategory}, + }, + }) + + return bs }, }, { name: "Other Disjoint", - bb: []testInput{ + merge: []testInput{ {cat: []path.CategoryType{path.EmailCategory}}, { - id: 1, - cat: []path.CategoryType{path.EmailCategory}, - incomplete: true, + id: 1, + cat: []path.CategoryType{path.EmailCategory}, }, }, - other: []testInput{ + otherMerge: []testInput{ { id: 2, cat: []path.CategoryType{path.ContactsCategory}, }, - { - id: 3, - cat: []path.CategoryType{path.ContactsCategory}, - incomplete: true, - }, }, - expect: []testInput{ - {cat: []path.CategoryType{path.EmailCategory}}, - { - id: 1, - cat: []path.CategoryType{path.EmailCategory}, - incomplete: true, - }, - { - id: 2, - cat: []path.CategoryType{path.ContactsCategory}, - }, - { - id: 3, - cat: []path.CategoryType{path.ContactsCategory}, - incomplete: true, - }, - }, - }, - { - name: "Other Reduced Reasons", - bb: []testInput{ - {cat: []path.CategoryType{path.EmailCategory}}, - { - id: 1, - cat: []path.CategoryType{path.EmailCategory}, - incomplete: true, - }, - }, - other: []testInput{ - { - id: 2, - cat: []path.CategoryType{ - path.EmailCategory, - path.ContactsCategory, + expect: func() *backupBases { + bs := makeBackupBases([]testInput{ + {cat: []path.CategoryType{path.EmailCategory}}, + { + id: 1, + cat: []path.CategoryType{path.EmailCategory}, }, - }, - { - id: 3, - cat: []path.CategoryType{ - path.EmailCategory, - path.ContactsCategory, + { + id: 2, + cat: []path.CategoryType{path.ContactsCategory}, }, - incomplete: true, - }, - }, - expect: []testInput{ - {cat: []path.CategoryType{path.EmailCategory}}, - { - id: 1, - cat: []path.CategoryType{path.EmailCategory}, - incomplete: true, - }, - { - id: 2, - cat: []path.CategoryType{path.ContactsCategory}, - }, - { - id: 3, - cat: []path.CategoryType{path.ContactsCategory}, - incomplete: true, - }, + }, nil) + + return bs }, }, } @@ -443,9 +458,9 @@ func (suite *BackupBasesUnitSuite) TestMergeBackupBases() { suite.Run(test.name, func() { t := suite.T() - bb := makeBackupBases(test.bb) - other := makeBackupBases(test.other) - expect := makeBackupBases(test.expect) + bb := makeBackupBases(test.merge, test.assist) + other := makeBackupBases(test.otherMerge, test.otherAssist) + expected := test.expect() ctx, flush := tester.NewContext(t) defer flush() @@ -453,10 +468,10 @@ func (suite *BackupBasesUnitSuite) TestMergeBackupBases() { got := bb.MergeBackupBases( ctx, other, - func(r Reasoner) string { + func(r identity.Reasoner) string { return r.Service().String() + r.Category().String() }) - AssertBackupBasesEqual(t, expect, got) + AssertBackupBasesEqual(t, expected, got) }) } } @@ -486,8 +501,20 @@ func (suite *BackupBasesUnitSuite) TestFixupAndVerify() { mergeBases: []ManifestEntry{ makeMan(path.EmailCategory, "id1", "", "bid1"), }, + assistBackups: []BackupEntry{ + { + Backup: &backup.Backup{ + BaseModel: model.BaseModel{ + ID: "bid2", + Tags: 
map[string]string{model.BackupTypeTag: model.AssistBackup}, + }, + SnapshotID: "id2", + StreamStoreID: "ssid2", + }, + }, + }, assistBases: []ManifestEntry{ - makeMan(path.EmailCategory, "id1", "", "bid1"), + makeMan(path.EmailCategory, "id2", "", "bid2"), }, } } @@ -507,24 +534,77 @@ func (suite *BackupBasesUnitSuite) TestFixupAndVerify() { res := validMail1() res.backups = nil + return res + }(), + expect: func() *backupBases { + res := validMail1() + res.mergeBases = nil + res.backups = nil + return res }(), }, { - name: "Backup Missing Snapshot ID", + name: "Merge Backup Missing Snapshot ID", bb: func() *backupBases { res := validMail1() res.backups[0].SnapshotID = "" + return res + }(), + expect: func() *backupBases { + res := validMail1() + res.mergeBases = nil + res.backups = nil + return res }(), }, { - name: "Backup Missing Deets ID", + name: "Assist backup missing snapshot ID", + bb: func() *backupBases { + res := validMail1() + res.assistBackups[0].SnapshotID = "" + + return res + }(), + expect: func() *backupBases { + res := validMail1() + res.assistBases = res.mergeBases + res.assistBackups = nil + + return res + }(), + }, + { + name: "Merge backup missing deets ID", bb: func() *backupBases { res := validMail1() res.backups[0].StreamStoreID = "" + return res + }(), + expect: func() *backupBases { + res := validMail1() + res.mergeBases = nil + res.backups = nil + + return res + }(), + }, + { + name: "Assist backup missing deets ID", + bb: func() *backupBases { + res := validMail1() + res.assistBackups[0].StreamStoreID = "" + + return res + }(), + expect: func() *backupBases { + res := validMail1() + res.assistBases = res.mergeBases + res.assistBackups = nil + return res }(), }, @@ -545,15 +625,22 @@ func (suite *BackupBasesUnitSuite) TestFixupAndVerify() { res.mergeBases[0].Reasons = append( res.mergeBases[0].Reasons, res.mergeBases[0].Reasons[0]) - res.assistBases = res.mergeBases + res.assistBases[0].Reasons = append( + res.assistBases[0].Reasons, + res.assistBases[0].Reasons[0]) return res }(), }, { - name: "Single Valid Entry", - bb: validMail1(), - expect: validMail1(), + name: "Single Valid Entry", + bb: validMail1(), + expect: func() *backupBases { + res := validMail1() + res.assistBases = append(res.mergeBases, res.assistBases...) + + return res + }(), }, { name: "Single Valid Entry With Incomplete Assist With Same Reason", @@ -561,16 +648,14 @@ func (suite *BackupBasesUnitSuite) TestFixupAndVerify() { res := validMail1() res.assistBases = append( res.assistBases, - makeMan(path.EmailCategory, "id2", "checkpoint", "bid2")) + makeMan(path.EmailCategory, "id3", "checkpoint", "bid3")) return res }(), expect: func() *backupBases { res := validMail1() - res.assistBases = append( - res.assistBases, - makeMan(path.EmailCategory, "id2", "checkpoint", "bid2")) + res.assistBases = append(res.mergeBases, res.assistBases...) 
return res }(), }, @@ -581,6 +666,9 @@ func (suite *BackupBasesUnitSuite) TestFixupAndVerify() { res.backups[0].DetailsID = res.backups[0].StreamStoreID res.backups[0].StreamStoreID = "" + res.assistBackups[0].DetailsID = res.assistBackups[0].StreamStoreID + res.assistBackups[0].StreamStoreID = "" + return res }(), expect: func() *backupBases { @@ -588,6 +676,11 @@ func (suite *BackupBasesUnitSuite) TestFixupAndVerify() { res.backups[0].DetailsID = res.backups[0].StreamStoreID res.backups[0].StreamStoreID = "" + res.assistBackups[0].DetailsID = res.assistBackups[0].StreamStoreID + res.assistBackups[0].StreamStoreID = "" + + res.assistBases = append(res.mergeBases, res.assistBases...) + return res }(), }, @@ -598,7 +691,10 @@ func (suite *BackupBasesUnitSuite) TestFixupAndVerify() { res.mergeBases[0].Reasons = append( res.mergeBases[0].Reasons, NewReason("", ro, path.ExchangeService, path.ContactsCategory)) - res.assistBases = res.mergeBases + + res.assistBases[0].Reasons = append( + res.assistBases[0].Reasons, + NewReason("", ro, path.ExchangeService, path.ContactsCategory)) return res }(), @@ -607,7 +703,12 @@ func (suite *BackupBasesUnitSuite) TestFixupAndVerify() { res.mergeBases[0].Reasons = append( res.mergeBases[0].Reasons, NewReason("", ro, path.ExchangeService, path.ContactsCategory)) - res.assistBases = res.mergeBases + + res.assistBases[0].Reasons = append( + res.assistBases[0].Reasons, + NewReason("", ro, path.ExchangeService, path.ContactsCategory)) + + res.assistBases = append(res.mergeBases, res.assistBases...) return res }(), @@ -618,14 +719,17 @@ func (suite *BackupBasesUnitSuite) TestFixupAndVerify() { res := validMail1() res.mergeBases = append( res.mergeBases, - makeMan(path.EmailCategory, "id2", "", "bid2")) - res.assistBases = res.mergeBases + makeMan(path.EmailCategory, "id3", "", "bid3")) + + res.assistBases = append( + res.assistBases, + makeMan(path.EmailCategory, "id4", "", "bid4")) return res }(), }, { - name: "Three Entries One Invalid", + name: "Merge Backup, Three Entries One Invalid", bb: func() *backupBases { res := validMail1() res.backups = append( @@ -633,24 +737,23 @@ func (suite *BackupBasesUnitSuite) TestFixupAndVerify() { BackupEntry{ Backup: &backup.Backup{ BaseModel: model.BaseModel{ - ID: "bid2", + ID: "bid3", }, }, }, BackupEntry{ Backup: &backup.Backup{ BaseModel: model.BaseModel{ - ID: "bid3", + ID: "bid4", }, - SnapshotID: "id3", - StreamStoreID: "ssid3", + SnapshotID: "id4", + StreamStoreID: "ssid4", }, }) res.mergeBases = append( res.mergeBases, - makeMan(path.ContactsCategory, "id2", "checkpoint", "bid2"), - makeMan(path.EventsCategory, "id3", "", "bid3")) - res.assistBases = res.mergeBases + makeMan(path.ContactsCategory, "id3", "checkpoint", "bid3"), + makeMan(path.EventsCategory, "id4", "", "bid4")) return res }(), @@ -661,16 +764,70 @@ func (suite *BackupBasesUnitSuite) TestFixupAndVerify() { BackupEntry{ Backup: &backup.Backup{ BaseModel: model.BaseModel{ - ID: "bid3", + ID: "bid4", }, - SnapshotID: "id3", - StreamStoreID: "ssid3", + SnapshotID: "id4", + StreamStoreID: "ssid4", }, }) res.mergeBases = append( res.mergeBases, - makeMan(path.EventsCategory, "id3", "", "bid3")) - res.assistBases = res.mergeBases + makeMan(path.EventsCategory, "id4", "", "bid4")) + res.assistBases = append(res.mergeBases, res.assistBases...) 
+ + return res + }(), + }, + { + name: "Assist Backup, Three Entries One Invalid", + bb: func() *backupBases { + res := validMail1() + res.assistBackups = append( + res.assistBackups, + BackupEntry{ + Backup: &backup.Backup{ + BaseModel: model.BaseModel{ + ID: "bid3", + Tags: map[string]string{model.BackupTypeTag: model.AssistBackup}, + }, + }, + }, + BackupEntry{ + Backup: &backup.Backup{ + BaseModel: model.BaseModel{ + ID: "bid4", + Tags: map[string]string{model.BackupTypeTag: model.AssistBackup}, + }, + SnapshotID: "id4", + StreamStoreID: "ssid4", + }, + }) + res.assistBases = append( + res.assistBases, + makeMan(path.ContactsCategory, "id3", "checkpoint", "bid3"), + makeMan(path.EventsCategory, "id4", "", "bid4")) + + return res + }(), + expect: func() *backupBases { + res := validMail1() + res.assistBackups = append( + res.assistBackups, + BackupEntry{ + Backup: &backup.Backup{ + BaseModel: model.BaseModel{ + ID: "bid4", + Tags: map[string]string{model.BackupTypeTag: model.AssistBackup}, + }, + SnapshotID: "id4", + StreamStoreID: "ssid4", + }, + }) + res.assistBases = append( + res.assistBases, + makeMan(path.EventsCategory, "id4", "", "bid4")) + + res.assistBases = append(res.mergeBases, res.assistBases...) return res }(), diff --git a/src/internal/kopia/base_finder.go b/src/internal/kopia/base_finder.go index 00561c833..571eda475 100644 --- a/src/internal/kopia/base_finder.go +++ b/src/internal/kopia/base_finder.go @@ -12,6 +12,7 @@ import ( "github.com/alcionai/corso/src/internal/model" "github.com/alcionai/corso/src/internal/operations/inject" "github.com/alcionai/corso/src/pkg/backup" + "github.com/alcionai/corso/src/pkg/backup/identity" "github.com/alcionai/corso/src/pkg/logger" "github.com/alcionai/corso/src/pkg/path" ) @@ -29,23 +30,11 @@ const ( userTagPrefix = "tag:" ) -// TODO(ashmrtn): Move this into some inject package. Here to avoid import -// cycles. -type Reasoner interface { - Tenant() string - ProtectedResource() string - Service() path.ServiceType - Category() path.CategoryType - // SubtreePath returns the path prefix for data in existing backups that have - // parameters (tenant, protected resourced, etc) that match this Reasoner. - SubtreePath() (path.Path, error) -} - func NewReason( tenant, resource string, service path.ServiceType, category path.CategoryType, -) Reasoner { +) identity.Reasoner { return reason{ tenant: tenant, resource: resource, @@ -90,7 +79,7 @@ func (r reason) SubtreePath() (path.Path, error) { return p, clues.Wrap(err, "building path").OrNil() } -func tagKeys(r Reasoner) []string { +func tagKeys(r identity.Reasoner) []string { return []string{ r.ProtectedResource(), serviceCatString(r.Service(), r.Category()), @@ -98,13 +87,13 @@ func tagKeys(r Reasoner) []string { } // reasonKey returns the concatenation of the ProtectedResource, Service, and Category. -func reasonKey(r Reasoner) string { +func reasonKey(r identity.Reasoner) string { return r.ProtectedResource() + r.Service().String() + r.Category().String() } type BackupEntry struct { *backup.Backup - Reasons []Reasoner + Reasons []identity.Reasoner } type ManifestEntry struct { @@ -116,7 +105,7 @@ type ManifestEntry struct { // 1. backup user1 email,contacts -> B1 // 2. backup user1 contacts -> B2 (uses B1 as base) // 3. 
backup user1 email,contacts,events (uses B1 for email, B2 for contacts) - Reasons []Reasoner + Reasons []identity.Reasoner } func (me ManifestEntry) GetTag(key string) (string, bool) { @@ -204,17 +193,20 @@ func (b *baseFinder) getBackupModel( return bup, nil } +type backupBase struct { + backup BackupEntry + manifest ManifestEntry +} + // findBasesInSet goes through manifest metadata entries and sees if they're -// incomplete or not. If an entry is incomplete and we don't already have a -// complete or incomplete manifest add it to the set for kopia assisted -// incrementals. If it's complete, fetch the backup model and see if it -// corresponds to a successful backup. If it does, return it as we only need the -// most recent complete backup as the base. +// incomplete or not. Manifests which don't have an associated backup +// are discarded as incomplete. Manifests are then checked to see if they +// are associated with an assist backup or merge backup. func (b *baseFinder) findBasesInSet( ctx context.Context, - reason Reasoner, + reason identity.Reasoner, metas []*manifest.EntryMetadata, -) (*BackupEntry, *ManifestEntry, []ManifestEntry, error) { +) (*backupBase, *backupBase, error) { // Sort manifests by time so we can go through them sequentially. The code in // kopia appears to sort them already, but add sorting here just so we're not // reliant on undocumented behavior. @@ -223,8 +215,8 @@ func (b *baseFinder) findBasesInSet( }) var ( - kopiaAssistSnaps []ManifestEntry - foundIncomplete bool + mergeBase *backupBase + assistBase *backupBase ) for i := len(metas) - 1; i >= 0; i-- { @@ -240,16 +232,10 @@ func (b *baseFinder) findBasesInSet( } if len(man.IncompleteReason) > 0 { - if !foundIncomplete { - foundIncomplete = true - - kopiaAssistSnaps = append(kopiaAssistSnaps, ManifestEntry{ - Manifest: man, - Reasons: []Reasoner{reason}, - }) - - logger.Ctx(ictx).Info("found incomplete backup") - } + // Skip here since this snapshot cannot be considered an assist base. + logger.Ctx(ictx).Debugw( + "Incomplete snapshot", + "incomplete_reason", man.IncompleteReason) continue } @@ -259,19 +245,7 @@ func (b *baseFinder) findBasesInSet( if err != nil { // Safe to continue here as we'll just end up attempting to use an older // backup as the base. - logger.CtxErr(ictx, err).Debug("searching for base backup") - - if !foundIncomplete { - foundIncomplete = true - - kopiaAssistSnaps = append(kopiaAssistSnaps, ManifestEntry{ - Manifest: man, - Reasons: []Reasoner{reason}, - }) - - logger.Ctx(ictx).Info("found incomplete backup") - } - + logger.CtxErr(ictx, err).Debug("searching for backup model") continue } @@ -285,49 +259,118 @@ func (b *baseFinder) findBasesInSet( "empty backup stream store ID", "search_backup_id", bup.ID) - if !foundIncomplete { - foundIncomplete = true - - kopiaAssistSnaps = append(kopiaAssistSnaps, ManifestEntry{ - Manifest: man, - Reasons: []Reasoner{reason}, - }) - - logger.Ctx(ictx).Infow( - "found incomplete backup", - "search_backup_id", bup.ID) - } - continue } // If we've made it to this point then we're considering the backup // complete as it has both an item data snapshot and a backup details // snapshot. - logger.Ctx(ictx).Infow("found complete backup", "base_backup_id", bup.ID) + // + // Check first if this is an assist base. Criteria for selecting an + // assist base are: + // 1. most recent assist base for the reason. + // 2. at most one assist base per reason. + // 3. it must be more recent than the merge backup for the reason, if + // a merge backup exists. 
- me := ManifestEntry{ - Manifest: man, - Reasons: []Reasoner{reason}, + if b.isAssistBackupModel(ictx, bup) { + if assistBase == nil { + assistModel := BackupEntry{ + Backup: bup, + Reasons: []identity.Reasoner{reason}, + } + assistSnap := ManifestEntry{ + Manifest: man, + Reasons: []identity.Reasoner{reason}, + } + + assistBase = &backupBase{ + backup: assistModel, + manifest: assistSnap, + } + + logger.Ctx(ictx).Infow( + "found assist base", + "search_backup_id", bup.ID, + "search_snapshot_id", meta.ID, + "ssid", ssid) + } + + // Skip if an assist base has already been selected. + continue } - kopiaAssistSnaps = append(kopiaAssistSnaps, me) - return &BackupEntry{ + logger.Ctx(ictx).Infow("found merge base", + "search_backup_id", bup.ID, + "search_snapshot_id", meta.ID, + "ssid", ssid) + + mergeSnap := ManifestEntry{ + Manifest: man, + Reasons: []identity.Reasoner{reason}, + } + + mergeModel := BackupEntry{ Backup: bup, - Reasons: []Reasoner{reason}, - }, &me, kopiaAssistSnaps, nil + Reasons: []identity.Reasoner{reason}, + } + + mergeBase = &backupBase{ + backup: mergeModel, + manifest: mergeSnap, + } + + break } - logger.Ctx(ctx).Info("no base backups for reason") + if mergeBase == nil && assistBase == nil { + logger.Ctx(ctx).Info("no merge or assist base found for reason") + } - return nil, nil, kopiaAssistSnaps, nil + return mergeBase, assistBase, nil +} + +// isAssistBackupModel checks if the provided backup is an assist backup. +func (b *baseFinder) isAssistBackupModel( + ctx context.Context, + bup *backup.Backup, +) bool { + allTags := map[string]string{ + model.BackupTypeTag: model.AssistBackup, + } + + for k, v := range allTags { + if bup.Tags[k] != v { + // This is not an assist backup so we can just exit here. + logger.Ctx(ctx).Debugw( + "assist backup model missing tags", + "backup_id", bup.ID, + "tag", k, + "expected_value", v, + "actual_value", bup.Tags[k]) + + return false + } + } + + // Check if it has a valid streamstore id and snapshot id. + if len(bup.StreamStoreID) == 0 || len(bup.SnapshotID) == 0 { + logger.Ctx(ctx).Infow( + "nil ssid or snapshot id in assist base", + "ssid", bup.StreamStoreID, + "snapshot_id", bup.SnapshotID) + + return false + } + + return true } func (b *baseFinder) getBase( ctx context.Context, - r Reasoner, + r identity.Reasoner, tags map[string]string, -) (*BackupEntry, *ManifestEntry, []ManifestEntry, error) { +) (*backupBase, *backupBase, error) { allTags := map[string]string{} for _, k := range tagKeys(r) { @@ -339,12 +382,12 @@ func (b *baseFinder) getBase( metas, err := b.sm.FindManifests(ctx, allTags) if err != nil { - return nil, nil, nil, clues.Wrap(err, "getting snapshots") + return nil, nil, clues.Wrap(err, "getting snapshots") } // No snapshots means no backups so we can just exit here. if len(metas) == 0 { - return nil, nil, nil, nil + return nil, nil, nil } return b.findBasesInSet(ctx, r, metas) @@ -352,7 +395,7 @@ func (b *baseFinder) getBase( func (b *baseFinder) FindBases( ctx context.Context, - reasons []Reasoner, + reasons []identity.Reasoner, tags map[string]string, ) BackupBases { var ( @@ -360,9 +403,10 @@ func (b *baseFinder) FindBases( // the reason for selecting something. Kopia assisted snapshots also use // ManifestEntry so we have the reasons for selecting them to aid in // debugging. 
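As a reading aid for the selection logic introduced above: per reason, snapshots are walked newest to oldest, at most one assist base (the newest one, verified by its tags and IDs) is kept, and the walk stops at the first merge base, so any assist base kept is always newer than the merge base. Below is a minimal, standalone Go sketch of that decision; the types and tag constants are simplified placeholders, not the real backup.Backup / model values.

```go
// Hypothetical, simplified model of the per-reason base selection; these
// types stand in for the real backup models and snapshot manifests.
package main

import "fmt"

const (
	backupTypeTag = "backup-type"
	assistBackup  = "assist"
)

type candidate struct {
	ID            string
	Tags          map[string]string
	SnapshotID    string
	StreamStoreID string
}

// isAssist mirrors the tag and ID checks in isAssistBackupModel: the backup
// must be tagged as an assist backup and still reference both an item
// snapshot and a details (stream store) snapshot.
func isAssist(c candidate) bool {
	if c.Tags[backupTypeTag] != assistBackup {
		return false
	}

	return c.SnapshotID != "" && c.StreamStoreID != ""
}

// pickBases walks candidates from newest to oldest: it keeps at most one
// assist base (the newest) and stops at the first merge base it finds.
func pickBases(newestFirst []candidate) (merge, assist *candidate) {
	for i := range newestFirst {
		c := newestFirst[i]

		if isAssist(c) {
			if assist == nil {
				assist = &c
			}

			continue // only the newest assist base per reason
		}

		merge = &c

		break // newest merge base wins; older ones are ignored
	}

	return merge, assist
}

func main() {
	merge, assist := pickBases([]candidate{
		{ID: "b4", Tags: map[string]string{backupTypeTag: assistBackup}, SnapshotID: "s4", StreamStoreID: "ss4"},
		{ID: "b3", Tags: map[string]string{backupTypeTag: assistBackup}, SnapshotID: "s3", StreamStoreID: "ss3"},
		{ID: "b2", SnapshotID: "s2", StreamStoreID: "ss2"},
		{ID: "b1", SnapshotID: "s1", StreamStoreID: "ss1"},
	})

	fmt.Println(merge.ID, assist.ID) // b2 b4
}
```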
- baseBups = map[model.StableID]BackupEntry{} - baseSnaps = map[manifest.ID]ManifestEntry{} - kopiaAssistSnaps = map[manifest.ID]ManifestEntry{} + mergeBups = map[model.StableID]BackupEntry{} + assistBups = map[model.StableID]BackupEntry{} + mergeSnaps = map[manifest.ID]ManifestEntry{} + assistSnaps = map[manifest.ID]ManifestEntry{} ) for _, searchReason := range reasons { @@ -372,7 +416,10 @@ func (b *baseFinder) FindBases( "search_category", searchReason.Category().String()) logger.Ctx(ictx).Info("searching for previous manifests") - baseBackup, baseSnap, assistSnaps, err := b.getBase(ictx, searchReason, tags) + mergeBase, assistBase, err := b.getBase( + ictx, + searchReason, + tags) if err != nil { logger.Ctx(ctx).Info( "getting base, falling back to full backup for reason", @@ -381,47 +428,60 @@ func (b *baseFinder) FindBases( continue } - if baseBackup != nil { - bs, ok := baseBups[baseBackup.ID] + if mergeBase != nil { + mergeSnap := mergeBase.manifest + mergeBackup := mergeBase.backup + + ms, ok := mergeSnaps[mergeSnap.ID] if ok { - bs.Reasons = append(bs.Reasons, baseSnap.Reasons...) + ms.Reasons = append(ms.Reasons, mergeSnap.Reasons...) } else { - bs = *baseBackup + ms = mergeSnap } - // Reassign since it's structs not pointers to structs. - baseBups[baseBackup.ID] = bs + mergeSnaps[mergeSnap.ID] = ms + + mb, ok := mergeBups[mergeBackup.ID] + if ok { + mb.Reasons = append(mb.Reasons, mergeSnap.Reasons...) + } else { + mb = mergeBackup + } + + mergeBups[mergeBackup.ID] = mb } - if baseSnap != nil { - bs, ok := baseSnaps[baseSnap.ID] + if assistBase != nil { + assistSnap := assistBase.manifest + assistBackup := assistBase.backup + + as, ok := assistSnaps[assistSnap.ID] if ok { - bs.Reasons = append(bs.Reasons, baseSnap.Reasons...) + as.Reasons = append(as.Reasons, assistSnap.Reasons...) } else { - bs = *baseSnap + as = assistSnap } - // Reassign since it's structs not pointers to structs. - baseSnaps[baseSnap.ID] = bs - } + assistSnaps[assistSnap.ID] = as - for _, s := range assistSnaps { - bs, ok := kopiaAssistSnaps[s.ID] + ab, ok := assistBups[assistBackup.ID] if ok { - bs.Reasons = append(bs.Reasons, s.Reasons...) + ab.Reasons = append(ab.Reasons, assistBackup.Reasons...) } else { - bs = s + ab = assistBackup } - // Reassign since it's structs not pointers to structs. - kopiaAssistSnaps[s.ID] = bs + assistBups[assistBackup.ID] = ab } } + // TODO(pandeyabs): Fix the terminology used in backupBases to go with + // new definitions i.e. mergeSnaps instead of mergeBases, etc. 
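The accumulation pattern used above for the merge/assist snapshot and backup maps is worth calling out: the maps hold value structs, so Reasons are appended onto a copy and the copy is written back under the same ID. A small sketch of that pattern with hypothetical types:

```go
// Simplified illustration of reason accumulation keyed by ID; entries are
// value structs, so each update must be reassigned into the map.
package main

import "fmt"

type entry struct {
	ID      string
	Reasons []string
}

func accumulate(dst map[string]entry, e entry) {
	cur, ok := dst[e.ID]
	if ok {
		cur.Reasons = append(cur.Reasons, e.Reasons...)
	} else {
		cur = e
	}

	// Reassign since the map stores structs, not pointers to structs.
	dst[e.ID] = cur
}

func main() {
	mergeSnaps := map[string]entry{}

	accumulate(mergeSnaps, entry{ID: "snap1", Reasons: []string{"user1/email"}})
	accumulate(mergeSnaps, entry{ID: "snap1", Reasons: []string{"user1/events"}})

	fmt.Println(mergeSnaps["snap1"].Reasons) // [user1/email user1/events]
}
```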
res := &backupBases{ - backups: maps.Values(baseBups), - mergeBases: maps.Values(baseSnaps), - assistBases: maps.Values(kopiaAssistSnaps), + backups: maps.Values(mergeBups), + assistBackups: maps.Values(assistBups), + mergeBases: maps.Values(mergeSnaps), + assistBases: maps.Values(assistSnaps), } res.fixupAndVerify(ctx) diff --git a/src/internal/kopia/base_finder_test.go b/src/internal/kopia/base_finder_test.go index cb3239ca1..d1b0742fc 100644 --- a/src/internal/kopia/base_finder_test.go +++ b/src/internal/kopia/base_finder_test.go @@ -14,6 +14,7 @@ import ( "github.com/alcionai/corso/src/internal/model" "github.com/alcionai/corso/src/internal/tester" "github.com/alcionai/corso/src/pkg/backup" + "github.com/alcionai/corso/src/pkg/backup/identity" "github.com/alcionai/corso/src/pkg/path" ) @@ -23,14 +24,19 @@ const ( ) var ( - testT1 = time.Now() - testT2 = testT1.Add(1 * time.Hour) - + testT1 = time.Now() + testT2 = testT1.Add(1 * time.Hour) + testT3 = testT2.Add(1 * time.Hour) + testT4 = testT3.Add(1 * time.Hour) testID1 = manifest.ID("snap1") testID2 = manifest.ID("snap2") + testID3 = manifest.ID("snap3") + testID4 = manifest.ID("snap4") testBackup1 = "backupID1" testBackup2 = "backupID2" + testBackup3 = "backupID3" + testBackup4 = "backupID4" testMail = path.ExchangeService.String() + path.EmailCategory.String() testEvents = path.ExchangeService.String() + path.EventsCategory.String() @@ -39,7 +45,7 @@ var ( testUser2 = "user2" testUser3 = "user3" - testAllUsersAllCats = []Reasoner{ + testAllUsersAllCats = []identity.Reasoner{ // User1 email and events. NewReason("", testUser1, path.ExchangeService, path.EmailCategory), NewReason("", testUser1, path.ExchangeService, path.EventsCategory), @@ -50,12 +56,12 @@ var ( NewReason("", testUser3, path.ExchangeService, path.EmailCategory), NewReason("", testUser3, path.ExchangeService, path.EventsCategory), } - testAllUsersMail = []Reasoner{ + testAllUsersMail = []identity.Reasoner{ NewReason("", testUser1, path.ExchangeService, path.EmailCategory), NewReason("", testUser2, path.ExchangeService, path.EmailCategory), NewReason("", testUser3, path.ExchangeService, path.EmailCategory), } - testUser1Mail = []Reasoner{ + testUser1Mail = []identity.Reasoner{ NewReason("", testUser1, path.ExchangeService, path.EmailCategory), } ) @@ -212,12 +218,14 @@ func newBackupModel( hasItemSnap bool, hasDetailsSnap bool, oldDetailsID bool, + tags map[string]string, err error, ) backupInfo { res := backupInfo{ b: backup.Backup{ BaseModel: model.BaseModel{ - ID: model.StableID(id), + ID: model.StableID(id), + Tags: tags, }, SnapshotID: "iid", }, @@ -285,7 +293,7 @@ func (suite *BaseFinderUnitSuite) TestNoResult_NoBackupsOrSnapshots() { sm: mockEmptySnapshotManager{}, bg: mockEmptyModelGetter{}, } - reasons := []Reasoner{ + reasons := []identity.Reasoner{ NewReason("", "a-user", path.ExchangeService, path.EmailCategory), } @@ -304,7 +312,7 @@ func (suite *BaseFinderUnitSuite) TestNoResult_ErrorListingSnapshots() { sm: &mockSnapshotManager{findErr: assert.AnError}, bg: mockEmptyModelGetter{}, } - reasons := []Reasoner{ + reasons := []identity.Reasoner{ NewReason("", "a-user", path.ExchangeService, path.EmailCategory), } @@ -316,18 +324,21 @@ func (suite *BaseFinderUnitSuite) TestNoResult_ErrorListingSnapshots() { func (suite *BaseFinderUnitSuite) TestGetBases() { table := []struct { name string - input []Reasoner + input []identity.Reasoner manifestData []manifestInfo // Use this to denote the Reasons a base backup or base manifest is // selected. 
The int maps to the index of the backup or manifest in data. - expectedBaseReasons map[int][]Reasoner + expectedBaseReasons map[int][]identity.Reasoner // Use this to denote the Reasons a kopia assised incrementals manifest is // selected. The int maps to the index of the manifest in data. - expectedAssistManifestReasons map[int][]Reasoner + // TODO(pandeyabs): Remove this once we have 1:1 mapping between snapshots + // and backup models. + expectedAssistManifestReasons map[int][]identity.Reasoner + expectedAssistReasons map[int][]identity.Reasoner backupData []backupInfo }{ { - name: "Return Older Base If Fail To Get Manifest", + name: "Return Older Merge Base If Fail To Get Manifest", input: testUser1Mail, manifestData: []manifestInfo{ newManifestInfo( @@ -349,19 +360,61 @@ func (suite *BaseFinderUnitSuite) TestGetBases() { testUser1, ), }, - expectedBaseReasons: map[int][]Reasoner{ + expectedBaseReasons: map[int][]identity.Reasoner{ 1: testUser1Mail, }, - expectedAssistManifestReasons: map[int][]Reasoner{ + expectedAssistManifestReasons: map[int][]identity.Reasoner{ 1: testUser1Mail, }, + expectedAssistReasons: map[int][]identity.Reasoner{}, backupData: []backupInfo{ - newBackupModel(testBackup2, true, true, false, nil), - newBackupModel(testBackup1, true, true, false, nil), + newBackupModel(testBackup2, true, true, false, nil, nil), + newBackupModel(testBackup1, true, true, false, nil, nil), }, }, { - name: "Return Older Base If Fail To Get Backup", + name: "Return Older Assist Base If Fail To Get Manifest", + input: testUser1Mail, + manifestData: []manifestInfo{ + newManifestInfo( + testID2, + testT2, + testCompleteMan, + testBackup2, + assert.AnError, + testMail, + testUser1, + ), + newManifestInfo( + testID1, + testT1, + testCompleteMan, + testBackup1, + nil, + testMail, + testUser1, + ), + }, + expectedBaseReasons: map[int][]identity.Reasoner{}, + expectedAssistManifestReasons: map[int][]identity.Reasoner{ + 1: testUser1Mail, + }, + expectedAssistReasons: map[int][]identity.Reasoner{ + 1: testUser1Mail, + }, + backupData: []backupInfo{ + newBackupModel(testBackup2, true, true, false, nil, nil), + newBackupModel( + testBackup1, + true, + true, + false, + map[string]string{model.BackupTypeTag: model.AssistBackup}, + nil), + }, + }, + { + name: "Return Older Merge Base If Fail To Get Backup", input: testUser1Mail, manifestData: []manifestInfo{ newManifestInfo( @@ -383,16 +436,15 @@ func (suite *BaseFinderUnitSuite) TestGetBases() { testUser1, ), }, - expectedBaseReasons: map[int][]Reasoner{ + expectedBaseReasons: map[int][]identity.Reasoner{ 1: testUser1Mail, }, - expectedAssistManifestReasons: map[int][]Reasoner{ - 0: testUser1Mail, + expectedAssistManifestReasons: map[int][]identity.Reasoner{ 1: testUser1Mail, }, backupData: []backupInfo{ - newBackupModel(testBackup2, false, false, false, assert.AnError), - newBackupModel(testBackup1, true, true, false, nil), + newBackupModel(testBackup2, false, false, false, nil, assert.AnError), + newBackupModel(testBackup1, true, true, false, nil, nil), }, }, { @@ -418,16 +470,16 @@ func (suite *BaseFinderUnitSuite) TestGetBases() { testUser1, ), }, - expectedBaseReasons: map[int][]Reasoner{ + expectedBaseReasons: map[int][]identity.Reasoner{ 1: testUser1Mail, }, - expectedAssistManifestReasons: map[int][]Reasoner{ - 0: testUser1Mail, + expectedAssistManifestReasons: map[int][]identity.Reasoner{ 1: testUser1Mail, }, + expectedAssistReasons: map[int][]identity.Reasoner{}, backupData: []backupInfo{ - newBackupModel(testBackup2, true, false, false, 
nil), - newBackupModel(testBackup1, true, true, false, nil), + newBackupModel(testBackup2, true, false, false, nil, nil), + newBackupModel(testBackup1, true, true, false, nil, nil), }, }, { @@ -447,18 +499,19 @@ func (suite *BaseFinderUnitSuite) TestGetBases() { testUser3, ), }, - expectedBaseReasons: map[int][]Reasoner{ + expectedBaseReasons: map[int][]identity.Reasoner{ 0: testUser1Mail, }, - expectedAssistManifestReasons: map[int][]Reasoner{ + expectedAssistManifestReasons: map[int][]identity.Reasoner{ 0: testUser1Mail, }, + expectedAssistReasons: map[int][]identity.Reasoner{}, backupData: []backupInfo{ - newBackupModel(testBackup1, true, true, true, nil), + newBackupModel(testBackup1, true, true, true, nil, nil), }, }, { - name: "All One Snapshot", + name: "All One Snapshot With Merge Base", input: testAllUsersAllCats, manifestData: []manifestInfo{ newManifestInfo( @@ -474,14 +527,49 @@ func (suite *BaseFinderUnitSuite) TestGetBases() { testUser3, ), }, - expectedBaseReasons: map[int][]Reasoner{ + expectedBaseReasons: map[int][]identity.Reasoner{ 0: testAllUsersAllCats, }, - expectedAssistManifestReasons: map[int][]Reasoner{ + expectedAssistManifestReasons: map[int][]identity.Reasoner{ + 0: testAllUsersAllCats, + }, + expectedAssistReasons: map[int][]identity.Reasoner{}, + backupData: []backupInfo{ + newBackupModel(testBackup1, true, true, false, nil, nil), + }, + }, + { + name: "All One Snapshot with Assist Base", + input: testAllUsersAllCats, + manifestData: []manifestInfo{ + newManifestInfo( + testID1, + testT1, + testCompleteMan, + testBackup1, + nil, + testMail, + testEvents, + testUser1, + testUser2, + testUser3, + ), + }, + expectedBaseReasons: map[int][]identity.Reasoner{}, + expectedAssistManifestReasons: map[int][]identity.Reasoner{ + 0: testAllUsersAllCats, + }, + expectedAssistReasons: map[int][]identity.Reasoner{ 0: testAllUsersAllCats, }, backupData: []backupInfo{ - newBackupModel(testBackup1, true, true, false, nil), + newBackupModel( + testBackup1, + true, + true, + false, + map[string]string{model.BackupTypeTag: model.AssistBackup}, + nil), }, }, { @@ -512,7 +600,7 @@ func (suite *BaseFinderUnitSuite) TestGetBases() { testUser3, ), }, - expectedBaseReasons: map[int][]Reasoner{ + expectedBaseReasons: map[int][]identity.Reasoner{ 0: { NewReason("", testUser1, path.ExchangeService, path.EmailCategory), NewReason("", testUser2, path.ExchangeService, path.EmailCategory), @@ -524,7 +612,7 @@ func (suite *BaseFinderUnitSuite) TestGetBases() { NewReason("", testUser3, path.ExchangeService, path.EventsCategory), }, }, - expectedAssistManifestReasons: map[int][]Reasoner{ + expectedAssistManifestReasons: map[int][]identity.Reasoner{ 0: { NewReason("", testUser1, path.ExchangeService, path.EmailCategory), NewReason("", testUser2, path.ExchangeService, path.EmailCategory), @@ -537,8 +625,96 @@ func (suite *BaseFinderUnitSuite) TestGetBases() { }, }, backupData: []backupInfo{ - newBackupModel(testBackup1, true, true, false, nil), - newBackupModel(testBackup2, true, true, false, nil), + newBackupModel(testBackup1, true, true, false, nil, nil), + newBackupModel(testBackup2, true, true, false, nil, nil), + }, + }, + { + name: "Unique assist bases with common merge Base, overlapping reasons", + input: testAllUsersAllCats, + manifestData: []manifestInfo{ + newManifestInfo( + testID3, + testT3, + testCompleteMan, + testBackup3, + nil, + testEvents, + testUser1, + testUser2, + ), + newManifestInfo( + testID2, + testT2, + testCompleteMan, + testBackup2, + nil, + testMail, + testUser1, + 
testUser2, + ), + newManifestInfo( + testID1, + testT1, + testCompleteMan, + testBackup1, + nil, + testMail, + testEvents, + testUser1, + testUser2, + ), + }, + expectedBaseReasons: map[int][]identity.Reasoner{ + 2: { + NewReason("", testUser1, path.ExchangeService, path.EmailCategory), + NewReason("", testUser2, path.ExchangeService, path.EmailCategory), + NewReason("", testUser1, path.ExchangeService, path.EventsCategory), + NewReason("", testUser2, path.ExchangeService, path.EventsCategory), + }, + }, + expectedAssistManifestReasons: map[int][]identity.Reasoner{ + 0: { + NewReason("", testUser1, path.ExchangeService, path.EventsCategory), + NewReason("", testUser2, path.ExchangeService, path.EventsCategory), + }, + 1: { + NewReason("", testUser1, path.ExchangeService, path.EmailCategory), + NewReason("", testUser2, path.ExchangeService, path.EmailCategory), + }, + 2: { + NewReason("", testUser1, path.ExchangeService, path.EmailCategory), + NewReason("", testUser2, path.ExchangeService, path.EmailCategory), + NewReason("", testUser1, path.ExchangeService, path.EventsCategory), + NewReason("", testUser2, path.ExchangeService, path.EventsCategory), + }, + }, + expectedAssistReasons: map[int][]identity.Reasoner{ + 0: { + NewReason("", testUser1, path.ExchangeService, path.EventsCategory), + NewReason("", testUser2, path.ExchangeService, path.EventsCategory), + }, + 1: { + NewReason("", testUser1, path.ExchangeService, path.EmailCategory), + NewReason("", testUser2, path.ExchangeService, path.EmailCategory), + }, + }, + backupData: []backupInfo{ + newBackupModel( + testBackup3, + true, + true, + false, + map[string]string{model.BackupTypeTag: model.AssistBackup}, + nil), + newBackupModel( + testBackup2, + true, + true, + false, + map[string]string{model.BackupTypeTag: model.AssistBackup}, + nil), + newBackupModel(testBackup1, true, true, false, nil, nil), }, }, { @@ -564,17 +740,16 @@ func (suite *BaseFinderUnitSuite) TestGetBases() { testUser1, ), }, - expectedBaseReasons: map[int][]Reasoner{ + expectedBaseReasons: map[int][]identity.Reasoner{ 0: testUser1Mail, }, - expectedAssistManifestReasons: map[int][]Reasoner{ + expectedAssistManifestReasons: map[int][]identity.Reasoner{ 0: testUser1Mail, - 1: testUser1Mail, }, backupData: []backupInfo{ - newBackupModel(testBackup1, true, true, false, nil), + newBackupModel(testBackup1, true, true, false, nil, nil), // Shouldn't be returned but have here just so we can see. - newBackupModel(testBackup2, true, true, false, nil), + newBackupModel(testBackup2, true, true, false, nil, nil), }, }, { @@ -600,16 +775,16 @@ func (suite *BaseFinderUnitSuite) TestGetBases() { testUser1, ), }, - expectedBaseReasons: map[int][]Reasoner{ + expectedBaseReasons: map[int][]identity.Reasoner{ 1: testUser1Mail, }, - expectedAssistManifestReasons: map[int][]Reasoner{ + expectedAssistManifestReasons: map[int][]identity.Reasoner{ 1: testUser1Mail, }, backupData: []backupInfo{ // Shouldn't be returned but have here just so we can see. 
- newBackupModel(testBackup1, true, true, false, nil), - newBackupModel(testBackup2, true, true, false, nil), + newBackupModel(testBackup1, true, true, false, nil, nil), + newBackupModel(testBackup2, true, true, false, nil, nil), }, }, { @@ -635,14 +810,12 @@ func (suite *BaseFinderUnitSuite) TestGetBases() { testUser1, ), }, - expectedBaseReasons: map[int][]Reasoner{}, - expectedAssistManifestReasons: map[int][]Reasoner{ - 1: testUser1Mail, - }, + expectedBaseReasons: map[int][]identity.Reasoner{}, + expectedAssistManifestReasons: map[int][]identity.Reasoner{}, backupData: []backupInfo{ // Shouldn't be returned but have here just so we can see. - newBackupModel(testBackup1, true, true, false, nil), - newBackupModel(testBackup2, true, true, false, nil), + newBackupModel(testBackup1, true, true, false, nil, nil), + newBackupModel(testBackup2, true, true, false, nil, nil), }, }, { @@ -659,14 +832,14 @@ func (suite *BaseFinderUnitSuite) TestGetBases() { testUser1, ), }, - expectedBaseReasons: map[int][]Reasoner{ + expectedBaseReasons: map[int][]identity.Reasoner{ 0: testUser1Mail, }, - expectedAssistManifestReasons: map[int][]Reasoner{ + expectedAssistManifestReasons: map[int][]identity.Reasoner{ 0: testUser1Mail, }, backupData: []backupInfo{ - newBackupModel(testBackup1, true, true, false, nil), + newBackupModel(testBackup1, true, true, false, nil, nil), }, }, { @@ -694,16 +867,206 @@ func (suite *BaseFinderUnitSuite) TestGetBases() { testUser1, ), }, - expectedBaseReasons: map[int][]Reasoner{ + expectedBaseReasons: map[int][]identity.Reasoner{ 0: testUser1Mail, }, - expectedAssistManifestReasons: map[int][]Reasoner{ + expectedAssistManifestReasons: map[int][]identity.Reasoner{ 0: testUser1Mail, }, backupData: []backupInfo{ - newBackupModel(testBackup2, true, true, false, nil), + newBackupModel(testBackup2, true, true, false, nil, nil), // Shouldn't be returned but here just so we can check. 
- newBackupModel(testBackup1, true, true, false, nil), + newBackupModel(testBackup1, true, true, false, nil, nil), + }, + }, + { + name: "Return latest assist & merge base pair", + input: testUser1Mail, + manifestData: []manifestInfo{ + newManifestInfo( + testID4, + testT4, + testCompleteMan, + testBackup4, + nil, + testMail, + testUser1, + ), + newManifestInfo( + testID3, + testT3, + testCompleteMan, + testBackup3, + nil, + testMail, + testUser1, + ), + newManifestInfo( + testID2, + testT2, + testCompleteMan, + testBackup2, + nil, + testMail, + testUser1, + ), + newManifestInfo( + testID1, + testT1, + testCompleteMan, + testBackup1, + nil, + testMail, + testUser1, + ), + }, + expectedBaseReasons: map[int][]identity.Reasoner{ + 2: testUser1Mail, + }, + expectedAssistManifestReasons: map[int][]identity.Reasoner{ + 0: testUser1Mail, + 2: testUser1Mail, + }, + expectedAssistReasons: map[int][]identity.Reasoner{ + 0: testUser1Mail, + }, + backupData: []backupInfo{ + newBackupModel( + testBackup4, + true, + true, + false, + map[string]string{model.BackupTypeTag: model.AssistBackup}, + nil), + newBackupModel( + testBackup3, + true, + true, + false, + map[string]string{model.BackupTypeTag: model.AssistBackup}, + nil), + newBackupModel(testBackup2, true, true, false, nil, nil), + newBackupModel(testBackup1, true, true, false, nil, nil), + }, + }, + { + name: "Newer merge base than assist base", + input: testUser1Mail, + manifestData: []manifestInfo{ + newManifestInfo( + testID2, + testT2, + testCompleteMan, + testBackup2, + nil, + testMail, + testUser1, + ), + newManifestInfo( + testID1, + testT1, + testCompleteMan, + testBackup1, + nil, + testMail, + testUser1, + ), + }, + expectedBaseReasons: map[int][]identity.Reasoner{ + 0: testUser1Mail, + }, + expectedAssistManifestReasons: map[int][]identity.Reasoner{ + 0: testUser1Mail, + }, + expectedAssistReasons: map[int][]identity.Reasoner{}, + backupData: []backupInfo{ + newBackupModel(testBackup2, true, true, false, nil, nil), + newBackupModel( + testBackup1, + true, + true, + false, + map[string]string{model.BackupTypeTag: model.AssistBackup}, + nil), + }, + }, + { + name: "Only assist bases", + input: testUser1Mail, + manifestData: []manifestInfo{ + newManifestInfo( + testID2, + testT2, + testCompleteMan, + testBackup2, + nil, + testMail, + testUser1, + ), + newManifestInfo( + testID1, + testT1, + testCompleteMan, + testBackup1, + nil, + testMail, + testUser1, + ), + }, + expectedBaseReasons: map[int][]identity.Reasoner{}, + expectedAssistManifestReasons: map[int][]identity.Reasoner{ + 0: testUser1Mail, + }, + expectedAssistReasons: map[int][]identity.Reasoner{ + 0: testUser1Mail, + }, + backupData: []backupInfo{ + newBackupModel( + testBackup2, + true, + true, + false, + map[string]string{model.BackupTypeTag: model.AssistBackup}, + nil), + newBackupModel( + testBackup1, + true, + true, + false, + map[string]string{model.BackupTypeTag: model.AssistBackup}, + nil), + }, + }, + { + name: "Merge base with tag", + input: testUser1Mail, + manifestData: []manifestInfo{ + newManifestInfo( + testID2, + testT2, + testCompleteMan, + testBackup2, + nil, + testMail, + testUser1, + ), + }, + expectedBaseReasons: map[int][]identity.Reasoner{ + 0: testUser1Mail, + }, + expectedAssistManifestReasons: map[int][]identity.Reasoner{ + 0: testUser1Mail, + }, + expectedAssistReasons: map[int][]identity.Reasoner{}, + backupData: []backupInfo{ + newBackupModel(testBackup2, true, true, false, nil, nil), + newBackupModel( + testBackup1, + true, + true, + false, + 
map[string]string{model.BackupTypeTag: model.MergeBackup}, + nil), }, }, } @@ -730,6 +1093,12 @@ func (suite *BaseFinderUnitSuite) TestGetBases() { bb.Backups(), test.backupData, test.expectedBaseReasons) + checkBackupEntriesMatch( + t, + bb.AssistBackups(), + test.backupData, + test.expectedAssistReasons) + checkManifestEntriesMatch( t, bb.MergeBases(), @@ -759,22 +1128,22 @@ func (suite *BaseFinderUnitSuite) TestFindBases_CustomTags() { ), } backupData := []backupInfo{ - newBackupModel(testBackup1, true, true, false, nil), + newBackupModel(testBackup1, true, true, false, nil, nil), } table := []struct { name string - input []Reasoner + input []identity.Reasoner tags map[string]string // Use this to denote which manifests in data should be expected. Allows // defining data in a table while not repeating things between data and // expected. - expectedIdxs map[int][]Reasoner + expectedIdxs map[int][]identity.Reasoner }{ { name: "no tags specified", tags: nil, - expectedIdxs: map[int][]Reasoner{ + expectedIdxs: map[int][]identity.Reasoner{ 0: testUser1Mail, }, }, @@ -784,14 +1153,14 @@ func (suite *BaseFinderUnitSuite) TestFindBases_CustomTags() { "fnords": "", "smarf": "", }, - expectedIdxs: map[int][]Reasoner{ + expectedIdxs: map[int][]identity.Reasoner{ 0: testUser1Mail, }, }, { name: "subset of custom tags", tags: map[string]string{"fnords": ""}, - expectedIdxs: map[int][]Reasoner{ + expectedIdxs: map[int][]identity.Reasoner{ 0: testUser1Mail, }, }, @@ -832,7 +1201,7 @@ func checkManifestEntriesMatch( t *testing.T, retSnaps []ManifestEntry, allExpected []manifestInfo, - expectedIdxsAndReasons map[int][]Reasoner, + expectedIdxsAndReasons map[int][]identity.Reasoner, ) { // Check the proper snapshot manifests were returned. expected := make([]*snapshot.Manifest, 0, len(expectedIdxsAndReasons)) @@ -848,7 +1217,7 @@ func checkManifestEntriesMatch( assert.ElementsMatch(t, expected, got) // Check the reasons for selecting each manifest are correct. - expectedReasons := make(map[manifest.ID][]Reasoner, len(expectedIdxsAndReasons)) + expectedReasons := make(map[manifest.ID][]identity.Reasoner, len(expectedIdxsAndReasons)) for idx, reasons := range expectedIdxsAndReasons { expectedReasons[allExpected[idx].man.ID] = reasons } @@ -874,7 +1243,7 @@ func checkBackupEntriesMatch( t *testing.T, retBups []BackupEntry, allExpected []backupInfo, - expectedIdxsAndReasons map[int][]Reasoner, + expectedIdxsAndReasons map[int][]identity.Reasoner, ) { // Check the proper snapshot manifests were returned. expected := make([]*backup.Backup, 0, len(expectedIdxsAndReasons)) @@ -890,7 +1259,7 @@ func checkBackupEntriesMatch( assert.ElementsMatch(t, expected, got) // Check the reasons for selecting each manifest are correct. 
- expectedReasons := make(map[model.StableID][]Reasoner, len(expectedIdxsAndReasons)) + expectedReasons := make(map[model.StableID][]identity.Reasoner, len(expectedIdxsAndReasons)) for idx, reasons := range expectedIdxsAndReasons { expectedReasons[allExpected[idx].b.ID] = reasons } diff --git a/src/internal/kopia/data_collection_test.go b/src/internal/kopia/data_collection_test.go index 318af2682..a4da94ee4 100644 --- a/src/internal/kopia/data_collection_test.go +++ b/src/internal/kopia/data_collection_test.go @@ -14,7 +14,7 @@ import ( "github.com/stretchr/testify/suite" "github.com/alcionai/corso/src/internal/data" - exchMock "github.com/alcionai/corso/src/internal/m365/exchange/mock" + exchMock "github.com/alcionai/corso/src/internal/m365/service/exchange/mock" "github.com/alcionai/corso/src/internal/tester" "github.com/alcionai/corso/src/pkg/fault" "github.com/alcionai/corso/src/pkg/path" diff --git a/src/internal/kopia/inject/inject.go b/src/internal/kopia/inject/inject.go index 5d8dd3bc7..3011a79e7 100644 --- a/src/internal/kopia/inject/inject.go +++ b/src/internal/kopia/inject/inject.go @@ -7,6 +7,7 @@ import ( "github.com/alcionai/corso/src/internal/data" "github.com/alcionai/corso/src/internal/kopia" "github.com/alcionai/corso/src/pkg/backup/details" + "github.com/alcionai/corso/src/pkg/backup/identity" "github.com/alcionai/corso/src/pkg/fault" "github.com/alcionai/corso/src/pkg/path" ) @@ -15,7 +16,7 @@ type ( BackupConsumer interface { ConsumeBackupCollections( ctx context.Context, - backupReasons []kopia.Reasoner, + backupReasons []identity.Reasoner, bases kopia.BackupBases, cs []data.BackupCollection, pmr prefixmatcher.StringSetReader, @@ -38,7 +39,7 @@ type ( BaseFinder interface { FindBases( ctx context.Context, - reasons []kopia.Reasoner, + reasons []identity.Reasoner, tags map[string]string, ) kopia.BackupBases } diff --git a/src/internal/kopia/merge_collection_test.go b/src/internal/kopia/merge_collection_test.go index 4ffd8d394..9aaf751a1 100644 --- a/src/internal/kopia/merge_collection_test.go +++ b/src/internal/kopia/merge_collection_test.go @@ -13,7 +13,7 @@ import ( "github.com/stretchr/testify/suite" "github.com/alcionai/corso/src/internal/data" - "github.com/alcionai/corso/src/internal/m365/exchange/mock" + "github.com/alcionai/corso/src/internal/m365/service/exchange/mock" "github.com/alcionai/corso/src/internal/tester" "github.com/alcionai/corso/src/pkg/fault" "github.com/alcionai/corso/src/pkg/path" diff --git a/src/internal/kopia/merge_details.go b/src/internal/kopia/merge_details.go index 2ec6cc4bb..11e0a94e4 100644 --- a/src/internal/kopia/merge_details.go +++ b/src/internal/kopia/merge_details.go @@ -1,6 +1,8 @@ package kopia import ( + "time" + "github.com/alcionai/clues" "github.com/alcionai/corso/src/internal/common/prefixmatcher" @@ -12,14 +14,11 @@ type DetailsMergeInfoer interface { // ItemsToMerge returns the number of items that need to be merged. ItemsToMerge() int // GetNewPathRefs takes the old RepoRef and old LocationRef of an item and - // returns the new RepoRef, a prefix of the old LocationRef to replace, and - // the new LocationRefPrefix of the item if the item should be merged. If the + // returns the new RepoRef and the new location of the item the item. If the // item shouldn't be merged nils are returned. - // - // If the returned old LocationRef prefix is equal to the old LocationRef then - // the entire LocationRef should be replaced with the returned value. 
GetNewPathRefs( oldRef *path.Builder, + modTime time.Time, oldLoc details.LocationIDer, ) (path.Path, *path.Builder, error) } @@ -27,6 +26,7 @@ type DetailsMergeInfoer interface { type prevRef struct { repoRef path.Path locRef *path.Builder + modTime *time.Time } type mergeDetails struct { @@ -42,8 +42,12 @@ func (m *mergeDetails) ItemsToMerge() int { return len(m.repoRefs) } +// addRepoRef adds an entry in mergeDetails that can be looked up later. If +// modTime is non-nil then it's checked during lookup. If it is nil then the +// mod time provided during lookup is ignored. func (m *mergeDetails) addRepoRef( oldRef *path.Builder, + modTime *time.Time, newRef path.Path, newLocRef *path.Builder, ) error { @@ -58,6 +62,7 @@ func (m *mergeDetails) addRepoRef( pr := prevRef{ repoRef: newRef, locRef: newLocRef, + modTime: modTime, } m.repoRefs[oldRef.ShortRef()] = pr @@ -67,6 +72,7 @@ func (m *mergeDetails) addRepoRef( func (m *mergeDetails) GetNewPathRefs( oldRef *path.Builder, + modTime time.Time, oldLoc details.LocationIDer, ) (path.Path, *path.Builder, error) { pr, ok := m.repoRefs[oldRef.ShortRef()] @@ -74,6 +80,14 @@ func (m *mergeDetails) GetNewPathRefs( return nil, nil, nil } + // ModTimes don't match which means we're attempting to merge a different + // version of the item (i.e. an older version from an assist base). We + // shouldn't return a match because it could cause us to source out-of-date + // details for the item. + if pr.modTime != nil && !pr.modTime.Equal(modTime) { + return nil, nil, nil + } + // This was a location specified directly by a collection. if pr.locRef != nil { return pr.repoRef, pr.locRef, nil diff --git a/src/internal/kopia/merge_details_test.go b/src/internal/kopia/merge_details_test.go index 6dfee6381..ae19a8c6a 100644 --- a/src/internal/kopia/merge_details_test.go +++ b/src/internal/kopia/merge_details_test.go @@ -2,6 +2,7 @@ package kopia import ( "testing" + "time" "github.com/alcionai/clues" "github.com/stretchr/testify/assert" @@ -47,10 +48,10 @@ func (suite *DetailsMergeInfoerUnitSuite) TestAddRepoRef_DuplicateFails() { dm := newMergeDetails() - err := dm.addRepoRef(oldRef1.ToBuilder(), oldRef1, nil) + err := dm.addRepoRef(oldRef1.ToBuilder(), nil, oldRef1, nil) require.NoError(t, err, clues.ToCore(err)) - err = dm.addRepoRef(oldRef1.ToBuilder(), oldRef1, nil) + err = dm.addRepoRef(oldRef1.ToBuilder(), nil, oldRef1, nil) require.Error(t, err, clues.ToCore(err)) } @@ -58,6 +59,10 @@ func (suite *DetailsMergeInfoerUnitSuite) TestAddRepoRef_DuplicateFails() { // for stored RepoRefs. func (suite *DetailsMergeInfoerUnitSuite) TestGetNewPathRefs() { t := suite.T() + + t1 := time.Now() + t2 := t1.Add(time.Second * 30) + oldRef1 := makePath( t, []string{ @@ -110,10 +115,13 @@ func (suite *DetailsMergeInfoerUnitSuite) TestGetNewPathRefs() { dm := newMergeDetails() - err := dm.addRepoRef(oldRef1.ToBuilder(), newRef1, newLoc1) + err := dm.addRepoRef(oldRef1.ToBuilder(), &t1, newRef1, newLoc1) require.NoError(t, err, clues.ToCore(err)) - err = dm.addRepoRef(oldRef2.ToBuilder(), newRef2, nil) + err = dm.addRepoRef(oldRef2.ToBuilder(), &t2, newRef2, nil) + require.NoError(t, err, clues.ToCore(err)) + + err = dm.addRepoRef(newRef1.ToBuilder(), nil, oldRef1, oldLoc1) require.NoError(t, err, clues.ToCore(err)) // Add prefix matcher entry. 
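The mod-time parameter threaded through addRepoRef and GetNewPathRefs above acts as a guard: an entry registered with a mod time only matches a lookup carrying the same mod time, while an entry registered with a nil mod time skips the check. A trimmed-down sketch of that behavior, using plain string keys in place of path.Builder short refs (hypothetical types, not the corso API):

```go
// Hypothetical, minimal version of the mod-time gate added to mergeDetails.
package main

import (
	"fmt"
	"time"
)

type prevRef struct {
	newRef  string
	modTime *time.Time // nil means "don't check the mod time"
}

type merger struct{ refs map[string]prevRef }

func (m *merger) add(oldRef string, modTime *time.Time, newRef string) {
	m.refs[oldRef] = prevRef{newRef: newRef, modTime: modTime}
}

func (m *merger) lookup(oldRef string, modTime time.Time) (string, bool) {
	pr, ok := m.refs[oldRef]
	if !ok {
		return "", false
	}

	// A mismatched mod time means this lookup is for a different (older)
	// version of the item, e.g. one sourced from an assist base; returning
	// no match avoids merging out-of-date details.
	if pr.modTime != nil && !pr.modTime.Equal(modTime) {
		return "", false
	}

	return pr.newRef, true
}

func main() {
	t1 := time.Now()
	m := &merger{refs: map[string]prevRef{}}
	m.add("old/item", &t1, "new/item")

	fmt.Println(m.lookup("old/item", t1))                     // new/item true
	fmt.Println(m.lookup("old/item", t1.Add(30*time.Second))) // "" false
}
```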
@@ -121,58 +129,89 @@ func (suite *DetailsMergeInfoerUnitSuite) TestGetNewPathRefs() { require.NoError(t, err, clues.ToCore(err)) table := []struct { - name string - searchRef *path.Builder - searchLoc mockLocationIDer - errCheck require.ErrorAssertionFunc - expectedRef path.Path - expectedLoc *path.Builder + name string + searchRef *path.Builder + searchModTime time.Time + searchLoc mockLocationIDer + errCheck require.ErrorAssertionFunc + expectFound bool + expectedRef path.Path + expectedLoc *path.Builder }{ { - name: "Exact Match With Loc", - searchRef: oldRef1.ToBuilder(), - searchLoc: searchLoc1, - errCheck: require.NoError, - expectedRef: newRef1, - expectedLoc: newLoc1, + name: "Exact Match With Loc", + searchRef: oldRef1.ToBuilder(), + searchModTime: t1, + searchLoc: searchLoc1, + errCheck: require.NoError, + expectFound: true, + expectedRef: newRef1, + expectedLoc: newLoc1, }, { - name: "Exact Match Without Loc", - searchRef: oldRef1.ToBuilder(), - errCheck: require.NoError, - expectedRef: newRef1, - expectedLoc: newLoc1, + name: "Exact Match Without Loc", + searchRef: oldRef1.ToBuilder(), + searchModTime: t1, + errCheck: require.NoError, + expectFound: true, + expectedRef: newRef1, + expectedLoc: newLoc1, }, { - name: "Prefix Match", - searchRef: oldRef2.ToBuilder(), - searchLoc: searchLoc2, - errCheck: require.NoError, - expectedRef: newRef2, - expectedLoc: newLoc2, + name: "Exact Match Without Loc ModTime Not In Merger", + searchRef: newRef1.ToBuilder(), + searchModTime: time.Now(), + errCheck: require.NoError, + expectFound: true, + expectedRef: oldRef1, + expectedLoc: oldLoc1, }, { - name: "Would Be Prefix Match Without Old Loc Errors", - searchRef: oldRef2.ToBuilder(), - errCheck: require.Error, + name: "Prefix Match", + searchRef: oldRef2.ToBuilder(), + searchModTime: t2, + searchLoc: searchLoc2, + errCheck: require.NoError, + expectFound: true, + expectedRef: newRef2, + expectedLoc: newLoc2, }, { - name: "Not Found With Old Loc", - searchRef: newRef1.ToBuilder(), - searchLoc: searchLoc2, - errCheck: require.NoError, + name: "Would Be Prefix Match Without Old Loc Errors", + searchRef: oldRef2.ToBuilder(), + searchModTime: t2, + errCheck: require.Error, }, { - name: "Not Found Without Old Loc", - searchRef: newRef1.ToBuilder(), - errCheck: require.NoError, + name: "Not Found With Old Loc", + searchRef: newRef2.ToBuilder(), + searchModTime: t1, + searchLoc: searchLoc2, + errCheck: require.NoError, + }, + { + name: "Not Found Without Old Loc", + searchRef: newRef2.ToBuilder(), + searchModTime: t1, + errCheck: require.NoError, + }, + { + name: "Not Found Due To Mod Time", + searchRef: oldRef1.ToBuilder(), + searchModTime: time.Now(), + searchLoc: searchLoc1, + errCheck: require.NoError, }, } + for _, test := range table { suite.Run(test.name, func() { t := suite.T() - newRef, newLoc, err := dm.GetNewPathRefs(test.searchRef, test.searchLoc) + newRef, newLoc, err := dm.GetNewPathRefs( + test.searchRef, + test.searchModTime, + test.searchLoc) test.errCheck(t, err, clues.ToCore(err)) assert.Equal(t, test.expectedRef, newRef, "RepoRef") diff --git a/src/internal/kopia/mock_backup_base.go b/src/internal/kopia/mock_backup_base.go index 84743486e..7edea1c39 100644 --- a/src/internal/kopia/mock_backup_base.go +++ b/src/internal/kopia/mock_backup_base.go @@ -14,13 +14,17 @@ func AssertBackupBasesEqual(t *testing.T, expect, got BackupBases) { if expect == nil { assert.Empty(t, got.Backups(), "backups") assert.Empty(t, got.MergeBases(), "merge bases") + assert.Empty(t, got.AssistBackups(), 
"assist backups") assert.Empty(t, got.AssistBases(), "assist bases") return } if got == nil { - if len(expect.Backups()) > 0 && len(expect.MergeBases()) > 0 && len(expect.AssistBases()) > 0 { + if len(expect.Backups()) > 0 && + len(expect.MergeBases()) > 0 && + len(expect.AssistBackups()) > 0 && + len(expect.AssistBases()) > 0 { assert.Fail(t, "got was nil but expected non-nil result %v", expect) } @@ -29,6 +33,7 @@ func AssertBackupBasesEqual(t *testing.T, expect, got BackupBases) { assert.ElementsMatch(t, expect.Backups(), got.Backups(), "backups") assert.ElementsMatch(t, expect.MergeBases(), got.MergeBases(), "merge bases") + assert.ElementsMatch(t, expect.AssistBackups(), got.AssistBackups(), "assist backups") assert.ElementsMatch(t, expect.AssistBases(), got.AssistBases(), "assist bases") } @@ -52,6 +57,11 @@ func (bb *MockBackupBases) WithMergeBases(m ...ManifestEntry) *MockBackupBases { return bb } +func (bb *MockBackupBases) WithAssistBackups(b ...BackupEntry) *MockBackupBases { + bb.backupBases.assistBackups = append(bb.AssistBackups(), b...) + return bb +} + func (bb *MockBackupBases) WithAssistBases(m ...ManifestEntry) *MockBackupBases { bb.backupBases.assistBases = append(bb.AssistBases(), m...) return bb diff --git a/src/internal/kopia/upload.go b/src/internal/kopia/upload.go index b0dbd40e5..522d3fad5 100644 --- a/src/internal/kopia/upload.go +++ b/src/internal/kopia/upload.go @@ -23,6 +23,7 @@ import ( "golang.org/x/exp/maps" "github.com/alcionai/corso/src/internal/common/prefixmatcher" + "github.com/alcionai/corso/src/internal/common/ptr" "github.com/alcionai/corso/src/internal/data" "github.com/alcionai/corso/src/internal/diagnostics" "github.com/alcionai/corso/src/internal/m365/graph" @@ -137,6 +138,7 @@ type itemDetails struct { prevPath path.Path locationPath *path.Builder cached bool + modTime *time.Time } type corsoProgress struct { @@ -148,9 +150,11 @@ type corsoProgress struct { snapshotfs.UploadProgress pending map[string]*itemDetails - deets *details.Builder - // toMerge represents items that we don't have in-memory item info for. The - // item info for these items should be sourced from a base snapshot later on. + // deets contains entries that are complete and don't need merged with base + // backup data at all. + deets *details.Builder + // toMerge represents items that we either don't have in-memory item info or + // that need sourced from a base backup due to caching etc. toMerge *mergeDetails mu sync.RWMutex totalBytes int64 @@ -194,7 +198,7 @@ func (cp *corsoProgress) FinishedFile(relativePath string, err error) { // These items were sourced from a base snapshot or were cached in kopia so we // never had to materialize their details in-memory. - if d.info == nil { + if d.info == nil || d.cached { if d.prevPath == nil { cp.errs.AddRecoverable(cp.ctx, clues.New("item sourced from previous backup with no previous path"). With( @@ -208,7 +212,11 @@ func (cp *corsoProgress) FinishedFile(relativePath string, err error) { cp.mu.Lock() defer cp.mu.Unlock() - err := cp.toMerge.addRepoRef(d.prevPath.ToBuilder(), d.repoPath, d.locationPath) + err := cp.toMerge.addRepoRef( + d.prevPath.ToBuilder(), + d.modTime, + d.repoPath, + d.locationPath) if err != nil { cp.errs.AddRecoverable(cp.ctx, clues.Wrap(err, "adding item to merge list"). With( @@ -375,6 +383,11 @@ func collectionEntries( continue } + modTime := time.Now() + if smt, ok := e.(data.StreamModTime); ok { + modTime = smt.ModTime() + } + // Not all items implement StreamInfo. 
For example, the metadata files // do not because they don't contain information directly backed up or // used for restore. If progress does not contain information about a @@ -391,18 +404,22 @@ func collectionEntries( // info nil. itemInfo := ei.Info() d := &itemDetails{ - info: &itemInfo, - repoPath: itemPath, + info: &itemInfo, + repoPath: itemPath, + // Also use the current path as the previous path for this item. This + // is so that if the item is marked as cached and we need to merge + // details with an assist backup base which sourced the cached item we + // can find it with the lookup in DetailsMergeInfoer. + // + // This all works out because cached item checks in kopia are direct + // path + metadata comparisons. + prevPath: itemPath, locationPath: locationPath, + modTime: &modTime, } progress.put(encodeAsPath(itemPath.PopFront().Elements()...), d) } - modTime := time.Now() - if smt, ok := e.(data.StreamModTime); ok { - modTime = smt.ModTime() - } - entry := virtualfs.StreamingFileWithModTimeFromReader( encodedName, modTime, @@ -508,6 +525,7 @@ func streamBaseEntries( repoPath: itemPath, prevPath: prevItemPath, locationPath: locationPath, + modTime: ptr.To(entry.ModTime()), } progress.put(encodeAsPath(itemPath.PopFront().Elements()...), d) } diff --git a/src/internal/kopia/upload_test.go b/src/internal/kopia/upload_test.go index bbdbe9e6f..95c39c46d 100644 --- a/src/internal/kopia/upload_test.go +++ b/src/internal/kopia/upload_test.go @@ -21,9 +21,10 @@ import ( pmMock "github.com/alcionai/corso/src/internal/common/prefixmatcher/mock" "github.com/alcionai/corso/src/internal/data" - exchMock "github.com/alcionai/corso/src/internal/m365/exchange/mock" + exchMock "github.com/alcionai/corso/src/internal/m365/service/exchange/mock" "github.com/alcionai/corso/src/internal/tester" "github.com/alcionai/corso/src/pkg/backup/details" + "github.com/alcionai/corso/src/pkg/backup/identity" "github.com/alcionai/corso/src/pkg/fault" "github.com/alcionai/corso/src/pkg/path" ) @@ -386,7 +387,9 @@ var finishedFileTable = []struct { cachedItems func(fname string, fpath path.Path) map[string]testInfo expectedBytes int64 expectedNumEntries int - err error + // Non-folder items. + expectedNumItems int + err error }{ { name: "DetailsExist", @@ -410,6 +413,7 @@ var finishedFileTable = []struct { expectedBytes: 100, // 1 file and 5 folders. 
expectedNumEntries: 2, + expectedNumItems: 1, }, { name: "PendingNoDetails", @@ -453,16 +457,34 @@ var finishedFileTable = []struct { func (suite *CorsoProgressUnitSuite) TestFinishedFile() { table := []struct { - name string - cached bool + name string + cached bool + differentPrevPath bool + dropInfo bool + expectToMergeEntries bool }{ { name: "all updated", cached: false, }, { - name: "all cached", - cached: true, + name: "all cached from assist base", + cached: true, + expectToMergeEntries: true, + }, + { + name: "all cached from merge base", + cached: true, + differentPrevPath: true, + dropInfo: true, + expectToMergeEntries: true, + }, + { + name: "all not cached from merge base", + cached: false, + differentPrevPath: true, + dropInfo: true, + expectToMergeEntries: true, }, } @@ -480,6 +502,7 @@ func (suite *CorsoProgressUnitSuite) TestFinishedFile() { ctx: ctx, UploadProgress: &snapshotfs.NullUploadProgress{}, deets: bd, + toMerge: newMergeDetails(), pending: map[string]*itemDetails{}, errs: fault.New(true), } @@ -487,6 +510,29 @@ func (suite *CorsoProgressUnitSuite) TestFinishedFile() { ci := test.cachedItems(suite.targetFileName, suite.targetFilePath) for k, v := range ci { + if v.info != nil { + v.info.prevPath = v.info.repoPath + + if cachedTest.differentPrevPath { + // Doesn't really matter how we change the path as long as it's + // different somehow. + p, err := path.FromDataLayerPath( + suite.targetFilePath.String()+"2", + true) + require.NoError( + t, + err, + "making prevPath: %v", + clues.ToCore(err)) + + v.info.prevPath = p + } + + if cachedTest.dropInfo { + v.info.info = nil + } + } + cp.put(k, v.info) } @@ -509,6 +555,17 @@ func (suite *CorsoProgressUnitSuite) TestFinishedFile() { assert.Empty(t, cp.pending) entries := bd.Details().Entries + + if cachedTest.expectToMergeEntries { + assert.Equal( + t, + test.expectedNumItems, + cp.toMerge.ItemsToMerge(), + "merge entries") + + return + } + assert.Len(t, entries, test.expectedNumEntries) for _, entry := range entries { @@ -616,7 +673,10 @@ func (suite *CorsoProgressUnitSuite) TestFinishedFileBaseItemDoesntBuildHierarch assert.Empty(t, cp.deets) for _, expected := range expectedToMerge { - gotRef, _, _ := cp.toMerge.GetNewPathRefs(expected.oldRef, nil) + gotRef, _, _ := cp.toMerge.GetNewPathRefs( + expected.oldRef, + time.Now(), + nil) if !assert.NotNil(t, gotRef) { continue } @@ -951,7 +1011,7 @@ func makeManifestEntry( service path.ServiceType, categories ...path.CategoryType, ) ManifestEntry { - var reasons []Reasoner + var reasons []identity.Reasoner for _, c := range categories { reasons = append(reasons, NewReason(tenant, resourceOwner, service, c)) diff --git a/src/internal/kopia/wrapper.go b/src/internal/kopia/wrapper.go index 7b1feca44..7bfe92a51 100644 --- a/src/internal/kopia/wrapper.go +++ b/src/internal/kopia/wrapper.go @@ -23,6 +23,7 @@ import ( "github.com/alcionai/corso/src/internal/operations/inject" "github.com/alcionai/corso/src/internal/stats" "github.com/alcionai/corso/src/pkg/backup/details" + "github.com/alcionai/corso/src/pkg/backup/identity" "github.com/alcionai/corso/src/pkg/control/repository" "github.com/alcionai/corso/src/pkg/fault" "github.com/alcionai/corso/src/pkg/logger" @@ -137,7 +138,7 @@ func (w *Wrapper) Close(ctx context.Context) error { // complete backup of all data. 
func (w Wrapper) ConsumeBackupCollections( ctx context.Context, - backupReasons []Reasoner, + backupReasons []identity.Reasoner, bases BackupBases, collections []data.BackupCollection, globalExcludeSet prefixmatcher.StringSetReader, diff --git a/src/internal/kopia/wrapper_test.go b/src/internal/kopia/wrapper_test.go index b58f87be8..582c3ff78 100644 --- a/src/internal/kopia/wrapper_test.go +++ b/src/internal/kopia/wrapper_test.go @@ -25,10 +25,11 @@ import ( "github.com/alcionai/corso/src/internal/common/ptr" "github.com/alcionai/corso/src/internal/data" "github.com/alcionai/corso/src/internal/data/mock" - exchMock "github.com/alcionai/corso/src/internal/m365/exchange/mock" - "github.com/alcionai/corso/src/internal/m365/onedrive/metadata" + "github.com/alcionai/corso/src/internal/m365/collection/drive/metadata" + exchMock "github.com/alcionai/corso/src/internal/m365/service/exchange/mock" "github.com/alcionai/corso/src/internal/tester" "github.com/alcionai/corso/src/pkg/backup/details" + "github.com/alcionai/corso/src/pkg/backup/identity" "github.com/alcionai/corso/src/pkg/control/repository" "github.com/alcionai/corso/src/pkg/fault" "github.com/alcionai/corso/src/pkg/logger" @@ -800,7 +801,7 @@ func (suite *KopiaIntegrationSuite) TestBackupCollections() { "brunhilda": "", } - reasons := []Reasoner{ + reasons := []identity.Reasoner{ NewReason( testTenant, suite.storePath1.ResourceOwner(), @@ -964,9 +965,11 @@ func (suite *KopiaIntegrationSuite) TestBackupCollections() { collections: collections, expectedUploadedFiles: 0, expectedCachedFiles: 47, - deetsUpdated: assert.False, - hashedBytesCheck: assert.Zero, - uploadedBytes: []int64{4000, 6000}, + // Entries go to details merger since cached files are merged too. + expectMerge: true, + deetsUpdated: assert.False, + hashedBytesCheck: assert.Zero, + uploadedBytes: []int64{4000, 6000}, }, { name: "Kopia Assist And Merge No Files Changed", @@ -998,6 +1001,7 @@ func (suite *KopiaIntegrationSuite) TestBackupCollections() { collections: collections, expectedUploadedFiles: 0, expectedCachedFiles: 47, + expectMerge: true, deetsUpdated: assert.False, hashedBytesCheck: assert.Zero, uploadedBytes: []int64{4000, 6000}, @@ -1072,7 +1076,7 @@ func (suite *KopiaIntegrationSuite) TestBackupCollections_NoDetailsForMeta() { "brunhilda": "", } - reasons := []Reasoner{ + reasons := []identity.Reasoner{ NewReason( testTenant, storePath.ResourceOwner(), @@ -1267,7 +1271,7 @@ func (suite *KopiaIntegrationSuite) TestRestoreAfterCompressionChange() { stats, _, _, err := w.ConsumeBackupCollections( ctx, - []Reasoner{r}, + []identity.Reasoner{r}, nil, []data.BackupCollection{dc1, dc2}, nil, @@ -1296,6 +1300,7 @@ func (suite *KopiaIntegrationSuite) TestRestoreAfterCompressionChange() { testForFiles(t, ctx, expected, result) } +// TODO(pandeyabs): Switch to m365/mock/BackupCollection. 
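Stepping back to the upload changes earlier in this patch: FinishedFile now routes cached items, as well as items without in-memory info, into the details merger keyed by their previous path and mod time, instead of writing their details directly. A rough, hypothetical sketch of that routing (placeholder types only, not the real corsoProgress):

```go
// Simplified routing: cached or base-sourced items are queued for details
// merging; fully materialized items are written straight to details.
package main

import (
	"fmt"
	"time"
)

type itemDetails struct {
	info     *string // nil means no in-memory details
	cached   bool
	prevPath string
	modTime  time.Time
}

func finishedFile(d itemDetails, deets *[]string, toMerge map[string]time.Time) {
	// Cached and base-sourced items pull their details from a base backup
	// later, matched by previous path + mod time.
	if d.info == nil || d.cached {
		toMerge[d.prevPath] = d.modTime
		return
	}

	// Everything else already has complete details in memory.
	*deets = append(*deets, *d.info)
}

func main() {
	var (
		deets   []string
		toMerge = map[string]time.Time{}
		info    = "item details"
	)

	finishedFile(itemDetails{info: &info}, &deets, toMerge)
	finishedFile(itemDetails{info: &info, cached: true, prevPath: "a/b/old", modTime: time.Now()}, &deets, toMerge)

	fmt.Println(len(deets), len(toMerge)) // 1 1
}
```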
type mockBackupCollection struct { path path.Path loc *path.Builder @@ -1385,7 +1390,7 @@ func (suite *KopiaIntegrationSuite) TestBackupCollections_ReaderError() { stats, deets, _, err := suite.w.ConsumeBackupCollections( suite.ctx, - []Reasoner{r}, + []identity.Reasoner{r}, nil, collections, nil, @@ -1618,7 +1623,7 @@ func (suite *KopiaSimpleRepoIntegrationSuite) SetupTest() { stats, deets, _, err := suite.w.ConsumeBackupCollections( suite.ctx, - []Reasoner{r}, + []identity.Reasoner{r}, nil, collections, nil, @@ -1745,11 +1750,11 @@ func (suite *KopiaSimpleRepoIntegrationSuite) TestBackupExcludeItem() { stats, _, _, err := suite.w.ConsumeBackupCollections( suite.ctx, - []Reasoner{r}, + []identity.Reasoner{r}, NewMockBackupBases().WithMergeBases( ManifestEntry{ Manifest: man, - Reasons: []Reasoner{r}, + Reasons: []identity.Reasoner{r}, }, ), test.cols(), diff --git a/src/internal/m365/backup.go b/src/internal/m365/backup.go index 31f79871e..9e7194511 100644 --- a/src/internal/m365/backup.go +++ b/src/internal/m365/backup.go @@ -8,10 +8,10 @@ import ( "github.com/alcionai/corso/src/internal/common/prefixmatcher" "github.com/alcionai/corso/src/internal/data" "github.com/alcionai/corso/src/internal/diagnostics" - "github.com/alcionai/corso/src/internal/m365/exchange" "github.com/alcionai/corso/src/internal/m365/graph" - "github.com/alcionai/corso/src/internal/m365/onedrive" - "github.com/alcionai/corso/src/internal/m365/sharepoint" + "github.com/alcionai/corso/src/internal/m365/service/exchange" + "github.com/alcionai/corso/src/internal/m365/service/onedrive" + "github.com/alcionai/corso/src/internal/m365/service/sharepoint" "github.com/alcionai/corso/src/internal/operations/inject" "github.com/alcionai/corso/src/pkg/fault" "github.com/alcionai/corso/src/pkg/filters" @@ -110,7 +110,7 @@ func (ctrl *Controller) ProduceBackupCollections( bpc, ctrl.AC, ctrl.credentials, - ctrl, + ctrl.UpdateStatus, errs) if err != nil { return nil, nil, false, err diff --git a/src/internal/m365/backup_test.go b/src/internal/m365/backup_test.go index 4a1babe47..3e35b0030 100644 --- a/src/internal/m365/backup_test.go +++ b/src/internal/m365/backup_test.go @@ -11,9 +11,9 @@ import ( "github.com/stretchr/testify/suite" inMock "github.com/alcionai/corso/src/internal/common/idname/mock" - "github.com/alcionai/corso/src/internal/m365/exchange" "github.com/alcionai/corso/src/internal/m365/resource" - "github.com/alcionai/corso/src/internal/m365/sharepoint" + "github.com/alcionai/corso/src/internal/m365/service/exchange" + "github.com/alcionai/corso/src/internal/m365/service/sharepoint" "github.com/alcionai/corso/src/internal/operations/inject" "github.com/alcionai/corso/src/internal/tester" "github.com/alcionai/corso/src/internal/tester/tconfig" @@ -307,7 +307,7 @@ func (suite *DataCollectionIntgSuite) TestSharePointDataCollection() { bpc, suite.ac, ctrl.credentials, - ctrl, + ctrl.UpdateStatus, fault.New(true)) require.NoError(t, err, clues.ToCore(err)) assert.True(t, canUsePreviousBackup, "can use previous backup") diff --git a/src/internal/m365/onedrive/collection.go b/src/internal/m365/collection/drive/collection.go similarity index 97% rename from src/internal/m365/onedrive/collection.go rename to src/internal/m365/collection/drive/collection.go index e393554ab..a7ea841d1 100644 --- a/src/internal/m365/onedrive/collection.go +++ b/src/internal/m365/collection/drive/collection.go @@ -1,5 +1,5 @@ -// Package onedrive provides support for retrieving M365 OneDrive objects -package onedrive +// Package drive provides 
support for retrieving M365 Drive objects +package drive import ( "context" @@ -15,8 +15,8 @@ import ( "github.com/alcionai/corso/src/internal/common/ptr" "github.com/alcionai/corso/src/internal/data" + "github.com/alcionai/corso/src/internal/m365/collection/drive/metadata" "github.com/alcionai/corso/src/internal/m365/graph" - "github.com/alcionai/corso/src/internal/m365/onedrive/metadata" "github.com/alcionai/corso/src/internal/m365/support" "github.com/alcionai/corso/src/internal/observe" "github.com/alcionai/corso/src/pkg/backup/details" @@ -512,10 +512,14 @@ func (oc *Collection) populateDriveItem( metaSuffix = metadata.DirMetaFileSuffix } - // Fetch metadata for the file + // Fetch metadata for the item itemMeta, itemMetaSize, err = downloadItemMeta(ctx, oc.handler, oc.driveID, item) if err != nil { - errs.AddRecoverable(ctx, clues.Wrap(err, "getting item metadata").Label(fault.LabelForceNoBackupCreation)) + // Skip deleted items + if !clues.HasLabel(err, graph.LabelStatus(http.StatusNotFound)) && !graph.IsErrDeletedInFlight(err) { + errs.AddRecoverable(ctx, clues.Wrap(err, "getting item metadata").Label(fault.LabelForceNoBackupCreation)) + } + return } diff --git a/src/internal/m365/onedrive/collection_test.go b/src/internal/m365/collection/drive/collection_test.go similarity index 98% rename from src/internal/m365/onedrive/collection_test.go rename to src/internal/m365/collection/drive/collection_test.go index 3c30cac22..3636b7663 100644 --- a/src/internal/m365/onedrive/collection_test.go +++ b/src/internal/m365/collection/drive/collection_test.go @@ -1,4 +1,4 @@ -package onedrive +package drive import ( "bytes" @@ -20,11 +20,11 @@ import ( "github.com/alcionai/corso/src/internal/common/ptr" "github.com/alcionai/corso/src/internal/data" + "github.com/alcionai/corso/src/internal/m365/collection/drive/metadata" + metaTD "github.com/alcionai/corso/src/internal/m365/collection/drive/metadata/testdata" "github.com/alcionai/corso/src/internal/m365/graph" - "github.com/alcionai/corso/src/internal/m365/onedrive/metadata" - metaTD "github.com/alcionai/corso/src/internal/m365/onedrive/metadata/testdata" - "github.com/alcionai/corso/src/internal/m365/onedrive/mock" - odTD "github.com/alcionai/corso/src/internal/m365/onedrive/testdata" + "github.com/alcionai/corso/src/internal/m365/service/onedrive/mock" + odTD "github.com/alcionai/corso/src/internal/m365/service/onedrive/testdata" "github.com/alcionai/corso/src/internal/m365/support" "github.com/alcionai/corso/src/internal/tester" "github.com/alcionai/corso/src/pkg/backup/details" diff --git a/src/internal/m365/onedrive/collections.go b/src/internal/m365/collection/drive/collections.go similarity index 99% rename from src/internal/m365/onedrive/collections.go rename to src/internal/m365/collection/drive/collections.go index a73d46fec..ac976b015 100644 --- a/src/internal/m365/onedrive/collections.go +++ b/src/internal/m365/collection/drive/collections.go @@ -1,4 +1,4 @@ -package onedrive +package drive import ( "context" @@ -14,9 +14,9 @@ import ( "github.com/alcionai/corso/src/internal/common/prefixmatcher" "github.com/alcionai/corso/src/internal/common/ptr" "github.com/alcionai/corso/src/internal/data" + "github.com/alcionai/corso/src/internal/m365/collection/drive/metadata" "github.com/alcionai/corso/src/internal/m365/graph" - odConsts "github.com/alcionai/corso/src/internal/m365/onedrive/consts" - "github.com/alcionai/corso/src/internal/m365/onedrive/metadata" + odConsts "github.com/alcionai/corso/src/internal/m365/service/onedrive/consts" 
"github.com/alcionai/corso/src/internal/m365/support" "github.com/alcionai/corso/src/internal/observe" "github.com/alcionai/corso/src/pkg/control" diff --git a/src/internal/m365/onedrive/collections_test.go b/src/internal/m365/collection/drive/collections_test.go similarity index 99% rename from src/internal/m365/onedrive/collections_test.go rename to src/internal/m365/collection/drive/collections_test.go index b1ca92963..f118c0f03 100644 --- a/src/internal/m365/onedrive/collections_test.go +++ b/src/internal/m365/collection/drive/collections_test.go @@ -1,4 +1,4 @@ -package onedrive +package drive import ( "context" @@ -17,10 +17,10 @@ import ( "github.com/alcionai/corso/src/internal/common/prefixmatcher" pmMock "github.com/alcionai/corso/src/internal/common/prefixmatcher/mock" "github.com/alcionai/corso/src/internal/data" + "github.com/alcionai/corso/src/internal/m365/collection/drive/metadata" "github.com/alcionai/corso/src/internal/m365/graph" - odConsts "github.com/alcionai/corso/src/internal/m365/onedrive/consts" - "github.com/alcionai/corso/src/internal/m365/onedrive/metadata" - "github.com/alcionai/corso/src/internal/m365/onedrive/mock" + odConsts "github.com/alcionai/corso/src/internal/m365/service/onedrive/consts" + "github.com/alcionai/corso/src/internal/m365/service/onedrive/mock" "github.com/alcionai/corso/src/internal/m365/support" "github.com/alcionai/corso/src/internal/tester" "github.com/alcionai/corso/src/pkg/control" diff --git a/src/internal/m365/onedrive/folder_cache.go b/src/internal/m365/collection/drive/folder_cache.go similarity index 97% rename from src/internal/m365/onedrive/folder_cache.go rename to src/internal/m365/collection/drive/folder_cache.go index 696d42819..1fa4643db 100644 --- a/src/internal/m365/onedrive/folder_cache.go +++ b/src/internal/m365/collection/drive/folder_cache.go @@ -1,4 +1,4 @@ -package onedrive +package drive import ( "github.com/microsoftgraph/msgraph-sdk-go/models" diff --git a/src/internal/m365/onedrive/handlers.go b/src/internal/m365/collection/drive/handlers.go similarity index 99% rename from src/internal/m365/onedrive/handlers.go rename to src/internal/m365/collection/drive/handlers.go index cb33b373d..239bcbef5 100644 --- a/src/internal/m365/onedrive/handlers.go +++ b/src/internal/m365/collection/drive/handlers.go @@ -1,4 +1,4 @@ -package onedrive +package drive import ( "context" diff --git a/src/internal/m365/onedrive/service_test.go b/src/internal/m365/collection/drive/helper_test.go similarity index 98% rename from src/internal/m365/onedrive/service_test.go rename to src/internal/m365/collection/drive/helper_test.go index a2766b8ee..0c9ec8f8c 100644 --- a/src/internal/m365/onedrive/service_test.go +++ b/src/internal/m365/collection/drive/helper_test.go @@ -1,4 +1,4 @@ -package onedrive +package drive import ( "testing" diff --git a/src/internal/m365/onedrive/item.go b/src/internal/m365/collection/drive/item.go similarity index 75% rename from src/internal/m365/onedrive/item.go rename to src/internal/m365/collection/drive/item.go index a149efd12..19da4a30e 100644 --- a/src/internal/m365/onedrive/item.go +++ b/src/internal/m365/collection/drive/item.go @@ -1,4 +1,4 @@ -package onedrive +package drive import ( "bytes" @@ -8,14 +8,21 @@ import ( "github.com/alcionai/clues" "github.com/microsoftgraph/msgraph-sdk-go/models" + "golang.org/x/exp/maps" "github.com/alcionai/corso/src/internal/common/ptr" + "github.com/alcionai/corso/src/internal/common/readers" "github.com/alcionai/corso/src/internal/common/str" + 
"github.com/alcionai/corso/src/internal/m365/collection/drive/metadata" "github.com/alcionai/corso/src/internal/m365/graph" - "github.com/alcionai/corso/src/internal/m365/onedrive/metadata" "github.com/alcionai/corso/src/pkg/services/m365/api" ) +const ( + acceptHeaderKey = "Accept" + acceptHeaderValue = "*/*" +) + // downloadUrlKeys is used to find the download URL in a DriveItem response. var downloadURLKeys = []string{ "@microsoft.graph.downloadUrl", @@ -59,25 +66,42 @@ func downloadItem( return rc, nil } -func downloadFile( - ctx context.Context, - ag api.Getter, - url string, -) (io.ReadCloser, error) { - if len(url) == 0 { - return nil, clues.New("empty file url") - } +type downloadWithRetries struct { + getter api.Getter + url string +} - resp, err := ag.Get(ctx, url, nil) +func (dg *downloadWithRetries) SupportsRange() bool { + return true +} + +func (dg *downloadWithRetries) Get( + ctx context.Context, + additionalHeaders map[string]string, +) (io.ReadCloser, error) { + headers := maps.Clone(additionalHeaders) + // Set the accept header like curl does. Local testing showed range headers + // wouldn't work without it (get 416 responses instead of 206). + headers[acceptHeaderKey] = acceptHeaderValue + + resp, err := dg.getter.Get(ctx, dg.url, headers) if err != nil { return nil, clues.Wrap(err, "getting file") } if graph.IsMalwareResp(ctx, resp) { + if resp != nil && resp.Body != nil { + resp.Body.Close() + } + return nil, clues.New("malware detected").Label(graph.LabelsMalware) } if resp != nil && (resp.StatusCode/100) != 2 { + if resp.Body != nil { + resp.Body.Close() + } + // upstream error checks can compare the status with // clues.HasLabel(err, graph.LabelStatus(http.KnownStatusCode)) return nil, clues. @@ -88,6 +112,25 @@ func downloadFile( return resp.Body, nil } +func downloadFile( + ctx context.Context, + ag api.Getter, + url string, +) (io.ReadCloser, error) { + if len(url) == 0 { + return nil, clues.New("empty file url").WithClues(ctx) + } + + rc, err := readers.NewResetRetryHandler( + ctx, + &downloadWithRetries{ + getter: ag, + url: url, + }) + + return rc, clues.Stack(err).OrNil() +} + func downloadItemMeta( ctx context.Context, gip GetItemPermissioner, diff --git a/src/internal/m365/onedrive/item_collector.go b/src/internal/m365/collection/drive/item_collector.go similarity index 61% rename from src/internal/m365/onedrive/item_collector.go rename to src/internal/m365/collection/drive/item_collector.go index 3bc23bacd..d737c4abd 100644 --- a/src/internal/m365/onedrive/item_collector.go +++ b/src/internal/m365/collection/drive/item_collector.go @@ -1,14 +1,11 @@ -package onedrive +package drive import ( "context" - "strings" - "github.com/alcionai/clues" "github.com/microsoftgraph/msgraph-sdk-go/models" "golang.org/x/exp/maps" - "github.com/alcionai/corso/src/internal/common/ptr" "github.com/alcionai/corso/src/internal/m365/graph" "github.com/alcionai/corso/src/pkg/fault" "github.com/alcionai/corso/src/pkg/logger" @@ -148,105 +145,3 @@ func newItem(name string, folder bool) *models.DriveItem { return itemToCreate } - -type Displayable struct { - models.DriveItemable -} - -func (op *Displayable) GetDisplayName() *string { - return op.GetName() -} - -// GetAllFolders returns all folders in all drives for the given user. If a -// prefix is given, returns all folders with that prefix, regardless of if they -// are a subfolder or top-level folder in the hierarchy. 
-func GetAllFolders( - ctx context.Context, - bh BackupHandler, - pager api.DrivePager, - prefix string, - errs *fault.Bus, -) ([]*Displayable, error) { - ds, err := api.GetAllDrives(ctx, pager, true, maxDrivesRetries) - if err != nil { - return nil, clues.Wrap(err, "getting OneDrive folders") - } - - var ( - folders = map[string]*Displayable{} - el = errs.Local() - ) - - for _, drive := range ds { - if el.Failure() != nil { - break - } - - var ( - id = ptr.Val(drive.GetId()) - name = ptr.Val(drive.GetName()) - ) - - ictx := clues.Add(ctx, "drive_id", id, "drive_name", clues.Hide(name)) - collector := func( - _ context.Context, - _, _ string, - items []models.DriveItemable, - _ map[string]string, - _ map[string]string, - _ map[string]struct{}, - _ map[string]map[string]string, - _ bool, - _ *fault.Bus, - ) error { - for _, item := range items { - // Skip the root item. - if item.GetRoot() != nil { - continue - } - - // Only selecting folders right now, not packages. - if item.GetFolder() == nil { - continue - } - - itemID := ptr.Val(item.GetId()) - if len(itemID) == 0 { - logger.Ctx(ctx).Info("folder missing ID") - continue - } - - if !strings.HasPrefix(ptr.Val(item.GetName()), prefix) { - continue - } - - // Add the item instead of the folder because the item has more - // functionality. - folders[itemID] = &Displayable{item} - } - - return nil - } - - _, _, _, err = collectItems( - ictx, - bh.NewItemPager(id, "", nil), - id, - name, - collector, - map[string]string{}, - "", - errs) - if err != nil { - el.AddRecoverable(ctx, clues.Wrap(err, "enumerating items in drive")) - } - } - - res := make([]*Displayable, 0, len(folders)) - - for _, f := range folders { - res = append(res, f) - } - - return res, el.Failure() -} diff --git a/src/internal/m365/onedrive/item_collector_test.go b/src/internal/m365/collection/drive/item_collector_test.go similarity index 73% rename from src/internal/m365/onedrive/item_collector_test.go rename to src/internal/m365/collection/drive/item_collector_test.go index ea6726839..f8aca7eb6 100644 --- a/src/internal/m365/onedrive/item_collector_test.go +++ b/src/internal/m365/collection/drive/item_collector_test.go @@ -1,8 +1,7 @@ -package onedrive +package drive import ( "context" - "strings" "testing" "github.com/alcionai/clues" @@ -13,17 +12,13 @@ import ( "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" - "github.com/alcionai/corso/src/internal/common/dttm" "github.com/alcionai/corso/src/internal/common/prefixmatcher" - "github.com/alcionai/corso/src/internal/common/ptr" "github.com/alcionai/corso/src/internal/m365/graph" "github.com/alcionai/corso/src/internal/tester" "github.com/alcionai/corso/src/internal/tester/tconfig" "github.com/alcionai/corso/src/pkg/account" "github.com/alcionai/corso/src/pkg/control" "github.com/alcionai/corso/src/pkg/fault" - "github.com/alcionai/corso/src/pkg/logger" - "github.com/alcionai/corso/src/pkg/path" "github.com/alcionai/corso/src/pkg/selectors" "github.com/alcionai/corso/src/pkg/services/m365/api" "github.com/alcionai/corso/src/pkg/services/m365/api/mock" @@ -317,113 +312,6 @@ func (suite *OneDriveIntgSuite) SetupSuite() { require.NoError(t, err, clues.ToCore(err)) } -func (suite *OneDriveIntgSuite) TestCreateGetDeleteFolder() { - t := suite.T() - - ctx, flush := tester.NewContext(t) - defer flush() - - var ( - folderIDs = []string{} - folderName1 = "Corso_Folder_Test_" + dttm.FormatNow(dttm.SafeForTesting) - folderElements = []string{folderName1} - ) - - pager := 
suite.ac.Drives().NewUserDrivePager(suite.userID, nil) - - drives, err := api.GetAllDrives(ctx, pager, true, maxDrivesRetries) - require.NoError(t, err, clues.ToCore(err)) - require.NotEmpty(t, drives) - - // TODO: Verify the intended drive - driveID := ptr.Val(drives[0].GetId()) - - defer func() { - for _, id := range folderIDs { - ictx := clues.Add(ctx, "folder_id", id) - - // deletes require unique http clients - // https://github.com/alcionai/corso/issues/2707 - err := suite.ac.Drives().DeleteItem(ictx, driveID, id) - if err != nil { - logger.CtxErr(ictx, err).Errorw("deleting folder") - } - } - }() - - rootFolder, err := suite.ac.Drives().GetRootFolder(ctx, driveID) - require.NoError(t, err, clues.ToCore(err)) - - restoreDir := path.Builder{}.Append(folderElements...) - drivePath := path.DrivePath{ - DriveID: driveID, - Root: "root:", - Folders: folderElements, - } - - caches := NewRestoreCaches(nil) - caches.DriveIDToDriveInfo.Store(driveID, driveInfo{rootFolderID: ptr.Val(rootFolder.GetId())}) - - rh := NewRestoreHandler(suite.ac) - - folderID, err := createRestoreFolders(ctx, rh, &drivePath, restoreDir, caches) - require.NoError(t, err, clues.ToCore(err)) - - folderIDs = append(folderIDs, folderID) - - folderName2 := "Corso_Folder_Test_" + dttm.FormatNow(dttm.SafeForTesting) - restoreDir = restoreDir.Append(folderName2) - - folderID, err = createRestoreFolders(ctx, rh, &drivePath, restoreDir, caches) - require.NoError(t, err, clues.ToCore(err)) - - folderIDs = append(folderIDs, folderID) - - table := []struct { - name string - prefix string - }{ - { - name: "NoPrefix", - prefix: "", - }, - { - name: "Prefix", - prefix: "Corso_Folder_Test", - }, - } - - for _, test := range table { - suite.Run(test.name, func() { - t := suite.T() - bh := itemBackupHandler{ - suite.ac.Drives(), - (&selectors.OneDriveBackup{}).Folders(selectors.Any())[0], - } - pager := suite.ac.Drives().NewUserDrivePager(suite.userID, nil) - - ctx, flush := tester.NewContext(t) - defer flush() - - allFolders, err := GetAllFolders(ctx, bh, pager, test.prefix, fault.New(true)) - require.NoError(t, err, clues.ToCore(err)) - - foundFolderIDs := []string{} - - for _, f := range allFolders { - - if ptr.Val(f.GetName()) == folderName1 || ptr.Val(f.GetName()) == folderName2 { - foundFolderIDs = append(foundFolderIDs, ptr.Val(f.GetId())) - } - - assert.True(t, strings.HasPrefix(ptr.Val(f.GetName()), test.prefix), "folder prefix") - } - - assert.ElementsMatch(t, folderIDs, foundFolderIDs) - }) - } -} - func (suite *OneDriveIntgSuite) TestOneDriveNewCollections() { creds, err := tconfig.NewM365Account(suite.T()).M365Config() require.NoError(suite.T(), err, clues.ToCore(err)) diff --git a/src/internal/m365/onedrive/item_handler.go b/src/internal/m365/collection/drive/item_handler.go similarity index 97% rename from src/internal/m365/onedrive/item_handler.go rename to src/internal/m365/collection/drive/item_handler.go index 64701da8f..929649aae 100644 --- a/src/internal/m365/onedrive/item_handler.go +++ b/src/internal/m365/collection/drive/item_handler.go @@ -1,4 +1,4 @@ -package onedrive +package drive import ( "context" @@ -10,7 +10,7 @@ import ( "github.com/microsoftgraph/msgraph-sdk-go/models" "github.com/alcionai/corso/src/internal/common/ptr" - odConsts "github.com/alcionai/corso/src/internal/m365/onedrive/consts" + odConsts "github.com/alcionai/corso/src/internal/m365/service/onedrive/consts" "github.com/alcionai/corso/src/pkg/backup/details" "github.com/alcionai/corso/src/pkg/control" 
"github.com/alcionai/corso/src/pkg/path" @@ -29,6 +29,10 @@ type itemBackupHandler struct { scope selectors.OneDriveScope } +func NewItemBackupHandler(ac api.Drives, scope selectors.OneDriveScope) *itemBackupHandler { + return &itemBackupHandler{ac, scope} +} + func (h itemBackupHandler) Get( ctx context.Context, url string, diff --git a/src/internal/m365/onedrive/item_handler_test.go b/src/internal/m365/collection/drive/item_handler_test.go similarity index 98% rename from src/internal/m365/onedrive/item_handler_test.go rename to src/internal/m365/collection/drive/item_handler_test.go index dbc2c0b61..76767acce 100644 --- a/src/internal/m365/onedrive/item_handler_test.go +++ b/src/internal/m365/collection/drive/item_handler_test.go @@ -1,4 +1,4 @@ -package onedrive +package drive import ( "testing" diff --git a/src/internal/m365/onedrive/item_test.go b/src/internal/m365/collection/drive/item_test.go similarity index 89% rename from src/internal/m365/onedrive/item_test.go rename to src/internal/m365/collection/drive/item_test.go index b3f352bbf..dfec42e2d 100644 --- a/src/internal/m365/onedrive/item_test.go +++ b/src/internal/m365/collection/drive/item_test.go @@ -1,14 +1,16 @@ -package onedrive +package drive import ( "bytes" "context" "io" "net/http" + "syscall" "testing" "github.com/alcionai/clues" "github.com/microsoftgraph/msgraph-sdk-go/models" + "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" @@ -438,3 +440,64 @@ func (suite *ItemUnitTestSuite) TestDownloadItem() { }) } } + +type errReader struct{} + +func (r errReader) Read(p []byte) (int, error) { + return 0, syscall.ECONNRESET +} + +func (suite *ItemUnitTestSuite) TestDownloadItem_ConnectionResetErrorOnFirstRead() { + var ( + callCount int + + testData = []byte("test") + testRc = io.NopCloser(bytes.NewReader(testData)) + url = "https://example.com" + + itemFunc = func() models.DriveItemable { + di := newItem("test", false) + di.SetAdditionalData(map[string]any{ + "@microsoft.graph.downloadUrl": url, + }) + + return di + } + + GetFunc = func(ctx context.Context, url string) (*http.Response, error) { + defer func() { + callCount++ + }() + + if callCount == 0 { + return &http.Response{ + StatusCode: http.StatusOK, + Body: io.NopCloser(errReader{}), + }, nil + } + + return &http.Response{ + StatusCode: http.StatusOK, + Body: testRc, + }, nil + } + errorExpected = require.NoError + rcExpected = require.NotNil + ) + + t := suite.T() + + ctx, flush := tester.NewContext(t) + defer flush() + + mg := mockGetter{ + GetFunc: GetFunc, + } + rc, err := downloadItem(ctx, mg, itemFunc()) + errorExpected(t, err, clues.ToCore(err)) + rcExpected(t, rc) + + data, err := io.ReadAll(rc) + require.NoError(t, err, clues.ToCore(err)) + assert.Equal(t, testData, data) +} diff --git a/src/internal/m365/sharepoint/library_handler.go b/src/internal/m365/collection/drive/library_handler.go similarity index 93% rename from src/internal/m365/sharepoint/library_handler.go rename to src/internal/m365/collection/drive/library_handler.go index 3f16c6eae..4649e458c 100644 --- a/src/internal/m365/sharepoint/library_handler.go +++ b/src/internal/m365/collection/drive/library_handler.go @@ -1,4 +1,4 @@ -package sharepoint +package drive import ( "context" @@ -9,8 +9,7 @@ import ( "github.com/microsoftgraph/msgraph-sdk-go/models" "github.com/alcionai/corso/src/internal/common/ptr" - "github.com/alcionai/corso/src/internal/m365/onedrive" - odConsts "github.com/alcionai/corso/src/internal/m365/onedrive/consts" 
+ odConsts "github.com/alcionai/corso/src/internal/m365/service/onedrive/consts" "github.com/alcionai/corso/src/pkg/backup/details" "github.com/alcionai/corso/src/pkg/control" "github.com/alcionai/corso/src/pkg/path" @@ -18,13 +17,17 @@ import ( "github.com/alcionai/corso/src/pkg/services/m365/api" ) -var _ onedrive.BackupHandler = &libraryBackupHandler{} +var _ BackupHandler = &libraryBackupHandler{} type libraryBackupHandler struct { ac api.Drives scope selectors.SharePointScope } +func NewLibraryBackupHandler(ac api.Drives, scope selectors.SharePointScope) libraryBackupHandler { + return libraryBackupHandler{ac, scope} +} + func (h libraryBackupHandler) Get( ctx context.Context, url string, @@ -78,7 +81,7 @@ func (h libraryBackupHandler) AugmentItemInfo( size int64, parentPath *path.Builder, ) details.ItemInfo { - return augmentItemInfo(dii, item, size, parentPath) + return augmentLibraryItemInfo(dii, item, size, parentPath) } // constructWebURL is a helper function for recreating the webURL @@ -154,12 +157,16 @@ func (h libraryBackupHandler) IncludesDir(dir string) bool { // Restore // --------------------------------------------------------------------------- -var _ onedrive.RestoreHandler = &libraryRestoreHandler{} +var _ RestoreHandler = &libraryRestoreHandler{} type libraryRestoreHandler struct { ac api.Client } +func NewLibraryRestoreHandler(ac api.Client) libraryRestoreHandler { + return libraryRestoreHandler{ac} +} + func (h libraryRestoreHandler) PostDrive( ctx context.Context, siteID, driveName string, @@ -167,10 +174,6 @@ func (h libraryRestoreHandler) PostDrive( return h.ac.Lists().PostDrive(ctx, siteID, driveName) } -func NewRestoreHandler(ac api.Client) *libraryRestoreHandler { - return &libraryRestoreHandler{ac} -} - func (h libraryRestoreHandler) NewDrivePager( resourceOwner string, fields []string, @@ -184,7 +187,7 @@ func (h libraryRestoreHandler) AugmentItemInfo( size int64, parentPath *path.Builder, ) details.ItemInfo { - return augmentItemInfo(dii, item, size, parentPath) + return augmentLibraryItemInfo(dii, item, size, parentPath) } func (h libraryRestoreHandler) DeleteItem( @@ -263,7 +266,7 @@ func (h libraryRestoreHandler) GetRootFolder( // Common // --------------------------------------------------------------------------- -func augmentItemInfo( +func augmentLibraryItemInfo( dii details.ItemInfo, item models.DriveItemable, size int64, diff --git a/src/internal/m365/sharepoint/library_handler_test.go b/src/internal/m365/collection/drive/library_handler_test.go similarity index 98% rename from src/internal/m365/sharepoint/library_handler_test.go rename to src/internal/m365/collection/drive/library_handler_test.go index 254af56aa..1646868e0 100644 --- a/src/internal/m365/sharepoint/library_handler_test.go +++ b/src/internal/m365/collection/drive/library_handler_test.go @@ -1,4 +1,4 @@ -package sharepoint +package drive import ( "testing" diff --git a/src/internal/m365/onedrive/metadata/consts.go b/src/internal/m365/collection/drive/metadata/consts.go similarity index 100% rename from src/internal/m365/onedrive/metadata/consts.go rename to src/internal/m365/collection/drive/metadata/consts.go diff --git a/src/internal/m365/onedrive/metadata/metadata.go b/src/internal/m365/collection/drive/metadata/metadata.go similarity index 100% rename from src/internal/m365/onedrive/metadata/metadata.go rename to src/internal/m365/collection/drive/metadata/metadata.go diff --git a/src/internal/m365/onedrive/metadata/permissions.go 
b/src/internal/m365/collection/drive/metadata/permissions.go similarity index 100% rename from src/internal/m365/onedrive/metadata/permissions.go rename to src/internal/m365/collection/drive/metadata/permissions.go diff --git a/src/internal/m365/onedrive/metadata/permissions_test.go b/src/internal/m365/collection/drive/metadata/permissions_test.go similarity index 100% rename from src/internal/m365/onedrive/metadata/permissions_test.go rename to src/internal/m365/collection/drive/metadata/permissions_test.go diff --git a/src/internal/m365/onedrive/metadata/testdata/permissions.go b/src/internal/m365/collection/drive/metadata/testdata/permissions.go similarity index 94% rename from src/internal/m365/onedrive/metadata/testdata/permissions.go rename to src/internal/m365/collection/drive/metadata/testdata/permissions.go index a3ccc5cb3..846ee19ed 100644 --- a/src/internal/m365/onedrive/metadata/testdata/permissions.go +++ b/src/internal/m365/collection/drive/metadata/testdata/permissions.go @@ -6,7 +6,7 @@ import ( "github.com/microsoftgraph/msgraph-sdk-go/models" "github.com/stretchr/testify/assert" - "github.com/alcionai/corso/src/internal/m365/onedrive/metadata" + "github.com/alcionai/corso/src/internal/m365/collection/drive/metadata" ) func AssertMetadataEqual(t *testing.T, expect, got metadata.Metadata) { diff --git a/src/internal/m365/onedrive/permission.go b/src/internal/m365/collection/drive/permission.go similarity index 98% rename from src/internal/m365/onedrive/permission.go rename to src/internal/m365/collection/drive/permission.go index 900d8c989..4125231c6 100644 --- a/src/internal/m365/onedrive/permission.go +++ b/src/internal/m365/collection/drive/permission.go @@ -1,4 +1,4 @@ -package onedrive +package drive import ( "context" @@ -11,7 +11,7 @@ import ( "github.com/alcionai/corso/src/internal/common/ptr" "github.com/alcionai/corso/src/internal/data" - "github.com/alcionai/corso/src/internal/m365/onedrive/metadata" + "github.com/alcionai/corso/src/internal/m365/collection/drive/metadata" "github.com/alcionai/corso/src/internal/version" "github.com/alcionai/corso/src/pkg/logger" "github.com/alcionai/corso/src/pkg/path" @@ -76,7 +76,7 @@ func getCollectionMetadata( metaName = metadata.DirMetaFileSuffix } - meta, err := fetchAndReadMetadata(ctx, dc, metaName) + meta, err := FetchAndReadMetadata(ctx, dc, metaName) if err != nil { return metadata.Metadata{}, clues.Wrap(err, "collection metadata") } diff --git a/src/internal/m365/onedrive/permission_test.go b/src/internal/m365/collection/drive/permission_test.go similarity index 95% rename from src/internal/m365/onedrive/permission_test.go rename to src/internal/m365/collection/drive/permission_test.go index 7782fccd9..c241f8a98 100644 --- a/src/internal/m365/onedrive/permission_test.go +++ b/src/internal/m365/collection/drive/permission_test.go @@ -1,4 +1,4 @@ -package onedrive +package drive import ( "strings" @@ -9,8 +9,8 @@ import ( "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" - odConsts "github.com/alcionai/corso/src/internal/m365/onedrive/consts" - "github.com/alcionai/corso/src/internal/m365/onedrive/metadata" + "github.com/alcionai/corso/src/internal/m365/collection/drive/metadata" + odConsts "github.com/alcionai/corso/src/internal/m365/service/onedrive/consts" "github.com/alcionai/corso/src/internal/tester" "github.com/alcionai/corso/src/pkg/path" ) diff --git a/src/internal/m365/onedrive/restore.go b/src/internal/m365/collection/drive/restore.go similarity index 82% rename from 
src/internal/m365/onedrive/restore.go rename to src/internal/m365/collection/drive/restore.go index 900f37e60..ad7cad33f 100644 --- a/src/internal/m365/onedrive/restore.go +++ b/src/internal/m365/collection/drive/restore.go @@ -1,4 +1,4 @@ -package onedrive +package drive import ( "context" @@ -6,7 +6,6 @@ import ( "fmt" "io" "runtime/trace" - "sort" "strings" "sync" "sync/atomic" @@ -15,12 +14,11 @@ import ( "github.com/microsoftgraph/msgraph-sdk-go/models" "github.com/pkg/errors" - "github.com/alcionai/corso/src/internal/common/idname" "github.com/alcionai/corso/src/internal/common/ptr" "github.com/alcionai/corso/src/internal/data" "github.com/alcionai/corso/src/internal/diagnostics" + "github.com/alcionai/corso/src/internal/m365/collection/drive/metadata" "github.com/alcionai/corso/src/internal/m365/graph" - "github.com/alcionai/corso/src/internal/m365/onedrive/metadata" "github.com/alcionai/corso/src/internal/m365/support" "github.com/alcionai/corso/src/internal/observe" "github.com/alcionai/corso/src/internal/operations/inject" @@ -39,81 +37,6 @@ const ( maxUploadRetries = 3 ) -// ConsumeRestoreCollections will restore the specified data collections into OneDrive -func ConsumeRestoreCollections( - ctx context.Context, - rh RestoreHandler, - rcc inject.RestoreConsumerConfig, - backupDriveIDNames idname.Cacher, - dcs []data.RestoreCollection, - deets *details.Builder, - errs *fault.Bus, - ctr *count.Bus, -) (*support.ControllerOperationStatus, error) { - var ( - restoreMetrics support.CollectionMetrics - el = errs.Local() - caches = NewRestoreCaches(backupDriveIDNames) - fallbackDriveName = rcc.RestoreConfig.Location - ) - - ctx = clues.Add(ctx, "backup_version", rcc.BackupVersion) - - err := caches.Populate(ctx, rh, rcc.ProtectedResource.ID()) - if err != nil { - return nil, clues.Wrap(err, "initializing restore caches") - } - - // Reorder collections so that the parents directories are created - // before the child directories; a requirement for permissions. - data.SortRestoreCollections(dcs) - - // Iterate through the data collections and restore the contents of each - for _, dc := range dcs { - if el.Failure() != nil { - break - } - - var ( - err error - metrics support.CollectionMetrics - ictx = clues.Add( - ctx, - "category", dc.FullPath().Category(), - "full_path", dc.FullPath()) - ) - - metrics, err = RestoreCollection( - ictx, - rh, - rcc, - dc, - caches, - deets, - fallbackDriveName, - errs, - ctr.Local()) - if err != nil { - el.AddRecoverable(ctx, err) - } - - restoreMetrics = support.CombineMetrics(restoreMetrics, metrics) - - if errors.Is(err, context.Canceled) { - break - } - } - - status := support.CreateStatus( - ctx, - support.Restore, - len(dcs), - restoreMetrics, - rcc.RestoreConfig.Location) - - return status, el.Failure() -} - // RestoreCollection handles restoration of an individual collection. // returns: // - the collection's item and byte count metrics @@ -518,7 +441,7 @@ func restoreV1File( // Fetch item permissions from the collection and restore them. metaName := trimmedName + metadata.MetaFileSuffix - meta, err := fetchAndReadMetadata(ctx, fibn, metaName) + meta, err := FetchAndReadMetadata(ctx, fibn, metaName) if err != nil { return details.ItemInfo{}, clues.Wrap(err, "restoring file") } @@ -556,7 +479,7 @@ func restoreV6File( // Get metadata file so we can determine the file name. 
metaName := trimmedName + metadata.MetaFileSuffix - meta, err := fetchAndReadMetadata(ctx, fibn, metaName) + meta, err := FetchAndReadMetadata(ctx, fibn, metaName) if err != nil { return details.ItemInfo{}, clues.Wrap(err, "restoring file") } @@ -932,7 +855,7 @@ func restoreFile( return ptr.Val(newItem.GetId()), dii, nil } -func fetchAndReadMetadata( +func FetchAndReadMetadata( ctx context.Context, fibn data.FetchItemByNamer, metaName string, @@ -974,132 +897,6 @@ func getMetadata(metar io.ReadCloser) (metadata.Metadata, error) { return meta, nil } -// Augment restore path to add extra files(meta) needed for restore as -// well as do any other ordering operations on the paths -// -// Only accepts StoragePath/RestorePath pairs where the RestorePath is -// at least as long as the StoragePath. If the RestorePath is longer than the -// StoragePath then the first few (closest to the root) directories will use -// default permissions during restore. -func AugmentRestorePaths( - backupVersion int, - paths []path.RestorePaths, -) ([]path.RestorePaths, error) { - // Keyed by each value's StoragePath.String() which corresponds to the RepoRef - // of the directory. - colPaths := map[string]path.RestorePaths{} - - for _, p := range paths { - first := true - - for { - sp, err := p.StoragePath.Dir() - if err != nil { - return nil, err - } - - drivePath, err := path.ToDrivePath(sp) - if err != nil { - return nil, err - } - - if len(drivePath.Folders) == 0 { - break - } - - if len(p.RestorePath.Elements()) < len(sp.Elements()) { - return nil, clues.New("restorePath shorter than storagePath"). - With("restore_path", p.RestorePath, "storage_path", sp) - } - - rp := p.RestorePath - - // Make sure the RestorePath always points to the level of the current - // collection. We need to track if it's the first iteration because the - // RestorePath starts out at the collection level to begin with. - if !first { - rp, err = p.RestorePath.Dir() - if err != nil { - return nil, err - } - } - - paths := path.RestorePaths{ - StoragePath: sp, - RestorePath: rp, - } - - colPaths[sp.String()] = paths - p = paths - first = false - } - } - - // Adds dirmeta files as we need to make sure collections for all - // directories involved are created and not just the final one. No - // need to add `.meta` files (metadata for files) as they will - // anyways be looked up automatically. - // TODO: Stop populating .dirmeta for newer versions once we can - // get files from parent directory via `Fetch` in a collection. - // As of now look up metadata for parent directories from a - // collection. 
- for _, p := range colPaths { - el := p.StoragePath.Elements() - - if backupVersion >= version.OneDrive6NameInMeta { - mPath, err := p.StoragePath.AppendItem(".dirmeta") - if err != nil { - return nil, err - } - - paths = append( - paths, - path.RestorePaths{StoragePath: mPath, RestorePath: p.RestorePath}) - } else if backupVersion >= version.OneDrive4DirIncludesPermissions { - mPath, err := p.StoragePath.AppendItem(el.Last() + ".dirmeta") - if err != nil { - return nil, err - } - - paths = append( - paths, - path.RestorePaths{StoragePath: mPath, RestorePath: p.RestorePath}) - } else if backupVersion >= version.OneDrive1DataAndMetaFiles { - pp, err := p.StoragePath.Dir() - if err != nil { - return nil, err - } - - mPath, err := pp.AppendItem(el.Last() + ".dirmeta") - if err != nil { - return nil, err - } - - prp, err := p.RestorePath.Dir() - if err != nil { - return nil, err - } - - paths = append( - paths, - path.RestorePaths{StoragePath: mPath, RestorePath: prp}) - } - } - - // This sort is done primarily to order `.meta` files after `.data` - // files. This is only a necessity for OneDrive as we are storing - // metadata for files/folders in separate meta files and we the - // data to be restored before we can restore the metadata. - // - // This sorting assumes stuff in the same StoragePath directory end up in the - // same RestorePath collection. - sort.Slice(paths, func(i, j int) bool { - return paths[i].StoragePath.String() < paths[j].StoragePath.String() - }) - - return paths, nil -} - type PostDriveAndGetRootFolderer interface { PostDriver GetRootFolderer diff --git a/src/internal/m365/onedrive/restore_caches.go b/src/internal/m365/collection/drive/restore_caches.go similarity index 97% rename from src/internal/m365/onedrive/restore_caches.go rename to src/internal/m365/collection/drive/restore_caches.go index 096e0bff8..e2b4953dd 100644 --- a/src/internal/m365/onedrive/restore_caches.go +++ b/src/internal/m365/collection/drive/restore_caches.go @@ -1,4 +1,4 @@ -package onedrive +package drive import ( "context" @@ -10,8 +10,8 @@ import ( "github.com/alcionai/corso/src/internal/common/idname" "github.com/alcionai/corso/src/internal/common/ptr" + "github.com/alcionai/corso/src/internal/m365/collection/drive/metadata" "github.com/alcionai/corso/src/internal/m365/graph" - "github.com/alcionai/corso/src/internal/m365/onedrive/metadata" "github.com/alcionai/corso/src/pkg/services/m365/api" ) diff --git a/src/internal/m365/onedrive/restore_test.go b/src/internal/m365/collection/drive/restore_test.go similarity index 70% rename from src/internal/m365/onedrive/restore_test.go rename to src/internal/m365/collection/drive/restore_test.go index b948a8cab..2b64ce7a2 100644 --- a/src/internal/m365/onedrive/restore_test.go +++ b/src/internal/m365/collection/drive/restore_test.go @@ -1,4 +1,4 @@ -package onedrive +package drive import ( "context" @@ -14,8 +14,8 @@ import ( "github.com/alcionai/corso/src/internal/common/idname" "github.com/alcionai/corso/src/internal/common/ptr" "github.com/alcionai/corso/src/internal/m365/graph" - odConsts "github.com/alcionai/corso/src/internal/m365/onedrive/consts" - "github.com/alcionai/corso/src/internal/m365/onedrive/mock" + odConsts "github.com/alcionai/corso/src/internal/m365/service/onedrive/consts" + "github.com/alcionai/corso/src/internal/m365/service/onedrive/mock" "github.com/alcionai/corso/src/internal/operations/inject" "github.com/alcionai/corso/src/internal/tester" "github.com/alcionai/corso/src/internal/version" @@ -34,301 +34,6 @@ func 
TestRestoreUnitSuite(t *testing.T) { suite.Run(t, &RestoreUnitSuite{Suite: tester.NewUnitSuite(t)}) } -func (suite *RestoreUnitSuite) TestAugmentRestorePaths() { - // Adding a simple test here so that we can be sure that this - // function gets updated whenever we add a new version. - require.LessOrEqual(suite.T(), version.Backup, version.All8MigrateUserPNToID, "unsupported backup version") - - table := []struct { - name string - version int - input []string - output []string - }{ - { - name: "no change v0", - version: 0, - input: []string{ - "file.txt.data", - "file.txt", // v0 does not have `.data` - }, - output: []string{ - "file.txt", // ordering artifact of sorting - "file.txt.data", - }, - }, - { - name: "one folder v0", - version: 0, - input: []string{ - "folder/file.txt.data", - "folder/file.txt", - }, - output: []string{ - "folder/file.txt", - "folder/file.txt.data", - }, - }, - { - name: "no change v1", - version: version.OneDrive1DataAndMetaFiles, - input: []string{ - "file.txt.data", - }, - output: []string{ - "file.txt.data", - }, - }, - { - name: "one folder v1", - version: version.OneDrive1DataAndMetaFiles, - input: []string{ - "folder/file.txt.data", - }, - output: []string{ - "folder.dirmeta", - "folder/file.txt.data", - }, - }, - { - name: "nested folders v1", - version: version.OneDrive1DataAndMetaFiles, - input: []string{ - "folder/file.txt.data", - "folder/folder2/file.txt.data", - }, - output: []string{ - "folder.dirmeta", - "folder/file.txt.data", - "folder/folder2.dirmeta", - "folder/folder2/file.txt.data", - }, - }, - { - name: "no change v4", - version: version.OneDrive4DirIncludesPermissions, - input: []string{ - "file.txt.data", - }, - output: []string{ - "file.txt.data", - }, - }, - { - name: "one folder v4", - version: version.OneDrive4DirIncludesPermissions, - input: []string{ - "folder/file.txt.data", - }, - output: []string{ - "folder/file.txt.data", - "folder/folder.dirmeta", - }, - }, - { - name: "nested folders v4", - version: version.OneDrive4DirIncludesPermissions, - input: []string{ - "folder/file.txt.data", - "folder/folder2/file.txt.data", - }, - output: []string{ - "folder/file.txt.data", - "folder/folder.dirmeta", - "folder/folder2/file.txt.data", - "folder/folder2/folder2.dirmeta", - }, - }, - { - name: "no change v6", - version: version.OneDrive6NameInMeta, - input: []string{ - "file.txt.data", - }, - output: []string{ - "file.txt.data", - }, - }, - { - name: "one folder v6", - version: version.OneDrive6NameInMeta, - input: []string{ - "folder/file.txt.data", - }, - output: []string{ - "folder/.dirmeta", - "folder/file.txt.data", - }, - }, - { - name: "nested folders v6", - version: version.OneDrive6NameInMeta, - input: []string{ - "folder/file.txt.data", - "folder/folder2/file.txt.data", - }, - output: []string{ - "folder/.dirmeta", - "folder/file.txt.data", - "folder/folder2/.dirmeta", - "folder/folder2/file.txt.data", - }, - }, - } - - for _, test := range table { - suite.Run(test.name, func() { - t := suite.T() - - _, flush := tester.NewContext(t) - defer flush() - - base := "id/onedrive/user/files/drives/driveID/root:/" - - inPaths := []path.RestorePaths{} - for _, ps := range test.input { - p, err := path.FromDataLayerPath(base+ps, true) - require.NoError(t, err, "creating path", clues.ToCore(err)) - - pd, err := p.Dir() - require.NoError(t, err, "creating collection path", clues.ToCore(err)) - - inPaths = append( - inPaths, - path.RestorePaths{StoragePath: p, RestorePath: pd}) - } - - outPaths := []path.RestorePaths{} - for _, ps := range 
test.output { - p, err := path.FromDataLayerPath(base+ps, true) - require.NoError(t, err, "creating path", clues.ToCore(err)) - - pd, err := p.Dir() - require.NoError(t, err, "creating collection path", clues.ToCore(err)) - - outPaths = append( - outPaths, - path.RestorePaths{StoragePath: p, RestorePath: pd}) - } - - actual, err := AugmentRestorePaths(test.version, inPaths) - require.NoError(t, err, "augmenting paths", clues.ToCore(err)) - - // Ordering of paths matter here as we need dirmeta files - // to show up before file in dir - assert.Equal(t, outPaths, actual, "augmented paths") - }) - } -} - -// TestAugmentRestorePaths_DifferentRestorePath tests that RestorePath -// substitution works properly. Since it's only possible for future backup -// versions to need restore path substitution (i.e. due to storing folders by -// ID instead of name) this is only tested against the most recent backup -// version at the moment. -func (suite *RestoreUnitSuite) TestAugmentRestorePaths_DifferentRestorePath() { - // Adding a simple test here so that we can be sure that this - // function gets updated whenever we add a new version. - require.LessOrEqual(suite.T(), version.Backup, version.All8MigrateUserPNToID, "unsupported backup version") - - type pathPair struct { - storage string - restore string - } - - table := []struct { - name string - version int - input []pathPair - output []pathPair - errCheck assert.ErrorAssertionFunc - }{ - { - name: "nested folders", - version: version.Backup, - input: []pathPair{ - {storage: "folder-id/file.txt.data", restore: "folder"}, - {storage: "folder-id/folder2-id/file.txt.data", restore: "folder/folder2"}, - }, - output: []pathPair{ - {storage: "folder-id/.dirmeta", restore: "folder"}, - {storage: "folder-id/file.txt.data", restore: "folder"}, - {storage: "folder-id/folder2-id/.dirmeta", restore: "folder/folder2"}, - {storage: "folder-id/folder2-id/file.txt.data", restore: "folder/folder2"}, - }, - errCheck: assert.NoError, - }, - { - name: "restore path longer one folder", - version: version.Backup, - input: []pathPair{ - {storage: "folder-id/file.txt.data", restore: "corso_restore/folder"}, - }, - output: []pathPair{ - {storage: "folder-id/.dirmeta", restore: "corso_restore/folder"}, - {storage: "folder-id/file.txt.data", restore: "corso_restore/folder"}, - }, - errCheck: assert.NoError, - }, - { - name: "restore path shorter one folder", - version: version.Backup, - input: []pathPair{ - {storage: "folder-id/file.txt.data", restore: ""}, - }, - errCheck: assert.Error, - }, - } - - for _, test := range table { - suite.Run(test.name, func() { - t := suite.T() - - _, flush := tester.NewContext(t) - defer flush() - - base := "id/onedrive/user/files/drives/driveID/root:/" - - inPaths := []path.RestorePaths{} - for _, ps := range test.input { - p, err := path.FromDataLayerPath(base+ps.storage, true) - require.NoError(t, err, "creating path", clues.ToCore(err)) - - r, err := path.FromDataLayerPath(base+ps.restore, false) - require.NoError(t, err, "creating path", clues.ToCore(err)) - - inPaths = append( - inPaths, - path.RestorePaths{StoragePath: p, RestorePath: r}) - } - - outPaths := []path.RestorePaths{} - for _, ps := range test.output { - p, err := path.FromDataLayerPath(base+ps.storage, true) - require.NoError(t, err, "creating path", clues.ToCore(err)) - - r, err := path.FromDataLayerPath(base+ps.restore, false) - require.NoError(t, err, "creating path", clues.ToCore(err)) - - outPaths = append( - outPaths, - path.RestorePaths{StoragePath: p, RestorePath: r}) 
- } - - actual, err := AugmentRestorePaths(test.version, inPaths) - test.errCheck(t, err, "augmenting paths", clues.ToCore(err)) - - if err != nil { - return - } - - // Ordering of paths matter here as we need dirmeta files - // to show up before file in dir - assert.Equal(t, outPaths, actual, "augmented paths") - }) - } -} - func (suite *RestoreUnitSuite) TestRestoreItem_collisionHandling() { const mndiID = "mndi-id" diff --git a/src/internal/m365/onedrive/url_cache.go b/src/internal/m365/collection/drive/url_cache.go similarity index 99% rename from src/internal/m365/onedrive/url_cache.go rename to src/internal/m365/collection/drive/url_cache.go index ebd67d8b1..6c06866c6 100644 --- a/src/internal/m365/onedrive/url_cache.go +++ b/src/internal/m365/collection/drive/url_cache.go @@ -1,4 +1,4 @@ -package onedrive +package drive import ( "context" diff --git a/src/internal/m365/onedrive/url_cache_test.go b/src/internal/m365/collection/drive/url_cache_test.go similarity index 99% rename from src/internal/m365/onedrive/url_cache_test.go rename to src/internal/m365/collection/drive/url_cache_test.go index bf4f25350..f2fd257b8 100644 --- a/src/internal/m365/onedrive/url_cache_test.go +++ b/src/internal/m365/collection/drive/url_cache_test.go @@ -1,4 +1,4 @@ -package onedrive +package drive import ( "context" diff --git a/src/internal/m365/collection/site/backup.go b/src/internal/m365/collection/site/backup.go new file mode 100644 index 000000000..14f1333be --- /dev/null +++ b/src/internal/m365/collection/site/backup.go @@ -0,0 +1,168 @@ +package site + +import ( + "context" + + "github.com/alcionai/clues" + + "github.com/alcionai/corso/src/internal/common/prefixmatcher" + "github.com/alcionai/corso/src/internal/data" + "github.com/alcionai/corso/src/internal/m365/collection/drive" + "github.com/alcionai/corso/src/internal/m365/graph" + betaAPI "github.com/alcionai/corso/src/internal/m365/service/sharepoint/api" + "github.com/alcionai/corso/src/internal/m365/support" + "github.com/alcionai/corso/src/internal/operations/inject" + "github.com/alcionai/corso/src/pkg/account" + "github.com/alcionai/corso/src/pkg/fault" + "github.com/alcionai/corso/src/pkg/logger" + "github.com/alcionai/corso/src/pkg/path" + "github.com/alcionai/corso/src/pkg/selectors" + "github.com/alcionai/corso/src/pkg/services/m365/api" +) + +// CollectLibraries constructs a onedrive Collections struct and Get()s +// all the drives associated with the site. +func CollectLibraries( + ctx context.Context, + bpc inject.BackupProducerConfig, + ad api.Drives, + tenantID string, + ssmb *prefixmatcher.StringSetMatchBuilder, + scope selectors.SharePointScope, + su support.StatusUpdater, + errs *fault.Bus, +) ([]data.BackupCollection, bool, error) { + logger.Ctx(ctx).Debug("creating SharePoint Library collections") + + var ( + collections = []data.BackupCollection{} + colls = drive.NewCollections( + drive.NewLibraryBackupHandler(ad, scope), + tenantID, + bpc.ProtectedResource.ID(), + su, + bpc.Options) + ) + + odcs, canUsePreviousBackup, err := colls.Get(ctx, bpc.MetadataCollections, ssmb, errs) + if err != nil { + return nil, false, graph.Wrap(ctx, err, "getting library") + } + + return append(collections, odcs...), canUsePreviousBackup, nil +} + +// CollectPages constructs a sharepoint Collections struct and Get()s the associated +// M365 IDs for the associated Pages. 
+func CollectPages( + ctx context.Context, + bpc inject.BackupProducerConfig, + creds account.M365Config, + ac api.Client, + su support.StatusUpdater, + errs *fault.Bus, +) ([]data.BackupCollection, error) { + logger.Ctx(ctx).Debug("creating SharePoint Pages collections") + + var ( + el = errs.Local() + spcs = make([]data.BackupCollection, 0) + ) + + // make the betaClient + // Need to receive From DataCollection Call + adpt, err := graph.CreateAdapter( + creds.AzureTenantID, + creds.AzureClientID, + creds.AzureClientSecret) + if err != nil { + return nil, clues.Wrap(err, "creating azure client adapter") + } + + betaService := betaAPI.NewBetaService(adpt) + + tuples, err := betaAPI.FetchPages(ctx, betaService, bpc.ProtectedResource.ID()) + if err != nil { + return nil, err + } + + for _, tuple := range tuples { + if el.Failure() != nil { + break + } + + dir, err := path.Build( + creds.AzureTenantID, + bpc.ProtectedResource.ID(), + path.SharePointService, + path.PagesCategory, + false, + tuple.Name) + if err != nil { + el.AddRecoverable(ctx, clues.Wrap(err, "creating page collection path").WithClues(ctx)) + } + + collection := NewCollection( + dir, + ac, + Pages, + su, + bpc.Options) + collection.SetBetaService(betaService) + collection.AddJob(tuple.ID) + + spcs = append(spcs, collection) + } + + return spcs, el.Failure() +} + +func CollectLists( + ctx context.Context, + bpc inject.BackupProducerConfig, + ac api.Client, + tenantID string, + su support.StatusUpdater, + errs *fault.Bus, +) ([]data.BackupCollection, error) { + logger.Ctx(ctx).Debug("Creating SharePoint List Collections") + + var ( + el = errs.Local() + spcs = make([]data.BackupCollection, 0) + ) + + lists, err := PreFetchLists(ctx, ac.Stable, bpc.ProtectedResource.ID()) + if err != nil { + return nil, err + } + + for _, tuple := range lists { + if el.Failure() != nil { + break + } + + dir, err := path.Build( + tenantID, + bpc.ProtectedResource.ID(), + path.SharePointService, + path.ListsCategory, + false, + tuple.Name) + if err != nil { + el.AddRecoverable(ctx, clues.Wrap(err, "creating list collection path").WithClues(ctx)) + } + + collection := NewCollection( + dir, + ac, + List, + su, + bpc.Options) + collection.AddJob(tuple.ID) + + spcs = append(spcs, collection) + } + + return spcs, el.Failure() +} diff --git a/src/internal/m365/collection/site/backup_test.go b/src/internal/m365/collection/site/backup_test.go new file mode 100644 index 000000000..de0d91c50 --- /dev/null +++ b/src/internal/m365/collection/site/backup_test.go @@ -0,0 +1,73 @@ +package site + +import ( + "testing" + + "github.com/alcionai/clues" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "github.com/stretchr/testify/suite" + + "github.com/alcionai/corso/src/internal/common/idname/mock" + "github.com/alcionai/corso/src/internal/m365/graph" + "github.com/alcionai/corso/src/internal/operations/inject" + "github.com/alcionai/corso/src/internal/tester" + "github.com/alcionai/corso/src/internal/tester/tconfig" + "github.com/alcionai/corso/src/internal/version" + "github.com/alcionai/corso/src/pkg/control" + "github.com/alcionai/corso/src/pkg/fault" + "github.com/alcionai/corso/src/pkg/services/m365/api" +) + +type SharePointPagesSuite struct { + tester.Suite +} + +func TestSharePointPagesSuite(t *testing.T) { + suite.Run(t, &SharePointPagesSuite{ + Suite: tester.NewIntegrationSuite( + t, + [][]string{tconfig.M365AcctCredEnvs}), + }) +} + +func (suite *SharePointPagesSuite) SetupSuite() { + ctx, flush := 
tester.NewContext(suite.T()) + defer flush() + + graph.InitializeConcurrencyLimiter(ctx, false, 4) +} + +func (suite *SharePointPagesSuite) TestCollectPages() { + t := suite.T() + + ctx, flush := tester.NewContext(t) + defer flush() + + var ( + siteID = tconfig.M365SiteID(t) + a = tconfig.NewM365Account(t) + ) + + creds, err := a.M365Config() + require.NoError(t, err, clues.ToCore(err)) + + ac, err := api.NewClient(creds, control.DefaultOptions()) + require.NoError(t, err, clues.ToCore(err)) + + bpc := inject.BackupProducerConfig{ + LastBackupVersion: version.NoBackup, + Options: control.DefaultOptions(), + ProtectedResource: mock.NewProvider(siteID, siteID), + } + + col, err := CollectPages( + ctx, + bpc, + creds, + ac, + (&MockGraphService{}).UpdateStatus, + fault.New(true)) + assert.NoError(t, err, clues.ToCore(err)) + assert.NotEmpty(t, col) +} diff --git a/src/internal/m365/sharepoint/collection.go b/src/internal/m365/collection/site/collection.go similarity index 97% rename from src/internal/m365/sharepoint/collection.go rename to src/internal/m365/collection/site/collection.go index 23a7cb447..6d115ca3b 100644 --- a/src/internal/m365/sharepoint/collection.go +++ b/src/internal/m365/collection/site/collection.go @@ -1,4 +1,4 @@ -package sharepoint +package site import ( "bytes" @@ -13,7 +13,7 @@ import ( "github.com/alcionai/corso/src/internal/common/ptr" "github.com/alcionai/corso/src/internal/data" "github.com/alcionai/corso/src/internal/m365/graph" - betaAPI "github.com/alcionai/corso/src/internal/m365/sharepoint/api" + betaAPI "github.com/alcionai/corso/src/internal/m365/service/sharepoint/api" "github.com/alcionai/corso/src/internal/m365/support" "github.com/alcionai/corso/src/internal/observe" "github.com/alcionai/corso/src/pkg/backup/details" @@ -81,6 +81,10 @@ func NewCollection( return c } +func (sc *Collection) SetBetaService(betaService *betaAPI.BetaService) { + sc.betaService = betaService +} + // AddJob appends additional objectID to job field func (sc *Collection) AddJob(objID string) { sc.jobs = append(sc.jobs, objID) @@ -254,7 +258,7 @@ func (sc *Collection) retrieveLists( sc.data <- &Item{ id: ptr.Val(lst.GetId()), data: io.NopCloser(bytes.NewReader(byteArray)), - info: listToSPInfo(lst, size), + info: ListToSPInfo(lst, size), modTime: t, } diff --git a/src/internal/m365/sharepoint/collection_test.go b/src/internal/m365/collection/site/collection_test.go similarity index 95% rename from src/internal/m365/sharepoint/collection_test.go rename to src/internal/m365/collection/site/collection_test.go index 0462a5c8e..9c7d1ab88 100644 --- a/src/internal/m365/sharepoint/collection_test.go +++ b/src/internal/m365/collection/site/collection_test.go @@ -1,4 +1,4 @@ -package sharepoint +package site import ( "bytes" @@ -14,8 +14,8 @@ import ( "github.com/alcionai/corso/src/internal/common/ptr" "github.com/alcionai/corso/src/internal/data" - betaAPI "github.com/alcionai/corso/src/internal/m365/sharepoint/api" - spMock "github.com/alcionai/corso/src/internal/m365/sharepoint/mock" + betaAPI "github.com/alcionai/corso/src/internal/m365/service/sharepoint/api" + spMock "github.com/alcionai/corso/src/internal/m365/service/sharepoint/mock" "github.com/alcionai/corso/src/internal/tester" "github.com/alcionai/corso/src/internal/tester/tconfig" "github.com/alcionai/corso/src/pkg/account" @@ -118,7 +118,7 @@ func (suite *SharePointCollectionSuite) TestCollection_Items() { data := &Item{ id: name, data: io.NopCloser(bytes.NewReader(byteArray)), - info: listToSPInfo(listing, 
int64(len(byteArray))), + info: ListToSPInfo(listing, int64(len(byteArray))), } return data @@ -207,7 +207,7 @@ func (suite *SharePointCollectionSuite) TestListCollection_Restore() { listData := &Item{ id: testName, data: io.NopCloser(bytes.NewReader(byteArray)), - info: listToSPInfo(listing, int64(len(byteArray))), + info: ListToSPInfo(listing, int64(len(byteArray))), } destName := testdata.DefaultRestoreConfig("").Location diff --git a/src/internal/m365/sharepoint/datacategory_string.go b/src/internal/m365/collection/site/datacategory_string.go similarity index 97% rename from src/internal/m365/sharepoint/datacategory_string.go rename to src/internal/m365/collection/site/datacategory_string.go index b3281ff7f..eac0006cc 100644 --- a/src/internal/m365/sharepoint/datacategory_string.go +++ b/src/internal/m365/collection/site/datacategory_string.go @@ -1,6 +1,6 @@ // Code generated by "stringer -type=DataCategory"; DO NOT EDIT. -package sharepoint +package site import "strconv" diff --git a/src/internal/m365/sharepoint/helper_test.go b/src/internal/m365/collection/site/helper_test.go similarity index 97% rename from src/internal/m365/sharepoint/helper_test.go rename to src/internal/m365/collection/site/helper_test.go index 006a5648c..ca953a9b4 100644 --- a/src/internal/m365/sharepoint/helper_test.go +++ b/src/internal/m365/collection/site/helper_test.go @@ -1,4 +1,4 @@ -package sharepoint +package site import ( "testing" @@ -43,7 +43,7 @@ func (ms *MockGraphService) UpdateStatus(*support.ControllerOperationStatus) { } // --------------------------------------------------------------------------- -// Helper Functions +// Helper functions // --------------------------------------------------------------------------- func createTestService(t *testing.T, credentials account.M365Config) *graph.Service { diff --git a/src/internal/m365/sharepoint/lists.go b/src/internal/m365/collection/site/lists.go similarity index 96% rename from src/internal/m365/sharepoint/lists.go rename to src/internal/m365/collection/site/lists.go index 0555516af..e717f8d67 100644 --- a/src/internal/m365/sharepoint/lists.go +++ b/src/internal/m365/collection/site/lists.go @@ -1,4 +1,4 @@ -package sharepoint +package site import ( "context" @@ -14,9 +14,9 @@ import ( "github.com/alcionai/corso/src/pkg/fault" ) -// listToSPInfo translates models.Listable metadata into searchable content +// ListToSPInfo translates models.Listable metadata into searchable content // List Details: https://learn.microsoft.com/en-us/graph/api/resources/list?view=graph-rest-1.0 -func listToSPInfo(lst models.Listable, size int64) *details.SharePointInfo { +func ListToSPInfo(lst models.Listable, size int64) *details.SharePointInfo { var ( name = ptr.Val(lst.GetDisplayName()) webURL = ptr.Val(lst.GetWebUrl()) @@ -34,9 +34,9 @@ func listToSPInfo(lst models.Listable, size int64) *details.SharePointInfo { } } -type listTuple struct { - name string - id string +type ListTuple struct { + ID string + Name string } func preFetchListOptions() *sites.ItemListsRequestBuilderGetRequestConfiguration { @@ -51,15 +51,15 @@ func preFetchListOptions() *sites.ItemListsRequestBuilderGetRequestConfiguration return options } -func preFetchLists( +func PreFetchLists( ctx context.Context, gs graph.Servicer, siteID string, -) ([]listTuple, error) { +) ([]ListTuple, error) { var ( builder = gs.Client().Sites().BySiteId(siteID).Lists() options = preFetchListOptions() - listTuples = make([]listTuple, 0) + listTuples = make([]ListTuple, 0) ) for { @@ -72,11 +72,11 @@ func 
preFetchLists( var ( id = ptr.Val(entry.GetId()) name = ptr.Val(entry.GetDisplayName()) - temp = listTuple{id: id, name: name} + temp = ListTuple{ID: id, Name: name} ) if len(name) == 0 { - temp.name = id + temp.Name = id } listTuples = append(listTuples, temp) diff --git a/src/internal/m365/sharepoint/lists_test.go b/src/internal/m365/collection/site/lists_test.go similarity index 89% rename from src/internal/m365/sharepoint/lists_test.go rename to src/internal/m365/collection/site/lists_test.go index 61265003e..6942f0e83 100644 --- a/src/internal/m365/sharepoint/lists_test.go +++ b/src/internal/m365/collection/site/lists_test.go @@ -1,4 +1,4 @@ -package sharepoint +package site import ( "testing" @@ -9,6 +9,7 @@ import ( "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" + "github.com/alcionai/corso/src/internal/m365/graph" "github.com/alcionai/corso/src/internal/tester" "github.com/alcionai/corso/src/internal/tester/tconfig" "github.com/alcionai/corso/src/pkg/account" @@ -28,6 +29,11 @@ func (suite *ListsUnitSuite) SetupSuite() { require.NoError(t, err, clues.ToCore(err)) suite.creds = m365 + + ctx, flush := tester.NewContext(suite.T()) + defer flush() + + graph.InitializeConcurrencyLimiter(ctx, false, 4) } func TestListsUnitSuite(t *testing.T) { @@ -57,10 +63,10 @@ func (suite *ListsUnitSuite) TestLoadList() { defer flush() service := createTestService(t, suite.creds) - tuples, err := preFetchLists(ctx, service, "root") + tuples, err := PreFetchLists(ctx, service, "root") require.NoError(t, err, clues.ToCore(err)) - job := []string{tuples[0].id} + job := []string{tuples[0].ID} lists, err := loadSiteLists(ctx, service, "root", job, fault.New(true)) assert.NoError(t, err, clues.ToCore(err)) assert.Greater(t, len(lists), 0) @@ -98,7 +104,7 @@ func (suite *ListsUnitSuite) TestSharePointInfo() { t := suite.T() list, expected := test.listAndDeets() - info := listToSPInfo(list, 10) + info := ListToSPInfo(list, 10) assert.Equal(t, expected.ItemType, info.ItemType) assert.Equal(t, expected.ItemName, info.ItemName) assert.Equal(t, expected.WebURL, info.WebURL) diff --git a/src/internal/m365/sharepoint/pages.go b/src/internal/m365/collection/site/pages.go similarity index 98% rename from src/internal/m365/sharepoint/pages.go rename to src/internal/m365/collection/site/pages.go index c5e0bb633..23e4e0e9f 100644 --- a/src/internal/m365/sharepoint/pages.go +++ b/src/internal/m365/collection/site/pages.go @@ -1,4 +1,4 @@ -package sharepoint +package site import ( "time" diff --git a/src/internal/m365/sharepoint/pages_test.go b/src/internal/m365/collection/site/pages_test.go similarity index 98% rename from src/internal/m365/sharepoint/pages_test.go rename to src/internal/m365/collection/site/pages_test.go index d89b0d921..a1c044aaf 100644 --- a/src/internal/m365/sharepoint/pages_test.go +++ b/src/internal/m365/collection/site/pages_test.go @@ -1,4 +1,4 @@ -package sharepoint +package site import ( "testing" diff --git a/src/internal/m365/sharepoint/restore.go b/src/internal/m365/collection/site/restore.go similarity index 95% rename from src/internal/m365/sharepoint/restore.go rename to src/internal/m365/collection/site/restore.go index bb894f5ea..875ac5115 100644 --- a/src/internal/m365/sharepoint/restore.go +++ b/src/internal/m365/collection/site/restore.go @@ -1,4 +1,4 @@ -package sharepoint +package site import ( "context" @@ -15,9 +15,9 @@ import ( "github.com/alcionai/corso/src/internal/common/ptr" "github.com/alcionai/corso/src/internal/data" 
"github.com/alcionai/corso/src/internal/diagnostics" + "github.com/alcionai/corso/src/internal/m365/collection/drive" "github.com/alcionai/corso/src/internal/m365/graph" - "github.com/alcionai/corso/src/internal/m365/onedrive" - betaAPI "github.com/alcionai/corso/src/internal/m365/sharepoint/api" + betaAPI "github.com/alcionai/corso/src/internal/m365/service/sharepoint/api" "github.com/alcionai/corso/src/internal/m365/support" "github.com/alcionai/corso/src/internal/operations/inject" "github.com/alcionai/corso/src/pkg/backup/details" @@ -41,9 +41,9 @@ func ConsumeRestoreCollections( ctr *count.Bus, ) (*support.ControllerOperationStatus, error) { var ( - lrh = libraryRestoreHandler{ac} + lrh = drive.NewLibraryRestoreHandler(ac) restoreMetrics support.CollectionMetrics - caches = onedrive.NewRestoreCaches(backupDriveIDNames) + caches = drive.NewRestoreCaches(backupDriveIDNames) el = errs.Local() ) @@ -75,7 +75,7 @@ func ConsumeRestoreCollections( switch dc.FullPath().Category() { case path.LibrariesCategory: - metrics, err = onedrive.RestoreCollection( + metrics, err = drive.RestoreCollection( ictx, lrh, rcc, @@ -200,7 +200,7 @@ func restoreListItem( } } - dii.SharePoint = listToSPInfo(restoredList, int64(len(byteArray))) + dii.SharePoint = ListToSPInfo(restoredList, int64(len(byteArray))) return dii, nil } diff --git a/src/internal/m365/controller_test.go b/src/internal/m365/controller_test.go index 7076455a9..ec2c8c72c 100644 --- a/src/internal/m365/controller_test.go +++ b/src/internal/m365/controller_test.go @@ -17,10 +17,10 @@ import ( inMock "github.com/alcionai/corso/src/internal/common/idname/mock" "github.com/alcionai/corso/src/internal/data" dataMock "github.com/alcionai/corso/src/internal/data/mock" - exchMock "github.com/alcionai/corso/src/internal/m365/exchange/mock" "github.com/alcionai/corso/src/internal/m365/graph" "github.com/alcionai/corso/src/internal/m365/mock" "github.com/alcionai/corso/src/internal/m365/resource" + exchMock "github.com/alcionai/corso/src/internal/m365/service/exchange/mock" "github.com/alcionai/corso/src/internal/m365/stub" "github.com/alcionai/corso/src/internal/m365/support" "github.com/alcionai/corso/src/internal/operations/inject" diff --git a/src/internal/m365/export.go b/src/internal/m365/export.go index 4da037e26..0003353fb 100644 --- a/src/internal/m365/export.go +++ b/src/internal/m365/export.go @@ -8,7 +8,7 @@ import ( "github.com/alcionai/corso/src/internal/data" "github.com/alcionai/corso/src/internal/diagnostics" "github.com/alcionai/corso/src/internal/m365/graph" - "github.com/alcionai/corso/src/internal/m365/onedrive" + "github.com/alcionai/corso/src/internal/m365/service/onedrive" "github.com/alcionai/corso/src/internal/m365/support" "github.com/alcionai/corso/src/pkg/backup/details" "github.com/alcionai/corso/src/pkg/control" diff --git a/src/internal/m365/graph/http_wrapper.go b/src/internal/m365/graph/http_wrapper.go index 23e5566e1..b810c7aa3 100644 --- a/src/internal/m365/graph/http_wrapper.go +++ b/src/internal/m365/graph/http_wrapper.go @@ -4,11 +4,12 @@ import ( "context" "io" "net/http" - "regexp" "time" "github.com/alcionai/clues" khttp "github.com/microsoft/kiota-http-go" + "github.com/pkg/errors" + "golang.org/x/net/http2" "github.com/alcionai/corso/src/internal/events" "github.com/alcionai/corso/src/internal/version" @@ -56,7 +57,7 @@ func NewHTTPWrapper(opts ...Option) *httpWrapper { cc.apply(hc) - return &httpWrapper{hc} + return &httpWrapper{hc, cc} } // NewNoTimeoutHTTPWrapper constructs a http wrapper with no context 
timeout. @@ -74,8 +75,6 @@ func NewNoTimeoutHTTPWrapper(opts ...Option) *httpWrapper { // requests // --------------------------------------------------------------------------- -var streamErrRE = regexp.MustCompile(`stream error: stream ID \d+; .+; received from peer`) - // Request does the provided request. func (hw httpWrapper) Request( ctx context.Context, @@ -105,18 +104,20 @@ func (hw httpWrapper) Request( // retry wrapper is unsophisticated, but should only // retry in the event of a `stream error`, which is not // a common expectation. - for i := 0; i < 3; i++ { + for i := 0; i < hw.config.maxConnectionRetries+1; i++ { ictx := clues.Add(ctx, "request_retry_iter", i) resp, err = hw.client.Do(req) - if err != nil && !streamErrRE.MatchString(err.Error()) { - return nil, Stack(ictx, err) - } if err == nil { break } + var http2StreamErr http2.StreamError + if !errors.As(err, &http2StreamErr) { + return nil, Stack(ictx, err) + } + logger.Ctx(ictx).Debug("http2 stream error") events.Inc(events.APICall, "streamerror") @@ -137,6 +138,7 @@ func (hw httpWrapper) Request( type ( httpWrapper struct { client *http.Client + config *clientConfig } customTransport struct { diff --git a/src/internal/m365/graph/http_wrapper_test.go b/src/internal/m365/graph/http_wrapper_test.go index 31e5faf52..fb01ef5b4 100644 --- a/src/internal/m365/graph/http_wrapper_test.go +++ b/src/internal/m365/graph/http_wrapper_test.go @@ -7,8 +7,10 @@ import ( "github.com/alcionai/clues" khttp "github.com/microsoft/kiota-http-go" + "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" + "golang.org/x/net/http2" "github.com/alcionai/corso/src/internal/tester" "github.com/alcionai/corso/src/internal/tester/tconfig" @@ -116,3 +118,70 @@ func (suite *HTTPWrapperUnitSuite) TestNewHTTPWrapper_redirectMiddleware() { // require.Equal(t, 1, calledCorrectly, "test server was called with expected path") require.Equal(t, http.StatusOK, resp.StatusCode) } + +func (suite *HTTPWrapperUnitSuite) TestNewHTTPWrapper_http2StreamErrorRetries() { + var ( + url = "https://graph.microsoft.com/fnords/beaux/regard" + streamErr = http2.StreamError{ + StreamID: 1, + Code: http2.ErrCodeEnhanceYourCalm, + Cause: assert.AnError, + } + ) + + table := []struct { + name string + retries int + expectRetries int + }{ + { + name: "zero retries", + retries: 0, + expectRetries: 0, + }, + { + name: "negative max", + retries: -1, + expectRetries: 0, + }, + { + name: "upper limit", + retries: 9001, + expectRetries: 5, + }, + { + name: "four", + retries: 4, + expectRetries: 4, + }, + } + for _, test := range table { + suite.Run(test.name, func() { + t := suite.T() + + ctx, flush := tester.NewContext(t) + defer flush() + + // -1 to account for the first try, + // which isn't a retry. 
+ tries := -1 + + mwResp := mwForceResp{ + err: streamErr, + alternate: func(*http.Request) (bool, *http.Response, error) { + tries++ + return false, nil, nil + }, + } + + hw := NewHTTPWrapper( + appendMiddleware(&mwResp), + MaxConnectionRetries(test.retries)) + + _, err := hw.Request(ctx, http.MethodGet, url, nil, nil) + require.ErrorAs(t, err, &http2.StreamError{}, clues.ToCore(err)) + + require.Equal(t, test.expectRetries, tries, "count of retries") + }) + } +} diff --git a/src/internal/m365/graph/metadata/metadata.go b/src/internal/m365/graph/metadata/metadata.go index 9b61a3fc0..d213cd481 100644 --- a/src/internal/m365/graph/metadata/metadata.go +++ b/src/internal/m365/graph/metadata/metadata.go @@ -1,7 +1,7 @@ package metadata import ( - "github.com/alcionai/corso/src/internal/m365/onedrive/metadata" + "github.com/alcionai/corso/src/internal/m365/collection/drive/metadata" "github.com/alcionai/corso/src/pkg/path" ) diff --git a/src/internal/m365/graph/metadata/metadata_test.go b/src/internal/m365/graph/metadata/metadata_test.go index f7c1b81fe..15b190a19 100644 --- a/src/internal/m365/graph/metadata/metadata_test.go +++ b/src/internal/m365/graph/metadata/metadata_test.go @@ -9,8 +9,8 @@ import ( "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" + odmetadata "github.com/alcionai/corso/src/internal/m365/collection/drive/metadata" "github.com/alcionai/corso/src/internal/m365/graph/metadata" - odmetadata "github.com/alcionai/corso/src/internal/m365/onedrive/metadata" "github.com/alcionai/corso/src/internal/tester" "github.com/alcionai/corso/src/pkg/path" ) diff --git a/src/internal/m365/graph/mock/service.go b/src/internal/m365/graph/mock/service.go index 813488626..64f6c3d6f 100644 --- a/src/internal/m365/graph/mock/service.go +++ b/src/internal/m365/graph/mock/service.go @@ -33,7 +33,7 @@ func CreateAdapter( return nil, err } - httpClient := graph.KiotaHTTPClient(opts...) + httpClient, _ := graph.KiotaHTTPClient(opts...) // This makes sure that we are able to intercept any requests via // gock. Only necessary for testing. diff --git a/src/internal/m365/graph/service.go b/src/internal/m365/graph/service.go index 2f5ae72ee..212314e49 100644 --- a/src/internal/m365/graph/service.go +++ b/src/internal/m365/graph/service.go @@ -120,7 +120,7 @@ func CreateAdapter( return nil, err } - httpClient := KiotaHTTPClient(opts...) + httpClient, cc := KiotaHTTPClient(opts...) adpt, err := msgraphsdkgo.NewGraphRequestAdapterWithParseNodeFactoryAndSerializationWriterFactoryAndHttpClient( auth, @@ -130,7 +130,7 @@ func CreateAdapter( return nil, clues.Stack(err) } - return wrapAdapter(adpt), nil + return wrapAdapter(adpt, cc), nil } func GetAuth(tenant string, client string, secret string) (*kauth.AzureIdentityAuthenticationProvider, error) { @@ -158,7 +158,7 @@ func GetAuth(tenant string, client string, secret string) (*kauth.AzureIdentityA // and consume relatively unbound socket connections. It is important // to centralize this client to be passed downstream where api calls // can utilize it on a per-download basis. -func KiotaHTTPClient(opts ...Option) *http.Client { +func KiotaHTTPClient(opts ...Option) (*http.Client, *clientConfig) { var ( clientOptions = msgraphsdkgo.GetDefaultClientOptions() cc = populateConfig(opts...) 
@@ -170,7 +170,7 @@ func KiotaHTTPClient(opts ...Option) *http.Client { cc.apply(httpClient) - return httpClient + return httpClient, cc } // --------------------------------------------------------------------------- @@ -179,11 +179,14 @@ func KiotaHTTPClient(opts ...Option) *http.Client { type clientConfig struct { noTimeout bool - // MaxRetries before failure + // MaxConnectionRetries is the number of connection-level retries that + // attempt to re-run the request due to a broken or closed connection. + maxConnectionRetries int + // MaxRetries is the number of middleware retries attempted + // before returning with failure. maxRetries int // The minimum delay in seconds between retries - minDelay time.Duration - overrideRetryCount bool + minDelay time.Duration appendMiddleware []khttp.Middleware } @@ -193,8 +196,9 @@ type Option func(*clientConfig) // populate constructs a clientConfig according to the provided options. func populateConfig(opts ...Option) *clientConfig { cc := clientConfig{ - maxRetries: defaultMaxRetries, - minDelay: defaultDelay, + maxConnectionRetries: defaultMaxRetries, + maxRetries: defaultMaxRetries, + minDelay: defaultDelay, } for _, opt := range opts { @@ -227,14 +231,25 @@ func NoTimeout() Option { func MaxRetries(max int) Option { return func(c *clientConfig) { - c.overrideRetryCount = true + if max < 0 { + max = 0 + } else if max > 5 { + max = 5 + } + c.maxRetries = max } } -func MinimumBackoff(dur time.Duration) Option { +func MinimumBackoff(min time.Duration) Option { return func(c *clientConfig) { - c.minDelay = dur + if min < 100*time.Millisecond { + min = 100 * time.Millisecond + } else if min > 5*time.Second { + min = 5 * time.Second + } + + c.minDelay = min } } @@ -246,6 +261,18 @@ func appendMiddleware(mw ...khttp.Middleware) Option { } } +func MaxConnectionRetries(max int) Option { + return func(c *clientConfig) { + if max < 0 { + max = 0 + } else if max > 5 { + max = 5 + } + + c.maxConnectionRetries = max + } +} + // --------------------------------------------------------------------------- // Middleware Control // --------------------------------------------------------------------------- @@ -302,10 +329,11 @@ var _ abstractions.RequestAdapter = &adapterWrap{} // 3. Error and debug conditions are logged. type adapterWrap struct { abstractions.RequestAdapter + config *clientConfig } -func wrapAdapter(gra *msgraphsdkgo.GraphRequestAdapter) *adapterWrap { - return &adapterWrap{gra} +func wrapAdapter(gra *msgraphsdkgo.GraphRequestAdapter, cc *clientConfig) *adapterWrap { + return &adapterWrap{gra, cc} } var connectionEnded = filters.Contains([]string{ @@ -331,7 +359,7 @@ func (aw *adapterWrap) Send( // retry wrapper is unsophisticated, but should only // retry in the event of a `stream error`, which is not // a common expectation.
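+ // the loop bound is maxConnectionRetries+1: one initial attempt plus the configured number of retries.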
- for i := 0; i < 3; i++ { + for i := 0; i < aw.config.maxConnectionRetries+1; i++ { ictx := clues.Add(ctx, "request_retry_iter", i) sp, err = aw.RequestAdapter.Send(ctx, requestInfo, constructor, errorMappings) diff --git a/src/internal/m365/graph/service_test.go b/src/internal/m365/graph/service_test.go index 08d8c6169..fb8e31b7b 100644 --- a/src/internal/m365/graph/service_test.go +++ b/src/internal/m365/graph/service_test.go @@ -67,9 +67,10 @@ func (suite *GraphIntgSuite) TestCreateAdapter() { func (suite *GraphIntgSuite) TestHTTPClient() { table := []struct { - name string - opts []Option - check func(*testing.T, *http.Client) + name string + opts []Option + check func(*testing.T, *http.Client) + checkConfig func(*testing.T, *clientConfig) }{ { name: "no options", @@ -77,23 +78,75 @@ func (suite *GraphIntgSuite) TestHTTPClient() { check: func(t *testing.T, c *http.Client) { assert.Equal(t, defaultHTTPClientTimeout, c.Timeout, "default timeout") }, + checkConfig: func(t *testing.T, c *clientConfig) { + assert.Equal(t, defaultDelay, c.minDelay, "default delay") + assert.Equal(t, defaultMaxRetries, c.maxRetries, "max retries") + assert.Equal(t, defaultMaxRetries, c.maxConnectionRetries, "max connection retries") + }, }, { - name: "no timeout", - opts: []Option{NoTimeout()}, + name: "configured options", + opts: []Option{ + NoTimeout(), + MaxRetries(4), + MaxConnectionRetries(2), + MinimumBackoff(999 * time.Millisecond), + }, check: func(t *testing.T, c *http.Client) { // FIXME: Change to 0 one upstream issue is fixed assert.Equal(t, time.Duration(48*time.Hour), c.Timeout, "unlimited timeout") }, + checkConfig: func(t *testing.T, c *clientConfig) { + assert.Equal(t, 999*time.Millisecond, c.minDelay, "minimum delay") + assert.Equal(t, 4, c.maxRetries, "max retries") + assert.Equal(t, 2, c.maxConnectionRetries, "max connection retries") + }, + }, + { + name: "below minimums", + opts: []Option{ + NoTimeout(), + MaxRetries(-1), + MaxConnectionRetries(-1), + MinimumBackoff(0), + }, + check: func(t *testing.T, c *http.Client) { + // FIXME: Change to 0 one upstream issue is fixed + assert.Equal(t, time.Duration(48*time.Hour), c.Timeout, "unlimited timeout") + }, + checkConfig: func(t *testing.T, c *clientConfig) { + assert.Equal(t, 100*time.Millisecond, c.minDelay, "minimum delay") + assert.Equal(t, 0, c.maxRetries, "max retries") + assert.Equal(t, 0, c.maxConnectionRetries, "max connection retries") + }, + }, + { + name: "above maximums", + opts: []Option{ + NoTimeout(), + MaxRetries(9001), + MaxConnectionRetries(9001), + MinimumBackoff(999 * time.Second), + }, + check: func(t *testing.T, c *http.Client) { + // FIXME: Change to 0 one upstream issue is fixed + assert.Equal(t, time.Duration(48*time.Hour), c.Timeout, "unlimited timeout") + }, + checkConfig: func(t *testing.T, c *clientConfig) { + assert.Equal(t, 5*time.Second, c.minDelay, "minimum delay") + assert.Equal(t, 5, c.maxRetries, "max retries") + assert.Equal(t, 5, c.maxConnectionRetries, "max connection retries") + }, }, } for _, test := range table { suite.Run(test.name, func() { t := suite.T() - cli := KiotaHTTPClient(test.opts...) + cli, cc := KiotaHTTPClient(test.opts...) 
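+ // the returned clientConfig lets the test assert on the parsed option values below.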
assert.NotNil(t, cli) test.check(t, cli) + test.checkConfig(t, cc) }) } } @@ -178,12 +231,12 @@ func (suite *GraphIntgSuite) TestAdapterWrap_retriesConnectionClose() { // the query doesn't matter _, err = users.NewItemCalendarsItemEventsDeltaRequestBuilder(url, adpt).Get(ctx, nil) require.ErrorIs(t, err, syscall.ECONNRESET, clues.ToCore(err)) - require.Equal(t, 12, count, "number of retries") + require.Equal(t, 16, count, "number of retries") count = 0 // the query doesn't matter _, err = NewService(adpt).Client().Users().Get(ctx, nil) require.ErrorIs(t, err, syscall.ECONNRESET, clues.ToCore(err)) - require.Equal(t, 12, count, "number of retries") + require.Equal(t, 16, count, "number of retries") } diff --git a/src/internal/m365/helper_test.go b/src/internal/m365/helper_test.go index 78e9cb365..f4c80a479 100644 --- a/src/internal/m365/helper_test.go +++ b/src/internal/m365/helper_test.go @@ -17,10 +17,10 @@ import ( "github.com/alcionai/corso/src/internal/common/ptr" "github.com/alcionai/corso/src/internal/data" - "github.com/alcionai/corso/src/internal/m365/onedrive" - "github.com/alcionai/corso/src/internal/m365/onedrive/metadata" - odStub "github.com/alcionai/corso/src/internal/m365/onedrive/stub" + "github.com/alcionai/corso/src/internal/m365/collection/drive" + "github.com/alcionai/corso/src/internal/m365/collection/drive/metadata" "github.com/alcionai/corso/src/internal/m365/resource" + odStub "github.com/alcionai/corso/src/internal/m365/service/onedrive/stub" m365Stub "github.com/alcionai/corso/src/internal/m365/stub" "github.com/alcionai/corso/src/internal/tester/tconfig" "github.com/alcionai/corso/src/pkg/control" @@ -737,7 +737,7 @@ func compareDriveItem( ) if !isMeta { - oitem := item.(*onedrive.Item) + oitem := item.(*drive.Item) info := oitem.Info() if info.OneDrive != nil { diff --git a/src/internal/m365/mock/collection.go b/src/internal/m365/mock/collection.go index e1d61db58..3df910943 100644 --- a/src/internal/m365/mock/collection.go +++ b/src/internal/m365/mock/collection.go @@ -4,6 +4,8 @@ import ( "context" "github.com/alcionai/corso/src/internal/data" + "github.com/alcionai/corso/src/pkg/fault" + "github.com/alcionai/corso/src/pkg/path" ) type RestoreCollection struct { @@ -22,3 +24,44 @@ func (rc RestoreCollection) FetchItemByName( return res, nil } + +type BackupCollection struct { + Path path.Path + Loc *path.Builder + Streams []data.Stream + CState data.CollectionState +} + +func (c *BackupCollection) Items(context.Context, *fault.Bus) <-chan data.Stream { + res := make(chan data.Stream) + + go func() { + defer close(res) + + for _, s := range c.Streams { + res <- s + } + }() + + return res +} + +func (c BackupCollection) FullPath() path.Path { + return c.Path +} + +func (c BackupCollection) PreviousPath() path.Path { + return c.Path +} + +func (c BackupCollection) LocationPath() *path.Builder { + return c.Loc +} + +func (c BackupCollection) State() data.CollectionState { + return c.CState +} + +func (c BackupCollection) DoNotMergeItems() bool { + return false +} diff --git a/src/internal/m365/onedrive_test.go b/src/internal/m365/onedrive_test.go index ba81a477a..53b45be52 100644 --- a/src/internal/m365/onedrive_test.go +++ b/src/internal/m365/onedrive_test.go @@ -14,11 +14,11 @@ import ( "github.com/alcionai/corso/src/internal/common/dttm" "github.com/alcionai/corso/src/internal/common/ptr" + "github.com/alcionai/corso/src/internal/m365/collection/drive/metadata" "github.com/alcionai/corso/src/internal/m365/graph" - odConsts 
"github.com/alcionai/corso/src/internal/m365/onedrive/consts" - "github.com/alcionai/corso/src/internal/m365/onedrive/metadata" - "github.com/alcionai/corso/src/internal/m365/onedrive/stub" "github.com/alcionai/corso/src/internal/m365/resource" + odConsts "github.com/alcionai/corso/src/internal/m365/service/onedrive/consts" + "github.com/alcionai/corso/src/internal/m365/service/onedrive/stub" "github.com/alcionai/corso/src/internal/tester" "github.com/alcionai/corso/src/internal/tester/tconfig" "github.com/alcionai/corso/src/internal/version" diff --git a/src/internal/m365/restore.go b/src/internal/m365/restore.go index de9e0bb13..3455e650f 100644 --- a/src/internal/m365/restore.go +++ b/src/internal/m365/restore.go @@ -7,10 +7,11 @@ import ( "github.com/alcionai/corso/src/internal/data" "github.com/alcionai/corso/src/internal/diagnostics" - "github.com/alcionai/corso/src/internal/m365/exchange" + "github.com/alcionai/corso/src/internal/m365/collection/drive" "github.com/alcionai/corso/src/internal/m365/graph" - "github.com/alcionai/corso/src/internal/m365/onedrive" - "github.com/alcionai/corso/src/internal/m365/sharepoint" + "github.com/alcionai/corso/src/internal/m365/service/exchange" + "github.com/alcionai/corso/src/internal/m365/service/onedrive" + "github.com/alcionai/corso/src/internal/m365/service/sharepoint" "github.com/alcionai/corso/src/internal/m365/support" "github.com/alcionai/corso/src/internal/operations/inject" "github.com/alcionai/corso/src/pkg/backup/details" @@ -71,7 +72,7 @@ func (ctrl *Controller) ConsumeRestoreCollections( case path.OneDriveService: status, err = onedrive.ConsumeRestoreCollections( ctx, - onedrive.NewRestoreHandler(ctrl.AC), + drive.NewRestoreHandler(ctrl.AC), rcc, ctrl.backupDriveIDNames, dcs, diff --git a/src/internal/m365/exchange/attachment.go b/src/internal/m365/service/exchange/attachment.go similarity index 100% rename from src/internal/m365/exchange/attachment.go rename to src/internal/m365/service/exchange/attachment.go diff --git a/src/internal/m365/exchange/attendees.go b/src/internal/m365/service/exchange/attendees.go similarity index 100% rename from src/internal/m365/exchange/attendees.go rename to src/internal/m365/service/exchange/attendees.go diff --git a/src/internal/m365/exchange/backup.go b/src/internal/m365/service/exchange/backup.go similarity index 100% rename from src/internal/m365/exchange/backup.go rename to src/internal/m365/service/exchange/backup.go diff --git a/src/internal/m365/exchange/backup_test.go b/src/internal/m365/service/exchange/backup_test.go similarity index 100% rename from src/internal/m365/exchange/backup_test.go rename to src/internal/m365/service/exchange/backup_test.go diff --git a/src/internal/m365/exchange/cache_container.go b/src/internal/m365/service/exchange/cache_container.go similarity index 100% rename from src/internal/m365/exchange/cache_container.go rename to src/internal/m365/service/exchange/cache_container.go diff --git a/src/internal/m365/exchange/collection.go b/src/internal/m365/service/exchange/collection.go similarity index 100% rename from src/internal/m365/exchange/collection.go rename to src/internal/m365/service/exchange/collection.go diff --git a/src/internal/m365/exchange/collection_test.go b/src/internal/m365/service/exchange/collection_test.go similarity index 100% rename from src/internal/m365/exchange/collection_test.go rename to src/internal/m365/service/exchange/collection_test.go diff --git a/src/internal/m365/exchange/consts.go 
b/src/internal/m365/service/exchange/consts.go similarity index 100% rename from src/internal/m365/exchange/consts.go rename to src/internal/m365/service/exchange/consts.go diff --git a/src/internal/m365/exchange/contacts_backup.go b/src/internal/m365/service/exchange/contacts_backup.go similarity index 100% rename from src/internal/m365/exchange/contacts_backup.go rename to src/internal/m365/service/exchange/contacts_backup.go diff --git a/src/internal/m365/exchange/contacts_container_cache.go b/src/internal/m365/service/exchange/contacts_container_cache.go similarity index 100% rename from src/internal/m365/exchange/contacts_container_cache.go rename to src/internal/m365/service/exchange/contacts_container_cache.go diff --git a/src/internal/m365/exchange/contacts_restore.go b/src/internal/m365/service/exchange/contacts_restore.go similarity index 100% rename from src/internal/m365/exchange/contacts_restore.go rename to src/internal/m365/service/exchange/contacts_restore.go diff --git a/src/internal/m365/exchange/contacts_restore_test.go b/src/internal/m365/service/exchange/contacts_restore_test.go similarity index 98% rename from src/internal/m365/exchange/contacts_restore_test.go rename to src/internal/m365/service/exchange/contacts_restore_test.go index d55c1d261..f2030ea16 100644 --- a/src/internal/m365/exchange/contacts_restore_test.go +++ b/src/internal/m365/service/exchange/contacts_restore_test.go @@ -10,8 +10,8 @@ import ( "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" - "github.com/alcionai/corso/src/internal/m365/exchange/mock" "github.com/alcionai/corso/src/internal/m365/graph" + "github.com/alcionai/corso/src/internal/m365/service/exchange/mock" "github.com/alcionai/corso/src/internal/tester" "github.com/alcionai/corso/src/internal/tester/tconfig" "github.com/alcionai/corso/src/pkg/control" diff --git a/src/internal/m365/exchange/container_resolver.go b/src/internal/m365/service/exchange/container_resolver.go similarity index 100% rename from src/internal/m365/exchange/container_resolver.go rename to src/internal/m365/service/exchange/container_resolver.go diff --git a/src/internal/m365/exchange/container_resolver_test.go b/src/internal/m365/service/exchange/container_resolver_test.go similarity index 100% rename from src/internal/m365/exchange/container_resolver_test.go rename to src/internal/m365/service/exchange/container_resolver_test.go diff --git a/src/internal/m365/exchange/events_backup.go b/src/internal/m365/service/exchange/events_backup.go similarity index 100% rename from src/internal/m365/exchange/events_backup.go rename to src/internal/m365/service/exchange/events_backup.go diff --git a/src/internal/m365/exchange/events_container_cache.go b/src/internal/m365/service/exchange/events_container_cache.go similarity index 100% rename from src/internal/m365/exchange/events_container_cache.go rename to src/internal/m365/service/exchange/events_container_cache.go diff --git a/src/internal/m365/exchange/events_instance_restore.go b/src/internal/m365/service/exchange/events_instance_restore.go similarity index 100% rename from src/internal/m365/exchange/events_instance_restore.go rename to src/internal/m365/service/exchange/events_instance_restore.go diff --git a/src/internal/m365/exchange/events_restore.go b/src/internal/m365/service/exchange/events_restore.go similarity index 100% rename from src/internal/m365/exchange/events_restore.go rename to src/internal/m365/service/exchange/events_restore.go diff --git 
a/src/internal/m365/exchange/events_restore_test.go b/src/internal/m365/service/exchange/events_restore_test.go similarity index 99% rename from src/internal/m365/exchange/events_restore_test.go rename to src/internal/m365/service/exchange/events_restore_test.go index b8db6f052..ed0fbc60c 100644 --- a/src/internal/m365/exchange/events_restore_test.go +++ b/src/internal/m365/service/exchange/events_restore_test.go @@ -11,8 +11,8 @@ import ( "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" - "github.com/alcionai/corso/src/internal/m365/exchange/mock" "github.com/alcionai/corso/src/internal/m365/graph" + "github.com/alcionai/corso/src/internal/m365/service/exchange/mock" "github.com/alcionai/corso/src/internal/tester" "github.com/alcionai/corso/src/internal/tester/tconfig" "github.com/alcionai/corso/src/pkg/control" diff --git a/src/internal/m365/exchange/handlers.go b/src/internal/m365/service/exchange/handlers.go similarity index 100% rename from src/internal/m365/exchange/handlers.go rename to src/internal/m365/service/exchange/handlers.go diff --git a/src/internal/m365/exchange/helper_test.go b/src/internal/m365/service/exchange/helper_test.go similarity index 100% rename from src/internal/m365/exchange/helper_test.go rename to src/internal/m365/service/exchange/helper_test.go diff --git a/src/internal/m365/exchange/mail_backup.go b/src/internal/m365/service/exchange/mail_backup.go similarity index 100% rename from src/internal/m365/exchange/mail_backup.go rename to src/internal/m365/service/exchange/mail_backup.go diff --git a/src/internal/m365/exchange/mail_container_cache.go b/src/internal/m365/service/exchange/mail_container_cache.go similarity index 100% rename from src/internal/m365/exchange/mail_container_cache.go rename to src/internal/m365/service/exchange/mail_container_cache.go diff --git a/src/internal/m365/exchange/mail_container_cache_test.go b/src/internal/m365/service/exchange/mail_container_cache_test.go similarity index 100% rename from src/internal/m365/exchange/mail_container_cache_test.go rename to src/internal/m365/service/exchange/mail_container_cache_test.go diff --git a/src/internal/m365/exchange/mail_restore.go b/src/internal/m365/service/exchange/mail_restore.go similarity index 100% rename from src/internal/m365/exchange/mail_restore.go rename to src/internal/m365/service/exchange/mail_restore.go diff --git a/src/internal/m365/exchange/mail_restore_test.go b/src/internal/m365/service/exchange/mail_restore_test.go similarity index 99% rename from src/internal/m365/exchange/mail_restore_test.go rename to src/internal/m365/service/exchange/mail_restore_test.go index 5b85321b6..58fdcd7cb 100644 --- a/src/internal/m365/exchange/mail_restore_test.go +++ b/src/internal/m365/service/exchange/mail_restore_test.go @@ -11,8 +11,8 @@ import ( "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" - "github.com/alcionai/corso/src/internal/m365/exchange/mock" "github.com/alcionai/corso/src/internal/m365/graph" + "github.com/alcionai/corso/src/internal/m365/service/exchange/mock" "github.com/alcionai/corso/src/internal/tester" "github.com/alcionai/corso/src/internal/tester/tconfig" "github.com/alcionai/corso/src/pkg/control" diff --git a/src/internal/m365/exchange/mock/collections.go b/src/internal/m365/service/exchange/mock/collections.go similarity index 100% rename from src/internal/m365/exchange/mock/collections.go rename to src/internal/m365/service/exchange/mock/collections.go diff --git 
a/src/internal/m365/exchange/mock/contact.go b/src/internal/m365/service/exchange/mock/contact.go similarity index 100% rename from src/internal/m365/exchange/mock/contact.go rename to src/internal/m365/service/exchange/mock/contact.go diff --git a/src/internal/m365/exchange/mock/event.go b/src/internal/m365/service/exchange/mock/event.go similarity index 100% rename from src/internal/m365/exchange/mock/event.go rename to src/internal/m365/service/exchange/mock/event.go diff --git a/src/internal/m365/exchange/mock/mail.go b/src/internal/m365/service/exchange/mock/mail.go similarity index 100% rename from src/internal/m365/exchange/mock/mail.go rename to src/internal/m365/service/exchange/mock/mail.go diff --git a/src/internal/m365/exchange/mock/mock_test.go b/src/internal/m365/service/exchange/mock/mock_test.go similarity index 100% rename from src/internal/m365/exchange/mock/mock_test.go rename to src/internal/m365/service/exchange/mock/mock_test.go diff --git a/src/internal/m365/exchange/restore.go b/src/internal/m365/service/exchange/restore.go similarity index 100% rename from src/internal/m365/exchange/restore.go rename to src/internal/m365/service/exchange/restore.go diff --git a/src/internal/m365/exchange/restore_test.go b/src/internal/m365/service/exchange/restore_test.go similarity index 99% rename from src/internal/m365/exchange/restore_test.go rename to src/internal/m365/service/exchange/restore_test.go index a30d56dd0..88983114e 100644 --- a/src/internal/m365/exchange/restore_test.go +++ b/src/internal/m365/service/exchange/restore_test.go @@ -10,7 +10,7 @@ import ( "github.com/stretchr/testify/suite" "github.com/alcionai/corso/src/internal/common/ptr" - exchMock "github.com/alcionai/corso/src/internal/m365/exchange/mock" + exchMock "github.com/alcionai/corso/src/internal/m365/service/exchange/mock" "github.com/alcionai/corso/src/internal/tester" "github.com/alcionai/corso/src/internal/tester/tconfig" "github.com/alcionai/corso/src/pkg/account" diff --git a/src/internal/m365/exchange/testdata/handlers.go b/src/internal/m365/service/exchange/testdata/handlers.go similarity index 92% rename from src/internal/m365/exchange/testdata/handlers.go rename to src/internal/m365/service/exchange/testdata/handlers.go index 559c23b2c..2a62e609f 100644 --- a/src/internal/m365/exchange/testdata/handlers.go +++ b/src/internal/m365/service/exchange/testdata/handlers.go @@ -7,8 +7,8 @@ import ( "github.com/alcionai/clues" "github.com/stretchr/testify/require" - "github.com/alcionai/corso/src/internal/m365/exchange" "github.com/alcionai/corso/src/internal/m365/graph" + "github.com/alcionai/corso/src/internal/m365/service/exchange" "github.com/alcionai/corso/src/pkg/fault" "github.com/alcionai/corso/src/pkg/path" "github.com/alcionai/corso/src/pkg/services/m365/api" diff --git a/src/internal/m365/exchange/transform.go b/src/internal/m365/service/exchange/transform.go similarity index 100% rename from src/internal/m365/exchange/transform.go rename to src/internal/m365/service/exchange/transform.go diff --git a/src/internal/m365/exchange/transform_test.go b/src/internal/m365/service/exchange/transform_test.go similarity index 98% rename from src/internal/m365/exchange/transform_test.go rename to src/internal/m365/service/exchange/transform_test.go index 1bd8070dc..020406803 100644 --- a/src/internal/m365/exchange/transform_test.go +++ b/src/internal/m365/service/exchange/transform_test.go @@ -10,7 +10,7 @@ import ( "github.com/stretchr/testify/suite" 
"github.com/alcionai/corso/src/internal/common/ptr" - exchMock "github.com/alcionai/corso/src/internal/m365/exchange/mock" + exchMock "github.com/alcionai/corso/src/internal/m365/service/exchange/mock" "github.com/alcionai/corso/src/internal/tester" "github.com/alcionai/corso/src/pkg/services/m365/api" ) diff --git a/src/internal/m365/service/groups/backup.go b/src/internal/m365/service/groups/backup.go new file mode 100644 index 000000000..3bb779507 --- /dev/null +++ b/src/internal/m365/service/groups/backup.go @@ -0,0 +1,85 @@ +package groups + +import ( + "context" + + "github.com/alcionai/clues" + + "github.com/alcionai/corso/src/internal/common/prefixmatcher" + "github.com/alcionai/corso/src/internal/data" + "github.com/alcionai/corso/src/internal/m365/graph" + "github.com/alcionai/corso/src/internal/m365/support" + "github.com/alcionai/corso/src/internal/observe" + "github.com/alcionai/corso/src/internal/operations/inject" + "github.com/alcionai/corso/src/pkg/account" + "github.com/alcionai/corso/src/pkg/fault" + "github.com/alcionai/corso/src/pkg/path" + "github.com/alcionai/corso/src/pkg/services/m365/api" +) + +func ProduceBackupCollections( + ctx context.Context, + bpc inject.BackupProducerConfig, + ac api.Client, + creds account.M365Config, + su support.StatusUpdater, + errs *fault.Bus, +) ([]data.BackupCollection, *prefixmatcher.StringSetMatcher, bool, error) { + b, err := bpc.Selector.ToGroupsBackup() + if err != nil { + return nil, nil, false, clues.Wrap(err, "groupsDataCollection: parsing selector") + } + + var ( + el = errs.Local() + collections = []data.BackupCollection{} + categories = map[path.CategoryType]struct{}{} + ssmb = prefixmatcher.NewStringSetBuilder() + canUsePreviousBackup bool + ) + + ctx = clues.Add( + ctx, + "group_id", clues.Hide(bpc.ProtectedResource.ID()), + "group_name", clues.Hide(bpc.ProtectedResource.Name())) + + for _, scope := range b.Scopes() { + if el.Failure() != nil { + break + } + + progressBar := observe.MessageWithCompletion( + ctx, + observe.Bulletf("%s", scope.Category().PathType())) + defer close(progressBar) + + var dbcs []data.BackupCollection + + switch scope.Category().PathType() { + case path.LibrariesCategory: // TODO + } + + collections = append(collections, dbcs...) + + categories[scope.Category().PathType()] = struct{}{} + } + + if len(collections) > 0 { + baseCols, err := graph.BaseCollections( + ctx, + collections, + creds.AzureTenantID, + bpc.ProtectedResource.ID(), + path.UnknownService, // path.GroupsService + categories, + su, + errs) + if err != nil { + return nil, nil, false, err + } + + collections = append(collections, baseCols...) 
+ } + + return collections, ssmb.ToReader(), canUsePreviousBackup, el.Failure() +} diff --git a/src/internal/m365/service/groups/restore.go b/src/internal/m365/service/groups/restore.go new file mode 100644 index 000000000..e36b3d7df --- /dev/null +++ b/src/internal/m365/service/groups/restore.go @@ -0,0 +1,93 @@ +package groups + +import ( + "context" + "errors" + + "github.com/alcionai/clues" + + "github.com/alcionai/corso/src/internal/common/idname" + "github.com/alcionai/corso/src/internal/data" + "github.com/alcionai/corso/src/internal/m365/support" + "github.com/alcionai/corso/src/internal/operations/inject" + "github.com/alcionai/corso/src/pkg/backup/details" + "github.com/alcionai/corso/src/pkg/count" + "github.com/alcionai/corso/src/pkg/fault" + "github.com/alcionai/corso/src/pkg/path" + "github.com/alcionai/corso/src/pkg/services/m365/api" +) + +// ConsumeRestoreCollections will restore the specified data collections into OneDrive +func ConsumeRestoreCollections( + ctx context.Context, + rcc inject.RestoreConsumerConfig, + ac api.Client, + backupDriveIDNames idname.Cacher, + dcs []data.RestoreCollection, + deets *details.Builder, + errs *fault.Bus, + ctr *count.Bus, +) (*support.ControllerOperationStatus, error) { + var ( + restoreMetrics support.CollectionMetrics + // caches = onedrive.NewRestoreCaches(backupDriveIDNames) + el = errs.Local() + ) + + // TODO: uncomment when a handler is available + // err := caches.Populate(ctx, lrh, rcc.ProtectedResource.ID()) + // if err != nil { + // return nil, clues.Wrap(err, "initializing restore caches") + // } + + // Reorder collections so that the parents directories are created + // before the child directories; a requirement for permissions. + data.SortRestoreCollections(dcs) + + // Iterate through the data collections and restore the contents of each + for _, dc := range dcs { + if el.Failure() != nil { + break + } + + var ( + err error + category = dc.FullPath().Category() + metrics support.CollectionMetrics + ictx = clues.Add(ctx, + "category", category, + "restore_location", clues.Hide(rcc.RestoreConfig.Location), + "protected_resource", clues.Hide(dc.FullPath().ResourceOwner()), + "full_path", dc.FullPath()) + ) + + switch dc.FullPath().Category() { + case path.LibrariesCategory: + // TODO + + default: + return nil, clues.New("data category not supported"). + With("category", category). 
+ WithClues(ictx) + } + + restoreMetrics = support.CombineMetrics(restoreMetrics, metrics) + + if err != nil { + el.AddRecoverable(ctx, err) + } + + if errors.Is(err, context.Canceled) { + break + } + } + + status := support.CreateStatus( + ctx, + support.Restore, + len(dcs), + restoreMetrics, + rcc.RestoreConfig.Location) + + return status, el.Failure() +} diff --git a/src/internal/m365/onedrive/backup.go b/src/internal/m365/service/onedrive/backup.go similarity index 95% rename from src/internal/m365/onedrive/backup.go rename to src/internal/m365/service/onedrive/backup.go index ddf410958..169aba08c 100644 --- a/src/internal/m365/onedrive/backup.go +++ b/src/internal/m365/service/onedrive/backup.go @@ -7,6 +7,7 @@ import ( "github.com/alcionai/corso/src/internal/common/prefixmatcher" "github.com/alcionai/corso/src/internal/data" + "github.com/alcionai/corso/src/internal/m365/collection/drive" "github.com/alcionai/corso/src/internal/m365/graph" "github.com/alcionai/corso/src/internal/m365/support" "github.com/alcionai/corso/src/internal/operations/inject" @@ -47,8 +48,8 @@ func ProduceBackupCollections( logger.Ctx(ctx).Debug("creating OneDrive collections") - nc := NewCollections( - &itemBackupHandler{ac.Drives(), scope}, + nc := drive.NewCollections( + drive.NewItemBackupHandler(ac.Drives(), scope), tenant, bpc.ProtectedResource.ID(), su, diff --git a/src/internal/m365/onedrive/backup_test.go b/src/internal/m365/service/onedrive/backup_test.go similarity index 100% rename from src/internal/m365/onedrive/backup_test.go rename to src/internal/m365/service/onedrive/backup_test.go diff --git a/src/internal/m365/onedrive/consts/consts.go b/src/internal/m365/service/onedrive/consts/consts.go similarity index 100% rename from src/internal/m365/onedrive/consts/consts.go rename to src/internal/m365/service/onedrive/consts/consts.go diff --git a/src/internal/m365/onedrive/export.go b/src/internal/m365/service/onedrive/export.go similarity index 95% rename from src/internal/m365/onedrive/export.go rename to src/internal/m365/service/onedrive/export.go index 9868a9b71..8c0af44a2 100644 --- a/src/internal/m365/onedrive/export.go +++ b/src/internal/m365/service/onedrive/export.go @@ -7,7 +7,8 @@ import ( "github.com/alcionai/clues" "github.com/alcionai/corso/src/internal/data" - "github.com/alcionai/corso/src/internal/m365/onedrive/metadata" + "github.com/alcionai/corso/src/internal/m365/collection/drive" + "github.com/alcionai/corso/src/internal/m365/collection/drive/metadata" "github.com/alcionai/corso/src/internal/version" "github.com/alcionai/corso/src/pkg/backup/details" "github.com/alcionai/corso/src/pkg/control" @@ -120,7 +121,7 @@ func getItemName( trimmedName := strings.TrimSuffix(id, metadata.DataFileSuffix) metaName := trimmedName + metadata.MetaFileSuffix - meta, err := fetchAndReadMetadata(ctx, fin, metaName) + meta, err := drive.FetchAndReadMetadata(ctx, fin, metaName) if err != nil { return "", clues.Wrap(err, "getting metadata").WithClues(ctx) } diff --git a/src/internal/m365/onedrive/export_test.go b/src/internal/m365/service/onedrive/export_test.go similarity index 98% rename from src/internal/m365/onedrive/export_test.go rename to src/internal/m365/service/onedrive/export_test.go index ce707885f..3468a7661 100644 --- a/src/internal/m365/onedrive/export_test.go +++ b/src/internal/m365/service/onedrive/export_test.go @@ -10,8 +10,8 @@ import ( "github.com/stretchr/testify/suite" "github.com/alcionai/corso/src/internal/data" - odConsts 
"github.com/alcionai/corso/src/internal/m365/onedrive/consts" - "github.com/alcionai/corso/src/internal/m365/onedrive/metadata" + "github.com/alcionai/corso/src/internal/m365/collection/drive/metadata" + odConsts "github.com/alcionai/corso/src/internal/m365/service/onedrive/consts" "github.com/alcionai/corso/src/internal/tester" "github.com/alcionai/corso/src/internal/version" "github.com/alcionai/corso/src/pkg/control" diff --git a/src/internal/m365/onedrive/mock/handlers.go b/src/internal/m365/service/onedrive/mock/handlers.go similarity index 99% rename from src/internal/m365/onedrive/mock/handlers.go rename to src/internal/m365/service/onedrive/mock/handlers.go index 75dd3c3f1..20beb6bca 100644 --- a/src/internal/m365/onedrive/mock/handlers.go +++ b/src/internal/m365/service/onedrive/mock/handlers.go @@ -8,7 +8,7 @@ import ( "github.com/microsoftgraph/msgraph-sdk-go/drives" "github.com/microsoftgraph/msgraph-sdk-go/models" - odConsts "github.com/alcionai/corso/src/internal/m365/onedrive/consts" + odConsts "github.com/alcionai/corso/src/internal/m365/service/onedrive/consts" "github.com/alcionai/corso/src/pkg/backup/details" "github.com/alcionai/corso/src/pkg/control" "github.com/alcionai/corso/src/pkg/path" diff --git a/src/internal/m365/onedrive/mock/item.go b/src/internal/m365/service/onedrive/mock/item.go similarity index 83% rename from src/internal/m365/onedrive/mock/item.go rename to src/internal/m365/service/onedrive/mock/item.go index 1dd8cc885..852a260e6 100644 --- a/src/internal/m365/onedrive/mock/item.go +++ b/src/internal/m365/service/onedrive/mock/item.go @@ -8,6 +8,7 @@ import ( "github.com/alcionai/corso/src/internal/data" "github.com/alcionai/corso/src/pkg/backup/details" + "github.com/alcionai/corso/src/pkg/extensions" ) // --------------------------------------------------------------------------- @@ -16,21 +17,22 @@ import ( var _ data.Stream = &Data{} -// TODO: move to data/mock for service-agnostic mocking -// Data represents a single item retrieved from, or restored to, onedrive type Data struct { - ID string - Reader io.ReadCloser - ReadErr error - size int64 - modifiedTime time.Time - deleted bool + ID string + DriveID string + DriveName string + Reader io.ReadCloser + ReadErr error + Sz int64 + ModifiedTime time.Time + Del bool + ExtensionData *details.ExtensionData } func (d *Data) UUID() string { return d.ID } -func (d *Data) Deleted() bool { return d.deleted } -func (d *Data) Size() int64 { return d.size } -func (d *Data) ModTime() time.Time { return d.modifiedTime } +func (d *Data) Deleted() bool { return d.Del } +func (d *Data) Size() int64 { return d.Sz } +func (d *Data) ModTime() time.Time { return d.ModifiedTime } func (d *Data) ToReader() io.ReadCloser { if d.ReadErr != nil { @@ -43,10 +45,14 @@ func (d *Data) ToReader() io.ReadCloser { func (d *Data) Info() details.ItemInfo { return details.ItemInfo{ OneDrive: &details.OneDriveInfo{ - ItemType: details.OneDriveItem, - ItemName: "test.txt", - Size: 1, + ItemType: details.OneDriveItem, + ItemName: "test.txt", + Size: d.Sz, + DriveID: d.DriveID, + DriveName: d.DriveName, + Modified: d.ModifiedTime, }, + Extension: d.ExtensionData, } } @@ -81,6 +87,18 @@ func FileRespReadCloser(pl string) io.ReadCloser { return io.NopCloser(bytes.NewReader([]byte(pl))) } +func FileRespWithExtensions(pl string, extData *details.ExtensionData) io.ReadCloser { + rc := FileRespReadCloser(pl) + + me := &extensions.MockExtension{ + Ctx: context.Background(), + InnerRc: rc, + ExtData: extData, + } + + return io.NopCloser(me) +} 
+ const ( DriveItemFileName = "fnords.txt" DriveFileMetaData = `{"fileName": "` + DriveItemFileName + `"}` diff --git a/src/internal/m365/service/onedrive/restore.go b/src/internal/m365/service/onedrive/restore.go new file mode 100644 index 000000000..a1dc65182 --- /dev/null +++ b/src/internal/m365/service/onedrive/restore.go @@ -0,0 +1,221 @@ +package onedrive + +import ( + "context" + "sort" + + "github.com/alcionai/clues" + "github.com/pkg/errors" + + "github.com/alcionai/corso/src/internal/common/idname" + "github.com/alcionai/corso/src/internal/data" + "github.com/alcionai/corso/src/internal/m365/collection/drive" + "github.com/alcionai/corso/src/internal/m365/support" + "github.com/alcionai/corso/src/internal/operations/inject" + "github.com/alcionai/corso/src/internal/version" + "github.com/alcionai/corso/src/pkg/backup/details" + "github.com/alcionai/corso/src/pkg/count" + "github.com/alcionai/corso/src/pkg/fault" + "github.com/alcionai/corso/src/pkg/path" +) + +// ConsumeRestoreCollections will restore the specified data collections into OneDrive +func ConsumeRestoreCollections( + ctx context.Context, + rh drive.RestoreHandler, + rcc inject.RestoreConsumerConfig, + backupDriveIDNames idname.Cacher, + dcs []data.RestoreCollection, + deets *details.Builder, + errs *fault.Bus, + ctr *count.Bus, +) (*support.ControllerOperationStatus, error) { + var ( + restoreMetrics support.CollectionMetrics + el = errs.Local() + caches = drive.NewRestoreCaches(backupDriveIDNames) + fallbackDriveName = rcc.RestoreConfig.Location + ) + + ctx = clues.Add(ctx, "backup_version", rcc.BackupVersion) + + err := caches.Populate(ctx, rh, rcc.ProtectedResource.ID()) + if err != nil { + return nil, clues.Wrap(err, "initializing restore caches") + } + + // Reorder collections so that the parents directories are created + // before the child directories; a requirement for permissions. + data.SortRestoreCollections(dcs) + + // Iterate through the data collections and restore the contents of each + for _, dc := range dcs { + if el.Failure() != nil { + break + } + + var ( + err error + metrics support.CollectionMetrics + ictx = clues.Add( + ctx, + "category", dc.FullPath().Category(), + "full_path", dc.FullPath()) + ) + + metrics, err = drive.RestoreCollection( + ictx, + rh, + rcc, + dc, + caches, + deets, + fallbackDriveName, + errs, + ctr.Local()) + if err != nil { + el.AddRecoverable(ctx, err) + } + + restoreMetrics = support.CombineMetrics(restoreMetrics, metrics) + + if errors.Is(err, context.Canceled) { + break + } + } + + status := support.CreateStatus( + ctx, + support.Restore, + len(dcs), + restoreMetrics, + rcc.RestoreConfig.Location) + + return status, el.Failure() +} + +// Augment restore path to add extra files(meta) needed for restore as +// well as do any other ordering operations on the paths +// +// Only accepts StoragePath/RestorePath pairs where the RestorePath is +// at least as long as the StoragePath. If the RestorePath is longer than the +// StoragePath then the first few (closest to the root) directories will use +// default permissions during restore. +func AugmentRestorePaths( + backupVersion int, + paths []path.RestorePaths, +) ([]path.RestorePaths, error) { + // Keyed by each value's StoragePath.String() which corresponds to the RepoRef + // of the directory. 
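+ // The loop below walks each StoragePath up through its parent directories + // (stopping at the drive root) so that every ancestor folder gets an entry.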
+ colPaths := map[string]path.RestorePaths{} + + for _, p := range paths { + first := true + + for { + sp, err := p.StoragePath.Dir() + if err != nil { + return nil, err + } + + drivePath, err := path.ToDrivePath(sp) + if err != nil { + return nil, err + } + + if len(drivePath.Folders) == 0 { + break + } + + if len(p.RestorePath.Elements()) < len(sp.Elements()) { + return nil, clues.New("restorePath shorter than storagePath"). + With("restore_path", p.RestorePath, "storage_path", sp) + } + + rp := p.RestorePath + + // Make sure the RestorePath always points to the level of the current + // collection. We need to track if it's the first iteration because the + // RestorePath starts out at the collection level to begin with. + if !first { + rp, err = p.RestorePath.Dir() + if err != nil { + return nil, err + } + } + + paths := path.RestorePaths{ + StoragePath: sp, + RestorePath: rp, + } + + colPaths[sp.String()] = paths + p = paths + first = false + } + } + + // Adds dirmeta files as we need to make sure collections for all + // directories involved are created and not just the final one. No + // need to add `.meta` files (metadata for files) as they will + // anyway be looked up automatically. + // TODO: Stop populating .dirmeta for newer versions once we can + // get files from parent directory via `Fetch` in a collection. + // As of now look up metadata for parent directories from a + // collection. + for _, p := range colPaths { + el := p.StoragePath.Elements() + + if backupVersion >= version.OneDrive6NameInMeta { + mPath, err := p.StoragePath.AppendItem(".dirmeta") + if err != nil { + return nil, err + } + + paths = append( + paths, + path.RestorePaths{StoragePath: mPath, RestorePath: p.RestorePath}) + } else if backupVersion >= version.OneDrive4DirIncludesPermissions { + mPath, err := p.StoragePath.AppendItem(el.Last() + ".dirmeta") + if err != nil { + return nil, err + } + + paths = append( + paths, + path.RestorePaths{StoragePath: mPath, RestorePath: p.RestorePath}) + } else if backupVersion >= version.OneDrive1DataAndMetaFiles { + pp, err := p.StoragePath.Dir() + if err != nil { + return nil, err + } + + mPath, err := pp.AppendItem(el.Last() + ".dirmeta") + if err != nil { + return nil, err + } + + prp, err := p.RestorePath.Dir() + if err != nil { + return nil, err + } + + paths = append( + paths, + path.RestorePaths{StoragePath: mPath, RestorePath: prp}) + } + } + + // This sort is done primarily to order `.meta` files after `.data` + // files. This is only a necessity for OneDrive as we are storing + // metadata for files/folders in separate meta files and we need the + // data to be restored before we can restore the metadata. + // + // This sorting assumes stuff in the same StoragePath directory ends up in the + // same RestorePath collection.
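+ // Sorting by the full StoragePath string also keeps entries for the same + // directory adjacent and makes the restore order deterministic.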
+ sort.Slice(paths, func(i, j int) bool { + return paths[i].StoragePath.String() < paths[j].StoragePath.String() + }) + + return paths, nil +} diff --git a/src/internal/m365/service/onedrive/restore_test.go b/src/internal/m365/service/onedrive/restore_test.go new file mode 100644 index 000000000..0af13eccb --- /dev/null +++ b/src/internal/m365/service/onedrive/restore_test.go @@ -0,0 +1,317 @@ +package onedrive + +import ( + "testing" + + "github.com/alcionai/clues" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "github.com/stretchr/testify/suite" + + "github.com/alcionai/corso/src/internal/tester" + "github.com/alcionai/corso/src/internal/version" + "github.com/alcionai/corso/src/pkg/path" +) + +type RestoreUnitSuite struct { + tester.Suite +} + +func TestRestoreUnitSuite(t *testing.T) { + suite.Run(t, &RestoreUnitSuite{Suite: tester.NewUnitSuite(t)}) +} + +func (suite *RestoreUnitSuite) TestAugmentRestorePaths() { + // Adding a simple test here so that we can be sure that this + // function gets updated whenever we add a new version. + require.LessOrEqual(suite.T(), version.Backup, version.All8MigrateUserPNToID, "unsupported backup version") + + table := []struct { + name string + version int + input []string + output []string + }{ + { + name: "no change v0", + version: 0, + input: []string{ + "file.txt.data", + "file.txt", // v0 does not have `.data` + }, + output: []string{ + "file.txt", // ordering artifact of sorting + "file.txt.data", + }, + }, + { + name: "one folder v0", + version: 0, + input: []string{ + "folder/file.txt.data", + "folder/file.txt", + }, + output: []string{ + "folder/file.txt", + "folder/file.txt.data", + }, + }, + { + name: "no change v1", + version: version.OneDrive1DataAndMetaFiles, + input: []string{ + "file.txt.data", + }, + output: []string{ + "file.txt.data", + }, + }, + { + name: "one folder v1", + version: version.OneDrive1DataAndMetaFiles, + input: []string{ + "folder/file.txt.data", + }, + output: []string{ + "folder.dirmeta", + "folder/file.txt.data", + }, + }, + { + name: "nested folders v1", + version: version.OneDrive1DataAndMetaFiles, + input: []string{ + "folder/file.txt.data", + "folder/folder2/file.txt.data", + }, + output: []string{ + "folder.dirmeta", + "folder/file.txt.data", + "folder/folder2.dirmeta", + "folder/folder2/file.txt.data", + }, + }, + { + name: "no change v4", + version: version.OneDrive4DirIncludesPermissions, + input: []string{ + "file.txt.data", + }, + output: []string{ + "file.txt.data", + }, + }, + { + name: "one folder v4", + version: version.OneDrive4DirIncludesPermissions, + input: []string{ + "folder/file.txt.data", + }, + output: []string{ + "folder/file.txt.data", + "folder/folder.dirmeta", + }, + }, + { + name: "nested folders v4", + version: version.OneDrive4DirIncludesPermissions, + input: []string{ + "folder/file.txt.data", + "folder/folder2/file.txt.data", + }, + output: []string{ + "folder/file.txt.data", + "folder/folder.dirmeta", + "folder/folder2/file.txt.data", + "folder/folder2/folder2.dirmeta", + }, + }, + { + name: "no change v6", + version: version.OneDrive6NameInMeta, + input: []string{ + "file.txt.data", + }, + output: []string{ + "file.txt.data", + }, + }, + { + name: "one folder v6", + version: version.OneDrive6NameInMeta, + input: []string{ + "folder/file.txt.data", + }, + output: []string{ + "folder/.dirmeta", + "folder/file.txt.data", + }, + }, + { + name: "nested folders v6", + version: version.OneDrive6NameInMeta, + input: []string{ + "folder/file.txt.data", + 
"folder/folder2/file.txt.data", + }, + output: []string{ + "folder/.dirmeta", + "folder/file.txt.data", + "folder/folder2/.dirmeta", + "folder/folder2/file.txt.data", + }, + }, + } + + for _, test := range table { + suite.Run(test.name, func() { + t := suite.T() + + _, flush := tester.NewContext(t) + defer flush() + + base := "id/onedrive/user/files/drives/driveID/root:/" + + inPaths := []path.RestorePaths{} + for _, ps := range test.input { + p, err := path.FromDataLayerPath(base+ps, true) + require.NoError(t, err, "creating path", clues.ToCore(err)) + + pd, err := p.Dir() + require.NoError(t, err, "creating collection path", clues.ToCore(err)) + + inPaths = append( + inPaths, + path.RestorePaths{StoragePath: p, RestorePath: pd}) + } + + outPaths := []path.RestorePaths{} + for _, ps := range test.output { + p, err := path.FromDataLayerPath(base+ps, true) + require.NoError(t, err, "creating path", clues.ToCore(err)) + + pd, err := p.Dir() + require.NoError(t, err, "creating collection path", clues.ToCore(err)) + + outPaths = append( + outPaths, + path.RestorePaths{StoragePath: p, RestorePath: pd}) + } + + actual, err := AugmentRestorePaths(test.version, inPaths) + require.NoError(t, err, "augmenting paths", clues.ToCore(err)) + + // Ordering of paths matter here as we need dirmeta files + // to show up before file in dir + assert.Equal(t, outPaths, actual, "augmented paths") + }) + } +} + +// TestAugmentRestorePaths_DifferentRestorePath tests that RestorePath +// substitution works properly. Since it's only possible for future backup +// versions to need restore path substitution (i.e. due to storing folders by +// ID instead of name) this is only tested against the most recent backup +// version at the moment. +func (suite *RestoreUnitSuite) TestAugmentRestorePaths_DifferentRestorePath() { + // Adding a simple test here so that we can be sure that this + // function gets updated whenever we add a new version. 
+ require.LessOrEqual(suite.T(), version.Backup, version.All8MigrateUserPNToID, "unsupported backup version") + + type pathPair struct { + storage string + restore string + } + + table := []struct { + name string + version int + input []pathPair + output []pathPair + errCheck assert.ErrorAssertionFunc + }{ + { + name: "nested folders", + version: version.Backup, + input: []pathPair{ + {storage: "folder-id/file.txt.data", restore: "folder"}, + {storage: "folder-id/folder2-id/file.txt.data", restore: "folder/folder2"}, + }, + output: []pathPair{ + {storage: "folder-id/.dirmeta", restore: "folder"}, + {storage: "folder-id/file.txt.data", restore: "folder"}, + {storage: "folder-id/folder2-id/.dirmeta", restore: "folder/folder2"}, + {storage: "folder-id/folder2-id/file.txt.data", restore: "folder/folder2"}, + }, + errCheck: assert.NoError, + }, + { + name: "restore path longer one folder", + version: version.Backup, + input: []pathPair{ + {storage: "folder-id/file.txt.data", restore: "corso_restore/folder"}, + }, + output: []pathPair{ + {storage: "folder-id/.dirmeta", restore: "corso_restore/folder"}, + {storage: "folder-id/file.txt.data", restore: "corso_restore/folder"}, + }, + errCheck: assert.NoError, + }, + { + name: "restore path shorter one folder", + version: version.Backup, + input: []pathPair{ + {storage: "folder-id/file.txt.data", restore: ""}, + }, + errCheck: assert.Error, + }, + } + + for _, test := range table { + suite.Run(test.name, func() { + t := suite.T() + + _, flush := tester.NewContext(t) + defer flush() + + base := "id/onedrive/user/files/drives/driveID/root:/" + + inPaths := []path.RestorePaths{} + for _, ps := range test.input { + p, err := path.FromDataLayerPath(base+ps.storage, true) + require.NoError(t, err, "creating path", clues.ToCore(err)) + + r, err := path.FromDataLayerPath(base+ps.restore, false) + require.NoError(t, err, "creating path", clues.ToCore(err)) + + inPaths = append( + inPaths, + path.RestorePaths{StoragePath: p, RestorePath: r}) + } + + outPaths := []path.RestorePaths{} + for _, ps := range test.output { + p, err := path.FromDataLayerPath(base+ps.storage, true) + require.NoError(t, err, "creating path", clues.ToCore(err)) + + r, err := path.FromDataLayerPath(base+ps.restore, false) + require.NoError(t, err, "creating path", clues.ToCore(err)) + + outPaths = append( + outPaths, + path.RestorePaths{StoragePath: p, RestorePath: r}) + } + + actual, err := AugmentRestorePaths(test.version, inPaths) + test.errCheck(t, err, "augmenting paths", clues.ToCore(err)) + + if err != nil { + return + } + + // Ordering of paths matter here as we need dirmeta files + // to show up before file in dir + assert.Equal(t, outPaths, actual, "augmented paths") + }) + } +} diff --git a/src/internal/m365/onedrive/stub/stub.go b/src/internal/m365/service/onedrive/stub/stub.go similarity index 98% rename from src/internal/m365/onedrive/stub/stub.go rename to src/internal/m365/service/onedrive/stub/stub.go index da313a98c..933e98762 100644 --- a/src/internal/m365/onedrive/stub/stub.go +++ b/src/internal/m365/service/onedrive/stub/stub.go @@ -8,8 +8,8 @@ import ( "github.com/alcionai/clues" "github.com/google/uuid" - odConsts "github.com/alcionai/corso/src/internal/m365/onedrive/consts" - "github.com/alcionai/corso/src/internal/m365/onedrive/metadata" + "github.com/alcionai/corso/src/internal/m365/collection/drive/metadata" + odConsts "github.com/alcionai/corso/src/internal/m365/service/onedrive/consts" m365Stub "github.com/alcionai/corso/src/internal/m365/stub" 
"github.com/alcionai/corso/src/internal/version" "github.com/alcionai/corso/src/pkg/path" diff --git a/src/internal/m365/onedrive/testdata/item.go b/src/internal/m365/service/onedrive/testdata/item.go similarity index 100% rename from src/internal/m365/onedrive/testdata/item.go rename to src/internal/m365/service/onedrive/testdata/item.go diff --git a/src/internal/m365/sharepoint/api/beta_service.go b/src/internal/m365/service/sharepoint/api/beta_service.go similarity index 100% rename from src/internal/m365/sharepoint/api/beta_service.go rename to src/internal/m365/service/sharepoint/api/beta_service.go diff --git a/src/internal/m365/sharepoint/api/beta_service_test.go b/src/internal/m365/service/sharepoint/api/beta_service_test.go similarity index 100% rename from src/internal/m365/sharepoint/api/beta_service_test.go rename to src/internal/m365/service/sharepoint/api/beta_service_test.go diff --git a/src/internal/m365/sharepoint/api/pages.go b/src/internal/m365/service/sharepoint/api/pages.go similarity index 100% rename from src/internal/m365/sharepoint/api/pages.go rename to src/internal/m365/service/sharepoint/api/pages.go diff --git a/src/internal/m365/sharepoint/api/pages_test.go b/src/internal/m365/service/sharepoint/api/pages_test.go similarity index 92% rename from src/internal/m365/sharepoint/api/pages_test.go rename to src/internal/m365/service/sharepoint/api/pages_test.go index f3052c7a4..ae02d87c1 100644 --- a/src/internal/m365/sharepoint/api/pages_test.go +++ b/src/internal/m365/service/sharepoint/api/pages_test.go @@ -10,10 +10,10 @@ import ( "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" + "github.com/alcionai/corso/src/internal/m365/collection/site" "github.com/alcionai/corso/src/internal/m365/graph" - "github.com/alcionai/corso/src/internal/m365/sharepoint" - "github.com/alcionai/corso/src/internal/m365/sharepoint/api" - spMock "github.com/alcionai/corso/src/internal/m365/sharepoint/mock" + "github.com/alcionai/corso/src/internal/m365/service/sharepoint/api" + spMock "github.com/alcionai/corso/src/internal/m365/service/sharepoint/mock" "github.com/alcionai/corso/src/internal/tester" "github.com/alcionai/corso/src/internal/tester/tconfig" "github.com/alcionai/corso/src/pkg/account" @@ -108,7 +108,7 @@ func (suite *SharePointPageSuite) TestRestoreSinglePage() { //nolint:lll byteArray := spMock.Page("Byte Test") - pageData := sharepoint.NewItem( + pageData := site.NewItem( testName, io.NopCloser(bytes.NewReader(byteArray)), ) diff --git a/src/internal/m365/sharepoint/api/serialization.go b/src/internal/m365/service/sharepoint/api/serialization.go similarity index 100% rename from src/internal/m365/sharepoint/api/serialization.go rename to src/internal/m365/service/sharepoint/api/serialization.go diff --git a/src/internal/m365/sharepoint/api/serialization_test.go b/src/internal/m365/service/sharepoint/api/serialization_test.go similarity index 97% rename from src/internal/m365/sharepoint/api/serialization_test.go rename to src/internal/m365/service/sharepoint/api/serialization_test.go index 099691d16..8673cd95e 100644 --- a/src/internal/m365/sharepoint/api/serialization_test.go +++ b/src/internal/m365/service/sharepoint/api/serialization_test.go @@ -10,7 +10,7 @@ import ( "github.com/stretchr/testify/suite" bmodels "github.com/alcionai/corso/src/internal/m365/graph/betasdk/models" - spMock "github.com/alcionai/corso/src/internal/m365/sharepoint/mock" + spMock "github.com/alcionai/corso/src/internal/m365/service/sharepoint/mock" 
"github.com/alcionai/corso/src/internal/tester" ) diff --git a/src/internal/m365/service/sharepoint/backup.go b/src/internal/m365/service/sharepoint/backup.go new file mode 100644 index 000000000..479d4ac24 --- /dev/null +++ b/src/internal/m365/service/sharepoint/backup.go @@ -0,0 +1,133 @@ +package sharepoint + +import ( + "context" + + "github.com/alcionai/clues" + + "github.com/alcionai/corso/src/internal/common/prefixmatcher" + "github.com/alcionai/corso/src/internal/data" + "github.com/alcionai/corso/src/internal/m365/collection/site" + "github.com/alcionai/corso/src/internal/m365/graph" + "github.com/alcionai/corso/src/internal/m365/support" + "github.com/alcionai/corso/src/internal/observe" + "github.com/alcionai/corso/src/internal/operations/inject" + "github.com/alcionai/corso/src/pkg/account" + "github.com/alcionai/corso/src/pkg/fault" + "github.com/alcionai/corso/src/pkg/path" + "github.com/alcionai/corso/src/pkg/services/m365/api" +) + +func ProduceBackupCollections( + ctx context.Context, + bpc inject.BackupProducerConfig, + ac api.Client, + creds account.M365Config, + su support.StatusUpdater, + errs *fault.Bus, +) ([]data.BackupCollection, *prefixmatcher.StringSetMatcher, bool, error) { + b, err := bpc.Selector.ToSharePointBackup() + if err != nil { + return nil, nil, false, clues.Wrap(err, "sharePointDataCollection: parsing selector") + } + + var ( + el = errs.Local() + collections = []data.BackupCollection{} + categories = map[path.CategoryType]struct{}{} + ssmb = prefixmatcher.NewStringSetBuilder() + canUsePreviousBackup bool + ) + + ctx = clues.Add( + ctx, + "site_id", clues.Hide(bpc.ProtectedResource.ID()), + "site_url", clues.Hide(bpc.ProtectedResource.Name())) + + for _, scope := range b.Scopes() { + if el.Failure() != nil { + break + } + + progressBar := observe.MessageWithCompletion( + ctx, + observe.Bulletf("%s", scope.Category().PathType())) + defer close(progressBar) + + var spcs []data.BackupCollection + + switch scope.Category().PathType() { + case path.ListsCategory: + spcs, err = site.CollectLists( + ctx, + bpc, + ac, + creds.AzureTenantID, + su, + errs) + if err != nil { + el.AddRecoverable(ctx, err) + continue + } + + // Lists don't make use of previous metadata + // TODO: Revisit when we add support of lists + canUsePreviousBackup = true + + case path.LibrariesCategory: + spcs, canUsePreviousBackup, err = site.CollectLibraries( + ctx, + bpc, + ac.Drives(), + creds.AzureTenantID, + ssmb, + scope, + su, + errs) + if err != nil { + el.AddRecoverable(ctx, err) + continue + } + + case path.PagesCategory: + spcs, err = site.CollectPages( + ctx, + bpc, + creds, + ac, + su, + errs) + if err != nil { + el.AddRecoverable(ctx, err) + continue + } + + // Lists don't make use of previous metadata + // TODO: Revisit when we add support of pages + canUsePreviousBackup = true + } + + collections = append(collections, spcs...) + + categories[scope.Category().PathType()] = struct{}{} + } + + if len(collections) > 0 { + baseCols, err := graph.BaseCollections( + ctx, + collections, + creds.AzureTenantID, + bpc.ProtectedResource.ID(), + path.SharePointService, + categories, + su, + errs) + if err != nil { + return nil, nil, false, err + } + + collections = append(collections, baseCols...) 
+ } + + return collections, ssmb.ToReader(), canUsePreviousBackup, el.Failure() +} diff --git a/src/internal/m365/sharepoint/backup_test.go b/src/internal/m365/service/sharepoint/backup_test.go similarity index 70% rename from src/internal/m365/sharepoint/backup_test.go rename to src/internal/m365/service/sharepoint/backup_test.go index 433b8ab01..2a7c6aad8 100644 --- a/src/internal/m365/sharepoint/backup_test.go +++ b/src/internal/m365/service/sharepoint/backup_test.go @@ -9,14 +9,9 @@ import ( "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" - "github.com/alcionai/corso/src/internal/common/idname/mock" - "github.com/alcionai/corso/src/internal/m365/graph" - "github.com/alcionai/corso/src/internal/m365/onedrive" - odConsts "github.com/alcionai/corso/src/internal/m365/onedrive/consts" - "github.com/alcionai/corso/src/internal/operations/inject" + "github.com/alcionai/corso/src/internal/m365/collection/drive" + odConsts "github.com/alcionai/corso/src/internal/m365/service/onedrive/consts" "github.com/alcionai/corso/src/internal/tester" - "github.com/alcionai/corso/src/internal/tester/tconfig" - "github.com/alcionai/corso/src/internal/version" "github.com/alcionai/corso/src/pkg/control" "github.com/alcionai/corso/src/pkg/fault" "github.com/alcionai/corso/src/pkg/path" @@ -50,12 +45,12 @@ func (suite *LibrariesBackupUnitSuite) TestUpdateCollections() { const ( tenantID = "tenant" - site = "site" + siteID = "site" driveID = "driveID1" ) pb := path.Builder{}.Append(testBaseDrivePath.Elements()...) - ep, err := libraryBackupHandler{}.CanonicalPath(pb, tenantID, site) + ep, err := drive.NewLibraryBackupHandler(api.Drives{}, nil).CanonicalPath(pb, tenantID, siteID) require.NoError(suite.T(), err, clues.ToCore(err)) tests := []struct { @@ -99,15 +94,15 @@ func (suite *LibrariesBackupUnitSuite) TestUpdateCollections() { itemColls = map[string]map[string]string{ driveID: {}, } - collMap = map[string]map[string]*onedrive.Collection{ + collMap = map[string]map[string]*drive.Collection{ driveID: {}, } ) - c := onedrive.NewCollections( - &libraryBackupHandler{api.Drives{}, test.scope}, + c := drive.NewCollections( + drive.NewLibraryBackupHandler(api.Drives{}, test.scope), tenantID, - site, + siteID, nil, control.DefaultOptions()) @@ -169,56 +164,3 @@ func driveRootItem(id string) models.DriveItemable { return item } - -type SharePointPagesSuite struct { - tester.Suite -} - -func TestSharePointPagesSuite(t *testing.T) { - suite.Run(t, &SharePointPagesSuite{ - Suite: tester.NewIntegrationSuite( - t, - [][]string{tconfig.M365AcctCredEnvs}), - }) -} - -func (suite *SharePointPagesSuite) SetupSuite() { - ctx, flush := tester.NewContext(suite.T()) - defer flush() - - graph.InitializeConcurrencyLimiter(ctx, false, 4) -} - -func (suite *SharePointPagesSuite) TestCollectPages() { - t := suite.T() - - ctx, flush := tester.NewContext(t) - defer flush() - - var ( - siteID = tconfig.M365SiteID(t) - a = tconfig.NewM365Account(t) - ) - - creds, err := a.M365Config() - require.NoError(t, err, clues.ToCore(err)) - - ac, err := api.NewClient(creds, control.DefaultOptions()) - require.NoError(t, err, clues.ToCore(err)) - - bpc := inject.BackupProducerConfig{ - LastBackupVersion: version.NoBackup, - Options: control.DefaultOptions(), - ProtectedResource: mock.NewProvider(siteID, siteID), - } - - col, err := collectPages( - ctx, - bpc, - creds, - ac, - &MockGraphService{}, - fault.New(true)) - assert.NoError(t, err, clues.ToCore(err)) - assert.NotEmpty(t, col) -} diff --git 
a/src/internal/m365/sharepoint/mock/list.go b/src/internal/m365/service/sharepoint/mock/list.go similarity index 100% rename from src/internal/m365/sharepoint/mock/list.go rename to src/internal/m365/service/sharepoint/mock/list.go diff --git a/src/internal/m365/sharepoint/mock/mock_test.go b/src/internal/m365/service/sharepoint/mock/mock_test.go similarity index 95% rename from src/internal/m365/sharepoint/mock/mock_test.go rename to src/internal/m365/service/sharepoint/mock/mock_test.go index 52070c2cb..61590fb9e 100644 --- a/src/internal/m365/sharepoint/mock/mock_test.go +++ b/src/internal/m365/service/sharepoint/mock/mock_test.go @@ -9,7 +9,7 @@ import ( "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" - "github.com/alcionai/corso/src/internal/m365/sharepoint/api" + "github.com/alcionai/corso/src/internal/m365/service/sharepoint/api" "github.com/alcionai/corso/src/internal/tester" ) diff --git a/src/internal/m365/sharepoint/mock/page.go b/src/internal/m365/service/sharepoint/mock/page.go similarity index 100% rename from src/internal/m365/sharepoint/mock/page.go rename to src/internal/m365/service/sharepoint/mock/page.go diff --git a/src/internal/m365/service/sharepoint/restore.go b/src/internal/m365/service/sharepoint/restore.go new file mode 100644 index 000000000..35e1c67cd --- /dev/null +++ b/src/internal/m365/service/sharepoint/restore.go @@ -0,0 +1,122 @@ +package sharepoint + +import ( + "context" + "errors" + + "github.com/alcionai/clues" + + "github.com/alcionai/corso/src/internal/common/dttm" + "github.com/alcionai/corso/src/internal/common/idname" + "github.com/alcionai/corso/src/internal/data" + "github.com/alcionai/corso/src/internal/m365/collection/drive" + "github.com/alcionai/corso/src/internal/m365/collection/site" + "github.com/alcionai/corso/src/internal/m365/support" + "github.com/alcionai/corso/src/internal/operations/inject" + "github.com/alcionai/corso/src/pkg/backup/details" + "github.com/alcionai/corso/src/pkg/control" + "github.com/alcionai/corso/src/pkg/count" + "github.com/alcionai/corso/src/pkg/fault" + "github.com/alcionai/corso/src/pkg/path" + "github.com/alcionai/corso/src/pkg/services/m365/api" +) + +// ConsumeRestoreCollections will restore the specified data collections into OneDrive +func ConsumeRestoreCollections( + ctx context.Context, + rcc inject.RestoreConsumerConfig, + ac api.Client, + backupDriveIDNames idname.Cacher, + dcs []data.RestoreCollection, + deets *details.Builder, + errs *fault.Bus, + ctr *count.Bus, +) (*support.ControllerOperationStatus, error) { + var ( + lrh = drive.NewLibraryRestoreHandler(ac) + restoreMetrics support.CollectionMetrics + caches = drive.NewRestoreCaches(backupDriveIDNames) + el = errs.Local() + ) + + err := caches.Populate(ctx, lrh, rcc.ProtectedResource.ID()) + if err != nil { + return nil, clues.Wrap(err, "initializing restore caches") + } + + // Reorder collections so that the parents directories are created + // before the child directories; a requirement for permissions. 
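+ // e.g. a "folder" collection gets created before "folder/folder2".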
+ data.SortRestoreCollections(dcs) + + // Iterate through the data collections and restore the contents of each + for _, dc := range dcs { + if el.Failure() != nil { + break + } + + var ( + err error + category = dc.FullPath().Category() + metrics support.CollectionMetrics + ictx = clues.Add(ctx, + "category", category, + "restore_location", clues.Hide(rcc.RestoreConfig.Location), + "resource_owner", clues.Hide(dc.FullPath().ResourceOwner()), + "full_path", dc.FullPath()) + ) + + switch dc.FullPath().Category() { + case path.LibrariesCategory: + metrics, err = drive.RestoreCollection( + ictx, + lrh, + rcc, + dc, + caches, + deets, + control.DefaultRestoreContainerName(dttm.HumanReadableDriveItem), + errs, + ctr) + + case path.ListsCategory: + metrics, err = site.RestoreListCollection( + ictx, + ac.Stable, + dc, + rcc.RestoreConfig.Location, + deets, + errs) + + case path.PagesCategory: + metrics, err = site.RestorePageCollection( + ictx, + ac.Stable, + dc, + rcc.RestoreConfig.Location, + deets, + errs) + + default: + return nil, clues.Wrap(clues.New(category.String()), "category not supported").With("category", category) + } + + restoreMetrics = support.CombineMetrics(restoreMetrics, metrics) + + if err != nil { + el.AddRecoverable(ctx, err) + } + + if errors.Is(err, context.Canceled) { + break + } + } + + status := support.CreateStatus( + ctx, + support.Restore, + len(dcs), + restoreMetrics, + rcc.RestoreConfig.Location) + + return status, el.Failure() +} diff --git a/src/internal/m365/sharepoint/backup.go b/src/internal/m365/sharepoint/backup.go deleted file mode 100644 index 61acac7d2..000000000 --- a/src/internal/m365/sharepoint/backup.go +++ /dev/null @@ -1,289 +0,0 @@ -package sharepoint - -import ( - "context" - - "github.com/alcionai/clues" - - "github.com/alcionai/corso/src/internal/common/prefixmatcher" - "github.com/alcionai/corso/src/internal/data" - "github.com/alcionai/corso/src/internal/m365/graph" - "github.com/alcionai/corso/src/internal/m365/onedrive" - betaAPI "github.com/alcionai/corso/src/internal/m365/sharepoint/api" - "github.com/alcionai/corso/src/internal/m365/support" - "github.com/alcionai/corso/src/internal/observe" - "github.com/alcionai/corso/src/internal/operations/inject" - "github.com/alcionai/corso/src/pkg/account" - "github.com/alcionai/corso/src/pkg/fault" - "github.com/alcionai/corso/src/pkg/logger" - "github.com/alcionai/corso/src/pkg/path" - "github.com/alcionai/corso/src/pkg/selectors" - "github.com/alcionai/corso/src/pkg/services/m365/api" -) - -type statusUpdater interface { - UpdateStatus(status *support.ControllerOperationStatus) -} - -// ProduceBackupCollections returns a set of DataCollection which represents the SharePoint data -// for the specified user -func ProduceBackupCollections( - ctx context.Context, - bpc inject.BackupProducerConfig, - ac api.Client, - creds account.M365Config, - su statusUpdater, - errs *fault.Bus, -) ([]data.BackupCollection, *prefixmatcher.StringSetMatcher, bool, error) { - b, err := bpc.Selector.ToSharePointBackup() - if err != nil { - return nil, nil, false, clues.Wrap(err, "sharePointDataCollection: parsing selector") - } - - var ( - el = errs.Local() - collections = []data.BackupCollection{} - categories = map[path.CategoryType]struct{}{} - ssmb = prefixmatcher.NewStringSetBuilder() - canUsePreviousBackup bool - ) - - ctx = clues.Add( - ctx, - "site_id", clues.Hide(bpc.ProtectedResource.ID()), - "site_url", clues.Hide(bpc.ProtectedResource.Name())) - - for _, scope := range b.Scopes() { - if el.Failure() != nil 
{ - break - } - - progressBar := observe.MessageWithCompletion( - ctx, - observe.Bulletf("%s", scope.Category().PathType())) - defer close(progressBar) - - var spcs []data.BackupCollection - - switch scope.Category().PathType() { - case path.ListsCategory: - spcs, err = collectLists( - ctx, - bpc, - ac, - creds.AzureTenantID, - su, - errs) - if err != nil { - el.AddRecoverable(ctx, err) - continue - } - - // Lists don't make use of previous metadata - // TODO: Revisit when we add support of lists - canUsePreviousBackup = true - - case path.LibrariesCategory: - spcs, canUsePreviousBackup, err = collectLibraries( - ctx, - bpc, - ac.Drives(), - creds.AzureTenantID, - ssmb, - scope, - su, - errs) - if err != nil { - el.AddRecoverable(ctx, err) - continue - } - - case path.PagesCategory: - spcs, err = collectPages( - ctx, - bpc, - creds, - ac, - su, - errs) - if err != nil { - el.AddRecoverable(ctx, err) - continue - } - - // Lists don't make use of previous metadata - // TODO: Revisit when we add support of pages - canUsePreviousBackup = true - } - - collections = append(collections, spcs...) - - categories[scope.Category().PathType()] = struct{}{} - } - - if len(collections) > 0 { - baseCols, err := graph.BaseCollections( - ctx, - collections, - creds.AzureTenantID, - bpc.ProtectedResource.ID(), - path.SharePointService, - categories, - su.UpdateStatus, - errs) - if err != nil { - return nil, nil, false, err - } - - collections = append(collections, baseCols...) - } - - return collections, ssmb.ToReader(), canUsePreviousBackup, el.Failure() -} - -func collectLists( - ctx context.Context, - bpc inject.BackupProducerConfig, - ac api.Client, - tenantID string, - updater statusUpdater, - errs *fault.Bus, -) ([]data.BackupCollection, error) { - logger.Ctx(ctx).Debug("Creating SharePoint List Collections") - - var ( - el = errs.Local() - spcs = make([]data.BackupCollection, 0) - ) - - lists, err := preFetchLists(ctx, ac.Stable, bpc.ProtectedResource.ID()) - if err != nil { - return nil, err - } - - for _, tuple := range lists { - if el.Failure() != nil { - break - } - - dir, err := path.Build( - tenantID, - bpc.ProtectedResource.ID(), - path.SharePointService, - path.ListsCategory, - false, - tuple.name) - if err != nil { - el.AddRecoverable(ctx, clues.Wrap(err, "creating list collection path").WithClues(ctx)) - } - - collection := NewCollection( - dir, - ac, - List, - updater.UpdateStatus, - bpc.Options) - collection.AddJob(tuple.id) - - spcs = append(spcs, collection) - } - - return spcs, el.Failure() -} - -// collectLibraries constructs a onedrive Collections struct and Get()s -// all the drives associated with the site. 
-func collectLibraries( - ctx context.Context, - bpc inject.BackupProducerConfig, - ad api.Drives, - tenantID string, - ssmb *prefixmatcher.StringSetMatchBuilder, - scope selectors.SharePointScope, - updater statusUpdater, - errs *fault.Bus, -) ([]data.BackupCollection, bool, error) { - logger.Ctx(ctx).Debug("creating SharePoint Library collections") - - var ( - collections = []data.BackupCollection{} - colls = onedrive.NewCollections( - &libraryBackupHandler{ad, scope}, - tenantID, - bpc.ProtectedResource.ID(), - updater.UpdateStatus, - bpc.Options) - ) - - odcs, canUsePreviousBackup, err := colls.Get(ctx, bpc.MetadataCollections, ssmb, errs) - if err != nil { - return nil, false, graph.Wrap(ctx, err, "getting library") - } - - return append(collections, odcs...), canUsePreviousBackup, nil -} - -// collectPages constructs a sharepoint Collections struct and Get()s the associated -// M365 IDs for the associated Pages. -func collectPages( - ctx context.Context, - bpc inject.BackupProducerConfig, - creds account.M365Config, - ac api.Client, - updater statusUpdater, - errs *fault.Bus, -) ([]data.BackupCollection, error) { - logger.Ctx(ctx).Debug("creating SharePoint Pages collections") - - var ( - el = errs.Local() - spcs = make([]data.BackupCollection, 0) - ) - - // make the betaClient - // Need to receive From DataCollection Call - adpt, err := graph.CreateAdapter( - creds.AzureTenantID, - creds.AzureClientID, - creds.AzureClientSecret) - if err != nil { - return nil, clues.Wrap(err, "creating azure client adapter") - } - - betaService := betaAPI.NewBetaService(adpt) - - tuples, err := betaAPI.FetchPages(ctx, betaService, bpc.ProtectedResource.ID()) - if err != nil { - return nil, err - } - - for _, tuple := range tuples { - if el.Failure() != nil { - break - } - - dir, err := path.Build( - creds.AzureTenantID, - bpc.ProtectedResource.ID(), - path.SharePointService, - path.PagesCategory, - false, - tuple.Name) - if err != nil { - el.AddRecoverable(ctx, clues.Wrap(err, "creating page collection path").WithClues(ctx)) - } - - collection := NewCollection( - dir, - ac, - Pages, - updater.UpdateStatus, - bpc.Options) - collection.betaService = betaService - collection.AddJob(tuple.ID) - - spcs = append(spcs, collection) - } - - return spcs, el.Failure() -} diff --git a/src/internal/m365/stub/stub.go b/src/internal/m365/stub/stub.go index da3340f60..cb986cc4d 100644 --- a/src/internal/m365/stub/stub.go +++ b/src/internal/m365/stub/stub.go @@ -7,10 +7,10 @@ import ( "golang.org/x/exp/maps" "github.com/alcionai/corso/src/internal/data" - exchMock "github.com/alcionai/corso/src/internal/m365/exchange/mock" + "github.com/alcionai/corso/src/internal/m365/collection/drive/metadata" "github.com/alcionai/corso/src/internal/m365/mock" - "github.com/alcionai/corso/src/internal/m365/onedrive/metadata" "github.com/alcionai/corso/src/internal/m365/resource" + exchMock "github.com/alcionai/corso/src/internal/m365/service/exchange/mock" "github.com/alcionai/corso/src/pkg/control" "github.com/alcionai/corso/src/pkg/path" ) diff --git a/src/internal/model/model.go b/src/internal/model/model.go index b33762545..dcf0dce51 100644 --- a/src/internal/model/model.go +++ b/src/internal/model/model.go @@ -32,7 +32,10 @@ const ( // common tags for filtering const ( - ServiceTag = "service" + ServiceTag = "service" + BackupTypeTag = "backup-type" + AssistBackup = "assist-backup" + MergeBackup = "merge-backup" ) // Valid returns true if the ModelType value fits within the iota range. 
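The tag constants added to model.go above (BackupTypeTag, with AssistBackup and MergeBackup values) are what createBackupModels uses later in this change to label each persisted backup. A minimal sketch of how a consumer might branch on that tag; the standalone helper and its package name are illustrative assumptions, only the model constants come from this change:

    package example

    import "github.com/alcionai/corso/src/internal/model"

    // backupType classifies a persisted backup's tags by its backup-type tag.
    // An empty string means the tag isn't set on the given map.
    func backupType(tags map[string]string) string {
        switch tags[model.BackupTypeTag] {
        case model.AssistBackup:
            return model.AssistBackup
        case model.MergeBackup:
            return model.MergeBackup
        default:
            return ""
        }
    }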
diff --git a/src/internal/operations/backup.go b/src/internal/operations/backup.go index ed50dd368..a2531bc30 100644 --- a/src/internal/operations/backup.go +++ b/src/internal/operations/backup.go @@ -26,6 +26,7 @@ import ( "github.com/alcionai/corso/src/pkg/account" "github.com/alcionai/corso/src/pkg/backup" "github.com/alcionai/corso/src/pkg/backup/details" + "github.com/alcionai/corso/src/pkg/backup/identity" "github.com/alcionai/corso/src/pkg/control" "github.com/alcionai/corso/src/pkg/count" "github.com/alcionai/corso/src/pkg/fault" @@ -57,6 +58,9 @@ type BackupOperation struct { // when true, this allows for incremental backups instead of full data pulls incremental bool + // When true, disables kopia-assisted incremental backups. This forces + // downloading and hashing all item data for items not in the merge base(s). + disableAssistBackup bool } // BackupResults aggregate the details of the result of the operation. @@ -79,14 +83,15 @@ func NewBackupOperation( bus events.Eventer, ) (BackupOperation, error) { op := BackupOperation{ - operation: newOperation(opts, bus, count.New(), kw, sw), - ResourceOwner: owner, - Selectors: selector, - Version: "v0", - BackupVersion: version.Backup, - account: acct, - incremental: useIncrementalBackup(selector, opts), - bp: bp, + operation: newOperation(opts, bus, count.New(), kw, sw), + ResourceOwner: owner, + Selectors: selector, + Version: "v0", + BackupVersion: version.Backup, + account: acct, + incremental: useIncrementalBackup(selector, opts), + disableAssistBackup: opts.ToggleFeatures.ForceItemDataDownload, + bp: bp, } if err := op.validate(); err != nil { @@ -117,9 +122,64 @@ func (op BackupOperation) validate() error { // pointer wrapping the values, while those values // get populated asynchronously. type backupStats struct { - k *kopia.BackupStats - ctrl *data.CollectionStats - resourceCount int + k *kopia.BackupStats + ctrl *data.CollectionStats + resourceCount int + hasNewDetailEntries bool +} + +// An assist backup must meet the following criteria: +// 1. new detail entries were produced +// 2. valid details ssid & item snapshot ID +// 3. no non-recoverable errors +// 4. we observed recoverable errors +// 5. not running in best effort mode. Reason being that there is +// no way to distinguish assist backups from merge backups in best effort mode. +// +// Primary reason for persisting assist backup models is to ensure we don't +// lose corso extension data(deets) in the event of recoverable failures. +// +// Note: kopia.DetailsMergeInfoer doesn't impact decision making for creating +// assist backups. It may be empty if it’s the very first backup so there is no +// merge base to source base details from, or non-empty, if there was a merge +// base. In summary, if there are no new deets, no new extension data was produced +// and hence no need to persist assist backup model. +func isAssistBackup( + newDeetsProduced bool, + snapID, ssid string, + failurePolicy control.FailurePolicy, + err *fault.Bus, +) bool { + return newDeetsProduced && + len(snapID) > 0 && + len(ssid) > 0 && + failurePolicy != control.BestEffort && + err.Failure() == nil && + len(err.Recovered()) > 0 +} + +// A merge backup must meet the following criteria: +// 1. valid details ssid & item snapshot ID +// 2. zero recoverable errors +// 3. 
no recoverable errors if not running in best effort mode +func isMergeBackup( + snapID, ssid string, + failurePolicy control.FailurePolicy, + err *fault.Bus, +) bool { + if len(snapID) == 0 || len(ssid) == 0 { + return false + } + + if err.Failure() != nil { + return false + } + + if failurePolicy == control.BestEffort { + return true + } + + return len(err.Recovered()) == 0 } // --------------------------------------------------------------------------- @@ -180,7 +240,8 @@ func (op *BackupOperation) Run(ctx context.Context) (err error) { "resource_owner_name", clues.Hide(op.ResourceOwner.Name()), "backup_id", op.Results.BackupID, "service", op.Selectors.Service, - "incremental", op.incremental) + "incremental", op.incremental, + "disable_assist_backup", op.disableAssistBackup) op.bus.Event( ctx, @@ -224,7 +285,6 @@ func (op *BackupOperation) Run(ctx context.Context) (err error) { op.Errors.Fail(clues.Wrap(err, "running backup")) } - finalizeErrorHandling(ctx, op.Options, op.Errors, "running backup") LogFaultErrors(ctx, op.Errors.Errors(), "running backup") // ----- @@ -237,35 +297,25 @@ func (op *BackupOperation) Run(ctx context.Context) (err error) { return op.Errors.Failure() } - // force exit without backup in certain cases. - // see: https://github.com/alcionai/corso/pull/2510#discussion_r1113532530 - for _, e := range op.Errors.Recovered() { - if clues.HasLabel(e, fault.LabelForceNoBackupCreation) { - logger.Ctx(ctx). - With("error", e). - With(clues.InErr(err).Slice()...). - Infow("completed backup; conditional error forcing exit without model persistence", - "results", op.Results) - - return op.Errors.Fail(clues.Wrap(e, "forced backup")).Failure() - } - } - err = op.createBackupModels( ctx, sstore, - opStats.k.SnapshotID, + opStats, op.Results.BackupID, op.BackupVersion, deets.Details()) if err != nil { - op.Errors.Fail(clues.Wrap(err, "persisting backup")) + op.Errors.Fail(clues.Wrap(err, "persisting backup models")) return op.Errors.Failure() } - logger.Ctx(ctx).Infow("completed backup", "results", op.Results) + finalizeErrorHandling(ctx, op.Options, op.Errors, "running backup") - return nil + if op.Errors.Failure() == nil { + logger.Ctx(ctx).Infow("completed backup", "results", op.Results) + } + + return op.Errors.Failure() } // do is purely the action of running a backup. All pre/post behavior @@ -301,7 +351,8 @@ func (op *BackupOperation) do( op.kopia, reasons, fallbackReasons, op.account.ID(), - op.incremental) + op.incremental, + op.disableAssistBackup) if err != nil { return nil, clues.Wrap(err, "producing manifests and metadata") } @@ -312,6 +363,10 @@ func (op *BackupOperation) do( lastBackupVersion = mans.MinBackupVersion() } + // TODO(ashmrtn): This should probably just return a collection that deletes + // the entire subtree instead of returning an additional bool. That way base + // selection is controlled completely by flags and merging is controlled + // completely by collections. 
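+ // canUsePreviousBackup reports whether the collections produced below can
+ // be merged with data carried over from the base backups for this run.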
cs, ssmb, canUsePreviousBackup, err := produceBackupDataCollections( ctx, op.bp, @@ -345,12 +400,14 @@ func (op *BackupOperation) do( return nil, clues.Wrap(err, "persisting collection backups") } + opStats.hasNewDetailEntries = (deets != nil && !deets.Empty()) || + (toMerge != nil && toMerge.ItemsToMerge() > 0) opStats.k = writeStats err = mergeDetails( ctx, detailsStore, - mans.Backups(), + mans, toMerge, deets, writeStats, @@ -367,7 +424,7 @@ func (op *BackupOperation) do( return deets, nil } -func makeFallbackReasons(tenant string, sel selectors.Selector) []kopia.Reasoner { +func makeFallbackReasons(tenant string, sel selectors.Selector) []identity.Reasoner { if sel.PathService() != path.SharePointService && sel.DiscreteOwner != sel.DiscreteOwnerName { return selectorToReasons(tenant, sel, true) @@ -419,9 +476,9 @@ func selectorToReasons( tenant string, sel selectors.Selector, useOwnerNameForID bool, -) []kopia.Reasoner { +) []identity.Reasoner { service := sel.PathService() - reasons := []kopia.Reasoner{} + reasons := []identity.Reasoner{} pcs, err := sel.PathCategories() if err != nil { @@ -449,7 +506,7 @@ func consumeBackupCollections( ctx context.Context, bc kinject.BackupConsumer, tenantID string, - reasons []kopia.Reasoner, + reasons []identity.Reasoner, bbs kopia.BackupBases, cs []data.BackupCollection, pmr prefixmatcher.StringSetReader, @@ -495,13 +552,13 @@ func consumeBackupCollections( if kopiaStats.ErrorCount > 0 { err = clues.New("building kopia snapshot").WithClues(ctx) } else if kopiaStats.IgnoredErrorCount > kopiaStats.ExpectedIgnoredErrorCount { - err = clues.New("downloading items for persistence").WithClues(ctx) + logger.Ctx(ctx).Info("recoverable errors were seen during backup") } return kopiaStats, deets, itemsSourcedFromBase, err } -func matchesReason(reasons []kopia.Reasoner, p path.Path) bool { +func matchesReason(reasons []identity.Reasoner, p path.Path) bool { for _, reason := range reasons { if p.ResourceOwner() == reason.ProtectedResource() && p.Service() == reason.Service() && @@ -532,7 +589,10 @@ func getNewPathRefs( // able to assume we always have the location in the previous entry. We'll end // up doing some extra parsing, but it will simplify this code. if repoRef.Service() == path.ExchangeService { - newPath, newLoc, err := dataFromBackup.GetNewPathRefs(repoRef.ToBuilder(), nil) + newPath, newLoc, err := dataFromBackup.GetNewPathRefs( + repoRef.ToBuilder(), + entry.Modified(), + nil) if err != nil { return nil, nil, false, clues.Wrap(err, "getting new paths") } else if newPath == nil { @@ -565,7 +625,10 @@ func getNewPathRefs( return nil, nil, false, clues.New("entry with empty LocationRef") } - newPath, newLoc, err := dataFromBackup.GetNewPathRefs(repoRef.ToBuilder(), locRef) + newPath, newLoc, err := dataFromBackup.GetNewPathRefs( + repoRef.ToBuilder(), + entry.Modified(), + locRef) if err != nil { return nil, nil, false, clues.Wrap(err, "getting new paths with old location") } else if newPath == nil { @@ -579,10 +642,118 @@ func getNewPathRefs( return newPath, newLoc, updated, nil } +func mergeItemsFromBase( + ctx context.Context, + checkReason bool, + baseBackup kopia.BackupEntry, + detailsStore streamstore.Streamer, + dataFromBackup kopia.DetailsMergeInfoer, + deets *details.Builder, + alreadySeenItems map[string]struct{}, + errs *fault.Bus, +) (int, error) { + var ( + manifestAddedEntries int + totalBaseItems int + ) + + // Can't be in the above block else it's counted as a redeclaration. 
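+ // (ctx is already a parameter here, so it's reassigned rather than
+ // declared inside the var block above.)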
+ ctx = clues.Add(ctx, "base_backup_id", baseBackup.ID) + + baseDeets, err := getDetailsFromBackup( + ctx, + baseBackup.Backup, + detailsStore, + errs) + if err != nil { + return manifestAddedEntries, + clues.New("fetching base details for backup").WithClues(ctx) + } + + for _, entry := range baseDeets.Items() { + // Track this here instead of calling Items() again to get the count since + // it can be a bit expensive. + totalBaseItems++ + + rr, err := path.FromDataLayerPath(entry.RepoRef, true) + if err != nil { + return manifestAddedEntries, clues.New("parsing base item info path"). + WithClues(ctx). + With("repo_ref", path.LoggableDir(entry.RepoRef)) + } + + // Although this base has an entry it may not be the most recent. Check + // the reasons a snapshot was returned to ensure we only choose the recent + // entries. + // + // We only really want to do this check for merge bases though because + // kopia won't abide by reasons when determining if an item's cached. This + // leaves us in a bit of a pickle if the user has run any concurrent backups + // with overlapping reasons that then turn into assist bases, but the + // modTime check in DetailsMergeInfoer should handle that. + if checkReason && !matchesReason(baseBackup.Reasons, rr) { + continue + } + + // Skip items that were already found in a previous base backup. + if _, ok := alreadySeenItems[rr.ShortRef()]; ok { + continue + } + + ictx := clues.Add(ctx, "repo_ref", rr) + + newPath, newLoc, locUpdated, err := getNewPathRefs( + dataFromBackup, + entry, + rr, + baseBackup.Version) + if err != nil { + return manifestAddedEntries, + clues.Wrap(err, "getting updated info for entry").WithClues(ictx) + } + + // This entry isn't merged. + if newPath == nil { + continue + } + + // Fixup paths in the item. + item := entry.ItemInfo + details.UpdateItem(&item, newLoc) + + // TODO(ashmrtn): This can most likely be removed altogether. + itemUpdated := newPath.String() != rr.String() || locUpdated + + err = deets.Add( + newPath, + newLoc, + itemUpdated, + item) + if err != nil { + return manifestAddedEntries, + clues.Wrap(err, "adding item to details").WithClues(ictx) + } + + // Make sure we won't add this again in another base. + alreadySeenItems[rr.ShortRef()] = struct{}{} + + // Track how many entries we added so that we know if we got them all when + // we're done. + manifestAddedEntries++ + } + + logger.Ctx(ctx).Infow( + "merged details with base manifest", + "count_base_item_unfiltered", totalBaseItems, + "count_base_item_added", manifestAddedEntries) + + return manifestAddedEntries, nil +} + func mergeDetails( ctx context.Context, detailsStore streamstore.Streamer, - backups []kopia.BackupEntry, + bases kopia.BackupBases, dataFromBackup kopia.DetailsMergeInfoer, deets *details.Builder, writeStats *kopia.BackupStats, @@ -597,88 +768,68 @@ func mergeDetails( writeStats.TotalNonMetaUploadedBytes = detailsModel.SumNonMetaFileSizes() // Don't bother loading any of the base details if there's nothing we need to merge. - if dataFromBackup == nil || dataFromBackup.ItemsToMerge() == 0 { + if bases == nil || dataFromBackup == nil || dataFromBackup.ItemsToMerge() == 0 { return nil } - var addedEntries int + var ( + addedEntries int + // alreadySeenEntries tracks items that we've already merged so we don't + // accidentally merge them again. This could happen if, for example, there's + // an assist backup and a merge backup that both have the same version of an + // item at the same path. 
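+ // Keys are the ShortRef of each merged entry's original RepoRef.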
+ alreadySeenEntries = map[string]struct{}{} + ) - for _, baseBackup := range backups { - var ( - mctx = clues.Add(ctx, "base_backup_id", baseBackup.ID) - manifestAddedEntries int - ) - - baseDeets, err := getDetailsFromBackup( - mctx, - baseBackup.Backup, + // Merge details from assist bases first. It shouldn't technically matter + // since the DetailsMergeInfoer should take into account the modTime of items, + // but just to be on the safe side. + // + // We don't want to match entries based on Reason for assist bases because + // kopia won't abide by Reasons when determining if an item's cached. This + // leaves us in a bit of a pickle if the user has run any concurrent backups + // with overlapping Reasons that turn into assist bases, but the modTime check + // in DetailsMergeInfoer should handle that. + for _, base := range bases.AssistBackups() { + added, err := mergeItemsFromBase( + ctx, + false, + base, detailsStore, + dataFromBackup, + deets, + alreadySeenEntries, errs) if err != nil { - return clues.New("fetching base details for backup") + return clues.Wrap(err, "merging assist backup base details") } - for _, entry := range baseDeets.Items() { - rr, err := path.FromDataLayerPath(entry.RepoRef, true) - if err != nil { - return clues.New("parsing base item info path"). - WithClues(mctx). - With("repo_ref", path.NewElements(entry.RepoRef)) - } + addedEntries = addedEntries + added + } - // Although this base has an entry it may not be the most recent. Check - // the reasons a snapshot was returned to ensure we only choose the recent - // entries. - // - // TODO(ashmrtn): This logic will need expanded to cover entries from - // checkpoints if we start doing kopia-assisted incrementals for those. - if !matchesReason(baseBackup.Reasons, rr) { - continue - } - - mctx = clues.Add(mctx, "repo_ref", rr) - - newPath, newLoc, locUpdated, err := getNewPathRefs( - dataFromBackup, - entry, - rr, - baseBackup.Version) - if err != nil { - return clues.Wrap(err, "getting updated info for entry").WithClues(mctx) - } - - // This entry isn't merged. - if newPath == nil { - continue - } - - // Fixup paths in the item. - item := entry.ItemInfo - details.UpdateItem(&item, newLoc) - - // TODO(ashmrtn): This may need updated if we start using this merge - // strategry for items that were cached in kopia. - itemUpdated := newPath.String() != rr.String() || locUpdated - - err = deets.Add( - newPath, - newLoc, - itemUpdated, - item) - if err != nil { - return clues.Wrap(err, "adding item to details") - } - - // Track how many entries we added so that we know if we got them all when - // we're done. - addedEntries++ - manifestAddedEntries++ + // Now add entries from the merge base backups. These will be things that + // weren't changed in the new backup. Items that were already added because + // they were counted as cached in an assist base backup will be skipped due to + // alreadySeenEntries. + // + // We do want to enable matching entries based on Reasons because we + // explicitly control which subtrees from the merge base backup are grafted + // onto the hierarchy for the currently running backup. 
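+ // This pass mirrors the assist-base loop above, but with checkReason
+ // enabled so only entries under the Reasons the merge base was selected
+ // for are carried forward.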
+ for _, base := range bases.Backups() { + added, err := mergeItemsFromBase( + ctx, + true, + base, + detailsStore, + dataFromBackup, + deets, + alreadySeenEntries, + errs) + if err != nil { + return clues.Wrap(err, "merging merge backup base details") } - logger.Ctx(mctx).Infow( - "merged details with base manifest", - "base_item_count_unfiltered", len(baseDeets.Items()), - "base_item_count_added", manifestAddedEntries) + addedEntries = addedEntries + added } checkCount := dataFromBackup.ItemsToMerge() @@ -705,6 +856,8 @@ func (op *BackupOperation) persistResults( op.Status = Completed + // Non recoverable errors always result in a failed backup. + // This holds true for all FailurePolicy. if op.Errors.Failure() != nil { op.Status = Failed } @@ -732,6 +885,7 @@ func (op *BackupOperation) persistResults( op.Results.ItemsRead = opStats.ctrl.Successes + // Only return non-recoverable errors at this point. return op.Errors.Failure() } @@ -739,12 +893,16 @@ func (op *BackupOperation) persistResults( func (op *BackupOperation) createBackupModels( ctx context.Context, sscw streamstore.CollectorWriter, - snapID string, + opStats backupStats, backupID model.StableID, backupVersion int, deets *details.Details, ) error { - ctx = clues.Add(ctx, "snapshot_id", snapID, "backup_id", backupID) + snapID := opStats.k.SnapshotID + ctx = clues.Add(ctx, + "snapshot_id", snapID, + "backup_id", backupID) + // generate a new fault bus so that we can maintain clean // separation between the errors we serialize and those that // are generated during the serialization process. @@ -773,6 +931,32 @@ func (op *BackupOperation) createBackupModels( ctx = clues.Add(ctx, "streamstore_snapshot_id", ssid) + tags := map[string]string{ + model.ServiceTag: op.Selectors.PathService().String(), + } + + // Add tags to mark this backup as either assist or merge. This is used to: + // 1. Filter assist backups by tag during base selection process + // 2. 
Differentiate assist backups from merge backups + if isMergeBackup( + snapID, + ssid, + op.Options.FailureHandling, + op.Errors) { + tags[model.BackupTypeTag] = model.MergeBackup + } else if isAssistBackup( + opStats.hasNewDetailEntries, + snapID, + ssid, + op.Options.FailureHandling, + op.Errors) { + tags[model.BackupTypeTag] = model.AssistBackup + } else { + return clues.New("backup is neither assist nor merge").WithClues(ctx) + } + + ctx = clues.Add(ctx, model.BackupTypeTag, tags[model.BackupTypeTag]) + b := backup.New( snapID, ssid, op.Status.String(), @@ -783,7 +967,8 @@ func (op *BackupOperation) createBackupModels( op.ResourceOwner.Name(), op.Results.ReadWrites, op.Results.StartAndEndTime, - op.Errors.Errors()) + op.Errors.Errors(), + tags) logger.Ctx(ctx).Info("creating new backup") diff --git a/src/internal/operations/backup_test.go b/src/internal/operations/backup_test.go index a2783e92e..fd39ddc82 100644 --- a/src/internal/operations/backup_test.go +++ b/src/internal/operations/backup_test.go @@ -2,6 +2,7 @@ package operations import ( "context" + "encoding/json" stdpath "path" "testing" "time" @@ -12,25 +13,34 @@ import ( "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" + "github.com/alcionai/corso/src/cli/config" "github.com/alcionai/corso/src/internal/common/prefixmatcher" "github.com/alcionai/corso/src/internal/data" evmock "github.com/alcionai/corso/src/internal/events/mock" "github.com/alcionai/corso/src/internal/kopia" "github.com/alcionai/corso/src/internal/m365/graph" "github.com/alcionai/corso/src/internal/m365/mock" - odConsts "github.com/alcionai/corso/src/internal/m365/onedrive/consts" + odConsts "github.com/alcionai/corso/src/internal/m365/service/onedrive/consts" + odMock "github.com/alcionai/corso/src/internal/m365/service/onedrive/mock" + "github.com/alcionai/corso/src/internal/m365/support" "github.com/alcionai/corso/src/internal/model" "github.com/alcionai/corso/src/internal/operations/inject" + "github.com/alcionai/corso/src/internal/streamstore" ssmock "github.com/alcionai/corso/src/internal/streamstore/mock" "github.com/alcionai/corso/src/internal/tester" "github.com/alcionai/corso/src/internal/tester/tconfig" "github.com/alcionai/corso/src/pkg/account" "github.com/alcionai/corso/src/pkg/backup" "github.com/alcionai/corso/src/pkg/backup/details" + deeTD "github.com/alcionai/corso/src/pkg/backup/details/testdata" + "github.com/alcionai/corso/src/pkg/backup/identity" "github.com/alcionai/corso/src/pkg/control" + "github.com/alcionai/corso/src/pkg/control/repository" + "github.com/alcionai/corso/src/pkg/extensions" "github.com/alcionai/corso/src/pkg/fault" "github.com/alcionai/corso/src/pkg/path" "github.com/alcionai/corso/src/pkg/selectors" + selTD "github.com/alcionai/corso/src/pkg/selectors/testdata" "github.com/alcionai/corso/src/pkg/services/m365/api" storeTD "github.com/alcionai/corso/src/pkg/storage/testdata" "github.com/alcionai/corso/src/pkg/store" @@ -107,7 +117,7 @@ func checkPaths(t *testing.T, expected, got []path.Path) { type mockBackupConsumer struct { checkFunc func( - backupReasons []kopia.Reasoner, + backupReasons []identity.Reasoner, bases kopia.BackupBases, cs []data.BackupCollection, tags map[string]string, @@ -116,7 +126,7 @@ type mockBackupConsumer struct { func (mbu mockBackupConsumer) ConsumeBackupCollections( ctx context.Context, - backupReasons []kopia.Reasoner, + backupReasons []identity.Reasoner, bases kopia.BackupBases, cs []data.BackupCollection, excluded prefixmatcher.StringSetReader, @@ -136,6 +146,7 @@ 
func (mbu mockBackupConsumer) ConsumeBackupCollections( type mockDetailsMergeInfoer struct { repoRefs map[string]path.Path locs map[string]*path.Builder + modTimes map[string]time.Time } func (m *mockDetailsMergeInfoer) add(oldRef, newRef path.Path, newLoc *path.Builder) { @@ -147,10 +158,31 @@ func (m *mockDetailsMergeInfoer) add(oldRef, newRef path.Path, newLoc *path.Buil m.locs[oldPB.ShortRef()] = newLoc } +func (m *mockDetailsMergeInfoer) addWithModTime( + oldRef path.Path, + modTime time.Time, + newRef path.Path, + newLoc *path.Builder, +) { + oldPB := oldRef.ToBuilder() + // Items are indexed individually. + m.repoRefs[oldPB.ShortRef()] = newRef + m.modTimes[oldPB.ShortRef()] = modTime + + // Locations are indexed by directory. + m.locs[oldPB.ShortRef()] = newLoc +} + func (m *mockDetailsMergeInfoer) GetNewPathRefs( oldRef *path.Builder, + modTime time.Time, _ details.LocationIDer, ) (path.Path, *path.Builder, error) { + // Return no match if the modTime was set and it wasn't what was passed in. + if mt, ok := m.modTimes[oldRef.ShortRef()]; ok && !mt.Equal(modTime) { + return nil, nil, nil + } + return m.repoRefs[oldRef.ShortRef()], m.locs[oldRef.ShortRef()], nil } @@ -166,6 +198,7 @@ func newMockDetailsMergeInfoer() *mockDetailsMergeInfoer { return &mockDetailsMergeInfoer{ repoRefs: map[string]path.Path{}, locs: map[string]*path.Builder{}, + modTimes: map[string]time.Time{}, } } @@ -292,6 +325,30 @@ func makeDetailsEntry( return res } +func makeDetailsEntryWithModTime( + t *testing.T, + p path.Path, + l *path.Builder, + size int, + updated bool, + modTime time.Time, +) *details.Entry { + t.Helper() + + res := makeDetailsEntry(t, p, l, size, updated) + + switch { + case res.Exchange != nil: + res.Exchange.Modified = modTime + case res.OneDrive != nil: + res.OneDrive.Modified = modTime + case res.SharePoint != nil: + res.SharePoint.Modified = modTime + } + + return res +} + // --------------------------------------------------------------------------- // unit tests // --------------------------------------------------------------------------- @@ -406,7 +463,7 @@ func (suite *BackupOpUnitSuite) TestBackupOperation_ConsumeBackupDataCollections path.ExchangeService, path.ContactsCategory) - reasons = []kopia.Reasoner{ + reasons = []identity.Reasoner{ emailReason, contactsReason, } @@ -421,13 +478,13 @@ func (suite *BackupOpUnitSuite) TestBackupOperation_ConsumeBackupDataCollections bases = kopia.NewMockBackupBases().WithMergeBases( kopia.ManifestEntry{ Manifest: manifest1, - Reasons: []kopia.Reasoner{ + Reasons: []identity.Reasoner{ emailReason, }, }).WithAssistBases( kopia.ManifestEntry{ Manifest: manifest2, - Reasons: []kopia.Reasoner{ + Reasons: []identity.Reasoner{ contactsReason, }, }) @@ -441,7 +498,7 @@ func (suite *BackupOpUnitSuite) TestBackupOperation_ConsumeBackupDataCollections mbu := &mockBackupConsumer{ checkFunc: func( - backupReasons []kopia.Reasoner, + backupReasons []identity.Reasoner, gotBases kopia.BackupBases, cs []data.BackupCollection, gotTags map[string]string, @@ -545,6 +602,9 @@ func (suite *BackupOpUnitSuite) TestBackupOperation_MergeBackupDetails_AddsItems itemPath3.ResourceOwner(), itemPath3.Service(), itemPath3.Category()) + + time1 = time.Now() + time2 = time1.Add(time.Hour) ) itemParents1, err := path.GetDriveFolderPath(itemPath1) @@ -553,10 +613,11 @@ func (suite *BackupOpUnitSuite) TestBackupOperation_MergeBackupDetails_AddsItems itemParents1String := itemParents1.String() table := []struct { - name string - populatedDetails map[string]*details.Details - 
inputBackups []kopia.BackupEntry - mdm *mockDetailsMergeInfoer + name string + populatedDetails map[string]*details.Details + inputBackups []kopia.BackupEntry + inputAssistBackups []kopia.BackupEntry + mdm *mockDetailsMergeInfoer errCheck assert.ErrorAssertionFunc expectedEntries []*details.Entry @@ -590,7 +651,7 @@ func (suite *BackupOpUnitSuite) TestBackupOperation_MergeBackupDetails_AddsItems }, DetailsID: "foo", }, - Reasons: []kopia.Reasoner{ + Reasons: []identity.Reasoner{ pathReason1, }, }, @@ -609,40 +670,7 @@ func (suite *BackupOpUnitSuite) TestBackupOperation_MergeBackupDetails_AddsItems inputBackups: []kopia.BackupEntry{ { Backup: &backup1, - Reasons: []kopia.Reasoner{ - pathReason1, - }, - }, - }, - populatedDetails: map[string]*details.Details{ - backup1.DetailsID: { - DetailsModel: details.DetailsModel{ - Entries: []details.Entry{ - *makeDetailsEntry(suite.T(), itemPath1, locationPath1, 42, false), - }, - }, - }, - }, - errCheck: assert.Error, - }, - { - name: "TooManyItems", - mdm: func() *mockDetailsMergeInfoer { - res := newMockDetailsMergeInfoer() - res.add(itemPath1, itemPath1, locationPath1) - - return res - }(), - inputBackups: []kopia.BackupEntry{ - { - Backup: &backup1, - Reasons: []kopia.Reasoner{ - pathReason1, - }, - }, - { - Backup: &backup1, - Reasons: []kopia.Reasoner{ + Reasons: []identity.Reasoner{ pathReason1, }, }, @@ -669,7 +697,7 @@ func (suite *BackupOpUnitSuite) TestBackupOperation_MergeBackupDetails_AddsItems inputBackups: []kopia.BackupEntry{ { Backup: &backup1, - Reasons: []kopia.Reasoner{ + Reasons: []identity.Reasoner{ pathReason1, }, }, @@ -728,7 +756,7 @@ func (suite *BackupOpUnitSuite) TestBackupOperation_MergeBackupDetails_AddsItems inputBackups: []kopia.BackupEntry{ { Backup: &backup1, - Reasons: []kopia.Reasoner{ + Reasons: []identity.Reasoner{ pathReason1, }, }, @@ -755,7 +783,7 @@ func (suite *BackupOpUnitSuite) TestBackupOperation_MergeBackupDetails_AddsItems inputBackups: []kopia.BackupEntry{ { Backup: &backup1, - Reasons: []kopia.Reasoner{ + Reasons: []identity.Reasoner{ pathReason1, }, }, @@ -785,7 +813,7 @@ func (suite *BackupOpUnitSuite) TestBackupOperation_MergeBackupDetails_AddsItems inputBackups: []kopia.BackupEntry{ { Backup: &backup1, - Reasons: []kopia.Reasoner{ + Reasons: []identity.Reasoner{ pathReason1, }, }, @@ -815,7 +843,7 @@ func (suite *BackupOpUnitSuite) TestBackupOperation_MergeBackupDetails_AddsItems inputBackups: []kopia.BackupEntry{ { Backup: &backup1, - Reasons: []kopia.Reasoner{ + Reasons: []identity.Reasoner{ pathReason1, }, }, @@ -846,7 +874,7 @@ func (suite *BackupOpUnitSuite) TestBackupOperation_MergeBackupDetails_AddsItems inputBackups: []kopia.BackupEntry{ { Backup: &backup1, - Reasons: []kopia.Reasoner{ + Reasons: []identity.Reasoner{ pathReason1, }, }, @@ -877,13 +905,13 @@ func (suite *BackupOpUnitSuite) TestBackupOperation_MergeBackupDetails_AddsItems inputBackups: []kopia.BackupEntry{ { Backup: &backup1, - Reasons: []kopia.Reasoner{ + Reasons: []identity.Reasoner{ pathReason1, }, }, { Backup: &backup2, - Reasons: []kopia.Reasoner{ + Reasons: []identity.Reasoner{ pathReason3, }, }, @@ -913,6 +941,210 @@ func (suite *BackupOpUnitSuite) TestBackupOperation_MergeBackupDetails_AddsItems makeDetailsEntry(suite.T(), itemPath3, locationPath3, 37, false), }, }, + { + name: "MergeAndAssistBases SameItems", + mdm: func() *mockDetailsMergeInfoer { + res := newMockDetailsMergeInfoer() + res.addWithModTime(itemPath1, time1, itemPath1, locationPath1) + res.addWithModTime(itemPath3, time2, itemPath3, locationPath3) + + 
return res + }(), + inputBackups: []kopia.BackupEntry{ + { + Backup: &backup1, + Reasons: []identity.Reasoner{ + pathReason1, + pathReason3, + }, + }, + }, + inputAssistBackups: []kopia.BackupEntry{ + {Backup: &backup2}, + }, + populatedDetails: map[string]*details.Details{ + backup1.DetailsID: { + DetailsModel: details.DetailsModel{ + Entries: []details.Entry{ + *makeDetailsEntryWithModTime(suite.T(), itemPath1, locationPath1, 42, false, time1), + *makeDetailsEntryWithModTime(suite.T(), itemPath3, locationPath3, 37, false, time2), + }, + }, + }, + backup2.DetailsID: { + DetailsModel: details.DetailsModel{ + Entries: []details.Entry{ + *makeDetailsEntryWithModTime(suite.T(), itemPath1, locationPath1, 42, false, time1), + *makeDetailsEntryWithModTime(suite.T(), itemPath3, locationPath3, 37, false, time2), + }, + }, + }, + }, + errCheck: assert.NoError, + expectedEntries: []*details.Entry{ + makeDetailsEntryWithModTime(suite.T(), itemPath1, locationPath1, 42, false, time1), + makeDetailsEntryWithModTime(suite.T(), itemPath3, locationPath3, 37, false, time2), + }, + }, + { + name: "MergeAndAssistBases AssistBaseHasNewerItems", + mdm: func() *mockDetailsMergeInfoer { + res := newMockDetailsMergeInfoer() + res.addWithModTime(itemPath1, time2, itemPath1, locationPath1) + + return res + }(), + inputBackups: []kopia.BackupEntry{ + { + Backup: &backup1, + Reasons: []identity.Reasoner{ + pathReason1, + }, + }, + }, + inputAssistBackups: []kopia.BackupEntry{ + {Backup: &backup2}, + }, + populatedDetails: map[string]*details.Details{ + backup1.DetailsID: { + DetailsModel: details.DetailsModel{ + Entries: []details.Entry{ + *makeDetailsEntryWithModTime(suite.T(), itemPath1, locationPath1, 42, false, time1), + }, + }, + }, + backup2.DetailsID: { + DetailsModel: details.DetailsModel{ + Entries: []details.Entry{ + *makeDetailsEntryWithModTime(suite.T(), itemPath1, locationPath1, 84, false, time2), + }, + }, + }, + }, + errCheck: assert.NoError, + expectedEntries: []*details.Entry{ + makeDetailsEntryWithModTime(suite.T(), itemPath1, locationPath1, 84, false, time2), + }, + }, + { + name: "AssistBases ConcurrentAssistBasesPicksMatchingVersion1", + mdm: func() *mockDetailsMergeInfoer { + res := newMockDetailsMergeInfoer() + res.addWithModTime(itemPath1, time2, itemPath1, locationPath1) + + return res + }(), + inputAssistBackups: []kopia.BackupEntry{ + {Backup: &backup1}, + {Backup: &backup2}, + }, + populatedDetails: map[string]*details.Details{ + backup1.DetailsID: { + DetailsModel: details.DetailsModel{ + Entries: []details.Entry{ + *makeDetailsEntryWithModTime(suite.T(), itemPath1, locationPath1, 42, false, time1), + }, + }, + }, + backup2.DetailsID: { + DetailsModel: details.DetailsModel{ + Entries: []details.Entry{ + *makeDetailsEntryWithModTime(suite.T(), itemPath1, locationPath1, 84, false, time2), + }, + }, + }, + }, + errCheck: assert.NoError, + expectedEntries: []*details.Entry{ + makeDetailsEntryWithModTime(suite.T(), itemPath1, locationPath1, 84, false, time2), + }, + }, + { + name: "AssistBases ConcurrentAssistBasesPicksMatchingVersion2", + mdm: func() *mockDetailsMergeInfoer { + res := newMockDetailsMergeInfoer() + res.addWithModTime(itemPath1, time1, itemPath1, locationPath1) + + return res + }(), + inputAssistBackups: []kopia.BackupEntry{ + {Backup: &backup1}, + {Backup: &backup2}, + }, + populatedDetails: map[string]*details.Details{ + backup1.DetailsID: { + DetailsModel: details.DetailsModel{ + Entries: []details.Entry{ + *makeDetailsEntryWithModTime(suite.T(), itemPath1, locationPath1, 42, 
false, time1), + }, + }, + }, + backup2.DetailsID: { + DetailsModel: details.DetailsModel{ + Entries: []details.Entry{ + *makeDetailsEntryWithModTime(suite.T(), itemPath1, locationPath1, 84, false, time2), + }, + }, + }, + }, + errCheck: assert.NoError, + expectedEntries: []*details.Entry{ + makeDetailsEntryWithModTime(suite.T(), itemPath1, locationPath1, 42, false, time1), + }, + }, + { + name: "AssistBases SameItemVersion", + mdm: func() *mockDetailsMergeInfoer { + res := newMockDetailsMergeInfoer() + res.addWithModTime(itemPath1, time1, itemPath1, locationPath1) + + return res + }(), + inputAssistBackups: []kopia.BackupEntry{ + {Backup: &backup1}, + {Backup: &backup2}, + }, + populatedDetails: map[string]*details.Details{ + backup1.DetailsID: { + DetailsModel: details.DetailsModel{ + Entries: []details.Entry{ + *makeDetailsEntryWithModTime(suite.T(), itemPath1, locationPath1, 42, false, time1), + }, + }, + }, + backup2.DetailsID: { + DetailsModel: details.DetailsModel{ + Entries: []details.Entry{ + *makeDetailsEntryWithModTime(suite.T(), itemPath1, locationPath1, 42, false, time1), + }, + }, + }, + }, + errCheck: assert.NoError, + expectedEntries: []*details.Entry{ + makeDetailsEntryWithModTime(suite.T(), itemPath1, locationPath1, 42, false, time1), + }, + }, + { + name: "AssistBase ItemDeleted", + mdm: func() *mockDetailsMergeInfoer { + return newMockDetailsMergeInfoer() + }(), + inputAssistBackups: []kopia.BackupEntry{ + {Backup: &backup1}, + }, + populatedDetails: map[string]*details.Details{ + backup1.DetailsID: { + DetailsModel: details.DetailsModel{ + Entries: []details.Entry{ + *makeDetailsEntryWithModTime(suite.T(), itemPath1, locationPath1, 42, false, time1), + }, + }, + }, + }, + errCheck: assert.NoError, + expectedEntries: []*details.Entry{}, + }, } for _, test := range table { @@ -926,10 +1158,14 @@ func (suite *BackupOpUnitSuite) TestBackupOperation_MergeBackupDetails_AddsItems deets := details.Builder{} writeStats := kopia.BackupStats{} + bb := kopia.NewMockBackupBases(). + WithBackups(test.inputBackups...). + WithAssistBackups(test.inputAssistBackups...) + err := mergeDetails( ctx, mds, - test.inputBackups, + bb, test.mdm, &deets, &writeStats, @@ -941,11 +1177,29 @@ func (suite *BackupOpUnitSuite) TestBackupOperation_MergeBackupDetails_AddsItems return } - assert.ElementsMatch(t, test.expectedEntries, deets.Details().Items()) + // Check the JSON output format of things because for some reason it's not + // using the proper comparison for time.Time and failing due to that. 
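The comment above refers to Go's monotonic clock: two time.Time values for the same instant can compare as unequal under reflect.DeepEqual (and therefore under testify's assert.Equal) when only one of them still carries a monotonic reading. Marshaling to JSON keeps only the wall-clock value, so comparing the JSON forms, as the checkJSONOutputs helper below does, sidesteps the mismatch. A minimal standalone illustration, not part of the patch:

package main

import (
	"encoding/json"
	"fmt"
	"reflect"
	"time"
)

func main() {
	t1 := time.Now()  // carries a monotonic clock reading
	t2 := t1.Round(0) // same wall-clock instant, monotonic reading stripped

	fmt.Println(t1.Equal(t2))              // true: same instant
	fmt.Println(reflect.DeepEqual(t1, t2)) // false: internal fields differ

	j1, _ := json.Marshal(t1)
	j2, _ := json.Marshal(t2)
	fmt.Println(string(j1) == string(j2)) // true: RFC 3339 output has no monotonic part
}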
+ checkJSONOutputs(t, test.expectedEntries, deets.Details().Items()) }) } } +func checkJSONOutputs( + t *testing.T, + expected []*details.Entry, + got []*details.Entry, +) { + t.Helper() + + expectedJSON, err := json.Marshal(expected) + require.NoError(t, err, "marshalling expected data") + + gotJSON, err := json.Marshal(got) + require.NoError(t, err, "marshalling got data") + + assert.JSONEq(t, string(expectedJSON), string(gotJSON)) +} + func (suite *BackupOpUnitSuite) TestBackupOperation_MergeBackupDetails_AddsFolders() { var ( t = suite.T() @@ -983,7 +1237,7 @@ func (suite *BackupOpUnitSuite) TestBackupOperation_MergeBackupDetails_AddsFolde }, DetailsID: "did1", }, - Reasons: []kopia.Reasoner{ + Reasons: []identity.Reasoner{ pathReason1, }, } @@ -1035,7 +1289,7 @@ func (suite *BackupOpUnitSuite) TestBackupOperation_MergeBackupDetails_AddsFolde err := mergeDetails( ctx, mds, - []kopia.BackupEntry{backup1}, + kopia.NewMockBackupBases().WithBackups(backup1), mdm, &deets, &writeStats, @@ -1187,3 +1441,750 @@ func (suite *BackupOpIntegrationSuite) TestNewBackupOperation() { }) } } + +type AssistBackupIntegrationSuite struct { + tester.Suite + kopiaCloser func(ctx context.Context) + acct account.Account + kw *kopia.Wrapper + sw *store.Wrapper + ms *kopia.ModelStore +} + +func TestAssistBackupIntegrationSuite(t *testing.T) { + suite.Run(t, &AssistBackupIntegrationSuite{ + Suite: tester.NewIntegrationSuite( + t, + [][]string{storeTD.AWSStorageCredEnvs, tconfig.M365AcctCredEnvs}), + }) +} + +func (suite *AssistBackupIntegrationSuite) SetupSuite() { + t := suite.T() + + ctx, flush := tester.NewContext(t) + defer flush() + + var ( + st = storeTD.NewPrefixedS3Storage(t) + k = kopia.NewConn(st) + ) + + suite.acct = tconfig.NewM365Account(t) + + err := k.Initialize(ctx, repository.Options{}, repository.Retention{}) + require.NoError(t, err, clues.ToCore(err)) + + suite.kopiaCloser = func(ctx context.Context) { + k.Close(ctx) + } + + kw, err := kopia.NewWrapper(k) + require.NoError(t, err, clues.ToCore(err)) + + suite.kw = kw + + ms, err := kopia.NewModelStore(k) + require.NoError(t, err, clues.ToCore(err)) + + suite.ms = ms + + sw := store.NewKopiaStore(ms) + suite.sw = sw +} + +func (suite *AssistBackupIntegrationSuite) TearDownSuite() { + ctx, flush := tester.NewContext(suite.T()) + defer flush() + + if suite.ms != nil { + suite.ms.Close(ctx) + } + + if suite.kw != nil { + suite.kw.Close(ctx) + } + + if suite.kopiaCloser != nil { + suite.kopiaCloser(ctx) + } +} + +var _ inject.BackupProducer = &mockBackupProducer{} + +type mockBackupProducer struct { + colls []data.BackupCollection + dcs data.CollectionStats + injectNonRecoverableErr bool +} + +func (mbp *mockBackupProducer) ProduceBackupCollections( + context.Context, + inject.BackupProducerConfig, + *fault.Bus, +) ([]data.BackupCollection, prefixmatcher.StringSetReader, bool, error) { + if mbp.injectNonRecoverableErr { + return nil, nil, false, clues.New("non-recoverable error") + } + + return mbp.colls, nil, true, nil +} + +func (mbp *mockBackupProducer) IsBackupRunnable( + context.Context, + path.ServiceType, + string, +) (bool, error) { + return true, nil +} + +func (mbp *mockBackupProducer) Wait() *data.CollectionStats { + return &mbp.dcs +} + +func makeBackupCollection( + p path.Path, + locPath *path.Builder, + items []odMock.Data, +) data.BackupCollection { + streams := make([]data.Stream, len(items)) + + for i := range items { + streams[i] = &items[i] + } + + return &mock.BackupCollection{ + Path: p, + Loc: locPath, + Streams: streams, + } 
+} + +func makeMetadataCollectionEntries( + deltaURL, driveID, folderID string, + p path.Path, +) []graph.MetadataCollectionEntry { + return []graph.MetadataCollectionEntry{ + graph.NewMetadataEntry( + graph.DeltaURLsFileName, + map[string]string{driveID: deltaURL}, + ), + graph.NewMetadataEntry( + graph.PreviousPathFileName, + map[string]map[string]string{ + driveID: { + folderID: p.PlainString(), + }, + }, + ), + } +} + +const ( + userID = "user-id" + driveID = "drive-id" + driveName = "drive-name" + folderID = "folder-id" +) + +func makeODMockData( + fileID string, + extData *details.ExtensionData, + modTime time.Time, + del bool, + readErr error, +) odMock.Data { + rc := odMock.FileRespReadCloser(odMock.DriveFilePayloadData) + if extData != nil { + rc = odMock.FileRespWithExtensions(odMock.DriveFilePayloadData, extData) + } + + return odMock.Data{ + ID: fileID, + DriveID: driveID, + DriveName: driveName, + Reader: rc, + ReadErr: readErr, + Sz: 100, + ModifiedTime: modTime, + Del: del, + ExtensionData: extData, + } +} + +// Check what kind of backup is produced for a given failurePolicy/observed fault +// bus combination. +// +// It's currently using errors generated during mockBackupProducer phase. +// Ideally we would test with errors generated in various phases of backup, but +// that needs putting produceManifestsAndMetadata and mergeDetails behind mockable +// interfaces. +// +// Note: Tests are incremental since we are reusing kopia repo between tests, +// but this is irrelevant here. + +func (suite *AssistBackupIntegrationSuite) TestBackupTypesForFailureModes() { + var ( + acct = tconfig.NewM365Account(suite.T()) + tenantID = acct.Config[config.AzureTenantIDKey] + opts = control.DefaultOptions() + osel = selectors.NewOneDriveBackup([]string{userID}) + ) + + osel.Include(selTD.OneDriveBackupFolderScope(osel)) + + pathElements := []string{odConsts.DrivesPathDir, "drive-id", odConsts.RootPathDir, folderID} + + tmp, err := path.Build(tenantID, userID, path.OneDriveService, path.FilesCategory, false, pathElements...) + require.NoError(suite.T(), err, clues.ToCore(err)) + + locPath := path.Builder{}.Append(tmp.Folders()...) 
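The table that follows exercises how the failure policy and the kind of error observed decide whether a merge backup, an assist backup, or no backup at all is persisted. Purely as an illustration of the outcomes asserted below (the real decision lives inside the backup operation; this helper and its signature are invented for readability and assume the control and model packages referenced elsewhere in this file):

// expectedBackupTag summarizes the outcomes the cases below assert.
func expectedBackupTag(policy control.FailurePolicy, nonRecoverable, recovered bool) string {
	switch {
	case nonRecoverable:
		// The run fails outright; no backup model is written.
		return ""
	case recovered && policy == control.FailFast:
		// Fail-fast treats the first recoverable error as fatal.
		return ""
	case recovered && policy == control.FailAfterRecovery:
		// Partial results are persisted as an assist backup.
		return model.AssistBackup
	default:
		// Clean runs, and best-effort runs with only recoverable errors,
		// produce a merge backup.
		return model.MergeBackup
	}
}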
+ + table := []struct { + name string + collFunc func() []data.BackupCollection + injectNonRecoverableErr bool + failurePolicy control.FailurePolicy + expectRunErr assert.ErrorAssertionFunc + expectBackupTag string + expectFaults func(t *testing.T, errs *fault.Bus) + }{ + { + name: "fail fast, no errors", + collFunc: func() []data.BackupCollection { + bc := []data.BackupCollection{ + makeBackupCollection( + tmp, + locPath, + []odMock.Data{ + makeODMockData("file1", nil, time.Now(), false, nil), + }), + } + + return bc + }, + failurePolicy: control.FailFast, + expectRunErr: assert.NoError, + expectBackupTag: model.MergeBackup, + expectFaults: func(t *testing.T, errs *fault.Bus) { + assert.NoError(t, errs.Failure(), clues.ToCore(errs.Failure())) + assert.Empty(t, errs.Recovered(), "recovered errors") + }, + }, + { + name: "fail fast, any errors", + collFunc: func() []data.BackupCollection { + bc := []data.BackupCollection{ + makeBackupCollection( + tmp, + locPath, + []odMock.Data{ + makeODMockData("file1", nil, time.Now(), false, assert.AnError), + }), + } + return bc + }, + failurePolicy: control.FailFast, + expectRunErr: assert.Error, + expectBackupTag: "", + expectFaults: func(t *testing.T, errs *fault.Bus) { + assert.Error(t, errs.Failure(), clues.ToCore(errs.Failure())) + }, + }, + { + name: "best effort, no errors", + collFunc: func() []data.BackupCollection { + bc := []data.BackupCollection{ + makeBackupCollection( + tmp, + locPath, + []odMock.Data{ + makeODMockData("file1", nil, time.Now(), false, nil), + }), + } + + return bc + }, + failurePolicy: control.BestEffort, + expectRunErr: assert.NoError, + expectBackupTag: model.MergeBackup, + expectFaults: func(t *testing.T, errs *fault.Bus) { + assert.NoError(t, errs.Failure(), clues.ToCore(errs.Failure())) + assert.Empty(t, errs.Recovered(), "recovered errors") + }, + }, + { + name: "best effort, non-recoverable errors", + collFunc: func() []data.BackupCollection { + return nil + }, + injectNonRecoverableErr: true, + failurePolicy: control.BestEffort, + expectRunErr: assert.Error, + expectBackupTag: "", + expectFaults: func(t *testing.T, errs *fault.Bus) { + assert.Error(t, errs.Failure(), clues.ToCore(errs.Failure())) + }, + }, + { + name: "best effort, recoverable errors", + collFunc: func() []data.BackupCollection { + bc := []data.BackupCollection{ + makeBackupCollection( + tmp, + locPath, + []odMock.Data{ + makeODMockData("file1", nil, time.Now(), false, assert.AnError), + }), + } + + return bc + }, + failurePolicy: control.BestEffort, + expectRunErr: assert.NoError, + expectBackupTag: model.MergeBackup, + expectFaults: func(t *testing.T, errs *fault.Bus) { + assert.NoError(t, errs.Failure(), clues.ToCore(errs.Failure())) + assert.Greater(t, len(errs.Recovered()), 0, "recovered errors") + }, + }, + { + name: "fail after recovery, no errors", + collFunc: func() []data.BackupCollection { + bc := []data.BackupCollection{ + makeBackupCollection( + tmp, + locPath, + []odMock.Data{ + makeODMockData("file1", nil, time.Now(), false, nil), + makeODMockData("file2", nil, time.Now(), false, nil), + }), + } + + return bc + }, + failurePolicy: control.FailAfterRecovery, + expectRunErr: assert.NoError, + expectBackupTag: model.MergeBackup, + expectFaults: func(t *testing.T, errs *fault.Bus) { + assert.NoError(t, errs.Failure(), clues.ToCore(errs.Failure())) + assert.Empty(t, errs.Recovered(), "recovered errors") + }, + }, + { + name: "fail after recovery, non-recoverable errors", + collFunc: func() []data.BackupCollection { + return nil + }, + 
injectNonRecoverableErr: true, + failurePolicy: control.FailAfterRecovery, + expectRunErr: assert.Error, + expectBackupTag: "", + expectFaults: func(t *testing.T, errs *fault.Bus) { + assert.Error(t, errs.Failure(), clues.ToCore(errs.Failure())) + }, + }, + { + name: "fail after recovery, recoverable errors", + collFunc: func() []data.BackupCollection { + bc := []data.BackupCollection{ + makeBackupCollection( + tmp, + locPath, + []odMock.Data{ + makeODMockData("file1", nil, time.Now(), false, nil), + makeODMockData("file2", nil, time.Now(), false, assert.AnError), + }), + } + + return bc + }, + failurePolicy: control.FailAfterRecovery, + expectRunErr: assert.Error, + expectBackupTag: model.AssistBackup, + expectFaults: func(t *testing.T, errs *fault.Bus) { + assert.Error(t, errs.Failure(), clues.ToCore(errs.Failure())) + assert.Greater(t, len(errs.Recovered()), 0, "recovered errors") + }, + }, + } + for _, test := range table { + suite.Run(test.name, func() { + t := suite.T() + + ctx, flush := tester.NewContext(t) + defer flush() + + cs := test.collFunc() + + mc, err := graph.MakeMetadataCollection( + tenantID, + userID, + path.OneDriveService, + path.FilesCategory, + makeMetadataCollectionEntries("url/1", driveID, folderID, tmp), + func(*support.ControllerOperationStatus) {}) + require.NoError(t, err, clues.ToCore(err)) + + cs = append(cs, mc) + bp := &mockBackupProducer{ + colls: cs, + injectNonRecoverableErr: test.injectNonRecoverableErr, + } + + opts.FailureHandling = test.failurePolicy + + bo, err := NewBackupOperation( + ctx, + opts, + suite.kw, + suite.sw, + bp, + acct, + osel.Selector, + selectors.Selector{DiscreteOwner: userID}, + evmock.NewBus()) + require.NoError(t, err, clues.ToCore(err)) + + err = bo.Run(ctx) + test.expectRunErr(t, err, clues.ToCore(err)) + + test.expectFaults(t, bo.Errors) + + if len(test.expectBackupTag) == 0 { + return + } + + bID := bo.Results.BackupID + require.NotEmpty(t, bID) + + bup := backup.Backup{} + + err = suite.ms.Get(ctx, model.BackupSchema, bID, &bup) + require.NoError(t, err, clues.ToCore(err)) + + require.Equal(t, test.expectBackupTag, bup.Tags[model.BackupTypeTag]) + }) + } +} + +func selectFilesFromDeets(d details.Details) map[string]details.Entry { + files := make(map[string]details.Entry) + + for _, ent := range d.Entries { + if ent.Folder != nil { + continue + } + + files[ent.ItemRef] = ent + } + + return files +} + +// TestExtensionsIncrementals tests presence of corso extension data in details +// Note that since we are mocking out backup producer here, corso extensions can't be +// attached as they would in prod. However, this is fine here, since we are more interested +// in testing whether deets get carried over correctly for various scenarios. 
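As the comment above notes, the mocked producer attaches extension data directly, so the production extension pipeline never runs in this test. For context, the idea behind an item extension is roughly the sketch below, which is hypothetical and not the repository's implementation: wrap the item's reader, record how many bytes were read, and attach that value to the item's details entry (the assertions in the test check the analogous extensions.KNumBytes key).

package main

import (
	"fmt"
	"io"
	"strings"
)

type countingReader struct {
	inner io.Reader
	n     int64
	data  map[string]any // stands in for details.ExtensionData.Data
}

func (c *countingReader) Read(p []byte) (int, error) {
	n, err := c.inner.Read(p)
	c.n += int64(n)

	if err == io.EOF {
		c.data["numBytes"] = c.n // the real key would be extensions.KNumBytes
	}

	return n, err
}

func main() {
	ext := map[string]any{}
	r := &countingReader{inner: strings.NewReader("item payload"), data: ext}

	if _, err := io.ReadAll(r); err != nil {
		panic(err)
	}

	fmt.Println(ext["numBytes"]) // 12
}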
+func (suite *AssistBackupIntegrationSuite) TestExtensionsIncrementals() { + var ( + acct = tconfig.NewM365Account(suite.T()) + tenantID = acct.Config[config.AzureTenantIDKey] + opts = control.DefaultOptions() + osel = selectors.NewOneDriveBackup([]string{userID}) + // Default policy used by SDK clients + failurePolicy = control.FailAfterRecovery + T1 = time.Now().Truncate(0) + T2 = T1.Add(time.Hour).Truncate(0) + T3 = T2.Add(time.Hour).Truncate(0) + extData = make(map[int]*details.ExtensionData) + ) + + for i := 0; i < 3; i++ { + d := make(map[string]any) + extData[i] = &details.ExtensionData{ + Data: d, + } + } + + osel.Include(selTD.OneDriveBackupFolderScope(osel)) + + sss := streamstore.NewStreamer( + suite.kw, + suite.acct.ID(), + osel.PathService()) + + pathElements := []string{odConsts.DrivesPathDir, "drive-id", odConsts.RootPathDir, folderID} + + tmp, err := path.Build(tenantID, userID, path.OneDriveService, path.FilesCategory, false, pathElements...) + require.NoError(suite.T(), err, clues.ToCore(err)) + + locPath := path.Builder{}.Append(tmp.Folders()...) + + table := []struct { + name string + collFunc func() []data.BackupCollection + expectRunErr assert.ErrorAssertionFunc + validateDeets func(t *testing.T, gotDeets details.Details) + }{ + { + name: "Assist backup, 1 new deets", + collFunc: func() []data.BackupCollection { + bc := []data.BackupCollection{ + makeBackupCollection( + tmp, + locPath, + []odMock.Data{ + makeODMockData("file1", extData[0], T1, false, nil), + makeODMockData("file2", extData[1], T1, false, assert.AnError), + }), + } + + return bc + }, + expectRunErr: assert.Error, + validateDeets: func(t *testing.T, d details.Details) { + files := selectFilesFromDeets(d) + require.Len(t, files, 1) + + f := files["file1"] + require.NotNil(t, f) + + require.True(t, T1.Equal(f.Modified())) + require.NotZero(t, f.Extension.Data[extensions.KNumBytes]) + }, + }, + { + name: "Assist backup after assist backup, 1 existing, 1 new deets", + collFunc: func() []data.BackupCollection { + bc := []data.BackupCollection{ + makeBackupCollection( + tmp, + locPath, + []odMock.Data{ + makeODMockData("file1", extData[0], T1, false, nil), + makeODMockData("file2", extData[1], T2, false, nil), + makeODMockData("file3", extData[2], T2, false, assert.AnError), + }), + } + + return bc + }, + expectRunErr: assert.Error, + validateDeets: func(t *testing.T, d details.Details) { + files := selectFilesFromDeets(d) + require.Len(t, files, 2) + + for _, f := range files { + switch f.ItemRef { + case "file1": + require.True(t, T1.Equal(f.Modified())) + require.NotZero(t, f.Extension.Data[extensions.KNumBytes]) + case "file2": + require.True(t, T2.Equal(f.Modified())) + require.NotZero(t, f.Extension.Data[extensions.KNumBytes]) + default: + require.Fail(t, "unexpected file", f.ItemRef) + } + } + }, + }, + { + name: "Merge backup, 2 existing deets, 1 new deet", + collFunc: func() []data.BackupCollection { + bc := []data.BackupCollection{ + makeBackupCollection( + tmp, + locPath, + []odMock.Data{ + makeODMockData("file1", extData[0], T1, false, nil), + makeODMockData("file2", extData[1], T2, false, nil), + makeODMockData("file3", extData[2], T3, false, nil), + }), + } + + return bc + }, + expectRunErr: assert.NoError, + validateDeets: func(t *testing.T, d details.Details) { + files := selectFilesFromDeets(d) + require.Len(t, files, 3) + + for _, f := range files { + switch f.ItemRef { + case "file1": + require.True(t, T1.Equal(f.Modified())) + require.NotZero(t, f.Extension.Data[extensions.KNumBytes]) + 
case "file2": + require.True(t, T2.Equal(f.Modified())) + require.NotZero(t, f.Extension.Data[extensions.KNumBytes]) + case "file3": + require.True(t, T3.Equal(f.Modified())) + require.NotZero(t, f.Extension.Data[extensions.KNumBytes]) + default: + require.Fail(t, "unexpected file", f.ItemRef) + } + } + }, + }, + { + // Reset state so we can reuse the same test data + name: "All files deleted", + collFunc: func() []data.BackupCollection { + bc := []data.BackupCollection{ + makeBackupCollection( + tmp, + locPath, + []odMock.Data{ + makeODMockData("file1", extData[0], T1, true, nil), + makeODMockData("file2", extData[1], T2, true, nil), + makeODMockData("file3", extData[2], T3, true, nil), + }), + } + + return bc + }, + expectRunErr: assert.NoError, + validateDeets: func(t *testing.T, d details.Details) { + files := selectFilesFromDeets(d) + require.Len(t, files, 0) + }, + }, + { + name: "Merge backup, 1 new deets", + collFunc: func() []data.BackupCollection { + bc := []data.BackupCollection{ + makeBackupCollection( + tmp, + locPath, + []odMock.Data{ + makeODMockData("file1", extData[0], T1, false, nil), + }), + } + + return bc + }, + expectRunErr: assert.NoError, + validateDeets: func(t *testing.T, d details.Details) { + files := selectFilesFromDeets(d) + require.Len(t, files, 1) + + for _, f := range files { + switch f.ItemRef { + case "file1": + require.True(t, T1.Equal(f.Modified())) + require.NotZero(t, f.Extension.Data[extensions.KNumBytes]) + default: + require.Fail(t, "unexpected file", f.ItemRef) + } + } + }, + }, + // This test fails currently, need to rerun with Ashlie's PR. + { + name: "Assist backup after merge backup, 1 new deets, 1 existing deet", + collFunc: func() []data.BackupCollection { + bc := []data.BackupCollection{ + makeBackupCollection( + tmp, + locPath, + []odMock.Data{ + makeODMockData("file1", extData[0], T1, false, nil), + makeODMockData("file2", extData[1], T2, false, nil), + makeODMockData("file3", extData[2], T3, false, assert.AnError), + }), + } + + return bc + }, + expectRunErr: assert.Error, + validateDeets: func(t *testing.T, d details.Details) { + files := selectFilesFromDeets(d) + require.Len(t, files, 2) + + for _, f := range files { + switch f.ItemRef { + case "file1": + require.True(t, T1.Equal(f.Modified())) + require.NotZero(t, f.Extension.Data[extensions.KNumBytes]) + + case "file2": + require.True(t, T2.Equal(f.Modified())) + require.NotZero(t, f.Extension.Data[extensions.KNumBytes]) + default: + require.Fail(t, "unexpected file", f.ItemRef) + } + } + }, + }, + + // TODO(pandeyabs): Remaining tests. + // 1. Deets updated in assist backup. Following backup should have updated deets. + // 2. Concurrent overlapping reasons. 
+ } + for _, test := range table { + suite.Run(test.name, func() { + t := suite.T() + + ctx, flush := tester.NewContext(t) + defer flush() + + cs := test.collFunc() + + mc, err := graph.MakeMetadataCollection( + tenantID, + userID, + path.OneDriveService, + path.FilesCategory, + makeMetadataCollectionEntries("url/1", driveID, folderID, tmp), + func(*support.ControllerOperationStatus) {}) + require.NoError(t, err, clues.ToCore(err)) + + cs = append(cs, mc) + bp := &mockBackupProducer{ + colls: cs, + } + + opts.FailureHandling = failurePolicy + + bo, err := NewBackupOperation( + ctx, + opts, + suite.kw, + suite.sw, + bp, + acct, + osel.Selector, + selectors.Selector{DiscreteOwner: userID}, + evmock.NewBus()) + require.NoError(t, err, clues.ToCore(err)) + + err = bo.Run(ctx) + test.expectRunErr(t, err, clues.ToCore(err)) + + assert.NotEmpty(t, bo.Results.BackupID) + + deets, _ := deeTD.GetDeetsInBackup( + t, + ctx, + bo.Results.BackupID, + tenantID, + userID, + path.OneDriveService, + deeTD.DriveIDFromRepoRef, + suite.ms, + sss) + assert.NotNil(t, deets) + + test.validateDeets(t, deets) + + // Clear extension data between test runs + for i := 0; i < 3; i++ { + d := make(map[string]any) + extData[i] = &details.ExtensionData{ + Data: d, + } + } + }) + } +} diff --git a/src/internal/operations/export_test.go b/src/internal/operations/export_test.go index 10dec2ab1..2fbc843a7 100644 --- a/src/internal/operations/export_test.go +++ b/src/internal/operations/export_test.go @@ -18,8 +18,8 @@ import ( "github.com/alcionai/corso/src/internal/data" evmock "github.com/alcionai/corso/src/internal/events/mock" "github.com/alcionai/corso/src/internal/kopia" - exchMock "github.com/alcionai/corso/src/internal/m365/exchange/mock" "github.com/alcionai/corso/src/internal/m365/mock" + exchMock "github.com/alcionai/corso/src/internal/m365/service/exchange/mock" "github.com/alcionai/corso/src/internal/stats" "github.com/alcionai/corso/src/internal/tester" "github.com/alcionai/corso/src/pkg/account" diff --git a/src/internal/operations/helpers.go b/src/internal/operations/helpers.go index 0c5c9c049..cdce0fdec 100644 --- a/src/internal/operations/helpers.go +++ b/src/internal/operations/helpers.go @@ -11,8 +11,8 @@ import ( "github.com/alcionai/corso/src/pkg/logger" ) -// finalizeErrorHandling ensures the operation follow the options -// failure behavior requirements. +// finalizeErrorHandling ensures the operation follows the +// failure policy requirements. func finalizeErrorHandling( ctx context.Context, opts control.Options, diff --git a/src/internal/operations/manifests.go b/src/internal/operations/manifests.go index 1c5d1716c..95b313adc 100644 --- a/src/internal/operations/manifests.go +++ b/src/internal/operations/manifests.go @@ -10,20 +10,54 @@ import ( "github.com/alcionai/corso/src/internal/kopia" "github.com/alcionai/corso/src/internal/kopia/inject" "github.com/alcionai/corso/src/internal/m365/graph" + "github.com/alcionai/corso/src/pkg/backup/identity" "github.com/alcionai/corso/src/pkg/fault" "github.com/alcionai/corso/src/pkg/logger" "github.com/alcionai/corso/src/pkg/path" ) -// calls kopia to retrieve prior backup manifests, metadata collections to supply backup heuristics. -// TODO(ashmrtn): Make this a helper function that always returns as much as -// possible and call in another function that drops metadata and/or -// kopia-assisted incremental bases based on flag values. 
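The refactor below splits the old function in two: getManifestsAndMetadata fetches prior bases and their metadata, while produceManifestsAndMetadata applies the caller's choices on top of that result. A hedged sketch of a call site, matching the new signature shown below; the boolean variable names are assumptions, not code from this patch:

bb, metadataColls, canUseMetadata, err := produceManifestsAndMetadata(
	ctx,
	baseFinder,      // inject.BaseFinder
	restoreProducer, // inject.RestoreProducer
	reasons,
	fallbackReasons,
	tenantID,
	useIncrementals,   // getMetadata: when false, merge bases are dropped (full backup)
	forceItemDownload, // dropAssistBases: when true, assist bases are dropped (no cached item data)
)
if err != nil {
	return clues.Stack(err)
}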
func produceManifestsAndMetadata( ctx context.Context, bf inject.BaseFinder, rp inject.RestoreProducer, - reasons, fallbackReasons []kopia.Reasoner, + reasons, fallbackReasons []identity.Reasoner, + tenantID string, + getMetadata, dropAssistBases bool, +) (kopia.BackupBases, []data.RestoreCollection, bool, error) { + bb, meta, useMergeBases, err := getManifestsAndMetadata( + ctx, + bf, + rp, + reasons, + fallbackReasons, + tenantID, + getMetadata) + if err != nil { + return nil, nil, false, clues.Stack(err) + } + + if !useMergeBases || !getMetadata { + logger.Ctx(ctx).Debug("full backup requested, dropping merge bases") + + bb.ClearMergeBases() + } + + if dropAssistBases { + logger.Ctx(ctx).Debug("no caching requested, dropping assist bases") + + bb.ClearAssistBases() + } + + return bb, meta, useMergeBases, nil +} + +// getManifestsAndMetadata calls kopia to retrieve prior backup manifests, +// metadata collections to supply backup heuristics. +func getManifestsAndMetadata( + ctx context.Context, + bf inject.BaseFinder, + rp inject.RestoreProducer, + reasons, fallbackReasons []identity.Reasoner, tenantID string, getMetadata bool, ) (kopia.BackupBases, []data.RestoreCollection, bool, error) { @@ -44,20 +78,16 @@ func produceManifestsAndMetadata( // 2. the current reasons only contain an incomplete manifest, and the fallback // can find a complete manifest. // 3. the current reasons contain all the necessary manifests. + // Note: This is not relevant for assist backups, since they are newly introduced + // and they don't exist with fallback reasons. bb = bb.MergeBackupBases( ctx, fbb, - func(r kopia.Reasoner) string { + func(r identity.Reasoner) string { return r.Service().String() + r.Category().String() }) if !getMetadata { - logger.Ctx(ctx).Debug("full backup requested, dropping merge bases") - - // TODO(ashmrtn): If this function is moved to be a helper function then - // move this change to the bases to the caller of this function. 
- bb.ClearMergeBases() - return bb, nil, false, nil } diff --git a/src/internal/operations/manifests_test.go b/src/internal/operations/manifests_test.go index 5fdf22424..1061c0e8e 100644 --- a/src/internal/operations/manifests_test.go +++ b/src/internal/operations/manifests_test.go @@ -15,6 +15,7 @@ import ( "github.com/alcionai/corso/src/internal/model" "github.com/alcionai/corso/src/internal/tester" "github.com/alcionai/corso/src/pkg/backup" + "github.com/alcionai/corso/src/pkg/backup/identity" "github.com/alcionai/corso/src/pkg/fault" "github.com/alcionai/corso/src/pkg/path" ) @@ -47,7 +48,7 @@ type mockBackupFinder struct { func (bf *mockBackupFinder) FindBases( _ context.Context, - reasons []kopia.Reasoner, + reasons []identity.Reasoner, _ map[string]string, ) kopia.BackupBases { if len(reasons) == 0 { @@ -102,7 +103,7 @@ func (suite *OperationsManifestsUnitSuite) TestCollectMetadata() { table := []struct { name string manID string - reasons []kopia.Reasoner + reasons []identity.Reasoner fileNames []string expectPaths func(*testing.T, []string) []path.Path expectErr error @@ -110,7 +111,7 @@ func (suite *OperationsManifestsUnitSuite) TestCollectMetadata() { { name: "single reason, single file", manID: "single single", - reasons: []kopia.Reasoner{ + reasons: []identity.Reasoner{ kopia.NewReason(tid, ro, path.ExchangeService, path.EmailCategory), }, expectPaths: func(t *testing.T, files []string) []path.Path { @@ -129,7 +130,7 @@ func (suite *OperationsManifestsUnitSuite) TestCollectMetadata() { { name: "single reason, multiple files", manID: "single multi", - reasons: []kopia.Reasoner{ + reasons: []identity.Reasoner{ kopia.NewReason(tid, ro, path.ExchangeService, path.EmailCategory), }, expectPaths: func(t *testing.T, files []string) []path.Path { @@ -148,7 +149,7 @@ func (suite *OperationsManifestsUnitSuite) TestCollectMetadata() { { name: "multiple reasons, single file", manID: "multi single", - reasons: []kopia.Reasoner{ + reasons: []identity.Reasoner{ kopia.NewReason(tid, ro, path.ExchangeService, path.EmailCategory), kopia.NewReason(tid, ro, path.ExchangeService, path.ContactsCategory), }, @@ -171,7 +172,7 @@ func (suite *OperationsManifestsUnitSuite) TestCollectMetadata() { { name: "multiple reasons, multiple file", manID: "multi multi", - reasons: []kopia.Reasoner{ + reasons: []identity.Reasoner{ kopia.NewReason(tid, ro, path.ExchangeService, path.EmailCategory), kopia.NewReason(tid, ro, path.ExchangeService, path.ContactsCategory), }, @@ -219,8 +220,8 @@ func buildReasons( ro string, service path.ServiceType, cats ...path.CategoryType, -) []kopia.Reasoner { - var reasons []kopia.Reasoner +) []identity.Reasoner { + var reasons []identity.Reasoner for _, cat := range cats { reasons = append( @@ -252,8 +253,9 @@ func (suite *OperationsManifestsUnitSuite) TestProduceManifestsAndMetadata() { name string bf *mockBackupFinder rp mockRestoreProducer - reasons []kopia.Reasoner + reasons []identity.Reasoner getMeta bool + dropAssist bool assertErr assert.ErrorAssertionFunc assertB assert.BoolAssertionFunc expectDCS []mockColl @@ -263,7 +265,7 @@ func (suite *OperationsManifestsUnitSuite) TestProduceManifestsAndMetadata() { { name: "don't get metadata, no mans", rp: mockRestoreProducer{}, - reasons: []kopia.Reasoner{}, + reasons: []identity.Reasoner{}, getMeta: false, assertErr: assert.NoError, assertB: assert.False, @@ -280,7 +282,7 @@ func (suite *OperationsManifestsUnitSuite) TestProduceManifestsAndMetadata() { }, }, rp: mockRestoreProducer{}, - reasons: []kopia.Reasoner{ + reasons: 
[]identity.Reasoner{ kopia.NewReason("", ro, path.ExchangeService, path.EmailCategory), }, getMeta: false, @@ -301,7 +303,7 @@ func (suite *OperationsManifestsUnitSuite) TestProduceManifestsAndMetadata() { }, }, rp: mockRestoreProducer{}, - reasons: []kopia.Reasoner{ + reasons: []identity.Reasoner{ kopia.NewReason("", ro, path.ExchangeService, path.EmailCategory), }, getMeta: true, @@ -329,7 +331,7 @@ func (suite *OperationsManifestsUnitSuite) TestProduceManifestsAndMetadata() { "id1": {data.NoFetchRestoreCollection{Collection: mockColl{id: "id1"}}}, }, }, - reasons: []kopia.Reasoner{ + reasons: []identity.Reasoner{ kopia.NewReason("", ro, path.ExchangeService, path.EmailCategory), kopia.NewReason("", ro, path.ExchangeService, path.ContactsCategory), }, @@ -377,7 +379,7 @@ func (suite *OperationsManifestsUnitSuite) TestProduceManifestsAndMetadata() { "id2": {data.NoFetchRestoreCollection{Collection: mockColl{id: "id2"}}}, }, }, - reasons: []kopia.Reasoner{ + reasons: []identity.Reasoner{ kopia.NewReason("", ro, path.ExchangeService, path.EmailCategory), }, getMeta: true, @@ -390,6 +392,36 @@ func (suite *OperationsManifestsUnitSuite) TestProduceManifestsAndMetadata() { makeMan("id2", "checkpoint", path.EmailCategory), ), }, + { + name: "one valid man, extra incomplete man, no assist bases", + bf: &mockBackupFinder{ + data: map[string]kopia.BackupBases{ + ro: kopia.NewMockBackupBases().WithMergeBases( + makeMan("id1", "", path.EmailCategory), + ).WithAssistBases( + makeMan("id2", "checkpoint", path.EmailCategory), + ), + }, + }, + rp: mockRestoreProducer{ + collsByID: map[string][]data.RestoreCollection{ + "id1": {data.NoFetchRestoreCollection{Collection: mockColl{id: "id1"}}}, + "id2": {data.NoFetchRestoreCollection{Collection: mockColl{id: "id2"}}}, + }, + }, + reasons: []identity.Reasoner{ + kopia.NewReason("", ro, path.ExchangeService, path.EmailCategory), + }, + getMeta: true, + dropAssist: true, + assertErr: assert.NoError, + assertB: assert.True, + expectDCS: []mockColl{{id: "id1"}}, + expectMans: kopia.NewMockBackupBases().WithMergeBases( + makeMan("id1", "", path.EmailCategory), + ). 
+ ClearMockAssistBases(), + }, { name: "multiple valid mans", bf: &mockBackupFinder{ @@ -406,7 +438,7 @@ func (suite *OperationsManifestsUnitSuite) TestProduceManifestsAndMetadata() { "id2": {data.NoFetchRestoreCollection{Collection: mockColl{id: "id2"}}}, }, }, - reasons: []kopia.Reasoner{ + reasons: []identity.Reasoner{ kopia.NewReason("", ro, path.ExchangeService, path.EmailCategory), }, getMeta: true, @@ -428,7 +460,7 @@ func (suite *OperationsManifestsUnitSuite) TestProduceManifestsAndMetadata() { }, }, rp: mockRestoreProducer{err: assert.AnError}, - reasons: []kopia.Reasoner{ + reasons: []identity.Reasoner{ kopia.NewReason("", ro, path.ExchangeService, path.EmailCategory), }, getMeta: true, @@ -452,7 +484,8 @@ func (suite *OperationsManifestsUnitSuite) TestProduceManifestsAndMetadata() { &test.rp, test.reasons, nil, tid, - test.getMeta) + test.getMeta, + test.dropAssist) test.assertErr(t, err, clues.ToCore(err)) test.assertB(t, b) @@ -548,9 +581,10 @@ func (suite *OperationsManifestsUnitSuite) TestProduceManifestsAndMetadata_Fallb name string bf *mockBackupFinder rp mockRestoreProducer - reasons []kopia.Reasoner - fallbackReasons []kopia.Reasoner + reasons []identity.Reasoner + fallbackReasons []identity.Reasoner getMeta bool + dropAssist bool assertErr assert.ErrorAssertionFunc assertB assert.BoolAssertionFunc expectDCS []mockColl @@ -568,7 +602,7 @@ func (suite *OperationsManifestsUnitSuite) TestProduceManifestsAndMetadata_Fallb }, }, rp: mockRestoreProducer{}, - fallbackReasons: []kopia.Reasoner{fbEmailReason}, + fallbackReasons: []identity.Reasoner{fbEmailReason}, getMeta: false, assertErr: assert.NoError, assertB: assert.False, @@ -593,7 +627,7 @@ func (suite *OperationsManifestsUnitSuite) TestProduceManifestsAndMetadata_Fallb "fb_id1": {data.NoFetchRestoreCollection{Collection: mockColl{id: "fb_id1"}}}, }, }, - fallbackReasons: []kopia.Reasoner{fbEmailReason}, + fallbackReasons: []identity.Reasoner{fbEmailReason}, getMeta: true, assertErr: assert.NoError, assertB: assert.True, @@ -604,6 +638,35 @@ func (suite *OperationsManifestsUnitSuite) TestProduceManifestsAndMetadata_Fallb makeBackup(fbro, "fb_id1", path.EmailCategory), ), }, + { + name: "only fallbacks, no assist", + bf: &mockBackupFinder{ + data: map[string]kopia.BackupBases{ + fbro: kopia.NewMockBackupBases().WithMergeBases( + makeMan(fbro, "fb_id1", "", path.EmailCategory), + ).WithBackups( + makeBackup(fbro, "fb_id1", path.EmailCategory), + ), + }, + }, + rp: mockRestoreProducer{ + collsByID: map[string][]data.RestoreCollection{ + "fb_id1": {data.NoFetchRestoreCollection{Collection: mockColl{id: "fb_id1"}}}, + }, + }, + fallbackReasons: []identity.Reasoner{fbEmailReason}, + getMeta: true, + dropAssist: true, + assertErr: assert.NoError, + assertB: assert.True, + expectDCS: []mockColl{{id: "fb_id1"}}, + expectMans: kopia.NewMockBackupBases().WithMergeBases( + makeMan(fbro, "fb_id1", "", path.EmailCategory), + ).WithBackups( + makeBackup(fbro, "fb_id1", path.EmailCategory), + ). 
+ ClearMockAssistBases(), + }, { name: "complete mans and fallbacks", bf: &mockBackupFinder{ @@ -624,8 +687,8 @@ func (suite *OperationsManifestsUnitSuite) TestProduceManifestsAndMetadata_Fallb "fb_id1": {data.NoFetchRestoreCollection{Collection: mockColl{id: "fb_id1"}}}, }, }, - reasons: []kopia.Reasoner{emailReason}, - fallbackReasons: []kopia.Reasoner{fbEmailReason}, + reasons: []identity.Reasoner{emailReason}, + fallbackReasons: []identity.Reasoner{fbEmailReason}, getMeta: true, assertErr: assert.NoError, assertB: assert.True, @@ -652,8 +715,8 @@ func (suite *OperationsManifestsUnitSuite) TestProduceManifestsAndMetadata_Fallb "fb_id2": {data.NoFetchRestoreCollection{Collection: mockColl{id: "fb_id2"}}}, }, }, - reasons: []kopia.Reasoner{emailReason}, - fallbackReasons: []kopia.Reasoner{fbEmailReason}, + reasons: []identity.Reasoner{emailReason}, + fallbackReasons: []identity.Reasoner{fbEmailReason}, getMeta: true, assertErr: assert.NoError, assertB: assert.True, @@ -688,8 +751,8 @@ func (suite *OperationsManifestsUnitSuite) TestProduceManifestsAndMetadata_Fallb "fb_id2": {data.NoFetchRestoreCollection{Collection: mockColl{id: "fb_id2"}}}, }, }, - reasons: []kopia.Reasoner{emailReason}, - fallbackReasons: []kopia.Reasoner{fbEmailReason}, + reasons: []identity.Reasoner{emailReason}, + fallbackReasons: []identity.Reasoner{fbEmailReason}, getMeta: true, assertErr: assert.NoError, assertB: assert.True, @@ -720,8 +783,8 @@ func (suite *OperationsManifestsUnitSuite) TestProduceManifestsAndMetadata_Fallb "fb_id1": {data.NoFetchRestoreCollection{Collection: mockColl{id: "fb_id1"}}}, }, }, - reasons: []kopia.Reasoner{emailReason}, - fallbackReasons: []kopia.Reasoner{fbEmailReason}, + reasons: []identity.Reasoner{emailReason}, + fallbackReasons: []identity.Reasoner{fbEmailReason}, getMeta: true, assertErr: assert.NoError, assertB: assert.True, @@ -734,6 +797,40 @@ func (suite *OperationsManifestsUnitSuite) TestProduceManifestsAndMetadata_Fallb makeMan(ro, "id2", "checkpoint", path.EmailCategory), ), }, + { + name: "incomplete mans and complete fallbacks, no assist bases", + bf: &mockBackupFinder{ + data: map[string]kopia.BackupBases{ + ro: kopia.NewMockBackupBases().WithAssistBases( + makeMan(ro, "id2", "checkpoint", path.EmailCategory), + ), + fbro: kopia.NewMockBackupBases().WithMergeBases( + makeMan(fbro, "fb_id1", "", path.EmailCategory), + ).WithBackups( + makeBackup(fbro, "fb_id1", path.EmailCategory), + ), + }, + }, + rp: mockRestoreProducer{ + collsByID: map[string][]data.RestoreCollection{ + "id2": {data.NoFetchRestoreCollection{Collection: mockColl{id: "id2"}}}, + "fb_id1": {data.NoFetchRestoreCollection{Collection: mockColl{id: "fb_id1"}}}, + }, + }, + reasons: []identity.Reasoner{emailReason}, + fallbackReasons: []identity.Reasoner{fbEmailReason}, + getMeta: true, + dropAssist: true, + assertErr: assert.NoError, + assertB: assert.True, + expectDCS: []mockColl{{id: "fb_id1"}}, + expectMans: kopia.NewMockBackupBases().WithMergeBases( + makeMan(fbro, "fb_id1", "", path.EmailCategory), + ).WithBackups( + makeBackup(fbro, "fb_id1", path.EmailCategory), + ). 
+ ClearMockAssistBases(), + }, { name: "complete mans and incomplete fallbacks", bf: &mockBackupFinder{ @@ -752,8 +849,8 @@ func (suite *OperationsManifestsUnitSuite) TestProduceManifestsAndMetadata_Fallb "fb_id2": {data.NoFetchRestoreCollection{Collection: mockColl{id: "fb_id2"}}}, }, }, - reasons: []kopia.Reasoner{emailReason}, - fallbackReasons: []kopia.Reasoner{fbEmailReason}, + reasons: []identity.Reasoner{emailReason}, + fallbackReasons: []identity.Reasoner{fbEmailReason}, getMeta: true, assertErr: assert.NoError, assertB: assert.True, @@ -782,11 +879,11 @@ func (suite *OperationsManifestsUnitSuite) TestProduceManifestsAndMetadata_Fallb "fb_id1": {data.NoFetchRestoreCollection{Collection: mockColl{id: "fb_id1"}}}, }, }, - reasons: []kopia.Reasoner{ + reasons: []identity.Reasoner{ emailReason, kopia.NewReason("", ro, path.ExchangeService, path.ContactsCategory), }, - fallbackReasons: []kopia.Reasoner{ + fallbackReasons: []identity.Reasoner{ fbEmailReason, kopia.NewReason("", fbro, path.ExchangeService, path.ContactsCategory), }, @@ -818,8 +915,8 @@ func (suite *OperationsManifestsUnitSuite) TestProduceManifestsAndMetadata_Fallb "fb_id1": {data.NoFetchRestoreCollection{Collection: mockColl{id: "fb_id1"}}}, }, }, - reasons: []kopia.Reasoner{emailReason}, - fallbackReasons: []kopia.Reasoner{ + reasons: []identity.Reasoner{emailReason}, + fallbackReasons: []identity.Reasoner{ kopia.NewReason("", fbro, path.ExchangeService, path.ContactsCategory), }, getMeta: true, @@ -853,11 +950,11 @@ func (suite *OperationsManifestsUnitSuite) TestProduceManifestsAndMetadata_Fallb "fb_id1": {data.NoFetchRestoreCollection{Collection: mockColl{id: "fb_id1"}}}, }, }, - reasons: []kopia.Reasoner{ + reasons: []identity.Reasoner{ emailReason, kopia.NewReason("", ro, path.ExchangeService, path.ContactsCategory), }, - fallbackReasons: []kopia.Reasoner{ + fallbackReasons: []identity.Reasoner{ fbEmailReason, kopia.NewReason("", fbro, path.ExchangeService, path.ContactsCategory), }, @@ -887,7 +984,8 @@ func (suite *OperationsManifestsUnitSuite) TestProduceManifestsAndMetadata_Fallb &test.rp, test.reasons, test.fallbackReasons, tid, - test.getMeta) + test.getMeta, + test.dropAssist) test.assertErr(t, err, clues.ToCore(err)) test.assertB(t, b) diff --git a/src/internal/operations/restore.go b/src/internal/operations/restore.go index 1e6cc62a5..68c8c1b5f 100644 --- a/src/internal/operations/restore.go +++ b/src/internal/operations/restore.go @@ -16,7 +16,7 @@ import ( "github.com/alcionai/corso/src/internal/diagnostics" "github.com/alcionai/corso/src/internal/events" "github.com/alcionai/corso/src/internal/kopia" - "github.com/alcionai/corso/src/internal/m365/onedrive" + "github.com/alcionai/corso/src/internal/m365/service/onedrive" "github.com/alcionai/corso/src/internal/model" "github.com/alcionai/corso/src/internal/observe" "github.com/alcionai/corso/src/internal/operations/inject" diff --git a/src/internal/operations/restore_test.go b/src/internal/operations/restore_test.go index c97812a63..856505bfd 100644 --- a/src/internal/operations/restore_test.go +++ b/src/internal/operations/restore_test.go @@ -17,10 +17,10 @@ import ( evmock "github.com/alcionai/corso/src/internal/events/mock" "github.com/alcionai/corso/src/internal/kopia" "github.com/alcionai/corso/src/internal/m365" - exchMock "github.com/alcionai/corso/src/internal/m365/exchange/mock" "github.com/alcionai/corso/src/internal/m365/graph" "github.com/alcionai/corso/src/internal/m365/mock" "github.com/alcionai/corso/src/internal/m365/resource" + exchMock 
"github.com/alcionai/corso/src/internal/m365/service/exchange/mock" "github.com/alcionai/corso/src/internal/operations/inject" "github.com/alcionai/corso/src/internal/stats" "github.com/alcionai/corso/src/internal/tester" diff --git a/src/internal/operations/test/exchange_test.go b/src/internal/operations/test/exchange_test.go index 8dd917719..7fc1ff58e 100644 --- a/src/internal/operations/test/exchange_test.go +++ b/src/internal/operations/test/exchange_test.go @@ -18,11 +18,11 @@ import ( "github.com/alcionai/corso/src/internal/common/ptr" "github.com/alcionai/corso/src/internal/events" evmock "github.com/alcionai/corso/src/internal/events/mock" - "github.com/alcionai/corso/src/internal/m365/exchange" - exchMock "github.com/alcionai/corso/src/internal/m365/exchange/mock" - exchTD "github.com/alcionai/corso/src/internal/m365/exchange/testdata" "github.com/alcionai/corso/src/internal/m365/graph" "github.com/alcionai/corso/src/internal/m365/resource" + "github.com/alcionai/corso/src/internal/m365/service/exchange" + exchMock "github.com/alcionai/corso/src/internal/m365/service/exchange/mock" + exchTD "github.com/alcionai/corso/src/internal/m365/service/exchange/testdata" "github.com/alcionai/corso/src/internal/tester" "github.com/alcionai/corso/src/internal/tester/tconfig" "github.com/alcionai/corso/src/internal/version" diff --git a/src/internal/operations/test/helper_test.go b/src/internal/operations/test/helper_test.go index c826b3e44..5ed5e5f2e 100644 --- a/src/internal/operations/test/helper_test.go +++ b/src/internal/operations/test/helper_test.go @@ -19,10 +19,10 @@ import ( evmock "github.com/alcionai/corso/src/internal/events/mock" "github.com/alcionai/corso/src/internal/kopia" "github.com/alcionai/corso/src/internal/m365" - exchMock "github.com/alcionai/corso/src/internal/m365/exchange/mock" "github.com/alcionai/corso/src/internal/m365/graph" - odConsts "github.com/alcionai/corso/src/internal/m365/onedrive/consts" "github.com/alcionai/corso/src/internal/m365/resource" + exchMock "github.com/alcionai/corso/src/internal/m365/service/exchange/mock" + odConsts "github.com/alcionai/corso/src/internal/m365/service/onedrive/consts" "github.com/alcionai/corso/src/internal/model" "github.com/alcionai/corso/src/internal/operations" "github.com/alcionai/corso/src/internal/operations/inject" @@ -32,6 +32,7 @@ import ( "github.com/alcionai/corso/src/pkg/account" "github.com/alcionai/corso/src/pkg/backup" "github.com/alcionai/corso/src/pkg/backup/details" + "github.com/alcionai/corso/src/pkg/backup/identity" "github.com/alcionai/corso/src/pkg/control" "github.com/alcionai/corso/src/pkg/control/repository" "github.com/alcionai/corso/src/pkg/count" @@ -251,7 +252,7 @@ func checkBackupIsInManifests( bf, err := kw.NewBaseFinder(sw) require.NoError(t, err, clues.ToCore(err)) - mans := bf.FindBases(ctx, []kopia.Reasoner{r}, tags) + mans := bf.FindBases(ctx, []identity.Reasoner{r}, tags) for _, man := range mans.MergeBases() { bID, ok := man.GetTag(kopia.TagBackupID) if !assert.Truef(t, ok, "snapshot manifest %s missing backup ID tag", man.ID) { diff --git a/src/internal/operations/test/onedrive_test.go b/src/internal/operations/test/onedrive_test.go index 75387a471..c4faedec6 100644 --- a/src/internal/operations/test/onedrive_test.go +++ b/src/internal/operations/test/onedrive_test.go @@ -20,9 +20,9 @@ import ( "github.com/alcionai/corso/src/internal/events" evmock "github.com/alcionai/corso/src/internal/events/mock" "github.com/alcionai/corso/src/internal/m365" + 
"github.com/alcionai/corso/src/internal/m365/collection/drive" + "github.com/alcionai/corso/src/internal/m365/collection/drive/metadata" "github.com/alcionai/corso/src/internal/m365/graph" - "github.com/alcionai/corso/src/internal/m365/onedrive" - "github.com/alcionai/corso/src/internal/m365/onedrive/metadata" "github.com/alcionai/corso/src/internal/m365/resource" "github.com/alcionai/corso/src/internal/model" "github.com/alcionai/corso/src/internal/streamstore" @@ -132,8 +132,8 @@ func (suite *OneDriveBackupIntgSuite) TestBackup_Run_incrementalOneDrive() { return id } - grh := func(ac api.Client) onedrive.RestoreHandler { - return onedrive.NewRestoreHandler(ac) + grh := func(ac api.Client) drive.RestoreHandler { + return drive.NewRestoreHandler(ac) } runDriveIncrementalTest( @@ -157,7 +157,7 @@ func runDriveIncrementalTest( category path.CategoryType, includeContainers func([]string) selectors.Selector, getTestDriveID func(*testing.T, context.Context) string, - getRestoreHandler func(api.Client) onedrive.RestoreHandler, + getRestoreHandler func(api.Client) drive.RestoreHandler, skipPermissionsTests bool, ) { t := suite.T() @@ -388,7 +388,7 @@ func runDriveIncrementalTest( { name: "add permission to new file", updateFiles: func(t *testing.T, ctx context.Context) { - err = onedrive.UpdatePermissions( + err = drive.UpdatePermissions( ctx, rh, driveID, @@ -401,12 +401,12 @@ func runDriveIncrementalTest( }, itemsRead: 1, // .data file for newitem itemsWritten: 3, // .meta for newitem, .dirmeta for parent (.data is not written as it is not updated) - nonMetaItemsWritten: 1, // the file for which permission was updated + nonMetaItemsWritten: 0, // none because the file is considered cached instead of written. }, { name: "remove permission from new file", updateFiles: func(t *testing.T, ctx context.Context) { - err = onedrive.UpdatePermissions( + err = drive.UpdatePermissions( ctx, rh, driveID, @@ -419,13 +419,13 @@ func runDriveIncrementalTest( }, itemsRead: 1, // .data file for newitem itemsWritten: 3, // .meta for newitem, .dirmeta for parent (.data is not written as it is not updated) - nonMetaItemsWritten: 1, //.data file for newitem + nonMetaItemsWritten: 0, // none because the file is considered cached instead of written. }, { name: "add permission to container", updateFiles: func(t *testing.T, ctx context.Context) { targetContainer := containerInfos[container1].id - err = onedrive.UpdatePermissions( + err = drive.UpdatePermissions( ctx, rh, driveID, @@ -444,7 +444,7 @@ func runDriveIncrementalTest( name: "remove permission from container", updateFiles: func(t *testing.T, ctx context.Context) { targetContainer := containerInfos[container1].id - err = onedrive.UpdatePermissions( + err = drive.UpdatePermissions( ctx, rh, driveID, @@ -518,7 +518,7 @@ func runDriveIncrementalTest( }, itemsRead: 1, // .data file for newitem itemsWritten: 4, // .data and .meta for newitem, .dirmeta for parent - nonMetaItemsWritten: 1, // .data file for new item + nonMetaItemsWritten: 1, // .data file for moved item }, { name: "boomerang a file", @@ -550,7 +550,7 @@ func runDriveIncrementalTest( }, itemsRead: 1, // .data file for newitem itemsWritten: 3, // .data and .meta for newitem, .dirmeta for parent - nonMetaItemsWritten: 1, // .data file for new item + nonMetaItemsWritten: 0, // non because the file is considered cached instead of written. 
}, { name: "delete file", diff --git a/src/internal/operations/test/sharepoint_test.go b/src/internal/operations/test/sharepoint_test.go index 635c8c1ac..dea0c23bf 100644 --- a/src/internal/operations/test/sharepoint_test.go +++ b/src/internal/operations/test/sharepoint_test.go @@ -13,10 +13,9 @@ import ( "github.com/alcionai/corso/src/internal/common/ptr" evmock "github.com/alcionai/corso/src/internal/events/mock" + "github.com/alcionai/corso/src/internal/m365/collection/drive" "github.com/alcionai/corso/src/internal/m365/graph" - "github.com/alcionai/corso/src/internal/m365/onedrive" "github.com/alcionai/corso/src/internal/m365/resource" - "github.com/alcionai/corso/src/internal/m365/sharepoint" "github.com/alcionai/corso/src/internal/tester" "github.com/alcionai/corso/src/internal/tester/tconfig" "github.com/alcionai/corso/src/internal/version" @@ -74,8 +73,8 @@ func (suite *SharePointBackupIntgSuite) TestBackup_Run_incrementalSharePoint() { return id } - grh := func(ac api.Client) onedrive.RestoreHandler { - return sharepoint.NewRestoreHandler(ac) + grh := func(ac api.Client) drive.RestoreHandler { + return drive.NewLibraryRestoreHandler(ac) } runDriveIncrementalTest( diff --git a/src/pkg/backup/backup.go b/src/pkg/backup/backup.go index 39e317513..f9bee844b 100644 --- a/src/pkg/backup/backup.go +++ b/src/pkg/backup/backup.go @@ -79,6 +79,7 @@ func New( rw stats.ReadWrites, se stats.StartAndEndTime, fe *fault.Errors, + tags map[string]string, ) *Backup { if fe == nil { fe = &fault.Errors{} @@ -113,10 +114,8 @@ func New( return &Backup{ BaseModel: model.BaseModel{ - ID: id, - Tags: map[string]string{ - model.ServiceTag: selector.PathService().String(), - }, + ID: id, + Tags: tags, }, ResourceOwnerID: ownerID, diff --git a/src/pkg/backup/details/builder.go b/src/pkg/backup/details/builder.go new file mode 100644 index 000000000..41ce3c60f --- /dev/null +++ b/src/pkg/backup/details/builder.go @@ -0,0 +1,150 @@ +package details + +import ( + "sync" + + "github.com/alcionai/clues" + "golang.org/x/exp/maps" + + "github.com/alcionai/corso/src/pkg/path" +) + +// Builder should be used to create a details model. +type Builder struct { + d Details + mu sync.Mutex `json:"-"` + knownFolders map[string]Entry `json:"-"` +} + +func (b *Builder) Empty() bool { + b.mu.Lock() + defer b.mu.Unlock() + + return len(b.d.Entries) == 0 +} + +func (b *Builder) Add( + repoRef path.Path, + locationRef *path.Builder, + updated bool, + info ItemInfo, +) error { + b.mu.Lock() + defer b.mu.Unlock() + + entry, err := b.d.add( + repoRef, + locationRef, + updated, + info) + if err != nil { + return clues.Wrap(err, "adding entry to details") + } + + if err := b.addFolderEntries( + repoRef.ToBuilder().Dir(), + locationRef, + entry, + ); err != nil { + return clues.Wrap(err, "adding folder entries") + } + + return nil +} + +func (b *Builder) addFolderEntries( + repoRef, locationRef *path.Builder, + entry Entry, +) error { + if len(repoRef.Elements()) < len(locationRef.Elements()) { + return clues.New("RepoRef shorter than LocationRef"). + With("repo_ref", repoRef, "location_ref", locationRef) + } + + if b.knownFolders == nil { + b.knownFolders = map[string]Entry{} + } + + // Need a unique location because we want to have separate folders for + // different drives and categories even if there's duplicate folder names in + // them. 
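The loop that follows walks the unique location upward, creating or updating one folder entry per ancestor and rolling the item's size and latest modification time into each. A rough standalone illustration of that roll-up; the real code keys folders by the ShortRef of a service-specific unique location, while this sketch simply uses the joined path as the map key:

package main

import (
	"fmt"
	"strings"
	"time"
)

type folderStats struct {
	size     int64
	modified time.Time
}

// rollUp attributes an item's size and latest modification time to every
// ancestor folder of its location, mirroring what addFolderEntries does.
func rollUp(folders map[string]*folderStats, location []string, size int64, mod time.Time) {
	for len(location) > 0 {
		key := strings.Join(location, "/")

		f, ok := folders[key]
		if !ok {
			f = &folderStats{}
			folders[key] = f
		}

		f.size += size
		if f.modified.Before(mod) {
			f.modified = mod
		}

		// Step up to the parent folder.
		location = location[:len(location)-1]
	}
}

func main() {
	folders := map[string]*folderStats{}
	now := time.Now()

	rollUp(folders, []string{"files", "drive-id", "root", "a"}, 42, now)
	rollUp(folders, []string{"files", "drive-id", "root"}, 7, now.Add(time.Hour))

	for k, f := range folders {
		fmt.Printf("%s size=%d modified=%s\n", k, f.size, f.modified.Format(time.RFC3339))
	}
}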
+ uniqueLoc, err := entry.uniqueLocation(locationRef) + if err != nil { + return clues.Wrap(err, "getting LocationIDer") + } + + for uniqueLoc.elementCount() > 0 { + mapKey := uniqueLoc.ID().ShortRef() + + name := uniqueLoc.lastElem() + if len(name) == 0 { + return clues.New("folder with no display name"). + With("repo_ref", repoRef, "location_ref", uniqueLoc.InDetails()) + } + + shortRef := repoRef.ShortRef() + rr := repoRef.String() + + // Get the parent of this entry to add as the LocationRef for the folder. + uniqueLoc.dir() + + repoRef = repoRef.Dir() + parentRef := repoRef.ShortRef() + + folder, ok := b.knownFolders[mapKey] + if !ok { + loc := uniqueLoc.InDetails().String() + + folder = Entry{ + RepoRef: rr, + ShortRef: shortRef, + ParentRef: parentRef, + LocationRef: loc, + ItemInfo: ItemInfo{ + Folder: &FolderInfo{ + ItemType: FolderItem, + // TODO(ashmrtn): Use the item type returned by the entry once + // SharePoint properly sets it. + DisplayName: name, + }, + }, + } + + if err := entry.updateFolder(folder.Folder); err != nil { + return clues.Wrap(err, "adding folder"). + With("parent_repo_ref", repoRef, "location_ref", loc) + } + } + + folder.Folder.Size += entry.size() + folder.Updated = folder.Updated || entry.Updated + + itemModified := entry.Modified() + if folder.Folder.Modified.Before(itemModified) { + folder.Folder.Modified = itemModified + } + + // Always update the map because we're storing structs not pointers to + // structs. + b.knownFolders[mapKey] = folder + } + + return nil +} + +func (b *Builder) Details() *Details { + b.mu.Lock() + defer b.mu.Unlock() + + ents := make([]Entry, len(b.d.Entries)) + copy(ents, b.d.Entries) + + // Write the cached folder entries to details + details := &Details{ + DetailsModel{ + Entries: append(ents, maps.Values(b.knownFolders)...), + }, + } + + return details +} diff --git a/src/pkg/backup/details/details.go b/src/pkg/backup/details/details.go index 0f265fce8..ec2fdfcd5 100644 --- a/src/pkg/backup/details/details.go +++ b/src/pkg/backup/details/details.go @@ -1,22 +1,13 @@ package details import ( - "context" "encoding/json" "io" - "strconv" "strings" - "sync" - "time" "github.com/alcionai/clues" - "github.com/dustin/go-humanize" - "golang.org/x/exp/maps" - "github.com/alcionai/corso/src/cli/print" - "github.com/alcionai/corso/src/internal/common/dttm" - "github.com/alcionai/corso/src/internal/m365/onedrive/metadata" - "github.com/alcionai/corso/src/internal/version" + "github.com/alcionai/corso/src/internal/m365/collection/drive/metadata" "github.com/alcionai/corso/src/pkg/path" ) @@ -24,383 +15,6 @@ import ( // more than this, then we just show a summary. const maxPrintLimit = 50 -// LocationIDer provides access to location information but guarantees that it -// can also generate a unique location (among items in the same service but -// possibly across data types within the service) that can be used as a key in -// maps and other structures. The unique location may be different than -// InDetails, the location used in backup details. -type LocationIDer interface { - ID() *path.Builder - InDetails() *path.Builder -} - -type uniqueLoc struct { - pb *path.Builder - prefixElems int -} - -func (ul uniqueLoc) ID() *path.Builder { - return ul.pb -} - -func (ul uniqueLoc) InDetails() *path.Builder { - return path.Builder{}.Append(ul.pb.Elements()[ul.prefixElems:]...) -} - -// elementCount returns the number of non-prefix elements in the LocationIDer -// (i.e. the number of elements in the InDetails path.Builder). 
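// A minimal sketch, under assumed inputs, of the details.Builder API relocated
// into builder.go above: Add records the item plus a synthetic folder entry for
// each ancestor, and Details() flushes those cached folders into the model. The
// data-layer path string and folder names below are hypothetical.
package main

import (
	"fmt"
	"time"

	"github.com/alcionai/corso/src/pkg/backup/details"
	"github.com/alcionai/corso/src/pkg/path"
)

func main() {
	bldr := &details.Builder{}

	repoRef, err := path.FromDataLayerPath(
		"tenant-id/onedrive/user-id/files/drives/drive-id/root:/folder/item-id", true)
	if err != nil {
		fmt.Println("building repo ref:", err)
		return
	}

	info := details.ItemInfo{
		OneDrive: &details.OneDriveInfo{
			ItemType: details.OneDriveItem,
			ItemName: "item.txt",
			DriveID:  "drive-id",
			Size:     42,
			Modified: time.Now(),
		},
	}

	if err := bldr.Add(repoRef, path.Builder{}.Append("root:", "folder"), true, info); err != nil {
		fmt.Println("adding entry:", err)
		return
	}

	// Entries now hold the item plus the folder entries built for its ancestors.
	fmt.Println("entry count:", len(bldr.Details().Entries))
}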
-func (ul uniqueLoc) elementCount() int { - res := len(ul.pb.Elements()) - ul.prefixElems - if res < 0 { - res = 0 - } - - return res -} - -func (ul *uniqueLoc) dir() { - if ul.elementCount() == 0 { - return - } - - ul.pb = ul.pb.Dir() -} - -// lastElem returns the unescaped last element in the location. If the location -// is empty returns an empty string. -func (ul uniqueLoc) lastElem() string { - if ul.elementCount() == 0 { - return "" - } - - return ul.pb.LastElem() -} - -// Having service-specific constructors can be kind of clunky, but in this case -// I think they'd be useful to ensure the proper args are used since this -// path.Builder is used as a key in some maps. - -// NewExchangeLocationIDer builds a LocationIDer for the given category and -// folder path. The path denoted by the folders should be unique within the -// category. -func NewExchangeLocationIDer( - category path.CategoryType, - escapedFolders ...string, -) (uniqueLoc, error) { - if err := path.ValidateServiceAndCategory(path.ExchangeService, category); err != nil { - return uniqueLoc{}, clues.Wrap(err, "making exchange LocationIDer") - } - - pb := path.Builder{}.Append(category.String()).Append(escapedFolders...) - - return uniqueLoc{ - pb: pb, - prefixElems: 1, - }, nil -} - -// NewOneDriveLocationIDer builds a LocationIDer for the drive and folder path. -// The path denoted by the folders should be unique within the drive. -func NewOneDriveLocationIDer( - driveID string, - escapedFolders ...string, -) uniqueLoc { - pb := path.Builder{}. - Append(path.FilesCategory.String(), driveID). - Append(escapedFolders...) - - return uniqueLoc{ - pb: pb, - prefixElems: 2, - } -} - -// NewSharePointLocationIDer builds a LocationIDer for the drive and folder -// path. The path denoted by the folders should be unique within the drive. -func NewSharePointLocationIDer( - driveID string, - escapedFolders ...string, -) uniqueLoc { - pb := path.Builder{}. - Append(path.LibrariesCategory.String(), driveID). - Append(escapedFolders...) - - return uniqueLoc{ - pb: pb, - prefixElems: 2, - } -} - -// -------------------------------------------------------------------------------- -// Model -// -------------------------------------------------------------------------------- - -// DetailsModel describes what was stored in a Backup -type DetailsModel struct { - Entries []Entry `json:"entries"` -} - -// Print writes the DetailModel Entries to StdOut, in the format -// requested by the caller. -func (dm DetailsModel) PrintEntries(ctx context.Context) { - printEntries(ctx, dm.Entries) -} - -type infoer interface { - Entry | *Entry - // Need this here so we can access the infoType function without a type - // assertion. See https://stackoverflow.com/a/71378366 for more details. - infoType() ItemType -} - -func printEntries[T infoer](ctx context.Context, entries []T) { - if print.DisplayJSONFormat() { - printJSON(ctx, entries) - } else { - printTable(ctx, entries) - } -} - -func printTable[T infoer](ctx context.Context, entries []T) { - perType := map[ItemType][]print.Printable{} - - for _, ent := range entries { - it := ent.infoType() - ps, ok := perType[it] - - if !ok { - ps = []print.Printable{} - } - - perType[it] = append(ps, print.Printable(ent)) - } - - for _, ps := range perType { - print.All(ctx, ps...) - } -} - -func printJSON[T infoer](ctx context.Context, entries []T) { - ents := []print.Printable{} - - for _, ent := range entries { - ents = append(ents, print.Printable(ent)) - } - - print.All(ctx, ents...) 
-} - -// Paths returns the list of Paths for non-folder and non-meta items extracted -// from the Entries slice. -func (dm DetailsModel) Paths() []string { - r := make([]string, 0, len(dm.Entries)) - - for _, ent := range dm.Entries { - if ent.Folder != nil || ent.isMetaFile() { - continue - } - - r = append(r, ent.RepoRef) - } - - return r -} - -// Items returns a slice of *ItemInfo that does not contain any FolderInfo -// entries. Required because not all folders in the details are valid resource -// paths, and we want to slice out metadata. -func (dm DetailsModel) Items() entrySet { - res := make([]*Entry, 0, len(dm.Entries)) - - for i := 0; i < len(dm.Entries); i++ { - ent := dm.Entries[i] - if ent.Folder != nil || ent.isMetaFile() { - continue - } - - res = append(res, &ent) - } - - return res -} - -// FilterMetaFiles returns a copy of the Details with all of the -// .meta files removed from the entries. -func (dm DetailsModel) FilterMetaFiles() DetailsModel { - d2 := DetailsModel{ - Entries: []Entry{}, - } - - for _, ent := range dm.Entries { - if !ent.isMetaFile() { - d2.Entries = append(d2.Entries, ent) - } - } - - return d2 -} - -// SumNonMetaFileSizes returns the total size of items excluding all the -// .meta files from the items. -func (dm DetailsModel) SumNonMetaFileSizes() int64 { - var size int64 - - // Items will provide only files and filter out folders - for _, ent := range dm.FilterMetaFiles().Items() { - size += ent.size() - } - - return size -} - -// Check if a file is a metadata file. These are used to store -// additional data like permissions (in case of Drive items) and are -// not to be treated as regular files. -func (de Entry) isMetaFile() bool { - // sharepoint types not needed, since sharepoint permissions were - // added after IsMeta was deprecated. - // Earlier onedrive backups used to store both metafiles and files in details. - // So filter out just the onedrive items and check for metafiles - return de.ItemInfo.OneDrive != nil && de.ItemInfo.OneDrive.IsMeta -} - -// --------------------------------------------------------------------------- -// Builder -// --------------------------------------------------------------------------- - -// Builder should be used to create a details model. -type Builder struct { - d Details - mu sync.Mutex `json:"-"` - knownFolders map[string]Entry `json:"-"` -} - -func (b *Builder) Add( - repoRef path.Path, - locationRef *path.Builder, - updated bool, - info ItemInfo, -) error { - b.mu.Lock() - defer b.mu.Unlock() - - entry, err := b.d.add( - repoRef, - locationRef, - updated, - info) - if err != nil { - return clues.Wrap(err, "adding entry to details") - } - - if err := b.addFolderEntries( - repoRef.ToBuilder().Dir(), - locationRef, - entry, - ); err != nil { - return clues.Wrap(err, "adding folder entries") - } - - return nil -} - -func (b *Builder) addFolderEntries( - repoRef, locationRef *path.Builder, - entry Entry, -) error { - if len(repoRef.Elements()) < len(locationRef.Elements()) { - return clues.New("RepoRef shorter than LocationRef"). - With("repo_ref", repoRef, "location_ref", locationRef) - } - - if b.knownFolders == nil { - b.knownFolders = map[string]Entry{} - } - - // Need a unique location because we want to have separate folders for - // different drives and categories even if there's duplicate folder names in - // them. 
- uniqueLoc, err := entry.uniqueLocation(locationRef) - if err != nil { - return clues.Wrap(err, "getting LocationIDer") - } - - for uniqueLoc.elementCount() > 0 { - mapKey := uniqueLoc.ID().ShortRef() - - name := uniqueLoc.lastElem() - if len(name) == 0 { - return clues.New("folder with no display name"). - With("repo_ref", repoRef, "location_ref", uniqueLoc.InDetails()) - } - - shortRef := repoRef.ShortRef() - rr := repoRef.String() - - // Get the parent of this entry to add as the LocationRef for the folder. - uniqueLoc.dir() - - repoRef = repoRef.Dir() - parentRef := repoRef.ShortRef() - - folder, ok := b.knownFolders[mapKey] - if !ok { - loc := uniqueLoc.InDetails().String() - - folder = Entry{ - RepoRef: rr, - ShortRef: shortRef, - ParentRef: parentRef, - LocationRef: loc, - ItemInfo: ItemInfo{ - Folder: &FolderInfo{ - ItemType: FolderItem, - // TODO(ashmrtn): Use the item type returned by the entry once - // SharePoint properly sets it. - DisplayName: name, - }, - }, - } - - if err := entry.updateFolder(folder.Folder); err != nil { - return clues.Wrap(err, "adding folder"). - With("parent_repo_ref", repoRef, "location_ref", loc) - } - } - - folder.Folder.Size += entry.size() - folder.Updated = folder.Updated || entry.Updated - - itemModified := entry.Modified() - if folder.Folder.Modified.Before(itemModified) { - folder.Folder.Modified = itemModified - } - - // Always update the map because we're storing structs not pointers to - // structs. - b.knownFolders[mapKey] = folder - } - - return nil -} - -func (b *Builder) Details() *Details { - b.mu.Lock() - defer b.mu.Unlock() - - ents := make([]Entry, len(b.d.Entries)) - copy(ents, b.d.Entries) - - // Write the cached folder entries to details - details := &Details{ - DetailsModel{ - Entries: append(ents, maps.Values(b.knownFolders)...), - }, - } - - return details -} - // -------------------------------------------------------------------------------- // Details // -------------------------------------------------------------------------------- @@ -490,551 +104,65 @@ func withoutMetadataSuffix(id string) string { return id } -// -------------------------------------------------------------------------------- -// Entry -// -------------------------------------------------------------------------------- +// --------------------------------------------------------------------------- +// LocationIDer +// --------------------------------------------------------------------------- -// Add a new type so we can transparently use PrintAll in different situations. -type entrySet []*Entry - -func (ents entrySet) PrintEntries(ctx context.Context) { - printEntries(ctx, ents) +// LocationIDer provides access to location information but guarantees that it +// can also generate a unique location (among items in the same service but +// possibly across data types within the service) that can be used as a key in +// maps and other structures. The unique location may be different than +// InDetails, the location used in backup details. +type LocationIDer interface { + ID() *path.Builder + InDetails() *path.Builder } -// MaybePrintEntries is same as PrintEntries, but only prints if we -// have less than 15 items or is not json output. 
-func (ents entrySet) MaybePrintEntries(ctx context.Context) { - if len(ents) <= maxPrintLimit || - print.DisplayJSONFormat() || - print.DisplayVerbose() { - printEntries(ctx, ents) - } +type uniqueLoc struct { + pb *path.Builder + prefixElems int } -// Entry describes a single item stored in a Backup -type Entry struct { - // RepoRef is the full storage path of the item in Kopia - RepoRef string `json:"repoRef"` - ShortRef string `json:"shortRef"` - ParentRef string `json:"parentRef,omitempty"` - - // LocationRef contains the logical path structure by its human-readable - // display names. IE: If an item is located at "/Inbox/Important", we - // hold that string in the LocationRef, while the actual IDs of each - // container are used for the RepoRef. - // LocationRef only holds the container values, and does not include - // the metadata prefixes (tenant, service, owner, etc) found in the - // repoRef. - // Currently only implemented for Exchange Calendars. - LocationRef string `json:"locationRef,omitempty"` - - // ItemRef contains the stable id of the item itself. ItemRef is not - // guaranteed to be unique within a repository. Uniqueness guarantees - // maximally inherit from the source item. Eg: Entries for m365 mail items - // are only as unique as m365 mail item IDs themselves. - ItemRef string `json:"itemRef,omitempty"` - - // Indicates the item was added or updated in this backup - // Always `true` for full backups - Updated bool `json:"updated"` - - ItemInfo +func (ul uniqueLoc) ID() *path.Builder { + return ul.pb } -// ToLocationIDer takes a backup version and produces the unique location for -// this entry if possible. Reasons it may not be possible to produce the unique -// location include an unsupported backup version or missing information. -func (de Entry) ToLocationIDer(backupVersion int) (LocationIDer, error) { - if len(de.LocationRef) > 0 { - baseLoc, err := path.Builder{}.SplitUnescapeAppend(de.LocationRef) - if err != nil { - return nil, clues.Wrap(err, "parsing base location info"). - With("location_ref", de.LocationRef) - } +func (ul uniqueLoc) InDetails() *path.Builder { + return path.Builder{}.Append(ul.pb.Elements()[ul.prefixElems:]...) +} - // Individual services may add additional info to the base and return that. - return de.ItemInfo.uniqueLocation(baseLoc) +// elementCount returns the number of non-prefix elements in the LocationIDer +// (i.e. the number of elements in the InDetails path.Builder). +func (ul uniqueLoc) elementCount() int { + res := len(ul.pb.Elements()) - ul.prefixElems + if res < 0 { + res = 0 } - if backupVersion >= version.OneDrive7LocationRef || - (de.ItemInfo.infoType() != OneDriveItem && - de.ItemInfo.infoType() != SharePointLibrary) { - return nil, clues.New("no previous location for entry") + return res +} + +func (ul *uniqueLoc) dir() { + if ul.elementCount() == 0 { + return } - // This is a little hacky, but we only want to try to extract the old - // location if it's OneDrive or SharePoint libraries and it's known to - // be an older backup version. - // - // TODO(ashmrtn): Remove this code once OneDrive/SharePoint libraries - // LocationRef code has been out long enough that all delta tokens for - // previous backup versions will have expired. At that point, either - // we'll do a full backup (token expired, no newer backups) or have a - // backup of a higher version with the information we need. 
- rr, err := path.FromDataLayerPath(de.RepoRef, true) - if err != nil { - return nil, clues.Wrap(err, "getting item RepoRef") + ul.pb = ul.pb.Dir() +} + +// lastElem returns the unescaped last element in the location. If the location +// is empty returns an empty string. +func (ul uniqueLoc) lastElem() string { + if ul.elementCount() == 0 { + return "" } - p, err := path.ToDrivePath(rr) - if err != nil { - return nil, clues.New("converting RepoRef to drive path") - } - - baseLoc := path.Builder{}.Append(p.Root).Append(p.Folders...) - - // Individual services may add additional info to the base and return that. - return de.ItemInfo.uniqueLocation(baseLoc) + return ul.pb.LastElem() } -// -------------------------------------------------------------------------------- -// CLI Output -// -------------------------------------------------------------------------------- - -// interface compliance checks -var _ print.Printable = &Entry{} - -// MinimumPrintable DetailsEntries is a passthrough func, because no -// reduction is needed for the json output. -func (de Entry) MinimumPrintable() any { - return de -} - -// Headers returns the human-readable names of properties in a DetailsEntry -// for printing out to a terminal in a columnar display. -func (de Entry) Headers() []string { - hs := []string{"ID"} - - if de.ItemInfo.Folder != nil { - hs = append(hs, de.ItemInfo.Folder.Headers()...) - } - - if de.ItemInfo.Exchange != nil { - hs = append(hs, de.ItemInfo.Exchange.Headers()...) - } - - if de.ItemInfo.SharePoint != nil { - hs = append(hs, de.ItemInfo.SharePoint.Headers()...) - } - - if de.ItemInfo.OneDrive != nil { - hs = append(hs, de.ItemInfo.OneDrive.Headers()...) - } - - return hs -} - -// Values returns the values matching the Headers list. -func (de Entry) Values() []string { - vs := []string{de.ShortRef} - - if de.ItemInfo.Folder != nil { - vs = append(vs, de.ItemInfo.Folder.Values()...) - } - - if de.ItemInfo.Exchange != nil { - vs = append(vs, de.ItemInfo.Exchange.Values()...) - } - - if de.ItemInfo.SharePoint != nil { - vs = append(vs, de.ItemInfo.SharePoint.Values()...) - } - - if de.ItemInfo.OneDrive != nil { - vs = append(vs, de.ItemInfo.OneDrive.Values()...) - } - - return vs -} - -type ItemType int - -// ItemTypes are enumerated by service (hundredth digit) and data type (ones digit). -// Ex: exchange is 00x where x is the data type. Sharepoint is 10x, and etc. -// Every item info struct should get its own hundredth enumeration entry. -// Every item category for that service should get its own entry (even if differences -// between types aren't apparent on initial implementation, this future-proofs -// against breaking changes). -// Entries should not be rearranged. -// Additionally, any itemType directly assigned a number should not be altered. -// This applies to OneDriveItem and FolderItem -const ( - UnknownType ItemType = iota // 0, global unknown value - - // Exchange (00x) - ExchangeContact - ExchangeEvent - ExchangeMail - // SharePoint (10x) - SharePointLibrary ItemType = iota + 97 // 100 - SharePointList // 101... - SharePointPage - - // OneDrive (20x) - OneDriveItem ItemType = 205 - - // Folder Management(30x) - FolderItem ItemType = 306 -) - -func UpdateItem(item *ItemInfo, newLocPath *path.Builder) { - // Only OneDrive and SharePoint have information about parent folders - // contained in them. - // Can't switch based on infoType because that's been unstable. 
- if item.Exchange != nil { - item.Exchange.UpdateParentPath(newLocPath) - } else if item.SharePoint != nil { - // SharePoint used to store library items with the OneDriveItem ItemType. - // Start switching them over as we see them since there's no point in - // keeping the old format. - if item.SharePoint.ItemType == OneDriveItem { - item.SharePoint.ItemType = SharePointLibrary - } - - item.SharePoint.UpdateParentPath(newLocPath) - } else if item.OneDrive != nil { - item.OneDrive.UpdateParentPath(newLocPath) - } -} - -// ItemInfo is a oneOf that contains service specific -// information about the item it tracks -type ItemInfo struct { - Folder *FolderInfo `json:"folder,omitempty"` - Exchange *ExchangeInfo `json:"exchange,omitempty"` - SharePoint *SharePointInfo `json:"sharePoint,omitempty"` - OneDrive *OneDriveInfo `json:"oneDrive,omitempty"` - // Optional item extension data - Extension *ExtensionData `json:"extension,omitempty"` -} - -// typedInfo should get embedded in each sesrvice type to track -// the type of item it stores for multi-item service support. - -// infoType provides internal categorization for collecting like-typed ItemInfos. -// It should return the most granular value type (ex: "event" for an exchange -// calendar event). -func (i ItemInfo) infoType() ItemType { - switch { - case i.Folder != nil: - return i.Folder.ItemType - - case i.Exchange != nil: - return i.Exchange.ItemType - - case i.SharePoint != nil: - return i.SharePoint.ItemType - - case i.OneDrive != nil: - return i.OneDrive.ItemType - } - - return UnknownType -} - -func (i ItemInfo) size() int64 { - switch { - case i.Exchange != nil: - return i.Exchange.Size - - case i.OneDrive != nil: - return i.OneDrive.Size - - case i.SharePoint != nil: - return i.SharePoint.Size - - case i.Folder != nil: - return i.Folder.Size - } - - return 0 -} - -func (i ItemInfo) Modified() time.Time { - switch { - case i.Exchange != nil: - return i.Exchange.Modified - - case i.OneDrive != nil: - return i.OneDrive.Modified - - case i.SharePoint != nil: - return i.SharePoint.Modified - - case i.Folder != nil: - return i.Folder.Modified - } - - return time.Time{} -} - -func (i ItemInfo) uniqueLocation(baseLoc *path.Builder) (*uniqueLoc, error) { - switch { - case i.Exchange != nil: - return i.Exchange.uniqueLocation(baseLoc) - - case i.OneDrive != nil: - return i.OneDrive.uniqueLocation(baseLoc) - - case i.SharePoint != nil: - return i.SharePoint.uniqueLocation(baseLoc) - - default: - return nil, clues.New("unsupported type") - } -} - -func (i ItemInfo) updateFolder(f *FolderInfo) error { - switch { - case i.Exchange != nil: - return i.Exchange.updateFolder(f) - - case i.OneDrive != nil: - return i.OneDrive.updateFolder(f) - - case i.SharePoint != nil: - return i.SharePoint.updateFolder(f) - - default: - return clues.New("unsupported type") - } -} - -type FolderInfo struct { - ItemType ItemType `json:"itemType,omitempty"` - DisplayName string `json:"displayName"` - Modified time.Time `json:"modified,omitempty"` - Size int64 `json:"size,omitempty"` - DataType ItemType `json:"dataType,omitempty"` - DriveName string `json:"driveName,omitempty"` - DriveID string `json:"driveID,omitempty"` -} - -func (i FolderInfo) Headers() []string { - return []string{"Display Name"} -} - -func (i FolderInfo) Values() []string { - return []string{i.DisplayName} -} - -// ExchangeInfo describes an exchange item -type ExchangeInfo struct { - ItemType ItemType `json:"itemType,omitempty"` - Sender string `json:"sender,omitempty"` - Subject string 
`json:"subject,omitempty"` - Recipient []string `json:"recipient,omitempty"` - ParentPath string `json:"parentPath,omitempty"` - Received time.Time `json:"received,omitempty"` - EventStart time.Time `json:"eventStart,omitempty"` - EventEnd time.Time `json:"eventEnd,omitempty"` - Organizer string `json:"organizer,omitempty"` - ContactName string `json:"contactName,omitempty"` - EventRecurs bool `json:"eventRecurs,omitempty"` - Created time.Time `json:"created,omitempty"` - Modified time.Time `json:"modified,omitempty"` - Size int64 `json:"size,omitempty"` -} - -// Headers returns the human-readable names of properties in an ExchangeInfo -// for printing out to a terminal in a columnar display. -func (i ExchangeInfo) Headers() []string { - switch i.ItemType { - case ExchangeEvent: - return []string{"Organizer", "Subject", "Starts", "Ends", "Recurring"} - - case ExchangeContact: - return []string{"Contact Name"} - - case ExchangeMail: - return []string{"Sender", "Folder", "Subject", "Received"} - } - - return []string{} -} - -// Values returns the values matching the Headers list for printing -// out to a terminal in a columnar display. -func (i ExchangeInfo) Values() []string { - switch i.ItemType { - case ExchangeEvent: - return []string{ - i.Organizer, - i.Subject, - dttm.FormatToTabularDisplay(i.EventStart), - dttm.FormatToTabularDisplay(i.EventEnd), - strconv.FormatBool(i.EventRecurs), - } - - case ExchangeContact: - return []string{i.ContactName} - - case ExchangeMail: - return []string{ - i.Sender, i.ParentPath, i.Subject, - dttm.FormatToTabularDisplay(i.Received), - } - } - - return []string{} -} - -func (i *ExchangeInfo) UpdateParentPath(newLocPath *path.Builder) { - i.ParentPath = newLocPath.String() -} - -func (i *ExchangeInfo) uniqueLocation(baseLoc *path.Builder) (*uniqueLoc, error) { - var category path.CategoryType - - switch i.ItemType { - case ExchangeEvent: - category = path.EventsCategory - case ExchangeContact: - category = path.ContactsCategory - case ExchangeMail: - category = path.EmailCategory - } - - loc, err := NewExchangeLocationIDer(category, baseLoc.Elements()...) - - return &loc, err -} - -func (i *ExchangeInfo) updateFolder(f *FolderInfo) error { - // Use a switch instead of a rather large if-statement. Just make sure it's an - // Exchange type. If it's not return an error. - switch i.ItemType { - case ExchangeContact, ExchangeEvent, ExchangeMail: - default: - return clues.New("unsupported non-Exchange ItemType"). 
- With("item_type", i.ItemType) - } - - f.DataType = i.ItemType - - return nil -} - -// ChannelsInfo describes an exchange item -type ChannelsInfo struct { - ItemType ItemType `json:"itemType,omitempty"` - Sender string `json:"sender,omitempty"` - ParentPath string `json:"parentPath,omitempty"` - Received time.Time `json:"received,omitempty"` - Created time.Time `json:"created,omitempty"` - Modified time.Time `json:"modified,omitempty"` - Size int64 `json:"size,omitempty"` -} - -// SharePointInfo describes a sharepoint item -type SharePointInfo struct { - Created time.Time `json:"created,omitempty"` - DriveName string `json:"driveName,omitempty"` - DriveID string `json:"driveID,omitempty"` - ItemName string `json:"itemName,omitempty"` - ItemType ItemType `json:"itemType,omitempty"` - Modified time.Time `json:"modified,omitempty"` - Owner string `json:"owner,omitempty"` - ParentPath string `json:"parentPath,omitempty"` - Size int64 `json:"size,omitempty"` - WebURL string `json:"webUrl,omitempty"` - SiteID string `json:"siteID,omitempty"` -} - -// Headers returns the human-readable names of properties in a SharePointInfo -// for printing out to a terminal in a columnar display. -func (i SharePointInfo) Headers() []string { - return []string{"ItemName", "Library", "ParentPath", "Size", "Owner", "Created", "Modified"} -} - -// Values returns the values matching the Headers list for printing -// out to a terminal in a columnar display. -func (i SharePointInfo) Values() []string { - return []string{ - i.ItemName, - i.DriveName, - i.ParentPath, - humanize.Bytes(uint64(i.Size)), - i.Owner, - dttm.FormatToTabularDisplay(i.Created), - dttm.FormatToTabularDisplay(i.Modified), - } -} - -func (i *SharePointInfo) UpdateParentPath(newLocPath *path.Builder) { - i.ParentPath = newLocPath.PopFront().String() -} - -func (i *SharePointInfo) uniqueLocation(baseLoc *path.Builder) (*uniqueLoc, error) { - if len(i.DriveID) == 0 { - return nil, clues.New("empty drive ID") - } - - loc := NewSharePointLocationIDer(i.DriveID, baseLoc.Elements()...) - - return &loc, nil -} - -func (i *SharePointInfo) updateFolder(f *FolderInfo) error { - // TODO(ashmrtn): Change to just SharePointLibrary when the code that - // generates the item type is fixed. - if i.ItemType == OneDriveItem || i.ItemType == SharePointLibrary { - return updateFolderWithinDrive(SharePointLibrary, i.DriveName, i.DriveID, f) - } - - return clues.New("unsupported non-SharePoint ItemType").With("item_type", i.ItemType) -} - -// OneDriveInfo describes a oneDrive item -type OneDriveInfo struct { - Created time.Time `json:"created,omitempty"` - DriveID string `json:"driveID,omitempty"` - DriveName string `json:"driveName,omitempty"` - IsMeta bool `json:"isMeta,omitempty"` - ItemName string `json:"itemName,omitempty"` - ItemType ItemType `json:"itemType,omitempty"` - Modified time.Time `json:"modified,omitempty"` - Owner string `json:"owner,omitempty"` - ParentPath string `json:"parentPath"` - Size int64 `json:"size,omitempty"` -} - -// Headers returns the human-readable names of properties in a OneDriveInfo -// for printing out to a terminal in a columnar display. -func (i OneDriveInfo) Headers() []string { - return []string{"ItemName", "ParentPath", "Size", "Owner", "Created", "Modified"} -} - -// Values returns the values matching the Headers list for printing -// out to a terminal in a columnar display. 
-func (i OneDriveInfo) Values() []string { - return []string{ - i.ItemName, - i.ParentPath, - humanize.Bytes(uint64(i.Size)), - i.Owner, - dttm.FormatToTabularDisplay(i.Created), - dttm.FormatToTabularDisplay(i.Modified), - } -} - -func (i *OneDriveInfo) UpdateParentPath(newLocPath *path.Builder) { - i.ParentPath = newLocPath.PopFront().String() -} - -func (i *OneDriveInfo) uniqueLocation(baseLoc *path.Builder) (*uniqueLoc, error) { - if len(i.DriveID) == 0 { - return nil, clues.New("empty drive ID") - } - - loc := NewOneDriveLocationIDer(i.DriveID, baseLoc.Elements()...) - - return &loc, nil -} - -func (i *OneDriveInfo) updateFolder(f *FolderInfo) error { - return updateFolderWithinDrive(OneDriveItem, i.DriveName, i.DriveID, f) -} +// --------------------------------------------------------------------------- +// helpers +// --------------------------------------------------------------------------- func updateFolderWithinDrive( t ItemType, diff --git a/src/pkg/backup/details/details_test.go b/src/pkg/backup/details/details_test.go index 4646b484a..b804c04cf 100644 --- a/src/pkg/backup/details/details_test.go +++ b/src/pkg/backup/details/details_test.go @@ -14,8 +14,8 @@ import ( "github.com/stretchr/testify/suite" "github.com/alcionai/corso/src/internal/common/dttm" - odConsts "github.com/alcionai/corso/src/internal/m365/onedrive/consts" - "github.com/alcionai/corso/src/internal/m365/onedrive/metadata" + "github.com/alcionai/corso/src/internal/m365/collection/drive/metadata" + odConsts "github.com/alcionai/corso/src/internal/m365/service/onedrive/consts" "github.com/alcionai/corso/src/internal/tester" "github.com/alcionai/corso/src/internal/version" "github.com/alcionai/corso/src/pkg/path" diff --git a/src/pkg/backup/details/entry.go b/src/pkg/backup/details/entry.go new file mode 100644 index 000000000..83b9af133 --- /dev/null +++ b/src/pkg/backup/details/entry.go @@ -0,0 +1,175 @@ +package details + +import ( + "context" + + "github.com/alcionai/clues" + + "github.com/alcionai/corso/src/cli/print" + "github.com/alcionai/corso/src/internal/version" + "github.com/alcionai/corso/src/pkg/path" +) + +// Add a new type so we can transparently use PrintAll in different situations. +type entrySet []*Entry + +func (ents entrySet) PrintEntries(ctx context.Context) { + printEntries(ctx, ents) +} + +// MaybePrintEntries is same as PrintEntries, but only prints if we +// have less than 15 items or is not json output. +func (ents entrySet) MaybePrintEntries(ctx context.Context) { + if len(ents) <= maxPrintLimit || + print.DisplayJSONFormat() || + print.DisplayVerbose() { + printEntries(ctx, ents) + } +} + +// Entry describes a single item stored in a Backup +type Entry struct { + // RepoRef is the full storage path of the item in Kopia + RepoRef string `json:"repoRef"` + ShortRef string `json:"shortRef"` + ParentRef string `json:"parentRef,omitempty"` + + // LocationRef contains the logical path structure by its human-readable + // display names. IE: If an item is located at "/Inbox/Important", we + // hold that string in the LocationRef, while the actual IDs of each + // container are used for the RepoRef. + // LocationRef only holds the container values, and does not include + // the metadata prefixes (tenant, service, owner, etc) found in the + // repoRef. + // Currently only implemented for Exchange Calendars. + LocationRef string `json:"locationRef,omitempty"` + + // ItemRef contains the stable id of the item itself. ItemRef is not + // guaranteed to be unique within a repository. 
Uniqueness guarantees + // maximally inherit from the source item. Eg: Entries for m365 mail items + // are only as unique as m365 mail item IDs themselves. + ItemRef string `json:"itemRef,omitempty"` + + // Indicates the item was added or updated in this backup + // Always `true` for full backups + Updated bool `json:"updated"` + + ItemInfo +} + +// ToLocationIDer takes a backup version and produces the unique location for +// this entry if possible. Reasons it may not be possible to produce the unique +// location include an unsupported backup version or missing information. +func (de Entry) ToLocationIDer(backupVersion int) (LocationIDer, error) { + if len(de.LocationRef) > 0 { + baseLoc, err := path.Builder{}.SplitUnescapeAppend(de.LocationRef) + if err != nil { + return nil, clues.Wrap(err, "parsing base location info"). + With("location_ref", de.LocationRef) + } + + // Individual services may add additional info to the base and return that. + return de.ItemInfo.uniqueLocation(baseLoc) + } + + if backupVersion >= version.OneDrive7LocationRef || + (de.ItemInfo.infoType() != OneDriveItem && + de.ItemInfo.infoType() != SharePointLibrary) { + return nil, clues.New("no previous location for entry") + } + + // This is a little hacky, but we only want to try to extract the old + // location if it's OneDrive or SharePoint libraries and it's known to + // be an older backup version. + // + // TODO(ashmrtn): Remove this code once OneDrive/SharePoint libraries + // LocationRef code has been out long enough that all delta tokens for + // previous backup versions will have expired. At that point, either + // we'll do a full backup (token expired, no newer backups) or have a + // backup of a higher version with the information we need. + rr, err := path.FromDataLayerPath(de.RepoRef, true) + if err != nil { + return nil, clues.Wrap(err, "getting item RepoRef") + } + + p, err := path.ToDrivePath(rr) + if err != nil { + return nil, clues.New("converting RepoRef to drive path") + } + + baseLoc := path.Builder{}.Append(p.Root).Append(p.Folders...) + + // Individual services may add additional info to the base and return that. + return de.ItemInfo.uniqueLocation(baseLoc) +} + +// Check if a file is a metadata file. These are used to store +// additional data like permissions (in case of Drive items) and are +// not to be treated as regular files. +func (de Entry) isMetaFile() bool { + // sharepoint types not needed, since sharepoint permissions were + // added after IsMeta was deprecated. + // Earlier onedrive backups used to store both metafiles and files in details. + // So filter out just the onedrive items and check for metafiles + return de.ItemInfo.OneDrive != nil && de.ItemInfo.OneDrive.IsMeta +} + +// -------------------------------------------------------------------------------- +// CLI Output +// -------------------------------------------------------------------------------- + +// interface compliance checks +var _ print.Printable = &Entry{} + +// MinimumPrintable DetailsEntries is a passthrough func, because no +// reduction is needed for the json output. +func (de Entry) MinimumPrintable() any { + return de +} + +// Headers returns the human-readable names of properties in a DetailsEntry +// for printing out to a terminal in a columnar display. +func (de Entry) Headers() []string { + hs := []string{"ID"} + + if de.ItemInfo.Folder != nil { + hs = append(hs, de.ItemInfo.Folder.Headers()...) + } + + if de.ItemInfo.Exchange != nil { + hs = append(hs, de.ItemInfo.Exchange.Headers()...) 
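// A hedged sketch of Entry.ToLocationIDer from the new entry.go above: with a
// populated LocationRef the backup version does not matter, while older
// OneDrive/SharePoint entries fall back to deriving the location from RepoRef.
// The LocationRef and drive ID values below are hypothetical.
package main

import (
	"fmt"

	"github.com/alcionai/corso/src/internal/version"
	"github.com/alcionai/corso/src/pkg/backup/details"
)

func printLocation(ent details.Entry) {
	ider, err := ent.ToLocationIDer(version.OneDrive7LocationRef)
	if err != nil {
		fmt.Println("no location recoverable:", err)
		return
	}

	fmt.Println("map key:     ", ider.ID().ShortRef())
	fmt.Println("display path:", ider.InDetails().String())
}

func main() {
	printLocation(details.Entry{
		LocationRef: "root:/folder",
		ItemInfo: details.ItemInfo{
			OneDrive: &details.OneDriveInfo{
				ItemType: details.OneDriveItem,
				DriveID:  "drive-id", // required to build the unique location
			},
		},
	})
}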
+ } + + if de.ItemInfo.SharePoint != nil { + hs = append(hs, de.ItemInfo.SharePoint.Headers()...) + } + + if de.ItemInfo.OneDrive != nil { + hs = append(hs, de.ItemInfo.OneDrive.Headers()...) + } + + return hs +} + +// Values returns the values matching the Headers list. +func (de Entry) Values() []string { + vs := []string{de.ShortRef} + + if de.ItemInfo.Folder != nil { + vs = append(vs, de.ItemInfo.Folder.Values()...) + } + + if de.ItemInfo.Exchange != nil { + vs = append(vs, de.ItemInfo.Exchange.Values()...) + } + + if de.ItemInfo.SharePoint != nil { + vs = append(vs, de.ItemInfo.SharePoint.Values()...) + } + + if de.ItemInfo.OneDrive != nil { + vs = append(vs, de.ItemInfo.OneDrive.Values()...) + } + + return vs +} diff --git a/src/pkg/backup/details/exchange.go b/src/pkg/backup/details/exchange.go new file mode 100644 index 000000000..dda1f2d40 --- /dev/null +++ b/src/pkg/backup/details/exchange.go @@ -0,0 +1,127 @@ +package details + +import ( + "strconv" + "time" + + "github.com/alcionai/clues" + + "github.com/alcionai/corso/src/internal/common/dttm" + "github.com/alcionai/corso/src/pkg/path" +) + +// NewExchangeLocationIDer builds a LocationIDer for the given category and +// folder path. The path denoted by the folders should be unique within the +// category. +func NewExchangeLocationIDer( + category path.CategoryType, + escapedFolders ...string, +) (uniqueLoc, error) { + if err := path.ValidateServiceAndCategory(path.ExchangeService, category); err != nil { + return uniqueLoc{}, clues.Wrap(err, "making exchange LocationIDer") + } + + pb := path.Builder{}.Append(category.String()).Append(escapedFolders...) + + return uniqueLoc{ + pb: pb, + prefixElems: 1, + }, nil +} + +// ExchangeInfo describes an exchange item +type ExchangeInfo struct { + ItemType ItemType `json:"itemType,omitempty"` + Sender string `json:"sender,omitempty"` + Subject string `json:"subject,omitempty"` + Recipient []string `json:"recipient,omitempty"` + ParentPath string `json:"parentPath,omitempty"` + Received time.Time `json:"received,omitempty"` + EventStart time.Time `json:"eventStart,omitempty"` + EventEnd time.Time `json:"eventEnd,omitempty"` + Organizer string `json:"organizer,omitempty"` + ContactName string `json:"contactName,omitempty"` + EventRecurs bool `json:"eventRecurs,omitempty"` + Created time.Time `json:"created,omitempty"` + Modified time.Time `json:"modified,omitempty"` + Size int64 `json:"size,omitempty"` +} + +// Headers returns the human-readable names of properties in an ExchangeInfo +// for printing out to a terminal in a columnar display. +func (i ExchangeInfo) Headers() []string { + switch i.ItemType { + case ExchangeEvent: + return []string{"Organizer", "Subject", "Starts", "Ends", "Recurring"} + + case ExchangeContact: + return []string{"Contact Name"} + + case ExchangeMail: + return []string{"Sender", "Folder", "Subject", "Received"} + } + + return []string{} +} + +// Values returns the values matching the Headers list for printing +// out to a terminal in a columnar display. 
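// A small sketch of the Exchange LocationIDer constructor moved into
// exchange.go above: ID() keeps the category prefix so the value can serve as a
// map key, while InDetails() yields only the display folders. Folder names are
// hypothetical.
package main

import (
	"fmt"

	"github.com/alcionai/corso/src/pkg/backup/details"
	"github.com/alcionai/corso/src/pkg/path"
)

func main() {
	loc, err := details.NewExchangeLocationIDer(path.EmailCategory, "Inbox", "Important")
	if err != nil {
		fmt.Println("building location:", err)
		return
	}

	fmt.Println("unique ID: ", loc.ID().String())        // e.g. "email/Inbox/Important"
	fmt.Println("in details:", loc.InDetails().String()) // e.g. "Inbox/Important"
}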
+func (i ExchangeInfo) Values() []string { + switch i.ItemType { + case ExchangeEvent: + return []string{ + i.Organizer, + i.Subject, + dttm.FormatToTabularDisplay(i.EventStart), + dttm.FormatToTabularDisplay(i.EventEnd), + strconv.FormatBool(i.EventRecurs), + } + + case ExchangeContact: + return []string{i.ContactName} + + case ExchangeMail: + return []string{ + i.Sender, i.ParentPath, i.Subject, + dttm.FormatToTabularDisplay(i.Received), + } + } + + return []string{} +} + +func (i *ExchangeInfo) UpdateParentPath(newLocPath *path.Builder) { + i.ParentPath = newLocPath.String() +} + +func (i *ExchangeInfo) uniqueLocation(baseLoc *path.Builder) (*uniqueLoc, error) { + var category path.CategoryType + + switch i.ItemType { + case ExchangeEvent: + category = path.EventsCategory + case ExchangeContact: + category = path.ContactsCategory + case ExchangeMail: + category = path.EmailCategory + } + + loc, err := NewExchangeLocationIDer(category, baseLoc.Elements()...) + + return &loc, err +} + +func (i *ExchangeInfo) updateFolder(f *FolderInfo) error { + // Use a switch instead of a rather large if-statement. Just make sure it's an + // Exchange type. If it's not return an error. + switch i.ItemType { + case ExchangeContact, ExchangeEvent, ExchangeMail: + default: + return clues.New("unsupported non-Exchange ItemType"). + With("item_type", i.ItemType) + } + + f.DataType = i.ItemType + + return nil +} diff --git a/src/pkg/backup/details/folder.go b/src/pkg/backup/details/folder.go new file mode 100644 index 000000000..f837836cd --- /dev/null +++ b/src/pkg/backup/details/folder.go @@ -0,0 +1,21 @@ +package details + +import "time" + +type FolderInfo struct { + ItemType ItemType `json:"itemType,omitempty"` + DisplayName string `json:"displayName"` + Modified time.Time `json:"modified,omitempty"` + Size int64 `json:"size,omitempty"` + DataType ItemType `json:"dataType,omitempty"` + DriveName string `json:"driveName,omitempty"` + DriveID string `json:"driveID,omitempty"` +} + +func (i FolderInfo) Headers() []string { + return []string{"Display Name"} +} + +func (i FolderInfo) Values() []string { + return []string{i.DisplayName} +} diff --git a/src/pkg/backup/details/groups.go b/src/pkg/backup/details/groups.go new file mode 100644 index 000000000..398d8f529 --- /dev/null +++ b/src/pkg/backup/details/groups.go @@ -0,0 +1,59 @@ +package details + +import ( + "time" + + "github.com/alcionai/clues" + + "github.com/alcionai/corso/src/internal/common/dttm" + "github.com/alcionai/corso/src/pkg/path" +) + +// NewGroupsLocationIDer builds a LocationIDer for the groups. +func NewGroupsLocationIDer( + driveID string, + escapedFolders ...string, +) uniqueLoc { + // TODO: implement + return uniqueLoc{} +} + +// GroupsInfo describes a groups item +type GroupsInfo struct { + Created time.Time `json:"created,omitempty"` + DriveName string `json:"driveName,omitempty"` + DriveID string `json:"driveID,omitempty"` + ItemName string `json:"itemName,omitempty"` + ItemType ItemType `json:"itemType,omitempty"` + Modified time.Time `json:"modified,omitempty"` + Owner string `json:"owner,omitempty"` + ParentPath string `json:"parentPath,omitempty"` + Size int64 `json:"size,omitempty"` +} + +// Headers returns the human-readable names of properties in a SharePointInfo +// for printing out to a terminal in a columnar display. +func (i GroupsInfo) Headers() []string { + return []string{"Created", "Modified"} +} + +// Values returns the values matching the Headers list for printing +// out to a terminal in a columnar display. 
+func (i GroupsInfo) Values() []string { + return []string{ + dttm.FormatToTabularDisplay(i.Created), + dttm.FormatToTabularDisplay(i.Modified), + } +} + +func (i *GroupsInfo) UpdateParentPath(newLocPath *path.Builder) { + i.ParentPath = newLocPath.PopFront().String() +} + +func (i *GroupsInfo) uniqueLocation(baseLoc *path.Builder) (*uniqueLoc, error) { + return nil, clues.New("not yet implemented") +} + +func (i *GroupsInfo) updateFolder(f *FolderInfo) error { + return clues.New("not yet implemented") +} diff --git a/src/pkg/backup/details/iteminfo.go b/src/pkg/backup/details/iteminfo.go new file mode 100644 index 000000000..9912fb6d2 --- /dev/null +++ b/src/pkg/backup/details/iteminfo.go @@ -0,0 +1,169 @@ +package details + +import ( + "time" + + "github.com/alcionai/clues" + + "github.com/alcionai/corso/src/pkg/path" +) + +type ItemType int + +// ItemTypes are enumerated by service (hundredth digit) and data type (ones digit). +// Ex: exchange is 00x where x is the data type. Sharepoint is 10x, and etc. +// Every item info struct should get its own hundredth enumeration entry. +// Every item category for that service should get its own entry (even if differences +// between types aren't apparent on initial implementation, this future-proofs +// against breaking changes). +// Entries should not be rearranged. +// Additionally, any itemType directly assigned a number should not be altered. +// This applies to OneDriveItem and FolderItem +const ( + UnknownType ItemType = iota // 0, global unknown value + + // Exchange (00x) + ExchangeContact + ExchangeEvent + ExchangeMail + // SharePoint (10x) + SharePointLibrary ItemType = iota + 97 // 100 + SharePointList // 101... + SharePointPage + + // OneDrive (20x) + OneDriveItem ItemType = 205 + + // Folder Management(30x) + FolderItem ItemType = 306 +) + +func UpdateItem(item *ItemInfo, newLocPath *path.Builder) { + // Only OneDrive and SharePoint have information about parent folders + // contained in them. + // Can't switch based on infoType because that's been unstable. + if item.Exchange != nil { + item.Exchange.UpdateParentPath(newLocPath) + } else if item.SharePoint != nil { + // SharePoint used to store library items with the OneDriveItem ItemType. + // Start switching them over as we see them since there's no point in + // keeping the old format. + if item.SharePoint.ItemType == OneDriveItem { + item.SharePoint.ItemType = SharePointLibrary + } + + item.SharePoint.UpdateParentPath(newLocPath) + } else if item.OneDrive != nil { + item.OneDrive.UpdateParentPath(newLocPath) + } +} + +// ItemInfo is a oneOf that contains service specific +// information about the item it tracks +type ItemInfo struct { + Folder *FolderInfo `json:"folder,omitempty"` + Exchange *ExchangeInfo `json:"exchange,omitempty"` + SharePoint *SharePointInfo `json:"sharePoint,omitempty"` + OneDrive *OneDriveInfo `json:"oneDrive,omitempty"` + Groups *GroupsInfo `json:"groups,omitempty"` + // Optional item extension data + Extension *ExtensionData `json:"extension,omitempty"` +} + +// typedInfo should get embedded in each sesrvice type to track +// the type of item it stores for multi-item service support. + +// infoType provides internal categorization for collecting like-typed ItemInfos. +// It should return the most granular value type (ex: "event" for an exchange +// calendar event). 
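// A brief sketch of details.UpdateItem from the new iteminfo.go above: it
// rewrites the item's parent path after a folder move and, for legacy
// SharePoint entries stored with the OneDriveItem type, migrates them to
// SharePointLibrary. The folder and drive names here are hypothetical.
package main

import (
	"fmt"

	"github.com/alcionai/corso/src/pkg/backup/details"
	"github.com/alcionai/corso/src/pkg/path"
)

func main() {
	info := details.ItemInfo{
		SharePoint: &details.SharePointInfo{
			ItemType:   details.OneDriveItem, // legacy value from older backups
			ParentPath: "root:/old-folder",
		},
	}

	// Hypothetical new location; SharePoint drops the leading drive element.
	newLoc := path.Builder{}.Append("drive-name", "root:", "new-folder")

	details.UpdateItem(&info, newLoc)

	fmt.Println(info.SharePoint.ItemType == details.SharePointLibrary) // true
	fmt.Println(info.SharePoint.ParentPath)                            // "root:/new-folder"
}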
+func (i ItemInfo) infoType() ItemType { + switch { + case i.Folder != nil: + return i.Folder.ItemType + + case i.Exchange != nil: + return i.Exchange.ItemType + + case i.SharePoint != nil: + return i.SharePoint.ItemType + + case i.OneDrive != nil: + return i.OneDrive.ItemType + } + + return UnknownType +} + +func (i ItemInfo) size() int64 { + switch { + case i.Exchange != nil: + return i.Exchange.Size + + case i.OneDrive != nil: + return i.OneDrive.Size + + case i.SharePoint != nil: + return i.SharePoint.Size + + case i.Folder != nil: + return i.Folder.Size + } + + return 0 +} + +func (i ItemInfo) Modified() time.Time { + switch { + case i.Exchange != nil: + return i.Exchange.Modified + + case i.OneDrive != nil: + return i.OneDrive.Modified + + case i.SharePoint != nil: + return i.SharePoint.Modified + + case i.Folder != nil: + return i.Folder.Modified + } + + return time.Time{} +} + +func (i ItemInfo) uniqueLocation(baseLoc *path.Builder) (*uniqueLoc, error) { + switch { + case i.Exchange != nil: + return i.Exchange.uniqueLocation(baseLoc) + + case i.OneDrive != nil: + return i.OneDrive.uniqueLocation(baseLoc) + + case i.SharePoint != nil: + return i.SharePoint.uniqueLocation(baseLoc) + + case i.Groups != nil: + return i.Groups.uniqueLocation(baseLoc) + + default: + return nil, clues.New("unsupported type") + } +} + +func (i ItemInfo) updateFolder(f *FolderInfo) error { + switch { + case i.Exchange != nil: + return i.Exchange.updateFolder(f) + + case i.OneDrive != nil: + return i.OneDrive.updateFolder(f) + + case i.SharePoint != nil: + return i.SharePoint.updateFolder(f) + + case i.Groups != nil: + return i.Groups.updateFolder(f) + + default: + return clues.New("unsupported type") + } +} diff --git a/src/pkg/backup/details/model.go b/src/pkg/backup/details/model.go new file mode 100644 index 000000000..062621732 --- /dev/null +++ b/src/pkg/backup/details/model.go @@ -0,0 +1,125 @@ +package details + +import ( + "context" + + "github.com/alcionai/corso/src/cli/print" +) + +// DetailsModel describes what was stored in a Backup +type DetailsModel struct { + Entries []Entry `json:"entries"` +} + +// Print writes the DetailModel Entries to StdOut, in the format +// requested by the caller. +func (dm DetailsModel) PrintEntries(ctx context.Context) { + printEntries(ctx, dm.Entries) +} + +type infoer interface { + Entry | *Entry + // Need this here so we can access the infoType function without a type + // assertion. See https://stackoverflow.com/a/71378366 for more details. + infoType() ItemType +} + +func printEntries[T infoer](ctx context.Context, entries []T) { + if print.DisplayJSONFormat() { + printJSON(ctx, entries) + } else { + printTable(ctx, entries) + } +} + +func printTable[T infoer](ctx context.Context, entries []T) { + perType := map[ItemType][]print.Printable{} + + for _, ent := range entries { + it := ent.infoType() + ps, ok := perType[it] + + if !ok { + ps = []print.Printable{} + } + + perType[it] = append(ps, print.Printable(ent)) + } + + for _, ps := range perType { + print.All(ctx, ps...) + } +} + +func printJSON[T infoer](ctx context.Context, entries []T) { + ents := []print.Printable{} + + for _, ent := range entries { + ents = append(ents, print.Printable(ent)) + } + + print.All(ctx, ents...) +} + +// Paths returns the list of Paths for non-folder and non-meta items extracted +// from the Entries slice. 
+func (dm DetailsModel) Paths() []string { + r := make([]string, 0, len(dm.Entries)) + + for _, ent := range dm.Entries { + if ent.Folder != nil || ent.isMetaFile() { + continue + } + + r = append(r, ent.RepoRef) + } + + return r +} + +// Items returns a slice of *ItemInfo that does not contain any FolderInfo +// entries. Required because not all folders in the details are valid resource +// paths, and we want to slice out metadata. +func (dm DetailsModel) Items() entrySet { + res := make([]*Entry, 0, len(dm.Entries)) + + for i := 0; i < len(dm.Entries); i++ { + ent := dm.Entries[i] + if ent.Folder != nil || ent.isMetaFile() { + continue + } + + res = append(res, &ent) + } + + return res +} + +// FilterMetaFiles returns a copy of the Details with all of the +// .meta files removed from the entries. +func (dm DetailsModel) FilterMetaFiles() DetailsModel { + d2 := DetailsModel{ + Entries: []Entry{}, + } + + for _, ent := range dm.Entries { + if !ent.isMetaFile() { + d2.Entries = append(d2.Entries, ent) + } + } + + return d2 +} + +// SumNonMetaFileSizes returns the total size of items excluding all the +// .meta files from the items. +func (dm DetailsModel) SumNonMetaFileSizes() int64 { + var size int64 + + // Items will provide only files and filter out folders + for _, ent := range dm.FilterMetaFiles().Items() { + size += ent.size() + } + + return size +} diff --git a/src/pkg/backup/details/onedrive.go b/src/pkg/backup/details/onedrive.go new file mode 100644 index 000000000..f0974030b --- /dev/null +++ b/src/pkg/backup/details/onedrive.go @@ -0,0 +1,78 @@ +package details + +import ( + "time" + + "github.com/alcionai/clues" + "github.com/dustin/go-humanize" + + "github.com/alcionai/corso/src/internal/common/dttm" + "github.com/alcionai/corso/src/pkg/path" +) + +// NewOneDriveLocationIDer builds a LocationIDer for the drive and folder path. +// The path denoted by the folders should be unique within the drive. +func NewOneDriveLocationIDer( + driveID string, + escapedFolders ...string, +) uniqueLoc { + pb := path.Builder{}. + Append(path.FilesCategory.String(), driveID). + Append(escapedFolders...) + + return uniqueLoc{ + pb: pb, + prefixElems: 2, + } +} + +// OneDriveInfo describes a oneDrive item +type OneDriveInfo struct { + Created time.Time `json:"created,omitempty"` + DriveID string `json:"driveID,omitempty"` + DriveName string `json:"driveName,omitempty"` + IsMeta bool `json:"isMeta,omitempty"` + ItemName string `json:"itemName,omitempty"` + ItemType ItemType `json:"itemType,omitempty"` + Modified time.Time `json:"modified,omitempty"` + Owner string `json:"owner,omitempty"` + ParentPath string `json:"parentPath"` + Size int64 `json:"size,omitempty"` +} + +// Headers returns the human-readable names of properties in a OneDriveInfo +// for printing out to a terminal in a columnar display. +func (i OneDriveInfo) Headers() []string { + return []string{"ItemName", "ParentPath", "Size", "Owner", "Created", "Modified"} +} + +// Values returns the values matching the Headers list for printing +// out to a terminal in a columnar display. 
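// A compact sketch of the DetailsModel helpers relocated into model.go above:
// FilterMetaFiles drops OneDrive .meta entries, Items additionally drops folder
// entries, and SumNonMetaFileSizes totals only the remaining item sizes.
package main

import (
	"fmt"

	"github.com/alcionai/corso/src/pkg/backup/details"
)

func summarize(dm details.DetailsModel) {
	files := dm.FilterMetaFiles()

	fmt.Println("printable paths:", len(dm.Paths()))
	fmt.Println("non-meta items: ", len(files.Items()))
	fmt.Println("non-meta bytes: ", dm.SumNonMetaFileSizes())
}

func main() {
	summarize(details.DetailsModel{})
}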
+func (i OneDriveInfo) Values() []string { + return []string{ + i.ItemName, + i.ParentPath, + humanize.Bytes(uint64(i.Size)), + i.Owner, + dttm.FormatToTabularDisplay(i.Created), + dttm.FormatToTabularDisplay(i.Modified), + } +} + +func (i *OneDriveInfo) UpdateParentPath(newLocPath *path.Builder) { + i.ParentPath = newLocPath.PopFront().String() +} + +func (i *OneDriveInfo) uniqueLocation(baseLoc *path.Builder) (*uniqueLoc, error) { + if len(i.DriveID) == 0 { + return nil, clues.New("empty drive ID") + } + + loc := NewOneDriveLocationIDer(i.DriveID, baseLoc.Elements()...) + + return &loc, nil +} + +func (i *OneDriveInfo) updateFolder(f *FolderInfo) error { + return updateFolderWithinDrive(OneDriveItem, i.DriveName, i.DriveID, f) +} diff --git a/src/pkg/backup/details/sharepoint.go b/src/pkg/backup/details/sharepoint.go new file mode 100644 index 000000000..dceff7f99 --- /dev/null +++ b/src/pkg/backup/details/sharepoint.go @@ -0,0 +1,86 @@ +package details + +import ( + "time" + + "github.com/alcionai/clues" + "github.com/dustin/go-humanize" + + "github.com/alcionai/corso/src/internal/common/dttm" + "github.com/alcionai/corso/src/pkg/path" +) + +// NewSharePointLocationIDer builds a LocationIDer for the drive and folder +// path. The path denoted by the folders should be unique within the drive. +func NewSharePointLocationIDer( + driveID string, + escapedFolders ...string, +) uniqueLoc { + pb := path.Builder{}. + Append(path.LibrariesCategory.String(), driveID). + Append(escapedFolders...) + + return uniqueLoc{ + pb: pb, + prefixElems: 2, + } +} + +// SharePointInfo describes a sharepoint item +type SharePointInfo struct { + Created time.Time `json:"created,omitempty"` + DriveName string `json:"driveName,omitempty"` + DriveID string `json:"driveID,omitempty"` + ItemName string `json:"itemName,omitempty"` + ItemType ItemType `json:"itemType,omitempty"` + Modified time.Time `json:"modified,omitempty"` + Owner string `json:"owner,omitempty"` + ParentPath string `json:"parentPath,omitempty"` + Size int64 `json:"size,omitempty"` + WebURL string `json:"webUrl,omitempty"` + SiteID string `json:"siteID,omitempty"` +} + +// Headers returns the human-readable names of properties in a SharePointInfo +// for printing out to a terminal in a columnar display. +func (i SharePointInfo) Headers() []string { + return []string{"ItemName", "Library", "ParentPath", "Size", "Owner", "Created", "Modified"} +} + +// Values returns the values matching the Headers list for printing +// out to a terminal in a columnar display. +func (i SharePointInfo) Values() []string { + return []string{ + i.ItemName, + i.DriveName, + i.ParentPath, + humanize.Bytes(uint64(i.Size)), + i.Owner, + dttm.FormatToTabularDisplay(i.Created), + dttm.FormatToTabularDisplay(i.Modified), + } +} + +func (i *SharePointInfo) UpdateParentPath(newLocPath *path.Builder) { + i.ParentPath = newLocPath.PopFront().String() +} + +func (i *SharePointInfo) uniqueLocation(baseLoc *path.Builder) (*uniqueLoc, error) { + if len(i.DriveID) == 0 { + return nil, clues.New("empty drive ID") + } + + loc := NewSharePointLocationIDer(i.DriveID, baseLoc.Elements()...) + + return &loc, nil +} + +func (i *SharePointInfo) updateFolder(f *FolderInfo) error { + // TODO(ashmrtn): Change to just SharePointLibrary when the code that + // generates the item type is fixed. 
+ if i.ItemType == OneDriveItem || i.ItemType == SharePointLibrary { + return updateFolderWithinDrive(SharePointLibrary, i.DriveName, i.DriveID, f) + } + + return clues.New("unsupported non-SharePoint ItemType").With("item_type", i.ItemType) +} diff --git a/src/pkg/backup/identity/identity.go b/src/pkg/backup/identity/identity.go new file mode 100644 index 000000000..0f0d77416 --- /dev/null +++ b/src/pkg/backup/identity/identity.go @@ -0,0 +1,16 @@ +package identity + +import "github.com/alcionai/corso/src/pkg/path" + +// Reasoner describes the parts of the backup that make up its +// data identity: the tenant, protected resources, services, and +// categories which are held within the backup. +type Reasoner interface { + Tenant() string + ProtectedResource() string + Service() path.ServiceType + Category() path.CategoryType + // SubtreePath returns the path prefix for data in existing backups that have + // parameters (tenant, protected resourced, etc) that match this Reasoner. + SubtreePath() (path.Path, error) +} diff --git a/src/pkg/control/options.go b/src/pkg/control/options.go index 01c88b5eb..0f7d559aa 100644 --- a/src/pkg/control/options.go +++ b/src/pkg/control/options.go @@ -62,6 +62,12 @@ type Toggles struct { // DisableIncrementals prevents backups from using incremental lookups, // forcing a new, complete backup of all data regardless of prior state. DisableIncrementals bool `json:"exchangeIncrementals,omitempty"` + // ForceItemDataDownload disables finding cached items in previous failed + // backups (i.e. kopia-assisted incrementals). Data dedupe will still occur + // since that is based on content hashes. Items that have not changed since + // the previous backup (i.e. in the merge base) will not be redownloaded. Use + // DisableIncrementals to control that behavior. + ForceItemDataDownload bool `json:"forceItemDataDownload,omitempty"` // DisableDelta prevents backups from using delta based lookups, // forcing a backup by enumerating all items. This is different // from DisableIncrementals in that this does not even makes use of diff --git a/src/pkg/path/drive_test.go b/src/pkg/path/drive_test.go index 131c17b9c..e457a4423 100644 --- a/src/pkg/path/drive_test.go +++ b/src/pkg/path/drive_test.go @@ -9,7 +9,7 @@ import ( "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" - odConsts "github.com/alcionai/corso/src/internal/m365/onedrive/consts" + odConsts "github.com/alcionai/corso/src/internal/m365/service/onedrive/consts" "github.com/alcionai/corso/src/internal/tester" "github.com/alcionai/corso/src/pkg/path" ) diff --git a/src/pkg/repository/repository.go b/src/pkg/repository/repository.go index a604a5ac5..8d84d0cf3 100644 --- a/src/pkg/repository/repository.go +++ b/src/pkg/repository/repository.go @@ -14,7 +14,7 @@ import ( "github.com/alcionai/corso/src/internal/events" "github.com/alcionai/corso/src/internal/kopia" "github.com/alcionai/corso/src/internal/m365" - "github.com/alcionai/corso/src/internal/m365/onedrive/metadata" + "github.com/alcionai/corso/src/internal/m365/collection/drive/metadata" "github.com/alcionai/corso/src/internal/m365/resource" "github.com/alcionai/corso/src/internal/model" "github.com/alcionai/corso/src/internal/observe" @@ -449,7 +449,7 @@ func getBackup( return b, nil } -// BackupsByID lists backups by ID. Returns as many backups as possible with +// Backups lists backups by ID. Returns as many backups as possible with // errors for the backups it was unable to retrieve. 
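// A hypothetical minimal implementation of the new identity.Reasoner interface
// added above: struct and field names are illustrative, and SubtreePath simply
// returns a caller-provided prefix rather than deriving one.
package main

import (
	"github.com/alcionai/corso/src/pkg/backup/identity"
	"github.com/alcionai/corso/src/pkg/path"
)

type backupReason struct {
	tenant   string
	resource string
	service  path.ServiceType
	category path.CategoryType
	subtree  path.Path
}

func (r backupReason) Tenant() string                  { return r.tenant }
func (r backupReason) ProtectedResource() string       { return r.resource }
func (r backupReason) Service() path.ServiceType       { return r.service }
func (r backupReason) Category() path.CategoryType     { return r.category }
func (r backupReason) SubtreePath() (path.Path, error) { return r.subtree, nil }

// compile-time interface check
var _ identity.Reasoner = backupReason{}

func main() {}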
func (r repository) Backups(ctx context.Context, ids []string) ([]*backup.Backup, *fault.Bus) { var ( @@ -472,10 +472,38 @@ func (r repository) Backups(ctx context.Context, ids []string) ([]*backup.Backup return bups, errs } -// backups lists backups in a repository +// BackupsByTag lists all backups in a repository that contain all the tags +// specified. func (r repository) BackupsByTag(ctx context.Context, fs ...store.FilterOption) ([]*backup.Backup, error) { sw := store.NewKopiaStore(r.modelStore) - return sw.GetBackups(ctx, fs...) + return backupsByTag(ctx, sw, fs) +} + +// backupsByTag returns all backups matching all provided tags. +// +// TODO(ashmrtn): This exists mostly for testing, but we could restructure the +// code in this file so there's a more elegant mocking solution. +func backupsByTag( + ctx context.Context, + sw store.BackupWrapper, + fs []store.FilterOption, +) ([]*backup.Backup, error) { + bs, err := sw.GetBackups(ctx, fs...) + if err != nil { + return nil, clues.Stack(err) + } + + // Filter out assist backup bases as they're considered incomplete and we + // haven't been displaying them before now. + res := make([]*backup.Backup, 0, len(bs)) + + for _, b := range bs { + if t := b.Tags[model.BackupTypeTag]; t != model.AssistBackup { + res = append(res, b) + } + } + + return res, nil } // BackupDetails returns the specified backup.Details diff --git a/src/pkg/repository/repository_unexported_test.go b/src/pkg/repository/repository_unexported_test.go index 0e600157d..e24f2e1d6 100644 --- a/src/pkg/repository/repository_unexported_test.go +++ b/src/pkg/repository/repository_unexported_test.go @@ -30,6 +30,41 @@ import ( "github.com/alcionai/corso/src/pkg/store/mock" ) +// --------------------------------------------------------------------------- +// Mocks +// --------------------------------------------------------------------------- + +type mockBackupList struct { + backups []*backup.Backup + err error + check func(fs []store.FilterOption) +} + +func (mbl mockBackupList) GetBackup( + ctx context.Context, + backupID model.StableID, +) (*backup.Backup, error) { + return nil, clues.New("not implemented") +} + +func (mbl mockBackupList) DeleteBackup( + ctx context.Context, + backupID model.StableID, +) error { + return clues.New("not implemented") +} + +func (mbl mockBackupList) GetBackups( + ctx context.Context, + filters ...store.FilterOption, +) ([]*backup.Backup, error) { + if mbl.check != nil { + mbl.check(filters) + } + + return mbl.backups, mbl.err +} + // --------------------------------------------------------------------------- // Unit // --------------------------------------------------------------------------- @@ -100,6 +135,191 @@ func (suite *RepositoryBackupsUnitSuite) TestGetBackup() { } } +func (suite *RepositoryBackupsUnitSuite) TestBackupsByTag() { + unlabeled1 := &backup.Backup{ + BaseModel: model.BaseModel{ + ID: model.StableID(uuid.NewString()), + }, + } + unlabeled2 := &backup.Backup{ + BaseModel: model.BaseModel{ + ID: model.StableID(uuid.NewString()), + }, + } + + merge1 := &backup.Backup{ + BaseModel: model.BaseModel{ + ID: model.StableID(uuid.NewString()), + Tags: map[string]string{ + model.BackupTypeTag: model.MergeBackup, + }, + }, + } + merge2 := &backup.Backup{ + BaseModel: model.BaseModel{ + ID: model.StableID(uuid.NewString()), + Tags: map[string]string{ + model.BackupTypeTag: model.MergeBackup, + }, + }, + } + + assist1 := &backup.Backup{ + BaseModel: model.BaseModel{ + ID: model.StableID(uuid.NewString()), + Tags: map[string]string{ + 
model.BackupTypeTag: model.AssistBackup, + }, + }, + } + assist2 := &backup.Backup{ + BaseModel: model.BaseModel{ + ID: model.StableID(uuid.NewString()), + Tags: map[string]string{ + model.BackupTypeTag: model.AssistBackup, + }, + }, + } + + table := []struct { + name string + getBackups []*backup.Backup + filters []store.FilterOption + listErr error + expectErr assert.ErrorAssertionFunc + expect []*backup.Backup + }{ + { + name: "UnlabeledOnly", + getBackups: []*backup.Backup{ + unlabeled1, + unlabeled2, + }, + expectErr: assert.NoError, + expect: []*backup.Backup{ + unlabeled1, + unlabeled2, + }, + }, + { + name: "MergeOnly", + getBackups: []*backup.Backup{ + merge1, + merge2, + }, + expectErr: assert.NoError, + expect: []*backup.Backup{ + merge1, + merge2, + }, + }, + { + name: "AssistOnly", + getBackups: []*backup.Backup{ + assist1, + assist2, + }, + expectErr: assert.NoError, + }, + { + name: "UnlabledAndMerge", + getBackups: []*backup.Backup{ + merge1, + unlabeled1, + merge2, + unlabeled2, + }, + expectErr: assert.NoError, + expect: []*backup.Backup{ + merge1, + merge2, + unlabeled1, + unlabeled2, + }, + }, + { + name: "UnlabeledAndAssist", + getBackups: []*backup.Backup{ + unlabeled1, + assist1, + unlabeled2, + assist2, + }, + expectErr: assert.NoError, + expect: []*backup.Backup{ + unlabeled1, + unlabeled2, + }, + }, + { + name: "MergeAndAssist", + getBackups: []*backup.Backup{ + merge1, + assist1, + merge2, + assist2, + }, + expectErr: assert.NoError, + expect: []*backup.Backup{ + merge1, + merge2, + }, + }, + { + name: "UnlabeledAndMergeAndAssist", + getBackups: []*backup.Backup{ + unlabeled1, + merge1, + assist1, + merge2, + unlabeled2, + assist2, + }, + expectErr: assert.NoError, + expect: []*backup.Backup{ + merge1, + merge2, + unlabeled1, + unlabeled2, + }, + }, + { + name: "LookupError", + getBackups: []*backup.Backup{ + unlabeled1, + merge1, + assist1, + merge2, + unlabeled2, + assist2, + }, + listErr: assert.AnError, + expectErr: assert.Error, + }, + } + for _, test := range table { + suite.Run(test.name, func() { + t := suite.T() + + ctx, flush := tester.NewContext(t) + defer flush() + + mbl := mockBackupList{ + backups: test.getBackups, + err: test.listErr, + check: func(fs []store.FilterOption) { + assert.ElementsMatch(t, test.filters, fs) + }, + } + + bs, err := backupsByTag(ctx, mbl, test.filters) + test.expectErr(t, err, clues.ToCore(err)) + + assert.ElementsMatch(t, test.expect, bs) + }) + } +} + type mockSSDeleter struct { err error } @@ -339,6 +559,10 @@ func writeBackup( ssid, err := sstore.Write(ctx, errs) require.NoError(t, err, "writing to streamstore") + tags := map[string]string{ + model.ServiceTag: sel.PathService().String(), + } + b := backup.New( snapID, ssid, operations.Completed.String(), @@ -348,7 +572,8 @@ func writeBackup( ownerID, ownerName, stats.ReadWrites{}, stats.StartAndEndTime{}, - fe) + fe, + tags) err = sw.Put(ctx, model.BackupSchema, b) require.NoError(t, err) diff --git a/src/pkg/selectors/exchange.go b/src/pkg/selectors/exchange.go index 008134559..245909161 100644 --- a/src/pkg/selectors/exchange.go +++ b/src/pkg/selectors/exchange.go @@ -69,7 +69,7 @@ func (s Selector) ToExchangeBackup() (*ExchangeBackup, error) { } func (s ExchangeBackup) SplitByResourceOwner(users []string) []ExchangeBackup { - sels := splitByResourceOwner[ExchangeScope](s.Selector, users, ExchangeUser) + sels := splitByProtectedResource[ExchangeScope](s.Selector, users, ExchangeUser) ss := make([]ExchangeBackup, 0, len(sels)) for _, sel := range sels { @@ -103,7 
+103,7 @@ func (s Selector) ToExchangeRestore() (*ExchangeRestore, error) { } func (sr ExchangeRestore) SplitByResourceOwner(users []string) []ExchangeRestore { - sels := splitByResourceOwner[ExchangeScope](sr.Selector, users, ExchangeUser) + sels := splitByProtectedResource[ExchangeScope](sr.Selector, users, ExchangeUser) ss := make([]ExchangeRestore, 0, len(sels)) for _, sel := range sels { diff --git a/src/pkg/selectors/groups.go b/src/pkg/selectors/groups.go new file mode 100644 index 000000000..cc4a7ebfd --- /dev/null +++ b/src/pkg/selectors/groups.go @@ -0,0 +1,513 @@ +package selectors + +import ( + "context" + "fmt" + + "github.com/alcionai/clues" + + "github.com/alcionai/corso/src/pkg/backup/details" + "github.com/alcionai/corso/src/pkg/fault" + "github.com/alcionai/corso/src/pkg/path" +) + +// --------------------------------------------------------------------------- +// Selectors +// --------------------------------------------------------------------------- + +type ( + // groups provides an api for selecting + // data scopes applicable to the groups service. + groups struct { + Selector + } + + // GroupsBackup provides an api for selecting + // data scopes applicable to the groups service, + // plus backup-specific methods. + GroupsBackup struct { + groups + } + + // GroupsRestore provides an api for selecting + // data scopes applicable to the Groups service, + // plus restore-specific methods. + GroupsRestore struct { + groups + } +) + +var ( + _ Reducer = &GroupsRestore{} + _ pathCategorier = &GroupsRestore{} +) + +// NewGroupsBackup produces a new Selector with the service set to ServiceGroups. +func NewGroupsBackup(resources []string) *GroupsBackup { + src := GroupsBackup{ + groups{ + newSelector(ServiceGroups, resources), + }, + } + + return &src +} + +// ToGroupsBackup transforms the generic selector into a GroupsBackup. +// Errors if the service defined by the selector is not ServiceGroups. +func (s Selector) ToGroupsBackup() (*GroupsBackup, error) { + if s.Service != ServiceGroups { + return nil, badCastErr(ServiceGroups, s.Service) + } + + src := GroupsBackup{groups{s}} + + return &src, nil +} + +func (s GroupsBackup) SplitByResourceOwner(resources []string) []GroupsBackup { + sels := splitByProtectedResource[GroupsScope](s.Selector, resources, GroupsGroup) + + ss := make([]GroupsBackup, 0, len(sels)) + for _, sel := range sels { + ss = append(ss, GroupsBackup{groups{sel}}) + } + + return ss +} + +// NewGroupsRestore produces a new Selector with the service set to ServiceGroups. +func NewGroupsRestore(resources []string) *GroupsRestore { + src := GroupsRestore{ + groups{ + newSelector(ServiceGroups, resources), + }, + } + + return &src +} + +// ToGroupsRestore transforms the generic selector into a GroupsRestore. +// Errors if the service defined by the selector is not ServiceGroups. +func (s Selector) ToGroupsRestore() (*GroupsRestore, error) { + if s.Service != ServiceGroups { + return nil, badCastErr(ServiceGroups, s.Service) + } + + src := GroupsRestore{groups{s}} + + return &src, nil +} + +func (s GroupsRestore) SplitByResourceOwner(resources []string) []GroupsRestore { + sels := splitByProtectedResource[GroupsScope](s.Selector, resources, GroupsGroup) + + ss := make([]GroupsRestore, 0, len(sels)) + for _, sel := range sels { + ss = append(ss, GroupsRestore{groups{sel}}) + } + + return ss +} + +// PathCategories produces the aggregation of discrete resources described by each type of scope.
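A hedged usage sketch, not part of the diff: constructing the new Groups selectors introduced in groups.go. The group IDs are placeholders, and only the APIs defined in this file (NewGroupsBackup, AllData, Include, ToGroupsRestore) are used.

package example

import "github.com/alcionai/corso/src/pkg/selectors"

// groupsSelectorSketch shows how the new Groups backup selector might be
// constructed and later recovered from its generic form.
func groupsSelectorSketch() error {
	// group IDs are placeholders
	bsel := selectors.NewGroupsBackup([]string{"group-id-1", "group-id-2"})
	bsel.Include(bsel.AllData())

	// Both casts only require Service == ServiceGroups, so a stored generic
	// Selector can be turned back into either the backup or restore form.
	if _, err := bsel.Selector.ToGroupsRestore(); err != nil {
		return err
	}

	return nil
}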
+func (s groups) PathCategories() selectorPathCategories { + return selectorPathCategories{ + Excludes: pathCategoriesIn[GroupsScope, groupsCategory](s.Excludes), + Filters: pathCategoriesIn[GroupsScope, groupsCategory](s.Filters), + Includes: pathCategoriesIn[GroupsScope, groupsCategory](s.Includes), + } +} + +// --------------------------------------------------------------------------- +// Stringers and Concealers +// --------------------------------------------------------------------------- + +func (s GroupsScope) Conceal() string { return conceal(s) } +func (s GroupsScope) Format(fs fmt.State, r rune) { format(s, fs, r) } +func (s GroupsScope) String() string { return conceal(s) } +func (s GroupsScope) PlainString() string { return plainString(s) } + +// ------------------- +// Scope Factories + +// Include appends the provided scopes to the selector's inclusion set. +// Data is included if it matches ANY inclusion. +// The inclusion set is later filtered (all included data must pass ALL +// filters) and excluded (all included data must not match ANY exclusion). +// Data is included if it matches ANY inclusion (of the same data category). +// +// All parts of the scope must match for data to be included. +// Ex: File(s1, f1, i1) => only includes an item if it is owned by site s1, +// located in folder f1, and ID'd as i1. Use selectors.Any() to wildcard +// a scope value. No value will match if selectors.None() is provided. +// +// Group-level scopes will automatically apply the Any() wildcard to +// child properties. +// ex: Site(s1) automatically cascades to all folders and files owned +// by s1. +func (s *groups) Include(scopes ...[]GroupsScope) { + s.Includes = appendScopes(s.Includes, scopes...) +} + +// Exclude appends the provided scopes to the selector's exclusion set. +// Every Exclusion scope applies globally, affecting all inclusion scopes. +// Data is excluded if it matches ANY exclusion. +// +// All parts of the scope must match for data to be excluded. +// Ex: File(s1, f1, i1) => only excludes an item if it is owned by site s1, +// located in folder f1, and ID'd as i1. Use selectors.Any() to wildcard +// a scope value. No value will match if selectors.None() is provided. +// +// Group-level scopes will automatically apply the Any() wildcard to +// child properties. +// ex: Site(s1) automatically cascades to all folders and files owned +// by s1. +func (s *groups) Exclude(scopes ...[]GroupsScope) { + s.Excludes = appendScopes(s.Excludes, scopes...) +} + +// Filter appends the provided scopes to the selector's filters set. +// A selector with >0 filters and 0 inclusions will include any data +// that passes all filters. +// A selector with >0 filters and >0 inclusions will reduce the +// inclusion set to only the data that passes all filters. +// Data is retained if it passes ALL filters. +// +// All parts of the scope must match for data to pass the filter. +// Ex: File(s1, f1, i1) => only retains an item if it is owned by site s1, +// located in folder f1, and ID'd as i1. Use selectors.Any() to wildcard +// a scope value. No value will match if selectors.None() is provided. +// +// Group-level scopes will automatically apply the Any() wildcard to +// child properties. +// ex: Site(s1) automatically cascades to all folders and files owned +// by s1. +func (s *groups) Filter(scopes ...[]GroupsScope) { + s.Filters = appendScopes(s.Filters, scopes...) +} + +// Scopes retrieves the list of groupsScopes in the selector.
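A small sketch (not part of the patch) of the inclusion/exclusion semantics documented above; the group ID and container name are placeholders.

package example

import "github.com/alcionai/corso/src/pkg/selectors"

// excludeSketch includes all data for one group, then globally excludes any
// TODO container named "archive"; exclusions apply to every inclusion scope.
func excludeSketch() {
	sel := selectors.NewGroupsBackup([]string{"g1"})
	sel.Include(sel.AllData())
	sel.Exclude(sel.TODO([]string{"archive"}))
}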
+func (s *groups) Scopes() []GroupsScope { + return scopes[GroupsScope](s.Selector) +} + +// ------------------- +// Scope Factories + +// AllData produces one scope spanning all Groups data for the selected groups: +// the container target is set to selectors.Any, and child item targets default +// to selectors.Any as well. +func (s *groups) AllData() []GroupsScope { + scopes := []GroupsScope{} + + scopes = append( + scopes, + makeScope[GroupsScope](GroupsTODOContainer, Any())) + + return scopes +} + +// TODO produces one or more Groups TODO scopes. +// If any slice contains selectors.Any, that slice is reduced to [selectors.Any] +// If any slice contains selectors.None, that slice is reduced to [selectors.None] +// Any empty slice defaults to [selectors.None] +func (s *groups) TODO(lists []string, opts ...option) []GroupsScope { + var ( + scopes = []GroupsScope{} + os = append([]option{pathComparator()}, opts...) + ) + + scopes = append(scopes, makeScope[GroupsScope](GroupsTODOContainer, lists, os...)) + + return scopes +} + +// TODOItems produces one or more Groups TODO item scopes. +// If any slice contains selectors.Any, that slice is reduced to [selectors.Any] +// If any slice contains selectors.None, that slice is reduced to [selectors.None] +// If any slice is empty, it defaults to [selectors.None] +// options are only applied to the list scopes. +func (s *groups) TODOItems(lists, items []string, opts ...option) []GroupsScope { + scopes := []GroupsScope{} + + scopes = append( + scopes, + makeScope[GroupsScope](GroupsTODOItem, items, defaultItemOptions(s.Cfg)...). + set(GroupsTODOContainer, lists, opts...)) + + return scopes +} + +// ------------------- +// ItemInfo Factories + +// TODO + +// --------------------------------------------------------------------------- +// Categories +// --------------------------------------------------------------------------- + +// groupsCategory enumerates the type of the lowest level +// of data in a scope. +type groupsCategory string + +// interface compliance checks +var _ categorizer = GroupsCategoryUnknown + +const ( + GroupsCategoryUnknown groupsCategory = "" + + // types of data in Groups + GroupsGroup groupsCategory = "GroupsGroup" + GroupsTODOContainer groupsCategory = "GroupsTODOContainer" + GroupsTODOItem groupsCategory = "GroupsTODOItem" + + // details.itemInfo comparables + + // library drive selection + GroupsInfoSiteLibraryDrive groupsCategory = "GroupsInfoSiteLibraryDrive" +) + +// groupsLeafProperties describes common metadata of the leaf categories +var groupsLeafProperties = map[categorizer]leafProperty{ + GroupsTODOItem: { + pathKeys: []categorizer{GroupsTODOContainer, GroupsTODOItem}, + pathType: path.UnknownCategory, + }, + GroupsGroup: { // the root category must be represented, even though it isn't a leaf + pathKeys: []categorizer{GroupsGroup}, + pathType: path.UnknownCategory, + }, +} + +func (c groupsCategory) String() string { + return string(c) +} + +// leafCat returns the leaf category of the receiver. +// If the receiver category has multiple leaves (ex: User) or no leaves, +// (ex: Unknown), the receiver itself is returned.
+// Ex: ServiceTypeFolder.leafCat() => ServiceTypeItem +// Ex: ServiceUser.leafCat() => ServiceUser +func (c groupsCategory) leafCat() categorizer { + switch c { + case GroupsTODOContainer, GroupsInfoSiteLibraryDrive: + return GroupsTODOItem + } + + return c +} + +// rootCat returns the root category type. +func (c groupsCategory) rootCat() categorizer { + return GroupsGroup +} + +// unknownCat returns the unknown category type. +func (c groupsCategory) unknownCat() categorizer { + return GroupsCategoryUnknown +} + +// isUnion returns true if the category is a site or a webURL, which +// can act as an alternative identifier to siteID across all site types. +func (c groupsCategory) isUnion() bool { + return c == c.rootCat() +} + +// isLeaf is true if the category is a GroupsItem category. +func (c groupsCategory) isLeaf() bool { + return c == c.leafCat() +} + +// pathValues transforms the two paths to maps of identified properties. +// +// Example: +// [tenantID, service, siteID, category, folder, itemID] +// => {spFolder: folder, spItemID: itemID} +func (c groupsCategory) pathValues( + repo path.Path, + ent details.Entry, + cfg Config, +) (map[categorizer][]string, error) { + var ( + folderCat, itemCat categorizer + itemID string + rFld string + ) + + switch c { + case GroupsTODOContainer, GroupsTODOItem: + if ent.Groups == nil { + return nil, clues.New("no Groups ItemInfo in details") + } + + folderCat, itemCat = GroupsTODOContainer, GroupsTODOItem + rFld = ent.Groups.ParentPath + + default: + return nil, clues.New("unrecognized groupsCategory").With("category", c) + } + + item := ent.ItemRef + if len(item) == 0 { + item = repo.Item() + } + + if cfg.OnlyMatchItemNames { + item = ent.ItemInfo.Groups.ItemName + } + + result := map[categorizer][]string{ + folderCat: {rFld}, + itemCat: {item, ent.ShortRef}, + } + + if len(itemID) > 0 { + result[itemCat] = append(result[itemCat], itemID) + } + + return result, nil +} + +// pathKeys returns the path keys recognized by the receiver's leaf type. +func (c groupsCategory) pathKeys() []categorizer { + return groupsLeafProperties[c.leafCat()].pathKeys +} + +// PathType converts the category's leaf type into the matching path.CategoryType. +func (c groupsCategory) PathType() path.CategoryType { + return groupsLeafProperties[c.leafCat()].pathType +} + +// --------------------------------------------------------------------------- +// Scopes +// --------------------------------------------------------------------------- + +// GroupsScope specifies the data available +// when interfacing with the Groups service. +type GroupsScope scope + +// interface compliance checks +var _ scoper = &GroupsScope{} + +// Category describes the type of the data in scope. +func (s GroupsScope) Category() groupsCategory { + return groupsCategory(getCategory(s)) +} + +// categorizer type is a generic wrapper around Category. +// Primarily used by scopes.go to for abstract comparisons. +func (s GroupsScope) categorizer() categorizer { + return s.Category() +} + +// Matches returns true if the category is included in the scope's +// data type, and the target string matches that category's comparator. +func (s GroupsScope) Matches(cat groupsCategory, target string) bool { + return matches(s, cat, target) +} + +// InfoCategory returns the category enum of the scope info. +// If the scope is not an info type, returns GroupsUnknownCategory. 
+func (s GroupsScope) InfoCategory() groupsCategory { + return groupsCategory(getInfoCategory(s)) +} + +// IncludesCategory checks whether the scope includes a +// certain category of data. +// Ex: to check if the scope includes TODO item data: +// s.IncludesCategory(selectors.GroupsTODOItem) +func (s GroupsScope) IncludesCategory(cat groupsCategory) bool { + return categoryMatches(s.Category(), cat) +} + +// returns true if the category is included in the scope's data type, +// and the value is set to Any(). +func (s GroupsScope) IsAny(cat groupsCategory) bool { + return isAnyTarget(s, cat) +} + +// Get returns the data category in the scope. If the scope +// contains all data types for a group, it'll return the +// GroupsGroup category. +func (s GroupsScope) Get(cat groupsCategory) []string { + return getCatValue(s, cat) +} + +// sets a value by category to the scope. Only intended for internal use. +func (s GroupsScope) set(cat groupsCategory, v []string, opts ...option) GroupsScope { + os := []option{} + + switch cat { + case GroupsTODOContainer: + os = append(os, pathComparator()) + } + + return set(s, cat, v, append(os, opts...)...) +} + +// setDefaults ensures that group scopes express `AnyTgt` for their child category types. +func (s GroupsScope) setDefaults() { + switch s.Category() { + case GroupsGroup: + s[GroupsTODOContainer.String()] = passAny + s[GroupsTODOItem.String()] = passAny + case GroupsTODOContainer: + s[GroupsTODOItem.String()] = passAny + } +} + +// --------------------------------------------------------------------------- +// Backup Details Filtering +// --------------------------------------------------------------------------- + +// Reduce filters the entries in a details struct to only those that match the +// inclusions, filters, and exclusions in the selector. +func (s groups) Reduce( + ctx context.Context, + deets *details.Details, + errs *fault.Bus, +) *details.Details { + return reduce[GroupsScope]( + ctx, + deets, + s.Selector, + map[path.CategoryType]groupsCategory{ + path.UnknownCategory: GroupsTODOItem, + }, + errs) +} + +// matchesInfo handles the standard behavior when comparing a scope and a GroupsInfo +// returns true if the scope and info match for the provided category.
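Illustrative only, not part of this change: inspecting the produced scopes with the getters defined above. The group ID and container name are placeholders.

package example

import "github.com/alcionai/corso/src/pkg/selectors"

// inspectScopesSketch walks the include scopes and checks the category getters.
func inspectScopesSketch() {
	sel := selectors.NewGroupsBackup([]string{"g1"})
	sel.Include(sel.TODO([]string{"general"}))

	for _, sc := range sel.Scopes() {
		// container-level scopes default their item target to Any(), so this
		// branch is taken for the "general" scope above.
		if sc.IsAny(selectors.GroupsTODOItem) {
			_ = sc.Get(selectors.GroupsTODOContainer)
		}
	}
}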
+func (s GroupsScope) matchesInfo(dii details.ItemInfo) bool { + var ( + infoCat = s.InfoCategory() + i = "" + info = dii.Groups + ) + + if info == nil { + return false + } + + switch infoCat { + case GroupsInfoSiteLibraryDrive: + ds := []string{} + + if len(info.DriveName) > 0 { + ds = append(ds, info.DriveName) + } + + if len(info.DriveID) > 0 { + ds = append(ds, info.DriveID) + } + + return matchesAny(s, GroupsInfoSiteLibraryDrive, ds) + } + + return s.Matches(infoCat, i) +} diff --git a/src/pkg/selectors/onedrive.go b/src/pkg/selectors/onedrive.go index 18fa0fca3..057634215 100644 --- a/src/pkg/selectors/onedrive.go +++ b/src/pkg/selectors/onedrive.go @@ -68,7 +68,7 @@ func (s Selector) ToOneDriveBackup() (*OneDriveBackup, error) { } func (s OneDriveBackup) SplitByResourceOwner(users []string) []OneDriveBackup { - sels := splitByResourceOwner[OneDriveScope](s.Selector, users, OneDriveUser) + sels := splitByProtectedResource[OneDriveScope](s.Selector, users, OneDriveUser) ss := make([]OneDriveBackup, 0, len(sels)) for _, sel := range sels { @@ -102,7 +102,7 @@ func (s Selector) ToOneDriveRestore() (*OneDriveRestore, error) { } func (s OneDriveRestore) SplitByResourceOwner(users []string) []OneDriveRestore { - sels := splitByResourceOwner[OneDriveScope](s.Selector, users, OneDriveUser) + sels := splitByProtectedResource[OneDriveScope](s.Selector, users, OneDriveUser) ss := make([]OneDriveRestore, 0, len(sels)) for _, sel := range sels { diff --git a/src/pkg/selectors/onedrive_test.go b/src/pkg/selectors/onedrive_test.go index aeb2f19cd..71a7132d3 100644 --- a/src/pkg/selectors/onedrive_test.go +++ b/src/pkg/selectors/onedrive_test.go @@ -10,7 +10,7 @@ import ( "github.com/stretchr/testify/suite" "github.com/alcionai/corso/src/internal/common/dttm" - odConsts "github.com/alcionai/corso/src/internal/m365/onedrive/consts" + odConsts "github.com/alcionai/corso/src/internal/m365/service/onedrive/consts" "github.com/alcionai/corso/src/internal/tester" "github.com/alcionai/corso/src/pkg/backup/details" "github.com/alcionai/corso/src/pkg/fault" @@ -43,16 +43,12 @@ func (suite *OneDriveSelectorSuite) TestToOneDriveBackup() { } func (suite *OneDriveSelectorSuite) TestOneDriveSelector_AllData() { - t := suite.T() - var ( users = []string{"u1", "u2"} sel = NewOneDriveBackup(users) allScopes = sel.AllData() ) - assert.ElementsMatch(t, users, sel.DiscreteResourceOwners()) - // Initialize the selector Include, Exclude, Filter sel.Exclude(allScopes) sel.Include(allScopes) diff --git a/src/pkg/selectors/scopes.go b/src/pkg/selectors/scopes.go index aebd0f156..5a453bf4f 100644 --- a/src/pkg/selectors/scopes.go +++ b/src/pkg/selectors/scopes.go @@ -161,6 +161,267 @@ type ( } ) +// appendScopes iterates through each scope in the list of scope slices, +// calling setDefaults() to ensure it is completely populated, and appends +// those scopes to the `to` slice. +func appendScopes[T scopeT](to []scope, scopes ...[]T) []scope { + if len(to) == 0 { + to = []scope{} + } + + for _, scopeSl := range scopes { + for _, s := range scopeSl { + s.setDefaults() + to = append(to, scope(s)) + } + } + + return to +} + +// scopes retrieves the list of scopes in the selector. 
+func scopes[T scopeT](s Selector) []T { + scopes := []T{} + + for _, v := range s.Includes { + scopes = append(scopes, T(v)) + } + + return scopes +} + +// --------------------------------------------------------------------------- +// scope config & constructors +// --------------------------------------------------------------------------- + +// constructs the default item-scope comparator options according +// to the selector configuration. +// - if cfg.OnlyMatchItemNames == false, then comparison assumes item IDs, +// which are case sensitive, resulting in StrictEqualsMatch +func defaultItemOptions(cfg Config) []option { + opts := []option{} + + if !cfg.OnlyMatchItemNames { + opts = append(opts, StrictEqualMatch()) + } + + return opts +} + +type scopeConfig struct { + usePathFilter bool + usePrefixFilter bool + useSuffixFilter bool + useEqualsFilter bool + useStrictEqualsFilter bool +} + +type option func(*scopeConfig) + +func (sc *scopeConfig) populate(opts ...option) { + for _, opt := range opts { + opt(sc) + } +} + +// PrefixMatch ensures the selector uses a Prefix comparator, instead +// of contains or equals. Will not override a default Any() or None() +// comparator. +func PrefixMatch() option { + return func(sc *scopeConfig) { + sc.usePrefixFilter = true + } +} + +// SuffixMatch ensures the selector uses a Suffix comparator, instead +// of contains or equals. Will not override a default Any() or None() +// comparator. +func SuffixMatch() option { + return func(sc *scopeConfig) { + sc.useSuffixFilter = true + } +} + +// StrictEqualsMatch ensures the selector uses a StrictEquals comparator, instead +// of contains. Will not override a default Any() or None() comparator. +func StrictEqualMatch() option { + return func(sc *scopeConfig) { + sc.useStrictEqualsFilter = true + } +} + +// ExactMatch ensures the selector uses an Equals comparator, instead +// of contains. Will not override a default Any() or None() comparator. +func ExactMatch() option { + return func(sc *scopeConfig) { + sc.useEqualsFilter = true + } +} + +// pathComparator is an internal-facing option. It is assumed that scope +// constructors will provide the pathComparator option whenever a folder- +// level scope (ie, a scope that compares path hierarchies) is created. +func pathComparator() option { + return func(sc *scopeConfig) { + sc.usePathFilter = true + } +} + +func badCastErr(cast, is service) error { + return clues.Stack(ErrorBadSelectorCast, clues.New(fmt.Sprintf("%s is not %s", cast, is))) +} + +// if the provided slice contains Any, returns [Any] +// if the slice contains None, returns [None] +// if the slice contains Any and None, returns the first +// if the slice is empty, returns [None] +// otherwise returns the input +func clean(s []string) []string { + if len(s) == 0 { + return None() + } + + for _, e := range s { + if e == AnyTgt { + return Any() + } + + if e == NoneTgt { + return None() + } + } + + return s +} + +type filterFunc func([]string) filters.Filter + +// filterize turns the slice into a filter. +// if the input is Any(), returns a passAny filter. +// if the input is None(), returns a failAny filter. +// if the scopeConfig specifies a filter, use that filter. +// if the input is len(1), returns an Equals filter. +// otherwise returns a Contains filter. +func filterFor(sc scopeConfig, targets ...string) filters.Filter { + return filterize(sc, nil, targets...) +} + +// filterize turns the slice into a filter. +// if the input is Any(), returns a passAny filter. 
+// if the input is None(), returns a failAny filter. +// if the scopeConfig specifies a filter, use that filter. +// if defaultFilter is non-nil, returns that filter. +// if the input is len(1), returns an Equals filter. +// otherwise returns a Contains filter. +func filterize( + sc scopeConfig, + defaultFilter filterFunc, + targets ...string, +) filters.Filter { + targets = clean(targets) + + if len(targets) == 0 || targets[0] == NoneTgt { + return failAny + } + + if targets[0] == AnyTgt { + return passAny + } + + if sc.usePathFilter { + if sc.useEqualsFilter { + return filters.PathEquals(targets) + } + + if sc.usePrefixFilter { + return filters.PathPrefix(targets) + } + + if sc.useSuffixFilter { + return filters.PathSuffix(targets) + } + + return filters.PathContains(targets) + } + + if sc.usePrefixFilter { + return filters.Prefix(targets) + } + + if sc.useSuffixFilter { + return filters.Suffix(targets) + } + + if sc.useStrictEqualsFilter { + return filters.StrictEqual(targets) + } + + if defaultFilter != nil { + return defaultFilter(targets) + } + + return filters.Equal(targets) +} + +// pathFilterFactory returns the appropriate path filter +// (contains, prefix, or suffix) for the provided options. +// If multiple options are flagged, Prefix takes priority. +// If no options are provided, returns PathContains. +func pathFilterFactory(opts ...option) filterFunc { + sc := &scopeConfig{} + sc.populate(opts...) + + var ff filterFunc + + switch true { + case sc.usePrefixFilter: + ff = filters.PathPrefix + case sc.useSuffixFilter: + ff = filters.PathSuffix + case sc.useEqualsFilter: + ff = filters.PathEquals + default: + ff = filters.PathContains + } + + return wrapSliceFilter(ff) +} + +func wrapSliceFilter(ff filterFunc) filterFunc { + return func(s []string) filters.Filter { + s = clean(s) + + if f, ok := isAnyOrNone(s); ok { + return f + } + + return ff(s) + } +} + +// returns (, true) if s is len==1 and s[0] is +// anyTgt or noneTgt, implying that the caller should use +// the returned filter. On (, false), the caller +// can ignore the returned filter. +// a special case exists for len(s)==0, interpreted as +// "noneTgt" +func isAnyOrNone(s []string) (filters.Filter, bool) { + switch len(s) { + case 0: + return failAny, true + + case 1: + switch s[0] { + case AnyTgt: + return passAny, true + case NoneTgt: + return failAny, true + } + } + + return failAny, false +} + // makeScope produces a well formatted, typed scope that ensures all base values are populated. func makeScope[T scopeT]( cat categorizer, @@ -239,95 +500,9 @@ func marshalScope(mss map[string]string) string { } // --------------------------------------------------------------------------- -// scope funcs +// reducer & filtering // --------------------------------------------------------------------------- -// matches returns true if the category is included in the scope's -// data type, and the input string passes the scope's filter for -// that category. -func matches[T scopeT, C categoryT](s T, cat C, inpt string) bool { - if !typeAndCategoryMatches(cat, s.categorizer()) { - return false - } - - if len(inpt) == 0 { - return false - } - - return s[cat.String()].Compare(inpt) -} - -// matchesAny returns true if the category is included in the scope's -// data type, and any one of the input strings passes the scope's filter. 
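A sketch, not part of the patch, of the comparator options above applied to a folder-style scope: the Groups TODO factory supplies pathComparator, and PrefixMatch switches the comparison to a path-prefix filter. The container path and group ID are placeholders.

package example

import "github.com/alcionai/corso/src/pkg/selectors"

// prefixScopeSketch matches every TODO container whose path begins with the
// given prefix, rather than requiring an exact or contains match.
func prefixScopeSketch() {
	sel := selectors.NewGroupsBackup([]string{"g1"})
	sel.Include(sel.TODO([]string{"lists/general"}, selectors.PrefixMatch()))
}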
-func matchesAny[T scopeT, C categoryT](s T, cat C, inpts []string) bool { - if !typeAndCategoryMatches(cat, s.categorizer()) { - return false - } - - if len(inpts) == 0 { - return false - } - - return s[cat.String()].CompareAny(inpts...) -} - -// getCategory returns the scope's category value. -// if s is an info-type scope, returns the info category. -func getCategory[T scopeT](s T) string { - return s[scopeKeyCategory].Identity -} - -// getInfoCategory returns the scope's infoFilter category value. -func getInfoCategory[T scopeT](s T) string { - return s[scopeKeyInfoCategory].Identity -} - -// getCatValue takes the value of s[cat] and returns the slice. -// If s[cat] is nil, returns None(). -func getCatValue[T scopeT](s T, cat categorizer) []string { - filt, ok := s[cat.String()] - if !ok { - return None() - } - - if len(filt.Targets) > 0 { - return filt.Targets - } - - return filt.Targets -} - -// set sets a value by category to the scope. Only intended for internal -// use, not for exporting to callers. -func set[T scopeT](s T, cat categorizer, v []string, opts ...option) T { - sc := &scopeConfig{} - sc.populate(opts...) - - s[cat.String()] = filterFor(*sc, v...) - - return s -} - -// returns true if the category is included in the scope's category type, -// and the value is set to None(). -func isNoneTarget[T scopeT, C categoryT](s T, cat C) bool { - if !typeAndCategoryMatches(cat, s.categorizer()) { - return false - } - - return s[cat.String()].Comparator == filters.Fails -} - -// returns true if the category is included in the scope's category type, -// and the value is set to Any(). -func isAnyTarget[T scopeT, C categoryT](s T, cat C) bool { - if !typeAndCategoryMatches(cat, s.categorizer()) { - return false - } - - return s[cat.String()].Comparator == filters.Passes -} - // reduce filters the entries in the details to only those that match the // inclusions, filters, and exclusions in the selector. func reduce[T scopeT, C categoryT]( @@ -542,6 +717,92 @@ func matchesPathValues[T scopeT, C categoryT]( // helper funcs // --------------------------------------------------------------------------- +// matches returns true if the category is included in the scope's +// data type, and the input string passes the scope's filter for +// that category. +func matches[T scopeT, C categoryT](s T, cat C, inpt string) bool { + if !typeAndCategoryMatches(cat, s.categorizer()) { + return false + } + + if len(inpt) == 0 { + return false + } + + return s[cat.String()].Compare(inpt) +} + +// matchesAny returns true if the category is included in the scope's +// data type, and any one of the input strings passes the scope's filter. +func matchesAny[T scopeT, C categoryT](s T, cat C, inpts []string) bool { + if !typeAndCategoryMatches(cat, s.categorizer()) { + return false + } + + if len(inpts) == 0 { + return false + } + + return s[cat.String()].CompareAny(inpts...) +} + +// getCategory returns the scope's category value. +// if s is an info-type scope, returns the info category. +func getCategory[T scopeT](s T) string { + return s[scopeKeyCategory].Identity +} + +// getInfoCategory returns the scope's infoFilter category value. +func getInfoCategory[T scopeT](s T) string { + return s[scopeKeyInfoCategory].Identity +} + +// getCatValue takes the value of s[cat] and returns the slice. +// If s[cat] is nil, returns None(). 
+func getCatValue[T scopeT](s T, cat categorizer) []string { + filt, ok := s[cat.String()] + if !ok { + return None() + } + + if len(filt.Targets) > 0 { + return filt.Targets + } + + return filt.Targets +} + +// set sets a value by category to the scope. Only intended for internal +// use, not for exporting to callers. +func set[T scopeT](s T, cat categorizer, v []string, opts ...option) T { + sc := &scopeConfig{} + sc.populate(opts...) + + s[cat.String()] = filterFor(*sc, v...) + + return s +} + +// returns true if the category is included in the scope's category type, +// and the value is set to None(). +func isNoneTarget[T scopeT, C categoryT](s T, cat C) bool { + if !typeAndCategoryMatches(cat, s.categorizer()) { + return false + } + + return s[cat.String()].Comparator == filters.Fails +} + +// returns true if the category is included in the scope's category type, +// and the value is set to Any(). +func isAnyTarget[T scopeT, C categoryT](s T, cat C) bool { + if !typeAndCategoryMatches(cat, s.categorizer()) { + return false + } + + return s[cat.String()].Comparator == filters.Passes +} + // categoryMatches returns true if: // - neither type is 'unknown' // - either type is the root type diff --git a/src/pkg/selectors/selectors.go b/src/pkg/selectors/selectors.go index 936bc3a32..474ab60f5 100644 --- a/src/pkg/selectors/selectors.go +++ b/src/pkg/selectors/selectors.go @@ -23,6 +23,7 @@ const ( ServiceExchange // Exchange ServiceOneDrive // OneDrive ServiceSharePoint // SharePoint + ServiceGroups // Groups ) var serviceToPathType = map[service]path.ServiceType{ @@ -158,11 +159,25 @@ func (s *Selector) Configure(cfg Config) { s.Cfg = cfg } -// DiscreteResourceOwners returns the list of individual resourceOwners used -// in the selector. -// TODO(rkeepers): remove in favor of split and s.DiscreteOwner -func (s Selector) DiscreteResourceOwners() []string { - return s.ResourceOwners.Targets +// --------------------------------------------------------------------------- +// protected resources & idname provider compliance +// --------------------------------------------------------------------------- + +var _ idname.Provider = &Selector{} + +// ID returns s.discreteOwner, which is assumed to be a stable ID. +func (s Selector) ID() string { + return s.DiscreteOwner +} + +// Name returns s.discreteOwnerName. If that value is empty, it returns +// s.DiscreteOwner instead. +func (s Selector) Name() string { + if len(s.DiscreteOwnerName) == 0 { + return s.DiscreteOwner + } + + return s.DiscreteOwnerName } // SetDiscreteOwnerIDName ensures the selector has the correct discrete owner @@ -192,32 +207,17 @@ func (s Selector) SetDiscreteOwnerIDName(id, name string) Selector { return r } -// ID returns s.discreteOwner, which is assumed to be a stable ID. -func (s Selector) ID() string { - return s.DiscreteOwner -} - -// Name returns s.discreteOwnerName. If that value is empty, it returns -// s.DiscreteOwner instead. -func (s Selector) Name() string { - if len(s.DiscreteOwnerName) == 0 { - return s.DiscreteOwner - } - - return s.DiscreteOwnerName -} - -// isAnyResourceOwner returns true if the selector includes all resource owners. -func isAnyResourceOwner(s Selector) bool { +// isAnyProtectedResource returns true if the selector includes all resource owners. +func isAnyProtectedResource(s Selector) bool { return s.ResourceOwners.Comparator == filters.Passes } -// isNoneResourceOwner returns true if the selector includes no resource owners. 
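A usage sketch (not in the diff) of the idname.Provider behavior added above; the site ID and display name are placeholders.

package example

import (
	"fmt"

	"github.com/alcionai/corso/src/pkg/selectors"
)

// providerSketch shows the ID/Name fallback behavior of a selector acting as
// an idname.Provider.
func providerSketch() {
	sel := selectors.NewSharePointBackup([]string{"site-id"})

	s := sel.SetDiscreteOwnerIDName("site-id", "Marketing Site")
	fmt.Println(s.ID())   // "site-id"
	fmt.Println(s.Name()) // "Marketing Site"; falls back to ID() when the name is empty
}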
-func isNoneResourceOwner(s Selector) bool { +// isNoneProtectedResource returns true if the selector includes no resource owners. +func isNoneProtectedResource(s Selector) bool { return s.ResourceOwners.Comparator == filters.Fails } -// SplitByResourceOwner makes one shallow clone for each resourceOwner in the +// splitByProtectedResource makes one shallow clone for each resourceOwner in the // selector, specifying a new DiscreteOwner for each one. // If the original selector already specified a discrete slice of resource owners, // only those owners are used in the result. @@ -229,14 +229,14 @@ func isNoneResourceOwner(s Selector) bool { // // temporarily, clones all scopes in each selector and replaces the owners with // the discrete owner. -func splitByResourceOwner[T scopeT, C categoryT](s Selector, allOwners []string, rootCat C) []Selector { - if isNoneResourceOwner(s) { +func splitByProtectedResource[T scopeT, C categoryT](s Selector, allOwners []string, rootCat C) []Selector { + if isNoneProtectedResource(s) { return []Selector{} } targets := allOwners - if !isAnyResourceOwner(s) { + if !isAnyProtectedResource(s) { targets = s.ResourceOwners.Targets } @@ -251,35 +251,6 @@ func splitByResourceOwner[T scopeT, C categoryT](s Selector, allOwners []string, return ss } -// appendScopes iterates through each scope in the list of scope slices, -// calling setDefaults() to ensure it is completely populated, and appends -// those scopes to the `to` slice. -func appendScopes[T scopeT](to []scope, scopes ...[]T) []scope { - if len(to) == 0 { - to = []scope{} - } - - for _, scopeSl := range scopes { - for _, s := range scopeSl { - s.setDefaults() - to = append(to, scope(s)) - } - } - - return to -} - -// scopes retrieves the list of scopes in the selector. -func scopes[T scopeT](s Selector) []T { - scopes := []T{} - - for _, v := range s.Includes { - scopes = append(scopes, T(v)) - } - - return scopes -} - // Returns the path.ServiceType matching the selector service. func (s Selector) PathService() path.ServiceType { return serviceToPathType[s.Service] @@ -330,6 +301,9 @@ func selectorAsIface[T any](s Selector) (T, error) { case ServiceSharePoint: a, err = func() (any, error) { return s.ToSharePointRestore() }() t = a.(T) + case ServiceGroups: + a, err = func() (any, error) { return s.ToGroupsRestore() }() + t = a.(T) default: err = clues.Stack(ErrorUnrecognizedService, clues.New(s.Service.String())) } @@ -419,28 +393,6 @@ func (ls loggableSelector) marshal() string { // helpers // --------------------------------------------------------------------------- -// produces the discrete set of resource owners in the slice of scopes. -// Any and None values are discarded. -func resourceOwnersIn(s []scope, rootCat string) []string { - rm := map[string]struct{}{} - - for _, sc := range s { - for _, v := range sc[rootCat].Targets { - rm[v] = struct{}{} - } - } - - rs := []string{} - - for k := range rm { - if k != AnyTgt && k != NoneTgt { - rs = append(rs, k) - } - } - - return rs -} - // produces the discrete set of path categories in the slice of scopes. 
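A sketch, not part of this change, of the splitting behavior described above: a wildcard selector is fanned out into one discrete selector per known owner, which is what splitByProtectedResource implements. The owner IDs are placeholders.

package example

import "github.com/alcionai/corso/src/pkg/selectors"

// splitSketch produces one OneDriveBackup per user, each with its own
// DiscreteOwner, from a single wildcard selector.
func splitSketch() {
	sel := selectors.NewOneDriveBackup(selectors.Any())
	perOwner := sel.SplitByResourceOwner([]string{"user-1", "user-2"})
	_ = perOwner
}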
func pathCategoriesIn[T scopeT, C categoryT](ss []scope) []path.CategoryType { m := map[path.CategoryType]struct{}{} @@ -458,235 +410,3 @@ func pathCategoriesIn[T scopeT, C categoryT](ss []scope) []path.CategoryType { return maps.Keys(m) } - -// --------------------------------------------------------------------------- -// scope constructors -// --------------------------------------------------------------------------- - -// constructs the default item-scope comparator options according -// to the selector configuration. -// - if cfg.OnlyMatchItemNames == false, then comparison assumes item IDs, -// which are case sensitive, resulting in StrictEqualsMatch -func defaultItemOptions(cfg Config) []option { - opts := []option{} - - if !cfg.OnlyMatchItemNames { - opts = append(opts, StrictEqualMatch()) - } - - return opts -} - -type scopeConfig struct { - usePathFilter bool - usePrefixFilter bool - useSuffixFilter bool - useEqualsFilter bool - useStrictEqualsFilter bool -} - -type option func(*scopeConfig) - -func (sc *scopeConfig) populate(opts ...option) { - for _, opt := range opts { - opt(sc) - } -} - -// PrefixMatch ensures the selector uses a Prefix comparator, instead -// of contains or equals. Will not override a default Any() or None() -// comparator. -func PrefixMatch() option { - return func(sc *scopeConfig) { - sc.usePrefixFilter = true - } -} - -// SuffixMatch ensures the selector uses a Suffix comparator, instead -// of contains or equals. Will not override a default Any() or None() -// comparator. -func SuffixMatch() option { - return func(sc *scopeConfig) { - sc.useSuffixFilter = true - } -} - -// StrictEqualsMatch ensures the selector uses a StrictEquals comparator, instead -// of contains. Will not override a default Any() or None() comparator. -func StrictEqualMatch() option { - return func(sc *scopeConfig) { - sc.useStrictEqualsFilter = true - } -} - -// ExactMatch ensures the selector uses an Equals comparator, instead -// of contains. Will not override a default Any() or None() comparator. -func ExactMatch() option { - return func(sc *scopeConfig) { - sc.useEqualsFilter = true - } -} - -// pathComparator is an internal-facing option. It is assumed that scope -// constructors will provide the pathComparator option whenever a folder- -// level scope (ie, a scope that compares path hierarchies) is created. -func pathComparator() option { - return func(sc *scopeConfig) { - sc.usePathFilter = true - } -} - -func badCastErr(cast, is service) error { - return clues.Stack(ErrorBadSelectorCast, clues.New(fmt.Sprintf("%s is not %s", cast, is))) -} - -// if the provided slice contains Any, returns [Any] -// if the slice contains None, returns [None] -// if the slice contains Any and None, returns the first -// if the slice is empty, returns [None] -// otherwise returns the input -func clean(s []string) []string { - if len(s) == 0 { - return None() - } - - for _, e := range s { - if e == AnyTgt { - return Any() - } - - if e == NoneTgt { - return None() - } - } - - return s -} - -type filterFunc func([]string) filters.Filter - -// filterize turns the slice into a filter. -// if the input is Any(), returns a passAny filter. -// if the input is None(), returns a failAny filter. -// if the scopeConfig specifies a filter, use that filter. -// if the input is len(1), returns an Equals filter. -// otherwise returns a Contains filter. -func filterFor(sc scopeConfig, targets ...string) filters.Filter { - return filterize(sc, nil, targets...) -} - -// filterize turns the slice into a filter. 
-// if the input is Any(), returns a passAny filter. -// if the input is None(), returns a failAny filter. -// if the scopeConfig specifies a filter, use that filter. -// if defaultFilter is non-nil, returns that filter. -// if the input is len(1), returns an Equals filter. -// otherwise returns a Contains filter. -func filterize( - sc scopeConfig, - defaultFilter filterFunc, - targets ...string, -) filters.Filter { - targets = clean(targets) - - if len(targets) == 0 || targets[0] == NoneTgt { - return failAny - } - - if targets[0] == AnyTgt { - return passAny - } - - if sc.usePathFilter { - if sc.useEqualsFilter { - return filters.PathEquals(targets) - } - - if sc.usePrefixFilter { - return filters.PathPrefix(targets) - } - - if sc.useSuffixFilter { - return filters.PathSuffix(targets) - } - - return filters.PathContains(targets) - } - - if sc.usePrefixFilter { - return filters.Prefix(targets) - } - - if sc.useSuffixFilter { - return filters.Suffix(targets) - } - - if sc.useStrictEqualsFilter { - return filters.StrictEqual(targets) - } - - if defaultFilter != nil { - return defaultFilter(targets) - } - - return filters.Equal(targets) -} - -// pathFilterFactory returns the appropriate path filter -// (contains, prefix, or suffix) for the provided options. -// If multiple options are flagged, Prefix takes priority. -// If no options are provided, returns PathContains. -func pathFilterFactory(opts ...option) filterFunc { - sc := &scopeConfig{} - sc.populate(opts...) - - var ff filterFunc - - switch true { - case sc.usePrefixFilter: - ff = filters.PathPrefix - case sc.useSuffixFilter: - ff = filters.PathSuffix - case sc.useEqualsFilter: - ff = filters.PathEquals - default: - ff = filters.PathContains - } - - return wrapSliceFilter(ff) -} - -func wrapSliceFilter(ff filterFunc) filterFunc { - return func(s []string) filters.Filter { - s = clean(s) - - if f, ok := isAnyOrNone(s); ok { - return f - } - - return ff(s) - } -} - -// returns (, true) if s is len==1 and s[0] is -// anyTgt or noneTgt, implying that the caller should use -// the returned filter. On (, false), the caller -// can ignore the returned filter. 
-// a special case exists for len(s)==0, interpreted as -// "noneTgt" -func isAnyOrNone(s []string) (filters.Filter, bool) { - switch len(s) { - case 0: - return failAny, true - - case 1: - switch s[0] { - case AnyTgt: - return passAny, true - case NoneTgt: - return failAny, true - } - } - - return failAny, false -} diff --git a/src/pkg/selectors/selectors_test.go b/src/pkg/selectors/selectors_test.go index 3931adfec..30d20c3c9 100644 --- a/src/pkg/selectors/selectors_test.go +++ b/src/pkg/selectors/selectors_test.go @@ -44,56 +44,6 @@ func (suite *SelectorSuite) TestBadCastErr() { assert.Error(suite.T(), err, clues.ToCore(err)) } -func (suite *SelectorSuite) TestResourceOwnersIn() { - rootCat := rootCatStub.String() - - table := []struct { - name string - input []scope - expect []string - }{ - { - name: "nil", - input: nil, - expect: []string{}, - }, - { - name: "empty", - input: []scope{}, - expect: []string{}, - }, - { - name: "single", - input: []scope{{rootCat: filters.Identity("foo")}}, - expect: []string{"foo"}, - }, - { - name: "multiple scopes", - input: []scope{ - {rootCat: filters.Identity("foo,bar")}, - {rootCat: filters.Identity("baz")}, - }, - expect: []string{"foo,bar", "baz"}, - }, - { - name: "multiple scopes with duplicates", - input: []scope{ - {rootCat: filters.Identity("foo")}, - {rootCat: filters.Identity("foo")}, - }, - expect: []string{"foo"}, - }, - } - for _, test := range table { - suite.Run(test.name, func() { - t := suite.T() - - result := resourceOwnersIn(test.input, rootCat) - assert.ElementsMatch(t, test.expect, result) - }) - } -} - func (suite *SelectorSuite) TestPathCategoriesIn() { leafCat := leafCatStub.String() f := filters.Identity(leafCat) @@ -144,20 +94,20 @@ func (suite *SelectorSuite) TestContains() { func (suite *SelectorSuite) TestIsAnyResourceOwner() { t := suite.T() - assert.False(t, isAnyResourceOwner(newSelector(ServiceUnknown, []string{"foo"}))) - assert.False(t, isAnyResourceOwner(newSelector(ServiceUnknown, []string{}))) - assert.False(t, isAnyResourceOwner(newSelector(ServiceUnknown, nil))) - assert.True(t, isAnyResourceOwner(newSelector(ServiceUnknown, []string{AnyTgt}))) - assert.True(t, isAnyResourceOwner(newSelector(ServiceUnknown, Any()))) + assert.False(t, isAnyProtectedResource(newSelector(ServiceUnknown, []string{"foo"}))) + assert.False(t, isAnyProtectedResource(newSelector(ServiceUnknown, []string{}))) + assert.False(t, isAnyProtectedResource(newSelector(ServiceUnknown, nil))) + assert.True(t, isAnyProtectedResource(newSelector(ServiceUnknown, []string{AnyTgt}))) + assert.True(t, isAnyProtectedResource(newSelector(ServiceUnknown, Any()))) } func (suite *SelectorSuite) TestIsNoneResourceOwner() { t := suite.T() - assert.False(t, isNoneResourceOwner(newSelector(ServiceUnknown, []string{"foo"}))) - assert.True(t, isNoneResourceOwner(newSelector(ServiceUnknown, []string{}))) - assert.True(t, isNoneResourceOwner(newSelector(ServiceUnknown, nil))) - assert.True(t, isNoneResourceOwner(newSelector(ServiceUnknown, []string{NoneTgt}))) - assert.True(t, isNoneResourceOwner(newSelector(ServiceUnknown, None()))) + assert.False(t, isNoneProtectedResource(newSelector(ServiceUnknown, []string{"foo"}))) + assert.True(t, isNoneProtectedResource(newSelector(ServiceUnknown, []string{}))) + assert.True(t, isNoneProtectedResource(newSelector(ServiceUnknown, nil))) + assert.True(t, isNoneProtectedResource(newSelector(ServiceUnknown, []string{NoneTgt}))) + assert.True(t, isNoneProtectedResource(newSelector(ServiceUnknown, None()))) } func (suite 
*SelectorSuite) TestSplitByResourceOnwer() { @@ -224,7 +174,7 @@ func (suite *SelectorSuite) TestSplitByResourceOnwer() { t := suite.T() s := newSelector(ServiceUnknown, test.input) - result := splitByResourceOwner[mockScope](s, allOwners, rootCatStub) + result := splitByProtectedResource[mockScope](s, allOwners, rootCatStub) assert.Len(t, result, test.expectLen) diff --git a/src/pkg/selectors/service_string.go b/src/pkg/selectors/service_string.go index 1608c9969..c20205128 100644 --- a/src/pkg/selectors/service_string.go +++ b/src/pkg/selectors/service_string.go @@ -12,11 +12,12 @@ func _() { _ = x[ServiceExchange-1] _ = x[ServiceOneDrive-2] _ = x[ServiceSharePoint-3] + _ = x[ServiceGroups-4] } -const _service_name = "Unknown ServiceExchangeOneDriveSharePoint" +const _service_name = "Unknown ServiceExchangeOneDriveSharePointGroups" -var _service_index = [...]uint8{0, 15, 23, 31, 41} +var _service_index = [...]uint8{0, 15, 23, 31, 41, 47} func (i service) String() string { if i < 0 || i >= service(len(_service_index)-1) { diff --git a/src/pkg/selectors/sharepoint.go b/src/pkg/selectors/sharepoint.go index a408f6339..31ad200c0 100644 --- a/src/pkg/selectors/sharepoint.go +++ b/src/pkg/selectors/sharepoint.go @@ -68,7 +68,7 @@ func (s Selector) ToSharePointBackup() (*SharePointBackup, error) { } func (s SharePointBackup) SplitByResourceOwner(sites []string) []SharePointBackup { - sels := splitByResourceOwner[SharePointScope](s.Selector, sites, SharePointSite) + sels := splitByProtectedResource[SharePointScope](s.Selector, sites, SharePointSite) ss := make([]SharePointBackup, 0, len(sels)) for _, sel := range sels { @@ -102,7 +102,7 @@ func (s Selector) ToSharePointRestore() (*SharePointRestore, error) { } func (s SharePointRestore) SplitByResourceOwner(sites []string) []SharePointRestore { - sels := splitByResourceOwner[SharePointScope](s.Selector, sites, SharePointSite) + sels := splitByProtectedResource[SharePointScope](s.Selector, sites, SharePointSite) ss := make([]SharePointRestore, 0, len(sels)) for _, sel := range sels { diff --git a/src/pkg/selectors/sharepoint_test.go b/src/pkg/selectors/sharepoint_test.go index a8003951e..d2b75469f 100644 --- a/src/pkg/selectors/sharepoint_test.go +++ b/src/pkg/selectors/sharepoint_test.go @@ -12,7 +12,7 @@ import ( "golang.org/x/exp/slices" "github.com/alcionai/corso/src/internal/common/dttm" - odConsts "github.com/alcionai/corso/src/internal/m365/onedrive/consts" + odConsts "github.com/alcionai/corso/src/internal/m365/service/onedrive/consts" "github.com/alcionai/corso/src/internal/tester" "github.com/alcionai/corso/src/pkg/backup/details" "github.com/alcionai/corso/src/pkg/fault" @@ -44,66 +44,6 @@ func (suite *SharePointSelectorSuite) TestToSharePointBackup() { assert.NotZero(t, ob.Scopes()) } -func (suite *SharePointSelectorSuite) TestSharePointSelector_AllData() { - t := suite.T() - - sites := []string{"s1", "s2"} - - sel := NewSharePointBackup(sites) - siteScopes := sel.AllData() - - assert.ElementsMatch(t, sites, sel.DiscreteResourceOwners()) - - // Initialize the selector Include, Exclude, Filter - sel.Exclude(siteScopes) - sel.Include(siteScopes) - sel.Filter(siteScopes) - - table := []struct { - name string - scopesToCheck []scope - }{ - {"Include Scopes", sel.Includes}, - {"Exclude Scopes", sel.Excludes}, - {"info scopes", sel.Filters}, - } - for _, test := range table { - require.Len(t, test.scopesToCheck, 3) - - for _, scope := range test.scopesToCheck { - var ( - spsc = SharePointScope(scope) - cat = spsc.Category() - ) - - 
suite.Run(test.name+"-"+cat.String(), func() { - t := suite.T() - - switch cat { - case SharePointLibraryItem: - scopeMustHave( - t, - spsc, - map[categorizer][]string{ - SharePointLibraryItem: Any(), - SharePointLibraryFolder: Any(), - }, - ) - case SharePointListItem: - scopeMustHave( - t, - spsc, - map[categorizer][]string{ - SharePointListItem: Any(), - SharePointList: Any(), - }, - ) - } - }) - } - } -} - func (suite *SharePointSelectorSuite) TestSharePointSelector_Include_WebURLs() { t := suite.T() diff --git a/src/pkg/services/m365/api/client_test.go b/src/pkg/services/m365/api/client_test.go index 6385ce41c..20a3007cc 100644 --- a/src/pkg/services/m365/api/client_test.go +++ b/src/pkg/services/m365/api/client_test.go @@ -9,7 +9,7 @@ import ( "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" - exchMock "github.com/alcionai/corso/src/internal/m365/exchange/mock" + exchMock "github.com/alcionai/corso/src/internal/m365/service/exchange/mock" "github.com/alcionai/corso/src/internal/tester" "github.com/alcionai/corso/src/internal/tester/tconfig" "github.com/alcionai/corso/src/pkg/account" diff --git a/src/pkg/services/m365/api/contacts_test.go b/src/pkg/services/m365/api/contacts_test.go index 865adf9a1..afc344cc5 100644 --- a/src/pkg/services/m365/api/contacts_test.go +++ b/src/pkg/services/m365/api/contacts_test.go @@ -11,7 +11,7 @@ import ( "github.com/stretchr/testify/suite" "github.com/alcionai/corso/src/internal/common/ptr" - exchMock "github.com/alcionai/corso/src/internal/m365/exchange/mock" + exchMock "github.com/alcionai/corso/src/internal/m365/service/exchange/mock" "github.com/alcionai/corso/src/internal/tester" "github.com/alcionai/corso/src/internal/tester/tconfig" "github.com/alcionai/corso/src/pkg/backup/details" diff --git a/src/pkg/services/m365/api/drive.go b/src/pkg/services/m365/api/drive.go index 85bc0a8ca..e40d7497a 100644 --- a/src/pkg/services/m365/api/drive.go +++ b/src/pkg/services/m365/api/drive.go @@ -255,7 +255,7 @@ func (c Drives) GetItemPermission( Permissions(). 
Get(ctx, nil) if err != nil { - return nil, graph.Wrap(ctx, err, "getting item metadata").With("item_id", itemID) + return nil, graph.Wrap(ctx, err, "getting item permission").With("item_id", itemID) } return perm, nil diff --git a/src/pkg/services/m365/api/drive_pager.go b/src/pkg/services/m365/api/drive_pager.go index 3ba6e4b46..7a8c100a3 100644 --- a/src/pkg/services/m365/api/drive_pager.go +++ b/src/pkg/services/m365/api/drive_pager.go @@ -13,7 +13,7 @@ import ( "github.com/alcionai/corso/src/internal/common/ptr" "github.com/alcionai/corso/src/internal/m365/graph" - onedrive "github.com/alcionai/corso/src/internal/m365/onedrive/consts" + onedrive "github.com/alcionai/corso/src/internal/m365/service/onedrive/consts" "github.com/alcionai/corso/src/pkg/logger" ) diff --git a/src/pkg/services/m365/api/events_test.go b/src/pkg/services/m365/api/events_test.go index 383376cce..cf7d9873f 100644 --- a/src/pkg/services/m365/api/events_test.go +++ b/src/pkg/services/m365/api/events_test.go @@ -13,8 +13,8 @@ import ( "github.com/alcionai/corso/src/internal/common/dttm" "github.com/alcionai/corso/src/internal/common/ptr" - exchMock "github.com/alcionai/corso/src/internal/m365/exchange/mock" "github.com/alcionai/corso/src/internal/m365/graph" + exchMock "github.com/alcionai/corso/src/internal/m365/service/exchange/mock" "github.com/alcionai/corso/src/internal/tester" "github.com/alcionai/corso/src/internal/tester/tconfig" "github.com/alcionai/corso/src/pkg/backup/details" diff --git a/src/pkg/services/m365/api/mail_test.go b/src/pkg/services/m365/api/mail_test.go index 812e86a0c..5c0f1ccd8 100644 --- a/src/pkg/services/m365/api/mail_test.go +++ b/src/pkg/services/m365/api/mail_test.go @@ -12,7 +12,7 @@ import ( "github.com/stretchr/testify/suite" "github.com/alcionai/corso/src/internal/common/ptr" - exchMock "github.com/alcionai/corso/src/internal/m365/exchange/mock" + exchMock "github.com/alcionai/corso/src/internal/m365/service/exchange/mock" "github.com/alcionai/corso/src/internal/tester" "github.com/alcionai/corso/src/internal/tester/tconfig" "github.com/alcionai/corso/src/pkg/backup/details"