From cb4006eb6009cdde4853418b119439bc9c36448d Mon Sep 17 00:00:00 2001 From: ashmrtn <3891298+ashmrtn@users.noreply.github.com> Date: Wed, 24 Jan 2024 13:18:22 -0800 Subject: [PATCH] Configure new corso repos with 8 hr minEpochDuration (#5086) We've found that reducing the MinEpochDuration parameter in kopia can reduce GET requests sent to the object store by a large amount (>50% in some cases) for repos that run many backups each day This PR updates the init repo code path to set this parameter to 8hrs instead of the default 24hrs to bring in some of those savings --- #### Does this PR need a docs update or release note? - [ ] :white_check_mark: Yes, it's included - [ ] :clock1: Yes, but in a later PR - [x] :no_entry: No #### Type of change - [x] :sunflower: Feature - [ ] :bug: Bugfix - [ ] :world_map: Documentation - [ ] :robot: Supportability/Tests - [ ] :computer: CI/Deployment - [ ] :broom: Tech Debt/Cleanup #### Test Plan - [ ] :muscle: Manual - [x] :zap: Unit test - [ ] :green_heart: E2E --- CHANGELOG.md | 3 ++ src/internal/kopia/conn.go | 23 +++++++-- src/internal/kopia/conn_test.go | 89 ++++++++++++++++++++++++++++++++- 3 files changed, 109 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 83a8985cb..a0c4cc2cc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added - Events can now be exported from Exchange backups as .ics files. +- Update repo init configuration to reduce the total number of GET requests sent + to the object store when using corso. This affects repos that have many + backups created in them per day the most. ### Fixed - Retry transient 400 "invalidRequest" errors during onedrive & sharepoint backup. diff --git a/src/internal/kopia/conn.go b/src/internal/kopia/conn.go index fb0687d45..fb98dd438 100644 --- a/src/internal/kopia/conn.go +++ b/src/internal/kopia/conn.go @@ -34,12 +34,8 @@ const ( defaultCompressor = "zstd-better-compression" // Interval of 0 disables scheduling. defaultSchedulingInterval = time.Second * 0 -) - -var ( - ErrSettingDefaultConfig = clues.New("setting default repo config values") - ErrorRepoAlreadyExists = clues.New("repo already exists") + defaultMinEpochDuration = time.Hour * 8 // minEpochDurationLowerBound is the minimum corso will allow the kopia epoch // duration to be set to. This number can still be tuned further, right now // it's just to make sure it's not set to something totally wild. @@ -59,6 +55,11 @@ var ( minEpochDurationUpperBound = 7 * 24 * time.Hour ) +var ( + ErrSettingDefaultConfig = clues.New("setting default repo config values") + ErrorRepoAlreadyExists = clues.New("repo already exists") +) + // Having all fields set to 0 causes it to keep max-int versions of snapshots. var ( zeroOpt = policy.OptionalInt(0) @@ -168,6 +169,18 @@ func (w *conn) Initialize( return clues.StackWC(ctx, err) } + // In theory it should be possible to set this when creating the repo. + // However, the existing code paths for repo init in kopia end up clobbering + // any custom parameters passed in with default values. It's not clear if + // that's intentional or not. + if err := w.updatePersistentConfig( + ctx, + repository.PersistentConfig{ + MinEpochDuration: ptr.To(defaultMinEpochDuration), + }); err != nil { + return clues.Stack(err) + } + // Calling with all parameters here will set extend object locks for // maintenance. Parameters for actual retention should have been set during // initialization and won't be updated again. diff --git a/src/internal/kopia/conn_test.go b/src/internal/kopia/conn_test.go index b3dac0917..e6d534f80 100644 --- a/src/internal/kopia/conn_test.go +++ b/src/internal/kopia/conn_test.go @@ -284,7 +284,87 @@ func (suite *WrapperIntegrationSuite) TestSetCompressor() { string(policyTree.EffectivePolicy().CompressionPolicy.CompressorName)) } -func (suite *WrapperIntegrationSuite) TestConfigDefaultsSetOnInitAndNotOnConnect() { +func (suite *WrapperIntegrationSuite) TestConfigPersistentConfigOnInitAndNotOnConnect() { + repoNameHash := strTD.NewHashForRepoConfigName() + + table := []struct { + name string + mutateParams repository.PersistentConfig + checkFunc func( + t *testing.T, + wanted repository.PersistentConfig, + mutableParams format.MutableParameters, + blobConfig format.BlobStorageConfiguration, + ) + }{ + { + name: "MinEpochDuration", + mutateParams: repository.PersistentConfig{ + MinEpochDuration: ptr.To(defaultMinEpochDuration + time.Minute), + }, + checkFunc: func( + t *testing.T, + wanted repository.PersistentConfig, + mutableParams format.MutableParameters, + blobConfig format.BlobStorageConfiguration, + ) { + assert.Equal(t, *wanted.MinEpochDuration, mutableParams.EpochParameters.MinEpochDuration) + }, + }, + } + + for _, test := range table { + suite.Run(test.name, func() { + t := suite.T() + + ctx, flush := tester.NewContext(t) + defer flush() + + k, err := openLocalKopiaRepo(t, ctx) + require.NoError(t, err, clues.ToCore(err)) + + // Close is safe to call even if the repo is already closed. + t.Cleanup(func() { + k.Close(ctx) + }) + + // Need to disconnect and connect again because in-memory state in kopia + // isn't updated immediately. + err = k.Close(ctx) + require.NoError(t, err, clues.ToCore(err)) + + err = k.Connect(ctx, repository.Options{}, repoNameHash) + require.NoError(t, err, clues.ToCore(err)) + + mutable, blob, err := k.getPersistentConfig(ctx) + require.NoError(t, err, clues.ToCore(err)) + + defaultParams := repository.PersistentConfig{ + MinEpochDuration: ptr.To(defaultMinEpochDuration), + } + + test.checkFunc(t, defaultParams, mutable, blob) + + err = k.updatePersistentConfig(ctx, test.mutateParams) + require.NoError(t, err, clues.ToCore(err)) + + err = k.Close(ctx) + require.NoError(t, err, clues.ToCore(err)) + + err = k.Connect(ctx, repository.Options{}, repoNameHash) + require.NoError(t, err, clues.ToCore(err)) + + mutable, blob, err = k.getPersistentConfig(ctx) + require.NoError(t, err, clues.ToCore(err)) + test.checkFunc(t, test.mutateParams, mutable, blob) + + err = k.Close(ctx) + require.NoError(t, err, clues.ToCore(err)) + }) + } +} + +func (suite *WrapperIntegrationSuite) TestConfigPolicyDefaultsSetOnInitAndNotOnConnect() { newCompressor := "pgzip" newRetentionDaily := policy.OptionalInt(42) newRetention := policy.RetentionPolicy{KeepDaily: &newRetentionDaily} @@ -594,6 +674,13 @@ func (suite *WrapperIntegrationSuite) TestUpdatePersistentConfig() { err := connection.Initialize(ctx, repository.Options{}, repository.Retention{}, repoNameHash) require.NoError(t, err, "initializing repo: %v", clues.ToCore(err)) + // Need to close and reopen the repo due to kopia caching of values. + err = connection.Close(ctx) + require.NoError(t, err, clues.ToCore(err)) + + err = connection.Connect(ctx, repository.Options{}, repoNameHash) + require.NoError(t, err, clues.ToCore(err)) + startParams, startBlobConfig, err := connection.getPersistentConfig(ctx) require.NoError(t, err, clues.ToCore(err))