Configure new corso repos with 8 hr minEpochDuration (#5086)

We've found that reducing the MinEpochDuration parameter in kopia
can reduce GET requests sent to the object store by a large amount
(>50% in some cases) for repos that run many backups each day

This PR updates the init repo code path to set this parameter to
8hrs instead of the default 24hrs to bring in some of those savings

---

#### Does this PR need a docs update or release note?

- [ ]  Yes, it's included
- [ ] 🕐 Yes, but in a later PR
- [x]  No

#### Type of change

- [x] 🌻 Feature
- [ ] 🐛 Bugfix
- [ ] 🗺️ Documentation
- [ ] 🤖 Supportability/Tests
- [ ] 💻 CI/Deployment
- [ ] 🧹 Tech Debt/Cleanup

#### Test Plan

- [ ] 💪 Manual
- [x]  Unit test
- [ ] 💚 E2E
This commit is contained in:
ashmrtn 2024-01-24 13:18:22 -08:00 committed by GitHub
parent e1457c23bf
commit cb4006eb60
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 109 additions and 6 deletions

View File

@ -9,6 +9,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Added ### Added
- Events can now be exported from Exchange backups as .ics files. - Events can now be exported from Exchange backups as .ics files.
- Update repo init configuration to reduce the total number of GET requests sent
to the object store when using corso. This affects repos that have many
backups created in them per day the most.
### Fixed ### Fixed
- Retry transient 400 "invalidRequest" errors during onedrive & sharepoint backup. - Retry transient 400 "invalidRequest" errors during onedrive & sharepoint backup.

View File

@ -34,12 +34,8 @@ const (
defaultCompressor = "zstd-better-compression" defaultCompressor = "zstd-better-compression"
// Interval of 0 disables scheduling. // Interval of 0 disables scheduling.
defaultSchedulingInterval = time.Second * 0 defaultSchedulingInterval = time.Second * 0
)
var (
ErrSettingDefaultConfig = clues.New("setting default repo config values")
ErrorRepoAlreadyExists = clues.New("repo already exists")
defaultMinEpochDuration = time.Hour * 8
// minEpochDurationLowerBound is the minimum corso will allow the kopia epoch // minEpochDurationLowerBound is the minimum corso will allow the kopia epoch
// duration to be set to. This number can still be tuned further, right now // duration to be set to. This number can still be tuned further, right now
// it's just to make sure it's not set to something totally wild. // it's just to make sure it's not set to something totally wild.
@ -59,6 +55,11 @@ var (
minEpochDurationUpperBound = 7 * 24 * time.Hour minEpochDurationUpperBound = 7 * 24 * time.Hour
) )
var (
ErrSettingDefaultConfig = clues.New("setting default repo config values")
ErrorRepoAlreadyExists = clues.New("repo already exists")
)
// Having all fields set to 0 causes it to keep max-int versions of snapshots. // Having all fields set to 0 causes it to keep max-int versions of snapshots.
var ( var (
zeroOpt = policy.OptionalInt(0) zeroOpt = policy.OptionalInt(0)
@ -168,6 +169,18 @@ func (w *conn) Initialize(
return clues.StackWC(ctx, err) return clues.StackWC(ctx, err)
} }
// In theory it should be possible to set this when creating the repo.
// However, the existing code paths for repo init in kopia end up clobbering
// any custom parameters passed in with default values. It's not clear if
// that's intentional or not.
if err := w.updatePersistentConfig(
ctx,
repository.PersistentConfig{
MinEpochDuration: ptr.To(defaultMinEpochDuration),
}); err != nil {
return clues.Stack(err)
}
// Calling with all parameters here will set extend object locks for // Calling with all parameters here will set extend object locks for
// maintenance. Parameters for actual retention should have been set during // maintenance. Parameters for actual retention should have been set during
// initialization and won't be updated again. // initialization and won't be updated again.

View File

@ -284,7 +284,87 @@ func (suite *WrapperIntegrationSuite) TestSetCompressor() {
string(policyTree.EffectivePolicy().CompressionPolicy.CompressorName)) string(policyTree.EffectivePolicy().CompressionPolicy.CompressorName))
} }
func (suite *WrapperIntegrationSuite) TestConfigDefaultsSetOnInitAndNotOnConnect() { func (suite *WrapperIntegrationSuite) TestConfigPersistentConfigOnInitAndNotOnConnect() {
repoNameHash := strTD.NewHashForRepoConfigName()
table := []struct {
name string
mutateParams repository.PersistentConfig
checkFunc func(
t *testing.T,
wanted repository.PersistentConfig,
mutableParams format.MutableParameters,
blobConfig format.BlobStorageConfiguration,
)
}{
{
name: "MinEpochDuration",
mutateParams: repository.PersistentConfig{
MinEpochDuration: ptr.To(defaultMinEpochDuration + time.Minute),
},
checkFunc: func(
t *testing.T,
wanted repository.PersistentConfig,
mutableParams format.MutableParameters,
blobConfig format.BlobStorageConfiguration,
) {
assert.Equal(t, *wanted.MinEpochDuration, mutableParams.EpochParameters.MinEpochDuration)
},
},
}
for _, test := range table {
suite.Run(test.name, func() {
t := suite.T()
ctx, flush := tester.NewContext(t)
defer flush()
k, err := openLocalKopiaRepo(t, ctx)
require.NoError(t, err, clues.ToCore(err))
// Close is safe to call even if the repo is already closed.
t.Cleanup(func() {
k.Close(ctx)
})
// Need to disconnect and connect again because in-memory state in kopia
// isn't updated immediately.
err = k.Close(ctx)
require.NoError(t, err, clues.ToCore(err))
err = k.Connect(ctx, repository.Options{}, repoNameHash)
require.NoError(t, err, clues.ToCore(err))
mutable, blob, err := k.getPersistentConfig(ctx)
require.NoError(t, err, clues.ToCore(err))
defaultParams := repository.PersistentConfig{
MinEpochDuration: ptr.To(defaultMinEpochDuration),
}
test.checkFunc(t, defaultParams, mutable, blob)
err = k.updatePersistentConfig(ctx, test.mutateParams)
require.NoError(t, err, clues.ToCore(err))
err = k.Close(ctx)
require.NoError(t, err, clues.ToCore(err))
err = k.Connect(ctx, repository.Options{}, repoNameHash)
require.NoError(t, err, clues.ToCore(err))
mutable, blob, err = k.getPersistentConfig(ctx)
require.NoError(t, err, clues.ToCore(err))
test.checkFunc(t, test.mutateParams, mutable, blob)
err = k.Close(ctx)
require.NoError(t, err, clues.ToCore(err))
})
}
}
func (suite *WrapperIntegrationSuite) TestConfigPolicyDefaultsSetOnInitAndNotOnConnect() {
newCompressor := "pgzip" newCompressor := "pgzip"
newRetentionDaily := policy.OptionalInt(42) newRetentionDaily := policy.OptionalInt(42)
newRetention := policy.RetentionPolicy{KeepDaily: &newRetentionDaily} newRetention := policy.RetentionPolicy{KeepDaily: &newRetentionDaily}
@ -594,6 +674,13 @@ func (suite *WrapperIntegrationSuite) TestUpdatePersistentConfig() {
err := connection.Initialize(ctx, repository.Options{}, repository.Retention{}, repoNameHash) err := connection.Initialize(ctx, repository.Options{}, repository.Retention{}, repoNameHash)
require.NoError(t, err, "initializing repo: %v", clues.ToCore(err)) require.NoError(t, err, "initializing repo: %v", clues.ToCore(err))
// Need to close and reopen the repo due to kopia caching of values.
err = connection.Close(ctx)
require.NoError(t, err, clues.ToCore(err))
err = connection.Connect(ctx, repository.Options{}, repoNameHash)
require.NoError(t, err, clues.ToCore(err))
startParams, startBlobConfig, err := connection.getPersistentConfig(ctx) startParams, startBlobConfig, err := connection.getPersistentConfig(ctx)
require.NoError(t, err, clues.ToCore(err)) require.NoError(t, err, clues.ToCore(err))