From 1f8c7598b47cf657257845a4a97161c4099377b6 Mon Sep 17 00:00:00 2001 From: Keepers Date: Tue, 11 Apr 2023 19:16:40 -0600 Subject: [PATCH] separate fault bus in fetch manifests (#3090) The global aggregation of recoverable errors causes us to mark operations as failed even in conditions where they should have succeeded, because higher level callers don't have sufficient control over ignorable error conditions. This is a short-term fix that allows us to log and dump fault error accumulation in a specific, scoped instance. --- #### Does this PR need a docs update or release note? - [x] :no_entry: No #### Type of change - [x] :bug: Bugfix #### Test Plan - [x] :zap: Unit test - [x] :green_heart: E2E --- src/internal/operations/backup.go | 3 +-- src/internal/operations/manifests.go | 19 ++++++++++++++++--- src/internal/operations/manifests_test.go | 6 ++---- 3 files changed, 19 insertions(+), 9 deletions(-) diff --git a/src/internal/operations/backup.go b/src/internal/operations/backup.go index 0f34a137e..82865231a 100644 --- a/src/internal/operations/backup.go +++ b/src/internal/operations/backup.go @@ -245,8 +245,7 @@ func (op *BackupOperation) do( op.store, reasons, fallbackReasons, op.account.ID(), - op.incremental, - op.Errors) + op.incremental) if err != nil { return nil, clues.Wrap(err, "producing manifests and metadata") } diff --git a/src/internal/operations/manifests.go b/src/internal/operations/manifests.go index 5701561e8..11f218c4b 100644 --- a/src/internal/operations/manifests.go +++ b/src/internal/operations/manifests.go @@ -46,7 +46,6 @@ func produceManifestsAndMetadata( reasons, fallbackReasons []kopia.Reason, tenantID string, getMetadata bool, - errs *fault.Bus, ) ([]*kopia.ManifestEntry, []data.RestoreCollection, bool, error) { var ( tags = map[string]string{kopia.TagBackupCategory: ""} @@ -151,7 +150,17 @@ func produceManifestsAndMetadata( return ms, nil, false, nil } - colls, err := collectMetadata(mctx, mr, man, metadataFiles, tenantID, errs) + // a local fault.Bus intance is used to collect metadata files here. + // we avoid the global fault.Bus because all failures here are ignorable, + // and cascading errors up to the operation can cause a conflict that forces + // the operation into a failure state unnecessarily. + // TODO(keepers): this is not a pattern we want to + // spread around. Need to find more idiomatic handling. + fb := fault.New(true) + + colls, err := collectMetadata(mctx, mr, man, metadataFiles, tenantID, fb) + LogFaultErrors(ctx, fb.Errors(), "collecting metadata") + if err != nil && !errors.Is(err, data.ErrNotFound) { // prior metadata isn't guaranteed to exist. // if it doesn't, we'll just have to do a @@ -162,7 +171,11 @@ func produceManifestsAndMetadata( collections = append(collections, colls...) } - return ms, collections, true, err + if err != nil { + return nil, nil, false, err + } + + return ms, collections, true, nil } // unionManifests reduces the two manifest slices into a single slice. diff --git a/src/internal/operations/manifests_test.go b/src/internal/operations/manifests_test.go index 86861d8c4..3dfd25dec 100644 --- a/src/internal/operations/manifests_test.go +++ b/src/internal/operations/manifests_test.go @@ -678,8 +678,7 @@ func (suite *OperationsManifestsUnitSuite) TestProduceManifestsAndMetadata() { &test.gb, test.reasons, nil, tid, - test.getMeta, - fault.New(true)) + test.getMeta) test.assertErr(t, err, clues.ToCore(err)) test.assertB(t, b) @@ -844,8 +843,7 @@ func (suite *OperationsManifestsUnitSuite) TestProduceManifestsAndMetadata_fallb []kopia.Reason{manReason}, []kopia.Reason{fbReason}, "tid", - false, - fault.New(true)) + false) require.NoError(t, err, clues.ToCore(err)) assert.False(t, b, "no-metadata is forced for this test")