Only make incremental backup if Reason:base is 1:1 (#2050)

## Description

Ensure that each (resource owner, service, category) set of data is only sourced from a single base snapshot when doing an incremental backup. If not, fallback to doing a full backup.

Failure to error out or fallback to a full backup may result in repeated or zombie items in the resulting backup as multiple Point-In-Time backups will be used to source the same data

Incomplete manifests are ignored as they are currently only used for kopia-assisted incrementals, not sourcing items/backup details info when making a delta token-based incremental backup

## Does this PR need a docs update or release note?

- [ ]  Yes, it's included
- [ ] 🕐 Yes, but in a later PR
- [x]  No 

## Type of change

- [x] 🌻 Feature
- [x] 🐛 Bugfix
- [ ] 🗺️ Documentation
- [ ] 🤖 Test
- [ ] 💻 CI/Deployment
- [ ] 🧹 Tech Debt/Cleanup

## Issue(s)

* closes #1945 

## Test Plan

- [ ] 💪 Manual
- [x]  Unit test
- [ ] 💚 E2E
This commit is contained in:
ashmrtn 2023-01-06 15:10:55 -08:00 committed by GitHub
parent 70a8f53d65
commit 186569087c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 188 additions and 0 deletions

View File

@ -6,6 +6,7 @@ import (
"github.com/google/uuid"
multierror "github.com/hashicorp/go-multierror"
"github.com/kopia/kopia/repo/manifest"
"github.com/pkg/errors"
"github.com/alcionai/corso/src/internal/common"
@ -252,6 +253,46 @@ type backuper interface {
) (*kopia.BackupStats, *details.Builder, map[string]path.Path, error)
}
func verifyDistinctBases(mans []*kopia.ManifestEntry) error {
var (
errs *multierror.Error
reasons = map[string]manifest.ID{}
)
for _, man := range mans {
// Incomplete snapshots are used only for kopia-assisted incrementals. The
// fact that we need this check here makes it seem like this should live in
// the kopia code. However, keeping it here allows for better debugging as
// the kopia code only has access to a path builder which means it cannot
// remove the resource owner from the error/log output. That is also below
// the point where we decide if we should do a full backup or an
// incremental.
if len(man.IncompleteReason) > 0 {
continue
}
for _, reason := range man.Reasons {
reasonKey := reason.ResourceOwner + reason.Service.String() + reason.Category.String()
if b, ok := reasons[reasonKey]; ok {
errs = multierror.Append(errs, errors.Errorf(
"multiple base snapshots source data for %s %s. IDs: %s, %s",
reason.Service.String(),
reason.Category.String(),
b,
man.ID,
))
continue
}
reasons[reasonKey] = man.ID
}
}
return errs.ErrorOrNil()
}
// calls kopia to retrieve prior backup manifests, metadata collections to supply backup heuristics.
func produceManifestsAndMetadata(
ctx context.Context,
@ -278,6 +319,22 @@ func produceManifestsAndMetadata(
return ms, nil, nil
}
// We only need to check that we have 1:1 reason:base if we're doing an
// incremental with associated metadata. This ensures that we're only sourcing
// data from a single Point-In-Time (base) for each incremental backup.
//
// TODO(ashmrtn): This may need updating if we start sourcing item backup
// details from previous snapshots when using kopia-assisted incrementals.
if err := verifyDistinctBases(ms); err != nil {
logger.Ctx(ctx).Warnw(
"base snapshot collision, falling back to full backup",
"error",
err,
)
return ms, nil, nil
}
for _, man := range ms {
if len(man.IncompleteReason) > 0 {
continue

View File

@ -386,6 +386,137 @@ func (suite *BackupOpSuite) TestBackupOperation_PersistResults() {
}
}
func (suite *BackupOpSuite) TestBackupOperation_VerifyDistinctBases() {
const user = "a-user"
table := []struct {
name string
input []*kopia.ManifestEntry
errCheck assert.ErrorAssertionFunc
}{
{
name: "SingleManifestMultipleReasons",
input: []*kopia.ManifestEntry{
{
Manifest: &snapshot.Manifest{
ID: "id1",
},
Reasons: []kopia.Reason{
{
ResourceOwner: user,
Service: path.ExchangeService,
Category: path.EmailCategory,
},
{
ResourceOwner: user,
Service: path.ExchangeService,
Category: path.EventsCategory,
},
},
},
},
errCheck: assert.NoError,
},
{
name: "MultipleManifestsDistinctReason",
input: []*kopia.ManifestEntry{
{
Manifest: &snapshot.Manifest{
ID: "id1",
},
Reasons: []kopia.Reason{
{
ResourceOwner: user,
Service: path.ExchangeService,
Category: path.EmailCategory,
},
},
},
{
Manifest: &snapshot.Manifest{
ID: "id2",
},
Reasons: []kopia.Reason{
{
ResourceOwner: user,
Service: path.ExchangeService,
Category: path.EventsCategory,
},
},
},
},
errCheck: assert.NoError,
},
{
name: "MultipleManifestsSameReason",
input: []*kopia.ManifestEntry{
{
Manifest: &snapshot.Manifest{
ID: "id1",
},
Reasons: []kopia.Reason{
{
ResourceOwner: user,
Service: path.ExchangeService,
Category: path.EmailCategory,
},
},
},
{
Manifest: &snapshot.Manifest{
ID: "id2",
},
Reasons: []kopia.Reason{
{
ResourceOwner: user,
Service: path.ExchangeService,
Category: path.EmailCategory,
},
},
},
},
errCheck: assert.Error,
},
{
name: "MultipleManifestsSameReasonOneIncomplete",
input: []*kopia.ManifestEntry{
{
Manifest: &snapshot.Manifest{
ID: "id1",
},
Reasons: []kopia.Reason{
{
ResourceOwner: user,
Service: path.ExchangeService,
Category: path.EmailCategory,
},
},
},
{
Manifest: &snapshot.Manifest{
ID: "id2",
IncompleteReason: "checkpoint",
},
Reasons: []kopia.Reason{
{
ResourceOwner: user,
Service: path.ExchangeService,
Category: path.EmailCategory,
},
},
},
},
errCheck: assert.NoError,
},
}
for _, test := range table {
suite.T().Run(test.name, func(t *testing.T) {
test.errCheck(t, verifyDistinctBases(test.input))
})
}
}
func (suite *BackupOpSuite) TestBackupOperation_CollectMetadata() {
var (
tenant = "a-tenant"