corso/src/internal/kopia/backup_bases.go
ashmrtn f61448d650
Groups version bump (#4561)
Bump the backup version and force a full backup if
there's a backup for teams/groups that has base(s)
from an older version of corso

This will avoid propagating older details formats
forward. Those formats don't have all the data
newer formats do

This is mostly a stop-gap, a more robust solution
can be added later

Manually tested that it forces a full backup

---

#### Does this PR need a docs update or release note?

- [ ]  Yes, it's included
- [ ] 🕐 Yes, but in a later PR
- [x]  No

#### Type of change

- [ ] 🌻 Feature
- [x] 🐛 Bugfix
- [ ] 🗺️ Documentation
- [ ] 🤖 Supportability/Tests
- [ ] 💻 CI/Deployment
- [ ] 🧹 Tech Debt/Cleanup

#### Issue(s)

* #4569

#### Test Plan

- [x] 💪 Manual
- [ ]  Unit test
- [ ] 💚 E2E
2023-10-27 19:37:39 +00:00

460 lines
14 KiB
Go

package kopia
import (
"context"
"github.com/alcionai/clues"
"github.com/kopia/kopia/repo/manifest"
"golang.org/x/exp/maps"
"golang.org/x/exp/slices"
"github.com/alcionai/corso/src/internal/model"
"github.com/alcionai/corso/src/internal/version"
"github.com/alcionai/corso/src/pkg/backup/identity"
"github.com/alcionai/corso/src/pkg/logger"
)
// TODO(ashmrtn): Move this into some inject package. Here to avoid import
// cycles.
//
// BackupBases describes the set of prior backups (bases) a backup operation
// can draw on. Bases are split into merge bases and assist bases; the methods
// below document how each kind is used and how they can be disabled or
// combined.
type BackupBases interface {
// ConvertToAssistBase converts the base with the given backup ID from a merge
// base to an assist base.
ConvertToAssistBase(backupID model.StableID)
// MergeBases returns a []BackupBase that corresponds to all the bases that
// will source unchanged information for this backup during hierarchy merging,
// snapshot creation, and details merging.
MergeBases() []BackupBase
// DisableMergeBases converts all merge bases in this BackupBases to assist
// bases. These bases can still participate in sourcing data kopia considers
// "cached" during the snapshot process and can source backup details entries
// for those cached items. However, they won't be used to source unchanged
// items during hierarchy merging, snapshot creation, or details merging.
//
// This call is order sensitive with DisableAssistBases.
DisableMergeBases()
// UniqueAssistBases returns the set of assist bases for the backup operation.
// Assist bases are used to source item data and details entries if the item
// is considered "cached" by kopia. They are not used to source unchanged
// items during hierarchy merging.
UniqueAssistBases() []BackupBase
// DisableAssistBases clears the set of assist bases for this backup. Doing so
// will result in kopia not finding any "cached" items and assist bases won't
// participate in details merging.
//
// This call is order sensitive with DisableMergeBases.
DisableAssistBases()
// MinBackupVersion returns the lowest version of all merge backups in the
// BackupBases. The backupBases implementation in this file returns
// version.NoBackup when there are no merge bases.
MinBackupVersion() int
// MinAssistVersion returns the lowest version of all assist backups in the
// BackupBases. The backupBases implementation in this file returns
// version.NoBackup when there are no assist bases.
MinAssistVersion() int
// MergeBackupBases takes another BackupBases and merges its contained assist
// and merge bases into this BackupBases. The passed in BackupBases is
// considered an older alternative to this BackupBases, meaning bases from
// other won't be selected unless there's no item in this BackupBases to cover
// that Reason.
//
// Callers pass in reasonToKey to control how individual BackupBase items are
// selected. For example, to migrate from using user name to user ID as the
// protected resource in the Reason the reasonToKey function could map
// BackupBase items with the same tenant, service, and category to the same
// key. This works because backup operations are already per protected
// resource.
//
// This call is order sensitive with DisableMergeBases and DisableAssistBases.
MergeBackupBases(
ctx context.Context,
other BackupBases,
reasonToKey func(identity.Reasoner) string,
) BackupBases
// SnapshotAssistBases returns the set of bases to use for kopia assisted
// incremental snapshot operations. It consists of the union of merge bases
// and assist bases. If DisableAssistBases has been called then it returns
// nil.
SnapshotAssistBases() []BackupBase
}
// backupBases is the BackupBases implementation declared in this file. It
// keeps merge bases and assist bases in separate slices.
type backupBases struct {
// mergeBases holds the bases handed out by MergeBases.
mergeBases []BackupBase
// assistBases holds the bases handed out by UniqueAssistBases while assist
// bases are enabled.
assistBases []BackupBase
// disableAssistBases denotes whether assist bases are withheld from kopia
// during the snapshot operation. When true, SnapshotAssistBases and
// UniqueAssistBases return nil.
disableAssistBases bool
}
// SnapshotAssistBases returns the bases kopia may use for assisted incremental
// snapshots: a copy of the merge bases followed by the assist bases. It returns
// nil once assist bases have been disabled.
func (bb *backupBases) SnapshotAssistBases() []BackupBase {
	if bb.disableAssistBases {
		return nil
	}

	// Read the fields directly rather than going through the accessor methods,
	// since those may return nil depending on what has been disabled.
	union := slices.Clone(bb.mergeBases)
	union = append(union, bb.assistBases...)

	return union
}
// ConvertToAssistBase demotes the first merge base whose backup ID equals
// backupID: the base is appended to the assist bases and removed from the
// merge bases. It does nothing when no merge base matches.
func (bb *backupBases) ConvertToAssistBase(backupID model.StableID) {
	hasID := func(candidate BackupBase) bool {
		return candidate.Backup.ID == backupID
	}

	pos := slices.IndexFunc(bb.mergeBases, hasID)
	if pos < 0 {
		return
	}

	// Copy the base into the assist set before deleting, since Delete shifts
	// the remaining merge bases over the matched slot.
	bb.assistBases = append(bb.assistBases, bb.mergeBases[pos])
	bb.mergeBases = slices.Delete(bb.mergeBases, pos, pos+1)
}
// MinBackupVersion reports the smallest Backup.Version found among the merge
// bases. It returns version.NoBackup when the receiver is nil or holds no
// merge bases.
func (bb *backupBases) MinBackupVersion() int {
	if bb == nil {
		return version.NoBackup
	}

	lowest := version.NoBackup

	for i := range bb.mergeBases {
		v := bb.mergeBases[i].Backup.Version

		// version.NoBackup doubles as the "nothing recorded yet" marker.
		if lowest == version.NoBackup || v < lowest {
			lowest = v
		}
	}

	return lowest
}
// MinAssistVersion reports the smallest Backup.Version found among the assist
// bases. It returns version.NoBackup when the receiver is nil or holds no
// assist bases. The stored assist bases are inspected directly, regardless of
// whether DisableAssistBases has been called.
func (bb *backupBases) MinAssistVersion() int {
	if bb == nil {
		return version.NoBackup
	}

	lowest := version.NoBackup

	for i := range bb.assistBases {
		v := bb.assistBases[i].Backup.Version

		// version.NoBackup doubles as the "nothing recorded yet" marker.
		if lowest == version.NoBackup || v < lowest {
			lowest = v
		}
	}

	return lowest
}
// MergeBases returns a copy of the merge bases, so callers can modify the
// returned slice without affecting this backupBases.
//
// The method uses a pointer receiver like the other methods on the type and
// returns nil for a nil receiver, matching the nil-receiver handling in
// MinBackupVersion and MinAssistVersion. With a value receiver, calling it on
// a nil *backupBases panicked on the implicit dereference.
func (bb *backupBases) MergeBases() []BackupBase {
	if bb == nil {
		return nil
	}

	return slices.Clone(bb.mergeBases)
}
// DisableMergeBases demotes every merge base to an assist base and leaves the
// merge set empty.
//
// The bases are kept as assist bases rather than dropped so kopia assisted
// incrementals remain possible unless those are disabled separately. They
// can't stay in the merge set because they would then still be handed out as
// merge bases.
func (bb *backupBases) DisableMergeBases() {
	demoted := bb.mergeBases
	bb.mergeBases = nil
	bb.assistBases = append(bb.assistBases, demoted...)
}
// UniqueAssistBases returns a copy of the assist bases, or nil once
// DisableAssistBases has been called.
//
// The method uses a pointer receiver like the other methods on the type and
// returns nil for a nil receiver, matching the nil-receiver handling in
// MinBackupVersion and MinAssistVersion. With a value receiver, calling it on
// a nil *backupBases panicked on the implicit dereference.
func (bb *backupBases) UniqueAssistBases() []BackupBase {
	if bb == nil || bb.disableAssistBases {
		return nil
	}

	return slices.Clone(bb.assistBases)
}
// DisableAssistBases marks assist bases as disabled. The stored assist bases
// are left in place; the flag makes SnapshotAssistBases and UniqueAssistBases
// return nil from then on.
func (bb *backupBases) DisableAssistBases() {
bb.disableAssistBases = true
}
// getMissingBases returns the bases in toCheck that carry at least one Reason
// whose key (as produced by reasonToKey) is absent from seen. Each returned
// base is a copy whose Reasons are narrowed to just those uncovered Reasons.
// Input order is preserved and seen is never modified.
func getMissingBases(
	reasonToKey func(identity.Reasoner) string,
	seen map[string]struct{},
	toCheck []BackupBase,
) []BackupBase {
	var missing []BackupBase

	for _, candidate := range toCheck {
		var uncovered []identity.Reasoner

		for _, reason := range candidate.Reasons {
			// Reasons already covered by a previously seen base are left out
			// of the candidate.
			if _, covered := seen[reasonToKey(reason)]; !covered {
				uncovered = append(uncovered, reason)
			}
		}

		if len(uncovered) == 0 {
			continue
		}

		// candidate is the loop's copy, so the caller's base keeps its full
		// Reasons slice.
		candidate.Reasons = uncovered
		missing = append(missing, candidate)
	}

	return missing
}
// MergeBackupBases combines the receiver with other into a single
// BackupBases. The receiver is treated as the current set of bases and other
// as an older alternative (for example from before a migration that disrupts
// lookup), so bases from other only fill gaps the receiver leaves open.
//
// Call this before any Disable*Bases call on either the receiver or other.
// The merged result is a fresh backupBases built from the accessor results and
// does not carry over a disabled-assist-bases flag.
//
// reasonToKey maps a Reasoner to the string that identifies it for the
// purposes of this merge. For example, to merge across a ProtectedResource
// migration the key can be built from the Reasoner's service and category.
//
// For each key produced by reasonToKey:
//  1. If the receiver has a merge base for the key, bases from other with that
//     key are ignored.
//  2. If the receiver has only an assist base for the key, a matching merge
//     base is taken from other.
//  3. If the receiver has nothing for the key, matching merge and assist bases
//     are taken from other.
//
// If other is nil or has no bases the receiver is returned as-is; if the
// receiver is nil or has no bases other is returned as-is. ctx is currently
// unused.
func (bb *backupBases) MergeBackupBases(
	ctx context.Context,
	other BackupBases,
	reasonToKey func(reason identity.Reasoner) string,
) BackupBases {
	hasNoBases := func(b BackupBases) bool {
		return len(b.MergeBases()) == 0 && len(b.UniqueAssistBases()) == 0
	}

	if other == nil || hasNoBases(other) {
		return bb
	}

	if bb == nil || hasNoBases(bb) {
		return other
	}

	// Keys covered by the receiver's merge bases and keys covered by any of its
	// bases are tracked separately. The first set decides whether other must
	// supply a merge base, the second whether other must supply an assist base.
	var (
		mergeCovered  = map[string]struct{}{}
		assistCovered = map[string]struct{}{}
	)

	for _, base := range bb.MergeBases() {
		for _, reason := range base.Reasons {
			key := reasonToKey(reason)
			mergeCovered[key] = struct{}{}
			assistCovered[key] = struct{}{}
		}
	}

	for _, base := range bb.UniqueAssistBases() {
		for _, reason := range base.Reasons {
			assistCovered[reasonToKey(reason)] = struct{}{}
		}
	}

	fromOtherMerge := getMissingBases(reasonToKey, mergeCovered, other.MergeBases())
	fromOtherAssist := getMissingBases(reasonToKey, assistCovered, other.UniqueAssistBases())

	// Bases pulled from other come first, followed by the receiver's own.
	merged := &backupBases{}
	merged.mergeBases = append(fromOtherMerge, bb.MergeBases()...)
	merged.assistBases = append(fromOtherAssist, bb.UniqueAssistBases()...)

	return merged
}
// fixupMinRequirements filters baseSet down to the bases that are usable,
// preserving input order. A base is dropped, with an info-level log entry,
// when any of the following holds (checked in this order):
//   - it has no backup model, or the model's ID is empty
//   - it has no item data snapshot, or the snapshot's ID is empty
//   - its item data snapshot has a non-empty IncompleteReason
//   - its backup has neither a StreamStoreID nor a DetailsID
//   - it has no Reasons
func fixupMinRequirements(
	ctx context.Context,
	baseSet []BackupBase,
) []BackupBase {
	kept := make([]BackupBase, 0, len(baseSet))

	for _, candidate := range baseSet {
		var (
			backupID   model.StableID
			snapshotID manifest.ID
			incomplete bool
			detailsID  string
		)

		if bup := candidate.Backup; bup != nil {
			backupID = bup.ID
			detailsID = bup.StreamStoreID

			// Fall back to DetailsID when no StreamStoreID is set.
			if len(detailsID) == 0 {
				detailsID = bup.DetailsID
			}
		}

		if snap := candidate.ItemDataSnapshot; snap != nil {
			snapshotID = snap.ID
			incomplete = len(snap.IncompleteReason) > 0
		}

		ictx := clues.Add(
			ctx,
			"base_backup_id", backupID,
			"base_item_data_snapshot_id", snapshotID,
			"base_details_id", detailsID)

		// Pick the first failing requirement, if any, and log it once below.
		var dropMsg string

		switch {
		case len(backupID) == 0:
			dropMsg = "dropping base missing backup model"
		case len(snapshotID) == 0:
			dropMsg = "dropping base missing item data snapshot"
		case incomplete:
			dropMsg = "dropping base with incomplete item data snapshot"
		case len(detailsID) == 0:
			dropMsg = "dropping base missing backup details"
		case len(candidate.Reasons) == 0:
			// Not sure how we'd end up here, but make sure we're really
			// getting what we expect.
			dropMsg = "dropping base with no marked Reasons"
		}

		if len(dropMsg) > 0 {
			logger.Ctx(ictx).Info(dropMsg)
			continue
		}

		kept = append(kept, candidate)
	}

	return kept
}
// fixupReasons makes sure each Reason is served by at most one base. Bases are
// grouped by reasonKey of their Reasons; within each group only the base with
// the most recent Backup.CreationTime is kept for that Reason. A base kept for
// several Reasons appears once in the result with all of those Reasons, and a
// base that loses for every one of its Reasons is absent from the result.
//
// The result is built from a map, so its order is unspecified.
func fixupReasons(
	ctx context.Context,
	baseSet []BackupBase,
) []BackupBase {
	// candidates pairs a Reason with every base selected for it, since the
	// grouping map needs a string key.
	type candidates struct {
		reason identity.Reasoner
		bases  []BackupBase
	}

	var (
		byReason = map[string]candidates{}
		// selected maps backup ID -> base. The indirection lets a base that
		// wins for several Reasons be consolidated into a single result.
		selected = map[model.StableID]BackupBase{}
	)

	// Group the bases by the Reason(s) they were chosen for. A base shows up in
	// several groups when it was selected for several Reasons.
	for _, base := range baseSet {
		for _, reason := range base.Reasons {
			key := reasonKey(reason)

			group := byReason[key]
			group.reason = reason
			group.bases = append(group.bases, base)
			byReason[key] = group
		}
	}

	// Any group with more than one base means several bases were found for the
	// same Reason; keep the youngest and drop the rest.
	for _, group := range byReason {
		ictx := clues.Add(
			ctx,
			"verify_service", group.reason.Service().String(),
			"verify_category", group.reason.Category().String())

		// Not sure how we'd actually get here but handle it anyway.
		if len(group.bases) == 0 {
			logger.Ctx(ictx).Info("no bases found for reason")
			continue
		}

		// Reverse chronological order puts the youngest base first.
		slices.SortFunc(group.bases, func(a, b BackupBase) int {
			return -a.Backup.CreationTime.Compare(b.Backup.CreationTime)
		})

		youngest, older := group.bases[0], group.bases[1:]
		id := youngest.Backup.ID

		// Reasons are added to the result one (Reason, base) pair at a time, so
		// pairs that lose out are simply never added.
		if existing, seen := selected[id]; seen {
			// Already selected for another Reason; record this one as well.
			existing.Reasons = append(existing.Reasons, group.reason)
			selected[id] = existing

			continue
		}

		// First time this base is selected: start it off with only the Reason
		// being examined so Reasons it lost for never reach the output.
		youngest.Reasons = []identity.Reasoner{group.reason}
		selected[id] = youngest

		// Nothing was dropped if this was the only base for the Reason.
		if len(older) == 0 {
			continue
		}

		// Purely for debugging: log the base(s) dropped for this Reason.
		dropped := make([]model.StableID, 0, len(older))
		for _, b := range older {
			dropped = append(dropped, b.Backup.ID)
		}

		logger.Ctx(ictx).Infow(
			"dropping bases for reason",
			"dropped_backup_ids", dropped)
	}

	return maps.Values(selected)
}
// fixupAndVerify cleans up the merge and assist bases in place so that:
//   - every base has a backup model, an item data snapshot, and a details ID
//   - no base has an item data snapshot with an incomplete reason
//   - within the merge bases, and within the assist bases, each Reason is
//     covered by a single base
//
// Bases that are incomplete or that overlap on a Reason are removed. Dropping
// them is safe because it only affects how much data gets pulled. Keeping them
// is not, since it can muck up merging when adding to kopia (for example
// multiple entries for the same item).
func (bb *backupBases) fixupAndVerify(ctx context.Context) {
	// The minimum requirements apply to merge and assist bases alike, so filter
	// both sets first.
	validMerge := fixupMinRequirements(ctx, bb.mergeBases)
	validAssist := fixupMinRequirements(ctx, bb.assistBases)

	// Then remove bases with overlapping Reasons. Running this on assist bases
	// is fine too since at most one assist base per Reason is expected.
	bb.mergeBases = fixupReasons(ctx, validMerge)
	bb.assistBases = fixupReasons(ctx, validAssist)
}