corso/src/internal/kopia/backup_bases.go
Ashlie Martinez 65af82f0f5 Fix function, variable, and type references
Fix the package specifier for function, variable, and type references
since the package path changed for them.
2023-10-04 13:24:38 -07:00

428 lines
11 KiB
Go

package kopia
import (
"context"
"github.com/alcionai/clues"
"github.com/kopia/kopia/repo/manifest"
"golang.org/x/exp/slices"
"github.com/alcionai/corso/src/internal/version"
"github.com/alcionai/corso/src/pkg/backup"
"github.com/alcionai/corso/src/pkg/backup/identity"
"github.com/alcionai/corso/src/pkg/logger"
)
// Compile-time check that *backupBases implements backup.BackupBases.
var _ backup.BackupBases = (*backupBases)(nil)

// backupBases tracks the backups and snapshot manifests that can serve as
// bases for a new backup, split into a merge set and an assist set.
type backupBases struct {
	// backups holds the backup models for the merge set. It should be modified
	// together with mergeBases as they relate similar data.
	backups []backup.BackupEntry
	// mergeBases holds the snapshot manifests for the merge set.
	mergeBases []backup.ManifestEntry
	// assistBackups holds the backup models for the assist set.
	assistBackups []backup.BackupEntry
	// assistBases holds the snapshot manifests for the assist set.
	assistBases []backup.ManifestEntry
	// disableAssistBases denotes whether any assist bases should be returned to
	// kopia during the snapshot operation. When set, the assist accessors on
	// this type return nil.
	disableAssistBases bool
}
// SnapshotAssistBases returns every manifest that may be handed to kopia as an
// assist base: a copy of the assist bases followed by the merge bases. It
// returns nil when assist bases have been disabled.
func (bb *backupBases) SnapshotAssistBases() []backup.ManifestEntry {
	if bb.disableAssistBases {
		return nil
	}

	// Read the fields directly rather than going through the accessor methods;
	// the accessors return nil depending on what's been marked as disabled.
	combined := slices.Clone(bb.assistBases)
	combined = append(combined, bb.mergeBases...)

	return combined
}
// ConvertToAssistBase moves the merge base identified by manifestID, along
// with the backup whose SnapshotID matches it, from the merge set to the
// assist set.
//
// The manifest and the backup are each removed from the merge set whenever
// they are found. They are only added to the assist set when both were found;
// if just one of the pair exists it is dropped without being re-added.
func (bb *backupBases) ConvertToAssistBase(manifestID manifest.ID) {
	manIdx := slices.IndexFunc(
		bb.mergeBases,
		func(entry backup.ManifestEntry) bool {
			return entry.ID == manifestID
		})

	bupIdx := slices.IndexFunc(
		bb.backups,
		func(entry backup.BackupEntry) bool {
			return entry.SnapshotID == string(manifestID)
		})

	var (
		man backup.ManifestEntry
		bup backup.BackupEntry
	)

	if manIdx >= 0 {
		man = bb.mergeBases[manIdx]
		bb.mergeBases = slices.Delete(bb.mergeBases, manIdx, manIdx+1)
	}

	if bupIdx >= 0 {
		bup = bb.backups[bupIdx]
		bb.backups = slices.Delete(bb.backups, bupIdx, bupIdx+1)
	}

	// Only a complete manifest/backup pair becomes an assist base.
	if manIdx < 0 || bupIdx < 0 {
		return
	}

	bb.assistBackups = append(bb.assistBackups, bup)
	bb.assistBases = append(bb.assistBases, man)
}
// Backups returns a copy of the backups in the merge set, so callers cannot
// modify the slice held by bb.
func (bb backupBases) Backups() []backup.BackupEntry {
	entries := slices.Clone(bb.backups)

	return entries
}
// UniqueAssistBackups returns a copy of the backups in the assist set, or nil
// when assist bases have been disabled.
func (bb backupBases) UniqueAssistBackups() []backup.BackupEntry {
	if bb.disableAssistBases {
		return nil
	}

	entries := slices.Clone(bb.assistBackups)

	return entries
}
// MinBackupVersion returns the lowest Version among the backups in the merge
// set. It returns version.NoBackup when bb is nil or has no such backups.
func (bb *backupBases) MinBackupVersion() int {
	if bb == nil {
		return version.NoBackup
	}

	lowest := version.NoBackup

	for i := range bb.backups {
		v := bb.backups[i].Version

		// version.NoBackup doubles as the "nothing seen yet" marker, so the first
		// backup always replaces it.
		if lowest == version.NoBackup || v < lowest {
			lowest = v
		}
	}

	return lowest
}
// MergeBases returns a copy of the manifests in the merge set, so callers
// cannot modify the slice held by bb.
func (bb backupBases) MergeBases() []backup.ManifestEntry {
	entries := slices.Clone(bb.mergeBases)

	return entries
}
// DisableMergeBases demotes every merge base, and its backup, to the assist
// set and leaves the merge set empty.
func (bb *backupBases) DisableMergeBases() {
	// The bases aren't removed outright because kopia assisted incrementals
	// should still be allowed unless those are explicitly disabled as well.
	// They can't stay in the merge set either, since that would cause them to be
	// returned when merging backup details.
	bb.assistBases, bb.mergeBases = append(bb.assistBases, bb.mergeBases...), nil
	bb.assistBackups, bb.backups = append(bb.assistBackups, bb.backups...), nil
}
// UniqueAssistBases returns a copy of the manifests in the assist set, or nil
// when assist bases have been disabled.
func (bb backupBases) UniqueAssistBases() []backup.ManifestEntry {
	if bb.disableAssistBases {
		return nil
	}

	entries := slices.Clone(bb.assistBases)

	return entries
}
// DisableAssistBases marks assist bases as disabled. After this call
// SnapshotAssistBases, UniqueAssistBases, and UniqueAssistBackups return nil.
// The stored assist entries themselves are left untouched.
func (bb *backupBases) DisableAssistBases() {
bb.disableAssistBases = true
}
// MergeBackupBases reduces the two BackupBases into a single BackupBase.
// Assumes the passed in BackupBases represents a prior backup version (across
// some migration that disrupts lookup), and that the BackupBases used to call
// this function contains the current version.
//
// This call should be made prior to Disable*Bases being called on either the
// called BackupBases or the passed in BackupBases.
//
// reasonToKey should be a function that, given a Reasoner, will produce some
// string that represents Reasoner in the context of the merge operation. For
// example, to merge BackupBases across a ProtectedResource migration, the
// Reasoner's service and category can be used as the key.
//
// Selection priority, for each reason key generated by reasonToKey, follows
// these rules:
//  1. If the called BackupBases has a MergeBase for a given reason, ignore the
//     other BackupBases matching that reason.
//  2. If the called BackupBases has only AssistBases, look for a matching
//     MergeBase manifest in the other BackupBases.
//  3. If the called BackupBases has no entry for a reason, look for a matching
//     MergeBase in the other BackupBases.
//
// If other has no merge or assist bases, bb is returned as-is. If bb has none,
// other is returned as-is. Otherwise a new BackupBases is returned containing
// copies of bb's entries plus the merge bases (and their backups) selected
// from other. Merge bases from other whose backup cannot be found are logged
// and skipped. ctx is only used for logging.
func (bb *backupBases) MergeBackupBases(
	ctx context.Context,
	other backup.BackupBases,
	reasonToKey func(reason identity.Reasoner) string,
) backup.BackupBases {
	if other == nil || (len(other.MergeBases()) == 0 && len(other.UniqueAssistBases()) == 0) {
		return bb
	}

	if bb == nil || (len(bb.MergeBases()) == 0 && len(bb.UniqueAssistBases()) == 0) {
		return other
	}

	// Reason keys that already have a merge base in bb. Assist bases in bb are
	// deliberately not tracked: per the rules above they don't stop a merge base
	// from being pulled in from other.
	covered := map[string]struct{}{}

	for _, m := range bb.mergeBases {
		for _, r := range m.Reasons {
			covered[reasonToKey(r)] = struct{}{}
		}
	}

	var toAdd []backup.ManifestEntry

	// Calculate the set of mergeBases to pull from other into this one.
	for _, m := range other.MergeBases() {
		useReasons := make([]identity.Reasoner, 0, len(m.Reasons))

		for _, r := range m.Reasons {
			if _, ok := covered[reasonToKey(r)]; ok {
				// Assume other contains prior manifest versions.
				// We don't want to stack a prior version incomplete onto
				// a current version's complete snapshot.
				continue
			}

			useReasons = append(useReasons, r)
		}

		if len(useReasons) > 0 {
			// m is a copy, so narrowing its reasons doesn't touch other.
			m.Reasons = useReasons
			toAdd = append(toAdd, m)
		}
	}

	res := &backupBases{
		backups:     bb.Backups(),
		mergeBases:  bb.MergeBases(),
		assistBases: bb.UniqueAssistBases(),
		// Note that assistBackups are a new feature and don't exist
		// in prior versions where we were using UPN based reasons i.e.
		// other won't have any assistBackups.
		assistBackups: bb.UniqueAssistBackups(),
	}

	if len(toAdd) == 0 {
		return res
	}

	// Fetch once; the set of backups in other doesn't change while merging.
	otherBackups := other.Backups()

	// Add new mergeBases and backups.
	for _, man := range toAdd {
		// Will get empty string if not found which is fine, it'll fail the
		// backup lookup below.
		bID, _ := man.GetTag(TagBackupID)

		bup, ok := getBackupByID(otherBackups, bID)
		if !ok {
			logger.Ctx(ctx).Infow(
				"not unioning snapshot missing backup",
				"other_manifest_id", man.ID,
				"other_backup_id", bID)

			continue
		}

		bup.Reasons = man.Reasons

		res.backups = append(res.backups, bup)
		res.mergeBases = append(res.mergeBases, man)
	}

	return res
}
// findNonUniqueManifests returns the IDs of the manifests that should not be
// used as bases: those marked incomplete, and those that share a reason key
// with at least one other complete manifest in the input. Each dropped group
// is logged.
func findNonUniqueManifests(
	ctx context.Context,
	manifests []backup.ManifestEntry,
) map[manifest.ID]struct{} {
	var (
		drop = map[manifest.ID]struct{}{}
		// ReasonKey -> complete manifests with that reason.
		byReason = map[string][]backup.ManifestEntry{}
	)

	for _, m := range manifests {
		// Incomplete snapshots are used only for kopia-assisted incrementals.
		// Needing this check here suggests it belongs in the kopia code, but
		// keeping it here gives better debugging: the kopia code only has access
		// to a path builder, so it cannot remove the resource owner from the
		// error/log output. The kopia code is also below the point where we
		// decide between a full backup and an incremental.
		if len(m.IncompleteReason) > 0 {
			logger.Ctx(ctx).Infow(
				"dropping incomplete manifest",
				"manifest_id", m.ID)

			drop[m.ID] = struct{}{}

			continue
		}

		for _, r := range m.Reasons {
			key := reasonKey(r)
			byReason[key] = append(byReason[key], m)
		}
	}

	for key, sharing := range byReason {
		ictx := clues.Add(ctx, "reason", key)

		// A reason held by a single manifest is unique; nothing to drop. An empty
		// group shouldn't be possible but is skipped just in case.
		if len(sharing) <= 1 {
			continue
		}

		ids := make([]manifest.ID, len(sharing))

		for i := range sharing {
			ids[i] = sharing[i].ID
			drop[ids[i]] = struct{}{}
		}

		// TODO(ashmrtn): We should actually just remove this reason from the
		// manifests and then if they have no reasons remaining drop them from the
		// set.
		logger.Ctx(ictx).Infow(
			"dropping manifests with duplicate reason",
			"manifest_ids", ids)
	}

	return drop
}
// getBackupByID returns the first entry in backups whose ID equals bID, and
// true. It returns a zero-value entry and false when bID is empty or no entry
// matches.
func getBackupByID(
	backups []backup.BackupEntry,
	bID string,
) (backup.BackupEntry, bool) {
	if bID == "" {
		return backup.BackupEntry{}, false
	}

	for i := range backups {
		if string(backups[i].ID) == bID {
			return backups[i], true
		}
	}

	return backup.BackupEntry{}, false
}
// fixupAndVerify goes through the set of backups and snapshots used for merging
// and ensures:
//   - the reasons for selecting merge snapshots are distinct
//   - all bases used for merging have a backup model with item and details
//     snapshot ID
//
// The same checks are applied to the assist bases and their backups.
//
// Backups that have overlapping reasons or that are not complete are removed
// from the set. Dropping these is safe because it only affects how much data we
// pull. On the other hand, *not* dropping them is unsafe as it will muck up
// merging when we add stuff to kopia (possibly multiple entries for the same
// item etc).
//
// All four slices on bb are replaced with the filtered results; each becomes
// nil if nothing in it survives.
//
// TODO(pandeyabs): Refactor common code into a helper as part of #3943.
func (bb *backupBases) fixupAndVerify(ctx context.Context) {
	toDrop := findNonUniqueManifests(ctx, bb.mergeBases)

	var (
		backupsToKeep       []backup.BackupEntry
		assistBackupsToKeep []backup.BackupEntry
		mergeToKeep         []backup.ManifestEntry
		assistToKeep        []backup.ManifestEntry
	)

	for _, man := range bb.mergeBases {
		if _, ok := toDrop[man.ID]; ok {
			continue
		}

		// A missing tag gives an empty ID, which getBackupByID reports as not
		// found.
		bID, _ := man.GetTag(TagBackupID)

		bup, ok := getBackupByID(bb.backups, bID)
		if !ok {
			toDrop[man.ID] = struct{}{}

			// Infow, not Info: the trailing arguments are key/value pairs.
			logger.Ctx(ctx).Infow(
				"dropping merge base due to missing backup",
				"manifest_id", man.ID)

			continue
		}

		// Prefer the stream store ID; fall back to the details ID when it is
		// unset.
		deetsID := bup.StreamStoreID
		if len(deetsID) == 0 {
			deetsID = bup.DetailsID
		}

		if len(bup.SnapshotID) == 0 || len(deetsID) == 0 {
			toDrop[man.ID] = struct{}{}

			logger.Ctx(ctx).Infow(
				"dropping merge base due to invalid backup",
				"manifest_id", man.ID)

			continue
		}

		backupsToKeep = append(backupsToKeep, bup)
		mergeToKeep = append(mergeToKeep, man)
	}

	// Drop assist snapshots with overlapping reasons.
	toDropAssists := findNonUniqueManifests(ctx, bb.assistBases)

	for _, man := range bb.assistBases {
		if _, ok := toDropAssists[man.ID]; ok {
			continue
		}

		bID, _ := man.GetTag(TagBackupID)

		bup, ok := getBackupByID(bb.assistBackups, bID)
		if !ok {
			// Record in the assist drop set, the one this loop consults.
			toDropAssists[man.ID] = struct{}{}

			logger.Ctx(ctx).Infow(
				"dropping assist base due to missing backup",
				"manifest_id", man.ID)

			continue
		}

		deetsID := bup.StreamStoreID
		if len(deetsID) == 0 {
			deetsID = bup.DetailsID
		}

		if len(bup.SnapshotID) == 0 || len(deetsID) == 0 {
			toDropAssists[man.ID] = struct{}{}

			logger.Ctx(ctx).Infow(
				"dropping assist base due to invalid backup",
				"manifest_id", man.ID)

			continue
		}

		assistBackupsToKeep = append(assistBackupsToKeep, bup)
		assistToKeep = append(assistToKeep, man)
	}

	bb.backups = backupsToKeep
	bb.mergeBases = mergeToKeep
	bb.assistBases = assistToKeep
	bb.assistBackups = assistBackupsToKeep
}