From 313586a57ec6b0cb67886e8ebd8b8b1c36cff0f5 Mon Sep 17 00:00:00 2001 From: Ashlie Martinez Date: Fri, 12 Jan 2024 12:10:40 -0800 Subject: [PATCH] Add verify implementation The bulk of the code is based on the cleanup backup code that also resides in this package. This code probably doesn't use the project-standard method of printing data to the CLI; it just uses Printf at the moment. --- src/internal/kopia/conn.go | 16 +++ src/internal/kopia/verify_backups.go | 168 +++++++++++++++++++++++++++ 2 files changed, 184 insertions(+) create mode 100644 src/internal/kopia/verify_backups.go diff --git a/src/internal/kopia/conn.go b/src/internal/kopia/conn.go index fb0687d45..2c7675110 100644 --- a/src/internal/kopia/conn.go +++ b/src/internal/kopia/conn.go @@ -88,6 +88,14 @@ type ( ) (*snapshot.Manifest, error) } + multiSnapshotLoader interface { + manifestFinder + LoadSnapshots( + ctx context.Context, + manifestIDs []manifest.ID, + ) ([]*snapshot.Manifest, error) + } + snapshotLoader interface { SnapshotRoot(man *snapshot.Manifest) (fs.Entry, error) } @@ -590,6 +598,14 @@ func persistRetentionConfigs( return clues.WrapWC(ctx, err, "persisting config changes").OrNil() } +func (w *conn) LoadSnapshots( + ctx context.Context, + manifestIDs []manifest.ID, +) ([]*snapshot.Manifest, error) { + mans, err := snapshot.LoadSnapshots(ctx, w.Repository, manifestIDs) + return mans, clues.StackWC(ctx, err).OrNil() +} + func (w *conn) LoadSnapshot( ctx context.Context, id manifest.ID, diff --git a/src/internal/kopia/verify_backups.go b/src/internal/kopia/verify_backups.go new file mode 100644 index 000000000..988ef3c23 --- /dev/null +++ b/src/internal/kopia/verify_backups.go @@ -0,0 +1,168 @@ +package kopia + +import ( + "context" + "fmt" + + "github.com/alcionai/clues" + "github.com/kopia/kopia/repo/manifest" + "github.com/kopia/kopia/snapshot" + "golang.org/x/exp/maps" + + "github.com/alcionai/corso/src/internal/model" + "github.com/alcionai/corso/src/pkg/backup" +
"github.com/alcionai/corso/src/pkg/logger" + "github.com/alcionai/corso/src/pkg/store" +) + +// verifyBackups uses bs and mf to lookup all models/snapshots for backups +// and outputs summary information for backups that are not "complete" backups +// with: +// - a backup model +// - an item data snapshot +// - a details snapshot or details model +// +// Output summary information has the form: +// +// BackupID: +// ItemDataSnapshotID: +// DetailsSnapshotID: +// +// Items that are missing will have a (missing) note appended to them. +func verifyBackups( + ctx context.Context, + bs store.Storer, + mf multiSnapshotLoader, +) error { + logger.Ctx(ctx).Infow("scanning for incomplete backups") + + // Get all snapshot manifests. + snapMetas, err := mf.FindManifests( + ctx, + map[string]string{ + manifest.TypeLabelKey: snapshot.ManifestType, + }) + if err != nil { + return clues.Wrap(err, "getting snapshot metadata") + } + + snapIDs := make([]manifest.ID, 0, len(snapMetas)) + for _, m := range snapMetas { + snapIDs = append(snapIDs, m.ID) + } + + snaps, err := mf.LoadSnapshots(ctx, snapIDs) + if err != nil { + return clues.Wrap(err, "getting snapshots") + } + + var ( + // deets is a hash set of the ModelStoreID or snapshot IDs for backup + // details. It contains the IDs for both legacy details stored in the model + // store and newer details stored as a snapshot because it doesn't matter + // what the storage format is. We only need to know the ID so we can: + // 1. check if there's a corresponding backup for them + deets = map[manifest.ID]struct{}{} + // dataSnaps is a hash set of the snapshot IDs for item data snapshots. + dataSnaps = map[manifest.ID]struct{}{} + ) + + // Sort all the snapshots as either details snapshots or item data snapshots. + for _, snap := range snaps { + // Filter out checkpoint snapshots as they aren't expected to have a backup + // associated with them. 
+ if snap.IncompleteReason == "checkpoint" { + continue + } + + k, _ := makeTagKV(TagBackupCategory) + if _, ok := snap.Tags[k]; ok { + dataSnaps[snap.ID] = struct{}{} + continue + } + + deets[snap.ID] = struct{}{} + } + + // Get all legacy backup details models. The initial version of backup delete + // didn't seem to delete them so they may also be orphaned if the repo is old + // enough. + deetsModels, err := bs.GetIDsForType(ctx, model.BackupDetailsSchema, nil) + if err != nil { + return clues.Wrap(err, "getting legacy backup details") + } + + for _, d := range deetsModels { + deets[d.ModelStoreID] = struct{}{} + } + + // Get all backup models. + bups, err := bs.GetIDsForType(ctx, model.BackupSchema, nil) + if err != nil { + return clues.Wrap(err, "getting all backup models") + } + + fmt.Println("Incomplete backups:") + + for _, bup := range bups { + bm := backup.Backup{} + + if err := bs.GetWithModelStoreID( + ctx, + model.BackupSchema, + bup.ModelStoreID, + &bm); err != nil { + logger.CtxErr(ctx, err).Infow( + "backup model not found", + "search_backup_id", bup.ModelStoreID) + + continue + } + + ssid := bm.StreamStoreID + if len(ssid) == 0 { + ssid = bm.DetailsID + } + + dataMissing := "" + deetsMissing := "" + + if _, dataOK := dataSnaps[manifest.ID(bm.SnapshotID)]; !dataOK { + dataMissing = " (missing)" + } + + if _, deetsOK := deets[manifest.ID(ssid)]; !deetsOK { + deetsMissing = " (missing)" + } + + // Remove from the set so we can mention items that don't seem to have + // backup models referring to them. + delete(dataSnaps, manifest.ID(bm.SnapshotID)) + delete(deets, manifest.ID(ssid)) + + // Output info about the state of the backup if needed. 
+ if len(dataMissing) > 0 || len(deetsMissing) > 0 { + fmt.Printf( + "\tBackupID: %s\n\t\tItemDataSnapshotID: %s%s\n\t\tDetailsSnapshotID: %s%s\n", + bm.ID, + bm.SnapshotID, + dataMissing, + ssid, + deetsMissing) + } + } + + fmt.Println("Additional ItemDataSnapshotIDs missing backup models:") + printIDs(maps.Keys(dataSnaps)) + + fmt.Println("Additional DetailsSnapshotIDs missing backup models:") + printIDs(maps.Keys(deets)) + + return nil +} + +func printIDs(ids []manifest.ID) { + for _, id := range ids { + fmt.Printf("\t%s\n", id) + } +}