corso/src/internal/kopia/snapshot_manager.go
Keepers 9d73d0c8e2
replace errors.* with clues.* (#2924)
Mostly find/replace on errors.N and errors.W. Also turns all wrapf into wrap, and removes as many errorf calls as possible.

Might follow up with a linter to enforce this change.

---

#### Does this PR need a docs update or release note?

- [x]  No

#### Type of change

- [x] 🧹 Tech Debt/Cleanup

#### Issue(s)

* #1970

#### Test Plan

- [x]  Unit test
- [x] 💚 E2E
2023-03-28 04:01:54 +00:00

301 lines
9.1 KiB
Go

package kopia
import (
"context"
"sort"
"github.com/alcionai/clues"
"github.com/kopia/kopia/repo/manifest"
"github.com/kopia/kopia/snapshot"
"golang.org/x/exp/maps"
"github.com/alcionai/corso/src/pkg/logger"
"github.com/alcionai/corso/src/pkg/path"
)
const (
// defaultTagValue is the placeholder value used for key-only tags. Kopia does
// not do comparisons properly for empty tag values right now, so a tag that
// would otherwise have an empty value is given this value instead.
defaultTagValue = "0"
// userTagPrefix is prepended to every user-supplied tag key. Kopia CLI
// prefixes all user tags with "tag:"[1]. Maintaining this will ensure we
// don't accidentally take reserved tags and that tags can be displayed with
// kopia CLI.
// (permalinks)
// [1] https://github.com/kopia/kopia/blob/05e729a7858a6e86cb48ba29fb53cb6045efce2b/cli/command_snapshot_create.go#L169
userTagPrefix = "tag:"
)
// Reason identifies a (resource owner, service, category) tuple. It is used to
// look up previous snapshots by tag and is recorded on a ManifestEntry to
// explain why that snapshot was selected.
type Reason struct {
// ResourceOwner is used as-is as one of the tag keys returned by TagKeys.
ResourceOwner string
// Service and Category are combined into a single tag key by TagKeys.
Service path.ServiceType
Category path.CategoryType
}
// TagKeys returns the raw (not yet prefixed) tag keys that identify snapshots
// for this Reason: first the resource owner, then the combined
// service/category string.
func (r Reason) TagKeys() []string {
	keys := make([]string, 0, 2)
	keys = append(keys, r.ResourceOwner)
	keys = append(keys, serviceCatString(r.Service, r.Category))

	return keys
}
// ManifestEntry pairs a snapshot manifest with the Reasons it was selected
// for. The manifest is embedded, so its fields (e.g. ID, Tags,
// IncompleteReason) are accessible directly on the entry.
type ManifestEntry struct {
*snapshot.Manifest
// Reasons contains the ResourceOwners and Service/Categories that caused this
// snapshot to be selected as a base. We can't reuse OwnersCats here because
// it's possible some ResourceOwners will have a subset of the Categories as
// the reason for selecting a snapshot. For example:
// 1. backup user1 email,contacts -> B1
// 2. backup user1 contacts -> B2 (uses B1 as base)
// 3. backup user1 email,contacts,events (uses B1 for email, B2 for contacts)
Reasons []Reason
}
// GetTag looks up a user tag on the embedded manifest. The caller passes the
// raw key; it is normalized with makeTagKV before the lookup. The returned
// bool reports whether the tag was present.
func (me ManifestEntry) GetTag(key string) (string, bool) {
	normalized, _ := makeTagKV(key)

	if val, found := me.Tags[normalized]; found {
		return val, true
	}

	return "", false
}
// snapshotManager is the set of snapshot-store operations the functions in
// this file depend on: finding manifest metadata by tag and loading full
// snapshot manifests by ID.
type snapshotManager interface {
// FindManifests returns metadata for manifests selected by the given tags.
// NOTE(review): exact matching semantics are defined by the implementation;
// callers here expect manifests carrying all of the provided k:v pairs.
FindManifests(
ctx context.Context,
tags map[string]string,
) ([]*manifest.EntryMetadata, error)
// LoadSnapshots returns the snapshot manifests for the given manifest IDs.
LoadSnapshots(ctx context.Context, ids []manifest.ID) ([]*snapshot.Manifest, error)
}
// serviceCatString builds the combined service/category tag key by
// concatenating the string forms of the service and the category, in that
// order and with no separator.
func serviceCatString(s path.ServiceType, c path.CategoryType) string {
	service, category := s.String(), c.String()

	return service + category
}
// makeTagKV normalizes the provided key by adding the user tag prefix, which
// protects it from clobbering similarly named tags that do not come from user
// input (user inputs can still collide with each other).
// Returns the normalized key plus a default value. When embedding a key-only
// tag, the returned default value must be used instead of an empty string.
func makeTagKV(k string) (string, string) {
	key := userTagPrefix + k

	return key, defaultTagValue
}
// getLastIdx looks for the newest entry in metas whose manifest is already
// present in foundMans and is complete (has no IncompleteReason). It returns
// that manifest together with its index in metas, or nil, -1 if there is no
// such entry.
//
// metas is sorted in place by ModTime, oldest first, so the returned index
// refers to the sorted order that the caller sees afterwards.
func getLastIdx(
	foundMans map[manifest.ID]*ManifestEntry,
	metas []*manifest.EntryMetadata,
) (*ManifestEntry, int) {
	// The entries currently seem to arrive ordered from earliest to latest
	// timestamp (undocumented), so sorting in that same direction keeps the
	// number of swaps low.
	sort.Slice(metas, func(a, b int) bool {
		return metas[a].ModTime.Before(metas[b].ModTime)
	})

	// Walk from the newest entry back towards the oldest.
	for pos := len(metas); pos > 0; pos-- {
		idx := pos - 1
		entry := foundMans[metas[idx].ID]

		if entry != nil && len(entry.IncompleteReason) == 0 {
			return entry, idx
		}
	}

	return nil, -1
}
// manifestsSinceLastComplete scans mans from newest to oldest and collects:
//   - the newest incomplete manifest, but only if it is newer than the newest
//     complete manifest (or no complete manifest exists), and
//   - the newest complete manifest, if one exists.
//
// The incomplete manifest, when present, comes first in the returned slice.
// The bool reports whether a complete manifest was found. The returned slice
// is nil when mans contains no manifests.
func manifestsSinceLastComplete(
	ctx context.Context,
	mans []*snapshot.Manifest,
) ([]*snapshot.Manifest, bool) {
	var (
		picked         []*snapshot.Manifest
		haveIncomplete bool
	)

	// Manifests should already follow the order of the IDs they were fetched
	// with, but sort oldest to newest just in case.
	ordered := snapshot.SortByTime(mans, false)

	for idx := len(ordered) - 1; idx >= 0; idx-- {
		cur := ordered[idx]

		if len(cur.IncompleteReason) == 0 {
			// The first complete snapshot ends the search, whether or not an
			// incomplete one was seen before it.
			picked = append(picked, cur)
			logger.Ctx(ctx).Infow("found complete snapshot", "snapshot_id", cur.ID)

			return picked, true
		}

		// Only the newest incomplete snapshot is of interest.
		if haveIncomplete {
			continue
		}

		picked = append(picked, cur)
		haveIncomplete = true

		logger.Ctx(ctx).Infow("found incomplete snapshot", "snapshot_id", cur.ID)
	}

	return picked, false
}
// fetchPrevManifests finds the most recent complete and (maybe) incomplete
// manifests for the given reason and tags. The lookup tags are the reason's
// tag keys plus the provided tags, all normalized via normalizeTagKVs.
//
// foundMans holds manifests already selected for other reasons. If the newest
// matching manifest is a complete one from foundMans, it is returned without
// loading anything. Otherwise all matching manifests newer than the cached
// complete one are loaded and reduced to at most one incomplete and one
// complete manifest. An incomplete manifest is only returned when it is newer
// than the newest complete one. If none of the loaded manifests is complete,
// the cached complete manifest (when there is one) is appended to the result.
//
// Returns no manifests if an error occurs or nothing matches the tags.
func fetchPrevManifests(
	ctx context.Context,
	sm snapshotManager,
	foundMans map[manifest.ID]*ManifestEntry,
	reason Reason,
	tags map[string]string,
) ([]*snapshot.Manifest, error) {
	reasonKeys := reason.TagKeys()
	query := make(map[string]string, len(reasonKeys)+len(tags))

	// Reason keys are key-only tags. Caller-provided tags are copied over
	// afterwards and therefore win on a key clash.
	for _, key := range reasonKeys {
		query[key] = ""
	}

	maps.Copy(query, tags)

	metas, err := sm.FindManifests(ctx, normalizeTagKVs(query))
	if err != nil {
		return nil, clues.Wrap(err, "fetching manifest metas by tag")
	}

	if len(metas) == 0 {
		return nil, nil
	}

	// getLastIdx sorts metas oldest to newest, so cachedIdx indexes that order.
	cached, cachedIdx := getLastIdx(foundMans, metas)
	newestIdx := len(metas) - 1

	// The newest match is a complete snapshot we already hold. Nothing more to
	// look up.
	if cachedIdx == newestIdx {
		return []*snapshot.Manifest{cached.Manifest}, nil
	}

	// TODO(ashmrtn): Remainder of the function can be simplified if we can inject
	// different tags to the snapshot checkpoints than the complete snapshot.

	// Load everything newer than the cached complete snapshot. This can re-fetch
	// the most recent incomplete manifest, but it keeps the logic for returning
	// an incomplete manifest newer than the newest complete one simple.
	newerIDs := make([]manifest.ID, 0, newestIdx-cachedIdx)
	for _, meta := range metas[cachedIdx+1:] {
		newerIDs = append(newerIDs, meta.ID)
	}

	loaded, err := sm.LoadSnapshots(ctx, newerIDs)
	if err != nil {
		return nil, clues.Wrap(err, "fetching previous manifests")
	}

	found, hasCompleted := manifestsSinceLastComplete(ctx, loaded)
	if hasCompleted || cached == nil {
		return found, nil
	}

	// No newer complete manifest exists, so the cached complete manifest also
	// serves this ResourceOwner, Service, Category.
	found = append(found, cached.Manifest)

	logger.Ctx(ctx).Infow(
		"reusing cached complete snapshot",
		"snapshot_id", cached.ID)

	return found, nil
}
// fetchPrevSnapshotManifests collects manifests of complete and maybe
// incomplete snapshots for each (resource owner, service, category) tuple in
// reasons. Each tuple can contribute up to two manifests: one complete and one
// incomplete, the latter only if it is newer than the tuple's newest complete
// manifest. A manifest matched by several tuples is returned once, with every
// matching tuple listed in its Reasons. The order of the returned entries is
// unspecified.
// External callers can access this via wrapper.FetchPrevSnapshotManifests().
//
// If tags are provided, manifests must include a superset of the k:v pairs
// specified by those tags. Tags should be passed with their raw values; they
// are normalized inside the func using makeTagKV.
//
// A failed lookup for one tuple is logged and skipped, so the result may be
// missing manifests for that tuple.
func fetchPrevSnapshotManifests(
	ctx context.Context,
	sm snapshotManager,
	reasons []Reason,
	tags map[string]string,
) []*ManifestEntry {
	byID := map[manifest.ID]*ManifestEntry{}

	// For every serviceCat/resource owner pair being backed up, check for a
	// previous incomplete and/or complete snapshot to use as a base. This could
	// be extended to return more than the most recent snapshots, but that may
	// need more memory at runtime.
	for _, reason := range reasons {
		ictx := clues.Add(ctx, "service", reason.Service.String(), "category", reason.Category.String())
		logger.Ctx(ictx).Info("searching for previous manifests for reason")

		found, err := fetchPrevManifests(ictx, sm, byID, reason, tags)
		if err != nil {
			// The snapshot can still complete fine without a base, just less
			// efficiently.
			logger.CtxErr(ictx, err).Info("fetching previous snapshot manifests for service/category/resource owner")
			continue
		}

		for _, m := range found {
			// Already selected for another reason: record this one as well.
			if entry := byID[m.ID]; entry != nil {
				entry.Reasons = append(entry.Reasons, reason)
				continue
			}

			byID[m.ID] = &ManifestEntry{
				Manifest: m,
				Reasons:  []Reason{reason},
			}
		}
	}

	entries := make([]*ManifestEntry, 0, len(byID))
	for _, entry := range byID {
		entries = append(entries, entry)
	}

	return entries
}
// normalizeTagKVs returns a new map in which every key of tags has been
// normalized with makeTagKV and every empty value has been replaced by the
// default tag value. Non-empty values are kept as they are. The input map is
// not modified.
func normalizeTagKVs(tags map[string]string) map[string]string {
	normalized := make(map[string]string, len(tags))

	for rawKey, val := range tags {
		key, fallback := makeTagKV(rawKey)

		if val == "" {
			normalized[key] = fallback
			continue
		}

		normalized[key] = val
	}

	return normalized
}