package kopia

import (
	"context"
	"sort"

	"github.com/kopia/kopia/repo/manifest"
	"github.com/kopia/kopia/snapshot"
	"github.com/pkg/errors"
	"golang.org/x/exp/maps"

	"github.com/alcionai/corso/src/pkg/logger"
	"github.com/alcionai/corso/src/pkg/path"
)

const (
	// Kopia does not do comparisons properly for empty tags right now so add some
	// placeholder value to them.
	defaultTagValue = "0"

	// Kopia CLI prefixes all user tags with "tag:"[1]. Maintaining this will
	// ensure we don't accidentally take reserved tags and that tags can be
	// displayed with kopia CLI.
	// (permalinks)
	// [1] https://github.com/kopia/kopia/blob/05e729a7858a6e86cb48ba29fb53cb6045efce2b/cli/command_snapshot_create.go#L169
	userTagPrefix = "tag:"
)

// Reason identifies a single (resource owner, service, category) tuple. It is
// attached to a ManifestEntry to record why that snapshot was selected as a
// base.
type Reason struct {
	ResourceOwner string
	Service       path.ServiceType
	Category      path.CategoryType
}

// ManifestEntry pairs a kopia snapshot manifest with the Reasons it was
// selected as a base snapshot.
type ManifestEntry struct {
	*snapshot.Manifest
	// Reason contains the ResourceOwners and Service/Categories that caused this
	// snapshot to be selected as a base. We can't reuse OwnersCats here because
	// it's possible some ResourceOwners will have a subset of the Categories as
	// the reason for selecting a snapshot. For example:
	// 1. backup user1 email,contacts -> B1
	// 2. backup user1 contacts -> B2 (uses B1 as base)
	// 3. backup user1 email,contacts,events (uses B1 for email, B2 for contacts)
	Reasons []Reason
}

// snapshotManager is the set of manifest-store operations this file relies
// on: finding manifest metadata by tag and loading full snapshot manifests by
// ID.
type snapshotManager interface {
	// FindManifests returns the metadata of the manifests selected by tags.
	FindManifests(
		ctx context.Context,
		tags map[string]string,
	) ([]*manifest.EntryMetadata, error)
	// LoadSnapshots returns the snapshot manifests for the given manifest IDs.
	LoadSnapshots(ctx context.Context, ids []manifest.ID) ([]*snapshot.Manifest, error)
}

// OwnersCats holds the resource owners and the service/category pairs that
// tags and previous-snapshot lookups are generated for. ResourceOwners is used
// as a set; ServiceCats is keyed by the string that MakeServiceCat produces.
type OwnersCats struct {
	ResourceOwners map[string]struct{}
	ServiceCats    map[string]ServiceCat
}

// ServiceCat pairs a path service with a path category.
type ServiceCat struct {
	Service  path.ServiceType
	Category path.CategoryType
}

// MakeServiceCat produces the expected OwnersCats.ServiceCats key from a
// path service and path category, as well as the ServiceCat value.
func MakeServiceCat(s path.ServiceType, c path.CategoryType) (string, ServiceCat) { return serviceCatString(s, c), ServiceCat{s, c} } func serviceCatTag(p path.Path) string { return serviceCatString(p.Service(), p.Category()) } func serviceCatString(s path.ServiceType, c path.CategoryType) string { return s.String() + c.String() } // MakeTagKV normalizes the provided key to protect it from clobbering // similarly named tags from non-user input (user inputs are still open // to collisions amongst eachother). // Returns the normalized Key plus a default value. If you're embedding a // key-only tag, the returned default value msut be used instead of an // empty string. func MakeTagKV(k string) (string, string) { return userTagPrefix + k, defaultTagValue } // tagsFromStrings returns a map[string]string with tags for all ownersCats // passed in. Currently uses placeholder values for each tag because there can // be multiple instances of resource owners and categories in a single snapshot. func tagsFromStrings(oc *OwnersCats) map[string]string { if oc == nil { return map[string]string{} } res := make(map[string]string, len(oc.ServiceCats)+len(oc.ResourceOwners)) for k := range oc.ServiceCats { tk, tv := MakeTagKV(k) res[tk] = tv } for k := range oc.ResourceOwners { tk, tv := MakeTagKV(k) res[tk] = tv } return res } // getLastIdx searches for manifests contained in both foundMans and metas // and returns the most recent complete manifest index and the manifest it // corresponds to. If no complete manifest is in both lists returns nil, -1. func getLastIdx( foundMans map[manifest.ID]*ManifestEntry, metas []*manifest.EntryMetadata, ) (*ManifestEntry, int) { // Minor optimization: the current code seems to return the entries from // earliest timestamp to latest (this is undocumented). Sort in the same // fashion so that we don't incur a bunch of swaps. sort.Slice(metas, func(i, j int) bool { return metas[i].ModTime.Before(metas[j].ModTime) }) // Search newest to oldest. 
for i := len(metas) - 1; i >= 0; i-- { m := foundMans[metas[i].ID] if m == nil || len(m.IncompleteReason) > 0 { continue } return m, i } return nil, -1 } // manifestsSinceLastComplete searches through mans and returns the most recent // complete manifest (if one exists), maybe the most recent incomplete // manifest, and a bool denoting if a complete manifest was found. If the newest // incomplete manifest is more recent than the newest complete manifest then // adds it to the returned list. Otherwise no incomplete manifest is returned. // Returns nil if there are no complete or incomplete manifests in mans. func manifestsSinceLastComplete( mans []*snapshot.Manifest, ) ([]*snapshot.Manifest, bool) { var ( res []*snapshot.Manifest foundIncomplete bool foundComplete bool ) // Manifests should maintain the sort order of the original IDs that were used // to fetch the data, but just in case sort oldest to newest. mans = snapshot.SortByTime(mans, false) for i := len(mans) - 1; i >= 0; i-- { m := mans[i] if len(m.IncompleteReason) > 0 { if !foundIncomplete { foundIncomplete = true res = append(res, m) } continue } // Once we find a complete snapshot we're done, even if we haven't // found an incomplete one yet. res = append(res, m) foundComplete = true break } return res, foundComplete } // fetchPrevManifests returns the most recent, as-of-yet unfound complete and // (maybe) incomplete manifests in metas. If the most recent incomplete manifest // is older than the most recent complete manifest no incomplete manifest is // returned. If only incomplete manifests exists, returns the most recent one. // Returns no manifests if an error occurs. 
func fetchPrevManifests( ctx context.Context, sm snapshotManager, foundMans map[manifest.ID]*ManifestEntry, serviceCat ServiceCat, resourceOwner string, tags map[string]string, ) ([]*ManifestEntry, error) { tags = normalizeTagKVs(tags) serviceCatKey, _ := MakeServiceCat(serviceCat.Service, serviceCat.Category) allTags := normalizeTagKVs(map[string]string{ serviceCatKey: "", resourceOwner: "", }) maps.Copy(allTags, tags) reason := Reason{ ResourceOwner: resourceOwner, Service: serviceCat.Service, Category: serviceCat.Category, } metas, err := sm.FindManifests(ctx, allTags) if err != nil { return nil, errors.Wrap(err, "fetching manifest metas by tag") } if len(metas) == 0 { return nil, nil } man, lastCompleteIdx := getLastIdx(foundMans, metas) // We have a complete cached snapshot and it's the most recent. No need // to do anything else. if lastCompleteIdx == len(metas)-1 { man.Reasons = append(man.Reasons, reason) return nil, nil } // TODO(ashmrtn): Remainder of the function can be simplified if we can inject // different tags to the snapshot checkpoints than the complete snapshot. // Fetch all manifests newer than the oldest complete snapshot. A little // wasteful as we may also re-fetch the most recent incomplete manifest, but // it reduces the complexity of returning the most recent incomplete manifest // if it is newer than the most recent complete manifest. 
ids := make([]manifest.ID, 0, len(metas)-(lastCompleteIdx+1)) for i := lastCompleteIdx + 1; i < len(metas); i++ { ids = append(ids, metas[i].ID) } mans, err := sm.LoadSnapshots(ctx, ids) if err != nil { return nil, errors.Wrap(err, "fetching previous manifests") } found, hasCompleted := manifestsSinceLastComplete(mans) res := make([]*ManifestEntry, 0, len(found)) for _, m := range found { res = append(res, &ManifestEntry{ Manifest: m, Reasons: []Reason{reason}, }) } // If we didn't find another complete manifest then we need to mark the // previous complete manifest as having this ResourceOwner, Service, Category // as the reason as well. if !hasCompleted && man != nil { man.Reasons = append(man.Reasons, reason) } return res, nil } // fetchPrevSnapshotManifests returns a set of manifests for complete and maybe // incomplete snapshots for the given (resource owner, service, category) // tuples. Up to two manifests can be returned per tuple: one complete and one // incomplete. An incomplete manifest may be returned if it is newer than the // newest complete manifest for the tuple. Manifests are deduped such that if // multiple tuples match the same manifest it will only be returned once. // External callers can access this via wrapper.FetchPrevSnapshotManifests(). // If tags are provided, manifests must include a superset of the k:v pairs // specified by those tags. Tags should pass their raw values, and will be // normalized inside the func using MakeTagKV. func fetchPrevSnapshotManifests( ctx context.Context, sm snapshotManager, oc *OwnersCats, tags map[string]string, ) []*ManifestEntry { if oc == nil { return nil } mans := map[manifest.ID]*ManifestEntry{} // For each serviceCat/resource owner pair that we will be backing up, see if // there's a previous incomplete snapshot and/or a previous complete snapshot // we can pass in. Can be expanded to return more than the most recent // snapshots, but may require more memory at runtime. 
for _, serviceCat := range oc.ServiceCats { for resourceOwner := range oc.ResourceOwners { found, err := fetchPrevManifests( ctx, sm, mans, serviceCat, resourceOwner, tags, ) if err != nil { logger.Ctx(ctx).Warnw( "fetching previous snapshot manifests for service/category/resource owner", "error", err, "service/category", serviceCat, ) // Snapshot can still complete fine, just not as efficient. continue } // If we found more recent snapshots then add them. for _, m := range found { found := mans[m.ID] if found == nil { mans[m.ID] = m continue } // If the manifest already exists and it's incomplete then we should // merge the reasons for consistency. This will become easier to handle // once we update how checkpoint manifests are tagged. if len(found.IncompleteReason) == 0 { continue } found.Reasons = append(found.Reasons, m.Reasons...) } } } res := make([]*ManifestEntry, 0, len(mans)) for _, m := range mans { res = append(res, m) } return res } func normalizeTagKVs(tags map[string]string) map[string]string { t2 := make(map[string]string, len(tags)) for k, v := range tags { mk, mv := MakeTagKV(k) if len(v) == 0 { v = mv } t2[mk] = v } return t2 }