From 6fd453a6994e19608b4954cfcc69247d2d513bec Mon Sep 17 00:00:00 2001 From: ashmrtn Date: Thu, 1 Jun 2023 08:41:16 -0700 Subject: [PATCH 1/6] Remove old code for finding bases (#3526) Now that we're finding bases with a different component remove the code that used to find bases since it's no longer in use --- #### Does this PR need a docs update or release note? - [ ] :white_check_mark: Yes, it's included - [ ] :clock1: Yes, but in a later PR - [x] :no_entry: No #### Type of change - [ ] :sunflower: Feature - [ ] :bug: Bugfix - [ ] :world_map: Documentation - [ ] :robot: Supportability/Tests - [ ] :computer: CI/Deployment - [x] :broom: Tech Debt/Cleanup #### Issue(s) * #3202 #### Test Plan - [ ] :muscle: Manual - [x] :zap: Unit test - [x] :green_heart: E2E --- src/internal/kopia/base_finder.go | 89 ++ src/internal/kopia/base_finder_test.go | 2 - src/internal/kopia/conn.go | 12 - src/internal/kopia/snapshot_manager.go | 307 ------ src/internal/kopia/snapshot_manager_test.go | 932 ------------------ src/internal/kopia/wrapper.go | 21 - src/internal/kopia/wrapper_test.go | 12 +- .../operations/backup_integration_test.go | 5 +- 8 files changed, 96 insertions(+), 1284 deletions(-) delete mode 100644 src/internal/kopia/snapshot_manager.go delete mode 100644 src/internal/kopia/snapshot_manager_test.go diff --git a/src/internal/kopia/base_finder.go b/src/internal/kopia/base_finder.go index 837691d89..b01c4401a 100644 --- a/src/internal/kopia/base_finder.go +++ b/src/internal/kopia/base_finder.go @@ -13,8 +13,40 @@ import ( "github.com/alcionai/corso/src/internal/operations/inject" "github.com/alcionai/corso/src/pkg/backup" "github.com/alcionai/corso/src/pkg/logger" + "github.com/alcionai/corso/src/pkg/path" ) +const ( + // Kopia does not do comparisons properly for empty tags right now so add some + // placeholder value to them. + defaultTagValue = "0" + + // Kopia CLI prefixes all user tags with "tag:"[1]. Maintaining this will + // ensure we don't accidentally take reserved tags and that tags can be + // displayed with kopia CLI. + // (permalinks) + // [1] https://github.com/kopia/kopia/blob/05e729a7858a6e86cb48ba29fb53cb6045efce2b/cli/command_snapshot_create.go#L169 + userTagPrefix = "tag:" +) + +type Reason struct { + ResourceOwner string + Service path.ServiceType + Category path.CategoryType +} + +func (r Reason) TagKeys() []string { + return []string{ + r.ResourceOwner, + serviceCatString(r.Service, r.Category), + } +} + +// Key is the concatenation of the ResourceOwner, Service, and Category. +func (r Reason) Key() string { + return r.ResourceOwner + r.Service.String() + r.Category.String() +} + type backupBases struct { backups []BackupEntry mergeBases []ManifestEntry @@ -26,6 +58,63 @@ type BackupEntry struct { Reasons []Reason } +type ManifestEntry struct { + *snapshot.Manifest + // Reason contains the ResourceOwners and Service/Categories that caused this + // snapshot to be selected as a base. We can't reuse OwnersCats here because + // it's possible some ResourceOwners will have a subset of the Categories as + // the reason for selecting a snapshot. For example: + // 1. backup user1 email,contacts -> B1 + // 2. backup user1 contacts -> B2 (uses B1 as base) + // 3. 
backup user1 email,contacts,events (uses B1 for email, B2 for contacts) + Reasons []Reason +} + +func (me ManifestEntry) GetTag(key string) (string, bool) { + k, _ := makeTagKV(key) + v, ok := me.Tags[k] + + return v, ok +} + +type snapshotManager interface { + FindManifests( + ctx context.Context, + tags map[string]string, + ) ([]*manifest.EntryMetadata, error) + LoadSnapshot(ctx context.Context, id manifest.ID) (*snapshot.Manifest, error) +} + +func serviceCatString(s path.ServiceType, c path.CategoryType) string { + return s.String() + c.String() +} + +// MakeTagKV normalizes the provided key to protect it from clobbering +// similarly named tags from non-user input (user inputs are still open +// to collisions amongst eachother). +// Returns the normalized Key plus a default value. If you're embedding a +// key-only tag, the returned default value msut be used instead of an +// empty string. +func makeTagKV(k string) (string, string) { + return userTagPrefix + k, defaultTagValue +} + +func normalizeTagKVs(tags map[string]string) map[string]string { + t2 := make(map[string]string, len(tags)) + + for k, v := range tags { + mk, mv := makeTagKV(k) + + if len(v) == 0 { + v = mv + } + + t2[mk] = v + } + + return t2 +} + type baseFinder struct { sm snapshotManager bg inject.GetBackuper diff --git a/src/internal/kopia/base_finder_test.go b/src/internal/kopia/base_finder_test.go index 0c3761618..2382063cd 100644 --- a/src/internal/kopia/base_finder_test.go +++ b/src/internal/kopia/base_finder_test.go @@ -27,11 +27,9 @@ const ( var ( testT1 = time.Now() testT2 = testT1.Add(1 * time.Hour) - testT3 = testT2.Add(1 * time.Hour) testID1 = manifest.ID("snap1") testID2 = manifest.ID("snap2") - testID3 = manifest.ID("snap3") testBackup1 = "backupID1" testBackup2 = "backupID2" diff --git a/src/internal/kopia/conn.go b/src/internal/kopia/conn.go index 3447e810f..30870a1b3 100644 --- a/src/internal/kopia/conn.go +++ b/src/internal/kopia/conn.go @@ -418,18 +418,6 @@ func (w *conn) LoadSnapshot( return man, nil } -func (w *conn) LoadSnapshots( - ctx context.Context, - ids []manifest.ID, -) ([]*snapshot.Manifest, error) { - mans, err := snapshot.LoadSnapshots(ctx, w.Repository, ids) - if err != nil { - return nil, clues.Stack(err).WithClues(ctx) - } - - return mans, nil -} - func (w *conn) SnapshotRoot(man *snapshot.Manifest) (fs.Entry, error) { return snapshotfs.SnapshotRoot(w.Repository, man) } diff --git a/src/internal/kopia/snapshot_manager.go b/src/internal/kopia/snapshot_manager.go deleted file mode 100644 index 5fa09223c..000000000 --- a/src/internal/kopia/snapshot_manager.go +++ /dev/null @@ -1,307 +0,0 @@ -package kopia - -import ( - "context" - "sort" - - "github.com/alcionai/clues" - "github.com/kopia/kopia/repo/manifest" - "github.com/kopia/kopia/snapshot" - "golang.org/x/exp/maps" - - "github.com/alcionai/corso/src/pkg/logger" - "github.com/alcionai/corso/src/pkg/path" -) - -const ( - // Kopia does not do comparisons properly for empty tags right now so add some - // placeholder value to them. - defaultTagValue = "0" - - // Kopia CLI prefixes all user tags with "tag:"[1]. Maintaining this will - // ensure we don't accidentally take reserved tags and that tags can be - // displayed with kopia CLI. 
- // (permalinks) - // [1] https://github.com/kopia/kopia/blob/05e729a7858a6e86cb48ba29fb53cb6045efce2b/cli/command_snapshot_create.go#L169 - userTagPrefix = "tag:" -) - -type Reason struct { - ResourceOwner string - Service path.ServiceType - Category path.CategoryType -} - -func (r Reason) TagKeys() []string { - return []string{ - r.ResourceOwner, - serviceCatString(r.Service, r.Category), - } -} - -// Key is the concatenation of the ResourceOwner, Service, and Category. -func (r Reason) Key() string { - return r.ResourceOwner + r.Service.String() + r.Category.String() -} - -type ManifestEntry struct { - *snapshot.Manifest - // Reason contains the ResourceOwners and Service/Categories that caused this - // snapshot to be selected as a base. We can't reuse OwnersCats here because - // it's possible some ResourceOwners will have a subset of the Categories as - // the reason for selecting a snapshot. For example: - // 1. backup user1 email,contacts -> B1 - // 2. backup user1 contacts -> B2 (uses B1 as base) - // 3. backup user1 email,contacts,events (uses B1 for email, B2 for contacts) - Reasons []Reason -} - -func (me ManifestEntry) GetTag(key string) (string, bool) { - k, _ := makeTagKV(key) - v, ok := me.Tags[k] - - return v, ok -} - -type snapshotManager interface { - FindManifests( - ctx context.Context, - tags map[string]string, - ) ([]*manifest.EntryMetadata, error) - LoadSnapshot(ctx context.Context, id manifest.ID) (*snapshot.Manifest, error) - // TODO(ashmrtn): Remove this when we switch to the new BaseFinder. - LoadSnapshots(ctx context.Context, ids []manifest.ID) ([]*snapshot.Manifest, error) -} - -func serviceCatString(s path.ServiceType, c path.CategoryType) string { - return s.String() + c.String() -} - -// MakeTagKV normalizes the provided key to protect it from clobbering -// similarly named tags from non-user input (user inputs are still open -// to collisions amongst eachother). -// Returns the normalized Key plus a default value. If you're embedding a -// key-only tag, the returned default value msut be used instead of an -// empty string. -func makeTagKV(k string) (string, string) { - return userTagPrefix + k, defaultTagValue -} - -// getLastIdx searches for manifests contained in both foundMans and metas -// and returns the most recent complete manifest index and the manifest it -// corresponds to. If no complete manifest is in both lists returns nil, -1. -func getLastIdx( - foundMans map[manifest.ID]*ManifestEntry, - metas []*manifest.EntryMetadata, -) (*ManifestEntry, int) { - // Minor optimization: the current code seems to return the entries from - // earliest timestamp to latest (this is undocumented). Sort in the same - // fashion so that we don't incur a bunch of swaps. - sort.Slice(metas, func(i, j int) bool { - return metas[i].ModTime.Before(metas[j].ModTime) - }) - - // Search newest to oldest. - for i := len(metas) - 1; i >= 0; i-- { - m := foundMans[metas[i].ID] - if m == nil || len(m.IncompleteReason) > 0 { - continue - } - - return m, i - } - - return nil, -1 -} - -// manifestsSinceLastComplete searches through mans and returns the most recent -// complete manifest (if one exists), maybe the most recent incomplete -// manifest, and a bool denoting if a complete manifest was found. If the newest -// incomplete manifest is more recent than the newest complete manifest then -// adds it to the returned list. Otherwise no incomplete manifest is returned. -// Returns nil if there are no complete or incomplete manifests in mans. 
-func manifestsSinceLastComplete( - ctx context.Context, - mans []*snapshot.Manifest, -) ([]*snapshot.Manifest, bool) { - var ( - res []*snapshot.Manifest - foundIncomplete bool - foundComplete bool - ) - - // Manifests should maintain the sort order of the original IDs that were used - // to fetch the data, but just in case sort oldest to newest. - mans = snapshot.SortByTime(mans, false) - - for i := len(mans) - 1; i >= 0; i-- { - m := mans[i] - - if len(m.IncompleteReason) > 0 { - if !foundIncomplete { - res = append(res, m) - foundIncomplete = true - - logger.Ctx(ctx).Infow("found incomplete snapshot", "snapshot_id", m.ID) - } - - continue - } - - // Once we find a complete snapshot we're done, even if we haven't - // found an incomplete one yet. - res = append(res, m) - foundComplete = true - - logger.Ctx(ctx).Infow("found complete snapshot", "snapshot_id", m.ID) - - break - } - - return res, foundComplete -} - -// fetchPrevManifests returns the most recent, as-of-yet unfound complete and -// (maybe) incomplete manifests in metas. If the most recent incomplete manifest -// is older than the most recent complete manifest no incomplete manifest is -// returned. If only incomplete manifests exists, returns the most recent one. -// Returns no manifests if an error occurs. -func fetchPrevManifests( - ctx context.Context, - sm snapshotManager, - foundMans map[manifest.ID]*ManifestEntry, - reason Reason, - tags map[string]string, -) ([]*snapshot.Manifest, error) { - allTags := map[string]string{} - - for _, k := range reason.TagKeys() { - allTags[k] = "" - } - - maps.Copy(allTags, tags) - allTags = normalizeTagKVs(allTags) - - metas, err := sm.FindManifests(ctx, allTags) - if err != nil { - return nil, clues.Wrap(err, "fetching manifest metas by tag") - } - - if len(metas) == 0 { - return nil, nil - } - - man, lastCompleteIdx := getLastIdx(foundMans, metas) - - // We have a complete cached snapshot and it's the most recent. No need - // to do anything else. - if lastCompleteIdx == len(metas)-1 { - return []*snapshot.Manifest{man.Manifest}, nil - } - - // TODO(ashmrtn): Remainder of the function can be simplified if we can inject - // different tags to the snapshot checkpoints than the complete snapshot. - - // Fetch all manifests newer than the oldest complete snapshot. A little - // wasteful as we may also re-fetch the most recent incomplete manifest, but - // it reduces the complexity of returning the most recent incomplete manifest - // if it is newer than the most recent complete manifest. - ids := make([]manifest.ID, 0, len(metas)-(lastCompleteIdx+1)) - for i := lastCompleteIdx + 1; i < len(metas); i++ { - ids = append(ids, metas[i].ID) - } - - mans, err := sm.LoadSnapshots(ctx, ids) - if err != nil { - return nil, clues.Wrap(err, "fetching previous manifests") - } - - found, hasCompleted := manifestsSinceLastComplete(ctx, mans) - - // If we didn't find another complete manifest then we need to mark the - // previous complete manifest as having this ResourceOwner, Service, Category - // as the reason as well. - if !hasCompleted && man != nil { - found = append(found, man.Manifest) - logger.Ctx(ctx).Infow( - "reusing cached complete snapshot", - "snapshot_id", man.ID) - } - - return found, nil -} - -// fetchPrevSnapshotManifests returns a set of manifests for complete and maybe -// incomplete snapshots for the given (resource owner, service, category) -// tuples. Up to two manifests can be returned per tuple: one complete and one -// incomplete. 
An incomplete manifest may be returned if it is newer than the -// newest complete manifest for the tuple. Manifests are deduped such that if -// multiple tuples match the same manifest it will only be returned once. -// External callers can access this via wrapper.FetchPrevSnapshotManifests(). -// If tags are provided, manifests must include a superset of the k:v pairs -// specified by those tags. Tags should pass their raw values, and will be -// normalized inside the func using MakeTagKV. -func fetchPrevSnapshotManifests( - ctx context.Context, - sm snapshotManager, - reasons []Reason, - tags map[string]string, -) []*ManifestEntry { - mans := map[manifest.ID]*ManifestEntry{} - - // For each serviceCat/resource owner pair that we will be backing up, see if - // there's a previous incomplete snapshot and/or a previous complete snapshot - // we can pass in. Can be expanded to return more than the most recent - // snapshots, but may require more memory at runtime. - for _, reason := range reasons { - ictx := clues.Add(ctx, "service", reason.Service.String(), "category", reason.Category.String()) - logger.Ctx(ictx).Info("searching for previous manifests for reason") - - found, err := fetchPrevManifests(ictx, sm, mans, reason, tags) - if err != nil { - logger.CtxErr(ictx, err).Info("fetching previous snapshot manifests for service/category/resource owner") - - // Snapshot can still complete fine, just not as efficient. - continue - } - - // If we found more recent snapshots then add them. - for _, m := range found { - man := mans[m.ID] - if man == nil { - mans[m.ID] = &ManifestEntry{ - Manifest: m, - Reasons: []Reason{reason}, - } - - continue - } - - // This manifest has multiple reasons for being chosen. Merge them here. - man.Reasons = append(man.Reasons, reason) - } - } - - res := make([]*ManifestEntry, 0, len(mans)) - for _, m := range mans { - res = append(res, m) - } - - return res -} - -func normalizeTagKVs(tags map[string]string) map[string]string { - t2 := make(map[string]string, len(tags)) - - for k, v := range tags { - mk, mv := makeTagKV(k) - - if len(v) == 0 { - v = mv - } - - t2[mk] = v - } - - return t2 -} diff --git a/src/internal/kopia/snapshot_manager_test.go b/src/internal/kopia/snapshot_manager_test.go deleted file mode 100644 index 9acc1735f..000000000 --- a/src/internal/kopia/snapshot_manager_test.go +++ /dev/null @@ -1,932 +0,0 @@ -package kopia - -import ( - "context" - "testing" - "time" - - "github.com/alcionai/clues" - "github.com/kopia/kopia/fs" - "github.com/kopia/kopia/repo/manifest" - "github.com/kopia/kopia/snapshot" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/suite" - - "github.com/alcionai/corso/src/internal/tester" - "github.com/alcionai/corso/src/pkg/path" -) - -func newManifestInfo( - id manifest.ID, - modTime time.Time, - incomplete bool, - tags ...string, -) manifestInfo { - incompleteStr := "" - if incomplete { - incompleteStr = "checkpoint" - } - - structTags := make(map[string]string, len(tags)) - - for _, t := range tags { - tk, _ := makeTagKV(t) - structTags[tk] = "" - } - - return manifestInfo{ - tags: structTags, - metadata: &manifest.EntryMetadata{ - ID: id, - ModTime: modTime, - }, - man: &snapshot.Manifest{ - ID: id, - StartTime: fs.UTCTimestamp(modTime.UnixNano()), - IncompleteReason: incompleteStr, - }, - } -} - -type mockSnapshotManager struct { - data []manifestInfo - loadCallback func(ids []manifest.ID) -} - -func matchesTags(mi manifestInfo, tags map[string]string) bool { - for k := range tags { - if _, ok := 
mi.tags[k]; !ok { - return false - } - } - - return true -} - -func (msm *mockSnapshotManager) FindManifests( - ctx context.Context, - tags map[string]string, -) ([]*manifest.EntryMetadata, error) { - if msm == nil { - return nil, assert.AnError - } - - res := []*manifest.EntryMetadata{} - - for _, mi := range msm.data { - if matchesTags(mi, tags) { - res = append(res, mi.metadata) - } - } - - return res, nil -} - -func (msm *mockSnapshotManager) LoadSnapshots( - ctx context.Context, - ids []manifest.ID, -) ([]*snapshot.Manifest, error) { - if msm == nil { - return nil, assert.AnError - } - - // Allow checking set of IDs passed in. - if msm.loadCallback != nil { - msm.loadCallback(ids) - } - - res := []*snapshot.Manifest{} - - for _, id := range ids { - for _, mi := range msm.data { - if mi.man.ID == id { - res = append(res, mi.man) - } - } - } - - return res, nil -} - -func (msm *mockSnapshotManager) LoadSnapshot( - ctx context.Context, - id manifest.ID, -) (*snapshot.Manifest, error) { - return nil, clues.New("not implemented") -} - -type SnapshotFetchUnitSuite struct { - tester.Suite -} - -func TestSnapshotFetchUnitSuite(t *testing.T) { - suite.Run(t, &SnapshotFetchUnitSuite{Suite: tester.NewUnitSuite(t)}) -} - -func (suite *SnapshotFetchUnitSuite) TestFetchPrevSnapshots() { - table := []struct { - name string - input []Reason - data []manifestInfo - // Use this to denote which manifests in data should be expected. Allows - // defining data in a table while not repeating things between data and - // expected. - expectedIdxs []int - // Use this to denote the Reasons a manifest is selected. The int maps to - // the index of the manifest in data. - expectedReasons map[int][]Reason - // Expected number of times a manifest should try to be loaded from kopia. - // Used to check that caching is functioning properly. 
- expectedLoadCounts map[manifest.ID]int - }{ - { - name: "AllOneSnapshot", - input: testAllUsersAllCats, - data: []manifestInfo{ - newManifestInfo( - testID1, - testT1, - testCompleteMan, - testMail, - testEvents, - testUser1, - testUser2, - testUser3, - ), - }, - expectedIdxs: []int{0}, - expectedReasons: map[int][]Reason{ - 0: { - Reason{ - ResourceOwner: testUser1, - Service: path.ExchangeService, - Category: path.EmailCategory, - }, - Reason{ - ResourceOwner: testUser2, - Service: path.ExchangeService, - Category: path.EmailCategory, - }, - Reason{ - ResourceOwner: testUser3, - Service: path.ExchangeService, - Category: path.EmailCategory, - }, - Reason{ - ResourceOwner: testUser1, - Service: path.ExchangeService, - Category: path.EventsCategory, - }, - Reason{ - ResourceOwner: testUser2, - Service: path.ExchangeService, - Category: path.EventsCategory, - }, - Reason{ - ResourceOwner: testUser3, - Service: path.ExchangeService, - Category: path.EventsCategory, - }, - }, - }, - expectedLoadCounts: map[manifest.ID]int{ - testID1: 1, - }, - }, - { - name: "SplitByCategory", - input: testAllUsersAllCats, - data: []manifestInfo{ - newManifestInfo( - testID1, - testT1, - testCompleteMan, - testMail, - testUser1, - testUser2, - testUser3, - ), - newManifestInfo( - testID2, - testT2, - testCompleteMan, - testEvents, - testUser1, - testUser2, - testUser3, - ), - }, - expectedIdxs: []int{0, 1}, - expectedReasons: map[int][]Reason{ - 0: { - { - ResourceOwner: testUser1, - Service: path.ExchangeService, - Category: path.EmailCategory, - }, - { - ResourceOwner: testUser2, - Service: path.ExchangeService, - Category: path.EmailCategory, - }, - { - ResourceOwner: testUser3, - Service: path.ExchangeService, - Category: path.EmailCategory, - }, - }, - 1: { - Reason{ - ResourceOwner: testUser1, - Service: path.ExchangeService, - Category: path.EventsCategory, - }, - Reason{ - ResourceOwner: testUser2, - Service: path.ExchangeService, - Category: path.EventsCategory, - }, - Reason{ - ResourceOwner: testUser3, - Service: path.ExchangeService, - Category: path.EventsCategory, - }, - }, - }, - expectedLoadCounts: map[manifest.ID]int{ - testID1: 1, - testID2: 1, - }, - }, - { - name: "IncompleteNewerThanComplete", - input: testAllUsersMail, - data: []manifestInfo{ - newManifestInfo( - testID1, - testT1, - testCompleteMan, - testMail, - testUser1, - testUser2, - testUser3, - ), - newManifestInfo( - testID2, - testT2, - testIncompleteMan, - testMail, - testUser1, - testUser2, - testUser3, - ), - }, - expectedIdxs: []int{0, 1}, - expectedReasons: map[int][]Reason{ - 0: { - Reason{ - ResourceOwner: testUser1, - Service: path.ExchangeService, - Category: path.EmailCategory, - }, - Reason{ - ResourceOwner: testUser2, - Service: path.ExchangeService, - Category: path.EmailCategory, - }, - Reason{ - ResourceOwner: testUser3, - Service: path.ExchangeService, - Category: path.EmailCategory, - }, - }, - 1: { - Reason{ - ResourceOwner: testUser1, - Service: path.ExchangeService, - Category: path.EmailCategory, - }, - Reason{ - ResourceOwner: testUser2, - Service: path.ExchangeService, - Category: path.EmailCategory, - }, - Reason{ - ResourceOwner: testUser3, - Service: path.ExchangeService, - Category: path.EmailCategory, - }, - }, - }, - expectedLoadCounts: map[manifest.ID]int{ - testID1: 1, - testID2: 3, - }, - }, - { - name: "IncompleteOlderThanComplete", - input: testAllUsersMail, - data: []manifestInfo{ - newManifestInfo( - testID1, - testT1, - testIncompleteMan, - testMail, - testUser1, - testUser2, - testUser3, 
- ), - newManifestInfo( - testID2, - testT2, - testCompleteMan, - testMail, - testUser1, - testUser2, - testUser3, - ), - }, - expectedIdxs: []int{1}, - expectedReasons: map[int][]Reason{ - 1: { - Reason{ - ResourceOwner: testUser1, - Service: path.ExchangeService, - Category: path.EmailCategory, - }, - Reason{ - ResourceOwner: testUser2, - Service: path.ExchangeService, - Category: path.EmailCategory, - }, - Reason{ - ResourceOwner: testUser3, - Service: path.ExchangeService, - Category: path.EmailCategory, - }, - }, - }, - expectedLoadCounts: map[manifest.ID]int{ - testID1: 1, - testID2: 1, - }, - }, - { - name: "OnlyIncomplete", - input: testAllUsersMail, - data: []manifestInfo{ - newManifestInfo( - testID1, - testT1, - testIncompleteMan, - testMail, - testUser1, - testUser2, - testUser3, - ), - }, - expectedIdxs: []int{0}, - expectedReasons: map[int][]Reason{ - 0: { - Reason{ - ResourceOwner: testUser1, - Service: path.ExchangeService, - Category: path.EmailCategory, - }, - Reason{ - ResourceOwner: testUser2, - Service: path.ExchangeService, - Category: path.EmailCategory, - }, - Reason{ - ResourceOwner: testUser3, - Service: path.ExchangeService, - Category: path.EmailCategory, - }, - }, - }, - expectedLoadCounts: map[manifest.ID]int{ - testID1: 3, - }, - }, - { - name: "NewestComplete", - input: testAllUsersMail, - data: []manifestInfo{ - newManifestInfo( - testID1, - testT1, - testCompleteMan, - testMail, - testUser1, - testUser2, - testUser3, - ), - newManifestInfo( - testID2, - testT2, - testCompleteMan, - testMail, - testUser1, - testUser2, - testUser3, - ), - }, - expectedIdxs: []int{1}, - expectedReasons: map[int][]Reason{ - 1: { - Reason{ - ResourceOwner: testUser1, - Service: path.ExchangeService, - Category: path.EmailCategory, - }, - Reason{ - ResourceOwner: testUser2, - Service: path.ExchangeService, - Category: path.EmailCategory, - }, - Reason{ - ResourceOwner: testUser3, - Service: path.ExchangeService, - Category: path.EmailCategory, - }, - }, - }, - expectedLoadCounts: map[manifest.ID]int{ - testID1: 1, - testID2: 1, - }, - }, - { - name: "NewestIncomplete", - input: testAllUsersMail, - data: []manifestInfo{ - newManifestInfo( - testID1, - testT1, - testIncompleteMan, - testMail, - testUser1, - testUser2, - testUser3, - ), - newManifestInfo( - testID2, - testT2, - testIncompleteMan, - testMail, - testUser1, - testUser2, - testUser3, - ), - }, - expectedIdxs: []int{1}, - expectedReasons: map[int][]Reason{ - 1: { - Reason{ - ResourceOwner: testUser1, - Service: path.ExchangeService, - Category: path.EmailCategory, - }, - Reason{ - ResourceOwner: testUser2, - Service: path.ExchangeService, - Category: path.EmailCategory, - }, - Reason{ - ResourceOwner: testUser3, - Service: path.ExchangeService, - Category: path.EmailCategory, - }, - }, - }, - expectedLoadCounts: map[manifest.ID]int{ - testID1: 3, - testID2: 3, - }, - }, - { - name: "SomeCachedSomeNewer", - input: testAllUsersMail, - data: []manifestInfo{ - newManifestInfo( - testID1, - testT1, - testCompleteMan, - testMail, - testUser1, - testUser2, - testUser3, - ), - newManifestInfo( - testID2, - testT2, - testCompleteMan, - testMail, - testUser3, - ), - }, - expectedIdxs: []int{0, 1}, - expectedReasons: map[int][]Reason{ - 0: { - Reason{ - ResourceOwner: testUser1, - Service: path.ExchangeService, - Category: path.EmailCategory, - }, - Reason{ - ResourceOwner: testUser2, - Service: path.ExchangeService, - Category: path.EmailCategory, - }, - }, - 1: { - Reason{ - ResourceOwner: testUser3, - Service: 
path.ExchangeService, - Category: path.EmailCategory, - }, - }, - }, - expectedLoadCounts: map[manifest.ID]int{ - testID1: 2, - testID2: 1, - }, - }, - { - name: "SomeCachedSomeNewerDifferentCategories", - input: testAllUsersAllCats, - data: []manifestInfo{ - newManifestInfo( - testID1, - testT1, - testCompleteMan, - testMail, - testEvents, - testUser1, - testUser2, - testUser3, - ), - newManifestInfo( - testID2, - testT2, - testCompleteMan, - testMail, - testUser3, - ), - }, - expectedIdxs: []int{0, 1}, - expectedReasons: map[int][]Reason{ - 0: { - Reason{ - ResourceOwner: testUser1, - Service: path.ExchangeService, - Category: path.EmailCategory, - }, - Reason{ - ResourceOwner: testUser2, - Service: path.ExchangeService, - Category: path.EmailCategory, - }, - Reason{ - ResourceOwner: testUser1, - Service: path.ExchangeService, - Category: path.EventsCategory, - }, - Reason{ - ResourceOwner: testUser2, - Service: path.ExchangeService, - Category: path.EventsCategory, - }, - Reason{ - ResourceOwner: testUser3, - Service: path.ExchangeService, - Category: path.EventsCategory, - }, - }, - 1: { - Reason{ - ResourceOwner: testUser3, - Service: path.ExchangeService, - Category: path.EmailCategory, - }, - }, - }, - expectedLoadCounts: map[manifest.ID]int{ - testID1: 2, - testID2: 1, - }, - }, - { - name: "SomeCachedSomeNewerIncomplete", - input: testAllUsersMail, - data: []manifestInfo{ - newManifestInfo( - testID1, - testT1, - testCompleteMan, - testMail, - testUser1, - testUser2, - testUser3, - ), - newManifestInfo( - testID2, - testT2, - testIncompleteMan, - testMail, - testUser3, - ), - }, - expectedIdxs: []int{0, 1}, - expectedReasons: map[int][]Reason{ - 0: { - Reason{ - ResourceOwner: testUser1, - Service: path.ExchangeService, - Category: path.EmailCategory, - }, - Reason{ - ResourceOwner: testUser2, - Service: path.ExchangeService, - Category: path.EmailCategory, - }, - Reason{ - ResourceOwner: testUser3, - Service: path.ExchangeService, - Category: path.EmailCategory, - }, - }, - 1: { - Reason{ - ResourceOwner: testUser3, - Service: path.ExchangeService, - Category: path.EmailCategory, - }, - }, - }, - expectedLoadCounts: map[manifest.ID]int{ - testID1: 1, - testID2: 1, - }, - }, - { - name: "NoMatches", - input: testAllUsersMail, - data: nil, - expectedIdxs: nil, - // Stop failure for nil-map comparison. - expectedLoadCounts: map[manifest.ID]int{}, - }, - } - - for _, test := range table { - suite.Run(test.name, func() { - t := suite.T() - - ctx, flush := tester.NewContext(t) - defer flush() - - msm := &mockSnapshotManager{ - data: test.data, - } - - loadCounts := map[manifest.ID]int{} - msm.loadCallback = func(ids []manifest.ID) { - for _, id := range ids { - loadCounts[id]++ - } - } - - snaps := fetchPrevSnapshotManifests(ctx, msm, test.input, nil) - - // Check the proper snapshot manifests were returned. - expected := make([]*snapshot.Manifest, 0, len(test.expectedIdxs)) - for _, i := range test.expectedIdxs { - expected = append(expected, test.data[i].man) - } - - got := make([]*snapshot.Manifest, 0, len(snaps)) - for _, s := range snaps { - got = append(got, s.Manifest) - } - - assert.ElementsMatch(t, expected, got) - - // Check the reasons for selecting each manifest are correct. 
- expectedReasons := make(map[manifest.ID][]Reason, len(test.expectedReasons)) - for idx, reason := range test.expectedReasons { - expectedReasons[test.data[idx].man.ID] = reason - } - - for _, found := range snaps { - reason, ok := expectedReasons[found.ID] - if !ok { - // Missing or extra snapshots will be reported by earlier checks. - continue - } - - assert.ElementsMatch( - t, - reason, - found.Reasons, - "incorrect reasons for snapshot with ID %s", - found.ID, - ) - } - - // Check number of loads to make sure caching is working properly. - // Need to manually check because we don't know the order the - // user/service/category labels will be iterated over. For some tests this - // could cause more loads than the ideal case. - assert.Len(t, loadCounts, len(test.expectedLoadCounts)) - for id, count := range loadCounts { - assert.GreaterOrEqual(t, test.expectedLoadCounts[id], count) - } - }) - } -} - -func (suite *SnapshotFetchUnitSuite) TestFetchPrevSnapshots_customTags() { - data := []manifestInfo{ - newManifestInfo( - testID1, - testT1, - false, - testMail, - testUser1, - "fnords", - "smarf", - ), - } - expectLoad1T1 := map[manifest.ID]int{ - testID1: 1, - } - - table := []struct { - name string - input []Reason - tags map[string]string - // Use this to denote which manifests in data should be expected. Allows - // defining data in a table while not repeating things between data and - // expected. - expectedIdxs []int - // Expected number of times a manifest should try to be loaded from kopia. - // Used to check that caching is functioning properly. - expectedLoadCounts map[manifest.ID]int - }{ - { - name: "no tags specified", - tags: nil, - expectedIdxs: []int{0}, - expectedLoadCounts: expectLoad1T1, - }, - { - name: "all custom tags", - tags: map[string]string{ - "fnords": "", - "smarf": "", - }, - expectedIdxs: []int{0}, - expectedLoadCounts: expectLoad1T1, - }, - { - name: "subset of custom tags", - tags: map[string]string{"fnords": ""}, - expectedIdxs: []int{0}, - expectedLoadCounts: expectLoad1T1, - }, - { - name: "custom tag mismatch", - tags: map[string]string{"bojangles": ""}, - expectedIdxs: nil, - expectedLoadCounts: nil, - }, - } - - for _, test := range table { - suite.Run(test.name, func() { - t := suite.T() - - ctx, flush := tester.NewContext(t) - defer flush() - - msm := &mockSnapshotManager{ - data: data, - } - - loadCounts := map[manifest.ID]int{} - msm.loadCallback = func(ids []manifest.ID) { - for _, id := range ids { - loadCounts[id]++ - } - } - - snaps := fetchPrevSnapshotManifests(ctx, msm, testAllUsersAllCats, test.tags) - - expected := make([]*snapshot.Manifest, 0, len(test.expectedIdxs)) - for _, i := range test.expectedIdxs { - expected = append(expected, data[i].man) - } - - got := make([]*snapshot.Manifest, 0, len(snaps)) - for _, s := range snaps { - got = append(got, s.Manifest) - } - - assert.ElementsMatch(t, expected, got) - - // Need to manually check because we don't know the order the - // user/service/category labels will be iterated over. For some tests this - // could cause more loads than the ideal case. - assert.Len(t, loadCounts, len(test.expectedLoadCounts)) - for id, count := range loadCounts { - assert.GreaterOrEqual(t, test.expectedLoadCounts[id], count) - } - }) - } -} - -// mockErrorSnapshotManager returns an error the first time LoadSnapshot and -// FindSnapshot are called. After that it passes the calls through to the -// contained snapshotManager. 
-type mockErrorSnapshotManager struct { - retFindErr bool - retLoadErr bool - sm snapshotManager -} - -func (msm *mockErrorSnapshotManager) FindManifests( - ctx context.Context, - tags map[string]string, -) ([]*manifest.EntryMetadata, error) { - if !msm.retFindErr { - msm.retFindErr = true - return nil, assert.AnError - } - - return msm.sm.FindManifests(ctx, tags) -} - -func (msm *mockErrorSnapshotManager) LoadSnapshots( - ctx context.Context, - ids []manifest.ID, -) ([]*snapshot.Manifest, error) { - if !msm.retLoadErr { - msm.retLoadErr = true - return nil, assert.AnError - } - - return msm.sm.LoadSnapshots(ctx, ids) -} - -func (msm *mockErrorSnapshotManager) LoadSnapshot( - ctx context.Context, - id manifest.ID, -) (*snapshot.Manifest, error) { - return nil, clues.New("not implemented") -} - -func (suite *SnapshotFetchUnitSuite) TestFetchPrevSnapshots_withErrors() { - t := suite.T() - - ctx, flush := tester.NewContext(t) - defer flush() - - input := testAllUsersMail - mockData := []manifestInfo{ - newManifestInfo( - testID1, - testT1, - testCompleteMan, - testMail, - testUser1, - ), - newManifestInfo( - testID2, - testT2, - testCompleteMan, - testMail, - testUser2, - ), - newManifestInfo( - testID3, - testT3, - testCompleteMan, - testMail, - testUser3, - ), - } - - msm := &mockErrorSnapshotManager{ - sm: &mockSnapshotManager{ - data: mockData, - }, - } - - snaps := fetchPrevSnapshotManifests(ctx, msm, input, nil) - - // Only 1 snapshot should be chosen because the other two attempts fail. - // However, which one is returned is non-deterministic because maps are used. - assert.Len(t, snaps, 1) -} diff --git a/src/internal/kopia/wrapper.go b/src/internal/kopia/wrapper.go index f7830eded..f23bef534 100644 --- a/src/internal/kopia/wrapper.go +++ b/src/internal/kopia/wrapper.go @@ -596,27 +596,6 @@ func (w Wrapper) DeleteSnapshot( return nil } -// FetchPrevSnapshotManifests returns a set of manifests for complete and maybe -// incomplete snapshots for the given (resource owner, service, category) -// tuples. Up to two manifests can be returned per tuple: one complete and one -// incomplete. An incomplete manifest may be returned if it is newer than the -// newest complete manifest for the tuple. Manifests are deduped such that if -// multiple tuples match the same manifest it will only be returned once. -// If tags are provided, manifests must include a superset of the k:v pairs -// specified by those tags. Tags should pass their raw values, and will be -// normalized inside the func using MakeTagKV. 
-func (w Wrapper) FetchPrevSnapshotManifests( - ctx context.Context, - reasons []Reason, - tags map[string]string, -) ([]*ManifestEntry, error) { - if w.c == nil { - return nil, clues.Stack(errNotConnected).WithClues(ctx) - } - - return fetchPrevSnapshotManifests(ctx, w.c, reasons, tags), nil -} - func (w Wrapper) NewBaseFinder(bg inject.GetBackuper) (*baseFinder, error) { return newBaseFinder(w.c, bg) } diff --git a/src/internal/kopia/wrapper_test.go b/src/internal/kopia/wrapper_test.go index 00416c8ce..d00828bad 100644 --- a/src/internal/kopia/wrapper_test.go +++ b/src/internal/kopia/wrapper_test.go @@ -1101,14 +1101,8 @@ func (suite *KopiaSimpleRepoIntegrationSuite) TestBackupExcludeItem() { subtreePath := subtreePathTmp.ToBuilder().Dir() - manifests, err := suite.w.FetchPrevSnapshotManifests( - suite.ctx, - []Reason{reason}, - nil, - ) - require.NoError(suite.T(), err, clues.ToCore(err)) - require.Len(suite.T(), manifests, 1) - require.Equal(suite.T(), suite.snapshotID, manifests[0].ID) + man, err := suite.w.c.LoadSnapshot(suite.ctx, suite.snapshotID) + require.NoError(suite.T(), err, "getting base snapshot: %v", clues.ToCore(err)) tags := map[string]string{} @@ -1206,7 +1200,7 @@ func (suite *KopiaSimpleRepoIntegrationSuite) TestBackupExcludeItem() { suite.ctx, []IncrementalBase{ { - Manifest: manifests[0].Manifest, + Manifest: man, SubtreePaths: []*path.Builder{ subtreePath, }, diff --git a/src/internal/operations/backup_integration_test.go b/src/internal/operations/backup_integration_test.go index e0db072c8..0d1407978 100644 --- a/src/internal/operations/backup_integration_test.go +++ b/src/internal/operations/backup_integration_test.go @@ -226,7 +226,10 @@ func checkBackupIsInManifests( found bool ) - mans, err := kw.FetchPrevSnapshotManifests(ctx, reasons, tags) + bf, err := kw.NewBaseFinder(bo.store) + require.NoError(t, err, clues.ToCore(err)) + + mans, err := bf.FindBases(ctx, reasons, tags) require.NoError(t, err, clues.ToCore(err)) for _, man := range mans { From aa271fe09bda2455860a1e187785989859c48a4a Mon Sep 17 00:00:00 2001 From: Georgi Matev Date: Thu, 1 Jun 2023 10:46:12 -0700 Subject: [PATCH 2/6] Exchange cleanup improvements (#3551) Exchange cleanup improvements: * Explicitly clean-up folders in DeletedItems - Empty folder does not seem to work reliably * Cleanup any folders in DeletedItems - helps get rid of random manual deletions * Harden paging - issues where if no items were deleted initial page, we'd never make progress * More focused search on `msgfolderroot` --- #### Does this PR need a docs update or release note? 
- [ ] :white_check_mark: Yes, it's included - [ ] :clock1: Yes, but in a later PR - [x] :no_entry: No #### Type of change - [ ] :sunflower: Feature - [x] :bug: Bugfix - [ ] :world_map: Documentation - [ ] :robot: Supportability/Tests - [x] :computer: CI/Deployment - [ ] :broom: Tech Debt/Cleanup #### Issue(s) * # #### Test Plan - [x] :muscle: Manual - [ ] :zap: Unit test - [ ] :green_heart: E2E --- src/cmd/purge/scripts/exchangePurge.ps1 | 194 +++++++++++++++--------- 1 file changed, 122 insertions(+), 72 deletions(-) diff --git a/src/cmd/purge/scripts/exchangePurge.ps1 b/src/cmd/purge/scripts/exchangePurge.ps1 index 240364b8d..c6ca7228a 100644 --- a/src/cmd/purge/scripts/exchangePurge.ps1 +++ b/src/cmd/purge/scripts/exchangePurge.ps1 @@ -185,77 +185,109 @@ function Get-FoldersToPurge { [string[]]$FolderNamePurgeList = @(), [Parameter(Mandatory = $False, HelpMessage = "Purge folders with these prefixes")] - [string[]]$FolderPrefixPurgeList = @() + [string[]]$FolderPrefixPurgeList = @(), + + [Parameter(Mandatory = $False, HelpMessage = "Perform shallow traversal only")] + [bool]$PurgeTraversalShallow = $false ) - $foldersToDelete = @() + Write-Host "`nLooking for folders under well-known folder: $WellKnownRoot matching folders: $FolderNamePurgeList or prefixes: $FolderPrefixPurgeList for user: $User" - # SOAP message for getting the folders - $body = @" - + $foldersToDelete = @() + $traversal = "Deep" + if ($PurgeTraversalShallow) { + $traversal = "Shallow" + } + + $offset = 0 + $moreToList = $true + + # get all folder pages + while ($moreToList) { + # SOAP message for getting the folders + $body = @" + Default + "@ - Write-Host "`nLooking for folders under well-known folder: $WellKnownRoot matching folders: $FolderNamePurgeList or prefixes: $FolderPrefixPurgeList for user: $User" - $getFolderIdMsg = Initialize-SOAPMessage -User $User -Body $body - $response = Invoke-SOAPRequest -Token $Token -Message $getFolderIdMsg + try { + Write-Host "`nRetrieving folders starting from offset: $offset" - # Get the folders from the response - $folders = $response | Select-Xml -XPath "//t:Folders/*" -Namespace @{t = "http://schemas.microsoft.com/exchange/services/2006/types" } | - Select-Object -ExpandProperty Node + $getFolderIdMsg = Initialize-SOAPMessage -User $User -Body $body + $response = Invoke-SOAPRequest -Token $Token -Message $getFolderIdMsg - # Are there more folders to list - $rootFolder = $response | Select-Xml -XPath "//m:RootFolder" -Namespace @{m = "http://schemas.microsoft.com/exchange/services/2006/messages" } | - Select-Object -ExpandProperty Node - $moreToList = ![System.Convert]::ToBoolean($rootFolder.IncludesLastItemInRange) + # Are there more folders to list + $rootFolder = $response | Select-Xml -XPath "//m:RootFolder" -Namespace @{m = "http://schemas.microsoft.com/exchange/services/2006/messages" } | + Select-Object -ExpandProperty Node + $moreToList = ![System.Convert]::ToBoolean($rootFolder.IncludesLastItemInRange) + } + catch { + Write-Host "Error retrieving folders" - # Loop through folders - foreach ($folder in $folders) { - $folderName = $folder.DisplayName - $folderCreateTime = $folder.ExtendedProperty - | Where-Object { $_.ExtendedFieldURI.PropertyTag -eq "0x3007" } - | Select-Object -ExpandProperty Value - | Get-Date + Write-Host $response.OuterXml + Exit + } + + # Get the folders from the response + $folders = $response | Select-Xml -XPath "//t:Folders/*" -Namespace @{t = "http://schemas.microsoft.com/exchange/services/2006/types" } | + Select-Object -ExpandProperty Node 
- if ($FolderNamePurgeList.count -gt 0) { - $IsNameMatchParams = @{ - 'FolderName' = $folderName; - 'FolderNamePurgeList' = $FolderNamePurgeList - } + # Loop through folders + foreach ($folder in $folders) { + $folderName = $folder.DisplayName + $folderCreateTime = $folder.ExtendedProperty + | Where-Object { $_.ExtendedFieldURI.PropertyTag -eq "0x3007" } + | Select-Object -ExpandProperty Value + | Get-Date - if ((IsNameMatch @IsNameMatchParams)) { - Write-Host "• Found name match: $folderName ($folderCreateTime)" - $foldersToDelete += $folder - continue + if ($FolderNamePurgeList.count -gt 0) { + $IsNameMatchParams = @{ + 'FolderName' = $folderName; + 'FolderNamePurgeList' = $FolderNamePurgeList + } + + if ((IsNameMatch @IsNameMatchParams)) { + Write-Host "• Found name match: $folderName ($folderCreateTime)" + $foldersToDelete += $folder + continue + } + } + + if ($FolderPrefixPurgeList.count -gt 0) { + $IsPrefixAndAgeMatchParams = @{ + 'FolderName' = $folderName; + 'FolderCreateTime' = $folderCreateTime; + 'FolderPrefixPurgeList' = $FolderPrefixPurgeList; + 'PurgeBeforeTimestamp' = $PurgeBeforeTimestamp; + } + + if ((IsPrefixAndAgeMatch @IsPrefixAndAgeMatchParams)) { + Write-Host "• Found prefix match: $folderName ($folderCreateTime)" + $foldersToDelete += $folder + } } } - if ($FolderPrefixPurgeList.count -gt 0) { - $IsPrefixAndAgeMatchParams = @{ - 'FolderName' = $folderName; - 'FolderCreateTime' = $folderCreateTime; - 'FolderPrefixPurgeList' = $FolderPrefixPurgeList; - 'PurgeBeforeTimestamp' = $PurgeBeforeTimestamp; - } - - if ((IsPrefixAndAgeMatch @IsPrefixAndAgeMatchParams)) { - Write-Host "• Found prefix match: $folderName ($folderCreateTime)" - $foldersToDelete += $folder - } + if (!$moreToList -or $null -eq $folders) { + Write-Host "Retrieved all folders." + } + else { + $offset += $folders.count } } # powershel does not do well when returning empty arrays - return $foldersToDelete, $moreToList + return , $foldersToDelete } function Empty-Folder { @@ -355,7 +387,10 @@ function Purge-Folders { [string[]]$FolderPrefixPurgeList = @(), [Parameter(Mandatory = $False, HelpMessage = "Purge folders before this date time (UTC)")] - [datetime]$PurgeBeforeTimestamp + [datetime]$PurgeBeforeTimestamp, + + [Parameter(Mandatory = $False, HelpMessage = "Perform shallow traversal only")] + [bool]$PurgeTraversalShallow = $false ) if (($FolderNamePurgeList.count -eq 0) -and @@ -364,9 +399,6 @@ function Purge-Folders { Exit } - Write-Host "`nPurging CI-produced folders..." 
- Write-Host "--------------------------------" - if ($FolderNamePurgeList.count -gt 0) { Write-Host "Folders with names: $FolderNamePurgeList" } @@ -382,30 +414,27 @@ function Purge-Folders { 'WellKnownRoot' = $WellKnownRoot; 'FolderNamePurgeList' = $FolderNamePurgeList; 'FolderPrefixPurgeList' = $FolderPrefixPurgeList; - 'PurgeBeforeTimestamp' = $PurgeBeforeTimestamp + 'PurgeBeforeTimestamp' = $PurgeBeforeTimestamp; + 'PurgeTraversalShallow' = $PurgeTraversalShallow } - $moreToList = $True - # only get max of 1000 results so we may need to iterate over eligible folders - while ($moreToList) { - $foldersToDelete, $moreToList = Get-FoldersToPurge @foldersToDeleteParams - $foldersToDeleteCount = $foldersToDelete.count - $foldersToDeleteIds = @() - $folderNames = @() + $foldersToDelete = Get-FoldersToPurge @foldersToDeleteParams + $foldersToDeleteCount = $foldersToDelete.count + $foldersToDeleteIds = @() + $folderNames = @() - if ($foldersToDeleteCount -eq 0) { - Write-Host "`nNo folders to purge matching the criteria" - break - } - - foreach ($folder in $foldersToDelete) { - $foldersToDeleteIds += $folder.FolderId.Id - $folderNames += $folder.DisplayName - } - - Empty-Folder -FolderIdList $foldersToDeleteIds -FolderNameList $folderNames - Delete-Folder -FolderIdList $foldersToDeleteIds -FolderNameList $folderNames + if ($foldersToDeleteCount -eq 0) { + Write-Host "`nNo folders to purge matching the criteria" + return } + + foreach ($folder in $foldersToDelete) { + $foldersToDeleteIds += $folder.FolderId.Id + $folderNames += $folder.DisplayName + } + + Empty-Folder -FolderIdList $foldersToDeleteIds -FolderNameList $folderNames + Delete-Folder -FolderIdList $foldersToDeleteIds -FolderNameList $folderNames } function Create-Contact { @@ -459,7 +488,7 @@ function Get-ItemsToPurge { $foldersToSearchBody = "" if (![String]::IsNullOrEmpty($SubFolderName)) { - $subFolders, $moreToList = Get-FoldersToPurge -WellKnownRoot $WellKnownRoot -FolderNamePurgeList $SubFolderName -PurgeBeforeTimestamp $PurgeBeforeTimestamp + $subFolders = Get-FoldersToPurge -WellKnownRoot $WellKnownRoot -FolderNamePurgeList $SubFolderName -PurgeBeforeTimestamp $PurgeBeforeTimestamp if ($subFolders.count -gt 0 ) { $foldersToSearchBody = "" @@ -615,6 +644,8 @@ function Purge-Items { } } +### MAIN #### + Write-Host 'Authenticating with Exchange Web Services ...' $global:Token = Get-AccessToken | ConvertTo-SecureString -AsPlainText -Force @@ -622,14 +653,17 @@ $global:Token = Get-AccessToken | ConvertTo-SecureString -AsPlainText -Force $FolderNamePurgeList = $FolderNamePurgeList | ForEach-Object { @($_.Split(',').Trim()) } $FolderPrefixPurgeList = $FolderPrefixPurgeList | ForEach-Object { @($_.Split(',').Trim()) } +Write-Host "`nPurging CI-produced folders under 'msgfolderroot' ..." 
+Write-Host "--------------------------------------------------------" + $purgeFolderParams = @{ - 'WellKnownRoot' = "root"; + 'WellKnownRoot' = "msgfolderroot"; 'FolderNamePurgeList' = $FolderNamePurgeList; 'FolderPrefixPurgeList' = $FolderPrefixPurgeList; 'PurgeBeforeTimestamp' = $PurgeBeforeTimestamp } -#purge older prefix folders +#purge older prefix folders from msgfolderroot Purge-Folders @purgeFolderParams #purge older contacts @@ -647,4 +681,20 @@ Purge-Items -ItemsFolder "calendar" -ItemsSubFolder "Birthdays" -PurgeBeforeTime # -/Recoverable Items/SubstrateHolds Write-Host "`nProcess well-known folders that are always purged" Write-Host "---------------------------------------------------" -Empty-Folder -WellKnownRoot "deleteditems", "recoverableitemsroot" \ No newline at end of file + +# We explicitly also clean direct folders under Deleted Items since there is some evidence +# that suggests that emptying alone may not be reliable +Write-Host "`nExplicit delete of all folders under 'DeletedItems' ..." +Write-Host "----------------------------------------------------------" + +$purgeFolderParams = @{ + 'WellKnownRoot' = "deleteditems"; + 'FolderNamePurgeList' = $FolderNamePurgeList; + 'FolderPrefixPurgeList' = @('*'); + 'PurgeBeforeTimestamp' = (Get-Date); + 'PurgeTraversalShallow' = $true +} + +Purge-Folders @purgeFolderParams + +Empty-Folder -WellKnownRootList "deleteditems", "recoverableitemsroot" From 54442a3ca79fec3ce3eb0a7c850926c3fd8c7a6c Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 1 Jun 2023 18:11:17 +0000 Subject: [PATCH 3/6] =?UTF-8?q?=E2=AC=86=EF=B8=8F=20Bump=20golang.org/x/to?= =?UTF-8?q?ols=20from=200.9.1=20to=200.9.2=20in=20/src=20(#3553)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [golang.org/x/tools](https://github.com/golang/tools) from 0.9.1 to 0.9.2.
Release notes

Sourced from golang.org/x/tools's releases.

gopls/v0.9.2

This release contains many bug fixes, particularly related to problems that would require restarting gopls.

Note about network usage: among these fixes was a change to allow network usage when reloading the workspace. Reloading occurs when a go.mod, go.sum, or go.work file changes on disk. In the past, gopls would only allow network during the first workspace load. This resulted in confusing behavior when, for example, a new dependency is added to a go.mod file and gopls could not load it, but loading succeeded on restart. See #54069 for more details.

Configuration changes

directoryFilters at arbitrary depth

The "directoryFilters" setting now supports excluding directories at arbitrary depth, using the ** operator. (note that for v0.9.2, the default value for this setting is still ["-node_modules"]. In the next release, this will change to ["-**/node_modules"]).

Bug fixes and Performance improvements

This release contains the following notable bug fixes / performance improvements:

  • Additional change optimization - Following up on the work to optimize change processing from the v0.9.0 release, this release contains additional optimizations that result in around 50% faster change processing (measured via edits in the Kubernetes repo).
  • Fix for a long-standing memory leak - #53780 fixed a long-standing bug that caused gopls to hold on to its initial state, gradually leaking memory as state changed during the editing session.
  • Fewer restarts - This release contains many fixes for cache-invalidation bugs that would cause gopls to get confused and require restarting. Additionally, see the note at top about enabling the network when reloading the workspace. We believe we are close to our goal that restarting gopls should never be required to fix workspace errors. If you encounter such a bug, please file an issue!

A full list of all issues fixed can be found in the gopls/v0.9.2 milestone. To report a new problem, please file a new issue at https://go.dev/issues/new.

Thank you to our contributors!

Thank you for your contribution, @alandonovan, @antoineco, @dle8, @euroelessar, @findleyr, @hyangah, @jamalc, @mssdvd, @pjweinbgo, @rentziass, and @suzmue!

What's next

The next planned gopls release is v0.10.0. We’re excited about features and improvements on the horizon, for example:

  • Package renaming (#41567)
  • More accurate static-analysis (#48738)
  • Improved support for the new 1.19 doc comment format (#54260)
  • Making it easier to work with go.work files (many issues, for example #53880 or #54261)
Commits
  • 96844c3 cmd/{guru,callgraph}: stop using go/pointer
  • cd694d8 go/packages: include "unsafe".GoFiles=["unsafe.go"]
  • 33c741d gopls/internal/lsp: add min/max builtin
  • 933c7cc internal/lsp/source: use exact match in import highlighting
  • 5974258 gopls/internal/lsp: clear vuln diagnostics on config changes
  • f3faea1 go/packages: pass -pgo=off on go1.21 and later
  • 5f74ec7 internal/lsp/debug: add links to profiles and GC
  • ed90c6d internal/diff: unexport various identifiers
  • 827f5aa gopls/internal/lsp/source: test references bug on struct{p.T}
  • a12e1a6 go/ssa/interp: implement min/max builtins
  • Additional commits viewable in compare view

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=golang.org/x/tools&package-manager=go_modules&previous-version=0.9.1&new-version=0.9.2)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

You can trigger a rebase of this PR by commenting `@dependabot rebase`.

---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
--- src/go.mod | 2 +- src/go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/go.mod b/src/go.mod index 2dd8fccae..625c2a632 100644 --- a/src/go.mod +++ b/src/go.mod @@ -34,7 +34,7 @@ require ( go.uber.org/zap v1.24.0 golang.org/x/exp v0.0.0-20230213192124-5e25df0256eb golang.org/x/time v0.3.0 - golang.org/x/tools v0.9.1 + golang.org/x/tools v0.9.2 ) require ( diff --git a/src/go.sum b/src/go.sum index 84fa701e3..81b373246 100644 --- a/src/go.sum +++ b/src/go.sum @@ -672,8 +672,8 @@ golang.org/x/tools v0.0.0-20210105154028-b0ab187a4818/go.mod h1:emZCQorbCU4vsT4f golang.org/x/tools v0.0.0-20210108195828-e2f9c7f1fc8e/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools v0.1.0/go.mod h1:xkSsbof2nBLbhDlRMhhhyNLN/zl3eTqcnHD5viDpcZ0= golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= -golang.org/x/tools v0.9.1 h1:8WMNJAz3zrtPmnYC7ISf5dEn3MT0gY7jBJfw27yrrLo= -golang.org/x/tools v0.9.1/go.mod h1:owI94Op576fPu3cIGQeHs3joujW/2Oc6MtlxbF5dfNc= +golang.org/x/tools v0.9.2 h1:UXbndbirwCAx6TULftIfie/ygDNCwxEie+IiNP1IcNc= +golang.org/x/tools v0.9.2/go.mod h1:owI94Op576fPu3cIGQeHs3joujW/2Oc6MtlxbF5dfNc= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= From 6b7502bcb82ec325e851a9e018ed6583fafe4a42 Mon Sep 17 00:00:00 2001 From: Abin Simon Date: Fri, 2 Jun 2023 00:18:37 +0530 Subject: [PATCH 4/6] Ensure calling `Details()` twice does not duplicate folder names (#3556) Related to https://github.com/alcionai/corso/pull/3140 . We were editing the deets in place and returning a pointer. This is fine as long as we were only calling it once, but https://github.com/alcionai/corso/pull/3480 created an additional call for this which causes the `konwnFolders` to be appended twice. Old backups will still have incorrect data, but an incremental backup on top of it should fix up the data. --- #### Does this PR need a docs update or release note? - [x] :white_check_mark: Yes, it's included - [ ] :clock1: Yes, but in a later PR - [ ] :no_entry: No #### Type of change - [ ] :sunflower: Feature - [x] :bug: Bugfix - [ ] :world_map: Documentation - [ ] :robot: Supportability/Tests - [ ] :computer: CI/Deployment - [ ] :broom: Tech Debt/Cleanup #### Issue(s) * Fixes https://github.com/alcionai/corso/issues/3555 #### Test Plan - [ ] :muscle: Manual - [x] :zap: Unit test - [ ] :green_heart: E2E --- CHANGELOG.md | 1 + src/pkg/backup/details/details.go | 13 ++++- src/pkg/backup/details/details_test.go | 78 ++++++++++++++++++++++++++ 3 files changed, 89 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5b0b9c711..c3f47d973 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed - Fix Exchange folder cache population error when parent folder isn't found. - Fix Exchange backup issue caused by incorrect json serialization +- Fix issues with details model containing duplicate entry for api consumers ### Changed - Do not display all the items that we restored at the end if there are more than 15. You can override this with `--verbose`. 
diff --git a/src/pkg/backup/details/details.go b/src/pkg/backup/details/details.go index 15a0e3a7d..c24f8fb42 100644 --- a/src/pkg/backup/details/details.go +++ b/src/pkg/backup/details/details.go @@ -388,10 +388,17 @@ func (b *Builder) Details() *Details { b.mu.Lock() defer b.mu.Unlock() - // Write the cached folder entries to details - b.d.Entries = append(b.d.Entries, maps.Values(b.knownFolders)...) + ents := make([]Entry, len(b.d.Entries)) + copy(ents, b.d.Entries) - return &b.d + // Write the cached folder entries to details + details := &Details{ + DetailsModel{ + Entries: append(ents, maps.Values(b.knownFolders)...), + }, + } + + return details } // -------------------------------------------------------------------------------- diff --git a/src/pkg/backup/details/details_test.go b/src/pkg/backup/details/details_test.go index d6aae6bbc..f6c1097ae 100644 --- a/src/pkg/backup/details/details_test.go +++ b/src/pkg/backup/details/details_test.go @@ -1033,6 +1033,84 @@ func (suite *DetailsUnitSuite) TestBuilder_Add_cleansFileIDSuffixes() { } } +func (suite *DetailsUnitSuite) TestBuilder_DetailsNoDuplicate() { + var ( + t = suite.T() + b = Builder{} + svc = path.OneDriveService + cat = path.FilesCategory + info = ItemInfo{ + OneDrive: &OneDriveInfo{ + ItemType: OneDriveItem, + ItemName: "in", + DriveName: "dn", + DriveID: "d", + }, + } + + dataSfx = makeItemPath(t, svc, cat, "t", "u", []string{"d", "r:", "f", "i1" + metadata.DataFileSuffix}) + dataSfx2 = makeItemPath(t, svc, cat, "t", "u", []string{"d", "r:", "f", "i2" + metadata.DataFileSuffix}) + dirMetaSfx = makeItemPath(t, svc, cat, "t", "u", []string{"d", "r:", "f", "i1" + metadata.DirMetaFileSuffix}) + metaSfx = makeItemPath(t, svc, cat, "t", "u", []string{"d", "r:", "f", "i1" + metadata.MetaFileSuffix}) + ) + + // Don't need to generate folders for this entry, we just want the itemRef + loc := &path.Builder{} + + err := b.Add(dataSfx, loc, false, info) + require.NoError(t, err, clues.ToCore(err)) + + err = b.Add(dataSfx2, loc, false, info) + require.NoError(t, err, clues.ToCore(err)) + + err = b.Add(dirMetaSfx, loc, false, info) + require.NoError(t, err, clues.ToCore(err)) + + err = b.Add(metaSfx, loc, false, info) + require.NoError(t, err, clues.ToCore(err)) + + b.knownFolders = map[string]Entry{ + "dummy": { + RepoRef: "xyz", + ShortRef: "abcd", + ParentRef: "1234", + LocationRef: "ab", + ItemRef: "cd", + Updated: false, + ItemInfo: info, + }, + "dummy2": { + RepoRef: "xyz2", + ShortRef: "abcd2", + ParentRef: "12342", + LocationRef: "ab2", + ItemRef: "cd2", + Updated: false, + ItemInfo: info, + }, + "dummy3": { + RepoRef: "xyz3", + ShortRef: "abcd3", + ParentRef: "12343", + LocationRef: "ab3", + ItemRef: "cd3", + Updated: false, + ItemInfo: info, + }, + } + + // mark the capacity prior to calling details. + // if the entries slice gets modified and grows to a + // 5th space, then the capacity would grow as well. 
+ capCheck := cap(b.d.Entries) + + assert.Len(t, b.Details().Entries, 7) // 4 ents + 3 known folders + assert.Len(t, b.Details().Entries, 7) // possible reason for err: knownFolders got added twice + + assert.Len(t, b.d.Entries, 4) // len should not have grown + assert.Equal(t, capCheck, cap(b.d.Entries)) // capacity should not have grown +} + func makeItemPath( t *testing.T, service path.ServiceType, From cdf26b7988868b7ac2bfce187277556be88244c9 Mon Sep 17 00:00:00 2001 From: Keepers Date: Thu, 1 Jun 2023 18:58:09 -0600 Subject: [PATCH 5/6] refactor exchange restore to use interfaces (#3456) Refactors exchange restore, replacing near-duplicate per-category functions and switch-based process trees with interfaces. At the start of restoring all collections, each category creates a categoryRestoreHandler to supply the necessary restore behavior. The appropriate handler gets passed into the collection restore, and all restore code after that takes a single path, using a common restore interface to switch between per-category behavior. --- #### Does this PR need a docs update or release note? - [x] :no_entry: No #### Type of change - [x] :broom: Tech Debt/Cleanup #### Issue(s) * #1996 #### Test Plan - [x] :zap: Unit test - [x] :green_heart: E2E --- src/internal/connector/data_collections.go | 6 +- .../connector/data_collections_test.go | 30 +- .../exchange/contact_folder_cache.go | 6 +- .../connector/exchange/contacts_backup.go | 40 + .../connector/exchange/contacts_restore.go | 86 +++ .../exchange/contacts_restore_test.go | 57 ++ .../exchange/container_resolver_test.go | 221 ++---- .../connector/exchange/data_collections.go | 64 +- .../exchange/data_collections_test.go | 81 ++- .../connector/exchange/events_backup.go | 40 + .../connector/exchange/events_restore.go | 109 +++ .../connector/exchange/events_restore_test.go | 57 ++ .../exchange/exchange_data_collection.go | 19 +- src/internal/connector/exchange/handlers.go | 131 ++++ .../connector/exchange/mail_backup.go | 40 + .../connector/exchange/mail_restore.go | 140 ++++ .../connector/exchange/mail_restore_test.go | 57 ++ .../connector/exchange/restore_test.go | 82 +-- .../connector/exchange/service_functions.go | 160 ---- .../connector/exchange/service_iterators.go | 122 +++- .../exchange/service_iterators_test.go | 57 +- .../connector/exchange/service_restore.go | 688 +++++------------- .../connector/exchange/testdata/handlers.go | 34 + src/internal/connector/exchange/transform.go | 9 +- .../connector/graph/cache_container.go | 2 +- src/internal/connector/graph/errors.go | 1 + src/internal/connector/graph/service.go | 3 +- .../operations/backup_integration_test.go | 21 +- src/pkg/path/path.go | 9 + src/pkg/services/m365/api/contacts.go | 7 +- src/pkg/services/m365/api/events.go | 15 +- src/pkg/services/m365/api/mail.go | 4 +- 32 files changed, 1325 insertions(+), 1073 deletions(-) create mode 100644 src/internal/connector/exchange/contacts_backup.go create mode 100644 src/internal/connector/exchange/contacts_restore.go create mode 100644 src/internal/connector/exchange/contacts_restore_test.go create mode 100644 src/internal/connector/exchange/events_backup.go create mode 100644 src/internal/connector/exchange/events_restore.go create mode 100644 src/internal/connector/exchange/events_restore_test.go create mode 100644 src/internal/connector/exchange/handlers.go create mode 100644 src/internal/connector/exchange/mail_backup.go create mode 100644 src/internal/connector/exchange/mail_restore.go create mode 100644
src/internal/connector/exchange/mail_restore_test.go delete mode 100644 src/internal/connector/exchange/service_functions.go create mode 100644 src/internal/connector/exchange/testdata/handlers.go diff --git a/src/internal/connector/data_collections.go b/src/internal/connector/data_collections.go index 8d5abdcda..c3591a0e6 100644 --- a/src/internal/connector/data_collections.go +++ b/src/internal/connector/data_collections.go @@ -65,8 +65,7 @@ func (gc *GraphConnector) ProduceBackupCollections( ctx, gc.Discovery.Users(), path.ServiceType(sels.Service), - sels.DiscreteOwner, - ) + sels.DiscreteOwner) if err != nil { return nil, nil, err } @@ -90,10 +89,11 @@ func (gc *GraphConnector) ProduceBackupCollections( case selectors.ServiceExchange: colls, ssmb, err = exchange.DataCollections( ctx, + gc.Discovery, sels, + gc.credentials.AzureTenantID, owner, metadata, - gc.credentials, gc.UpdateStatus, ctrlOpts, errs) diff --git a/src/internal/connector/data_collections_test.go b/src/internal/connector/data_collections_test.go index 299842e82..4c55b0d77 100644 --- a/src/internal/connector/data_collections_test.go +++ b/src/internal/connector/data_collections_test.go @@ -21,6 +21,7 @@ import ( "github.com/alcionai/corso/src/pkg/path" "github.com/alcionai/corso/src/pkg/selectors" selTD "github.com/alcionai/corso/src/pkg/selectors/testdata" + "github.com/alcionai/corso/src/pkg/services/m365/api" ) // --------------------------------------------------------------------------- @@ -29,8 +30,10 @@ import ( type DataCollectionIntgSuite struct { tester.Suite - user string - site string + user string + site string + tenantID string + ac api.Client } func TestDataCollectionIntgSuite(t *testing.T) { @@ -42,10 +45,19 @@ func TestDataCollectionIntgSuite(t *testing.T) { } func (suite *DataCollectionIntgSuite) SetupSuite() { - suite.user = tester.M365UserID(suite.T()) - suite.site = tester.M365SiteID(suite.T()) + t := suite.T() - tester.LogTimeOfTest(suite.T()) + suite.user = tester.M365UserID(t) + suite.site = tester.M365SiteID(t) + + acct := tester.NewM365Account(t) + creds, err := acct.M365Config() + require.NoError(t, err, clues.ToCore(err)) + + suite.tenantID = creds.AzureTenantID + + suite.ac, err = api.NewClient(creds) + require.NoError(t, err, clues.ToCore(err)) } // TestExchangeDataCollection verifies interface between operation and @@ -111,16 +123,18 @@ func (suite *DataCollectionIntgSuite) TestExchangeDataCollection() { defer flush() sel := test.getSelector(t) + uidn := inMock.NewProvider(sel.ID(), sel.Name()) ctrlOpts := control.Defaults() ctrlOpts.ToggleFeatures.DisableDelta = !canMakeDeltaQueries collections, excludes, err := exchange.DataCollections( ctx, + suite.ac, sel, - sel, + suite.tenantID, + uidn, nil, - connector.credentials, connector.UpdateStatus, ctrlOpts, fault.New(true)) @@ -133,7 +147,7 @@ func (suite *DataCollectionIntgSuite) TestExchangeDataCollection() { // Categories with delta endpoints will produce a collection for metadata // as well as the actual data pulled, and the "temp" root collection. 
- assert.GreaterOrEqual(t, len(collections), 1, "expected 1 <= num collections <= 2") + assert.LessOrEqual(t, 1, len(collections), "expected 1 <= num collections <= 3") assert.GreaterOrEqual(t, 3, len(collections), "expected 1 <= num collections <= 3") for _, col := range collections { diff --git a/src/internal/connector/exchange/contact_folder_cache.go b/src/internal/connector/exchange/contact_folder_cache.go index 5526bf7b7..75cc2f66d 100644 --- a/src/internal/connector/exchange/contact_folder_cache.go +++ b/src/internal/connector/exchange/contact_folder_cache.go @@ -71,9 +71,9 @@ func (cfc *contactFolderCache) Populate( ctx context.Context, errs *fault.Bus, baseID string, - baseContainerPather ...string, + baseContainerPath ...string, ) error { - if err := cfc.init(ctx, baseID, baseContainerPather); err != nil { + if err := cfc.init(ctx, baseID, baseContainerPath); err != nil { return clues.Wrap(err, "initializing") } @@ -95,7 +95,7 @@ func (cfc *contactFolderCache) init( baseContainerPath []string, ) error { if len(baseNode) == 0 { - return clues.New("m365 folderID required for base folder").WithClues(ctx) + return clues.New("m365 folderID required for base contact folder").WithClues(ctx) } if cfc.containerResolver == nil { diff --git a/src/internal/connector/exchange/contacts_backup.go b/src/internal/connector/exchange/contacts_backup.go new file mode 100644 index 000000000..4054a17a8 --- /dev/null +++ b/src/internal/connector/exchange/contacts_backup.go @@ -0,0 +1,40 @@ +package exchange + +import ( + "github.com/alcionai/corso/src/internal/connector/graph" + "github.com/alcionai/corso/src/pkg/services/m365/api" +) + +var _ backupHandler = &contactBackupHandler{} + +type contactBackupHandler struct { + ac api.Contacts +} + +func newContactBackupHandler( + ac api.Client, +) contactBackupHandler { + acc := ac.Contacts() + + return contactBackupHandler{ + ac: acc, + } +} + +func (h contactBackupHandler) itemEnumerator() addedAndRemovedItemGetter { + return h.ac +} + +func (h contactBackupHandler) itemHandler() itemGetterSerializer { + return h.ac +} + +func (h contactBackupHandler) NewContainerCache( + userID string, +) (string, graph.ContainerResolver) { + return DefaultContactFolder, &contactFolderCache{ + userID: userID, + enumer: h.ac, + getter: h.ac, + } +} diff --git a/src/internal/connector/exchange/contacts_restore.go b/src/internal/connector/exchange/contacts_restore.go new file mode 100644 index 000000000..63d0e87c8 --- /dev/null +++ b/src/internal/connector/exchange/contacts_restore.go @@ -0,0 +1,86 @@ +package exchange + +import ( + "context" + + "github.com/alcionai/clues" + "github.com/microsoftgraph/msgraph-sdk-go/models" + + "github.com/alcionai/corso/src/internal/common/ptr" + "github.com/alcionai/corso/src/internal/connector/graph" + "github.com/alcionai/corso/src/pkg/backup/details" + "github.com/alcionai/corso/src/pkg/fault" + "github.com/alcionai/corso/src/pkg/path" + "github.com/alcionai/corso/src/pkg/services/m365/api" +) + +var _ itemRestorer = &contactRestoreHandler{} + +type contactRestoreHandler struct { + ac api.Contacts + ip itemPoster[models.Contactable] +} + +func newContactRestoreHandler( + ac api.Client, +) contactRestoreHandler { + return contactRestoreHandler{ + ac: ac.Contacts(), + ip: ac.Contacts(), + } +} + +func (h contactRestoreHandler) newContainerCache(userID string) graph.ContainerResolver { + return &contactFolderCache{ + userID: userID, + enumer: h.ac, + getter: h.ac, + } +} + +func (h contactRestoreHandler) formatRestoreDestination( + 
destinationContainerName string, + _ path.Path, // contact folders cannot be nested +) *path.Builder { + return path.Builder{}.Append(destinationContainerName) +} + +func (h contactRestoreHandler) CreateContainer( + ctx context.Context, + userID, containerName, _ string, // parent container not used +) (graph.Container, error) { + return h.ac.CreateContainer(ctx, userID, containerName, "") +} + +func (h contactRestoreHandler) containerSearcher() containerByNamer { + return nil +} + +// always returns the provided value +func (h contactRestoreHandler) orRootContainer(c string) string { + return c +} + +func (h contactRestoreHandler) restore( + ctx context.Context, + body []byte, + userID, destinationID string, + errs *fault.Bus, +) (*details.ExchangeInfo, error) { + contact, err := api.BytesToContactable(body) + if err != nil { + return nil, graph.Wrap(ctx, err, "creating contact from bytes") + } + + ctx = clues.Add(ctx, "item_id", ptr.Val(contact.GetId())) + + item, err := h.ip.PostItem(ctx, userID, destinationID, contact) + if err != nil { + return nil, graph.Wrap(ctx, err, "restoring contact") + } + + info := api.ContactInfo(item) + info.Size = int64(len(body)) + + return info, nil +} diff --git a/src/internal/connector/exchange/contacts_restore_test.go b/src/internal/connector/exchange/contacts_restore_test.go new file mode 100644 index 000000000..d33e9fb61 --- /dev/null +++ b/src/internal/connector/exchange/contacts_restore_test.go @@ -0,0 +1,57 @@ +package exchange + +import ( + "testing" + + "github.com/alcionai/clues" + "github.com/stretchr/testify/require" + "github.com/stretchr/testify/suite" + + "github.com/alcionai/corso/src/internal/tester" + "github.com/alcionai/corso/src/pkg/account" + "github.com/alcionai/corso/src/pkg/path" + "github.com/alcionai/corso/src/pkg/services/m365/api" +) + +type ContactsRestoreIntgSuite struct { + tester.Suite + creds account.M365Config + ac api.Client + userID string +} + +func TestContactsRestoreIntgSuite(t *testing.T) { + suite.Run(t, &ContactsRestoreIntgSuite{ + Suite: tester.NewIntegrationSuite( + t, + [][]string{tester.M365AcctCredEnvs}), + }) +} + +func (suite *ContactsRestoreIntgSuite) SetupSuite() { + t := suite.T() + + a := tester.NewM365Account(t) + creds, err := a.M365Config() + require.NoError(t, err, clues.ToCore(err)) + + suite.creds = creds + + suite.ac, err = api.NewClient(creds) + require.NoError(t, err, clues.ToCore(err)) + + suite.userID = tester.M365UserID(t) +} + +// Testing to ensure that the cache system works in multiple different environments +func (suite *ContactsRestoreIntgSuite) TestCreateContainerDestination() { + runCreateDestinationTest( + suite.T(), + newContactRestoreHandler(suite.ac), + path.ContactsCategory, + suite.creds.AzureTenantID, + suite.userID, + tester.DefaultTestRestoreDestination("").ContainerName, + []string{"Hufflepuff"}, + []string{"Ravenclaw"}) +} diff --git a/src/internal/connector/exchange/container_resolver_test.go b/src/internal/connector/exchange/container_resolver_test.go index 562749cec..1cfe4690e 100644 --- a/src/internal/connector/exchange/container_resolver_test.go +++ b/src/internal/connector/exchange/container_resolver_test.go @@ -15,7 +15,6 @@ import ( "github.com/alcionai/corso/src/internal/common/ptr" "github.com/alcionai/corso/src/internal/connector/graph" "github.com/alcionai/corso/src/internal/tester" - "github.com/alcionai/corso/src/pkg/account" "github.com/alcionai/corso/src/pkg/fault" "github.com/alcionai/corso/src/pkg/path" ) @@ -676,181 +675,69 @@ func (suite
*ConfiguredFolderCacheUnitSuite) TestAddToCache() { // integration suite // --------------------------------------------------------------------------- -type FolderCacheIntegrationSuite struct { - tester.Suite - credentials account.M365Config - gs graph.Servicer -} - -func TestFolderCacheIntegrationSuite(t *testing.T) { - suite.Run(t, &FolderCacheIntegrationSuite{ - Suite: tester.NewIntegrationSuite( - t, - [][]string{tester.M365AcctCredEnvs}, - ), - }) -} - -func (suite *FolderCacheIntegrationSuite) SetupSuite() { - t := suite.T() - - a := tester.NewM365Account(t) - m365, err := a.M365Config() - require.NoError(t, err, clues.ToCore(err)) - - suite.credentials = m365 - - adpt, err := graph.CreateAdapter( - m365.AzureTenantID, - m365.AzureClientID, - m365.AzureClientSecret) - require.NoError(t, err, clues.ToCore(err)) - - suite.gs = graph.NewService(adpt) -} - -// Testing to ensure that cache system works for in multiple different environments -func (suite *FolderCacheIntegrationSuite) TestCreateContainerDestination() { - a := tester.NewM365Account(suite.T()) - m365, err := a.M365Config() - require.NoError(suite.T(), err, clues.ToCore(err)) +func runCreateDestinationTest( + t *testing.T, + handler restoreHandler, + category path.CategoryType, + tenantID, userID, destinationName string, + containerNames1 []string, + containerNames2 []string, +) { + ctx, flush := tester.NewContext(t) + defer flush() var ( - user = tester.M365UserID(suite.T()) - directoryCaches = make(map[path.CategoryType]graph.ContainerResolver) - folderName = tester.DefaultTestRestoreDestination("").ContainerName - tests = []struct { - name string - pathFunc1 func(t *testing.T) path.Path - pathFunc2 func(t *testing.T) path.Path - category path.CategoryType - folderPrefix string - }{ - { - name: "Mail Cache Test", - category: path.EmailCategory, - pathFunc1: func(t *testing.T) path.Path { - pth, err := path.Build( - suite.credentials.AzureTenantID, - user, - path.ExchangeService, - path.EmailCategory, - false, - "Griffindor", "Croix") - require.NoError(t, err, clues.ToCore(err)) - - return pth - }, - pathFunc2: func(t *testing.T) path.Path { - pth, err := path.Build( - suite.credentials.AzureTenantID, - user, - path.ExchangeService, - path.EmailCategory, - false, - "Griffindor", "Felicius") - require.NoError(t, err, clues.ToCore(err)) - - return pth - }, - }, - { - name: "Contact Cache Test", - category: path.ContactsCategory, - pathFunc1: func(t *testing.T) path.Path { - pth, err := path.Build( - suite.credentials.AzureTenantID, - user, - path.ExchangeService, - path.ContactsCategory, - false, - "HufflePuff") - require.NoError(t, err, clues.ToCore(err)) - - return pth - }, - pathFunc2: func(t *testing.T) path.Path { - pth, err := path.Build( - suite.credentials.AzureTenantID, - user, - path.ExchangeService, - path.ContactsCategory, - false, - "Ravenclaw") - require.NoError(t, err, clues.ToCore(err)) - - return pth - }, - }, - { - name: "Event Cache Test", - category: path.EventsCategory, - pathFunc1: func(t *testing.T) path.Path { - pth, err := path.Build( - suite.credentials.AzureTenantID, - user, - path.ExchangeService, - path.EventsCategory, - false, - "Durmstrang") - require.NoError(t, err, clues.ToCore(err)) - - return pth - }, - pathFunc2: func(t *testing.T) path.Path { - pth, err := path.Build( - suite.credentials.AzureTenantID, - user, - path.ExchangeService, - path.EventsCategory, - false, - "Beauxbatons") - require.NoError(t, err, clues.ToCore(err)) - - return pth - }, - }, - } + svc = path.ExchangeService + gcr = 
handler.newContainerCache(userID) ) - for _, test := range tests { - suite.Run(test.name, func() { - t := suite.T() + path1, err := path.Build( + tenantID, + userID, + svc, + category, + false, + containerNames1...) + require.NoError(t, err, clues.ToCore(err)) - ctx, flush := tester.NewContext(t) - defer flush() + containerID, gcr, err := createDestination( + ctx, + handler, + handler.formatRestoreDestination(destinationName, path1), + userID, + gcr, + true, + fault.New(true)) + require.NoError(t, err, clues.ToCore(err)) - folderID, err := CreateContainerDestination( - ctx, - m365, - test.pathFunc1(t), - folderName, - directoryCaches, - fault.New(true)) - require.NoError(t, err, clues.ToCore(err)) + _, _, err = gcr.IDToPath(ctx, containerID) + assert.NoError(t, err, clues.ToCore(err)) - resolver := directoryCaches[test.category] + path2, err := path.Build( + tenantID, + userID, + svc, + category, + false, + containerNames2...) + require.NoError(t, err, clues.ToCore(err)) - _, _, err = resolver.IDToPath(ctx, folderID) - assert.NoError(t, err, clues.ToCore(err)) + containerID, gcr, err = createDestination( + ctx, + handler, + handler.formatRestoreDestination(destinationName, path2), + userID, + gcr, + false, + fault.New(true)) + require.NoError(t, err, clues.ToCore(err)) - secondID, err := CreateContainerDestination( - ctx, - m365, - test.pathFunc2(t), - folderName, - directoryCaches, - fault.New(true)) - require.NoError(t, err, clues.ToCore(err)) + p, l, err := gcr.IDToPath(ctx, containerID) + require.NoError(t, err, clues.ToCore(err)) - p, l, err := resolver.IDToPath(ctx, secondID) - require.NoError(t, err, clues.ToCore(err)) + _, ok := gcr.LocationInCache(l.String()) + require.True(t, ok, "looking for location in cache: %s", l) - _, ok := resolver.LocationInCache(l.String()) - require.True(t, ok, "looking for location in cache: %s", l) - - _, ok = resolver.PathInCache(p.String()) - require.True(t, ok, "looking for path in cache: %s", p) - }) - } + _, ok = gcr.PathInCache(p.String()) + require.True(t, ok, "looking for path in cache: %s", p) } diff --git a/src/internal/connector/exchange/data_collections.go b/src/internal/connector/exchange/data_collections.go index d9905f3a3..a179156c6 100644 --- a/src/internal/connector/exchange/data_collections.go +++ b/src/internal/connector/exchange/data_collections.go @@ -12,7 +12,6 @@ import ( "github.com/alcionai/corso/src/internal/connector/support" "github.com/alcionai/corso/src/internal/data" "github.com/alcionai/corso/src/internal/observe" - "github.com/alcionai/corso/src/pkg/account" "github.com/alcionai/corso/src/pkg/control" "github.com/alcionai/corso/src/pkg/fault" "github.com/alcionai/corso/src/pkg/path" @@ -159,15 +158,13 @@ func parseMetadataCollections( // DataCollections returns a DataCollection which the caller can // use to read mailbox data out for the specified user -// Assumption: User exists -// -// Add iota to this call -> mail, contacts, calendar, etc. 
func DataCollections( ctx context.Context, + ac api.Client, selector selectors.Selector, + tenantID string, user idname.Provider, metadata []data.RestoreCollection, - acct account.M365Config, su support.StatusUpdater, ctrlOpts control.Options, errs *fault.Bus, @@ -181,6 +178,7 @@ func DataCollections( collections = []data.BackupCollection{} el = errs.Local() categories = map[path.CategoryType]struct{}{} + handlers = BackupHandlers(ac) ) // Turn on concurrency limiter middleware for exchange backups @@ -201,7 +199,8 @@ func DataCollections( dcs, err := createCollections( ctx, - acct, + handlers, + tenantID, user, scope, cdps[scope.Category().PathType()], @@ -222,7 +221,7 @@ func DataCollections( baseCols, err := graph.BaseCollections( ctx, collections, - acct.AzureTenantID, + tenantID, user.ID(), path.ExchangeService, categories, @@ -238,25 +237,13 @@ func DataCollections( return collections, nil, el.Failure() } -func getterByType(ac api.Client, category path.CategoryType) (addedAndRemovedItemIDsGetter, error) { - switch category { - case path.EmailCategory: - return ac.Mail(), nil - case path.EventsCategory: - return ac.Events(), nil - case path.ContactsCategory: - return ac.Contacts(), nil - default: - return nil, clues.New("no api client registered for category") - } -} - // createCollections - utility function that retrieves M365 // IDs through Microsoft Graph API. The selectors.ExchangeScope // determines the type of collections that are retrieved. func createCollections( ctx context.Context, - creds account.M365Config, + handlers map[path.CategoryType]backupHandler, + tenantID string, user idname.Provider, scope selectors.ExchangeScope, dps DeltaPaths, @@ -264,27 +251,21 @@ func createCollections( su support.StatusUpdater, errs *fault.Bus, ) ([]data.BackupCollection, error) { + ctx = clues.Add(ctx, "category", scope.Category().PathType()) + var ( allCollections = make([]data.BackupCollection, 0) category = scope.Category().PathType() + qp = graph.QueryParams{ + Category: category, + ResourceOwner: user, + TenantID: tenantID, + } ) - ac, err := api.NewClient(creds) - if err != nil { - return nil, clues.Wrap(err, "getting api client").WithClues(ctx) - } - - ctx = clues.Add(ctx, "category", category) - - getter, err := getterByType(ac, category) - if err != nil { - return nil, clues.Stack(err).WithClues(ctx) - } - - qp := graph.QueryParams{ - Category: category, - ResourceOwner: user, - Credentials: creds, + handler, ok := handlers[category] + if !ok { + return nil, clues.New("unsupported backup category type").WithClues(ctx) } foldersComplete, closer := observe.MessageWithCompletion( @@ -293,17 +274,18 @@ func createCollections( defer closer() defer close(foldersComplete) - resolver, err := PopulateExchangeContainerResolver(ctx, qp, errs) - if err != nil { + rootFolder, cc := handler.NewContainerCache(user.ID()) + + if err := cc.Populate(ctx, errs, rootFolder); err != nil { return nil, clues.Wrap(err, "populating container cache") } collections, err := filterContainersAndFillCollections( ctx, qp, - getter, + handler, su, - resolver, + cc, scope, dps, ctrlOpts, diff --git a/src/internal/connector/exchange/data_collections_test.go b/src/internal/connector/exchange/data_collections_test.go index 00dde5ebd..42761f9ac 100644 --- a/src/internal/connector/exchange/data_collections_test.go +++ b/src/internal/connector/exchange/data_collections_test.go @@ -222,8 +222,10 @@ func newStatusUpdater(t *testing.T, wg *sync.WaitGroup) func(status *support.Con type DataCollectionsIntegrationSuite struct 
{ tester.Suite - user string - site string + user string + site string + tenantID string + ac api.Client } func TestDataCollectionsIntegrationSuite(t *testing.T) { @@ -239,18 +241,25 @@ func (suite *DataCollectionsIntegrationSuite) SetupSuite() { suite.user = tester.M365UserID(suite.T()) suite.site = tester.M365SiteID(suite.T()) + acct := tester.NewM365Account(suite.T()) + creds, err := acct.M365Config() + require.NoError(suite.T(), err, clues.ToCore(err)) + + suite.ac, err = api.NewClient(creds) + require.NoError(suite.T(), err, clues.ToCore(err)) + + suite.tenantID = creds.AzureTenantID + tester.LogTimeOfTest(suite.T()) } func (suite *DataCollectionsIntegrationSuite) TestMailFetch() { var ( - userID = tester.M365UserID(suite.T()) - users = []string{userID} - acct, err = tester.NewM365Account(suite.T()).M365Config() + userID = tester.M365UserID(suite.T()) + users = []string{userID} + handlers = BackupHandlers(suite.ac) ) - require.NoError(suite.T(), err, clues.ToCore(err)) - tests := []struct { name string scope selectors.ExchangeScope @@ -293,7 +302,8 @@ func (suite *DataCollectionsIntegrationSuite) TestMailFetch() { collections, err := createCollections( ctx, - acct, + handlers, + suite.tenantID, inMock.NewProvider(userID, userID), test.scope, DeltaPaths{}, @@ -329,13 +339,11 @@ func (suite *DataCollectionsIntegrationSuite) TestMailFetch() { func (suite *DataCollectionsIntegrationSuite) TestDelta() { var ( - userID = tester.M365UserID(suite.T()) - users = []string{userID} - acct, err = tester.NewM365Account(suite.T()).M365Config() + userID = tester.M365UserID(suite.T()) + users = []string{userID} + handlers = BackupHandlers(suite.ac) ) - require.NoError(suite.T(), err, clues.ToCore(err)) - tests := []struct { name string scope selectors.ExchangeScope @@ -372,7 +380,8 @@ func (suite *DataCollectionsIntegrationSuite) TestDelta() { // get collections without providing any delta history (ie: full backup) collections, err := createCollections( ctx, - acct, + handlers, + suite.tenantID, inMock.NewProvider(userID, userID), test.scope, DeltaPaths{}, @@ -403,7 +412,8 @@ func (suite *DataCollectionsIntegrationSuite) TestDelta() { // which should only contain the difference. collections, err = createCollections( ctx, - acct, + handlers, + suite.tenantID, inMock.NewProvider(userID, userID), test.scope, dps, @@ -438,19 +448,18 @@ func (suite *DataCollectionsIntegrationSuite) TestMailSerializationRegression() defer flush() var ( - wg sync.WaitGroup - users = []string{suite.user} + wg sync.WaitGroup + users = []string{suite.user} + handlers = BackupHandlers(suite.ac) ) - acct, err := tester.NewM365Account(t).M365Config() - require.NoError(t, err, clues.ToCore(err)) - sel := selectors.NewExchangeBackup(users) sel.Include(sel.MailFolders([]string{DefaultMailFolder}, selectors.PrefixMatch())) collections, err := createCollections( ctx, - acct, + handlers, + suite.tenantID, inMock.NewProvider(suite.user, suite.user), sel.Scopes()[0], DeltaPaths{}, @@ -497,10 +506,10 @@ func (suite *DataCollectionsIntegrationSuite) TestMailSerializationRegression() // and to store contact within Collection. Downloaded contacts are run through // a regression test to ensure that downloaded items can be uploaded. 
func (suite *DataCollectionsIntegrationSuite) TestContactSerializationRegression() { - acct, err := tester.NewM365Account(suite.T()).M365Config() - require.NoError(suite.T(), err, clues.ToCore(err)) - - users := []string{suite.user} + var ( + users = []string{suite.user} + handlers = BackupHandlers(suite.ac) + ) tests := []struct { name string @@ -525,7 +534,8 @@ func (suite *DataCollectionsIntegrationSuite) TestContactSerializationRegression edcs, err := createCollections( ctx, - acct, + handlers, + suite.tenantID, inMock.NewProvider(suite.user, suite.user), test.scope, DeltaPaths{}, @@ -589,17 +599,11 @@ func (suite *DataCollectionsIntegrationSuite) TestEventsSerializationRegression( ctx, flush := tester.NewContext(t) defer flush() - acct, err := tester.NewM365Account(t).M365Config() - require.NoError(t, err, clues.ToCore(err)) - - users := []string{suite.user} - - ac, err := api.NewClient(acct) - require.NoError(t, err, "creating client", clues.ToCore(err)) - var ( - calID string - bdayID string + users = []string{suite.user} + handlers = BackupHandlers(suite.ac) + calID string + bdayID string ) fn := func(gcf graph.CachedContainer) error { @@ -614,7 +618,7 @@ func (suite *DataCollectionsIntegrationSuite) TestEventsSerializationRegression( return nil } - err = ac.Events().EnumerateContainers(ctx, suite.user, DefaultCalendar, fn, fault.New(true)) + err := suite.ac.Events().EnumerateContainers(ctx, suite.user, DefaultCalendar, fn, fault.New(true)) require.NoError(t, err, clues.ToCore(err)) tests := []struct { @@ -650,7 +654,8 @@ func (suite *DataCollectionsIntegrationSuite) TestEventsSerializationRegression( collections, err := createCollections( ctx, - acct, + handlers, + suite.tenantID, inMock.NewProvider(suite.user, suite.user), test.scope, DeltaPaths{}, diff --git a/src/internal/connector/exchange/events_backup.go b/src/internal/connector/exchange/events_backup.go new file mode 100644 index 000000000..f77a6a1a3 --- /dev/null +++ b/src/internal/connector/exchange/events_backup.go @@ -0,0 +1,40 @@ +package exchange + +import ( + "github.com/alcionai/corso/src/internal/connector/graph" + "github.com/alcionai/corso/src/pkg/services/m365/api" +) + +var _ backupHandler = &eventBackupHandler{} + +type eventBackupHandler struct { + ac api.Events +} + +func newEventBackupHandler( + ac api.Client, +) eventBackupHandler { + ace := ac.Events() + + return eventBackupHandler{ + ac: ace, + } +} + +func (h eventBackupHandler) itemEnumerator() addedAndRemovedItemGetter { + return h.ac +} + +func (h eventBackupHandler) itemHandler() itemGetterSerializer { + return h.ac +} + +func (h eventBackupHandler) NewContainerCache( + userID string, +) (string, graph.ContainerResolver) { + return DefaultCalendar, &eventCalendarCache{ + userID: userID, + enumer: h.ac, + getter: h.ac, + } +} diff --git a/src/internal/connector/exchange/events_restore.go b/src/internal/connector/exchange/events_restore.go new file mode 100644 index 000000000..a45de05aa --- /dev/null +++ b/src/internal/connector/exchange/events_restore.go @@ -0,0 +1,109 @@ +package exchange + +import ( + "context" + + "github.com/alcionai/clues" + "github.com/microsoftgraph/msgraph-sdk-go/models" + + "github.com/alcionai/corso/src/internal/common/ptr" + "github.com/alcionai/corso/src/internal/connector/graph" + "github.com/alcionai/corso/src/pkg/backup/details" + "github.com/alcionai/corso/src/pkg/fault" + "github.com/alcionai/corso/src/pkg/path" + "github.com/alcionai/corso/src/pkg/services/m365/api" +) + +var _ itemRestorer = &eventRestoreHandler{} 
+ +type eventRestoreHandler struct { + ac api.Events + ip itemPoster[models.Eventable] +} + +func newEventRestoreHandler( + ac api.Client, +) eventRestoreHandler { + ace := ac.Events() + + return eventRestoreHandler{ + ac: ace, + ip: ace, + } +} + +func (h eventRestoreHandler) newContainerCache(userID string) graph.ContainerResolver { + return &eventCalendarCache{ + userID: userID, + enumer: h.ac, + getter: h.ac, + } +} + +func (h eventRestoreHandler) formatRestoreDestination( + destinationContainerName string, + _ path.Path, // ignored because calendars cannot be nested +) *path.Builder { + return path.Builder{}.Append(destinationContainerName) +} + +func (h eventRestoreHandler) CreateContainer( + ctx context.Context, + userID, containerName, _ string, // parent container not used +) (graph.Container, error) { + return h.ac.CreateContainer(ctx, userID, containerName, "") +} + +func (h eventRestoreHandler) containerSearcher() containerByNamer { + return h.ac +} + +// always returns the provided value +func (h eventRestoreHandler) orRootContainer(c string) string { + return c +} + +func (h eventRestoreHandler) restore( + ctx context.Context, + body []byte, + userID, destinationID string, + errs *fault.Bus, +) (*details.ExchangeInfo, error) { + event, err := api.BytesToEventable(body) + if err != nil { + return nil, clues.Wrap(err, "creating event from bytes").WithClues(ctx) + } + + ctx = clues.Add(ctx, "item_id", ptr.Val(event.GetId())) + + event = toEventSimplified(event) + + var attachments []models.Attachmentable + + if ptr.Val(event.GetHasAttachments()) { + attachments = event.GetAttachments() + event.SetAttachments([]models.Attachmentable{}) + } + + item, err := h.ip.PostItem(ctx, userID, destinationID, event) + if err != nil { + return nil, graph.Wrap(ctx, err, "restoring event") + } + + err = uploadAttachments( + ctx, + h.ac, + attachments, + userID, + destinationID, + ptr.Val(item.GetId()), + errs) + if err != nil { + return nil, clues.Stack(err) + } + + info := api.EventInfo(event) + info.Size = int64(len(body)) + + return info, nil +} diff --git a/src/internal/connector/exchange/events_restore_test.go b/src/internal/connector/exchange/events_restore_test.go new file mode 100644 index 000000000..2060bf21a --- /dev/null +++ b/src/internal/connector/exchange/events_restore_test.go @@ -0,0 +1,57 @@ +package exchange + +import ( + "testing" + + "github.com/alcionai/clues" + "github.com/stretchr/testify/require" + "github.com/stretchr/testify/suite" + + "github.com/alcionai/corso/src/internal/tester" + "github.com/alcionai/corso/src/pkg/account" + "github.com/alcionai/corso/src/pkg/path" + "github.com/alcionai/corso/src/pkg/services/m365/api" +) + +type EventsRestoreIntgSuite struct { + tester.Suite + creds account.M365Config + ac api.Client + userID string +} + +func TestEventsRestoreIntgSuite(t *testing.T) { + suite.Run(t, &EventsRestoreIntgSuite{ + Suite: tester.NewIntegrationSuite( + t, + [][]string{tester.M365AcctCredEnvs}), + }) +} + +func (suite *EventsRestoreIntgSuite) SetupSuite() { + t := suite.T() + + a := tester.NewM365Account(t) + creds, err := a.M365Config() + require.NoError(t, err, clues.ToCore(err)) + + suite.creds = creds + + suite.ac, err = api.NewClient(creds) + require.NoError(t, err, clues.ToCore(err)) + + suite.userID = tester.M365UserID(t) +} + +// Testing to ensure that the cache system works in multiple different environments +func (suite *EventsRestoreIntgSuite) TestCreateContainerDestination() { + runCreateDestinationTest( + suite.T(), +
newEventRestoreHandler(suite.ac), + path.EventsCategory, + suite.creds.AzureTenantID, + suite.userID, + tester.DefaultTestRestoreDestination("").ContainerName, + []string{"Durmstrang"}, + []string{"Beauxbatons"}) +} diff --git a/src/internal/connector/exchange/exchange_data_collection.go b/src/internal/connector/exchange/exchange_data_collection.go index 441056ed6..83c000b2c 100644 --- a/src/internal/connector/exchange/exchange_data_collection.go +++ b/src/internal/connector/exchange/exchange_data_collection.go @@ -12,7 +12,6 @@ import ( "time" "github.com/alcionai/clues" - "github.com/microsoft/kiota-abstractions-go/serialization" "github.com/alcionai/corso/src/internal/connector/graph" "github.com/alcionai/corso/src/internal/connector/support" @@ -37,20 +36,6 @@ const ( numberOfRetries = 4 ) -type itemer interface { - GetItem( - ctx context.Context, - user, itemID string, - immutableIDs bool, - errs *fault.Bus, - ) (serialization.Parsable, *details.ExchangeInfo, error) - Serialize( - ctx context.Context, - item serialization.Parsable, - user, itemID string, - ) ([]byte, error) -} - // Collection implements the interface from data.Collection // Structure holds data for an Exchange application for a single user type Collection struct { @@ -63,7 +48,7 @@ type Collection struct { // removed is a list of item IDs that were deleted from, or moved out, of a container removed map[string]struct{} - items itemer + items itemGetterSerializer category path.CategoryType statusUpdater support.StatusUpdater @@ -98,7 +83,7 @@ func NewCollection( curr, prev path.Path, location *path.Builder, category path.CategoryType, - items itemer, + items itemGetterSerializer, statusUpdater support.StatusUpdater, ctrlOpts control.Options, doNotMergeItems bool, diff --git a/src/internal/connector/exchange/handlers.go b/src/internal/connector/exchange/handlers.go new file mode 100644 index 000000000..0538ebd17 --- /dev/null +++ b/src/internal/connector/exchange/handlers.go @@ -0,0 +1,131 @@ +package exchange + +import ( + "context" + + "github.com/microsoft/kiota-abstractions-go/serialization" + + "github.com/alcionai/corso/src/internal/connector/graph" + "github.com/alcionai/corso/src/pkg/backup/details" + "github.com/alcionai/corso/src/pkg/fault" + "github.com/alcionai/corso/src/pkg/path" + "github.com/alcionai/corso/src/pkg/services/m365/api" +) + +// --------------------------------------------------------------------------- +// backup +// --------------------------------------------------------------------------- + +type backupHandler interface { + itemEnumerator() addedAndRemovedItemGetter + itemHandler() itemGetterSerializer + NewContainerCache(userID string) (string, graph.ContainerResolver) +} + +type addedAndRemovedItemGetter interface { + GetAddedAndRemovedItemIDs( + ctx context.Context, + user, containerID, oldDeltaToken string, + immutableIDs bool, + canMakeDeltaQueries bool, + ) ([]string, []string, api.DeltaUpdate, error) +} + +type itemGetterSerializer interface { + GetItem( + ctx context.Context, + user, itemID string, + immutableIDs bool, + errs *fault.Bus, + ) (serialization.Parsable, *details.ExchangeInfo, error) + Serialize( + ctx context.Context, + item serialization.Parsable, + user, itemID string, + ) ([]byte, error) +} + +func BackupHandlers(ac api.Client) map[path.CategoryType]backupHandler { + return map[path.CategoryType]backupHandler{ + path.ContactsCategory: newContactBackupHandler(ac), + path.EmailCategory: newMailBackupHandler(ac), + path.EventsCategory: newEventBackupHandler(ac), + } +} +
+// --------------------------------------------------------------------------- +// restore +// --------------------------------------------------------------------------- + +type restoreHandler interface { + itemRestorer + containerAPI + newContainerCache(userID string) graph.ContainerResolver + formatRestoreDestination( + destinationContainerName string, + collectionFullPath path.Path, + ) *path.Builder +} + +// runs the item restoration (ie: item creation) process +// for a single item, whose summary contents are held in +// the body property. +type itemRestorer interface { + restore( + ctx context.Context, + body []byte, + userID, destinationID string, + errs *fault.Bus, + ) (*details.ExchangeInfo, error) +} + +// runs the actual graph API post request. +type itemPoster[T any] interface { + PostItem( + ctx context.Context, + userID, dirID string, + body T, + ) (T, error) +} + +// produces structs that interface with the graph/cache_container +// CachedContainer interface. +type containerAPI interface { + // POSTs the creation of a new container + CreateContainer( + ctx context.Context, + userID, containerName, parentContainerID string, + ) (graph.Container, error) + + // GETs a container by name. + // if containerByNamer is nil, this functionality is not supported + // and should be skipped by the caller. + // normally, we'd alias the func directly. The indirection here + // is because not all types comply with GetContainerByName. + containerSearcher() containerByNamer + + // returns either the provided value (assumed to be the root + // folder for that cache tree), or the default root container + // (if the category uses a root folder that exists above the + // restore location path). + orRootContainer(string) string +} + +type containerByNamer interface { + // searches for a container by name. + GetContainerByName( + ctx context.Context, + userID, containerName string, + ) (graph.Container, error) +} + +// primary interface controller for all per-cateogry restoration behavior. 
+func restoreHandlers( + ac api.Client, +) map[path.CategoryType]restoreHandler { + return map[path.CategoryType]restoreHandler{ + path.ContactsCategory: newContactRestoreHandler(ac), + path.EmailCategory: newMailRestoreHandler(ac), + path.EventsCategory: newEventRestoreHandler(ac), + } +} diff --git a/src/internal/connector/exchange/mail_backup.go b/src/internal/connector/exchange/mail_backup.go new file mode 100644 index 000000000..1491a683a --- /dev/null +++ b/src/internal/connector/exchange/mail_backup.go @@ -0,0 +1,40 @@ +package exchange + +import ( + "github.com/alcionai/corso/src/internal/connector/graph" + "github.com/alcionai/corso/src/pkg/services/m365/api" +) + +var _ backupHandler = &mailBackupHandler{} + +type mailBackupHandler struct { + ac api.Mail +} + +func newMailBackupHandler( + ac api.Client, +) mailBackupHandler { + acm := ac.Mail() + + return mailBackupHandler{ + ac: acm, + } +} + +func (h mailBackupHandler) itemEnumerator() addedAndRemovedItemGetter { + return h.ac +} + +func (h mailBackupHandler) itemHandler() itemGetterSerializer { + return h.ac +} + +func (h mailBackupHandler) NewContainerCache( + userID string, +) (string, graph.ContainerResolver) { + return rootFolderAlias, &mailFolderCache{ + userID: userID, + enumer: h.ac, + getter: h.ac, + } +} diff --git a/src/internal/connector/exchange/mail_restore.go b/src/internal/connector/exchange/mail_restore.go new file mode 100644 index 000000000..a6e8d2e0e --- /dev/null +++ b/src/internal/connector/exchange/mail_restore.go @@ -0,0 +1,140 @@ +package exchange + +import ( + "context" + + "github.com/alcionai/clues" + "github.com/microsoftgraph/msgraph-sdk-go/models" + + "github.com/alcionai/corso/src/internal/common/dttm" + "github.com/alcionai/corso/src/internal/common/ptr" + "github.com/alcionai/corso/src/internal/connector/graph" + "github.com/alcionai/corso/src/pkg/backup/details" + "github.com/alcionai/corso/src/pkg/fault" + "github.com/alcionai/corso/src/pkg/path" + "github.com/alcionai/corso/src/pkg/services/m365/api" +) + +var _ itemRestorer = &mailRestoreHandler{} + +type mailRestoreHandler struct { + ac api.Mail + ip itemPoster[models.Messageable] +} + +func newMailRestoreHandler( + ac api.Client, +) mailRestoreHandler { + acm := ac.Mail() + + return mailRestoreHandler{ + ac: acm, + ip: acm, + } +} + +func (h mailRestoreHandler) newContainerCache(userID string) graph.ContainerResolver { + return &mailFolderCache{ + userID: userID, + enumer: h.ac, + getter: h.ac, + } +} + +func (h mailRestoreHandler) formatRestoreDestination( + destinationContainerName string, + collectionFullPath path.Path, +) *path.Builder { + return path.Builder{}.Append(destinationContainerName).Append(collectionFullPath.Folders()...) 
+} + +func (h mailRestoreHandler) CreateContainer( + ctx context.Context, + userID, containerName, parentContainerID string, +) (graph.Container, error) { + if len(parentContainerID) == 0 { + parentContainerID = rootFolderAlias + } + + return h.ac.CreateContainer(ctx, userID, containerName, parentContainerID) +} + +func (h mailRestoreHandler) containerSearcher() containerByNamer { + return nil +} + +// always returns rootFolderAlias +func (h mailRestoreHandler) orRootContainer(string) string { + return rootFolderAlias +} + +func (h mailRestoreHandler) restore( + ctx context.Context, + body []byte, + userID, destinationID string, + errs *fault.Bus, +) (*details.ExchangeInfo, error) { + msg, err := api.BytesToMessageable(body) + if err != nil { + return nil, clues.Wrap(err, "creating mail from bytes").WithClues(ctx) + } + + ctx = clues.Add(ctx, "item_id", ptr.Val(msg.GetId())) + msg = setMessageSVEPs(toMessage(msg)) + + attachments := msg.GetAttachments() + // Item.Attachments --> HasAttachments doesn't always have a value populated when deserialized + msg.SetAttachments([]models.Attachmentable{}) + + item, err := h.ip.PostItem(ctx, userID, destinationID, msg) + if err != nil { + return nil, graph.Wrap(ctx, err, "restoring mail message") + } + + err = uploadAttachments( + ctx, + h.ac, + attachments, + userID, + destinationID, + ptr.Val(item.GetId()), + errs) + if err != nil { + return nil, clues.Stack(err) + } + + return api.MailInfo(msg, int64(len(body))), nil +} + +func setMessageSVEPs(msg models.Messageable) models.Messageable { + // Set Extended Properties: + svlep := make([]models.SingleValueLegacyExtendedPropertyable, 0) + + // prevent "resending" of the mail in the graph api backstore + sv1 := models.NewSingleValueLegacyExtendedProperty() + sv1.SetId(ptr.To(MailRestorePropertyTag)) + sv1.SetValue(ptr.To(RestoreCanonicalEnableValue)) + svlep = append(svlep, sv1) + + // establish the sent date + if msg.GetSentDateTime() != nil { + sv2 := models.NewSingleValueLegacyExtendedProperty() + sv2.SetId(ptr.To(MailSendDateTimeOverrideProperty)) + sv2.SetValue(ptr.To(dttm.FormatToLegacy(ptr.Val(msg.GetSentDateTime())))) + + svlep = append(svlep, sv2) + } + + // establish the received Date + if msg.GetReceivedDateTime() != nil { + sv3 := models.NewSingleValueLegacyExtendedProperty() + sv3.SetId(ptr.To(MailReceiveDateTimeOverriveProperty)) + sv3.SetValue(ptr.To(dttm.FormatToLegacy(ptr.Val(msg.GetReceivedDateTime())))) + + svlep = append(svlep, sv3) + } + + msg.SetSingleValueExtendedProperties(svlep) + + return msg +} diff --git a/src/internal/connector/exchange/mail_restore_test.go b/src/internal/connector/exchange/mail_restore_test.go new file mode 100644 index 000000000..8edcedd4c --- /dev/null +++ b/src/internal/connector/exchange/mail_restore_test.go @@ -0,0 +1,57 @@ +package exchange + +import ( + "testing" + + "github.com/alcionai/clues" + "github.com/stretchr/testify/require" + "github.com/stretchr/testify/suite" + + "github.com/alcionai/corso/src/internal/tester" + "github.com/alcionai/corso/src/pkg/account" + "github.com/alcionai/corso/src/pkg/path" + "github.com/alcionai/corso/src/pkg/services/m365/api" +) + +type MailRestoreIntgSuite struct { + tester.Suite + creds account.M365Config + ac api.Client + userID string +} + +func TestMailRestoreIntgSuite(t *testing.T) { + suite.Run(t, &MailRestoreIntgSuite{ + Suite: tester.NewIntegrationSuite( + t, + [][]string{tester.M365AcctCredEnvs}), + }) +} + +func (suite *MailRestoreIntgSuite) SetupSuite() { + t := suite.T() + + a := 
tester.NewM365Account(t) + creds, err := a.M365Config() + require.NoError(t, err, clues.ToCore(err)) + + suite.creds = creds + + suite.ac, err = api.NewClient(creds) + require.NoError(t, err, clues.ToCore(err)) + + suite.userID = tester.M365UserID(t) +} + +// Testing to ensure that the cache system works in multiple different environments +func (suite *MailRestoreIntgSuite) TestCreateContainerDestination() { + runCreateDestinationTest( + suite.T(), + newMailRestoreHandler(suite.ac), + path.EmailCategory, + suite.creds.AzureTenantID, + suite.userID, + tester.DefaultTestRestoreDestination("").ContainerName, + []string{"Griffindor", "Croix"}, + []string{"Griffindor", "Felicius"}) +} diff --git a/src/internal/connector/exchange/restore_test.go b/src/internal/connector/exchange/restore_test.go index 5c60dbbb6..07c075b14 100644 --- a/src/internal/connector/exchange/restore_test.go +++ b/src/internal/connector/exchange/restore_test.go @@ -14,7 +14,6 @@ import ( "github.com/alcionai/corso/src/internal/connector/graph" "github.com/alcionai/corso/src/internal/tester" "github.com/alcionai/corso/src/pkg/account" - "github.com/alcionai/corso/src/pkg/control" "github.com/alcionai/corso/src/pkg/fault" "github.com/alcionai/corso/src/pkg/path" "github.com/alcionai/corso/src/pkg/services/m365/api" @@ -66,9 +65,10 @@ func (suite *RestoreIntgSuite) TestRestoreContact() { var ( userID = tester.M365UserID(t) folderName = tester.DefaultTestRestoreDestination("contact").ContainerName + handler = newContactRestoreHandler(suite.ac) ) - aFolder, err := suite.ac.Contacts().CreateContactFolder(ctx, userID, folderName) + aFolder, err := handler.ac.CreateContainer(ctx, userID, folderName, "") require.NoError(t, err, clues.ToCore(err)) folderID := ptr.Val(aFolder.GetId()) @@ -79,13 +79,11 @@ func (suite *RestoreIntgSuite) TestRestoreContact() { assert.NoError(t, err, clues.ToCore(err)) }() - info, err := RestoreContact( + info, err := handler.restore( ctx, exchMock.ContactBytes("Corso TestContact"), - suite.ac.Contacts(), - control.Copy, - folderID, - userID) + userID, folderID, + fault.New(true)) assert.NoError(t, err, clues.ToCore(err)) assert.NotNil(t, info, "contact item info") } @@ -101,9 +99,10 @@ func (suite *RestoreIntgSuite) TestRestoreEvent() { var ( userID = tester.M365UserID(t) subject = tester.DefaultTestRestoreDestination("event").ContainerName + handler = newEventRestoreHandler(suite.ac) ) - calendar, err := suite.ac.Events().CreateCalendar(ctx, userID, subject) + calendar, err := handler.ac.CreateContainer(ctx, userID, subject, "") require.NoError(t, err, clues.ToCore(err)) calendarID := ptr.Val(calendar.GetId()) @@ -135,15 +134,10 @@ func (suite *RestoreIntgSuite) TestRestoreEvent() { ctx, flush := tester.NewContext(t) defer flush() - info, err := RestoreEvent( + info, err := handler.restore( ctx, test.bytes, - suite.ac.Events(), - suite.ac.Events(), - suite.gs, - control.Copy, - calendarID, - userID, + userID, calendarID, fault.New(true)) assert.NoError(t, err, clues.ToCore(err)) assert.NotNil(t, info, "event item info") @@ -154,12 +148,8 @@ } } // TestRestoreExchangeObject verifies path.Category usage for restored objects func (suite *RestoreIntgSuite) TestRestoreExchangeObject() { t := suite.T() - a := tester.NewM365Account(t) - m365, err := a.M365Config() - require.NoError(t, err, clues.ToCore(err)) - service, err := createService(m365) - require.NoError(t, err, clues.ToCore(err)) + handlers := restoreHandlers(suite.ac) userID :=
tester.M365UserID(suite.T()) @@ -175,7 +165,8 @@ func (suite *RestoreIntgSuite) TestRestoreExchangeObject() { category: path.EmailCategory, destination: func(t *testing.T, ctx context.Context) string { folderName := tester.DefaultTestRestoreDestination("mailobj").ContainerName - folder, err := suite.ac.Mail().CreateMailFolder(ctx, userID, folderName) + folder, err := handlers[path.EmailCategory]. + CreateContainer(ctx, userID, folderName, "") require.NoError(t, err, clues.ToCore(err)) return ptr.Val(folder.GetId()) @@ -187,7 +178,8 @@ func (suite *RestoreIntgSuite) TestRestoreExchangeObject() { category: path.EmailCategory, destination: func(t *testing.T, ctx context.Context) string { folderName := tester.DefaultTestRestoreDestination("mailwattch").ContainerName - folder, err := suite.ac.Mail().CreateMailFolder(ctx, userID, folderName) + folder, err := handlers[path.EmailCategory]. + CreateContainer(ctx, userID, folderName, "") require.NoError(t, err, clues.ToCore(err)) return ptr.Val(folder.GetId()) @@ -199,7 +191,8 @@ func (suite *RestoreIntgSuite) TestRestoreExchangeObject() { category: path.EmailCategory, destination: func(t *testing.T, ctx context.Context) string { folderName := tester.DefaultTestRestoreDestination("eventwattch").ContainerName - folder, err := suite.ac.Mail().CreateMailFolder(ctx, userID, folderName) + folder, err := handlers[path.EmailCategory]. + CreateContainer(ctx, userID, folderName, "") require.NoError(t, err, clues.ToCore(err)) return ptr.Val(folder.GetId()) @@ -211,7 +204,8 @@ func (suite *RestoreIntgSuite) TestRestoreExchangeObject() { category: path.EmailCategory, destination: func(t *testing.T, ctx context.Context) string { folderName := tester.DefaultTestRestoreDestination("mailitemattch").ContainerName - folder, err := suite.ac.Mail().CreateMailFolder(ctx, userID, folderName) + folder, err := handlers[path.EmailCategory]. + CreateContainer(ctx, userID, folderName, "") require.NoError(t, err, clues.ToCore(err)) return ptr.Val(folder.GetId()) @@ -226,7 +220,8 @@ func (suite *RestoreIntgSuite) TestRestoreExchangeObject() { category: path.EmailCategory, destination: func(t *testing.T, ctx context.Context) string { folderName := tester.DefaultTestRestoreDestination("mailbasicattch").ContainerName - folder, err := suite.ac.Mail().CreateMailFolder(ctx, userID, folderName) + folder, err := handlers[path.EmailCategory]. + CreateContainer(ctx, userID, folderName, "") require.NoError(t, err, clues.ToCore(err)) return ptr.Val(folder.GetId()) @@ -241,7 +236,8 @@ func (suite *RestoreIntgSuite) TestRestoreExchangeObject() { category: path.EmailCategory, destination: func(t *testing.T, ctx context.Context) string { folderName := tester.DefaultTestRestoreDestination("mailnestattch").ContainerName - folder, err := suite.ac.Mail().CreateMailFolder(ctx, userID, folderName) + folder, err := handlers[path.EmailCategory]. + CreateContainer(ctx, userID, folderName, "") require.NoError(t, err, clues.ToCore(err)) return ptr.Val(folder.GetId()) @@ -256,7 +252,8 @@ func (suite *RestoreIntgSuite) TestRestoreExchangeObject() { category: path.EmailCategory, destination: func(t *testing.T, ctx context.Context) string { folderName := tester.DefaultTestRestoreDestination("mailcontactattch").ContainerName - folder, err := suite.ac.Mail().CreateMailFolder(ctx, userID, folderName) + folder, err := handlers[path.EmailCategory]. 
+ CreateContainer(ctx, userID, folderName, "") require.NoError(t, err, clues.ToCore(err)) return ptr.Val(folder.GetId()) @@ -268,7 +265,8 @@ func (suite *RestoreIntgSuite) TestRestoreExchangeObject() { category: path.EmailCategory, destination: func(t *testing.T, ctx context.Context) string { folderName := tester.DefaultTestRestoreDestination("nestedattch").ContainerName - folder, err := suite.ac.Mail().CreateMailFolder(ctx, userID, folderName) + folder, err := handlers[path.EmailCategory]. + CreateContainer(ctx, userID, folderName, "") require.NoError(t, err, clues.ToCore(err)) return ptr.Val(folder.GetId()) @@ -280,7 +278,8 @@ func (suite *RestoreIntgSuite) TestRestoreExchangeObject() { category: path.EmailCategory, destination: func(t *testing.T, ctx context.Context) string { folderName := tester.DefaultTestRestoreDestination("maillargeattch").ContainerName - folder, err := suite.ac.Mail().CreateMailFolder(ctx, userID, folderName) + folder, err := handlers[path.EmailCategory]. + CreateContainer(ctx, userID, folderName, "") require.NoError(t, err, clues.ToCore(err)) return ptr.Val(folder.GetId()) @@ -292,7 +291,8 @@ func (suite *RestoreIntgSuite) TestRestoreExchangeObject() { category: path.EmailCategory, destination: func(t *testing.T, ctx context.Context) string { folderName := tester.DefaultTestRestoreDestination("mailtwoattch").ContainerName - folder, err := suite.ac.Mail().CreateMailFolder(ctx, userID, folderName) + folder, err := handlers[path.EmailCategory]. + CreateContainer(ctx, userID, folderName, "") require.NoError(t, err, clues.ToCore(err)) return ptr.Val(folder.GetId()) @@ -304,20 +304,21 @@ func (suite *RestoreIntgSuite) TestRestoreExchangeObject() { category: path.EmailCategory, destination: func(t *testing.T, ctx context.Context) string { folderName := tester.DefaultTestRestoreDestination("mailrefattch").ContainerName - folder, err := suite.ac.Mail().CreateMailFolder(ctx, userID, folderName) + folder, err := handlers[path.EmailCategory]. + CreateContainer(ctx, userID, folderName, "") require.NoError(t, err, clues.ToCore(err)) return ptr.Val(folder.GetId()) }, }, - // TODO: #884 - reinstate when able to specify root folder by name { name: "Test Contact", bytes: exchMock.ContactBytes("Test_Omega"), category: path.ContactsCategory, destination: func(t *testing.T, ctx context.Context) string { folderName := tester.DefaultTestRestoreDestination("contact").ContainerName - folder, err := suite.ac.Contacts().CreateContactFolder(ctx, userID, folderName) + folder, err := handlers[path.ContactsCategory]. + CreateContainer(ctx, userID, folderName, "") require.NoError(t, err, clues.ToCore(err)) return ptr.Val(folder.GetId()) @@ -329,7 +330,8 @@ func (suite *RestoreIntgSuite) TestRestoreExchangeObject() { category: path.EventsCategory, destination: func(t *testing.T, ctx context.Context) string { folderName := tester.DefaultTestRestoreDestination("event").ContainerName - calendar, err := suite.ac.Events().CreateCalendar(ctx, userID, folderName) + calendar, err := handlers[path.EventsCategory]. + CreateContainer(ctx, userID, folderName, "") require.NoError(t, err, clues.ToCore(err)) return ptr.Val(calendar.GetId()) @@ -341,7 +343,8 @@ func (suite *RestoreIntgSuite) TestRestoreExchangeObject() { category: path.EventsCategory, destination: func(t *testing.T, ctx context.Context) string { folderName := tester.DefaultTestRestoreDestination("eventobj").ContainerName - calendar, err := suite.ac.Events().CreateCalendar(ctx, userID, folderName) + calendar, err := handlers[path.EventsCategory]. 
+ CreateContainer(ctx, userID, folderName, "") require.NoError(t, err, clues.ToCore(err)) return ptr.Val(calendar.GetId()) @@ -357,15 +360,10 @@ func (suite *RestoreIntgSuite) TestRestoreExchangeObject() { defer flush() destination := test.destination(t, ctx) - info, err := RestoreItem( + info, err := handlers[test.category].restore( ctx, test.bytes, - test.category, - control.Copy, - suite.ac, - service, - destination, - userID, + userID, destination, fault.New(true)) assert.NoError(t, err, clues.ToCore(err)) assert.NotNil(t, info, "item info was not populated") diff --git a/src/internal/connector/exchange/service_functions.go b/src/internal/connector/exchange/service_functions.go deleted file mode 100644 index bde0f1ae4..000000000 --- a/src/internal/connector/exchange/service_functions.go +++ /dev/null @@ -1,160 +0,0 @@ -package exchange - -import ( - "context" - - "github.com/alcionai/clues" - - "github.com/alcionai/corso/src/internal/common/ptr" - "github.com/alcionai/corso/src/internal/connector/graph" - "github.com/alcionai/corso/src/pkg/account" - "github.com/alcionai/corso/src/pkg/fault" - "github.com/alcionai/corso/src/pkg/logger" - "github.com/alcionai/corso/src/pkg/path" - "github.com/alcionai/corso/src/pkg/selectors" - "github.com/alcionai/corso/src/pkg/services/m365/api" -) - -var ErrFolderNotFound = clues.New("folder not found") - -func createService(credentials account.M365Config) (*graph.Service, error) { - adapter, err := graph.CreateAdapter( - credentials.AzureTenantID, - credentials.AzureClientID, - credentials.AzureClientSecret) - if err != nil { - return nil, clues.Wrap(err, "creating microsoft graph service for exchange") - } - - return graph.NewService(adapter), nil -} - -// populateExchangeContainerResolver gets a folder resolver if one is available for -// this category of data. If one is not available, returns nil so that other -// logic in the caller can complete as long as they check if the resolver is not -// nil. If an error occurs populating the resolver, returns an error. -func PopulateExchangeContainerResolver( - ctx context.Context, - qp graph.QueryParams, - errs *fault.Bus, -) (graph.ContainerResolver, error) { - var ( - res graph.ContainerResolver - cacheRoot string - ) - - ac, err := api.NewClient(qp.Credentials) - if err != nil { - return nil, err - } - - switch qp.Category { - case path.EmailCategory: - acm := ac.Mail() - res = &mailFolderCache{ - userID: qp.ResourceOwner.ID(), - getter: acm, - enumer: acm, - } - cacheRoot = rootFolderAlias - - case path.ContactsCategory: - acc := ac.Contacts() - res = &contactFolderCache{ - userID: qp.ResourceOwner.ID(), - getter: acc, - enumer: acc, - } - cacheRoot = DefaultContactFolder - - case path.EventsCategory: - ecc := ac.Events() - res = &eventCalendarCache{ - userID: qp.ResourceOwner.ID(), - getter: ecc, - enumer: ecc, - } - cacheRoot = DefaultCalendar - - default: - return nil, clues.New("no container resolver registered for category").WithClues(ctx) - } - - if err := res.Populate(ctx, errs, cacheRoot); err != nil { - return nil, clues.Wrap(err, "populating directory resolver").WithClues(ctx) - } - - return res, nil -} - -// Returns true if the container passes the scope comparison and should be included. -// Returns: -// - the path representing the directory as it should be stored in the repository. -// - the human-readable path using display names. -// - true if the path passes the scope comparison. 
-func includeContainer( - ctx context.Context, - qp graph.QueryParams, - c graph.CachedContainer, - scope selectors.ExchangeScope, - category path.CategoryType, -) (path.Path, *path.Builder, bool) { - var ( - directory string - locPath path.Path - pb = c.Path() - loc = c.Location() - ) - - // Clause ensures that DefaultContactFolder is inspected properly - if category == path.ContactsCategory && ptr.Val(c.GetDisplayName()) == DefaultContactFolder { - loc = loc.Append(DefaultContactFolder) - } - - dirPath, err := pb.ToDataLayerExchangePathForCategory( - qp.Credentials.AzureTenantID, - qp.ResourceOwner.ID(), - category, - false) - // Containers without a path (e.g. Root mail folder) always err here. - if err != nil { - return nil, nil, false - } - - directory = dirPath.Folder(false) - - if loc != nil { - locPath, err = loc.ToDataLayerExchangePathForCategory( - qp.Credentials.AzureTenantID, - qp.ResourceOwner.ID(), - category, - false) - // Containers without a path (e.g. Root mail folder) always err here. - if err != nil { - return nil, nil, false - } - - directory = locPath.Folder(false) - } - - var ok bool - - switch category { - case path.EmailCategory: - ok = scope.Matches(selectors.ExchangeMailFolder, directory) - case path.ContactsCategory: - ok = scope.Matches(selectors.ExchangeContactFolder, directory) - case path.EventsCategory: - ok = scope.Matches(selectors.ExchangeEventCalendar, directory) - default: - return nil, nil, false - } - - logger.Ctx(ctx).With( - "included", ok, - "scope", scope, - "matches_input", directory, - ).Debug("backup folder selection filter") - - return dirPath, loc, ok -} diff --git a/src/internal/connector/exchange/service_iterators.go b/src/internal/connector/exchange/service_iterators.go index 3d96cf4b5..021c15cbc 100644 --- a/src/internal/connector/exchange/service_iterators.go +++ b/src/internal/connector/exchange/service_iterators.go @@ -18,15 +18,6 @@ import ( "github.com/alcionai/corso/src/pkg/services/m365/api" ) -type addedAndRemovedItemIDsGetter interface { - GetAddedAndRemovedItemIDs( - ctx context.Context, - user, containerID, oldDeltaToken string, - immutableIDs bool, - canMakeDeltaQueries bool, - ) ([]string, []string, api.DeltaUpdate, error) -} - // filterContainersAndFillCollections is a utility function // that places the M365 object ids belonging to specific directories // into a BackupCollection. Messages outside of those directories are omitted. @@ -39,7 +30,7 @@ type addedAndRemovedItemIDsGetter interface { func filterContainersAndFillCollections( ctx context.Context, qp graph.QueryParams, - getter addedAndRemovedItemIDsGetter, + bh backupHandler, statusUpdater support.StatusUpdater, resolver graph.ContainerResolver, scope selectors.ExchangeScope, @@ -61,19 +52,6 @@ func filterContainersAndFillCollections( logger.Ctx(ctx).Infow("filling collections", "len_deltapaths", len(dps)) - // TODO(rkeepers): this should be passed in from the caller, probably - // as an interface that satisfies the NewCollection requirements. - // But this will work for the short term. 
- ac, err := api.NewClient(qp.Credentials) - if err != nil { - return nil, err - } - - ibt, err := itemerByType(ac, category) - if err != nil { - return nil, err - } - el := errs.Local() for _, c := range resolver.Items() { @@ -85,6 +63,7 @@ func filterContainersAndFillCollections( delete(tombstones, cID) var ( + err error dp = dps[cID] prevDelta = dp.Delta prevPathStr = dp.Path // do not log: pii; log prevPath instead @@ -115,13 +94,14 @@ func filterContainersAndFillCollections( ictx = clues.Add(ictx, "previous_path", prevPath) - added, removed, newDelta, err := getter.GetAddedAndRemovedItemIDs( - ictx, - qp.ResourceOwner.ID(), - cID, - prevDelta, - ctrlOpts.ToggleFeatures.ExchangeImmutableIDs, - !ctrlOpts.ToggleFeatures.DisableDelta) + added, removed, newDelta, err := bh.itemEnumerator(). + GetAddedAndRemovedItemIDs( + ictx, + qp.ResourceOwner.ID(), + cID, + prevDelta, + ctrlOpts.ToggleFeatures.ExchangeImmutableIDs, + !ctrlOpts.ToggleFeatures.DisableDelta) if err != nil { if !graph.IsErrDeletedInFlight(err) { el.AddRecoverable(clues.Stack(err).Label(fault.LabelForceNoBackupCreation)) @@ -148,7 +128,7 @@ func filterContainersAndFillCollections( prevPath, locPath, category, - ibt, + bh.itemHandler(), statusUpdater, ctrlOpts, newDelta.Reset) @@ -181,7 +161,10 @@ func filterContainersAndFillCollections( return nil, el.Failure() } - ictx := clues.Add(ctx, "tombstone_id", id) + var ( + err error + ictx = clues.Add(ctx, "tombstone_id", id) + ) if collections[id] != nil { el.AddRecoverable(clues.Wrap(err, "conflict: tombstone exists for a live collection").WithClues(ictx)) @@ -207,7 +190,7 @@ func filterContainersAndFillCollections( prevPath, nil, // tombstones don't need a location category, - ibt, + bh.itemHandler(), statusUpdater, ctrlOpts, false) @@ -220,7 +203,7 @@ func filterContainersAndFillCollections( "num_deltas_entries", len(deltaURLs)) col, err := graph.MakeMetadataCollection( - qp.Credentials.AzureTenantID, + qp.TenantID, qp.ResourceOwner.ID(), path.ExchangeService, qp.Category, @@ -260,15 +243,74 @@ func pathFromPrevString(ps string) (path.Path, error) { return p, nil } -func itemerByType(ac api.Client, category path.CategoryType) (itemer, error) { +// Returns true if the container passes the scope comparison and should be included. +// Returns: +// - the path representing the directory as it should be stored in the repository. +// - the human-readable path using display names. +// - true if the path passes the scope comparison. +func includeContainer( + ctx context.Context, + qp graph.QueryParams, + c graph.CachedContainer, + scope selectors.ExchangeScope, + category path.CategoryType, +) (path.Path, *path.Builder, bool) { + var ( + directory string + locPath path.Path + pb = c.Path() + loc = c.Location() + ) + + // Clause ensures that DefaultContactFolder is inspected properly + if category == path.ContactsCategory && ptr.Val(c.GetDisplayName()) == DefaultContactFolder { + loc = loc.Append(DefaultContactFolder) + } + + dirPath, err := pb.ToDataLayerExchangePathForCategory( + qp.TenantID, + qp.ResourceOwner.ID(), + category, + false) + // Containers without a path (e.g. Root mail folder) always err here. + if err != nil { + return nil, nil, false + } + + directory = dirPath.Folder(false) + + if loc != nil { + locPath, err = loc.ToDataLayerExchangePathForCategory( + qp.TenantID, + qp.ResourceOwner.ID(), + category, + false) + // Containers without a path (e.g. Root mail folder) always err here. 
+ if err != nil { + return nil, nil, false + } + + directory = locPath.Folder(false) + } + + var ok bool + switch category { case path.EmailCategory: - return ac.Mail(), nil - case path.EventsCategory: - return ac.Events(), nil + ok = scope.Matches(selectors.ExchangeMailFolder, directory) case path.ContactsCategory: - return ac.Contacts(), nil + ok = scope.Matches(selectors.ExchangeContactFolder, directory) + case path.EventsCategory: + ok = scope.Matches(selectors.ExchangeEventCalendar, directory) default: - return nil, clues.New("category not registered in getFetchIDFunc") + return nil, nil, false } + + logger.Ctx(ctx).With( + "included", ok, + "scope", scope, + "matches_input", directory, + ).Debug("backup folder selection filter") + + return dirPath, loc, ok } diff --git a/src/internal/connector/exchange/service_iterators_test.go b/src/internal/connector/exchange/service_iterators_test.go index 7588f39c8..f3b75ffa3 100644 --- a/src/internal/connector/exchange/service_iterators_test.go +++ b/src/internal/connector/exchange/service_iterators_test.go @@ -27,7 +27,25 @@ import ( // mocks // --------------------------------------------------------------------------- -var _ addedAndRemovedItemIDsGetter = &mockGetter{} +var _ backupHandler = &mockBackupHandler{} + +type mockBackupHandler struct { + mg mockGetter + category path.CategoryType + ac api.Client + userID string +} + +func (bh mockBackupHandler) itemEnumerator() addedAndRemovedItemGetter { return bh.mg } +func (bh mockBackupHandler) itemHandler() itemGetterSerializer { return nil } + +func (bh mockBackupHandler) NewContainerCache( + userID string, +) (string, graph.ContainerResolver) { + return BackupHandlers(bh.ac)[bh.category].NewContainerCache(bh.userID) +} + +var _ addedAndRemovedItemGetter = &mockGetter{} type ( mockGetter struct { @@ -115,7 +133,7 @@ type ServiceIteratorsSuite struct { creds account.M365Config } -func TestServiceIteratorsSuite(t *testing.T) { +func TestServiceIteratorsUnitSuite(t *testing.T) { suite.Run(t, &ServiceIteratorsSuite{Suite: tester.NewUnitSuite(t)}) } @@ -131,7 +149,7 @@ func (suite *ServiceIteratorsSuite) TestFilterContainersAndFillCollections() { qp = graph.QueryParams{ Category: path.EmailCategory, // doesn't matter which one we use. 
ResourceOwner: inMock.NewProvider("user_id", "user_name"), - Credentials: suite.creds, + TenantID: suite.creds.AzureTenantID, } statusUpdater = func(*support.ConnectorOperationStatus) {} allScope = selectors.NewExchangeBackup(nil).MailFolders(selectors.Any())[0] @@ -326,10 +344,15 @@ func (suite *ServiceIteratorsSuite) TestFilterContainersAndFillCollections() { ctrlOpts := control.Options{FailureHandling: test.failFast} ctrlOpts.ToggleFeatures.DisableDelta = !canMakeDeltaQueries + mbh := mockBackupHandler{ + mg: test.getter, + category: qp.Category, + } + collections, err := filterContainersAndFillCollections( ctx, qp, - test.getter, + mbh, statusUpdater, test.resolver, test.scope, @@ -422,7 +445,7 @@ func (suite *ServiceIteratorsSuite) TestFilterContainersAndFillCollections_Dupli var ( qp = graph.QueryParams{ ResourceOwner: inMock.NewProvider("user_id", "user_name"), - Credentials: suite.creds, + TenantID: suite.creds.AzureTenantID, } statusUpdater = func(*support.ConnectorOperationStatus) {} @@ -660,10 +683,15 @@ func (suite *ServiceIteratorsSuite) TestFilterContainersAndFillCollections_Dupli ctx, flush := tester.NewContext(t) defer flush() + mbh := mockBackupHandler{ + mg: test.getter, + category: qp.Category, + } + collections, err := filterContainersAndFillCollections( ctx, qp, - test.getter, + mbh, statusUpdater, test.resolver, sc.scope, @@ -803,7 +831,7 @@ func (suite *ServiceIteratorsSuite) TestFilterContainersAndFillCollections_repea qp = graph.QueryParams{ Category: path.EmailCategory, // doesn't matter which one we use. ResourceOwner: inMock.NewProvider("user_id", "user_name"), - Credentials: suite.creds, + TenantID: suite.creds.AzureTenantID, } statusUpdater = func(*support.ConnectorOperationStatus) {} allScope = selectors.NewExchangeBackup(nil).MailFolders(selectors.Any())[0] @@ -815,6 +843,10 @@ func (suite *ServiceIteratorsSuite) TestFilterContainersAndFillCollections_repea l: path.Builder{}.Append("display_name_1"), } resolver = newMockResolver(container1) + mbh = mockBackupHandler{ + mg: test.getter, + category: qp.Category, + } ) require.Equal(t, "user_id", qp.ResourceOwner.ID(), qp.ResourceOwner) @@ -823,7 +855,7 @@ func (suite *ServiceIteratorsSuite) TestFilterContainersAndFillCollections_repea collections, err := filterContainersAndFillCollections( ctx, qp, - test.getter, + mbh, statusUpdater, resolver, allScope, @@ -884,7 +916,7 @@ func (suite *ServiceIteratorsSuite) TestFilterContainersAndFillCollections_incre qp = graph.QueryParams{ Category: cat, ResourceOwner: inMock.NewProvider("user_id", "user_name"), - Credentials: suite.creds, + TenantID: suite.creds.AzureTenantID, } statusUpdater = func(*support.ConnectorOperationStatus) {} allScope = selectors.NewExchangeBackup(nil).MailFolders(selectors.Any())[0] @@ -1226,6 +1258,11 @@ func (suite *ServiceIteratorsSuite) TestFilterContainersAndFillCollections_incre getter.noReturnDelta = false } + mbh := mockBackupHandler{ + mg: test.getter, + category: qp.Category, + } + dps := test.dps if !deltaBefore { for k, dp := range dps { @@ -1237,7 +1274,7 @@ func (suite *ServiceIteratorsSuite) TestFilterContainersAndFillCollections_incre collections, err := filterContainersAndFillCollections( ctx, qp, - test.getter, + mbh, statusUpdater, test.resolver, allScope, diff --git a/src/internal/connector/exchange/service_restore.go b/src/internal/connector/exchange/service_restore.go index f5435249e..4e77ef5cc 100644 --- a/src/internal/connector/exchange/service_restore.go +++ b/src/internal/connector/exchange/service_restore.go @@ -3,13 
+3,11 @@ package exchange import ( "bytes" "context" - "fmt" "runtime/trace" "github.com/alcionai/clues" "github.com/microsoftgraph/msgraph-sdk-go/models" - "github.com/alcionai/corso/src/internal/common/dttm" "github.com/alcionai/corso/src/internal/common/ptr" "github.com/alcionai/corso/src/internal/connector/graph" "github.com/alcionai/corso/src/internal/connector/support" @@ -25,228 +23,6 @@ import ( "github.com/alcionai/corso/src/pkg/services/m365/api" ) -type itemPoster[T any] interface { - PostItem( - ctx context.Context, - userID, dirID string, - body T, - ) (T, error) -} - -// RestoreItem directs restore pipeline towards restore function -// based on the path.CategoryType. All input params are necessary to perform -// the type-specific restore function. -func RestoreItem( - ctx context.Context, - bits []byte, - category path.CategoryType, - policy control.CollisionPolicy, - ac api.Client, - gs graph.Servicer, - destination, user string, - errs *fault.Bus, -) (*details.ExchangeInfo, error) { - if policy != control.Copy { - return nil, clues.Wrap(clues.New(policy.String()), "policy not supported for Exchange restore").WithClues(ctx) - } - - switch category { - case path.EmailCategory: - return RestoreMessage(ctx, bits, ac.Mail(), ac.Mail(), gs, control.Copy, destination, user, errs) - case path.ContactsCategory: - return RestoreContact(ctx, bits, ac.Contacts(), control.Copy, destination, user) - case path.EventsCategory: - return RestoreEvent(ctx, bits, ac.Events(), ac.Events(), gs, control.Copy, destination, user, errs) - default: - return nil, clues.Wrap(clues.New(category.String()), "not supported for Exchange restore") - } -} - -// RestoreContact wraps api.Contacts().PostItem() -func RestoreContact( - ctx context.Context, - body []byte, - cli itemPoster[models.Contactable], - cp control.CollisionPolicy, - destination, user string, -) (*details.ExchangeInfo, error) { - contact, err := api.BytesToContactable(body) - if err != nil { - return nil, graph.Wrap(ctx, err, "creating contact from bytes") - } - - ctx = clues.Add(ctx, "item_id", ptr.Val(contact.GetId())) - - _, err = cli.PostItem(ctx, user, destination, contact) - if err != nil { - return nil, clues.Stack(err) - } - - info := api.ContactInfo(contact) - info.Size = int64(len(body)) - - return info, nil -} - -// RestoreEvent wraps api.Events().PostItem() -func RestoreEvent( - ctx context.Context, - body []byte, - itemCli itemPoster[models.Eventable], - attachmentCli attachmentPoster, - gs graph.Servicer, - cp control.CollisionPolicy, - destination, user string, - errs *fault.Bus, -) (*details.ExchangeInfo, error) { - event, err := api.BytesToEventable(body) - if err != nil { - return nil, clues.Wrap(err, "creating event from bytes").WithClues(ctx) - } - - ctx = clues.Add(ctx, "item_id", ptr.Val(event.GetId())) - - var ( - el = errs.Local() - transformedEvent = toEventSimplified(event) - attached []models.Attachmentable - ) - - if ptr.Val(event.GetHasAttachments()) { - attached = event.GetAttachments() - - transformedEvent.SetAttachments([]models.Attachmentable{}) - } - - item, err := itemCli.PostItem(ctx, user, destination, event) - if err != nil { - return nil, clues.Stack(err) - } - - for _, a := range attached { - if el.Failure() != nil { - break - } - - err := uploadAttachment( - ctx, - attachmentCli, - user, - destination, - ptr.Val(item.GetId()), - a) - if err != nil { - el.AddRecoverable(err) - } - } - - info := api.EventInfo(event) - info.Size = int64(len(body)) - - return info, el.Failure() -} - -// RestoreMessage 
wraps api.Mail().PostItem(), handling attachment creation along the way -func RestoreMessage( - ctx context.Context, - body []byte, - itemCli itemPoster[models.Messageable], - attachmentCli attachmentPoster, - gs graph.Servicer, - cp control.CollisionPolicy, - destination, user string, - errs *fault.Bus, -) (*details.ExchangeInfo, error) { - // Creates messageable object from original bytes - msg, err := api.BytesToMessageable(body) - if err != nil { - return nil, clues.Wrap(err, "creating mail from bytes").WithClues(ctx) - } - - ctx = clues.Add(ctx, "item_id", ptr.Val(msg.GetId())) - - var ( - clone = toMessage(msg) - valueID = MailRestorePropertyTag - enableValue = RestoreCanonicalEnableValue - ) - - // Set Extended Properties: - // 1st: No transmission - // 2nd: Send Date - // 3rd: Recv Date - svlep := make([]models.SingleValueLegacyExtendedPropertyable, 0) - sv1 := models.NewSingleValueLegacyExtendedProperty() - sv1.SetId(&valueID) - sv1.SetValue(&enableValue) - svlep = append(svlep, sv1) - - if clone.GetSentDateTime() != nil { - sv2 := models.NewSingleValueLegacyExtendedProperty() - sendPropertyValue := dttm.FormatToLegacy(ptr.Val(clone.GetSentDateTime())) - sendPropertyTag := MailSendDateTimeOverrideProperty - sv2.SetId(&sendPropertyTag) - sv2.SetValue(&sendPropertyValue) - - svlep = append(svlep, sv2) - } - - if clone.GetReceivedDateTime() != nil { - sv3 := models.NewSingleValueLegacyExtendedProperty() - recvPropertyValue := dttm.FormatToLegacy(ptr.Val(clone.GetReceivedDateTime())) - recvPropertyTag := MailReceiveDateTimeOverriveProperty - sv3.SetId(&recvPropertyTag) - sv3.SetValue(&recvPropertyValue) - - svlep = append(svlep, sv3) - } - - clone.SetSingleValueExtendedProperties(svlep) - - attached := clone.GetAttachments() - - // Item.Attachments --> HasAttachments doesn't always have a value populated when deserialized - clone.SetAttachments([]models.Attachmentable{}) - - item, err := itemCli.PostItem(ctx, user, destination, clone) - if err != nil { - return nil, graph.Wrap(ctx, err, "restoring mail message") - } - - el := errs.Local() - - for _, a := range attached { - if el.Failure() != nil { - return nil, el.Failure() - } - - err := uploadAttachment( - ctx, - attachmentCli, - user, - destination, - ptr.Val(item.GetId()), - a) - if err != nil { - // FIXME: I don't know why we're swallowing this error case. - // It needs investigation: https://github.com/alcionai/corso/issues/3498 - if ptr.Val(a.GetOdataType()) == "#microsoft.graph.itemAttachment" { - name := ptr.Val(a.GetName()) - - logger.CtxErr(ctx, err). - With("attachment_name", name). - Info("mail upload failed") - - continue - } - - el.AddRecoverable(clues.Wrap(err, "uploading mail attachment")) - } - } - - return api.MailInfo(clone, int64(len(body))), el.Failure() -} - // RestoreCollections restores M365 objects in data.RestoreCollection to MSFT // store through GraphAPI. 
func RestoreCollections( @@ -259,49 +35,82 @@ func RestoreCollections( deets *details.Builder, errs *fault.Bus, ) (*support.ConnectorOperationStatus, error) { + if len(dcs) == 0 { + return support.CreateStatus(ctx, support.Restore, 0, support.CollectionMetrics{}, ""), nil + } + var ( - directoryCaches = make(map[string]map[path.CategoryType]graph.ContainerResolver) - metrics support.CollectionMetrics - userID string + userID = dcs[0].FullPath().ResourceOwner() + directoryCache = make(map[path.CategoryType]graph.ContainerResolver) + handlers = restoreHandlers(ac) + metrics support.CollectionMetrics // TODO policy to be updated from external source after completion of refactoring policy = control.Copy el = errs.Local() ) - if len(dcs) > 0 { - userID = dcs[0].FullPath().ResourceOwner() - ctx = clues.Add(ctx, "resource_owner", clues.Hide(userID)) - } + ctx = clues.Add(ctx, "resource_owner", clues.Hide(userID)) for _, dc := range dcs { if el.Failure() != nil { break } - userCaches := directoryCaches[userID] - if userCaches == nil { - directoryCaches[userID] = make(map[path.CategoryType]graph.ContainerResolver) - userCaches = directoryCaches[userID] - } + var ( + isNewCache bool + category = dc.FullPath().Category() + ictx = clues.Add( + ctx, + "restore_category", category, + "restore_full_path", dc.FullPath()) + ) - containerID, err := CreateContainerDestination( - ctx, - creds, - dc.FullPath(), - dest.ContainerName, - userCaches, - errs) - if err != nil { - el.AddRecoverable(clues.Wrap(err, "creating destination").WithClues(ctx)) + handler, ok := handlers[category] + if !ok { + el.AddRecoverable(clues.New("unsupported restore path category").WithClues(ictx)) continue } - temp, canceled := restoreCollection(ctx, ac, gs, dc, containerID, policy, deets, errs) + if directoryCache[category] == nil { + directoryCache[category] = handler.newContainerCache(userID) + isNewCache = true + } + + containerID, gcr, err := createDestination( + ictx, + handler, + handler.formatRestoreDestination(dest.ContainerName, dc.FullPath()), + userID, + directoryCache[category], + isNewCache, + errs) + if err != nil { + el.AddRecoverable(err) + continue + } + + directoryCache[category] = gcr + + ictx = clues.Add(ictx, "restore_destination_id", containerID) + + temp, err := restoreCollection( + ictx, + handler, + dc, + userID, + containerID, + policy, + deets, + errs) metrics = support.CombineMetrics(metrics, temp) - if canceled { - break + if err != nil { + if graph.IsErrTimeout(err) { + break + } + + el.AddRecoverable(err) } } @@ -318,48 +127,39 @@ func RestoreCollections( // restoreCollection handles restoration of an individual collection. 
func restoreCollection( ctx context.Context, - ac api.Client, - gs graph.Servicer, + ir itemRestorer, dc data.RestoreCollection, - folderID string, + userID, destinationID string, policy control.CollisionPolicy, deets *details.Builder, errs *fault.Bus, -) (support.CollectionMetrics, bool) { +) (support.CollectionMetrics, error) { ctx, end := diagnostics.Span(ctx, "gc:exchange:restoreCollection", diagnostics.Label("path", dc.FullPath())) defer end() var ( - metrics support.CollectionMetrics - items = dc.Items(ctx, errs) - directory = dc.FullPath() - service = directory.Service() - category = directory.Category() - user = directory.ResourceOwner() + el = errs.Local() + metrics support.CollectionMetrics + items = dc.Items(ctx, errs) + fullPath = dc.FullPath() + category = fullPath.Category() ) - ctx = clues.Add( - ctx, - "full_path", directory, - "service", service, - "category", category) - colProgress, closer := observe.CollectionProgress( ctx, category.String(), - clues.Hide(directory.Folder(false))) + fullPath.Folder(false)) defer closer() defer close(colProgress) for { select { case <-ctx.Done(): - errs.AddRecoverable(clues.Wrap(ctx.Err(), "context cancelled").WithClues(ctx)) - return metrics, true + return metrics, clues.Wrap(ctx.Err(), "context cancelled").WithClues(ctx) case itemData, ok := <-items: - if !ok || errs.Failure() != nil { - return metrics, false + if !ok || el.Failure() != nil { + return metrics, el.Failure() } ictx := clues.Add(ctx, "item_id", itemData.UUID()) @@ -370,33 +170,26 @@ func restoreCollection( _, err := buf.ReadFrom(itemData.ToReader()) if err != nil { - errs.AddRecoverable(clues.Wrap(err, "reading item bytes").WithClues(ictx)) + el.AddRecoverable(clues.Wrap(err, "reading item bytes").WithClues(ictx)) continue } - byteArray := buf.Bytes() + body := buf.Bytes() - info, err := RestoreItem( - ictx, - byteArray, - category, - policy, - ac, - gs, - folderID, - user, - errs) + info, err := ir.restore(ictx, body, userID, destinationID, errs) if err != nil { - errs.AddRecoverable(err) + el.AddRecoverable(err) continue } - metrics.Bytes += int64(len(byteArray)) + metrics.Bytes += int64(len(body)) metrics.Successes++ - itemPath, err := dc.FullPath().AppendItem(itemData.UUID()) + // FIXME: this may be the incorrect path. If we restored within a top-level + // destination folder, then the restore path no longer matches the fullPath. + itemPath, err := fullPath.AppendItem(itemData.UUID()) if err != nil { - errs.AddRecoverable(clues.Wrap(err, "building full path with item").WithClues(ctx)) + el.AddRecoverable(clues.Wrap(err, "adding item to collection path").WithClues(ctx)) continue } @@ -410,7 +203,8 @@ func restoreCollection( Exchange: info, }) if err != nil { - // Not critical enough to need to stop restore operation. + // These deets additions are for cli display purposes only. + // no need to fail out on error. logger.Ctx(ctx).Infow("accounting for restored item", "error", err) } @@ -419,257 +213,143 @@ func restoreCollection( } } -// CreateContainerDestination builds the destination into the container -// at the provided path. As a precondition, the destination cannot -// already exist. If it does then an error is returned. The provided -// containerResolver is updated with the new destination. -// @ returns the container ID of the new destination container. -func CreateContainerDestination( +// createDestination creates folders in sequence +// [root leaf1 leaf2] similar to a linked list. 
+// @param directory is the desired path from the root to the container +// that the items will be restored into. +func createDestination( ctx context.Context, - creds account.M365Config, - directory path.Path, - destination string, - caches map[path.CategoryType]graph.ContainerResolver, + ca containerAPI, + destination *path.Builder, + userID string, + gcr graph.ContainerResolver, + isNewCache bool, errs *fault.Bus, -) (string, error) { +) (string, graph.ContainerResolver, error) { var ( - newCache = false - user = directory.ResourceOwner() - category = directory.Category() - directoryCache = caches[category] + cache = gcr + restoreLoc = &path.Builder{} + containerParentID string ) - // TODO(rkeepers): pass the api client into this func, rather than generating one. - ac, err := api.NewClient(creds) - if err != nil { - return "", clues.Stack(err).WithClues(ctx) - } + for _, container := range destination.Elements() { + restoreLoc = restoreLoc.Append(container) - switch category { - case path.EmailCategory: - folders := append([]string{destination}, directory.Folders()...) - - if directoryCache == nil { - acm := ac.Mail() - mfc := &mailFolderCache{ - userID: user, - enumer: acm, - getter: acm, - } - - caches[category] = mfc - newCache = true - directoryCache = mfc - } - - return establishMailRestoreLocation( + ictx := clues.Add( ctx, - ac, - folders, - directoryCache, - user, - newCache, + "is_new_cache", isNewCache, + "container_parent_id", containerParentID, + "container_name", container, + "restore_location", restoreLoc) + + fid, err := getOrPopulateContainer( + ictx, + ca, + cache, + restoreLoc, + userID, + containerParentID, + container, + isNewCache, errs) - - case path.ContactsCategory: - folders := append([]string{destination}, directory.Folders()...) - - if directoryCache == nil { - acc := ac.Contacts() - cfc := &contactFolderCache{ - userID: user, - enumer: acc, - getter: acc, - } - caches[category] = cfc - newCache = true - directoryCache = cfc - } - - return establishContactsRestoreLocation( - ctx, - ac, - folders, - directoryCache, - user, - newCache, - errs) - - case path.EventsCategory: - dest := destination - - if directoryCache == nil { - ace := ac.Events() - ecc := &eventCalendarCache{ - userID: user, - getter: ace, - enumer: ace, - } - caches[category] = ecc - newCache = true - directoryCache = ecc - } - - folders := append([]string{dest}, directory.Folders()...) - - return establishEventsRestoreLocation( - ctx, - ac, - folders, - directoryCache, - user, - newCache, - errs) - - default: - return "", clues.New(fmt.Sprintf("type not supported: %T", category)).WithClues(ctx) - } -} - -// establishMailRestoreLocation creates Mail folders in sequence -// [root leaf1 leaf2] in a similar to a linked list. 
-// @param folders is the desired path from the root to the container -// that the items will be restored into -// @param isNewCache identifies if the cache is created and not populated -func establishMailRestoreLocation( - ctx context.Context, - ac api.Client, - folders []string, - mfc graph.ContainerResolver, - user string, - isNewCache bool, - errs *fault.Bus, -) (string, error) { - // Process starts with the root folder in order to recreate - // the top-level folder with the same tactic - folderID := rootFolderAlias - pb := path.Builder{} - - ctx = clues.Add(ctx, "is_new_cache", isNewCache) - - for _, folder := range folders { - pb = *pb.Append(folder) - - cached, ok := mfc.LocationInCache(pb.String()) - if ok { - folderID = cached - continue - } - - temp, err := ac.Mail().CreateMailFolderWithParent(ctx, user, folder, folderID) if err != nil { - // Should only error if cache malfunctions or incorrect parameters - return "", err + return "", cache, clues.Stack(err) } - folderID = ptr.Val(temp.GetId()) - - // Only populate the cache if we actually had to create it. Since we set - // newCache to false in this we'll only try to populate it once per function - // call even if we make a new cache. - if isNewCache { - if err := mfc.Populate(ctx, errs, rootFolderAlias); err != nil { - return "", clues.Wrap(err, "populating folder cache") - } - - isNewCache = false - } - - // NOOP if the folder is already in the cache. - if err = mfc.AddToCache(ctx, temp); err != nil { - return "", clues.Wrap(err, "adding folder to cache") - } + containerParentID = fid } - return folderID, nil + // containerParentID now identifies the last created container, + // not its parent. + return containerParentID, cache, nil } -// establishContactsRestoreLocation creates Contact Folders in sequence -// and updates the container resolver appropriately. Contact Folders are -// displayed in a flat representation. Therefore, only the root can be populated and all content -// must be restored into the root location. -// @param folders is the list of intended folders from root to leaf (e.g. 
[root ...]) -// @param isNewCache bool representation of whether Populate function needs to be run -func establishContactsRestoreLocation( +func getOrPopulateContainer( ctx context.Context, - ac api.Client, - folders []string, - cfc graph.ContainerResolver, - user string, + ca containerAPI, + gcr graph.ContainerResolver, + restoreLoc *path.Builder, + userID, containerParentID, containerName string, isNewCache bool, errs *fault.Bus, ) (string, error) { - cached, ok := cfc.LocationInCache(folders[0]) + cached, ok := gcr.LocationInCache(restoreLoc.String()) if ok { return cached, nil } - ctx = clues.Add(ctx, "is_new_cache", isNewCache) + c, err := ca.CreateContainer(ctx, userID, containerName, containerParentID) - temp, err := ac.Contacts().CreateContactFolder(ctx, user, folders[0]) - if err != nil { - return "", err - } - - folderID := ptr.Val(temp.GetId()) - - if isNewCache { - if err := cfc.Populate(ctx, errs, folderID, folders[0]); err != nil { - return "", clues.Wrap(err, "populating contact cache") - } - - if err = cfc.AddToCache(ctx, temp); err != nil { - return "", clues.Wrap(err, "adding contact folder to cache") - } - } - - return folderID, nil -} - -func establishEventsRestoreLocation( - ctx context.Context, - ac api.Client, - folders []string, - ecc graph.ContainerResolver, // eventCalendarCache - user string, - isNewCache bool, - errs *fault.Bus, -) (string, error) { - // Need to prefix with the "Other Calendars" folder so lookup happens properly. - cached, ok := ecc.LocationInCache(folders[0]) - if ok { - return cached, nil - } - - ctx = clues.Add(ctx, "is_new_cache", isNewCache) - - temp, err := ac.Events().CreateCalendar(ctx, user, folders[0]) - if err != nil && !graph.IsErrFolderExists(err) { - return "", err - } - - // 409 handling: Fetch folder if it exists and add to cache. - // This is rare, but may happen if CreateCalendar() POST fails with 5xx, - // potentially leaving dirty state in graph. + // 409 handling case: + // attempt to fetch the container by name and add that result to the cache. + // This is rare, but may happen if CreateContainer() POST fails with 5xx: + // sometimes the backend will create the folder despite the 5xx response, + // leaving our local containerResolver with inconsistent state. 
if graph.IsErrFolderExists(err) { - temp, err = ac.Events().GetContainerByName(ctx, user, folders[0]) - if err != nil { - return "", err + cs := ca.containerSearcher() + if cs != nil { + cc, e := cs.GetContainerByName(ctx, userID, containerName) + c = cc + err = clues.Stack(err, e) } } - folderID := ptr.Val(temp.GetId()) + if err != nil { + return "", clues.Wrap(err, "creating restore container") + } + + folderID := ptr.Val(c.GetId()) if isNewCache { - if err = ecc.Populate(ctx, errs, folderID, folders[0]); err != nil { - return "", clues.Wrap(err, "populating event cache") + if err := gcr.Populate(ctx, errs, folderID, ca.orRootContainer(restoreLoc.HeadElem())); err != nil { + return "", clues.Wrap(err, "populating container cache") } + } - displayable := api.CalendarDisplayable{Calendarable: temp} - if err = ecc.AddToCache(ctx, displayable); err != nil { - return "", clues.Wrap(err, "adding new calendar to cache") - } + if err = gcr.AddToCache(ctx, c); err != nil { + return "", clues.Wrap(err, "adding container to cache") } return folderID, nil } + +func uploadAttachments( + ctx context.Context, + ap attachmentPoster, + as []models.Attachmentable, + userID, destinationID, itemID string, + errs *fault.Bus, +) error { + el := errs.Local() + + for _, a := range as { + if el.Failure() != nil { + return el.Failure() + } + + err := uploadAttachment( + ctx, + ap, + userID, + destinationID, + itemID, + a) + if err != nil { + // FIXME: I don't know why we're swallowing this error case. + // It needs investigation: https://github.com/alcionai/corso/issues/3498 + if ptr.Val(a.GetOdataType()) == "#microsoft.graph.itemAttachment" { + name := ptr.Val(a.GetName()) + + logger.CtxErr(ctx, err). + With("attachment_name", name). + Info("mail upload failed") + + continue + } + + el.AddRecoverable(clues.Wrap(err, "uploading mail attachment").WithClues(ctx)) + } + } + + return el.Failure() +} diff --git a/src/internal/connector/exchange/testdata/handlers.go b/src/internal/connector/exchange/testdata/handlers.go new file mode 100644 index 000000000..396de9008 --- /dev/null +++ b/src/internal/connector/exchange/testdata/handlers.go @@ -0,0 +1,34 @@ +package testdata + +import ( + "context" + "testing" + + "github.com/alcionai/clues" + "github.com/stretchr/testify/require" + + "github.com/alcionai/corso/src/internal/connector/exchange" + "github.com/alcionai/corso/src/internal/connector/graph" + "github.com/alcionai/corso/src/pkg/fault" + "github.com/alcionai/corso/src/pkg/path" + "github.com/alcionai/corso/src/pkg/services/m365/api" +) + +func PopulateContainerCache( + t *testing.T, + ctx context.Context, //revive:disable-line:context-as-argument + ac api.Client, + category path.CategoryType, + resourceOwnerID string, + errs *fault.Bus, +) graph.ContainerResolver { + handler, ok := exchange.BackupHandlers(ac)[category] + require.Truef(t, ok, "container resolver registered for category %s", category) + + root, cc := handler.NewContainerCache(resourceOwnerID) + + err := cc.Populate(ctx, errs, root) + require.NoError(t, err, clues.ToCore(err)) + + return cc +} diff --git a/src/internal/connector/exchange/transform.go b/src/internal/connector/exchange/transform.go index 99df1805c..67b790a9e 100644 --- a/src/internal/connector/exchange/transform.go +++ b/src/internal/connector/exchange/transform.go @@ -55,14 +55,7 @@ func CloneMessageableFields(orig, message models.Messageable) models.Messageable func toMessage(orig models.Messageable) models.Messageable { message := models.NewMessage() - temp := 
CloneMessageableFields(orig, message) - - aMessage, ok := temp.(*models.Message) - if !ok { - return nil - } - - return aMessage + return CloneMessageableFields(orig, message) } // ToEventSimplified transforms an event to simplified restore format diff --git a/src/internal/connector/graph/cache_container.go b/src/internal/connector/graph/cache_container.go index 2b12354f4..d83b17d74 100644 --- a/src/internal/connector/graph/cache_container.go +++ b/src/internal/connector/graph/cache_container.go @@ -64,7 +64,7 @@ type ContainerResolver interface { // @param ctx is necessary param for Graph API tracing // @param baseFolderID represents the M365ID base that the resolver will // conclude its search. Default input is "". - Populate(ctx context.Context, errs *fault.Bus, baseFolderID string, baseContainerPather ...string) error + Populate(ctx context.Context, errs *fault.Bus, baseFolderID string, baseContainerPath ...string) error // PathInCache performs a look up of a path representation // and returns the m365ID of directory iff the pathString diff --git a/src/internal/connector/graph/errors.go b/src/internal/connector/graph/errors.go index 81886965e..bca55a9ec 100644 --- a/src/internal/connector/graph/errors.go +++ b/src/internal/connector/graph/errors.go @@ -139,6 +139,7 @@ func IsErrTimeout(err error) bool { } return errors.Is(err, ErrTimeout) || + errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) || errors.Is(err, http.ErrHandlerTimeout) || os.IsTimeout(err) diff --git a/src/internal/connector/graph/service.go b/src/internal/connector/graph/service.go index 4335972a4..1c85e760c 100644 --- a/src/internal/connector/graph/service.go +++ b/src/internal/connector/graph/service.go @@ -13,7 +13,6 @@ import ( msgraphgocore "github.com/microsoftgraph/msgraph-sdk-go-core" "github.com/alcionai/corso/src/internal/common/idname" - "github.com/alcionai/corso/src/pkg/account" "github.com/alcionai/corso/src/pkg/path" ) @@ -41,7 +40,7 @@ func AllMetadataFileNames() []string { type QueryParams struct { Category path.CategoryType ResourceOwner idname.Provider - Credentials account.M365Config + TenantID string } // --------------------------------------------------------------------------- diff --git a/src/internal/operations/backup_integration_test.go b/src/internal/operations/backup_integration_test.go index 0d1407978..786563393 100644 --- a/src/internal/operations/backup_integration_test.go +++ b/src/internal/operations/backup_integration_test.go @@ -24,6 +24,7 @@ import ( "github.com/alcionai/corso/src/internal/connector" "github.com/alcionai/corso/src/internal/connector/exchange" exchMock "github.com/alcionai/corso/src/internal/connector/exchange/mock" + exchTD "github.com/alcionai/corso/src/internal/connector/exchange/testdata" "github.com/alcionai/corso/src/internal/connector/graph" "github.com/alcionai/corso/src/internal/connector/mock" "github.com/alcionai/corso/src/internal/connector/onedrive" @@ -719,7 +720,7 @@ func (suite *BackupOpIntegrationSuite) TestBackup_Run_incrementalExchange() { testExchangeContinuousBackups(suite, control.Toggles{}) } -func (suite *BackupOpIntegrationSuite) TestBackup_Run_nonIncrementalExchange() { +func (suite *BackupOpIntegrationSuite) TestBackup_Run_incrementalNonDeltaExchange() { testExchangeContinuousBackups(suite, control.Toggles{DisableDelta: true}) } @@ -930,14 +931,7 @@ func testExchangeContinuousBackups(suite *BackupOpIntegrationSuite, toggles cont // verify test data was populated, and track it for comparisons // TODO: this can be 
swapped out for InDeets checks if we add itemRefs to folder ents. for category, gen := range dataset { - qp := graph.QueryParams{ - Category: category, - ResourceOwner: uidn, - Credentials: m365, - } - - cr, err := exchange.PopulateExchangeContainerResolver(ctx, qp, fault.New(true)) - require.NoError(t, err, "populating container resolver", category, clues.ToCore(err)) + cr := exchTD.PopulateContainerCache(t, ctx, ac, category, uidn.ID(), fault.New(true)) for destName, dest := range gen.dests { id, ok := cr.LocationInCache(dest.locRef) @@ -1043,19 +1037,12 @@ func testExchangeContinuousBackups(suite *BackupOpIntegrationSuite, toggles cont version.Backup, gen.dbf) - qp := graph.QueryParams{ - Category: category, - ResourceOwner: uidn, - Credentials: m365, - } - expectedLocRef := container3 if category == path.EmailCategory { expectedLocRef = path.Builder{}.Append(container3, container3).String() } - cr, err := exchange.PopulateExchangeContainerResolver(ctx, qp, fault.New(true)) - require.NoError(t, err, "populating container resolver", category, clues.ToCore(err)) + cr := exchTD.PopulateContainerCache(t, ctx, ac, category, uidn.ID(), fault.New(true)) id, ok := cr.LocationInCache(expectedLocRef) require.Truef(t, ok, "dir %s found in %s cache", expectedLocRef, category) diff --git a/src/pkg/path/path.go b/src/pkg/path/path.go index 189e24449..f35e4a065 100644 --- a/src/pkg/path/path.go +++ b/src/pkg/path/path.go @@ -240,6 +240,15 @@ func (pb Builder) Dir() *Builder { } } +// HeadElem returns the first element in the Builder. +func (pb Builder) HeadElem() string { + if len(pb.elements) == 0 { + return "" + } + + return pb.elements[0] +} + // LastElem returns the last element in the Builder. func (pb Builder) LastElem() string { if len(pb.elements) == 0 { diff --git a/src/pkg/services/m365/api/contacts.go b/src/pkg/services/m365/api/contacts.go index 8588711f8..7574223c9 100644 --- a/src/pkg/services/m365/api/contacts.go +++ b/src/pkg/services/m365/api/contacts.go @@ -34,12 +34,13 @@ type Contacts struct { // containers // --------------------------------------------------------------------------- -// CreateContactFolder makes a contact folder with the displayName of folderName. +// CreateContainer makes a contact folder with the displayName of folderName. // If successful, returns the created folder object. 
-func (c Contacts) CreateContactFolder(
+func (c Contacts) CreateContainer(
 	ctx context.Context,
 	userID, containerName string,
-) (models.ContactFolderable, error) {
+	_ string, // parentContainerID needed for iface, doesn't apply to contacts
+) (graph.Container, error) {
 	body := models.NewContactFolder()
 	body.SetDisplayName(ptr.To(containerName))
diff --git a/src/pkg/services/m365/api/events.go b/src/pkg/services/m365/api/events.go
index bd63490f8..73503c0a2 100644
--- a/src/pkg/services/m365/api/events.go
+++ b/src/pkg/services/m365/api/events.go
@@ -38,16 +38,17 @@ type Events struct {
 // containers
 // ---------------------------------------------------------------------------

-// CreateCalendar makes an event Calendar with the name in the user's M365 exchange account
+// CreateContainer makes an event Calendar with the name in the user's M365 exchange account
 // Reference: https://docs.microsoft.com/en-us/graph/api/user-post-calendars?view=graph-rest-1.0&tabs=go
-func (c Events) CreateCalendar(
+func (c Events) CreateContainer(
 	ctx context.Context,
 	userID, containerName string,
-) (models.Calendarable, error) {
+	_ string, // parentContainerID needed for iface, doesn't apply to events
+) (graph.Container, error) {
 	body := models.NewCalendar()
 	body.SetName(&containerName)

-	mdl, err := c.Stable.
+	container, err := c.Stable.
 		Client().
 		Users().
 		ByUserId(userID).
@@ -57,7 +58,7 @@
 		return nil, graph.Wrap(ctx, err, "creating calendar")
 	}

-	return mdl, nil
+	return CalendarDisplayable{Calendarable: container}, nil
 }

 // DeleteContainer removes a calendar from user's M365 account
@@ -130,7 +131,7 @@ func (c Events) GetContainerByID(
 func (c Events) GetContainerByName(
 	ctx context.Context,
 	userID, containerName string,
-) (models.Calendarable, error) {
+) (graph.Container, error) {
 	filter := fmt.Sprintf("name eq '%s'", containerName)
 	options := &users.ItemCalendarsRequestBuilderGetRequestConfiguration{
 		QueryParameters: &users.ItemCalendarsRequestBuilderGetQueryParameters{
@@ -167,7 +168,7 @@
 		return nil, err
 	}

-	return cal, nil
+	return graph.CalendarDisplayable{Calendarable: cal}, nil
 }

 func (c Events) PatchCalendar(
diff --git a/src/pkg/services/m365/api/mail.go b/src/pkg/services/m365/api/mail.go
index ba275e762..872547ffe 100644
--- a/src/pkg/services/m365/api/mail.go
+++ b/src/pkg/services/m365/api/mail.go
@@ -64,10 +64,10 @@ func (c Mail) CreateMailFolder(
 	return mdl, nil
 }

-func (c Mail) CreateMailFolderWithParent(
+func (c Mail) CreateContainer(
 	ctx context.Context,
 	userID, containerName, parentContainerID string,
-) (models.MailFolderable, error) {
+) (graph.Container, error) {
 	isHidden := false
 	body := models.NewMailFolder()
 	body.SetDisplayName(&containerName)

From 5c4d57b416fabf007fc03632bc72e296e1015e06 Mon Sep 17 00:00:00 2001
From: Abin Simon
Date: Fri, 2 Jun 2023 10:26:11 +0530
Subject: [PATCH 6/6] Parallelize restores within a collection for OneDrive
 (#3492)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This should massively speed up restoring a collection with many items.
It will have less impact when a backup contains many collections with few
items each.

Numbers 🔢:

- Restoring ~7000 files, mostly small, totaling 1.5GB
  - Sequential: ~70m
  - Parallel: ~50m
- Restoring 1200 50MB files
  - Sequential: 4h 45m
  - Parallel: <40m

---

#### Does this PR need a docs update or release note?
- [x] :white_check_mark: Yes, it's included
- [ ] :clock1: Yes, but in a later PR
- [ ] :no_entry: No

#### Type of change

- [x] :sunflower: Feature
- [ ] :bug: Bugfix
- [ ] :world_map: Documentation
- [ ] :robot: Supportability/Tests
- [ ] :computer: CI/Deployment
- [ ] :broom: Tech Debt/Cleanup

#### Issue(s)

* https://github.com/alcionai/corso/issues/3011
* closes https://github.com/alcionai/corso/issues/3536

#### Test Plan

- [ ] :muscle: Manual
- [x] :zap: Unit test
- [x] :green_heart: E2E
---
 CHANGELOG.md                                  |   1 +
 src/internal/connector/data_collections.go    |  47 +++-
 .../connector/exchange/data_collections.go    |   3 +-
 .../exchange/exchange_data_collection.go      |   5 +-
 .../connector/exchange/service_restore.go     |   3 +-
 .../connector/graph/concurrency_middleware.go |   2 +
 src/internal/connector/graph/consts.go        |  25 +-
 src/internal/connector/onedrive/collection.go |   9 +-
 .../connector/onedrive/collections.go         |   3 +-
 src/internal/connector/onedrive/item.go       |  21 --
 src/internal/connector/onedrive/item_test.go  |   6 +-
 src/internal/connector/onedrive/restore.go    | 225 ++++++++++++------
 .../connector/sharepoint/collection.go        |   3 +-
 .../connector/sharepoint/data_collections.go  |   3 +-
 src/internal/connector/sharepoint/restore.go  |   3 +
 src/internal/observe/observe.go               |  56 +++--
 src/internal/observe/observe_test.go          |  43 +---
 src/internal/operations/backup.go             |   6 +-
 src/internal/operations/restore.go            |   6 +-
 src/pkg/repository/repository.go              |   6 +-
 20 files changed, 297 insertions(+), 179 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index c3f47d973..a877f4d3a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ### Added
 - Added ProtectedResourceName to the backup list json output. ProtectedResourceName holds either a UPN or a WebURL, depending on the resource type.
 - Rework base selection logic for incremental backups so it's more likely to find a valid base.
+- Improve OneDrive restore performance by parallelizing item restores

 ### Fixed
 - Fix Exchange folder cache population error when parent folder isn't found.
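The first hunk below threads a shared `sync.Pool` of copy buffers through the restore entry points, so each concurrent upload borrows a preallocated 5MB buffer instead of allocating its own. A minimal standalone sketch of that pooling pattern (names here are illustrative, not corso's API):

```go
package main

import (
	"fmt"
	"sync"
)

// copyBufferSize mirrors the 5MB buffer Microsoft recommends for
// chunked uploads.
const copyBufferSize = 5 * 1024 * 1024

func main() {
	// Each worker borrows a buffer instead of allocating 5MB per item.
	pool := sync.Pool{
		New: func() interface{} {
			b := make([]byte, copyBufferSize)
			// Pool a pointer so the slice header isn't re-boxed into an
			// interface{} on every Put.
			return &b
		},
	}

	var wg sync.WaitGroup

	for i := 0; i < 4; i++ {
		wg.Add(1)

		go func(n int) {
			defer wg.Done()

			bp := pool.Get().(*[]byte)
			defer pool.Put(bp)

			fmt.Printf("worker %d borrowed a %d-byte buffer\n", n, len(*bp))
		}(i)
	}

	wg.Wait()
}
```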
diff --git a/src/internal/connector/data_collections.go b/src/internal/connector/data_collections.go index c3591a0e6..10f4fab2b 100644 --- a/src/internal/connector/data_collections.go +++ b/src/internal/connector/data_collections.go @@ -3,6 +3,7 @@ package connector import ( "context" "strings" + "sync" "github.com/alcionai/clues" @@ -26,6 +27,13 @@ import ( "github.com/alcionai/corso/src/pkg/selectors" ) +const ( + // copyBufferSize is used for chunked upload + // Microsoft recommends 5-10MB buffers + // https://docs.microsoft.com/en-us/graph/api/driveitem-createuploadsession?view=graph-rest-1.0#best-practices + copyBufferSize = 5 * 1024 * 1024 +) + // --------------------------------------------------------------------------- // Data Collections // --------------------------------------------------------------------------- @@ -256,13 +264,46 @@ func (gc *GraphConnector) ConsumeRestoreCollections( return nil, clues.Wrap(err, "malformed azure credentials") } + // Buffer pool for uploads + pool := sync.Pool{ + New: func() interface{} { + b := make([]byte, copyBufferSize) + return &b + }, + } + switch sels.Service { case selectors.ServiceExchange: - status, err = exchange.RestoreCollections(ctx, creds, gc.Discovery, gc.Service, dest, dcs, deets, errs) + status, err = exchange.RestoreCollections(ctx, + creds, + gc.Discovery, + gc.Service, + dest, + dcs, + deets, + errs) case selectors.ServiceOneDrive: - status, err = onedrive.RestoreCollections(ctx, creds, backupVersion, gc.Service, dest, opts, dcs, deets, errs) + status, err = onedrive.RestoreCollections(ctx, + creds, + backupVersion, + gc.Service, + dest, + opts, + dcs, + deets, + &pool, + errs) case selectors.ServiceSharePoint: - status, err = sharepoint.RestoreCollections(ctx, backupVersion, creds, gc.Service, dest, opts, dcs, deets, errs) + status, err = sharepoint.RestoreCollections(ctx, + backupVersion, + creds, + gc.Service, + dest, + opts, + dcs, + deets, + &pool, + errs) default: err = clues.Wrap(clues.New(sels.Service.String()), "service not supported") } diff --git a/src/internal/connector/exchange/data_collections.go b/src/internal/connector/exchange/data_collections.go index a179156c6..0ee250b2e 100644 --- a/src/internal/connector/exchange/data_collections.go +++ b/src/internal/connector/exchange/data_collections.go @@ -268,10 +268,9 @@ func createCollections( return nil, clues.New("unsupported backup category type").WithClues(ctx) } - foldersComplete, closer := observe.MessageWithCompletion( + foldersComplete := observe.MessageWithCompletion( ctx, observe.Bulletf("%s", qp.Category)) - defer closer() defer close(foldersComplete) rootFolder, cc := handler.NewContainerCache(user.ID()) diff --git a/src/internal/connector/exchange/exchange_data_collection.go b/src/internal/connector/exchange/exchange_data_collection.go index 83c000b2c..921952b88 100644 --- a/src/internal/connector/exchange/exchange_data_collection.go +++ b/src/internal/connector/exchange/exchange_data_collection.go @@ -163,14 +163,11 @@ func (col *Collection) streamItems(ctx context.Context, errs *fault.Bus) { }() if len(col.added)+len(col.removed) > 0 { - var closer func() - colProgress, closer = observe.CollectionProgress( + colProgress = observe.CollectionProgress( ctx, col.fullPath.Category().String(), col.LocationPath().Elements()) - go closer() - defer func() { close(colProgress) }() diff --git a/src/internal/connector/exchange/service_restore.go b/src/internal/connector/exchange/service_restore.go index 4e77ef5cc..d8680d92b 100644 --- 
a/src/internal/connector/exchange/service_restore.go
+++ b/src/internal/connector/exchange/service_restore.go
@@ -145,11 +145,10 @@ func restoreCollection(
 		category = fullPath.Category()
 	)

-	colProgress, closer := observe.CollectionProgress(
+	colProgress := observe.CollectionProgress(
 		ctx,
 		category.String(),
 		fullPath.Folder(false))
-	defer closer()
 	defer close(colProgress)

 	for {
diff --git a/src/internal/connector/graph/concurrency_middleware.go b/src/internal/connector/graph/concurrency_middleware.go
index ba2a08fa6..c70988f65 100644
--- a/src/internal/connector/graph/concurrency_middleware.go
+++ b/src/internal/connector/graph/concurrency_middleware.go
@@ -142,6 +142,8 @@ type limiterConsumptionKey string
 const limiterConsumptionCtxKey limiterConsumptionKey = "corsoGraphRateLimiterConsumption"

 const (
+	// https://learn.microsoft.com/en-us/sharepoint/dev/general-development
+	// /how-to-avoid-getting-throttled-or-blocked-in-sharepoint-online#application-throttling
 	defaultLC      = 1
 	driveDefaultLC = 2
 	// limit consumption rate for single-item GET requests,
diff --git a/src/internal/connector/graph/consts.go b/src/internal/connector/graph/consts.go
index 0438ab10c..00785f1fa 100644
--- a/src/internal/connector/graph/consts.go
+++ b/src/internal/connector/graph/consts.go
@@ -7,7 +7,15 @@ import (
 	"github.com/alcionai/corso/src/pkg/path"
 )

-const AttachmentChunkSize = 4 * 1024 * 1024
+const (
+	AttachmentChunkSize = 4 * 1024 * 1024
+
+	// Upper limit on the number of concurrent uploads, since each upload
+	// draws a copy buffer from the pool. This is not the actual number of
+	// uploads, only the maximum that can be configured; it acts as a
+	// safeguard against misconfigured values.
+	maxConcurrentUploads = 20
+)

 // ---------------------------------------------------------------------------
 // item response AdditionalData
@@ -44,6 +52,8 @@ type parallelism struct {
 	collectionBuffer int
 	// sets the parallelism of item population within a collection.
 	item int
+	// sets the parallelism of concurrent uploads within a collection
+	itemUpload int
 }

 func (p parallelism) CollectionBufferSize() int {
@@ -88,6 +98,18 @@ func (p parallelism) Item() int {
 	return p.item
 }

+func (p parallelism) ItemUpload() int {
+	if p.itemUpload == 0 {
+		return 1
+	}
+
+	if p.itemUpload > maxConcurrentUploads {
+		return maxConcurrentUploads
+	}
+
+	return p.itemUpload
+}
+
 // returns low <= v <= high
 // if high < low, returns low <= v
 func isWithin(low, high, v int) bool {
@@ -102,6 +124,7 @@ var sp = map[path.ServiceType]parallelism{
 	path.OneDriveService: {
 		collectionBuffer: 5,
 		item:             4,
+		itemUpload:       7,
 	},
 	// sharepoint libraries are considered "onedrive" parallelism.
 	// this only controls lists/pages.
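The `ItemUpload()` cap added above feeds the channel-based semaphore that `restore.go` uses to bound per-collection upload concurrency (visible in the later hunks). A minimal sketch of that bounded-concurrency pattern, with a hard-coded cap standing in for `graph.Parallelism(path.OneDriveService).ItemUpload()`:

```go
package main

import (
	"fmt"
	"sync"
)

func main() {
	const itemUploadParallelism = 7 // stand-in for ItemUpload()

	// Buffered channel as a semaphore: a send acquires a slot,
	// a receive releases it.
	semaphoreCh := make(chan struct{}, itemUploadParallelism)

	var wg sync.WaitGroup

	for i := 0; i < 20; i++ {
		wg.Add(1)
		semaphoreCh <- struct{}{} // blocks while 7 uploads are in flight

		go func(item int) {
			defer wg.Done()
			defer func() { <-semaphoreCh }()

			fmt.Println("restoring item", item)
		}(i)
	}

	wg.Wait()
}
```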
diff --git a/src/internal/connector/onedrive/collection.go b/src/internal/connector/onedrive/collection.go index 6f7cf3da8..f4f3807fb 100644 --- a/src/internal/connector/onedrive/collection.go +++ b/src/internal/connector/onedrive/collection.go @@ -439,12 +439,11 @@ func (oc *Collection) populateItems(ctx context.Context, errs *fault.Bus) { queuedPath = "/" + oc.driveName + queuedPath } - folderProgress, colCloser := observe.ProgressWithCount( + folderProgress := observe.ProgressWithCount( ctx, observe.ItemQueueMsg, path.NewElements(queuedPath), int64(len(oc.driveItems))) - defer colCloser() defer close(folderProgress) semaphoreCh := make(chan struct{}, graph.Parallelism(path.OneDriveService).Item()) @@ -535,13 +534,12 @@ func (oc *Collection) populateItems(ctx context.Context, errs *fault.Bus) { } // display/log the item download - progReader, closer := observe.ItemProgress( + progReader, _ := observe.ItemProgress( ctx, itemData, observe.ItemBackupMsg, clues.Hide(itemName+dataSuffix), itemSize) - go closer() return progReader, nil }) @@ -554,13 +552,12 @@ func (oc *Collection) populateItems(ctx context.Context, errs *fault.Bus) { } metaReader := lazy.NewLazyReadCloser(func() (io.ReadCloser, error) { - progReader, closer := observe.ItemProgress( + progReader, _ := observe.ItemProgress( ctx, itemMeta, observe.ItemBackupMsg, clues.Hide(itemName+metaSuffix), int64(itemMetaSize)) - go closer() return progReader, nil }) diff --git a/src/internal/connector/onedrive/collections.go b/src/internal/connector/onedrive/collections.go index aabee1bcf..71e665053 100644 --- a/src/internal/connector/onedrive/collections.go +++ b/src/internal/connector/onedrive/collections.go @@ -283,8 +283,7 @@ func (c *Collections) Get( driveTombstones[driveID] = struct{}{} } - driveComplete, closer := observe.MessageWithCompletion(ctx, observe.Bulletf("files")) - defer closer() + driveComplete := observe.MessageWithCompletion(ctx, observe.Bulletf("files")) defer close(driveComplete) // Enumerate drives for the specified resourceOwner diff --git a/src/internal/connector/onedrive/item.go b/src/internal/connector/onedrive/item.go index ac05887cc..97578589f 100644 --- a/src/internal/connector/onedrive/item.go +++ b/src/internal/connector/onedrive/item.go @@ -346,27 +346,6 @@ func sharePointItemInfo(di models.DriveItemable, itemSize int64) *details.ShareP } } -// driveItemWriter is used to initialize and return an io.Writer to upload data for the specified item -// It does so by creating an upload session and using that URL to initialize an `itemWriter` -// TODO: @vkamra verify if var session is the desired input -func driveItemWriter( - ctx context.Context, - gs graph.Servicer, - driveID, itemID string, - itemSize int64, -) (io.Writer, error) { - ctx = clues.Add(ctx, "upload_item_id", itemID) - - r, err := api.PostDriveItem(ctx, gs, driveID, itemID) - if err != nil { - return nil, clues.Stack(err) - } - - iw := graph.NewLargeItemWriter(itemID, ptr.Val(r.GetUploadUrl()), itemSize) - - return iw, nil -} - // constructWebURL helper function for recreating the webURL // for the originating SharePoint site. Uses additional data map // from a models.DriveItemable that possesses a downloadURL within the map. 
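With `driveItemWriter` deleted, the two calls it wrapped are inlined at the call sites, as the test change below shows. A hedged sketch of the resulting flow; `uploadItemContent` is a hypothetical wrapper, while `PostDriveItem`, `NewLargeItemWriter`, and `ptr.Val` are the calls visible in the surrounding diffs:

```go
import (
	"context"
	"io"

	"github.com/alcionai/clues"

	"github.com/alcionai/corso/src/internal/common/ptr"
	"github.com/alcionai/corso/src/internal/connector/graph"
	"github.com/alcionai/corso/src/pkg/services/m365/api"
)

// uploadItemContent creates an upload session for the item, then streams
// its content to the session URL in buffer-sized chunks.
func uploadItemContent(
	ctx context.Context,
	srv graph.Servicer,
	driveID, itemID string,
	size int64,
	content io.Reader,
	copyBuffer []byte,
) error {
	// The POST returns a short-lived uploadUrl for the session.
	r, err := api.PostDriveItem(ctx, srv, driveID, itemID)
	if err != nil {
		return clues.Stack(err)
	}

	w := graph.NewLargeItemWriter(itemID, ptr.Val(r.GetUploadUrl()), size)

	// io.CopyBuffer writes at most len(copyBuffer) bytes per call, so the
	// buffer size sets the chunk size of the multipart upload.
	if _, err := io.CopyBuffer(w, content, copyBuffer); err != nil {
		return clues.Wrap(err, "uploading item content")
	}

	return nil
}
```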
diff --git a/src/internal/connector/onedrive/item_test.go b/src/internal/connector/onedrive/item_test.go index a30037b43..5ad4f537a 100644 --- a/src/internal/connector/onedrive/item_test.go +++ b/src/internal/connector/onedrive/item_test.go @@ -189,9 +189,13 @@ func (suite *ItemIntegrationSuite) TestItemWriter() { // Initialize a 100KB mockDataProvider td, writeSize := mockDataReader(int64(100 * 1024)) - w, err := driveItemWriter(ctx, srv, test.driveID, ptr.Val(newItem.GetId()), writeSize) + itemID := ptr.Val(newItem.GetId()) + + r, err := api.PostDriveItem(ctx, srv, test.driveID, itemID) require.NoError(t, err, clues.ToCore(err)) + w := graph.NewLargeItemWriter(itemID, ptr.Val(r.GetUploadUrl()), writeSize) + // Using a 32 KB buffer for the copy allows us to validate the // multi-part upload. `io.CopyBuffer` will only write 32 KB at // a time diff --git a/src/internal/connector/onedrive/restore.go b/src/internal/connector/onedrive/restore.go index 755496420..124668bb1 100644 --- a/src/internal/connector/onedrive/restore.go +++ b/src/internal/connector/onedrive/restore.go @@ -3,10 +3,13 @@ package onedrive import ( "context" "encoding/json" + "fmt" "io" "runtime/trace" "sort" "strings" + "sync" + "sync/atomic" "github.com/alcionai/clues" "github.com/pkg/errors" @@ -28,10 +31,10 @@ import ( "github.com/alcionai/corso/src/pkg/services/m365/api" ) -// copyBufferSize is used for chunked upload -// Microsoft recommends 5-10MB buffers -// https://docs.microsoft.com/en-us/graph/api/driveitem-createuploadsession?view=graph-rest-1.0#best-practices -const copyBufferSize = 5 * 1024 * 1024 +const ( + // Maximum number of retries for upload failures + maxUploadRetries = 3 +) type restoreCaches struct { Folders *folderCache @@ -59,6 +62,7 @@ func RestoreCollections( opts control.Options, dcs []data.RestoreCollection, deets *details.Builder, + pool *sync.Pool, errs *fault.Bus, ) (*support.ConnectorOperationStatus, error) { var ( @@ -104,6 +108,7 @@ func RestoreCollections( dest.ContainerName, deets, opts.RestorePermissions, + pool, errs) if err != nil { el.AddRecoverable(err) @@ -142,13 +147,18 @@ func RestoreCollection( restoreContainerName string, deets *details.Builder, restorePerms bool, + pool *sync.Pool, errs *fault.Bus, ) (support.CollectionMetrics, error) { var ( - metrics = support.CollectionMetrics{} - copyBuffer = make([]byte, copyBufferSize) - directory = dc.FullPath() - el = errs.Local() + metrics = support.CollectionMetrics{} + directory = dc.FullPath() + el = errs.Local() + metricsObjects int64 + metricsBytes int64 + metricsSuccess int64 + wg sync.WaitGroup + complete bool ) ctx, end := diagnostics.Span(ctx, "gc:drive:restoreCollection", diagnostics.Label("path", directory)) @@ -212,8 +222,30 @@ func RestoreCollection( caches.ParentDirToMeta[dc.FullPath().String()] = colMeta items := dc.Items(ctx, errs) + semaphoreCh := make(chan struct{}, graph.Parallelism(path.OneDriveService).ItemUpload()) + defer close(semaphoreCh) + + deetsLock := sync.Mutex{} + + updateDeets := func( + ctx context.Context, + repoRef path.Path, + locationRef *path.Builder, + updated bool, + info details.ItemInfo, + ) { + deetsLock.Lock() + defer deetsLock.Unlock() + + err = deets.Add(repoRef, locationRef, updated, info) + if err != nil { + // Not critical enough to need to stop restore operation. 
+ logger.CtxErr(ctx, err).Infow("adding restored item to details") + } + } + for { - if el.Failure() != nil { + if el.Failure() != nil || complete { break } @@ -223,62 +255,76 @@ case itemData, ok := <-items: if !ok { - return metrics, nil + // We've processed all items in this collection; exit the loop + complete = true + break } - ictx := clues.Add(ctx, "restore_item_id", itemData.UUID()) + wg.Add(1) + semaphoreCh <- struct{}{} - itemPath, err := dc.FullPath().AppendItem(itemData.UUID()) - if err != nil { - el.AddRecoverable(clues.Wrap(err, "appending item to full path").WithClues(ictx)) - continue - } + go func(ctx context.Context, itemData data.Stream) { + defer wg.Done() + defer func() { <-semaphoreCh }() - itemInfo, skipped, err := restoreItem( - ictx, - creds, - dc, - backupVersion, - source, - service, - drivePath, - restoreFolderID, - copyBuffer, - caches, - restorePerms, - itemData, - itemPath) + copyBufferPtr := pool.Get().(*[]byte) + defer pool.Put(copyBufferPtr) - // skipped items don't get counted, but they can error - if !skipped { - metrics.Objects++ - metrics.Bytes += int64(len(copyBuffer)) - } + copyBuffer := *copyBufferPtr - if err != nil { - el.AddRecoverable(clues.Wrap(err, "restoring item")) - continue - } + ictx := clues.Add(ctx, "restore_item_id", itemData.UUID()) - if skipped { - logger.Ctx(ictx).With("item_path", itemPath).Debug("did not restore item") - continue - } + itemPath, err := dc.FullPath().AppendItem(itemData.UUID()) + if err != nil { + el.AddRecoverable(clues.Wrap(err, "appending item to full path").WithClues(ictx)) + return + } - err = deets.Add( - itemPath, - &path.Builder{}, // TODO: implement locationRef - true, - itemInfo) - if err != nil { - // Not critical enough to need to stop restore operation. - logger.CtxErr(ictx, err).Infow("adding restored item to details") - } + itemInfo, skipped, err := restoreItem( + ictx, + creds, + dc, + backupVersion, + source, + service, + drivePath, + restoreFolderID, + copyBuffer, + caches, + restorePerms, + itemData, + itemPath) - metrics.Successes++ + // skipped items don't get counted, but they can error + if !skipped { + atomic.AddInt64(&metricsObjects, 1) + atomic.AddInt64(&metricsBytes, int64(len(copyBuffer))) + } + + if err != nil { + el.AddRecoverable(clues.Wrap(err, "restoring item")) + return + } + + if skipped { + logger.Ctx(ictx).With("item_path", itemPath).Debug("did not restore item") + return + } + + // TODO: implement locationRef + updateDeets(ictx, itemPath, &path.Builder{}, true, itemInfo) + + atomic.AddInt64(&metricsSuccess, 1) + }(ctx, itemData) } } + wg.Wait() + + metrics.Objects = int(metricsObjects) + metrics.Bytes = metricsBytes + metrics.Successes = int(metricsSuccess) + return metrics, el.Failure() } @@ -308,6 +354,7 @@ func restoreItem( source, service, drivePath, + dc, restoreFolderID, copyBuffer, itemData) @@ -399,6 +446,7 @@ func restoreV0File( source driveSource, service graph.Servicer, drivePath *path.DrivePath, + fetcher fileFetcher, restoreFolderID string, copyBuffer []byte, itemData data.Stream, @@ -406,6 +454,7 @@ _, itemInfo, err := restoreData( ctx, service, + fetcher, itemData.UUID(), itemData, drivePath.DriveID, @@ -442,6 +491,7 @@ func restoreV1File( itemID, itemInfo, err := restoreData( ctx, service, + fetcher, trimmedName, itemData, drivePath.DriveID, @@ -525,6 +575,7 @@ func restoreV6File( itemID, itemInfo, err := restoreData( ctx, service, + fetcher, meta.FileName, itemData, drivePath.DriveID, @@ -673,6 +724,7 @@ func createRestoreFolders( func restoreData( ctx context.Context, service graph.Servicer, + fetcher fileFetcher, name string, itemData data.Stream, driveID, parentFolderID string, @@ -696,26 +748,65 @@ return "", details.ItemInfo{}, err } - // Get a drive item writer - w, err := driveItemWriter(ctx, service, driveID, ptr.Val(newItem.GetId()), ss.Size()) + itemID := ptr.Val(newItem.GetId()) + ctx = clues.Add(ctx, "upload_item_id", itemID) + + r, err := api.PostDriveItem(ctx, service, driveID, itemID) if err != nil { - return "", details.ItemInfo{}, err + return "", details.ItemInfo{}, clues.Wrap(err, "get upload session") } - iReader := itemData.ToReader() - progReader, closer := observe.ItemProgress( - ctx, - iReader, - observe.ItemRestoreMsg, - clues.Hide(name), - ss.Size()) + var written int64 - go closer() + // This only retries the file upload; the uploadSession creation is + // not retried here. We need extra logic to retry a file upload, as we + // have to pull the file again from kopia. If we fail a file upload, + // we restart from scratch and try to upload again. Graph does not + // register partial file uploads, so if we fail an + // upload the file size will be 0. + for i := 0; i <= maxUploadRetries; i++ { + // Initialize an io.Writer to upload data for the + // specified item. It does so by taking the upload session URL + // and using it to initialize an `itemWriter` + // TODO: @vkamra verify if var session is the desired input + w := graph.NewLargeItemWriter(itemID, ptr.Val(r.GetUploadUrl()), ss.Size()) + + pname := name + iReader := itemData.ToReader() + + if i > 0 { + pname = fmt.Sprintf("%s (retry %d)", name, i) + + // If it is not the first try, we have to pull the file + // again from kopia.
Ideally we could just seek the stream + // but we don't have a Seeker available here. + itemData, err := fetcher.Fetch(ctx, itemData.UUID()) + if err != nil { + return "", details.ItemInfo{}, clues.Wrap(err, "get data file") + } + + iReader = itemData.ToReader() + } + + progReader, abort := observe.ItemProgress( + ctx, + iReader, + observe.ItemRestoreMsg, + clues.Hide(pname), + ss.Size()) + + // Upload the stream data + written, err = io.CopyBuffer(w, progReader, copyBuffer) + if err == nil { + break + } + + // clear out the bar if err + abort() + } - // Upload the stream data - written, err := io.CopyBuffer(w, progReader, copyBuffer) if err != nil { - return "", details.ItemInfo{}, graph.Wrap(ctx, err, "writing item bytes") + return "", details.ItemInfo{}, clues.Wrap(err, "uploading file") } dii := details.ItemInfo{} diff --git a/src/internal/connector/sharepoint/collection.go b/src/internal/connector/sharepoint/collection.go index 87bd268d7..da63e9b89 100644 --- a/src/internal/connector/sharepoint/collection.go +++ b/src/internal/connector/sharepoint/collection.go @@ -183,11 +183,10 @@ func (sc *Collection) runPopulate(ctx context.Context, errs *fault.Bus) (support ) // TODO: Insert correct ID for CollectionProgress - colProgress, closer := observe.CollectionProgress( + colProgress := observe.CollectionProgress( ctx, sc.fullPath.Category().String(), sc.fullPath.Folders()) - go closer() defer func() { close(colProgress) diff --git a/src/internal/connector/sharepoint/data_collections.go b/src/internal/connector/sharepoint/data_collections.go index e23de9493..f7f0f2481 100644 --- a/src/internal/connector/sharepoint/data_collections.go +++ b/src/internal/connector/sharepoint/data_collections.go @@ -61,10 +61,9 @@ func DataCollections( break } - foldersComplete, closer := observe.MessageWithCompletion( + foldersComplete := observe.MessageWithCompletion( ctx, observe.Bulletf("%s", scope.Category().PathType())) - defer closer() defer close(foldersComplete) var spcs []data.BackupCollection diff --git a/src/internal/connector/sharepoint/restore.go b/src/internal/connector/sharepoint/restore.go index f889a25b9..2f15152a8 100644 --- a/src/internal/connector/sharepoint/restore.go +++ b/src/internal/connector/sharepoint/restore.go @@ -6,6 +6,7 @@ import ( "fmt" "io" "runtime/trace" + "sync" "github.com/alcionai/clues" "github.com/microsoftgraph/msgraph-sdk-go/models" @@ -48,6 +49,7 @@ func RestoreCollections( opts control.Options, dcs []data.RestoreCollection, deets *details.Builder, + pool *sync.Pool, errs *fault.Bus, ) (*support.ConnectorOperationStatus, error) { var ( @@ -90,6 +92,7 @@ func RestoreCollections( dest.ContainerName, deets, opts.RestorePermissions, + pool, errs) case path.ListsCategory: diff --git a/src/internal/observe/observe.go b/src/internal/observe/observe.go index ea822f431..bd94b6531 100644 --- a/src/internal/observe/observe.go +++ b/src/internal/observe/observe.go @@ -180,7 +180,7 @@ func Message(ctx context.Context, msgs ...any) { func MessageWithCompletion( ctx context.Context, msg any, -) (chan<- struct{}, func()) { +) chan<- struct{} { var ( plain = plainString(msg) loggable = fmt.Sprintf("%v", msg) @@ -191,7 +191,8 @@ func MessageWithCompletion( log.Info(loggable) if cfg.hidden() { - return ch, func() { log.Info("done - " + loggable) } + defer log.Info("done - " + loggable) + return ch } wg.Add(1) @@ -219,11 +220,11 @@ func MessageWithCompletion( bar.SetTotal(-1, true) }) - wacb := waitAndCloseBar(bar, func() { + go waitAndCloseBar(bar, func() { log.Info("done - " + 
loggable) - }) + })() - return ch, wacb + return ch } // --------------------------------------------------------------------------- @@ -247,7 +248,8 @@ func ItemProgress( log.Debug(header) if cfg.hidden() || rc == nil || totalBytes == 0 { - return rc, func() { log.Debug("done - " + header) } + defer log.Debug("done - " + header) + return rc, func() {} } wg.Add(1) @@ -266,12 +268,17 @@ func ItemProgress( bar := progress.New(totalBytes, mpb.NopStyle(), barOpts...) - wacb := waitAndCloseBar(bar, func() { + go waitAndCloseBar(bar, func() { // might be overly chatty, we can remove if needed. log.Debug("done - " + header) - }) + })() - return bar.ProxyReader(rc), wacb + abort := func() { + bar.SetTotal(-1, true) + bar.Abort(true) + } + + return bar.ProxyReader(rc), abort } // ProgressWithCount tracks the display of a bar that tracks the completion @@ -283,7 +290,7 @@ func ProgressWithCount( header string, msg any, count int64, -) (chan<- struct{}, func()) { +) chan<- struct{} { var ( plain = plainString(msg) loggable = fmt.Sprintf("%s %v - %d", header, msg, count) @@ -295,7 +302,10 @@ func ProgressWithCount( if cfg.hidden() { go listen(ctx, ch, nop, nop) - return ch, func() { log.Info("done - " + loggable) } + + defer log.Info("done - " + loggable) + + return ch } wg.Add(1) @@ -319,11 +329,11 @@ func ProgressWithCount( func() { bar.Abort(true) }, bar.Increment) - wacb := waitAndCloseBar(bar, func() { + go waitAndCloseBar(bar, func() { log.Info("done - " + loggable) - }) + })() - return ch, wacb + return ch } // --------------------------------------------------------------------------- @@ -365,7 +375,7 @@ func CollectionProgress( ctx context.Context, category string, dirName any, -) (chan<- struct{}, func()) { +) chan<- struct{} { var ( counted int plain = plainString(dirName) @@ -388,7 +398,10 @@ func CollectionProgress( if cfg.hidden() || len(plain) == 0 { go listen(ctx, ch, nop, incCount) - return ch, func() { log.Infow("done - "+message, "count", counted) } + + defer log.Infow("done - "+message, "count", counted) + + return ch } wg.Add(1) @@ -420,18 +433,21 @@ func CollectionProgress( bar.Increment() }) - wacb := waitAndCloseBar(bar, func() { + go waitAndCloseBar(bar, func() { log.Infow("done - "+message, "count", counted) - }) + })() - return ch, wacb + return ch } func waitAndCloseBar(bar *mpb.Bar, log func()) func() { return func() { bar.Wait() wg.Done() - log() + + if !bar.Aborted() { + log() + } } } diff --git a/src/internal/observe/observe_test.go b/src/internal/observe/observe_test.go index 4fc36a912..08365462a 100644 --- a/src/internal/observe/observe_test.go +++ b/src/internal/observe/observe_test.go @@ -51,16 +51,14 @@ func (suite *ObserveProgressUnitSuite) TestItemProgress() { }() from := make([]byte, 100) - prog, closer := ItemProgress( + prog, abort := ItemProgress( ctx, io.NopCloser(bytes.NewReader(from)), "folder", tst, 100) require.NotNil(t, prog) - require.NotNil(t, closer) - - defer closer() + require.NotNil(t, abort) var i int @@ -105,9 +103,8 @@ func (suite *ObserveProgressUnitSuite) TestCollectionProgress_unblockOnCtxCancel SeedWriter(context.Background(), nil, nil) }() - progCh, closer := CollectionProgress(ctx, testcat, testertons) + progCh := CollectionProgress(ctx, testcat, testertons) require.NotNil(t, progCh) - require.NotNil(t, closer) defer close(progCh) @@ -119,9 +116,6 @@ func (suite *ObserveProgressUnitSuite) TestCollectionProgress_unblockOnCtxCancel time.Sleep(1 * time.Second) cancel() }() - - // blocks, but should resolve due to the ctx cancel - closer() } 
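The observe helpers now return just the progress channel; completion is signaled by sending on and then closing the channel, and `ItemProgress` returns an `abort` func in place of the old closer. A condensed sketch of the new calling convention, matching the call sites updated in this patch (`rc`, `name`, and `size` are assumed to be in scope):

```go
// Completion-style bars: send to mark done, close to release the bar.
complete := observe.MessageWithCompletion(ctx, "Restoring data")
defer func() {
	complete <- struct{}{}
	close(complete)
}()

// Byte-progress bars: abort() clears the bar if the copy fails partway.
progReader, abort := observe.ItemProgress(
	ctx, rc, observe.ItemRestoreMsg, clues.Hide(name), size)

if _, err := io.Copy(io.Discard, progReader); err != nil {
	abort()
}
```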
func (suite *ObserveProgressUnitSuite) TestCollectionProgress_unblockOnChannelClose() { @@ -140,9 +134,8 @@ func (suite *ObserveProgressUnitSuite) TestCollectionProgress_unblockOnChannelCl SeedWriter(context.Background(), nil, nil) }() - progCh, closer := CollectionProgress(ctx, testcat, testertons) + progCh := CollectionProgress(ctx, testcat, testertons) require.NotNil(t, progCh) - require.NotNil(t, closer) for i := 0; i < 50; i++ { progCh <- struct{}{} @@ -152,9 +145,6 @@ func (suite *ObserveProgressUnitSuite) TestCollectionProgress_unblockOnChannelCl time.Sleep(1 * time.Second) close(progCh) }() - - // blocks, but should resolve due to the cancel - closer() } func (suite *ObserveProgressUnitSuite) TestObserveProgress() { @@ -197,14 +187,11 @@ func (suite *ObserveProgressUnitSuite) TestObserveProgressWithCompletion() { message := "Test Message" - ch, closer := MessageWithCompletion(ctx, message) + ch := MessageWithCompletion(ctx, message) // Trigger completion ch <- struct{}{} - // Run the closer - this should complete because the bar was compelted above - closer() - Complete() require.NotEmpty(t, recorder.String()) @@ -229,14 +216,11 @@ func (suite *ObserveProgressUnitSuite) TestObserveProgressWithChannelClosed() { message := "Test Message" - ch, closer := MessageWithCompletion(ctx, message) + ch := MessageWithCompletion(ctx, message) // Close channel without completing close(ch) - // Run the closer - this should complete because the channel was closed above - closer() - Complete() require.NotEmpty(t, recorder.String()) @@ -263,14 +247,11 @@ func (suite *ObserveProgressUnitSuite) TestObserveProgressWithContextCancelled() message := "Test Message" - _, closer := MessageWithCompletion(ctx, message) + _ = MessageWithCompletion(ctx, message) // cancel context cancel() - // Run the closer - this should complete because the context was closed above - closer() - Complete() require.NotEmpty(t, recorder.String()) @@ -296,15 +277,12 @@ func (suite *ObserveProgressUnitSuite) TestObserveProgressWithCount() { message := "Test Message" count := 3 - ch, closer := ProgressWithCount(ctx, header, message, int64(count)) + ch := ProgressWithCount(ctx, header, message, int64(count)) for i := 0; i < count; i++ { ch <- struct{}{} } - // Run the closer - this should complete because the context was closed above - closer() - Complete() require.NotEmpty(t, recorder.String()) @@ -331,13 +309,10 @@ func (suite *ObserveProgressUnitSuite) TestrogressWithCountChannelClosed() { message := "Test Message" count := 3 - ch, closer := ProgressWithCount(ctx, header, message, int64(count)) + ch := ProgressWithCount(ctx, header, message, int64(count)) close(ch) - // Run the closer - this should complete because the context was closed above - closer() - Complete() require.NotEmpty(t, recorder.String()) diff --git a/src/internal/operations/backup.go b/src/internal/operations/backup.go index 5ad400bb3..db7a007fa 100644 --- a/src/internal/operations/backup.go +++ b/src/internal/operations/backup.go @@ -407,11 +407,10 @@ func produceBackupDataCollections( ctrlOpts control.Options, errs *fault.Bus, ) ([]data.BackupCollection, prefixmatcher.StringSetReader, error) { - complete, closer := observe.MessageWithCompletion(ctx, "Discovering items to backup") + complete := observe.MessageWithCompletion(ctx, "Discovering items to backup") defer func() { complete <- struct{}{} close(complete) - closer() }() return bp.ProduceBackupCollections( @@ -490,11 +489,10 @@ func consumeBackupCollections( isIncremental bool, errs *fault.Bus, ) 
(*kopia.BackupStats, *details.Builder, kopia.DetailsMergeInfoer, error) { - complete, closer := observe.MessageWithCompletion(ctx, "Backing up data") + complete := observe.MessageWithCompletion(ctx, "Backing up data") defer func() { complete <- struct{}{} close(complete) - closer() }() tags := map[string]string{ diff --git a/src/internal/operations/restore.go b/src/internal/operations/restore.go index 5b2c9bd71..ab0b5a4c8 100644 --- a/src/internal/operations/restore.go +++ b/src/internal/operations/restore.go @@ -236,8 +236,7 @@ func (op *RestoreOperation) do( observe.Message(ctx, fmt.Sprintf("Discovered %d items in backup %s to restore", len(paths), op.BackupID)) - kopiaComplete, closer := observe.MessageWithCompletion(ctx, "Enumerating items in repository") - defer closer() + kopiaComplete := observe.MessageWithCompletion(ctx, "Enumerating items in repository") defer close(kopiaComplete) dcs, err := op.kopia.ProduceRestoreCollections(ctx, bup.SnapshotID, paths, opStats.bytesRead, op.Errors) @@ -322,11 +321,10 @@ func consumeRestoreCollections( dcs []data.RestoreCollection, errs *fault.Bus, ) (*details.Details, error) { - complete, closer := observe.MessageWithCompletion(ctx, "Restoring data") + complete := observe.MessageWithCompletion(ctx, "Restoring data") defer func() { complete <- struct{}{} close(complete) - closer() }() deets, err := rc.ConsumeRestoreCollections( diff --git a/src/pkg/repository/repository.go b/src/pkg/repository/repository.go index 7f37f4eaf..fbe374223 100644 --- a/src/pkg/repository/repository.go +++ b/src/pkg/repository/repository.go @@ -203,8 +203,7 @@ func Connect( // their output getting clobbered (#1720) defer observe.Complete() - complete, closer := observe.MessageWithCompletion(ctx, "Connecting to repository") - defer closer() + complete := observe.MessageWithCompletion(ctx, "Connecting to repository") defer close(complete) kopiaRef := kopia.NewConn(s) @@ -630,11 +629,10 @@ func connectToM365( sel selectors.Selector, acct account.Account, ) (*connector.GraphConnector, error) { - complete, closer := observe.MessageWithCompletion(ctx, "Connecting to M365") + complete := observe.MessageWithCompletion(ctx, "Connecting to M365") defer func() { complete <- struct{}{} close(complete) - closer() }() // retrieve data from the producer
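The `*sync.Pool` threaded through `RestoreCollections` in this patch is constructed by the caller, outside the hunks shown here. A plausible construction, assuming the pool holds `*[]byte` (matching the `pool.Get().(*[]byte)` assertion in restore.go) and reusing the 5MB size of the removed `copyBufferSize` constant:

```go
// Hypothetical caller-side pool for restore copy buffers; the actual
// creation site is not part of this diff.
pool := &sync.Pool{
	New: func() any {
		// 5MB mirrors the old copyBufferSize; storing a pointer
		// avoids an extra allocation on every Put.
		b := make([]byte, 5*1024*1024)
		return &b
	},
}
```

Each restore goroutine then does `Get`, copies through the buffer, and `Put`s it back, so steady-state allocation is bounded by `ItemUpload()` rather than by the number of items restored.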