Always make empty collections for prefix directories (#2929)

Use empty collections to ensure the prefix directories are available in
kopia no matter what user data we find.

This gives us a baseline set of guarantees we can assume about
non-failed backups, including:

* They have a valid snapshot ID for data
* They have a valid snapshot ID for backup details

Eventually, this will allow us to flag backups whose base lacks a prefix
directory as invalid in some way.

SharePoint tests are disabled because lists and pages ignore the none
target (`selectors.NoneTgt`) in selectors.
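
To make the mechanics concrete, here is a minimal sketch (not part of the diff) of how the prefix directory path is derived with this repo's `path` package: build a path with a throwaway leaf element, then pop the leaf off so only the tenant/service/owner/category prefix remains. The `prefixPath` wrapper and the path layout in the comment are illustrative assumptions; the real logic lives in the new `graph.BaseCollections` helper below.

```go
package example

import (
	"github.com/alcionai/clues"
	"github.com/alcionai/corso/src/pkg/path"
)

// prefixPath is an illustrative helper (not part of this PR) showing how
// BaseCollections derives the prefix directory that an empty collection
// is rooted at.
func prefixPath(
	tenant, owner string,
	service path.ServiceType,
	cat path.CategoryType,
) (path.Path, error) {
	// Build a full path with a placeholder leaf element...
	p, err := path.Build(tenant, owner, service, cat, false, "tmp")
	if err != nil {
		return nil, clues.Wrap(err, "making path")
	}

	// ...then drop the leaf so only the prefix directory remains
	// (roughly <tenant>/<service>/<owner>/<category>).
	return p.Dir()
}
```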

---

#### Does this PR need a docs update or release note?

- [x]  No

#### Type of change

- [x] 🐛 Bugfix

#### Issue(s)

* #2550

#### Test Plan

- [x]  Unit test
- [x] 💚 E2E
Authored by Keepers on 2023-03-23 18:04:03 -06:00; committed by GitHub.
parent 55be2f4e2e
commit b82c994b91
6 changed files with 294 additions and 10 deletions


@@ -117,9 +117,9 @@ func (suite *ConnectorDataCollectionIntegrationSuite) TestExchangeDataCollection
}
// Categories with delta endpoints will produce a collection for metadata
// as well as the actual data pulled.
// as well as the actual data pulled, and the "temp" root collection.
assert.GreaterOrEqual(t, len(collections), 1, "expected 1 <= num collections <= 2")
assert.GreaterOrEqual(t, 2, len(collections), "expected 1 <= num collections <= 2")
assert.GreaterOrEqual(t, 3, len(collections), "expected 1 <= num collections <= 3")
for _, col := range collections {
for object := range col.Items(ctx, fault.New(true)) {
@@ -343,17 +343,21 @@ func (suite *ConnectorCreateSharePointCollectionIntegrationSuite) TestCreateShar
control.Options{},
fault.New(true))
require.NoError(t, err, clues.ToCore(err))
assert.Len(t, cols, 1)
require.Len(t, cols, 2) // 1 collection, 1 path prefix directory to ensure the root path exists.
// No excludes yet as this isn't an incremental backup.
assert.Empty(t, excludes)
for _, collection := range cols {
t.Logf("Path: %s\n", collection.FullPath().String())
assert.Equal(
t,
path.SharePointMetadataService.String(),
collection.FullPath().Service().String())
}
t.Logf("cols[0] Path: %s\n", cols[0].FullPath().String())
assert.Equal(
t,
path.SharePointMetadataService.String(),
cols[0].FullPath().Service().String())
t.Logf("cols[1] Path: %s\n", cols[1].FullPath().String())
assert.Equal(
t,
path.SharePointService.String(),
cols[1].FullPath().Service().String())
}
func (suite *ConnectorCreateSharePointCollectionIntegrationSuite) TestCreateSharePointCollection_Lists() {


@@ -179,6 +179,7 @@ func DataCollections(
user = selector.DiscreteOwner
collections = []data.BackupCollection{}
el = errs.Local()
categories = map[path.CategoryType]struct{}{}
)
cdps, err := parseMetadataCollections(ctx, metadata, errs)
@@ -205,9 +206,27 @@ func DataCollections(
continue
}
categories[scope.Category().PathType()] = struct{}{}
collections = append(collections, dcs...)
}
if len(collections) > 0 {
baseCols, err := graph.BaseCollections(
ctx,
acct.AzureTenantID,
user,
path.ExchangeService,
categories,
su,
errs)
if err != nil {
return nil, nil, err
}
collections = append(collections, baseCols...)
}
return collections, nil, el.Failure()
}


@@ -0,0 +1,91 @@
package graph
import (
"context"
"github.com/alcionai/clues"
"github.com/alcionai/corso/src/internal/connector/support"
"github.com/alcionai/corso/src/internal/data"
"github.com/alcionai/corso/src/pkg/fault"
"github.com/alcionai/corso/src/pkg/path"
)
var _ data.BackupCollection = emptyCollection{}
type emptyCollection struct {
p path.Path
su support.StatusUpdater
}
func (c emptyCollection) Items(ctx context.Context, _ *fault.Bus) <-chan data.Stream {
res := make(chan data.Stream)
close(res)
s := support.CreateStatus(ctx, support.Backup, 0, support.CollectionMetrics{}, "")
c.su(s)
return res
}
func (c emptyCollection) FullPath() path.Path {
return c.p
}
func (c emptyCollection) PreviousPath() path.Path {
return c.p
}
func (c emptyCollection) State() data.CollectionState {
// This assumes we won't change the prefix path. Could probably use MovedState
// as well if we do need to change things around.
return data.NotMovedState
}
func (c emptyCollection) DoNotMergeItems() bool {
return false
}
func BaseCollections(
ctx context.Context,
tenant, rOwner string,
service path.ServiceType,
categories map[path.CategoryType]struct{},
su support.StatusUpdater,
errs *fault.Bus,
) ([]data.BackupCollection, error) {
var (
res = []data.BackupCollection{}
el = errs.Local()
lastErr error
)
for cat := range categories {
ictx := clues.Add(ctx, "base_service", service, "base_category", cat)
p, err := path.Build(tenant, rOwner, service, cat, false, "tmp")
if err != nil {
// Shouldn't happen.
err = clues.Wrap(err, "making path").WithClues(ictx)
el.AddRecoverable(err)
lastErr = err
continue
}
// Pop off the last path element because we just want the prefix.
p, err = p.Dir()
if err != nil {
// Shouldn't happen.
err = clues.Wrap(err, "getting base prefix").WithClues(ictx)
el.AddRecoverable(err)
lastErr = err
continue
}
res = append(res, emptyCollection{p: p, su: su})
}
return res, lastErr
}
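
The Exchange, OneDrive, and SharePoint `DataCollections` hunks that follow all wire this helper in the same way. Below is a condensed sketch of that shared pattern; the `appendBaseCollections` wrapper and its parameter names are illustrative, not code from this PR, and each service passes its own tenant ID, resource owner, service type, and status updater.

```go
package example

import (
	"context"

	"github.com/alcionai/corso/src/internal/connector/graph"
	"github.com/alcionai/corso/src/internal/connector/support"
	"github.com/alcionai/corso/src/internal/data"
	"github.com/alcionai/corso/src/pkg/fault"
	"github.com/alcionai/corso/src/pkg/path"
)

// appendBaseCollections is an illustrative stand-in for the wiring each
// service adds in the hunks below: once at least one real collection was
// produced, append an empty base collection per category so the prefix
// directories always exist in kopia.
func appendBaseCollections(
	ctx context.Context,
	collections []data.BackupCollection,
	tenantID, resourceOwner string,
	service path.ServiceType,
	categories map[path.CategoryType]struct{},
	su support.StatusUpdater,
	errs *fault.Bus,
) ([]data.BackupCollection, error) {
	// Matches the `if len(collections) > 0` guard used by each service.
	if len(collections) == 0 {
		return collections, nil
	}

	baseCols, err := graph.BaseCollections(
		ctx, tenantID, resourceOwner, service, categories, su, errs)
	if err != nil {
		return nil, err
	}

	return append(collections, baseCols...), nil
}
```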


@@ -1054,3 +1054,134 @@ func (suite *GraphConnectorIntegrationSuite) TestRestoreAndBackup_largeMailAttac
},
)
}
func (suite *GraphConnectorIntegrationSuite) TestBackup_CreatesPrefixCollections() {
table := []struct {
name string
resource resource
selectorFunc func(t *testing.T) selectors.Selector
service path.ServiceType
categories []string
}{
{
name: "Exchange",
resource: Users,
selectorFunc: func(t *testing.T) selectors.Selector {
sel := selectors.NewExchangeBackup([]string{suite.user})
sel.Include(
sel.ContactFolders([]string{selectors.NoneTgt}),
sel.EventCalendars([]string{selectors.NoneTgt}),
sel.MailFolders([]string{selectors.NoneTgt}),
)
return sel.Selector
},
service: path.ExchangeService,
categories: []string{
path.EmailCategory.String(),
path.ContactsCategory.String(),
path.EventsCategory.String(),
},
},
{
name: "OneDrive",
resource: Users,
selectorFunc: func(t *testing.T) selectors.Selector {
sel := selectors.NewOneDriveBackup([]string{suite.user})
sel.Include(
sel.Folders([]string{selectors.NoneTgt}),
)
return sel.Selector
},
service: path.OneDriveService,
categories: []string{
path.FilesCategory.String(),
},
},
// SharePoint lists and pages don't seem to check selectors as expected.
//{
// name: "SharePoint",
// resource: Sites,
// selectorFunc: func(t *testing.T) selectors.Selector {
// sel := selectors.NewSharePointBackup([]string{tester.M365SiteID(t)})
// sel.Include(
// sel.Pages([]string{selectors.NoneTgt}),
// sel.Lists([]string{selectors.NoneTgt}),
// sel.Libraries([]string{selectors.NoneTgt}),
// )
// return sel.Selector
// },
// service: path.SharePointService,
// categories: []string{
// path.PagesCategory.String(),
// path.ListsCategory.String(),
// path.LibrariesCategory.String(),
// },
//},
}
for _, test := range table {
suite.Run(test.name, func() {
ctx, flush := tester.NewContext()
defer flush()
var (
t = suite.T()
backupGC = loadConnector(ctx, t, graph.HTTPClient(graph.NoTimeout()), test.resource)
backupSel = test.selectorFunc(t)
errs = fault.New(true)
start = time.Now()
)
dcs, excludes, err := backupGC.DataCollections(
ctx,
backupSel,
nil,
control.Options{
RestorePermissions: false,
ToggleFeatures: control.Toggles{EnablePermissionsBackup: false},
},
fault.New(true))
require.NoError(t, err)
// No excludes yet because this isn't an incremental backup.
assert.Empty(t, excludes)
t.Logf("Backup enumeration complete in %v\n", time.Since(start))
// Collect the categories we see so duplicates and missing categories show up
// in the comparison below.
foundCategories := []string{}
for _, col := range dcs {
// TODO(ashmrtn): We should be able to remove the below if we change how
// status updates are done. Ideally we shouldn't have to fetch items in
// these collections to avoid deadlocking.
var found int
// Need to iterate through this before the continue below else we'll
// hang checking the status.
for range col.Items(ctx, errs) {
found++
}
// Ignore metadata collections.
fullPath := col.FullPath()
if fullPath.Service() != test.service {
continue
}
assert.Empty(t, fullPath.Folders(), "non-prefix collection")
assert.NotEqual(t, col.State(), data.NewState, "prefix collection marked as new")
foundCategories = append(foundCategories, fullPath.Category().String())
assert.Zero(t, found, "non-empty collection")
}
assert.ElementsMatch(t, test.categories, foundCategories)
backupGC.AwaitStatus()
assert.NoError(t, errs.Failure())
})
}
}


@@ -11,6 +11,7 @@ import (
"github.com/alcionai/corso/src/pkg/control"
"github.com/alcionai/corso/src/pkg/fault"
"github.com/alcionai/corso/src/pkg/logger"
"github.com/alcionai/corso/src/pkg/path"
"github.com/alcionai/corso/src/pkg/selectors"
"golang.org/x/exp/maps"
)
@@ -48,6 +49,7 @@ func DataCollections(
var (
el = errs.Local()
user = selector.DiscreteOwner
categories = map[path.CategoryType]struct{}{}
collections = []data.BackupCollection{}
allExcludes = map[string]map[string]struct{}{}
)
@@ -75,6 +77,8 @@ func DataCollections(
el.AddRecoverable(clues.Stack(err).Label(fault.LabelForceNoBackupCreation))
}
categories[scope.Category().PathType()] = struct{}{}
collections = append(collections, odcs...)
for k, ex := range excludes {
@@ -86,5 +90,21 @@
}
}
if len(collections) > 0 {
baseCols, err := graph.BaseCollections(
ctx,
tenant,
user,
path.OneDriveService,
categories,
su,
errs)
if err != nil {
return nil, nil, err
}
collections = append(collections, baseCols...)
}
return collections, allExcludes, el.Failure()
}


@@ -47,6 +47,7 @@ func DataCollections(
el = errs.Local()
site = b.DiscreteOwner
collections = []data.BackupCollection{}
categories = map[path.CategoryType]struct{}{}
)
for _, scope := range b.Scopes() {
@@ -110,6 +111,24 @@
collections = append(collections, spcs...)
foldersComplete <- struct{}{}
categories[scope.Category().PathType()] = struct{}{}
}
if len(collections) > 0 {
baseCols, err := graph.BaseCollections(
ctx,
creds.AzureTenantID,
site,
path.SharePointService,
categories,
su.UpdateStatus,
errs)
if err != nil {
return nil, nil, err
}
collections = append(collections, baseCols...)
}
return collections, nil, el.Failure()