create missing drives on restore (#3795)

When restoring SharePoint, if a document library
was deleted between the time of backup and restore, create a new drive to hold the restored data.
This commit is contained in:
Keepers 2023-07-18 11:47:45 -06:00 committed by GitHub
parent f4b92139bc
commit 875eded902
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 548 additions and 33 deletions

View File

@ -7,6 +7,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased] (beta)
### Fixed
- SharePoint document libraries deleted after the last backup can now be restored.
## [v0.11.0] (beta) - 2023-07-18
### Added
@ -17,6 +20,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Fixed
- Return a ServiceNotEnabled error when a tenant has no active SharePoint license.
- Added retries for http/2 stream connection failures when downloading large item content.
- SharePoint document libraries that were deleted after the last backup can now be restored.
### Known issues
- If a link share is created for an item with inheritance disabled

View File

@ -362,7 +362,7 @@ func (suite *OneDriveIntgSuite) TestCreateGetDeleteFolder() {
}
caches := NewRestoreCaches()
caches.DriveIDToRootFolderID[driveID] = ptr.Val(rootFolder.GetId())
caches.DriveIDToDriveInfo[driveID] = driveInfo{rootFolderID: ptr.Val(rootFolder.GetId())}
rh := NewRestoreHandler(suite.ac)

View File

@ -5,10 +5,10 @@ import (
"net/http"
"strings"
"github.com/alcionai/clues"
"github.com/microsoftgraph/msgraph-sdk-go/drives"
"github.com/microsoftgraph/msgraph-sdk-go/models"
"github.com/alcionai/clues"
"github.com/alcionai/corso/src/internal/common/ptr"
odConsts "github.com/alcionai/corso/src/internal/m365/onedrive/consts"
"github.com/alcionai/corso/src/pkg/backup/details"

View File

@ -249,9 +249,25 @@ type RestoreHandler struct {
PostItemResp models.DriveItemable
PostItemErr error
DrivePagerV api.DrivePager
PostDriveResp models.Driveable
PostDriveErr error
UploadSessionErr error
}
// PostDrive returns the mock's canned drive and error; all inputs are
// ignored.
func (h RestoreHandler) PostDrive(
	ctx context.Context,
	protectedResourceID, driveName string,
) (models.Driveable, error) {
	return h.PostDriveResp, h.PostDriveErr
}
// NewDrivePager returns the mock's canned pager; all inputs are ignored.
func (h RestoreHandler) NewDrivePager(string, []string) api.DrivePager {
	return h.DrivePagerV
}
func (h *RestoreHandler) AugmentItemInfo(
details.ItemInfo,
models.DriveItemable,

View File

@ -37,9 +37,16 @@ const (
maxUploadRetries = 3
)
// driveInfo aggregates the identifiers needed to restore items into a
// drive: the drive's id, its display name, and the id of its root folder.
type driveInfo struct {
	id           string
	name         string
	rootFolderID string
}
type restoreCaches struct {
collisionKeyToItemID map[string]api.DriveItemIDType
DriveIDToRootFolderID map[string]string
DriveIDToDriveInfo map[string]driveInfo
DriveNameToDriveInfo map[string]driveInfo
Folders *folderCache
OldLinkShareIDToNewID map[string]string
OldPermIDToNewID map[string]string
@ -48,10 +55,66 @@ type restoreCaches struct {
pool sync.Pool
}
// AddDrive caches the drive's id, name, and root folder id so that later
// lookups by either drive id or drive name can find it.
func (rc *restoreCaches) AddDrive(
	ctx context.Context,
	md models.Driveable,
	grf GetRootFolderer,
) error {
	info := driveInfo{
		id:   ptr.Val(md.GetId()),
		name: ptr.Val(md.GetName()),
	}

	ctx = clues.Add(ctx, "drive_info", info)

	root, err := grf.GetRootFolder(ctx, info.id)
	if err != nil {
		return clues.Wrap(err, "getting drive root id")
	}

	info.rootFolderID = ptr.Val(root.GetId())

	// index the same entry by both id and name.
	rc.DriveIDToDriveInfo[info.id] = info
	rc.DriveNameToDriveInfo[info.name] = info

	return nil
}
// Populate looks up all drives available to the protectedResource
// and adds each one's info to the caches.
func (rc *restoreCaches) Populate(
	ctx context.Context,
	gdparf GetDrivePagerAndRootFolderer,
	protectedResourceID string,
) error {
	pager := gdparf.NewDrivePager(protectedResourceID, nil)

	ds, err := api.GetAllDrives(ctx, pager, true, maxDrivesRetries)
	if err != nil {
		return clues.Wrap(err, "getting drives")
	}

	for _, d := range ds {
		if err := rc.AddDrive(ctx, d, gdparf); err != nil {
			return clues.Wrap(err, "caching drive")
		}
	}

	return nil
}
// GetDrivePagerAndRootFolderer combines the lookups needed to populate
// the restore caches: enumerating a resource's drives and fetching each
// drive's root folder.
type GetDrivePagerAndRootFolderer interface {
	GetRootFolderer
	NewDrivePagerer
}
func NewRestoreCaches() *restoreCaches {
return &restoreCaches{
collisionKeyToItemID: map[string]api.DriveItemIDType{},
DriveIDToRootFolderID: map[string]string{},
DriveIDToDriveInfo: map[string]driveInfo{},
DriveNameToDriveInfo: map[string]driveInfo{},
Folders: NewFolderCache(),
OldLinkShareIDToNewID: map[string]string{},
OldPermIDToNewID: map[string]string{},
@ -79,13 +142,20 @@ func ConsumeRestoreCollections(
ctr *count.Bus,
) (*support.ControllerOperationStatus, error) {
var (
restoreMetrics support.CollectionMetrics
caches = NewRestoreCaches()
el = errs.Local()
restoreMetrics support.CollectionMetrics
el = errs.Local()
caches = NewRestoreCaches()
protectedResourceID = dcs[0].FullPath().ResourceOwner()
fallbackDriveName = "" // onedrive cannot create drives
)
ctx = clues.Add(ctx, "backup_version", backupVersion)
err := caches.Populate(ctx, rh, protectedResourceID)
if err != nil {
return nil, clues.Wrap(err, "initializing restore caches")
}
// Reorder collections so that the parents directories are created
// before the child directories; a requirement for permissions.
data.SortRestoreCollections(dcs)
@ -102,7 +172,7 @@ func ConsumeRestoreCollections(
ictx = clues.Add(
ctx,
"category", dc.FullPath().Category(),
"resource_owner", clues.Hide(dc.FullPath().ResourceOwner()),
"resource_owner", clues.Hide(protectedResourceID),
"full_path", dc.FullPath())
)
@ -115,6 +185,7 @@ func ConsumeRestoreCollections(
caches,
deets,
opts.RestorePermissions,
fallbackDriveName,
errs,
ctr.Local())
if err != nil {
@ -152,18 +223,20 @@ func RestoreCollection(
caches *restoreCaches,
deets *details.Builder,
restorePerms bool, // TODO: move into restoreConfig
fallbackDriveName string,
errs *fault.Bus,
ctr *count.Bus,
) (support.CollectionMetrics, error) {
var (
metrics = support.CollectionMetrics{}
directory = dc.FullPath()
el = errs.Local()
metricsObjects int64
metricsBytes int64
metricsSuccess int64
wg sync.WaitGroup
complete bool
metrics = support.CollectionMetrics{}
directory = dc.FullPath()
protectedResourceID = directory.ResourceOwner()
el = errs.Local()
metricsObjects int64
metricsBytes int64
metricsSuccess int64
wg sync.WaitGroup
complete bool
)
ctx, end := diagnostics.Span(ctx, "gc:drive:restoreCollection", diagnostics.Label("path", directory))
@ -174,13 +247,15 @@ func RestoreCollection(
return metrics, clues.Wrap(err, "creating drive path").WithClues(ctx)
}
if _, ok := caches.DriveIDToRootFolderID[drivePath.DriveID]; !ok {
root, err := rh.GetRootFolder(ctx, drivePath.DriveID)
if err != nil {
return metrics, clues.Wrap(err, "getting drive root id")
}
caches.DriveIDToRootFolderID[drivePath.DriveID] = ptr.Val(root.GetId())
err = ensureDriveExists(
ctx,
rh,
caches,
drivePath,
protectedResourceID,
fallbackDriveName)
if err != nil {
return metrics, clues.Wrap(err, "ensuring drive exists")
}
// Assemble folder hierarchy we're going to restore into (we recreate the folder hierarchy
@ -704,7 +779,7 @@ func createRestoreFolders(
driveID = drivePath.DriveID
folders = restoreDir.Elements()
location = path.Builder{}.Append(driveID)
parentFolderID = caches.DriveIDToRootFolderID[drivePath.DriveID]
parentFolderID = caches.DriveIDToDriveInfo[drivePath.DriveID].id
)
ctx = clues.Add(
@ -1113,3 +1188,67 @@ func AugmentRestorePaths(
return paths, nil
}
// PostDriveAndGetRootFolderer combines the operations needed to create
// a missing drive and cache it: posting the new drive and fetching its
// root folder.
type PostDriveAndGetRootFolderer interface {
	PostDriver
	GetRootFolderer
}
// ensureDriveExists looks up the drive in the cache by its id. If no
// drive is found with that ID, a new drive is created with the given
// driveName. If the name collides with an existing drive, a number is
// appended to the drive name. Eg: foo -> foo 1. This will repeat as
// many times as is needed (foo 2, foo 3, ...).
// On success the new drive (including its root folder id) is added to
// the caches; only an error is returned.
func ensureDriveExists(
	ctx context.Context,
	pdagrf PostDriveAndGetRootFolderer,
	caches *restoreCaches,
	drivePath *path.DrivePath,
	protectedResourceID, driveName string,
) error {
	driveID := drivePath.DriveID

	// the drive might already be cached
	if _, ok := caches.DriveIDToDriveInfo[driveID]; ok {
		return nil
	}

	var (
		newDriveName = driveName
		newDrive     models.Driveable
		err          error
	)

	// if the requested name is already taken by a cached drive, start
	// from "<name> 1" instead of the bare name.
	if _, ok := caches.DriveNameToDriveInfo[newDriveName]; ok {
		newDriveName = fmt.Sprintf("%s %d", driveName, 1)
	}

	// if not, double check that the name won't collide by looking
	// up drives by name until we can make some name like `foo N` that
	// doesn't collide with `foo` or other values of N in `foo N`.
	// Ex: foo -> foo 1 -> foo 2 -> ... -> foo N
	//
	// For sharepoint, document libraries can collide by name with
	// item types beyond just drive. Lists, for example, cannot share
	// names with document libraries. In those cases it's not enough
	// to compare the names of drives; we also need to continue this
	// loop until we can create a drive without error.
	for i := 2; ; i++ {
		ictx := clues.Add(ctx, "new_drive_name", clues.Hide(newDriveName))

		newDrive, err = pdagrf.PostDrive(ictx, protectedResourceID, newDriveName)
		if err != nil && !errors.Is(err, graph.ErrItemAlreadyExistsConflict) {
			return clues.Wrap(err, "creating new drive")
		}

		// drive created successfully; stop retrying.
		if err == nil {
			break
		}

		// name conflict: try the next numbered suffix.
		newDriveName = fmt.Sprintf("%s %d", driveName, i)
	}

	err = caches.AddDrive(ctx, newDrive, pdagrf)

	return clues.Wrap(err, "adding drive to cache").OrNil()
}

View File

@ -21,6 +21,7 @@ import (
"github.com/alcionai/corso/src/pkg/count"
"github.com/alcionai/corso/src/pkg/path"
"github.com/alcionai/corso/src/pkg/services/m365/api"
apiMock "github.com/alcionai/corso/src/pkg/services/m365/api/mock"
)
type RestoreUnitSuite struct {
@ -617,3 +618,346 @@ func (suite *RestoreUnitSuite) TestCreateFolder() {
})
}
}
// mockGRF is a GetRootFolderer test double that returns canned values.
type mockGRF struct {
	err        error
	rootFolder models.DriveItemable
}

// GetRootFolder returns the mock's canned root folder and error; all
// inputs are ignored.
func (m *mockGRF) GetRootFolder(
	context.Context,
	string,
) (models.DriveItemable, error) {
	return m.rootFolder, m.err
}
// TestRestoreCaches_AddDrive verifies that AddDrive populates both the
// by-id and by-name caches with the drive's id, name, and root folder
// id, and that a root-folder lookup failure surfaces as an error.
func (suite *RestoreUnitSuite) TestRestoreCaches_AddDrive() {
	rfID := "this-is-id"
	driveID := "another-id"
	name := "name"

	// canned root folder returned by the mock GetRootFolderer.
	rf := models.NewDriveItem()
	rf.SetId(&rfID)

	// drive to be added to the caches.
	md := models.NewDrive()
	md.SetId(&driveID)
	md.SetName(&name)

	table := []struct {
		name        string
		mock        *mockGRF
		expectErr   require.ErrorAssertionFunc
		expectID    string
		checkValues bool
	}{
		{
			name:        "good",
			mock:        &mockGRF{rootFolder: rf},
			expectErr:   require.NoError,
			expectID:    rfID,
			checkValues: true,
		},
		{
			name:      "err",
			mock:      &mockGRF{err: assert.AnError},
			expectErr: require.Error,
			expectID:  "",
		},
	}
	for _, test := range table {
		suite.Run(test.name, func() {
			t := suite.T()

			ctx, flush := tester.NewContext(t)
			defer flush()

			rc := NewRestoreCaches()

			err := rc.AddDrive(ctx, md, test.mock)
			test.expectErr(t, err, clues.ToCore(err))

			if test.checkValues {
				// both cache indexes should hold the same entry.
				idResult := rc.DriveIDToDriveInfo[driveID]
				assert.Equal(t, driveID, idResult.id, "drive id")
				assert.Equal(t, name, idResult.name, "drive name")
				assert.Equal(t, test.expectID, idResult.rootFolderID, "root folder id")

				nameResult := rc.DriveNameToDriveInfo[name]
				assert.Equal(t, driveID, nameResult.id, "drive id")
				assert.Equal(t, name, nameResult.name, "drive name")
				assert.Equal(t, test.expectID, nameResult.rootFolderID, "root folder id")
			}
		})
	}
}
// mockGDPARF is a GetDrivePagerAndRootFolderer test double backed by a
// canned root folder and a mock drive pager.
type mockGDPARF struct {
	err        error
	rootFolder models.DriveItemable
	pager      *apiMock.DrivePager
}

// GetRootFolder returns the mock's canned root folder and error; all
// inputs are ignored.
func (m *mockGDPARF) GetRootFolder(
	context.Context,
	string,
) (models.DriveItemable, error) {
	return m.rootFolder, m.err
}

// NewDrivePager returns the mock's canned pager; all inputs are ignored.
func (m *mockGDPARF) NewDrivePager(
	string,
	[]string,
) api.DrivePager {
	return m.pager
}
// TestRestoreCaches_Populate verifies that Populate enumerates the
// resource's drives through the pager and caches each one by id and by
// name, and that pager errors are propagated.
func (suite *RestoreUnitSuite) TestRestoreCaches_Populate() {
	rfID := "this-is-id"
	driveID := "another-id"
	name := "name"

	// canned root folder returned for every drive.
	rf := models.NewDriveItem()
	rf.SetId(&rfID)

	// drive returned by the mock pager.
	md := models.NewDrive()
	md.SetId(&driveID)
	md.SetName(&name)

	table := []struct {
		name        string
		mock        *apiMock.DrivePager
		expectErr   require.ErrorAssertionFunc
		expectLen   int
		checkValues bool
	}{
		{
			name: "no results",
			mock: &apiMock.DrivePager{
				ToReturn: []apiMock.PagerResult{
					{Drives: []models.Driveable{}},
				},
			},
			expectErr: require.NoError,
			expectLen: 0,
		},
		{
			name: "one result",
			mock: &apiMock.DrivePager{
				ToReturn: []apiMock.PagerResult{
					{Drives: []models.Driveable{md}},
				},
			},
			expectErr:   require.NoError,
			expectLen:   1,
			checkValues: true,
		},
		{
			name: "error",
			mock: &apiMock.DrivePager{
				ToReturn: []apiMock.PagerResult{
					{Err: assert.AnError},
				},
			},
			expectErr: require.Error,
			expectLen: 0,
		},
	}
	for _, test := range table {
		suite.Run(test.name, func() {
			t := suite.T()

			ctx, flush := tester.NewContext(t)
			defer flush()

			gdparf := &mockGDPARF{
				rootFolder: rf,
				pager:      test.mock,
			}

			rc := NewRestoreCaches()

			err := rc.Populate(ctx, gdparf, "shmoo")
			test.expectErr(t, err, clues.ToCore(err))

			// both cache indexes should have one entry per drive.
			assert.Len(t, rc.DriveIDToDriveInfo, test.expectLen)
			assert.Len(t, rc.DriveNameToDriveInfo, test.expectLen)

			if test.checkValues {
				idResult := rc.DriveIDToDriveInfo[driveID]
				assert.Equal(t, driveID, idResult.id, "drive id")
				assert.Equal(t, name, idResult.name, "drive name")
				assert.Equal(t, rfID, idResult.rootFolderID, "root folder id")

				nameResult := rc.DriveNameToDriveInfo[name]
				assert.Equal(t, driveID, nameResult.id, "drive id")
				assert.Equal(t, name, nameResult.name, "drive name")
				assert.Equal(t, rfID, nameResult.rootFolderID, "root folder id")
			}
		})
	}
}
// mockPDAGRF is a PostDriveAndGetRootFolderer test double. Each
// PostDrive call consumes the next entry in postResp/postErr, letting
// tests script a sequence of conflict-then-success responses.
type mockPDAGRF struct {
	i        int
	postResp []models.Driveable
	postErr  []error
	grf      mockGRF
}

// PostDrive returns the i'th canned response and error, then advances i.
// Non-nil responses get the requested driveName set on them so callers
// can assert on the name that was actually used.
func (m *mockPDAGRF) PostDrive(
	ctx context.Context,
	protectedResourceID, driveName string,
) (models.Driveable, error) {
	// advance after the lookup so this call uses the current index.
	defer func() { m.i++ }()

	md := m.postResp[m.i]
	if md != nil {
		md.SetName(&driveName)
	}

	return md, m.postErr[m.i]
}

// GetRootFolder delegates to the embedded mockGRF's canned values.
func (m *mockPDAGRF) GetRootFolder(
	ctx context.Context,
	driveID string,
) (models.DriveItemable, error) {
	return m.grf.rootFolder, m.grf.err
}
// TestEnsureDriveExists verifies ensureDriveExists behavior: cached
// drives are a no-op, missing drives get created, creation errors are
// surfaced, and name collisions (with drives or with other item types
// such as lists) produce numbered fallback names like "name 1".
func (suite *RestoreUnitSuite) TestEnsureDriveExists() {
	rfID := "this-is-id"
	driveID := "another-id"
	name := "name"

	rf := models.NewDriveItem()
	rf.SetId(&rfID)

	grf := mockGRF{rootFolder: rf}

	// makeMD builds a fresh drive so each case can mutate its own copy.
	makeMD := func() models.Driveable {
		md := models.NewDrive()
		md.SetId(&driveID)
		md.SetName(&name)

		return md
	}

	dp := &path.DrivePath{
		DriveID: driveID,
		Root:    "root:",
		Folders: path.Elements{},
	}

	// populatedCache pre-seeds the caches with a drive entry holding the
	// given id and the shared name.
	populatedCache := func(id string) *restoreCaches {
		rc := NewRestoreCaches()
		di := driveInfo{
			id:   id,
			name: name,
		}

		rc.DriveIDToDriveInfo[id] = di
		rc.DriveNameToDriveInfo[name] = di

		return rc
	}

	table := []struct {
		name            string
		mock            *mockPDAGRF
		rc              *restoreCaches
		expectErr       require.ErrorAssertionFunc
		expectName      string
		skipValueChecks bool
	}{
		{
			name: "drive already in cache",
			mock: &mockPDAGRF{
				postResp: []models.Driveable{makeMD()},
				postErr:  []error{nil},
				grf:      grf,
			},
			rc:         populatedCache(driveID),
			expectErr:  require.NoError,
			expectName: name,
		},
		{
			name: "drive created",
			mock: &mockPDAGRF{
				postResp: []models.Driveable{makeMD()},
				postErr:  []error{nil},
				grf:      grf,
			},
			rc:         NewRestoreCaches(),
			expectErr:  require.NoError,
			expectName: name,
		},
		{
			name: "error creating drive",
			mock: &mockPDAGRF{
				postResp: []models.Driveable{nil},
				postErr:  []error{assert.AnError},
				grf:      grf,
			},
			rc:              NewRestoreCaches(),
			expectErr:       require.Error,
			expectName:      "",
			skipValueChecks: true,
		},
		{
			// cache holds a different drive with the same name, so the
			// new drive should get the " 1" suffix.
			name: "drive name already exists",
			mock: &mockPDAGRF{
				postResp: []models.Driveable{makeMD()},
				postErr:  []error{nil},
				grf:      grf,
			},
			rc:         populatedCache("beaux"),
			expectErr:  require.NoError,
			expectName: name + " 1",
		},
		{
			// first post conflicts (eg: a list owns the name), second
			// post with the " 1" suffix succeeds.
			name: "list with name already exists",
			mock: &mockPDAGRF{
				postResp: []models.Driveable{nil, makeMD()},
				postErr:  []error{graph.ErrItemAlreadyExistsConflict, nil},
				grf:      grf,
			},
			rc:         NewRestoreCaches(),
			expectErr:  require.NoError,
			expectName: name + " 1",
		},
		{
			// cached name collision pushes to " 1", then a post conflict
			// pushes again to " 2".
			name: "drive and list with name already exist",
			mock: &mockPDAGRF{
				postResp: []models.Driveable{nil, makeMD()},
				postErr:  []error{graph.ErrItemAlreadyExistsConflict, nil},
				grf:      grf,
			},
			rc:         populatedCache("regard"),
			expectErr:  require.NoError,
			expectName: name + " 2",
		},
	}
	for _, test := range table {
		suite.Run(test.name, func() {
			t := suite.T()

			ctx, flush := tester.NewContext(t)
			defer flush()

			rc := test.rc

			err := ensureDriveExists(
				ctx,
				test.mock,
				rc,
				dp,
				"prID",
				name)
			test.expectErr(t, err, clues.ToCore(err))

			if !test.skipValueChecks {
				nameResult := rc.DriveNameToDriveInfo[test.expectName]
				assert.Equal(t, test.expectName, nameResult.name, "found drive entry with expected name")
			}
		})
	}
}

View File

@ -10,6 +10,7 @@ import (
"github.com/alcionai/clues"
"github.com/microsoftgraph/msgraph-sdk-go/models"
"github.com/alcionai/corso/src/internal/common/dttm"
"github.com/alcionai/corso/src/internal/common/ptr"
"github.com/alcionai/corso/src/internal/data"
"github.com/alcionai/corso/src/internal/diagnostics"
@ -39,11 +40,18 @@ func ConsumeRestoreCollections(
ctr *count.Bus,
) (*support.ControllerOperationStatus, error) {
var (
restoreMetrics support.CollectionMetrics
caches = onedrive.NewRestoreCaches()
el = errs.Local()
lrh = libraryRestoreHandler{ac}
protectedResourceID = dcs[0].FullPath().ResourceOwner()
restoreMetrics support.CollectionMetrics
caches = onedrive.NewRestoreCaches()
el = errs.Local()
)
err := caches.Populate(ctx, lrh, protectedResourceID)
if err != nil {
return nil, clues.Wrap(err, "initializing restore caches")
}
// Reorder collections so that the parents directories are created
// before the child directories; a requirement for permissions.
data.SortRestoreCollections(dcs)
@ -69,13 +77,14 @@ func ConsumeRestoreCollections(
case path.LibrariesCategory:
metrics, err = onedrive.RestoreCollection(
ictx,
libraryRestoreHandler{ac.Drives()},
lrh,
restoreCfg,
backupVersion,
dc,
caches,
deets,
opts.RestorePermissions,
control.DefaultRestoreContainerName(dttm.HumanReadableDriveItem),
errs,
ctr)

View File

@ -52,8 +52,9 @@ type RestoreConfig struct {
// Defaults to "Corso_Restore_<current_dttm>"
Location string
// Drive specifies the drive into which the data will be restored.
// If empty, data is restored to the same drive that was backed up.
// Drive specifies the name of the drive into which the data will be
// restored. If empty, data is restored to the same drive that was backed
// up.
// Defaults to empty.
Drive string
}
@ -65,6 +66,10 @@ func DefaultRestoreConfig(timeFormat dttm.TimeFormat) RestoreConfig {
}
}
// DefaultRestoreContainerName produces the default restore container
// name: the default restore location prefix followed by the current
// time in the given format.
func DefaultRestoreContainerName(timeFormat dttm.TimeFormat) string {
	return defaultRestoreLocation + dttm.FormatNow(timeFormat)
}
// EnsureRestoreConfigDefaults sets all non-supported values in the config
// struct to the default value.
func EnsureRestoreConfigDefaults(

View File

@ -52,6 +52,6 @@ func (suite *ListsAPIIntgSuite) TestLists_PostDrive() {
assert.Equal(t, driveName, ptr.Val(list.GetName()))
// second post, same name, should error on name conflict
list, err = acl.PostDrive(ctx, siteID, driveName)
_, err = acl.PostDrive(ctx, siteID, driveName)
require.ErrorIs(t, err, graph.ErrItemAlreadyExistsConflict, clues.ToCore(err))
}

View File

@ -16,8 +16,6 @@ Below is a list of known Corso issues and limitations:
from M365 while a backup creation is running.
The next backup creation will correct any missing data.
* SharePoint document library data can't be restored after the library has been deleted.
* Sharing information of items in OneDrive/SharePoint using sharing links aren't backed up and restored.
* Permissions/Access given to a site group can't be restored.