Fix path collisions for SharePoint export (#4112)

Previously, if a backup contained multiple SharePoint document libraries, the export merged the contents of all of them into a single directory tree. This change separates the exported content by document library.

After:
- DocumentLibA/FileA
- DocumentLibB/FileB

Before:
- FileA
- FileB

---

#### Does this PR need a docs update or release note?

- [x]  Yes, it's included
- [ ] 🕐 Yes, but in a later PR
- [ ]  No

#### Type of change

<!--- Please check the type of change your PR introduces: --->
- [ ] 🌻 Feature
- [x] 🐛 Bugfix
- [ ] 🗺️ Documentation
- [ ] 🤖 Supportability/Tests
- [ ] 💻 CI/Deployment
- [ ] 🧹 Tech Debt/Cleanup

#### Issue(s)

<!-- Can reference multiple issues. Use one of the following "magic words" - "closes, fixes" to auto-close the Github issue. -->
* #<issue>

#### Test Plan

<!-- How will this be tested prior to merging.-->
- [ ] 💪 Manual
- [x]  Unit test
- [ ] 💚 E2E
This commit is contained in:
Abin Simon 2023-08-28 12:50:09 +05:30 committed by GitHub
parent fe9241fbd8
commit 3e43028a88
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 358 additions and 230 deletions

View File

@ -13,6 +13,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Fixed
- SharePoint document libraries deleted after the last backup can now be restored.
- Restore requires the protected resource to have access to the service being restored.
- SharePoint data from multiple document libraries is no longer merged in exports.
### Added
- Added option to export data from OneDrive and SharePoint backups as individual files or as a single zip file.

View File

@ -0,0 +1,139 @@
package drive
import (
"context"
"strings"
"github.com/alcionai/clues"
"github.com/alcionai/corso/src/internal/data"
"github.com/alcionai/corso/src/internal/m365/collection/drive/metadata"
"github.com/alcionai/corso/src/internal/version"
"github.com/alcionai/corso/src/pkg/export"
"github.com/alcionai/corso/src/pkg/fault"
)
// Compile-time check that ExportCollection satisfies export.Collection.
// The check is made against the value type (not the pointer type) because
// NewExportCollection returns a value and callers store those values
// directly in []export.Collection.
var _ export.Collection = ExportCollection{}

// ExportCollection is the export.Collection implementation for drive-backed
// data. It wraps a restore collection and exposes its items for export; both
// the OneDrive and SharePoint export producers construct it through
// NewExportCollection.
type ExportCollection struct {
	// baseDir contains the path of the collection
	baseDir string

	// backingCollection is the restore collection from which we will
	// create the export collection.
	backingCollection data.RestoreCollection

	// backupVersion is the backupVersion of the backup this collection was part
	// of. This is required to figure out how to get the name of the
	// item.
	backupVersion int
}
// NewExportCollection builds an ExportCollection rooted at baseDir that
// sources its items from backingCollection. backupVersion is the version of
// the backup the collection belongs to; it is stored so item names can be
// resolved the way that version requires.
func NewExportCollection(
	baseDir string,
	backingCollection data.RestoreCollection,
	backupVersion int,
) ExportCollection {
	var ec ExportCollection

	ec.baseDir = baseDir
	ec.backingCollection = backingCollection
	ec.backupVersion = backupVersion

	return ec
}
// BasePath returns the base directory this collection was constructed with.
func (ec ExportCollection) BasePath() string {
return ec.baseDir
}
// Items returns a receive-only channel of export items. The channel is
// unbuffered and is fed by a goroutine running items, which closes it once
// all items have been sent.
func (ec ExportCollection) Items(ctx context.Context) <-chan export.Item {
	out := make(chan export.Item)

	go items(ctx, ec, out)

	return out
}
// items converts the items in the backing collection to export items and
// sends them on ch. ch is closed when the function returns.
//
// Metadata files are skipped. A failure to resolve an item's name does not
// stop the stream; it is reported on that item's Error field. Once the
// backing collection is drained, everything accumulated on the local fault
// bus is emitted as additional items that carry an error and no data.
func items(ctx context.Context, ec ExportCollection, ch chan<- export.Item) {
	defer close(ch)

	errs := fault.New(false)

	for item := range ec.backingCollection.Items(ctx, errs) {
		itemUUID := item.ID()
		if isMetadataFile(itemUUID, ec.backupVersion) {
			continue
		}

		name, err := getItemName(ctx, itemUUID, ec.backupVersion, ec.backingCollection)

		ch <- export.Item{
			ID: itemUUID,
			Data: export.ItemData{
				Name: name,
				Body: item.ToReader(),
			},
			Error: err,
		}
	}

	eitems, erecovereable := errs.ItemsAndRecovered()

	// Return all the items that we failed to source from the persistence layer
	for _, failed := range eitems {
		// Take a per-iteration copy before taking its address. Before Go 1.22
		// the range variable is shared across iterations, so every item sent
		// would otherwise point at the same, later-overwritten value.
		failed := failed

		ch <- export.Item{
			ID:    failed.ID,
			Error: &failed,
		}
	}

	// Named recovered (not ec) to avoid shadowing the collection parameter.
	for _, recovered := range erecovereable {
		ch <- export.Item{
			Error: recovered,
		}
	}
}
// isMetadataFile reports whether id names a metadata file for a backup of
// the given version. Backups older than version.OneDrive1DataAndMetaFiles
// never match; for all other versions an id matches when it ends in either
// the file or the directory metadata suffix. This is version-aware, unlike
// metadata.IsMetadataFile which only has to be concerned about the current
// version.
func isMetadataFile(id string, backupVersion int) bool {
	if backupVersion >= version.OneDrive1DataAndMetaFiles {
		for _, suffix := range []string{metadata.MetaFileSuffix, metadata.DirMetaFileSuffix} {
			if strings.HasSuffix(id, suffix) {
				return true
			}
		}
	}

	return false
}
// getItemName resolves the export name of the item with the given id.
// The lookup strategy depends on the backup version:
//   - before version.OneDrive1DataAndMetaFiles the id is the name;
//   - before version.OneDrive5DirMetaNoName the name is the id with the data
//     file suffix trimmed;
//   - otherwise the id must carry the data file suffix, and the name is read
//     from the matching metadata file fetched through fin.
//
// An error is returned when the metadata cannot be fetched or read, or when
// the id does not carry the data file suffix in the last case.
func getItemName(
	ctx context.Context,
	id string,
	backupVersion int,
	fin data.FetchItemByNamer,
) (string, error) {
	switch {
	case backupVersion < version.OneDrive1DataAndMetaFiles:
		return id, nil
	case backupVersion < version.OneDrive5DirMetaNoName:
		return strings.TrimSuffix(id, metadata.DataFileSuffix), nil
	case !strings.HasSuffix(id, metadata.DataFileSuffix):
		return "", clues.New("invalid item id").WithClues(ctx)
	}

	// The metadata file shares the data file's base name.
	base := strings.TrimSuffix(id, metadata.DataFileSuffix)

	meta, err := FetchAndReadMetadata(ctx, fin, base+metadata.MetaFileSuffix)
	if err != nil {
		return "", clues.Wrap(err, "getting metadata").WithClues(ctx)
	}

	return meta.FileName, nil
}

View File

@ -0,0 +1,145 @@
package drive
import (
"bytes"
"context"
"io"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/suite"
"github.com/alcionai/corso/src/internal/data"
dataMock "github.com/alcionai/corso/src/internal/data/mock"
"github.com/alcionai/corso/src/internal/m365/collection/drive/metadata"
"github.com/alcionai/corso/src/internal/tester"
"github.com/alcionai/corso/src/internal/version"
)
// ExportUnitSuite groups the unit tests for the drive export helpers.
type ExportUnitSuite struct {
tester.Suite
}
// TestExportUnitSuite is the go test entry point that runs ExportUnitSuite.
func TestExportUnitSuite(t *testing.T) {
suite.Run(t, &ExportUnitSuite{Suite: tester.NewUnitSuite(t)})
}
// TestIsMetadataFile checks isMetadataFile across backup versions: versions
// older than version.OneDrive1DataAndMetaFiles never report metadata, while
// later versions decide by file suffix.
func (suite *ExportUnitSuite) TestIsMetadataFile() {
	table := []struct {
		name          string
		id            string
		backupVersion int
		isMeta        bool
	}{
		{
			// Use a version below the threshold together with a metadata
			// suffix so the result can only come from the legacy branch.
			name:          "legacy",
			backupVersion: version.OneDrive1DataAndMetaFiles - 1,
			id:            "name" + metadata.MetaFileSuffix,
			isMeta:        false,
		},
		{
			name:          "first version with metadata files",
			backupVersion: version.OneDrive1DataAndMetaFiles,
			id:            "name" + metadata.MetaFileSuffix,
			isMeta:        true,
		},
		{
			name:          "metadata file",
			backupVersion: version.OneDrive3IsMetaMarker,
			id:            "name" + metadata.MetaFileSuffix,
			isMeta:        true,
		},
		{
			name:          "dir metadata file",
			backupVersion: version.OneDrive3IsMetaMarker,
			id:            "name" + metadata.DirMetaFileSuffix,
			isMeta:        true,
		},
		{
			name:          "non metadata file",
			backupVersion: version.OneDrive3IsMetaMarker,
			id:            "name" + metadata.DataFileSuffix,
			isMeta:        false,
		},
	}

	for _, test := range table {
		suite.Run(test.name, func() {
			assert.Equal(suite.T(), test.isMeta, isMetadataFile(test.id, test.backupVersion), "is metadata")
		})
	}
}
// finD is a test stub whose FetchItemByName method serves a single canned
// metadata item.
type finD struct {
// id is the only item name FetchItemByName answers successfully.
id string
// name is the file name embedded in the returned metadata body.
name string
// err, when non-nil, is returned by every FetchItemByName call.
err error
}
// FetchItemByName returns fd.err when it is set. Otherwise it returns a mock
// item whose body is a JSON object carrying fd.name under the "filename" key,
// but only when the requested name equals fd.id; any other name yields
// assert.AnError.
func (fd finD) FetchItemByName(ctx context.Context, name string) (data.Item, error) {
	switch {
	case fd.err != nil:
		return nil, fd.err
	case name != fd.id:
		return nil, assert.AnError
	}

	body := `{"filename": "` + fd.name + `"}`

	return &dataMock.Item{
		ItemID: fd.id,
		Reader: io.NopCloser(bytes.NewBufferString(body)),
	}, nil
}
// TestGetItemName checks how getItemName resolves names for each backup
// version range: id as-is for legacy backups, suffix-trimmed id for the
// intermediate versions, and a metadata lookup for current backups,
// including the lookup-failure and invalid-id error paths.
func (suite *ExportUnitSuite) TestGetItemName() {
	table := []struct {
		tname         string
		id            string
		backupVersion int
		name          string
		fin           data.FetchItemByNamer
		errFunc       assert.ErrorAssertionFunc
	}{
		{
			// A version below the threshold is required to actually reach
			// the legacy branch.
			tname:         "legacy",
			id:            "name",
			backupVersion: version.OneDrive1DataAndMetaFiles - 1,
			name:          "name",
			errFunc:       assert.NoError,
		},
		{
			tname:         "name in filename",
			id:            "name.data",
			backupVersion: version.OneDrive4DirIncludesPermissions,
			name:          "name",
			errFunc:       assert.NoError,
		},
		{
			tname:         "name in metadata",
			id:            "id.data",
			backupVersion: version.Backup,
			name:          "name",
			fin:           finD{id: "id.meta", name: "name"},
			errFunc:       assert.NoError,
		},
		{
			tname:         "name in metadata but error",
			id:            "id.data",
			backupVersion: version.Backup,
			name:          "",
			fin:           finD{err: assert.AnError},
			errFunc:       assert.Error,
		},
		{
			// An id without the data suffix is rejected before fin is
			// consulted, so no fetcher is needed here.
			tname:         "invalid item id",
			id:            "id",
			backupVersion: version.Backup,
			name:          "",
			errFunc:       assert.Error,
		},
	}

	for _, test := range table {
		suite.Run(test.tname, func() {
			t := suite.T()

			ctx, flush := tester.NewContext(t)
			defer flush()

			name, err := getItemName(
				ctx,
				test.id,
				test.backupVersion,
				test.fin)
			test.errFunc(t, err)

			assert.Equal(t, test.name, name, "name")
		})
	}
}

View File

@ -9,6 +9,7 @@ import (
"github.com/alcionai/corso/src/internal/diagnostics"
"github.com/alcionai/corso/src/internal/m365/graph"
"github.com/alcionai/corso/src/internal/m365/service/onedrive"
"github.com/alcionai/corso/src/internal/m365/service/sharepoint"
"github.com/alcionai/corso/src/internal/m365/support"
"github.com/alcionai/corso/src/pkg/backup/details"
"github.com/alcionai/corso/src/pkg/control"
@ -41,8 +42,7 @@ func (ctrl *Controller) ProduceExportCollections(
)
switch sels.Service {
case selectors.ServiceOneDrive, selectors.ServiceSharePoint:
// OneDrive and SharePoint can share the code to create collections
case selectors.ServiceOneDrive:
expCollections, err = onedrive.ProduceExportCollections(
ctx,
backupVersion,
@ -51,6 +51,17 @@ func (ctrl *Controller) ProduceExportCollections(
dcs,
deets,
errs)
case selectors.ServiceSharePoint:
expCollections, err = sharepoint.ProduceExportCollections(
ctx,
backupVersion,
exportCfg,
opts,
dcs,
ctrl.backupDriveIDNames,
deets,
errs)
default:
err = clues.Wrap(clues.New(sels.Service.String()), "service not supported")
}

View File

@ -2,14 +2,11 @@ package onedrive
import (
"context"
"strings"
"github.com/alcionai/clues"
"github.com/alcionai/corso/src/internal/data"
"github.com/alcionai/corso/src/internal/m365/collection/drive"
"github.com/alcionai/corso/src/internal/m365/collection/drive/metadata"
"github.com/alcionai/corso/src/internal/version"
"github.com/alcionai/corso/src/pkg/backup/details"
"github.com/alcionai/corso/src/pkg/control"
"github.com/alcionai/corso/src/pkg/export"
@ -17,121 +14,6 @@ import (
"github.com/alcionai/corso/src/pkg/path"
)
var _ export.Collection = &exportCollection{}
// exportCollection is the implementation of export.ExportCollection for OneDrive
type exportCollection struct {
// baseDir contains the path of the collection
baseDir string
// backingCollection is the restore collection from which we will
// create the export collection.
backingCollection data.RestoreCollection
// backupVersion is the backupVersion of the backup this collection was part
// of. This is required to figure out how to get the name of the
// item.
backupVersion int
}
func (ec exportCollection) BasePath() string {
return ec.baseDir
}
func (ec exportCollection) Items(ctx context.Context) <-chan export.Item {
ch := make(chan export.Item)
go items(ctx, ec, ch)
return ch
}
// items converts items in backing collection to export items
func items(ctx context.Context, ec exportCollection, ch chan<- export.Item) {
defer close(ch)
errs := fault.New(false)
// There will only be a single item in the backingCollections
// for OneDrive
for item := range ec.backingCollection.Items(ctx, errs) {
itemUUID := item.ID()
if isMetadataFile(itemUUID, ec.backupVersion) {
continue
}
name, err := getItemName(ctx, itemUUID, ec.backupVersion, ec.backingCollection)
ch <- export.Item{
ID: itemUUID,
Data: export.ItemData{
Name: name,
Body: item.ToReader(),
},
Error: err,
}
}
eitems, erecovereable := errs.ItemsAndRecovered()
// Return all the items that we failed to get from kopia at the end
for _, err := range eitems {
ch <- export.Item{
ID: err.ID,
Error: &err,
}
}
for _, ec := range erecovereable {
ch <- export.Item{
Error: ec,
}
}
}
// isMetadataFile is used to determine if a path corresponds to a
// metadata file. This is OneDrive specific logic and depends on the
// version of the backup unlike metadata.IsMetadataFile which only has
// to be concerned about the current version.
func isMetadataFile(id string, backupVersion int) bool {
if backupVersion < version.OneDrive1DataAndMetaFiles {
return false
}
return strings.HasSuffix(id, metadata.MetaFileSuffix) ||
strings.HasSuffix(id, metadata.DirMetaFileSuffix)
}
// getItemName is used to get the name of the item.
// How we get the name depends on the version of the backup.
func getItemName(
ctx context.Context,
id string,
backupVersion int,
fin data.FetchItemByNamer,
) (string, error) {
if backupVersion < version.OneDrive1DataAndMetaFiles {
return id, nil
}
if backupVersion < version.OneDrive5DirMetaNoName {
return strings.TrimSuffix(id, metadata.DataFileSuffix), nil
}
if strings.HasSuffix(id, metadata.DataFileSuffix) {
trimmedName := strings.TrimSuffix(id, metadata.DataFileSuffix)
metaName := trimmedName + metadata.MetaFileSuffix
meta, err := drive.FetchAndReadMetadata(ctx, fin, metaName)
if err != nil {
return "", clues.Wrap(err, "getting metadata").WithClues(ctx)
}
return meta.FileName, nil
}
return "", clues.New("invalid item id").WithClues(ctx)
}
// ProduceExportCollections will create the export collections for the
// given restore collections.
func ProduceExportCollections(
@ -156,11 +38,7 @@ func ProduceExportCollections(
baseDir := path.Builder{}.Append(drivePath.Folders...)
ec = append(ec, exportCollection{
baseDir: baseDir.String(),
backingCollection: dc,
backupVersion: backupVersion,
})
ec = append(ec, drive.NewExportCollection(baseDir.String(), dc, backupVersion))
}
return ec, el.Failure()

View File

@ -11,7 +11,7 @@ import (
"github.com/alcionai/corso/src/internal/data"
dataMock "github.com/alcionai/corso/src/internal/data/mock"
"github.com/alcionai/corso/src/internal/m365/collection/drive/metadata"
"github.com/alcionai/corso/src/internal/m365/collection/drive"
odConsts "github.com/alcionai/corso/src/internal/m365/service/onedrive/consts"
odStub "github.com/alcionai/corso/src/internal/m365/service/onedrive/stub"
"github.com/alcionai/corso/src/internal/tester"
@ -30,45 +30,6 @@ func TestExportUnitSuite(t *testing.T) {
suite.Run(t, &ExportUnitSuite{Suite: tester.NewUnitSuite(t)})
}
func (suite *ExportUnitSuite) TestIsMetadataFile() {
table := []struct {
name string
id string
backupVersion int
isMeta bool
}{
{
name: "legacy",
backupVersion: version.OneDrive1DataAndMetaFiles,
isMeta: false,
},
{
name: "metadata file",
backupVersion: version.OneDrive3IsMetaMarker,
id: "name" + metadata.MetaFileSuffix,
isMeta: true,
},
{
name: "dir metadata file",
backupVersion: version.OneDrive3IsMetaMarker,
id: "name" + metadata.DirMetaFileSuffix,
isMeta: true,
},
{
name: "non metadata file",
backupVersion: version.OneDrive3IsMetaMarker,
id: "name" + metadata.DataFileSuffix,
isMeta: false,
},
}
for _, test := range table {
suite.Run(test.name, func() {
assert.Equal(suite.T(), test.isMeta, isMetadataFile(test.id, test.backupVersion), "is metadata")
})
}
}
type finD struct {
id string
name string
@ -90,66 +51,6 @@ func (fd finD) FetchItemByName(ctx context.Context, name string) (data.Item, err
return nil, assert.AnError
}
func (suite *ExportUnitSuite) TestGetItemName() {
table := []struct {
tname string
id string
backupVersion int
name string
fin data.FetchItemByNamer
errFunc assert.ErrorAssertionFunc
}{
{
tname: "legacy",
id: "name",
backupVersion: version.OneDrive1DataAndMetaFiles,
name: "name",
errFunc: assert.NoError,
},
{
tname: "name in filename",
id: "name.data",
backupVersion: version.OneDrive4DirIncludesPermissions,
name: "name",
errFunc: assert.NoError,
},
{
tname: "name in metadata",
id: "id.data",
backupVersion: version.Backup,
name: "name",
fin: finD{id: "id.meta", name: "name"},
errFunc: assert.NoError,
},
{
tname: "name in metadata but error",
id: "id.data",
backupVersion: version.Backup,
name: "",
fin: finD{err: assert.AnError},
errFunc: assert.Error,
},
}
for _, test := range table {
suite.Run(test.tname, func() {
t := suite.T()
ctx, flush := tester.NewContext(t)
defer flush()
name, err := getItemName(
ctx,
test.id,
test.backupVersion,
test.fin)
test.errFunc(t, err)
assert.Equal(t, test.name, name, "name")
})
}
}
type mockRestoreCollection struct {
path path.Path
items []*dataMock.Item
@ -391,11 +292,7 @@ func (suite *ExportUnitSuite) TestGetItems() {
ctx, flush := tester.NewContext(t)
defer flush()
ec := exportCollection{
baseDir: "",
backingCollection: test.backingCollection,
backupVersion: test.version,
}
ec := drive.NewExportCollection("", test.backingCollection, test.version)
items := ec.Items(ctx)

View File

@ -0,0 +1,57 @@
package sharepoint
import (
"context"
"github.com/alcionai/clues"
"github.com/alcionai/corso/src/internal/common/idname"
"github.com/alcionai/corso/src/internal/data"
"github.com/alcionai/corso/src/internal/m365/collection/drive"
"github.com/alcionai/corso/src/pkg/backup/details"
"github.com/alcionai/corso/src/pkg/control"
"github.com/alcionai/corso/src/pkg/export"
"github.com/alcionai/corso/src/pkg/fault"
"github.com/alcionai/corso/src/pkg/logger"
"github.com/alcionai/corso/src/pkg/path"
)
// ProduceExportCollections creates one export collection per restore
// collection in dcs. Each collection's base directory is the drive's name
// followed by the folder path inside that drive, which keeps the content of
// different document libraries apart. When backupDriveIDNames has no name
// for a drive, the drive ID is used as the directory name instead.
//
// It returns early with an error if a collection's path cannot be turned
// into a drive path; otherwise it returns the collections together with the
// failure recorded on the local fault bus, if any.
func ProduceExportCollections(
	ctx context.Context,
	backupVersion int,
	exportCfg control.ExportConfig,
	opts control.Options,
	dcs []data.RestoreCollection,
	backupDriveIDNames idname.CacheBuilder,
	deets *details.Builder,
	errs *fault.Bus,
) ([]export.Collection, error) {
	el := errs.Local()
	collections := make([]export.Collection, 0, len(dcs))

	for _, restoreColl := range dcs {
		dp, err := path.ToDrivePath(restoreColl.FullPath())
		if err != nil {
			return nil, clues.Wrap(err, "transforming path to drive path").WithClues(ctx)
		}

		libName, found := backupDriveIDNames.NameOf(dp.DriveID)
		if !found {
			// This should not happen, but fall back to the ID just in case.
			logger.Ctx(ctx).With("drive_id", dp.DriveID).Info("drive name not found, using drive id")

			libName = dp.DriveID
		}

		dir := path.Builder{}.
			Append(libName).
			Append(dp.Folders...).
			String()

		collections = append(collections, drive.NewExportCollection(dir, restoreColl, backupVersion))
	}

	return collections, el.Failure()
}