diff --git a/CHANGELOG.md b/CHANGELOG.md index 906267535..d20674f3e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed - SharePoint document libraries deleted after the last backup can now be restored. - Restore requires the protected resource to have access to the service being restored. +- SharePoint data from multiple document libraries is no longer merged together in exports. ### Added - Added option to export data from OneDrive and SharePoint backups as individual files or as a single zip file. diff --git a/src/internal/m365/collection/drive/export.go b/src/internal/m365/collection/drive/export.go new file mode 100644 index 000000000..027a9bef6 --- /dev/null +++ b/src/internal/m365/collection/drive/export.go @@ -0,0 +1,139 @@ +package drive + +import ( + "context" + "strings" + + "github.com/alcionai/clues" + + "github.com/alcionai/corso/src/internal/data" + "github.com/alcionai/corso/src/internal/m365/collection/drive/metadata" + "github.com/alcionai/corso/src/internal/version" + "github.com/alcionai/corso/src/pkg/export" + "github.com/alcionai/corso/src/pkg/fault" +) + +var _ export.Collection = &ExportCollection{} + +// ExportCollection is the implementation of export.Collection for OneDrive and SharePoint +type ExportCollection struct { + // baseDir contains the path of the collection + baseDir string + + // backingCollection is the restore collection from which we will + // create the export collection. + backingCollection data.RestoreCollection + + // backupVersion is the backupVersion of the backup this collection was part + // of. This is required to figure out how to get the name of the + // item. 
+ backupVersion int +} + +func NewExportCollection( + baseDir string, + backingCollection data.RestoreCollection, + backupVersion int, +) ExportCollection { + return ExportCollection{ + baseDir: baseDir, + backingCollection: backingCollection, + backupVersion: backupVersion, + } +} + +func (ec ExportCollection) BasePath() string { + return ec.baseDir +} + +func (ec ExportCollection) Items(ctx context.Context) <-chan export.Item { + ch := make(chan export.Item) + go items(ctx, ec, ch) + + return ch +} + +// items sends the backing collection's non-metadata items on ch as export items, then any fetch errors. +func items(ctx context.Context, ec ExportCollection, ch chan<- export.Item) { + defer close(ch) + + errs := fault.New(false) + + for item := range ec.backingCollection.Items(ctx, errs) { + itemUUID := item.ID() + if isMetadataFile(itemUUID, ec.backupVersion) { + continue + } + + name, err := getItemName(ctx, itemUUID, ec.backupVersion, ec.backingCollection) + + ch <- export.Item{ + ID: itemUUID, + Data: export.ItemData{ + Name: name, + Body: item.ToReader(), + }, + Error: err, + } + } + + eitems, erecoverable := errs.ItemsAndRecovered() + + // Return all the items that we failed to source from the persistence layer + for i := range eitems { + ch <- export.Item{ + ID: eitems[i].ID, + Error: &eitems[i], // own element, not a reused loop variable + } + } + + for _, rerr := range erecoverable { + ch <- export.Item{ + Error: rerr, + } + } +} + +// isMetadataFile is used to determine if a path corresponds to a +// metadata file. This is OneDrive specific logic and depends on the +// version of the backup unlike metadata.IsMetadataFile which only has +// to be concerned about the current version. +func isMetadataFile(id string, backupVersion int) bool { + if backupVersion < version.OneDrive1DataAndMetaFiles { + return false + } + + return strings.HasSuffix(id, metadata.MetaFileSuffix) || + strings.HasSuffix(id, metadata.DirMetaFileSuffix) +} + +// getItemName is used to get the name of the item. +// How we get the name depends on the version of the backup. 
+func getItemName( + ctx context.Context, + id string, + backupVersion int, + fin data.FetchItemByNamer, +) (string, error) { + if backupVersion < version.OneDrive1DataAndMetaFiles { + return id, nil + } + + if backupVersion < version.OneDrive5DirMetaNoName { + return strings.TrimSuffix(id, metadata.DataFileSuffix), nil + } + + if strings.HasSuffix(id, metadata.DataFileSuffix) { + trimmedName := strings.TrimSuffix(id, metadata.DataFileSuffix) + metaName := trimmedName + metadata.MetaFileSuffix + + meta, err := FetchAndReadMetadata(ctx, fin, metaName) + if err != nil { + return "", clues.Wrap(err, "getting metadata").WithClues(ctx) + } + + return meta.FileName, nil + } + + return "", clues.New("invalid item id").WithClues(ctx) +} diff --git a/src/internal/m365/collection/drive/export_test.go b/src/internal/m365/collection/drive/export_test.go new file mode 100644 index 000000000..2348a2c32 --- /dev/null +++ b/src/internal/m365/collection/drive/export_test.go @@ -0,0 +1,148 @@ +package drive + +import ( + "bytes" + "context" + "io" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/suite" + + "github.com/alcionai/corso/src/internal/data" + dataMock "github.com/alcionai/corso/src/internal/data/mock" + "github.com/alcionai/corso/src/internal/m365/collection/drive/metadata" + "github.com/alcionai/corso/src/internal/tester" + "github.com/alcionai/corso/src/internal/version" +) + +type ExportUnitSuite struct { + tester.Suite +} + +func TestExportUnitSuite(t *testing.T) { + suite.Run(t, &ExportUnitSuite{Suite: tester.NewUnitSuite(t)}) +} + +func (suite *ExportUnitSuite) TestIsMetadataFile() { + table := []struct { + name string + id string + backupVersion int + isMeta bool + }{ + { + // versions below OneDrive1DataAndMetaFiles never report metadata, whatever the suffix + name: "legacy", + backupVersion: version.OneDrive1DataAndMetaFiles - 1, + id: "name" + metadata.MetaFileSuffix, + isMeta: false, + }, + { + name: "metadata file", + backupVersion: version.OneDrive3IsMetaMarker, + id: "name" + metadata.MetaFileSuffix, + isMeta: true, + }, + { + name: "dir metadata file", 
+ backupVersion: version.OneDrive3IsMetaMarker, + id: "name" + metadata.DirMetaFileSuffix, + isMeta: true, + }, + { + name: "non metadata file", + backupVersion: version.OneDrive3IsMetaMarker, + id: "name" + metadata.DataFileSuffix, + isMeta: false, + }, + } + + for _, test := range table { + suite.Run(test.name, func() { + assert.Equal(suite.T(), test.isMeta, isMetadataFile(test.id, test.backupVersion), "is metadata") + }) + } +} + +type finD struct { + id string + name string + err error +} + +func (fd finD) FetchItemByName(ctx context.Context, name string) (data.Item, error) { + if fd.err != nil { + return nil, fd.err + } + + if name == fd.id { + return &dataMock.Item{ + ItemID: fd.id, + Reader: io.NopCloser(bytes.NewBufferString(`{"filename": "` + fd.name + `"}`)), + }, nil + } + + return nil, assert.AnError +} + +func (suite *ExportUnitSuite) TestGetItemName() { + table := []struct { + tname string + id string + backupVersion int + name string + fin data.FetchItemByNamer + errFunc assert.ErrorAssertionFunc + }{ + { + // versions below OneDrive1DataAndMetaFiles use the id verbatim as the name + tname: "legacy", + id: "name.data", + backupVersion: version.OneDrive1DataAndMetaFiles - 1, + name: "name.data", + errFunc: assert.NoError, + }, + { + tname: "name in filename", + id: "name.data", + backupVersion: version.OneDrive4DirIncludesPermissions, + name: "name", + errFunc: assert.NoError, + }, + { + tname: "name in metadata", + id: "id.data", + backupVersion: version.Backup, + name: "name", + fin: finD{id: "id.meta", name: "name"}, + errFunc: assert.NoError, + }, + { + tname: "name in metadata but error", + id: "id.data", + backupVersion: version.Backup, + name: "", + fin: finD{err: assert.AnError}, + errFunc: assert.Error, + }, + } + + for _, test := range table { + suite.Run(test.tname, func() { + t := suite.T() + + ctx, flush := tester.NewContext(t) + defer flush() + + name, err := getItemName( + ctx, + test.id, + test.backupVersion, + test.fin) + test.errFunc(t, err) + + assert.Equal(t, test.name, name, "name") + }) + } +} diff --git 
a/src/internal/m365/export.go b/src/internal/m365/export.go index 0003353fb..abec3e16a 100644 --- a/src/internal/m365/export.go +++ b/src/internal/m365/export.go @@ -9,6 +9,7 @@ import ( "github.com/alcionai/corso/src/internal/diagnostics" "github.com/alcionai/corso/src/internal/m365/graph" "github.com/alcionai/corso/src/internal/m365/service/onedrive" + "github.com/alcionai/corso/src/internal/m365/service/sharepoint" "github.com/alcionai/corso/src/internal/m365/support" "github.com/alcionai/corso/src/pkg/backup/details" "github.com/alcionai/corso/src/pkg/control" @@ -41,8 +42,7 @@ func (ctrl *Controller) ProduceExportCollections( ) switch sels.Service { - case selectors.ServiceOneDrive, selectors.ServiceSharePoint: - // OneDrive and SharePoint can share the code to create collections + case selectors.ServiceOneDrive: expCollections, err = onedrive.ProduceExportCollections( ctx, backupVersion, @@ -51,6 +51,17 @@ func (ctrl *Controller) ProduceExportCollections( dcs, deets, errs) + case selectors.ServiceSharePoint: + expCollections, err = sharepoint.ProduceExportCollections( + ctx, + backupVersion, + exportCfg, + opts, + dcs, + ctrl.backupDriveIDNames, + deets, + errs) + default: err = clues.Wrap(clues.New(sels.Service.String()), "service not supported") } diff --git a/src/internal/m365/service/onedrive/export.go b/src/internal/m365/service/onedrive/export.go index 60ee7fbea..193321983 100644 --- a/src/internal/m365/service/onedrive/export.go +++ b/src/internal/m365/service/onedrive/export.go @@ -2,14 +2,11 @@ package onedrive import ( "context" - "strings" "github.com/alcionai/clues" "github.com/alcionai/corso/src/internal/data" "github.com/alcionai/corso/src/internal/m365/collection/drive" - "github.com/alcionai/corso/src/internal/m365/collection/drive/metadata" - "github.com/alcionai/corso/src/internal/version" "github.com/alcionai/corso/src/pkg/backup/details" "github.com/alcionai/corso/src/pkg/control" "github.com/alcionai/corso/src/pkg/export" @@ -17,121 +14,6 
@@ import ( "github.com/alcionai/corso/src/pkg/path" ) -var _ export.Collection = &exportCollection{} - -// exportCollection is the implementation of export.ExportCollection for OneDrive -type exportCollection struct { - // baseDir contains the path of the collection - baseDir string - - // backingCollection is the restore collection from which we will - // create the export collection. - backingCollection data.RestoreCollection - - // backupVersion is the backupVersion of the backup this collection was part - // of. This is required to figure out how to get the name of the - // item. - backupVersion int -} - -func (ec exportCollection) BasePath() string { - return ec.baseDir -} - -func (ec exportCollection) Items(ctx context.Context) <-chan export.Item { - ch := make(chan export.Item) - go items(ctx, ec, ch) - - return ch -} - -// items converts items in backing collection to export items -func items(ctx context.Context, ec exportCollection, ch chan<- export.Item) { - defer close(ch) - - errs := fault.New(false) - - // There will only be a single item in the backingCollections - // for OneDrive - for item := range ec.backingCollection.Items(ctx, errs) { - itemUUID := item.ID() - if isMetadataFile(itemUUID, ec.backupVersion) { - continue - } - - name, err := getItemName(ctx, itemUUID, ec.backupVersion, ec.backingCollection) - - ch <- export.Item{ - ID: itemUUID, - Data: export.ItemData{ - Name: name, - Body: item.ToReader(), - }, - Error: err, - } - } - - eitems, erecovereable := errs.ItemsAndRecovered() - - // Return all the items that we failed to get from kopia at the end - for _, err := range eitems { - ch <- export.Item{ - ID: err.ID, - Error: &err, - } - } - - for _, ec := range erecovereable { - ch <- export.Item{ - Error: ec, - } - } -} - -// isMetadataFile is used to determine if a path corresponds to a -// metadata file. 
This is OneDrive specific logic and depends on the -// version of the backup unlike metadata.IsMetadataFile which only has -// to be concerned about the current version. -func isMetadataFile(id string, backupVersion int) bool { - if backupVersion < version.OneDrive1DataAndMetaFiles { - return false - } - - return strings.HasSuffix(id, metadata.MetaFileSuffix) || - strings.HasSuffix(id, metadata.DirMetaFileSuffix) -} - -// getItemName is used to get the name of the item. -// How we get the name depends on the version of the backup. -func getItemName( - ctx context.Context, - id string, - backupVersion int, - fin data.FetchItemByNamer, -) (string, error) { - if backupVersion < version.OneDrive1DataAndMetaFiles { - return id, nil - } - - if backupVersion < version.OneDrive5DirMetaNoName { - return strings.TrimSuffix(id, metadata.DataFileSuffix), nil - } - - if strings.HasSuffix(id, metadata.DataFileSuffix) { - trimmedName := strings.TrimSuffix(id, metadata.DataFileSuffix) - metaName := trimmedName + metadata.MetaFileSuffix - - meta, err := drive.FetchAndReadMetadata(ctx, fin, metaName) - if err != nil { - return "", clues.Wrap(err, "getting metadata").WithClues(ctx) - } - - return meta.FileName, nil - } - - return "", clues.New("invalid item id").WithClues(ctx) -} - // ProduceExportCollections will create the export collections for the // given restore collections. func ProduceExportCollections( @@ -156,11 +38,7 @@ func ProduceExportCollections( baseDir := path.Builder{}.Append(drivePath.Folders...) 
- ec = append(ec, exportCollection{ - baseDir: baseDir.String(), - backingCollection: dc, - backupVersion: backupVersion, - }) + ec = append(ec, drive.NewExportCollection(baseDir.String(), dc, backupVersion)) } return ec, el.Failure() diff --git a/src/internal/m365/service/onedrive/export_test.go b/src/internal/m365/service/onedrive/export_test.go index 6ff68447b..8da31cc33 100644 --- a/src/internal/m365/service/onedrive/export_test.go +++ b/src/internal/m365/service/onedrive/export_test.go @@ -11,7 +11,7 @@ import ( "github.com/alcionai/corso/src/internal/data" dataMock "github.com/alcionai/corso/src/internal/data/mock" - "github.com/alcionai/corso/src/internal/m365/collection/drive/metadata" + "github.com/alcionai/corso/src/internal/m365/collection/drive" odConsts "github.com/alcionai/corso/src/internal/m365/service/onedrive/consts" odStub "github.com/alcionai/corso/src/internal/m365/service/onedrive/stub" "github.com/alcionai/corso/src/internal/tester" @@ -30,45 +30,6 @@ func TestExportUnitSuite(t *testing.T) { suite.Run(t, &ExportUnitSuite{Suite: tester.NewUnitSuite(t)}) } -func (suite *ExportUnitSuite) TestIsMetadataFile() { - table := []struct { - name string - id string - backupVersion int - isMeta bool - }{ - { - name: "legacy", - backupVersion: version.OneDrive1DataAndMetaFiles, - isMeta: false, - }, - { - name: "metadata file", - backupVersion: version.OneDrive3IsMetaMarker, - id: "name" + metadata.MetaFileSuffix, - isMeta: true, - }, - { - name: "dir metadata file", - backupVersion: version.OneDrive3IsMetaMarker, - id: "name" + metadata.DirMetaFileSuffix, - isMeta: true, - }, - { - name: "non metadata file", - backupVersion: version.OneDrive3IsMetaMarker, - id: "name" + metadata.DataFileSuffix, - isMeta: false, - }, - } - - for _, test := range table { - suite.Run(test.name, func() { - assert.Equal(suite.T(), test.isMeta, isMetadataFile(test.id, test.backupVersion), "is metadata") - }) - } -} - type finD struct { id string name string @@ -90,66 +51,6 @@ 
func (fd finD) FetchItemByName(ctx context.Context, name string) (data.Item, err return nil, assert.AnError } -func (suite *ExportUnitSuite) TestGetItemName() { - table := []struct { - tname string - id string - backupVersion int - name string - fin data.FetchItemByNamer - errFunc assert.ErrorAssertionFunc - }{ - { - tname: "legacy", - id: "name", - backupVersion: version.OneDrive1DataAndMetaFiles, - name: "name", - errFunc: assert.NoError, - }, - { - tname: "name in filename", - id: "name.data", - backupVersion: version.OneDrive4DirIncludesPermissions, - name: "name", - errFunc: assert.NoError, - }, - { - tname: "name in metadata", - id: "id.data", - backupVersion: version.Backup, - name: "name", - fin: finD{id: "id.meta", name: "name"}, - errFunc: assert.NoError, - }, - { - tname: "name in metadata but error", - id: "id.data", - backupVersion: version.Backup, - name: "", - fin: finD{err: assert.AnError}, - errFunc: assert.Error, - }, - } - - for _, test := range table { - suite.Run(test.tname, func() { - t := suite.T() - - ctx, flush := tester.NewContext(t) - defer flush() - - name, err := getItemName( - ctx, - test.id, - test.backupVersion, - test.fin) - test.errFunc(t, err) - - assert.Equal(t, test.name, name, "name") - }) - } -} - type mockRestoreCollection struct { path path.Path items []*dataMock.Item @@ -391,11 +292,7 @@ func (suite *ExportUnitSuite) TestGetItems() { ctx, flush := tester.NewContext(t) defer flush() - ec := exportCollection{ - baseDir: "", - backingCollection: test.backingCollection, - backupVersion: test.version, - } + ec := drive.NewExportCollection("", test.backingCollection, test.version) items := ec.Items(ctx) diff --git a/src/internal/m365/service/sharepoint/export.go b/src/internal/m365/service/sharepoint/export.go new file mode 100644 index 000000000..bdec01dc9 --- /dev/null +++ b/src/internal/m365/service/sharepoint/export.go @@ -0,0 +1,57 @@ +package sharepoint + +import ( + "context" + + "github.com/alcionai/clues" + + 
"github.com/alcionai/corso/src/internal/common/idname" + "github.com/alcionai/corso/src/internal/data" + "github.com/alcionai/corso/src/internal/m365/collection/drive" + "github.com/alcionai/corso/src/pkg/backup/details" + "github.com/alcionai/corso/src/pkg/control" + "github.com/alcionai/corso/src/pkg/export" + "github.com/alcionai/corso/src/pkg/fault" + "github.com/alcionai/corso/src/pkg/logger" + "github.com/alcionai/corso/src/pkg/path" +) + +// ProduceExportCollections creates one export collection per restore collection, +// rooting each collection's base dir at the name of the drive it belongs to. +func ProduceExportCollections( + ctx context.Context, + backupVersion int, + exportCfg control.ExportConfig, + opts control.Options, + dcs []data.RestoreCollection, + backupDriveIDNames idname.CacheBuilder, + deets *details.Builder, + errs *fault.Bus, +) ([]export.Collection, error) { + var ( + el = errs.Local() + ec = make([]export.Collection, 0, len(dcs)) + ) + + for _, dc := range dcs { + drivePath, err := path.ToDrivePath(dc.FullPath()) + if err != nil { + return nil, clues.Wrap(err, "transforming path to drive path").WithClues(ctx) + } + + driveName, ok := backupDriveIDNames.NameOf(drivePath.DriveID) + if !ok { + // This should not happen, but just in case, fall back to the drive id. + logger.Ctx(ctx).With("drive_id", drivePath.DriveID).Info("drive name not found, using drive id") + driveName = drivePath.DriveID + } + + baseDir := path.Builder{}. + Append(driveName). + Append(drivePath.Folders...) + + ec = append(ec, drive.NewExportCollection(baseDir.String(), dc, backupVersion)) + } + + return ec, el.Failure() +}