CLI for exporting data from OneDrive (#3822)

This adds the final piece to enable OneDrive exports: the CLI interface, which consumes the NewExport interface from Repository and lets the user export and write the contents of a backup to the local filesystem.

<!-- PR description-->
Prev: https://github.com/alcionai/corso/pull/3821
Next: https://github.com/alcionai/corso/pull/3824

---

#### Does this PR need a docs update or release note?

- [x]  Yes, it's included
- [ ] 🕐 Yes, but in a later PR
- [ ]  No

#### Type of change

<!--- Please check the type of change your PR introduces: --->
- [x] 🌻 Feature
- [ ] 🐛 Bugfix
- [ ] 🗺️ Documentation
- [ ] 🤖 Supportability/Tests
- [ ] 💻 CI/Deployment
- [ ] 🧹 Tech Debt/Cleanup

#### Issue(s)

<!-- Can reference multiple issues. Use one of the following "magic words" - "closes, fixes" to auto-close the Github issue. -->
* https://github.com/alcionai/corso/pull/3797
* https://github.com/alcionai/corso/issues/3670

#### Test Plan

<!-- How will this be tested prior to merging.-->
- [ ] 💪 Manual
- [x]  Unit test
- [ ] 💚 E2E
This commit is contained in:
Abin Simon 2023-07-28 13:31:12 +05:30 committed by GitHub
parent 4e2ee2484f
commit d7443c2211
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 703 additions and 4 deletions

View File

@ -14,6 +14,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- SharePoint document libraries deleted after the last backup can now be restored.
- Restore requires the protected resource to have access to the service being restored.
### Added
- Added option to export data from OneDrive backups as individual files or as a single zip file.
## [v0.11.1] (beta) - 2023-07-20
### Fixed

View File

@ -11,6 +11,7 @@ import (
"github.com/alcionai/corso/src/cli/backup"
"github.com/alcionai/corso/src/cli/config"
"github.com/alcionai/corso/src/cli/export"
"github.com/alcionai/corso/src/cli/flags"
"github.com/alcionai/corso/src/cli/help"
"github.com/alcionai/corso/src/cli/print"
@ -53,7 +54,7 @@ func preRun(cc *cobra.Command, args []string) error {
}
avoidTheseCommands := []string{
"corso", "env", "help", "backup", "details", "list", "restore", "delete", "repo", "init", "connect",
"corso", "env", "help", "backup", "details", "list", "restore", "export", "delete", "repo", "init", "connect",
}
if len(logger.ResolvedLogFile) > 0 && !slices.Contains(avoidTheseCommands, cc.Use) {
@ -150,6 +151,7 @@ func BuildCommandTree(cmd *cobra.Command) {
repo.AddCommands(cmd)
backup.AddCommands(cmd)
restore.AddCommands(cmd)
export.AddCommands(cmd)
help.AddCommands(cmd)
}

39
src/cli/export/export.go Normal file
View File

@ -0,0 +1,39 @@
package export
import (
"github.com/spf13/cobra"
)
// exportCommands lists the per-service registration functions; each one
// attaches its service subcommand (e.g. onedrive) to the export command.
var exportCommands = []func(cmd *cobra.Command) *cobra.Command{
	addOneDriveCommands,
}

// AddCommands attaches all `corso export * *` commands to the parent.
func AddCommands(cmd *cobra.Command) {
	parent := exportCmd()
	cmd.AddCommand(parent)

	for _, register := range exportCommands {
		register(parent)
	}
}
// exportCommand is the name of the export subcommand.
const exportCommand = "export"

// exportCmd produces the root of the export category of commands.
// `corso export [<subcommand>] [<flag>...]`
func exportCmd() *cobra.Command {
	c := &cobra.Command{
		Use:   exportCommand,
		Short: "Export your service data",
		Long:  `Export the data stored in one of your M365 services.`,
		RunE:  handleExportCmd,
		Args:  cobra.NoArgs,
	}

	return c
}
// handleExportCmd handles flat calls to `corso export`.
// Produces the same output as `corso export --help`.
func handleExportCmd(cmd *cobra.Command, args []string) error {
	// No subcommand was given; show usage rather than doing any work.
	return cmd.Help()
}

View File

@ -0,0 +1,175 @@
package export
import (
"bytes"
"context"
"io"
"os"
"path/filepath"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/stretchr/testify/suite"
"github.com/alcionai/corso/src/internal/tester"
"github.com/alcionai/corso/src/pkg/export"
)
// ExportE2ESuite exercises the export command's disk-writing helpers.
type ExportE2ESuite struct {
	tester.Suite
	// called records that SetupSuite ran.
	// NOTE(review): nothing in this file reads this flag — confirm it is
	// still needed, or remove it.
	called bool
}

func TestExportE2ESuite(t *testing.T) {
	suite.Run(t, &ExportE2ESuite{Suite: tester.NewE2ESuite(t, nil)})
}

func (suite *ExportE2ESuite) SetupSuite() {
	suite.called = true
}
// mockExportCollection is a test double for export.Collection, backed by a
// fixed base path and a static list of items.
type mockExportCollection struct {
	path  string
	items []export.Item
}

func (mc mockExportCollection) BasePath() string { return mc.path }

func (mc mockExportCollection) Items(context.Context) <-chan export.Item {
	out := make(chan export.Item)

	// Stream the canned items, closing the channel when done, to match the
	// producer contract of export.Collection.
	go func() {
		defer close(out)

		for _, it := range mc.items {
			out <- it
		}
	}()

	return out
}
// TestWriteExportCollection checks that writeExportCollections writes every
// item of every collection to the expected path with the expected contents.
func (suite *ExportE2ESuite) TestWriteExportCollection() {
	// ei describes one expected exported item: file name and body.
	type ei struct {
		name string
		body string
	}

	// i describes one collection: its relative path and its items.
	type i struct {
		path  string
		items []ei
	}

	table := []struct {
		name string
		cols []i
	}{
		{
			name: "single root collection single item",
			cols: []i{
				{
					path: "",
					items: []ei{
						{name: "name1", body: "body1"},
					},
				},
			},
		},
		{
			name: "single root collection multiple items",
			cols: []i{
				{
					path: "",
					items: []ei{
						{name: "name1", body: "body1"},
						{name: "name2", body: "body2"},
					},
				},
			},
		},
		{
			name: "multiple collections multiple items",
			cols: []i{
				{
					path: "",
					items: []ei{
						{name: "name1", body: "body1"},
						{name: "name2", body: "body2"},
					},
				},
				{
					path: "folder",
					items: []ei{
						{name: "name3", body: "body3"},
					},
				},
			},
		},
	}
	for _, test := range table {
		suite.Run(test.name, func() {
			t := suite.T()

			ctx, flush := tester.NewContext(t)
			defer flush()

			ecs := []export.Collection{}
			for _, col := range test.cols {
				items := make([]export.Item, 0, len(col.items))
				for _, item := range col.items {
					items = append(items, export.Item{
						Data: export.ItemData{
							Name: item.name,
							Body: io.NopCloser(bytes.NewBufferString(item.body)),
						},
					})
				}

				ecs = append(ecs, mockExportCollection{
					path:  col.path,
					items: items,
				})
			}

			// t.TempDir cleans up automatically, even if the test fails
			// before reaching a manual RemoveAll.
			dir := t.TempDir()

			err := writeExportCollections(ctx, dir, ecs)
			require.NoError(t, err, "writing data")

			for _, col := range test.cols {
				for _, item := range col.items {
					// os.ReadFile opens, reads, and closes the file; the
					// previous Open + ReadFrom leaked the file handle.
					data, err := os.ReadFile(filepath.Join(dir, col.path, item.name))
					require.NoError(t, err, "reading file")
					assert.Equal(t, item.body, string(data), "file contents")
				}
			}
		})
	}
}

215
src/cli/export/onedrive.go Normal file
View File

@ -0,0 +1,215 @@
package export
import (
"context"
"io"
"os"
ospath "path"
"github.com/alcionai/clues"
"github.com/pkg/errors"
"github.com/spf13/cobra"
"github.com/spf13/pflag"
"github.com/alcionai/corso/src/cli/flags"
. "github.com/alcionai/corso/src/cli/print"
"github.com/alcionai/corso/src/cli/repo"
"github.com/alcionai/corso/src/cli/utils"
"github.com/alcionai/corso/src/internal/common/dttm"
"github.com/alcionai/corso/src/internal/data"
"github.com/alcionai/corso/src/internal/observe"
"github.com/alcionai/corso/src/pkg/control"
"github.com/alcionai/corso/src/pkg/export"
"github.com/alcionai/corso/src/pkg/path"
)
// addOneDriveCommands is called by export.go to map subcommands to
// provider-specific handling. It attaches the onedrive subcommand and its
// flags to the parent export command.
//
// NOTE(review): if cmd.Use is not exportCommand, c is returned nil — callers
// (and tests) appear to only invoke this with the export parent; confirm.
func addOneDriveCommands(cmd *cobra.Command) *cobra.Command {
	var (
		c  *cobra.Command
		fs *pflag.FlagSet
	)

	switch cmd.Use {
	case exportCommand:
		c, fs = utils.AddCommand(cmd, oneDriveExportCmd())

		// Append the usage suffix so help shows the expected args:
		// `corso export onedrive --backup <backupId> <destination>`.
		c.Use = c.Use + " " + oneDriveServiceCommandUseSuffix

		// Flags addition ordering should follow the order we want them to appear in help and docs:
		// More generic (ex: --user) and more frequently used flags take precedence.
		fs.SortFlags = false

		flags.AddBackupIDFlag(c, true)
		flags.AddOneDriveDetailsAndRestoreFlags(c)
		flags.AddExportConfigFlags(c)
		flags.AddFailFastFlag(c)
		flags.AddCorsoPassphaseFlags(c)
		flags.AddAWSCredsFlags(c)
	}

	return c
}
const (
	// oneDriveServiceCommand is the subcommand name for the OneDrive service.
	oneDriveServiceCommand = "onedrive"
	// oneDriveServiceCommandUseSuffix is appended to the command Use line in help.
	oneDriveServiceCommandUseSuffix = "--backup <backupId> <destination>"

	// Shell examples shown in `corso export onedrive --help`.
	//nolint:lll
	oneDriveServiceCommandExportExamples = `# Export file with ID 98765abcdef in Bob's last backup (1234abcd...) to my-exports directory
corso export onedrive my-exports --backup 1234abcd-12ab-cd34-56de-1234abcd --file 98765abcdef

# Export files named "FY2021 Planning.xlsx" in "Documents/Finance Reports" to current directory
corso export onedrive . --backup 1234abcd-12ab-cd34-56de-1234abcd \
    --file "FY2021 Planning.xlsx" --folder "Documents/Finance Reports"

# Export all files and folders in folder "Documents/Finance Reports" that were created before 2020 to my-exports
corso export onedrive my-exports --backup 1234abcd-12ab-cd34-56de-1234abcd \
    --folder "Documents/Finance Reports" --file-created-before 2020-01-01T00:00:00`
)
// oneDriveExportCmd produces the onedrive subcommand:
// `corso export onedrive [<flag>...] <destination>`
func oneDriveExportCmd() *cobra.Command {
	return &cobra.Command{
		Use:   oneDriveServiceCommand,
		Short: "Export M365 OneDrive service data",
		RunE:  exportOneDriveCmd,
		Args: func(cmd *cobra.Command, args []string) error {
			// Exactly one positional argument: the local export destination.
			if len(args) != 1 {
				// Was "missing restore destination" — this is the export
				// command, so report the export destination as missing.
				return errors.New("missing export destination")
			}

			return nil
		},
		Example: oneDriveServiceCommandExportExamples,
	}
}
// exportOneDriveCmd processes an onedrive service export: it validates the
// flags, connects to the repository, runs the export operation for the given
// backup, and writes the resulting collections to the destination on disk.
func exportOneDriveCmd(cmd *cobra.Command, args []string) error {
	ctx := cmd.Context()

	if utils.HasNoFlagsAndShownHelp(cmd) {
		return nil
	}

	opts := utils.MakeOneDriveOpts(cmd)

	// Dry-run mode: flags are parsed but no work is performed (used by tests).
	if flags.RunModeFV == flags.RunModeFlagTest {
		return nil
	}

	if err := utils.ValidateOneDriveRestoreFlags(flags.BackupIDFV, opts); err != nil {
		return err
	}

	r, _, _, _, err := utils.GetAccountAndConnect(ctx, path.OneDriveService, repo.S3Overrides(cmd))
	if err != nil {
		return Only(ctx, err)
	}

	defer utils.CloseRepo(ctx, r)

	exportLocation := args[0]
	if exportLocation == "" {
		// This is unlikely (Args requires one positional), but adding it just in case.
		exportLocation = control.DefaultRestoreLocation + dttm.FormatNow(dttm.HumanReadableDriveItem)
	}

	Infof(ctx, "Exporting to folder %s", exportLocation)

	sel := utils.IncludeOneDriveRestoreDataSelectors(opts)
	utils.FilterOneDriveRestoreInfoSelectors(sel, opts)

	eo, err := r.NewExport(
		ctx,
		flags.BackupIDFV,
		sel.Selector,
		utils.MakeExportConfig(ctx, opts.ExportCfg))
	if err != nil {
		return Only(ctx, clues.Wrap(err, "Failed to initialize OneDrive export"))
	}

	expColl, err := eo.Run(ctx)
	if err != nil {
		if errors.Is(err, data.ErrNotFound) {
			return Only(ctx, clues.New("Backup or backup details missing for id "+flags.BackupIDFV))
		}

		return Only(ctx, clues.Wrap(err, "Failed to run OneDrive export"))
	}

	// It would be better to give a progressbar than a spinner, but we
	// have no way of knowing how many files are available as of now.
	diskWriteComplete := observe.MessageWithCompletion(ctx, "Writing data to disk")
	defer func() {
		diskWriteComplete <- struct{}{}
		close(diskWriteComplete)
	}()

	return writeExportCollections(ctx, exportLocation, expColl)
}
// writeExportCollections writes the items of every export collection to
// disk, rooted at exportLocation and nested under each collection's BasePath.
func writeExportCollections(
	ctx context.Context,
	exportLocation string,
	expColl []export.Collection,
) error {
	for _, coll := range expColl {
		dir := ospath.Join(exportLocation, coll.BasePath())

		for item := range coll.Items(ctx) {
			if item.Error != nil {
				return Only(ctx, clues.Wrap(item.Error, "getting item").With("dir_name", dir))
			}

			if err := writeExportItem(ctx, item, dir); err != nil {
				return err
			}
		}
	}

	return nil
}
// writeExportItem writes an ExportItem to disk in the specified folder,
// creating the folder if needed and showing a spinner while copying.
func writeExportItem(ctx context.Context, item export.Item, folder string) error {
	name := item.Data.Name
	fpath := ospath.Join(folder, name)

	progReader, pclose := observe.ItemSpinner(
		ctx,
		item.Data.Body,
		observe.ItemExportMsg,
		clues.Hide(name))

	defer item.Data.Body.Close()
	defer pclose()

	if err := os.MkdirAll(folder, os.ModePerm); err != nil {
		return Only(ctx, clues.Wrap(err, "creating directory").With("dir_name", folder))
	}

	// In case the user tries to export to a non-clean
	// directory, we might run into collisions and fail.
	f, err := os.Create(fpath)
	if err != nil {
		return Only(ctx, clues.Wrap(err, "creating file").With("file_name", name, "file_dir", folder))
	}
	// The original never closed f, leaking the descriptor and risking
	// unflushed data; close it when done.
	defer f.Close()

	if _, err := io.Copy(f, progReader); err != nil {
		return Only(ctx, clues.Wrap(err, "writing file").With("file_name", name, "file_dir", folder))
	}

	return nil
}

View File

@ -0,0 +1,106 @@
package export
import (
"bytes"
"testing"
"github.com/alcionai/clues"
"github.com/spf13/cobra"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/stretchr/testify/suite"
"github.com/alcionai/corso/src/cli/flags"
"github.com/alcionai/corso/src/cli/utils"
"github.com/alcionai/corso/src/cli/utils/testdata"
"github.com/alcionai/corso/src/internal/tester"
)
// OneDriveUnitSuite holds unit tests for the export onedrive CLI wiring.
type OneDriveUnitSuite struct {
	tester.Suite
}

func TestOneDriveUnitSuite(t *testing.T) {
	suite.Run(t, &OneDriveUnitSuite{Suite: tester.NewUnitSuite(t)})
}
// TestAddOneDriveCommands verifies that addOneDriveCommands attaches the
// onedrive subcommand with the expected Use/Short/RunE wiring, and that a
// dry-run execution parses every export flag into the expected flag values.
func (suite *OneDriveUnitSuite) TestAddOneDriveCommands() {
	expectUse := oneDriveServiceCommand + " " + oneDriveServiceCommandUseSuffix

	table := []struct {
		name        string
		use         string
		expectUse   string
		expectShort string
		expectRunE  func(*cobra.Command, []string) error
	}{
		{"export onedrive", exportCommand, expectUse, oneDriveExportCmd().Short, exportOneDriveCmd},
	}
	for _, test := range table {
		suite.Run(test.name, func() {
			t := suite.T()

			cmd := &cobra.Command{Use: test.use}

			// normally a persistent flag from the root.
			// required to ensure a dry run.
			flags.AddRunModeFlag(cmd, true)

			c := addOneDriveCommands(cmd)
			require.NotNil(t, c)

			cmds := cmd.Commands()
			require.Len(t, cmds, 1)

			child := cmds[0]
			assert.Equal(t, test.expectUse, child.Use)
			assert.Equal(t, test.expectShort, child.Short)
			tester.AreSameFunc(t, test.expectRunE, child.RunE)

			// Exercise every export flag plus the positional destination.
			cmd.SetArgs([]string{
				"onedrive",
				"--" + flags.RunModeFN, flags.RunModeFlagTest,
				"--" + flags.BackupFN, testdata.BackupInput,
				"--" + flags.FileFN, testdata.FlgInputs(testdata.FileNameInput),
				"--" + flags.FolderFN, testdata.FlgInputs(testdata.FolderPathInput),
				"--" + flags.FileCreatedAfterFN, testdata.FileCreatedAfterInput,
				"--" + flags.FileCreatedBeforeFN, testdata.FileCreatedBeforeInput,
				"--" + flags.FileModifiedAfterFN, testdata.FileModifiedAfterInput,
				"--" + flags.FileModifiedBeforeFN, testdata.FileModifiedBeforeInput,
				"--" + flags.ArchiveFN,
				"--" + flags.AWSAccessKeyFN, testdata.AWSAccessKeyID,
				"--" + flags.AWSSecretAccessKeyFN, testdata.AWSSecretAccessKey,
				"--" + flags.AWSSessionTokenFN, testdata.AWSSessionToken,
				"--" + flags.CorsoPassphraseFN, testdata.CorsoPassphrase,
				testdata.RestoreDestination,
			})

			cmd.SetOut(new(bytes.Buffer)) // drop output
			cmd.SetErr(new(bytes.Buffer)) // drop output

			// RunModeFlagTest makes exportOneDriveCmd return before doing work,
			// so Execute only exercises flag parsing.
			err := cmd.Execute()
			assert.NoError(t, err, clues.ToCore(err))

			opts := utils.MakeOneDriveOpts(cmd)

			assert.Equal(t, testdata.BackupInput, flags.BackupIDFV)
			assert.ElementsMatch(t, testdata.FileNameInput, opts.FileName)
			assert.ElementsMatch(t, testdata.FolderPathInput, opts.FolderPath)
			assert.Equal(t, testdata.FileCreatedAfterInput, opts.FileCreatedAfter)
			assert.Equal(t, testdata.FileCreatedBeforeInput, opts.FileCreatedBefore)
			assert.Equal(t, testdata.FileModifiedAfterInput, opts.FileModifiedAfter)
			assert.Equal(t, testdata.FileModifiedBeforeInput, opts.FileModifiedBefore)
			assert.Equal(t, testdata.Archive, opts.ExportCfg.Archive)
			assert.Equal(t, testdata.AWSAccessKeyID, flags.AWSAccessKeyFV)
			assert.Equal(t, testdata.AWSSecretAccessKey, flags.AWSSecretAccessKeyFV)
			assert.Equal(t, testdata.AWSSessionToken, flags.AWSSessionTokenFV)
			assert.Equal(t, testdata.CorsoPassphrase, flags.CorsoPassphraseFV)
		})
	}
}

15
src/cli/flags/export.go Normal file
View File

@ -0,0 +1,15 @@
package flags
import (
"github.com/spf13/cobra"
)
// ArchiveFN is the flag name for requesting a single-archive export.
const ArchiveFN = "archive"

// ArchiveFV holds the parsed value of the --archive flag.
var ArchiveFV bool

// AddExportConfigFlags adds the export config flag set.
func AddExportConfigFlags(cmd *cobra.Command) {
	fs := cmd.Flags()
	fs.BoolVar(&ArchiveFV, ArchiveFN, false, "Export data as an archive instead of individual files")
}

View File

@ -0,0 +1,38 @@
package utils
import (
"context"
"github.com/spf13/cobra"
"github.com/alcionai/corso/src/cli/flags"
"github.com/alcionai/corso/src/pkg/control"
)
// ExportCfgOpts collects the user-provided export configuration flag values.
type ExportCfgOpts struct {
	Archive bool

	Populated flags.PopulatedFlags
}

// makeExportCfgOpts snapshots the export config flags from the command.
func makeExportCfgOpts(cmd *cobra.Command) ExportCfgOpts {
	opts := ExportCfgOpts{Archive: flags.ArchiveFV}

	// Populated contains the list of flags that appear in the
	// command, according to pflags. Use this to differentiate
	// between an "empty" and a "missing" value.
	opts.Populated = flags.GetPopulatedFlags(cmd)

	return opts
}
// MakeExportConfig builds a control.ExportConfig from the parsed export
// flag options, starting from the package defaults.
func MakeExportConfig(
	ctx context.Context,
	opts ExportCfgOpts,
) control.ExportConfig {
	cfg := control.DefaultExportConfig()
	cfg.Archive = opts.Archive

	return cfg
}

View File

@ -0,0 +1,54 @@
package utils
import (
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/suite"
"github.com/alcionai/corso/src/cli/flags"
"github.com/alcionai/corso/src/internal/tester"
"github.com/alcionai/corso/src/pkg/control"
)
// ExportCfgUnitSuite holds unit tests for the export config helpers.
type ExportCfgUnitSuite struct {
	tester.Suite
}

func TestExportCfgUnitSuite(t *testing.T) {
	suite.Run(t, &ExportCfgUnitSuite{Suite: tester.NewUnitSuite(t)})
}
// TestMakeExportConfig verifies that MakeExportConfig carries the Archive
// option through to the resulting control.ExportConfig.
func (suite *ExportCfgUnitSuite) TestMakeExportConfig() {
	// Shared base options; each case overrides Populated on a copy.
	rco := &ExportCfgOpts{Archive: true}

	table := []struct {
		name      string
		populated flags.PopulatedFlags
		expect    control.ExportConfig
	}{
		{
			name: "archive populated",
			populated: flags.PopulatedFlags{
				flags.ArchiveFN: {},
			},
			expect: control.ExportConfig{
				Archive: true,
			},
		},
	}
	for _, test := range table {
		suite.Run(test.name, func() {
			t := suite.T()

			ctx, flush := tester.NewContext(t)
			defer flush()

			// Copy the base opts so table cases stay independent.
			opts := *rco
			opts.Populated = test.populated

			result := MakeExportConfig(ctx, opts)
			assert.Equal(t, test.expect.Archive, result.Archive)
		})
	}
}

View File

@ -19,6 +19,7 @@ type OneDriveOpts struct {
FileModifiedBefore string
RestoreCfg RestoreCfgOpts
ExportCfg ExportCfgOpts
Populated flags.PopulatedFlags
}
@ -35,6 +36,7 @@ func MakeOneDriveOpts(cmd *cobra.Command) OneDriveOpts {
FileModifiedBefore: flags.FileModifiedBeforeFV,
RestoreCfg: makeRestoreCfgOpts(cmd),
ExportCfg: makeExportCfgOpts(cmd),
// populated contains the list of flags that appear in the
// command, according to pflags. Use this to differentiate

View File

@ -51,6 +51,8 @@ var (
DeltaPageSize = "deltaPageSize"
Archive = true
AzureClientID = "testAzureClientId"
AzureTenantID = "testAzureTenantId"
AzureClientSecret = "testAzureClientSecret"
@ -60,4 +62,6 @@ var (
AWSSessionToken = "testAWSSessionToken"
CorsoPassphrase = "testCorsoPassphrase"
RestoreDestination = "test-restore-destination"
)

View File

@ -133,6 +133,7 @@ func Complete() {
const (
ItemBackupMsg = "Backing up item"
ItemRestoreMsg = "Restoring item"
ItemExportMsg = "Exporting item"
ItemQueueMsg = "Queuing items"
)
@ -281,6 +282,51 @@ func ItemProgress(
return bar.ProxyReader(rc), abort
}
// ItemSpinner is similar to ItemProgress, but for use in cases where
// we don't know the file size but want to show progress. It wraps rc in a
// bar-proxied reader and returns it along with an abort func that finalizes
// the bar. When progress display is hidden or rc is nil, rc is returned
// unchanged with a no-op abort.
func ItemSpinner(
	ctx context.Context,
	rc io.ReadCloser,
	header string,
	iname any,
) (io.ReadCloser, func()) {
	plain := plainString(iname)

	log := logger.Ctx(ctx).With("item", iname)
	log.Debug(header)

	// Bypass the bar entirely when display is suppressed or there is no body.
	if cfg.hidden() || rc == nil {
		defer log.Debug("done - " + header)
		return rc, func() {}
	}

	// NOTE(review): wg.Add(1) presumably pairs with a wg.Done in
	// waitAndCloseBar, as in ItemProgress — confirm against that helper.
	wg.Add(1)

	barOpts := []mpb.BarOption{
		mpb.PrependDecorators(
			decor.Name(header, decor.WCSyncSpaceR),
			decor.Name(plain, decor.WCSyncSpaceR),
			decor.CurrentKibiByte(" %.1f", decor.WC{W: 8})),
	}

	if !cfg.keepBarsAfterComplete {
		barOpts = append(barOpts, mpb.BarRemoveOnComplete())
	}

	// Total -1: size unknown, so the bar acts as a spinner/counter.
	bar := progress.New(-1, mpb.NopStyle(), barOpts...)

	go waitAndCloseBar(bar, func() {
		// might be overly chatty, we can remove if needed.
		log.Debug("done - " + header)
	})()

	// abort finalizes and removes the bar; callers defer it when copying ends.
	abort := func() {
		bar.SetTotal(-1, true)
		bar.Abort(true)
	}

	return bar.ProxyReader(rc), abort
}
// ProgressWithCount tracks the display of a bar that tracks the completion
// of the specified count.
// Each write to the provided channel counts as a single increment.

View File

@ -16,7 +16,7 @@ import (
)
const (
defaultRestoreLocation = "Corso_Restore_"
DefaultRestoreLocation = "Corso_Restore_"
)
// CollisionPolicy describes how the datalayer behaves in case of a collision.
@ -70,12 +70,12 @@ type RestoreConfig struct {
// DefaultRestoreConfig produces a RestoreConfig with default collision
// handling and a timestamped default restore location.
func DefaultRestoreConfig(timeFormat dttm.TimeFormat) RestoreConfig {
	cfg := RestoreConfig{OnCollision: Skip}
	cfg.Location = DefaultRestoreLocation + dttm.FormatNow(timeFormat)

	return cfg
}
// DefaultRestoreContainerName produces the timestamped default name for a
// restore destination container.
func DefaultRestoreContainerName(timeFormat dttm.TimeFormat) string {
	name := DefaultRestoreLocation + dttm.FormatNow(timeFormat)
	return name
}
// EnsureRestoreConfigDefaults sets all non-supported values in the config