minimize channel messages exports (#4245)

Reduces channel message export data to the minimal set of valuable info: message content, creator,
creation and modification times, and replies (each reply carries the same
data, minus any nested replies).

---

#### Does this PR need a docs update or release note?

- [x]  No

#### Type of change

- [x] 🌻 Feature

#### Issue(s)

* #3991  

#### Test Plan

- [x] 💪 Manual
- [x] 💚 E2E
This commit is contained in:
Keepers 2023-09-15 15:16:06 -06:00 committed by GitHub
parent 265a77f1cd
commit f7042129f4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
19 changed files with 275 additions and 49 deletions

View File

@ -69,6 +69,8 @@ func (suite *GroupsUnitSuite) TestAddGroupsCommands() {
"--" + flags.CorsoPassphraseFN, testdata.CorsoPassphrase,
"--" + flags.FormatFN, testdata.FormatType,
// bool flags
"--" + flags.ArchiveFN,
})
@ -82,6 +84,7 @@ func (suite *GroupsUnitSuite) TestAddGroupsCommands() {
assert.Equal(t, testdata.BackupInput, flags.BackupIDFV)
assert.Equal(t, testdata.Archive, opts.ExportCfg.Archive)
assert.Equal(t, testdata.FormatType, opts.ExportCfg.Format)
assert.Equal(t, testdata.AWSAccessKeyID, flags.AWSAccessKeyFV)
assert.Equal(t, testdata.AWSSecretAccessKey, flags.AWSSecretAccessKeyFV)

View File

@ -75,6 +75,8 @@ func (suite *OneDriveUnitSuite) TestAddOneDriveCommands() {
"--" + flags.CorsoPassphraseFN, testdata.CorsoPassphrase,
"--" + flags.FormatFN, testdata.FormatType,
// bool flags
"--" + flags.ArchiveFN,
})
@ -95,6 +97,7 @@ func (suite *OneDriveUnitSuite) TestAddOneDriveCommands() {
assert.Equal(t, testdata.FileModifiedBeforeInput, opts.FileModifiedBefore)
assert.Equal(t, testdata.Archive, opts.ExportCfg.Archive)
assert.Equal(t, testdata.FormatType, opts.ExportCfg.Format)
assert.Equal(t, testdata.AWSAccessKeyID, flags.AWSAccessKeyFV)
assert.Equal(t, testdata.AWSSecretAccessKey, flags.AWSSecretAccessKeyFV)

View File

@ -80,6 +80,8 @@ func (suite *SharePointUnitSuite) TestAddSharePointCommands() {
"--" + flags.CorsoPassphraseFN, testdata.CorsoPassphrase,
"--" + flags.FormatFN, testdata.FormatType,
// bool flags
"--" + flags.ArchiveFN,
})
@ -107,6 +109,7 @@ func (suite *SharePointUnitSuite) TestAddSharePointCommands() {
assert.ElementsMatch(t, testdata.PageFolderInput, opts.PageFolder)
assert.Equal(t, testdata.Archive, opts.ExportCfg.Archive)
assert.Equal(t, testdata.FormatType, opts.ExportCfg.Format)
assert.Equal(t, testdata.AWSAccessKeyID, flags.AWSAccessKeyFV)
assert.Equal(t, testdata.AWSSecretAccessKey, flags.AWSSecretAccessKeyFV)

View File

@ -1,15 +1,46 @@
package flags
import (
"strings"
"github.com/alcionai/clues"
"github.com/spf13/cobra"
"github.com/alcionai/corso/src/pkg/control"
"github.com/alcionai/corso/src/pkg/filters"
)
const ArchiveFN = "archive"
const (
ArchiveFN = "archive"
FormatFN = "format"
)
var ArchiveFV bool
var (
ArchiveFV bool
FormatFV string
)
// AddExportConfigFlags registers the export config flag set on cmd: the
// archive bool flag and the format string flag. The format flag is
// marked hidden on the flag set.
func AddExportConfigFlags(cmd *cobra.Command) {
	flagSet := cmd.Flags()

	flagSet.BoolVar(
		&ArchiveFV,
		ArchiveFN,
		false,
		"Export data as an archive instead of individual files")

	flagSet.StringVar(
		&FormatFV,
		FormatFN,
		"",
		"Specify the export file format")

	// any failure to hide the format flag is handed to cobra.CheckErr.
	hideErr := flagSet.MarkHidden(FormatFN)
	cobra.CheckErr(hideErr)
}
// ValidateExportConfigFlags ensures all export config flags that utilize
// enumerated values match a well-known value.
func ValidateExportConfigFlags() error {
acceptedFormatTypes := []string{
string(control.DefaultFormat),
string(control.JSONFormat),
}
if !filters.Equal(acceptedFormatTypes).Compare(FormatFV) {
return clues.New("unrecognized format type: " + FormatFV)
}
FormatFV = strings.ToLower(FormatFV)
return nil
}

View File

@ -0,0 +1,43 @@
package flags
import (
"testing"
"github.com/alcionai/clues"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/suite"
"github.com/alcionai/corso/src/internal/tester"
)
// ExportUnitSuite groups the unit tests for the export config flags.
type ExportUnitSuite struct {
tester.Suite
}
// TestExportUnitSuite is the go test entry point; it runs ExportUnitSuite
// through the testify suite runner using a unit-test suite from tester.
func TestExportUnitSuite(t *testing.T) {
suite.Run(t, &ExportUnitSuite{Suite: tester.NewUnitSuite(t)})
}
// TestValidateExportConfigFlags sets the package-level FormatFV to a series
// of values and checks whether ValidateExportConfigFlags accepts or rejects
// each one.
func (suite *ExportUnitSuite) TestValidateExportConfigFlags() {
	t := suite.T()

	// cases run in order against the shared FormatFV flag value.
	table := []struct {
		format    string
		expectErr assert.ErrorAssertionFunc
	}{
		{format: "", expectErr: assert.NoError},
		{format: "json", expectErr: assert.NoError},
		{format: "JsoN", expectErr: assert.NoError},
		{format: "fnerds", expectErr: assert.Error},
	}

	for _, test := range table {
		FormatFV = test.format

		err := ValidateExportConfigFlags()
		test.expectErr(t, err, clues.ToCore(err))
	}
}

View File

@ -11,6 +11,7 @@ import (
type ExportCfgOpts struct {
Archive bool
Format string
Populated flags.PopulatedFlags
}
@ -18,6 +19,7 @@ type ExportCfgOpts struct {
func makeExportCfgOpts(cmd *cobra.Command) ExportCfgOpts {
return ExportCfgOpts{
Archive: flags.ArchiveFV,
Format: flags.FormatFV,
// populated contains the list of flags that appear in the
// command, according to pflags. Use this to differentiate
@ -33,6 +35,7 @@ func MakeExportConfig(
exportCfg := control.DefaultExportConfig()
exportCfg.Archive = opts.Archive
exportCfg.Format = control.FormatType(opts.Format)
return exportCfg
}

View File

@ -52,6 +52,7 @@ var (
DeltaPageSize = "deltaPageSize"
Archive = true
FormatType = "json"
AzureClientID = "testAzureClientId"
AzureTenantID = "testAzureTenantId"

View File

@ -9,6 +9,7 @@ import (
"github.com/alcionai/corso/src/internal/data"
"github.com/alcionai/corso/src/internal/m365/collection/drive/metadata"
"github.com/alcionai/corso/src/internal/version"
"github.com/alcionai/corso/src/pkg/control"
"github.com/alcionai/corso/src/pkg/export"
"github.com/alcionai/corso/src/pkg/fault"
)
@ -31,6 +32,7 @@ func streamItems(
ctx context.Context,
drc []data.RestoreCollection,
backupVersion int,
cec control.ExportConfig,
ch chan<- export.Item,
) {
defer close(ch)

View File

@ -1,22 +1,34 @@
package groups
import (
"bytes"
"context"
"encoding/json"
"io"
"time"
"github.com/alcionai/clues"
"github.com/microsoftgraph/msgraph-sdk-go/models"
"github.com/alcionai/corso/src/internal/common/ptr"
"github.com/alcionai/corso/src/internal/data"
"github.com/alcionai/corso/src/pkg/control"
"github.com/alcionai/corso/src/pkg/export"
"github.com/alcionai/corso/src/pkg/fault"
"github.com/alcionai/corso/src/pkg/services/m365/api"
)
// NewExportCollection wraps the backing restore collections in an
// export.BaseCollection rooted at baseDir. The backup version and export
// config are stored on the collection, and streamItems is installed as its
// item streamer.
func NewExportCollection(
	baseDir string,
	backingCollections []data.RestoreCollection,
	backupVersion int,
	cec control.ExportConfig,
) export.Collectioner {
	coll := export.BaseCollection{
		Stream:            streamItems,
		Cfg:               cec,
		BackupVersion:     backupVersion,
		BackingCollection: backingCollections,
		BaseDir:           baseDir,
	}

	return coll
}
@ -26,6 +38,7 @@ func streamItems(
ctx context.Context,
drc []data.RestoreCollection,
backupVersion int,
cec control.ExportConfig,
ch chan<- export.Item,
) {
defer close(ch)
@ -34,25 +47,29 @@ func streamItems(
for _, rc := range drc {
for item := range rc.Items(ctx, errs) {
itemID := item.ID()
// channel message items have no name
name := itemID
body, err := formatChannelMessage(cec, item.ToReader())
if err != nil {
ch <- export.Item{
ID: itemID,
Name: name,
Body: item.ToReader(),
ID: item.ID(),
Error: err,
}
} else {
ch <- export.Item{
ID: item.ID(),
// channel message items have no name
Name: item.ID(),
Body: body,
}
}
}
items, recovered := errs.ItemsAndRecovered()
// Return all the items that we failed to source from the persistence layer
for _, err := range items {
for _, item := range items {
ch <- export.Item{
ID: err.ID,
Error: &err,
ID: item.ID,
Error: &item,
}
}
@ -63,3 +80,77 @@ func streamItems(
}
}
}
// minimumChannelMessage is the reduced, exportable form of a single channel
// message: its content, its creator, and its creation and modification
// times.
type minimumChannelMessage struct {
	Content              string    `json:"content"`
	CreatedDateTime      time.Time `json:"createdDateTime"`
	From                 string    `json:"from"`
	LastModifiedDateTime time.Time `json:"lastModifiedDateTime"`
}

// minimumChannelMessageAndReplies is a minimumChannelMessage together with
// its replies. Each reply is itself a minimumChannelMessage, so replies do
// not carry further replies. The replies key is omitted from the json
// output when there are none.
type minimumChannelMessageAndReplies struct {
	minimumChannelMessage
	Replies []minimumChannelMessage `json:"replies,omitempty"`
}
// formatChannelMessage produces the export body for a serialized channel
// message according to cec.Format.
//
// When cec.Format is control.JSONFormat, rc is returned untouched and the
// caller keeps ownership of it. For every other format, rc is fully read
// and closed here; the bytes are deserialized into a
// models.ChatMessageable, reduced to a minimumChannelMessageAndReplies
// (the message plus each of its replies), and re-serialized as json. The
// returned reader wraps those in-memory bytes.
//
// An error is returned if reading, deserializing, or re-serializing fails,
// or if the deserialized value is not a models.ChatMessageable.
func formatChannelMessage(
	cec control.ExportConfig,
	rc io.ReadCloser,
) (io.ReadCloser, error) {
	if cec.Format == control.JSONFormat {
		return rc, nil
	}

	// Register the close before reading so the reader is released even
	// when io.ReadAll fails part-way through.
	defer rc.Close()

	bs, err := io.ReadAll(rc)
	if err != nil {
		return nil, clues.Wrap(err, "reading item bytes")
	}

	cfb, err := api.CreateFromBytes(bs, models.CreateChatMessageFromDiscriminatorValue)
	if err != nil {
		return nil, clues.Wrap(err, "deserializing bytes to message")
	}

	msg, ok := cfb.(models.ChatMessageable)
	if !ok {
		return nil, clues.New("expected deserialized item to implement models.ChatMessageable")
	}

	replies := msg.GetReplies()

	mcmar := minimumChannelMessageAndReplies{
		minimumChannelMessage: makeMinimumChannelMesasge(msg),
		Replies:               make([]minimumChannelMessage, 0, len(replies)),
	}

	for _, r := range replies {
		mcmar.Replies = append(mcmar.Replies, makeMinimumChannelMesasge(r))
	}

	bs, err = json.Marshal(mcmar)
	if err != nil {
		return nil, clues.Wrap(err, "serializing minimized channel message")
	}

	return io.NopCloser(bytes.NewReader(bs)), nil
}
// makeMinimumChannelMesasge reduces a chat message to its minimal export
// form: body content (empty when the message has no body), creation and
// last-modified times, and the creator as resolved by
// api.GetChatMessageFrom.
func makeMinimumChannelMesasge(item models.ChatMessageable) minimumChannelMessage {
	var content string

	if body := item.GetBody(); body != nil {
		content = ptr.Val(body.GetContent())
	}

	created := ptr.Val(item.GetCreatedDateTime())
	creator := api.GetChatMessageFrom(item)
	modified := ptr.Val(item.GetLastModifiedDateTime())

	return minimumChannelMessage{
		Content:              content,
		CreatedDateTime:      created,
		From:                 creator,
		LastModifiedDateTime: modified,
	}
}

View File

@ -1,6 +1,8 @@
package groups
import (
"bytes"
"io"
"testing"
"github.com/alcionai/clues"
@ -11,6 +13,7 @@ import (
dataMock "github.com/alcionai/corso/src/internal/data/mock"
"github.com/alcionai/corso/src/internal/tester"
"github.com/alcionai/corso/src/internal/version"
"github.com/alcionai/corso/src/pkg/control"
"github.com/alcionai/corso/src/pkg/export"
)
@ -23,6 +26,10 @@ func TestExportUnitSuite(t *testing.T) {
}
func (suite *ExportUnitSuite) TestStreamItems() {
makeBody := func() io.ReadCloser {
return io.NopCloser(bytes.NewReader([]byte("{}")))
}
table := []struct {
name string
backingColl dataMock.Collection
@ -33,7 +40,10 @@ func (suite *ExportUnitSuite) TestStreamItems() {
name: "no errors",
backingColl: dataMock.Collection{
ItemData: []data.Item{
&dataMock.Item{ItemID: "zim"},
&dataMock.Item{
ItemID: "zim",
Reader: makeBody(),
},
},
},
expectName: "zim",
@ -52,7 +62,10 @@ func (suite *ExportUnitSuite) TestStreamItems() {
name: "items and recoverable errors",
backingColl: dataMock.Collection{
ItemData: []data.Item{
&dataMock.Item{ItemID: "gir"},
&dataMock.Item{
ItemID: "gir",
Reader: makeBody(),
},
},
ItemsRecoverableErrs: []error{
clues.New("I miss my cupcake."),
@ -76,6 +89,7 @@ func (suite *ExportUnitSuite) TestStreamItems() {
ctx,
[]data.RestoreCollection{test.backingColl},
version.NoBackup,
control.DefaultExportConfig(),
ch)
var (

View File

@ -45,7 +45,8 @@ func ProduceExportCollections(
coll := groups.NewExportCollection(
path.Builder{}.Append(folders...).String(),
[]data.RestoreCollection{restoreColl},
backupVersion)
backupVersion,
exportCfg)
ec = append(ec, coll)
}

View File

@ -58,14 +58,16 @@ func (suite *ExportUnitSuite) TestExportRestoreCollections() {
var (
itemID = "itemID"
containerName = "channelID"
exportCfg = control.ExportConfig{}
dii = groupMock.ItemInfo()
body = io.NopCloser(bytes.NewBufferString(
`{"displayname": "` + dii.Groups.ItemName + `"}`))
exportCfg = control.ExportConfig{}
expectedPath = path.ChannelMessagesCategory.String() + "/" + containerName
expectedItems = []export.Item{
{
ID: itemID,
Name: dii.Groups.ItemName,
Body: io.NopCloser((bytes.NewBufferString("body1"))),
// Body: body, not checked
},
}
)
@ -80,7 +82,7 @@ func (suite *ExportUnitSuite) TestExportRestoreCollections() {
ItemData: []data.Item{
&dataMock.Item{
ItemID: itemID,
Reader: io.NopCloser(bytes.NewBufferString("body1")),
Reader: body,
ItemInfo: dii,
},
},
@ -103,7 +105,11 @@ func (suite *ExportUnitSuite) TestExportRestoreCollections() {
assert.Equal(t, expectedPath, ecs[0].BasePath(), "base dir")
fitems := []export.Item{}
for item := range ecs[0].Items(ctx) {
// have to nil out body, otherwise assert fails due to
// pointer memory location differences
item.Body = nil
fitems = append(fitems, item)
}

View File

@ -8,13 +8,25 @@ type ExportConfig struct {
// the archive.
Archive bool
// DataFormat decides the format in which we return the data. This is
// only useful for outlook exports, for example they can be in eml
// or pst for emails.
// DataFormat
// TODO: Enable once we support outlook exports
// DataFormat string
// Format decides the format in which we return the data.
// ex: html vs pst vs other.
// Default format is decided on a per-service or per-data basis.
Format FormatType
}
// FormatType identifies the format in which exported data is returned.
type FormatType string

var (
	// DefaultFormat is the empty format value: follow whatever format is
	// the default for the service or data type.
	DefaultFormat = FormatType("")
	// JSONFormat exports the data as raw, unmodified json.
	JSONFormat = FormatType("json")
)
func DefaultExportConfig() ExportConfig {
return ExportConfig{
Archive: false,

View File

@ -5,6 +5,7 @@ import (
"io"
"github.com/alcionai/corso/src/internal/data"
"github.com/alcionai/corso/src/pkg/control"
)
// ---------------------------------------------------------------------------
@ -22,6 +23,13 @@ type Collectioner interface {
Items(context.Context) <-chan Item
}
// itemStreamer is the signature of the function stored in
// BaseCollection.Stream. It receives the backing restore collections, the
// backup version of the data source, the export config, and the send-only
// channel on which export Items are delivered.
type itemStreamer func(
ctx context.Context,
backingColls []data.RestoreCollection,
backupVersion int,
cfg control.ExportConfig,
ch chan<- Item)
// BaseCollection holds the foundational details of an export collection.
type BaseCollection struct {
// BaseDir contains the destination path of the collection.
@ -34,7 +42,9 @@ type BaseCollection struct {
// BackupVersion is the backupVersion of the data source.
BackupVersion int
Stream func(context.Context, []data.RestoreCollection, int, chan<- Item)
Cfg control.ExportConfig
Stream itemStreamer
}
func (bc BaseCollection) BasePath() string {
@ -43,7 +53,7 @@ func (bc BaseCollection) BasePath() string {
func (bc BaseCollection) Items(ctx context.Context) <-chan Item {
ch := make(chan Item)
go bc.Stream(ctx, bc.BackingCollection, bc.BackupVersion, ch)
go bc.Stream(ctx, bc.BackingCollection, bc.BackupVersion, bc.Cfg, ch)
return ch
}

View File

@ -143,7 +143,6 @@ func ChannelMessageInfo(
var (
lastReply time.Time
modTime = ptr.OrNow(msg.GetLastModifiedDateTime())
msgCreator string
content string
)
@ -161,19 +160,6 @@ func ChannelMessageInfo(
modTime = lastReply
}
from := msg.GetFrom()
switch true {
case from == nil:
// not all messages have a populated 'from'. Namely, system messages do not.
case from.GetApplication() != nil:
msgCreator = ptr.Val(from.GetApplication().GetDisplayName())
case from.GetDevice() != nil:
msgCreator = ptr.Val(from.GetDevice().GetDisplayName())
case from.GetUser() != nil:
msgCreator = ptr.Val(from.GetUser().GetDisplayName())
}
if msg.GetBody() != nil {
content = ptr.Val(msg.GetBody().GetContent())
}
@ -183,7 +169,7 @@ func ChannelMessageInfo(
Created: ptr.Val(msg.GetCreatedDateTime()),
LastReplyAt: lastReply,
Modified: modTime,
MessageCreator: msgCreator,
MessageCreator: GetChatMessageFrom(msg),
MessagePreview: str.Preview(content, 16),
ReplyCount: len(msg.GetReplies()),
Size: int64(len(content)),
@ -209,3 +195,20 @@ func CheckIDAndName(c models.Channelable) error {
return nil
}
// GetChatMessageFrom returns the display name of the identity that created
// the message. The application identity is checked first, then the device,
// then the user; the first one present supplies the name. An empty string
// is returned when the message has no 'from' value or none of those
// identities is populated.
func GetChatMessageFrom(msg models.ChatMessageable) string {
	from := msg.GetFrom()
	if from == nil {
		// not every message carries a 'from' value.
		return ""
	}

	if app := from.GetApplication(); app != nil {
		return ptr.Val(app.GetDisplayName())
	}

	if device := from.GetDevice(); device != nil {
		return ptr.Val(device.GetDisplayName())
	}

	if user := from.GetUser(); user != nil {
		return ptr.Val(user.GetDisplayName())
	}

	return ""
}

View File

@ -254,7 +254,7 @@ func (c Contacts) DeleteItem(
// ---------------------------------------------------------------------------
func BytesToContactable(bytes []byte) (models.Contactable, error) {
v, err := createFromBytes(bytes, models.CreateContactFromDiscriminatorValue)
v, err := CreateFromBytes(bytes, models.CreateContactFromDiscriminatorValue)
if err != nil {
return nil, clues.Wrap(err, "deserializing bytes to contact")
}

View File

@ -557,7 +557,7 @@ func (c Events) PostLargeAttachment(
// ---------------------------------------------------------------------------
func BytesToEventable(body []byte) (models.Eventable, error) {
v, err := createFromBytes(body, models.CreateEventFromDiscriminatorValue)
v, err := CreateFromBytes(body, models.CreateEventFromDiscriminatorValue)
if err != nil {
return nil, clues.Wrap(err, "deserializing bytes to event")
}

View File

@ -504,7 +504,7 @@ func (c Mail) PostLargeAttachment(
// ---------------------------------------------------------------------------
func BytesToMessageable(body []byte) (models.Messageable, error) {
v, err := createFromBytes(body, models.CreateMessageFromDiscriminatorValue)
v, err := CreateFromBytes(body, models.CreateMessageFromDiscriminatorValue)
if err != nil {
return nil, clues.Wrap(err, "deserializing bytes to message")
}

View File

@ -6,8 +6,8 @@ import (
kjson "github.com/microsoft/kiota-serialization-json-go"
)
// createFromBytes generates an m365 object form bytes.
func createFromBytes(
// CreateFromBytes generates an m365 object from bytes.
func CreateFromBytes(
bytes []byte,
createFunc serialization.ParsableFactory,
) (serialization.Parsable, error) {