diff --git a/src/go.mod b/src/go.mod
index bad6ed174..324dbf9b5 100644
--- a/src/go.mod
+++ b/src/go.mod
@@ -13,6 +13,7 @@ require (
github.com/golang-jwt/jwt/v5 v5.0.0
github.com/google/uuid v1.3.1
github.com/h2non/gock v1.2.0
+ github.com/jaytaylor/html2text v0.0.0-20230321000545-74c2419ad056
github.com/kopia/kopia v0.13.0
github.com/microsoft/kiota-abstractions-go v1.3.0
github.com/microsoft/kiota-authentication-azure-go v1.0.1
@@ -55,11 +56,13 @@ require (
github.com/magiconair/properties v1.8.7 // indirect
github.com/microsoft/kiota-serialization-multipart-go v1.0.0 // indirect
github.com/mitchellh/mapstructure v1.5.0 // indirect
+ github.com/olekukonko/tablewriter v0.0.5 // indirect
github.com/pelletier/go-toml/v2 v2.1.0 // indirect
github.com/sagikazarmark/locafero v0.3.0 // indirect
github.com/sagikazarmark/slog-shim v0.1.0 // indirect
github.com/sourcegraph/conc v0.3.0 // indirect
github.com/spf13/afero v1.10.0 // indirect
+ github.com/ssor/bom v0.0.0-20170718123548-6386211fdfcf // indirect
github.com/std-uritemplate/std-uritemplate/go v0.0.42 // indirect
github.com/subosito/gotenv v1.6.0 // indirect
github.com/valyala/bytebufferpool v1.0.0 // indirect
diff --git a/src/go.sum b/src/go.sum
index 24f790202..1bd2296d1 100644
--- a/src/go.sum
+++ b/src/go.sum
@@ -235,6 +235,8 @@ github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:
github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc=
github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8=
github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw=
+github.com/jaytaylor/html2text v0.0.0-20230321000545-74c2419ad056 h1:iCHtR9CQyktQ5+f3dMVZfwD2KWJUgm7M0gdL9NGr8KA=
+github.com/jaytaylor/html2text v0.0.0-20230321000545-74c2419ad056/go.mod h1:CVKlgaMiht+LXvHG173ujK6JUhZXKb2u/BQtjPDIvyk=
github.com/jmespath/go-jmespath v0.4.0 h1:BEgLn5cpjn8UN1mAw4NjwDrS35OdebyEtFe+9YPoQUg=
github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo=
github.com/jmespath/go-jmespath/internal/testify v1.5.1 h1:shLQSRRSCCPj3f2gpwzGwWFoC7ycTf1rcQZHOlsJ6N8=
@@ -280,6 +282,7 @@ github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Ky
github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
github.com/mattn/go-isatty v0.0.19 h1:JITubQf0MOLdlGRuRq+jtsDlekdYPia9ZFsB8h/APPA=
github.com/mattn/go-isatty v0.0.19/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
+github.com/mattn/go-runewidth v0.0.9/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI=
github.com/mattn/go-runewidth v0.0.13/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
github.com/mattn/go-runewidth v0.0.15 h1:UNAjwbU9l54TA3KzvqLGxwWjHmMgBUVhBiTjelZgg3U=
github.com/mattn/go-runewidth v0.0.15/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
@@ -326,6 +329,8 @@ github.com/natefinch/atomic v1.0.1 h1:ZPYKxkqQOx3KZ+RsbnP/YsgvxWQPGxjC0oBt2AhwV0
github.com/natefinch/atomic v1.0.1/go.mod h1:N/D/ELrljoqDyT3rZrsUmtsuzvHkeB/wWjHV22AZRbM=
github.com/nbio/st v0.0.0-20140626010706-e9e8d9816f32 h1:W6apQkHrMkS0Muv8G/TipAy/FJl/rCYT0+EuS8+Z0z4=
github.com/nbio/st v0.0.0-20140626010706-e9e8d9816f32/go.mod h1:9wM+0iRr9ahx58uYLpLIr5fm8diHn0JbqRycJi6w0Ms=
+github.com/olekukonko/tablewriter v0.0.5 h1:P2Ga83D34wi1o9J6Wh1mRuqd4mF/x/lgBS7N7AbDhec=
+github.com/olekukonko/tablewriter v0.0.5/go.mod h1:hPp6KlRPjbx+hW8ykQs1w3UBbZlj6HuIJcUGPhkA7kY=
github.com/pascaldekloe/goe v0.1.0 h1:cBOtyMzM9HTpWjXfbbunk26uA6nG3a8n06Wieeh0MwY=
github.com/pascaldekloe/goe v0.1.0/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc=
github.com/pelletier/go-toml/v2 v2.1.0 h1:FnwAJ4oYMvbT/34k9zzHuZNrhlz48GB3/s6at6/MHO4=
@@ -399,6 +404,8 @@ github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA=
github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
github.com/spf13/viper v1.17.0 h1:I5txKw7MJasPL/BrfkbA0Jyo/oELqVmux4pR/UxOMfI=
github.com/spf13/viper v1.17.0/go.mod h1:BmMMMLQXSbcHK6KAOiFLz0l5JHrU89OdIRHvsk0+yVI=
+github.com/ssor/bom v0.0.0-20170718123548-6386211fdfcf h1:pvbZ0lM0XWPBqUKqFU8cmavspvIl9nulOYwdy6IFRRo=
+github.com/ssor/bom v0.0.0-20170718123548-6386211fdfcf/go.mod h1:RJID2RhlZKId02nZ62WenDCkgHFerpIOmW0iT7GKmXM=
github.com/std-uritemplate/std-uritemplate/go v0.0.42 h1:rG+XlE4drkVWs2NLfGS15N+vg+CUcjXElQKvJ0fctlI=
github.com/std-uritemplate/std-uritemplate/go v0.0.42/go.mod h1:Qov4Ay4U83j37XjgxMYevGJFLbnZ2o9cEOhGufBKgKY=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
diff --git a/src/internal/m365/collection/groups/export.go b/src/internal/m365/collection/groups/export.go
index 4418fc9d5..f2b0b6d76 100644
--- a/src/internal/m365/collection/groups/export.go
+++ b/src/internal/m365/collection/groups/export.go
@@ -90,20 +90,23 @@ func streamItems(
type (
minimumChannelMessage struct {
- // TODO(keepers): remove attachmentNames when better formatting
- // of attachments within the content body is implemented.
- AttachmentNames []string `json:"attachmentNames"`
- Content string `json:"content"`
- CreatedDateTime time.Time `json:"createdDateTime"`
- From string `json:"from"`
- LastModifiedDateTime time.Time `json:"lastModifiedDateTime"`
- Subject string `json:"subject"`
+ Attachments []minimumAttachment `json:"attachments"`
+ Content string `json:"content"`
+ CreatedDateTime time.Time `json:"createdDateTime"`
+ From string `json:"from"`
+ LastModifiedDateTime time.Time `json:"lastModifiedDateTime"`
+ Subject string `json:"subject"`
}
minimumChannelMessageAndReplies struct {
minimumChannelMessage
Replies []minimumChannelMessage `json:"replies,omitempty"`
}
+
+ minimumAttachment struct {
+ ID string `json:"id"`
+ Name string `json:"name"`
+ }
)
func formatChannelMessage(
@@ -143,7 +146,7 @@ func formatChannelMessage(
mcmar.Replies = append(mcmar.Replies, makeMinimumChannelMesasge(r))
}
- bs, err = json.Marshal(mcmar)
+ bs, err = marshalJSONContainingHTML(mcmar)
if err != nil {
return nil, clues.Wrap(err, "serializing minimized channel message")
}
@@ -151,6 +154,21 @@ func formatChannelMessage(
return io.NopCloser(bytes.NewReader(bs)), nil
}
+// json.Marshal will replace many markup tags (ex: "<" and ">") with their unicode
+// equivalent. In order to maintain parity with original content that contains html,
+// we have to use this alternative encoding behavior.
+// https://stackoverflow.com/questions/28595664/how-to-stop-json-marshal-from-escaping-and
+func marshalJSONContainingHTML(a any) ([]byte, error) {
+ buffer := &bytes.Buffer{}
+
+ encoder := json.NewEncoder(buffer)
+ encoder.SetEscapeHTML(false)
+
+ err := encoder.Encode(a)
+
+ return buffer.Bytes(), clues.Stack(err).OrNil()
+}
+
func makeMinimumChannelMesasge(item models.ChatMessageable) minimumChannelMessage {
var content string
@@ -158,8 +176,18 @@ func makeMinimumChannelMesasge(item models.ChatMessageable) minimumChannelMessag
content = ptr.Val(item.GetBody().GetContent())
}
+ attachments := item.GetAttachments()
+ minAttachments := make([]minimumAttachment, 0, len(attachments))
+
+ for _, a := range attachments {
+ minAttachments = append(minAttachments, minimumAttachment{
+ ID: ptr.Val(a.GetId()),
+ Name: ptr.Val(a.GetName()),
+ })
+ }
+
return minimumChannelMessage{
- AttachmentNames: api.GetChatMessageAttachmentNames(item),
+ Attachments: minAttachments,
Content: content,
CreatedDateTime: ptr.Val(item.GetCreatedDateTime()),
From: api.GetChatMessageFrom(item),
diff --git a/src/pkg/backup/details/groups.go b/src/pkg/backup/details/groups.go
index 8d7867e9a..b6e5011fe 100644
--- a/src/pkg/backup/details/groups.go
+++ b/src/pkg/backup/details/groups.go
@@ -2,6 +2,7 @@ package details
import (
"strconv"
+ "strings"
"time"
"github.com/alcionai/clues"
@@ -100,7 +101,8 @@ func (i GroupsInfo) Values() []string {
}
return []string{
- i.Message.Preview,
+ // html parsing may produce newlijnes, which we'll want to avoid
+ strings.ReplaceAll(i.Message.Preview, "\n", "\\n"),
i.ParentPath,
i.Message.Subject,
strconv.Itoa(i.Message.ReplyCount),
diff --git a/src/pkg/services/m365/api/channels.go b/src/pkg/services/m365/api/channels.go
index c2ab8f8bb..d7190dfdd 100644
--- a/src/pkg/services/m365/api/channels.go
+++ b/src/pkg/services/m365/api/channels.go
@@ -3,9 +3,11 @@ package api
import (
"context"
"fmt"
+ "regexp"
"time"
"github.com/alcionai/clues"
+ "github.com/jaytaylor/html2text"
"github.com/microsoftgraph/msgraph-sdk-go/models"
"github.com/microsoftgraph/msgraph-sdk-go/teams"
@@ -93,7 +95,7 @@ func (c Channels) GetChannelByName(
// Sanity check ID and name
cal := gv[0]
- if err := CheckIDAndName(cal); err != nil {
+ if err := checkIDAndName(cal); err != nil {
return nil, clues.Stack(err).WithClues(ctx)
}
@@ -163,7 +165,10 @@ func channelMessageInfo(
modTime = lastReplyAt
}
- preview, contentLen := GetChatMessageContentPreview(msg)
+ preview, contentLen, err := getChatMessageContentPreview(msg)
+ if err != nil {
+ preview = "malformed or unparseable html" + preview
+ }
message := details.ChannelMessageInfo{
AttachmentNames: GetChatMessageAttachmentNames(msg),
@@ -178,7 +183,11 @@ func channelMessageInfo(
var lr details.ChannelMessageInfo
if lastReply != nil {
- preview, contentLen = GetChatMessageContentPreview(lastReply)
+ preview, contentLen, err = getChatMessageContentPreview(lastReply)
+ if err != nil {
+ preview = "malformed or unparseable html: " + preview
+ }
+
lr = details.ChannelMessageInfo{
AttachmentNames: GetChatMessageAttachmentNames(lastReply),
CreatedAt: ptr.Val(lastReply.GetCreatedDateTime()),
@@ -196,9 +205,9 @@ func channelMessageInfo(
}
}
-// CheckIDAndName is a validator that ensures the ID
+// checkIDAndName is a validator that ensures the ID
// and name are populated and not zero valued.
-func CheckIDAndName(c models.Channelable) error {
+func checkIDAndName(c models.Channelable) error {
if c == nil {
return clues.New("nil container")
}
@@ -233,14 +242,59 @@ func GetChatMessageFrom(msg models.ChatMessageable) string {
return ""
}
-func GetChatMessageContentPreview(msg models.ChatMessageable) (string, int64) {
- var content string
+func getChatMessageContentPreview(msg models.ChatMessageable) (string, int64, error) {
+ content, origSize, err := stripChatMessageHTML(msg)
+ return str.Preview(content, 128), origSize, clues.Stack(err).OrNil()
+}
+
+func stripChatMessageHTML(msg models.ChatMessageable) (string, int64, error) {
+ var (
+ content string
+ origSize int64
+ )
if msg.GetBody() != nil {
content = ptr.Val(msg.GetBody().GetContent())
}
- return str.Preview(content, 128), int64(len(content))
+ origSize = int64(len(content))
+
+ content = replaceAttachmentMarkup(content, msg.GetAttachments())
+ content, err := html2text.FromString(content)
+
+ return content, origSize, clues.Stack(err).OrNil()
+}
+
+var attachmentMarkupRE = regexp.MustCompile(`
innerend", + attachments: attachments, + expect: "body\n\ninnerend", + expectErr: assert.NoError, + }, + { + name: "with html", + content: "
body