Generate text in case of utf8 descriptions (#5014)

The idea is to mimic the format that Graph produces when it converts HTML into text while generating the ICS that gets embedded into an .eml file.

---

#### Does this PR need a docs update or release note?

- [ ]  Yes, it's included
- [x] 🕐 Yes, but in a later PR
- [ ]  No

#### Type of change

<!--- Please check the type of change your PR introduces: --->
- [x] 🌻 Feature
- [ ] 🐛 Bugfix
- [ ] 🗺️ Documentation
- [ ] 🤖 Supportability/Tests
- [ ] 💻 CI/Deployment
- [ ] 🧹 Tech Debt/Cleanup

#### Issue(s)

<!-- Can reference multiple issues. Use one of the following "magic words" - "closes, fixes" to auto-close the Github issue. -->
* #<issue>

#### Test Plan

<!-- How will this be tested prior to merging.-->
- [ ] 💪 Manual
- [x]  Unit test
- [ ] 💚 E2E
This commit is contained in:
Abin Simon 2024-01-13 00:05:46 +05:30 committed by GitHub
parent c43ad6c517
commit 722c69b3cc
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 62 additions and 11 deletions

View File

@ -90,6 +90,7 @@ require (
github.com/valyala/fasthttp v1.51.0 // indirect
go.opentelemetry.io/otel/metric v1.21.0 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20231127180814-3a041ad873d4 // indirect
jaytaylor.com/html2text v0.0.0-20230321000545-74c2419ad056 // indirect
)
require (

View File

@ -435,3 +435,5 @@ gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gotest.tools/v3 v3.5.1 h1:EENdUnS3pdur5nybKYIh2Vfgc8IUNBjxDPSjtiJcOzU=
gotest.tools/v3 v3.5.1/go.mod h1:isy3WKz7GK6uNw/sbHzfKBLvlvXwUyV06n6brMxxopU=
jaytaylor.com/html2text v0.0.0-20230321000545-74c2419ad056 h1:6YFJoB+0fUH6X3xU/G2tQqCYg+PkGtnZ5nMR5rpw72g=
jaytaylor.com/html2text v0.0.0-20230321000545-74c2419ad056/go.mod h1:OxvTsCwKosqQ1q7B+8FwXqg4rKZ/UG9dUW+g/VL2xH4=

View File

@ -7,10 +7,12 @@ import (
"fmt"
"strings"
"time"
"unicode"
"github.com/alcionai/clues"
ics "github.com/arran4/golang-ical"
"github.com/microsoftgraph/msgraph-sdk-go/models"
"jaytaylor.com/html2text"
"github.com/alcionai/corso/src/internal/common/ptr"
"github.com/alcionai/corso/src/internal/common/str"
@ -251,6 +253,16 @@ func FromJSON(ctx context.Context, body []byte) (string, error) {
return cal.Serialize(), nil
}
// isASCII reports whether s is made up exclusively of ASCII characters.
// An empty string is considered ASCII.
func isASCII(s string) bool {
	// Every non-ASCII rune (and every invalid encoding) contains at least
	// one byte above unicode.MaxASCII, so scanning bytes is sufficient.
	for i := 0; i < len(s); i++ {
		if s[i] > unicode.MaxASCII {
			return false
		}
	}

	return true
}
func updateEventProperties(ctx context.Context, event models.Eventable, iCalEvent *ics.VEvent) error {
// CREATED - https://www.rfc-editor.org/rfc/rfc5545#section-3.8.7.1
created := event.GetCreatedDateTime()
@ -322,10 +334,6 @@ func updateEventProperties(ctx context.Context, event models.Eventable, iCalEven
}
// DESCRIPTION - https://www.rfc-editor.org/rfc/rfc5545#section-3.8.1.5
// TODO: Emojies currently don't seem to be read properly by Outlook
// When outlook exports them(in .eml), it exports them in text as it strips down html
bodyPreview := ptr.Val(event.GetBodyPreview())
if event.GetBody() != nil {
description := ptr.Val(event.GetBody().GetContent())
contentType := event.GetBody().GetContentType().String()
@ -333,13 +341,33 @@ func updateEventProperties(ctx context.Context, event models.Eventable, iCalEven
if len(description) > 0 && contentType == "text" {
iCalEvent.SetDescription(description)
} else if len(description) > 0 {
// https://stackoverflow.com/a/859475
iCalEvent.SetDescription(bodyPreview)
if contentType == "html" {
desc := strings.ReplaceAll(description, "\r\n", "")
desc = strings.ReplaceAll(desc, "\n", "")
iCalEvent.AddProperty("X-ALT-DESC", desc, ics.WithFmtType("text/html"))
// If we have html, we have two routes. If the content is
// pure ASCII, then we can do an exact reproduction of the
// original data in outlook by using X-ALT-DESC field and
// using the html there. But if it has non-ASCII (UTF-8)
// characters, then we have to use DESCRIPTION field and use
// the content stripped of html there. This is because even
// though the field technically supports UTF-8, Outlook does
// not seem to work with it. Exchange does similar things
// when it attaches the event to an email.
// nolint:lll
// https://learn.microsoft.com/en-us/openspecs/exchange_server_protocols/ms-oxcical/d7f285da-9c7a-4597-803b-b74193c898a8
// X-ALT-DESC field uses "Text" as in https://www.rfc-editor.org/rfc/rfc2445#section-4.3.11
if isASCII(description) {
// https://stackoverflow.com/a/859475
replacer := strings.NewReplacer("\r\n", "\\n", "\n", "\\n")
desc := replacer.Replace(description)
iCalEvent.AddProperty("X-ALT-DESC", desc, ics.WithFmtType("text/html"))
} else {
stripped, err := html2text.FromString(description, html2text.Options{PrettyTables: true})
if err != nil {
return clues.Wrap(err, "converting html to text")
}
iCalEvent.SetDescription(stripped)
}
}
}
}

View File

@ -607,10 +607,30 @@ func (suite *ICSUnitSuite) TestEventConversion() {
return e
},
check: func(out string) {
assert.Contains(t, out, "DESCRIPTION:body preview", "body preview")
assert.Contains(t, out, "X-ALT-DESC;FMTTYPE=text/html:<html><body>body</body></html>", "body")
},
},
{
name: "html body with utf8",
event: func() *models.Event {
e := baseEvent()
body := models.NewItemBody()
btype, err := models.ParseBodyType("html")
require.NoError(t, err, "parse body type")
body.SetContentType(btype.(*models.BodyType))
body.SetContent(ptr.To("<html><body>മലയാളം</body></html>"))
e.SetBodyPreview(ptr.To("body preview"))
e.SetBody(body)
return e
},
check: func(out string) {
assert.Contains(t, out, "DESCRIPTION:മലയാളം", "body")
},
},
{
name: "showas free",
event: func() *models.Event {