From 722c69b3cc20f76d9a08730d768faea9e8952366 Mon Sep 17 00:00:00 2001 From: Abin Simon Date: Sat, 13 Jan 2024 00:05:46 +0530 Subject: [PATCH] Generate text in case of utf8 descriptions (#5014) The idea is to mimic the format in which Graph converts HTML into text when generating the ICS that gets embedded into an eml file. --- #### Does this PR need a docs update or release note? - [ ] :white_check_mark: Yes, it's included - [x] :clock1: Yes, but in a later PR - [ ] :no_entry: No #### Type of change - [x] :sunflower: Feature - [ ] :bug: Bugfix - [ ] :world_map: Documentation - [ ] :robot: Supportability/Tests - [ ] :computer: CI/Deployment - [ ] :broom: Tech Debt/Cleanup #### Issue(s) * # #### Test Plan - [ ] :muscle: Manual - [x] :zap: Unit test - [ ] :green_heart: E2E --- src/go.mod | 1 + src/go.sum | 2 ++ src/internal/converters/ics/ics.go | 48 +++++++++++++++++++------ src/internal/converters/ics/ics_test.go | 22 +++++++++++- 4 files changed, 62 insertions(+), 11 deletions(-) diff --git a/src/go.mod b/src/go.mod index 29c5f3105..a42f845e8 100644 --- a/src/go.mod +++ b/src/go.mod @@ -90,6 +90,7 @@ require ( github.com/valyala/fasthttp v1.51.0 // indirect go.opentelemetry.io/otel/metric v1.21.0 // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20231127180814-3a041ad873d4 // indirect + jaytaylor.com/html2text v0.0.0-20230321000545-74c2419ad056 // indirect ) require ( diff --git a/src/go.sum b/src/go.sum index e3ee2e537..0c5ffd3f3 100644 --- a/src/go.sum +++ b/src/go.sum @@ -435,3 +435,5 @@ gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gotest.tools/v3 v3.5.1 h1:EENdUnS3pdur5nybKYIh2Vfgc8IUNBjxDPSjtiJcOzU= gotest.tools/v3 v3.5.1/go.mod h1:isy3WKz7GK6uNw/sbHzfKBLvlvXwUyV06n6brMxxopU= +jaytaylor.com/html2text v0.0.0-20230321000545-74c2419ad056 h1:6YFJoB+0fUH6X3xU/G2tQqCYg+PkGtnZ5nMR5rpw72g= +jaytaylor.com/html2text v0.0.0-20230321000545-74c2419ad056/go.mod 
h1:OxvTsCwKosqQ1q7B+8FwXqg4rKZ/UG9dUW+g/VL2xH4= diff --git a/src/internal/converters/ics/ics.go b/src/internal/converters/ics/ics.go index 23ee5d2f8..7c1d1a760 100644 --- a/src/internal/converters/ics/ics.go +++ b/src/internal/converters/ics/ics.go @@ -7,10 +7,12 @@ import ( "fmt" "strings" "time" + "unicode" "github.com/alcionai/clues" ics "github.com/arran4/golang-ical" "github.com/microsoftgraph/msgraph-sdk-go/models" + "jaytaylor.com/html2text" "github.com/alcionai/corso/src/internal/common/ptr" "github.com/alcionai/corso/src/internal/common/str" @@ -251,6 +253,16 @@ func FromJSON(ctx context.Context, body []byte) (string, error) { return cal.Serialize(), nil } +func isASCII(s string) bool { + for _, c := range s { + if c > unicode.MaxASCII { + return false + } + } + + return true +} + func updateEventProperties(ctx context.Context, event models.Eventable, iCalEvent *ics.VEvent) error { // CREATED - https://www.rfc-editor.org/rfc/rfc5545#section-3.8.7.1 created := event.GetCreatedDateTime() @@ -322,10 +334,6 @@ func updateEventProperties(ctx context.Context, event models.Eventable, iCalEven } // DESCRIPTION - https://www.rfc-editor.org/rfc/rfc5545#section-3.8.1.5 - // TODO: Emojies currently don't seem to be read properly by Outlook - // When outlook exports them(in .eml), it exports them in text as it strips down html - bodyPreview := ptr.Val(event.GetBodyPreview()) - if event.GetBody() != nil { description := ptr.Val(event.GetBody().GetContent()) contentType := event.GetBody().GetContentType().String() @@ -333,13 +341,33 @@ func updateEventProperties(ctx context.Context, event models.Eventable, iCalEven if len(description) > 0 && contentType == "text" { iCalEvent.SetDescription(description) } else if len(description) > 0 { - // https://stackoverflow.com/a/859475 - iCalEvent.SetDescription(bodyPreview) - if contentType == "html" { - desc := strings.ReplaceAll(description, "\r\n", "") - desc = strings.ReplaceAll(desc, "\n", "") - 
iCalEvent.AddProperty("X-ALT-DESC", desc, ics.WithFmtType("text/html")) + // If we have html, we have two routes. If we don't have + // UTF-8, then we can do an exact reproduction of the + // original data in outlook by using X-ALT-DESC field and + // using the html there. But if we have UTF-8, then we + // have to use DESCRIPTION field and use the content + // stripped of html there. This is because even though the + // field technically supports UTF-8, Outlook does not + // seem to work with it. Exchange does similar things + // when it attaches the event to an email. + + // nolint:lll + // https://learn.microsoft.com/en-us/openspecs/exchange_server_protocols/ms-oxcical/d7f285da-9c7a-4597-803b-b74193c898a8 + // X-ALT-DESC field uses "Text" as in https://www.rfc-editor.org/rfc/rfc2445#section-4.3.11 + if isASCII(description) { + // https://stackoverflow.com/a/859475 + replacer := strings.NewReplacer("\r\n", "\\n", "\n", "\\n") + desc := replacer.Replace(description) + iCalEvent.AddProperty("X-ALT-DESC", desc, ics.WithFmtType("text/html")) + } else { + stripped, err := html2text.FromString(description, html2text.Options{PrettyTables: true}) + if err != nil { + return clues.Wrap(err, "converting html to text") + } + + iCalEvent.SetDescription(stripped) + } } } } diff --git a/src/internal/converters/ics/ics_test.go b/src/internal/converters/ics/ics_test.go index 966b140a6..762f03064 100644 --- a/src/internal/converters/ics/ics_test.go +++ b/src/internal/converters/ics/ics_test.go @@ -607,10 +607,30 @@ func (suite *ICSUnitSuite) TestEventConversion() { return e }, check: func(out string) { - assert.Contains(t, out, "DESCRIPTION:body preview", "body preview") assert.Contains(t, out, "X-ALT-DESC;FMTTYPE=text/html:body", "body") }, }, + { + name: "html body with utf8", + event: func() *models.Event { + e := baseEvent() + + body := models.NewItemBody() + btype, err := models.ParseBodyType("html") + require.NoError(t, err, "parse body type") + + 
body.SetContentType(btype.(*models.BodyType)) + body.SetContent(ptr.To("മലയാളം")) + + e.SetBodyPreview(ptr.To("body preview")) + e.SetBody(body) + + return e + }, + check: func(out string) { + assert.Contains(t, out, "DESCRIPTION:മലയാളം", "body") + }, + }, { name: "showas free", event: func() *models.Event {