extract timestamp from arbitrary string (#883)

## Description

So that purge can gracefully handle the variety of time string values that may appear within a
folder name, a func has been added to common/time.go
that extracts a time substring from an arbitrary string.
Purge now uses that extractor instead of
a length-based suffix check.

## Type of change

- [x] 🐛 Bugfix

## Issue(s)

* #805

## Test Plan

- [x] 💪 Manual
This commit is contained in:
Keepers 2022-09-19 14:35:49 -06:00 committed by GitHub
parent a0508cc442
commit 1203fd2b6a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 176 additions and 46 deletions

View File

@ -3,9 +3,9 @@ package main
import (
"context"
"os"
"regexp"
"time"
"github.com/hashicorp/go-multierror"
"github.com/pkg/errors"
"github.com/spf13/cobra"
@ -93,17 +93,17 @@ func handleAllFolderPurge(cmd *cobra.Command, args []string) error {
err = purgeMailFolders(ctx, gc, t)
if err != nil {
return errors.Wrap(err, "purging mail folders")
return Only(ctx, errors.Wrap(err, "purging mail folders"))
}
err = purgeCalendarFolders(ctx, gc, t)
if err != nil {
return errors.Wrap(err, "purging calendar folders")
return Only(ctx, errors.Wrap(err, "purging event calendars"))
}
err = purgeContactFolders(ctx, gc, t)
if err != nil {
return errors.Wrap(err, "purging contacts folders")
return Only(ctx, errors.Wrap(err, "purging contacts folders"))
}
return nil
@ -126,7 +126,11 @@ func handleMailFolderPurge(cmd *cobra.Command, args []string) error {
return err
}
return purgeMailFolders(ctx, gc, t)
if err := purgeMailFolders(ctx, gc, t); err != nil {
return Only(ctx, errors.Wrap(err, "purging mail folders"))
}
return nil
}
func handleCalendarFolderPurge(cmd *cobra.Command, args []string) error {
@ -142,7 +146,11 @@ func handleCalendarFolderPurge(cmd *cobra.Command, args []string) error {
return err
}
return purgeCalendarFolders(ctx, gc, t)
if err := purgeCalendarFolders(ctx, gc, t); err != nil {
return Only(ctx, errors.Wrap(err, "purging event calendars"))
}
return nil
}
func handleContactsFolderPurge(cmd *cobra.Command, args []string) error {
@ -158,7 +166,11 @@ func handleContactsFolderPurge(cmd *cobra.Command, args []string) error {
return err
}
return purgeContactFolders(ctx, gc, t)
if err := purgeContactFolders(ctx, gc, t); err != nil {
return Only(ctx, errors.Wrap(err, "purging contacts folders"))
}
return nil
}
// ------------------------------------------------------------------------------------------
@ -247,16 +259,6 @@ func purgeContactFolders(ctx context.Context, gc *connector.GraphConnector, boun
// ----- controller
// secfmt matches a display name that ends in an hour:minute:second triple,
// such as "..._15:04:05". The digit classes are bracketed so that two real
// digits are required in each of the last two fields, and the pattern is
// anchored to the end of the name because only the trailing characters are
// of interest.
var secfmt = regexp.MustCompile(`.+:[0-9]{2}:[0-9]{2}$`)

// normalizeDisplayName returns dn unchanged when it already ends in a seconds
// field, and otherwise returns dn with ":00" appended so that the tail of the
// name has the same shape as a seconds-bearing timestamp.
func normalizeDisplayName(dn string) string {
	if secfmt.MatchString(dn) {
		return dn
	}

	return dn + ":00"
}
func purgeFolders(
ctx context.Context,
gc *connector.GraphConnector,
@ -271,31 +273,23 @@ func purgeFolders(
return Only(ctx, errors.Wrapf(err, "retrieving %s folders", data))
}
stLen := len(common.SimpleDateTimeFormat)
var errs error
// delete any that don't meet the boundary
for _, fld := range fs {
// compare the folder time to the deletion boundary time first
var (
del bool
displayName = *fld.GetDisplayName()
normName = normalizeDisplayName(*fld.GetDisplayName())
dnLen = len(normName)
)
displayName := *fld.GetDisplayName()
if dnLen > stLen {
suff := normName[dnLen-stLen:]
dnTime, err := common.ExtractTime(displayName)
if err != nil && !errors.Is(err, common.ErrNoTimeString) {
err = errors.Wrapf(err, "Error: parsing %s folder name [%s]", data, displayName)
errs = multierror.Append(errs, err)
Info(ctx, err)
dnTime, err := common.ParseTime(suff)
if err != nil {
Info(ctx, errors.Wrapf(err, "Error: deleting %s folder [%s]", data, displayName))
continue
}
del = dnTime.Before(boundary)
continue
}
if !del {
if !dnTime.Before(boundary) || dnTime == (time.Time{}) {
continue
}
@ -303,11 +297,13 @@ func purgeFolders(
err = deleter(gc.Service(), user, *fld.GetId())
if err != nil {
Info(ctx, errors.Wrapf(err, "Error: deleting %s folder [%s]", data, displayName))
err = errors.Wrapf(err, "Error: deleting %s folder [%s]", data, displayName)
errs = multierror.Append(errs, err)
Info(ctx, err)
}
}
return nil
return errs
}
// ------------------------------------------------------------------------------------------

View File

@ -1,17 +1,43 @@
package common
import (
"errors"
"regexp"
"time"
"github.com/pkg/errors"
)
// Time layouts recognized by this package, expressed with Go's reference time.
const (
// ClippedSimpleTimeFormat is SimpleDateTimeFormat without the seconds field.
// The clipped format occurs when m365 removes the :00 second suffix.
ClippedSimpleTimeFormat = "02-Jan-2006_15:04"
// LegacyTimeFormat is RFC 3339 with whole-second precision.
LegacyTimeFormat = time.RFC3339
// SimpleDateTimeFormat is a day-month-year date joined by an underscore to a
// 24-hour hour:minute:second time; it carries no zone information.
SimpleDateTimeFormat = "02-Jan-2006_15:04:05"
// StandardTimeFormat is RFC 3339 with optional fractional seconds.
StandardTimeFormat = time.RFC3339Nano
// TabularOutputTimeFormat looks like RFC 3339 but ends in a literal "Z"
// instead of a zone-offset placeholder.
TabularOutputTimeFormat = "2006-01-02T15:04:05Z"
)
// Each expression below captures, in group 1, a substring shaped like one of
// the package's time layouts. The surrounding ".*" pieces let the timestamp sit
// anywhere inside a larger string.
//
// For the RFC 3339 style expressions the zone suffix is one of: "Z"/"z", two
// letters, or a signed hour offset optionally followed by ":mm". The minutes
// part has to be inside the capture, otherwise "+01:00" is cut down to "+01",
// which time.Parse rejects for RFC 3339 layouts.
var (
	clippedSimpleTimeRE = regexp.MustCompile(`.*(\d{2}-[a-zA-Z]{3}-\d{4}_\d{2}:\d{2}).*`)
	legacyTimeRE        = regexp.MustCompile(
		`.*(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}([Zz]|[a-zA-Z]{2}|([+-]([01]\d|2[0-3])(:[0-5]\d)?))).*`)
	simpleDateTimeRE = regexp.MustCompile(`.*(\d{2}-[a-zA-Z]{3}-\d{4}_\d{2}:\d{2}:\d{2}).*`)
	standardTimeRE   = regexp.MustCompile(
		`.*(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(\.\d+)?([Zz]|[a-zA-Z]{2}|([+-]([01]\d|2[0-3])(:[0-5]\d)?))).*`)
	tabularOutputTimeRE = regexp.MustCompile(`.*(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}([Zz]|[a-zA-Z]{2})).*`)
)
// Ordered lookup tables for the known time layouts.
var (
// formats lists the layouts that ParseTime attempts, in order; the first
// layout that parses successfully wins.
// clipped formats must appear last, else they take priority over the regular Simple format.
formats = []string{
StandardTimeFormat, SimpleDateTimeFormat, LegacyTimeFormat, TabularOutputTimeFormat, ClippedSimpleTimeFormat,
}
// regexes lists the expressions that ExtractTime attempts, in order; the
// first expression that matches wins. The clipped expression also matches
// the leading part of a full Simple timestamp, so it is kept last.
regexes = []*regexp.Regexp{
standardTimeRE, simpleDateTimeRE, legacyTimeRE, tabularOutputTimeRE, clippedSimpleTimeRE,
}
)
// ErrNoTimeString is the sentinel error that ExtractTime wraps and returns
// when none of the known time expressions match its input. Callers can detect
// it with errors.Is.
var ErrNoTimeString = errors.New("no substring contains a known time format")
// FormatNow produces the current time in UTC using the provided
// time format.
func FormatNow(fmt string) string {
@ -50,15 +76,29 @@ func ParseTime(s string) (time.Time, error) {
return time.Time{}, errors.New("cannot interpret an empty string as time.Time")
}
t, err := time.Parse(StandardTimeFormat, s)
if err == nil {
return t.UTC(), nil
for _, form := range formats {
t, err := time.Parse(form, s)
if err == nil {
return t.UTC(), nil
}
}
t, err = time.Parse(SimpleDateTimeFormat, s)
if err == nil {
return t.UTC(), nil
}
return time.Time{}, errors.New("unable to format time string: " + s)
return time.Time{}, errors.New("unable to parse time string: " + s)
}
// ExtractTime finds a timestamp substring inside s and parses it.
//
// The expressions in regexes are tried in order. The first expression that
// matches decides the outcome: its first capture group is passed to ParseTime
// and that result, whether a time or an error, is returned as-is; later
// expressions are not consulted. Every expression begins with a greedy ".*",
// so when s holds several candidates the capture tends toward the last one.
//
// An empty s yields an error. If no expression matches, the returned error
// wraps ErrNoTimeString (with s as context) so callers can test for it with
// errors.Is.
func ExtractTime(s string) (time.Time, error) {
	if len(s) == 0 {
		return time.Time{}, errors.New("cannot extract time.Time from an empty string")
	}

	for _, re := range regexes {
		// Only the first match is ever consumed, so request just that one
		// instead of collecting every match in s.
		if m := re.FindStringSubmatch(s); len(m) > 1 {
			return ParseTime(m[1])
		}
	}

	return time.Time{}, errors.Wrap(ErrNoTimeString, s)
}

View File

@ -55,3 +55,97 @@ func (suite *CommonTimeUnitSuite) TestParseTime() {
_, err = common.ParseTime("flablabls")
require.Error(t, err)
}
// TestExtractTime checks that common.ExtractTime recovers a timestamp from a
// string no matter which supported formatter produced it and no matter what
// text surrounds it. The cases are the cross product of a set of input times,
// the formatters, and a set of prefix/suffix pairs.
func (suite *CommonTimeUnitSuite) TestExtractTime() {
	// normalize round-trips tt through a string layout and back so that the
	// expected and the extracted times are compared at the same precision.
	// Clipped cases use the seconds-less layout; all others use the legacy
	// formatter. (Named normalize rather than comparable to avoid shadowing
	// the predeclared identifier.)
	normalize := func(t *testing.T, tt time.Time, clipped bool) time.Time {
		ts := common.FormatLegacyTime(tt.UTC())
		if clipped {
			ts = tt.UTC().Format(common.ClippedSimpleTimeFormat)
		}

		c, err := common.ParseTime(ts)
		require.NoError(t, err)

		return c
	}

	// parseT builds a fixed input time from an RFC 3339 literal.
	parseT := func(v string) time.Time {
		t, err := time.Parse(time.RFC3339, v)
		require.NoError(suite.T(), err)

		return t
	}

	inputs := []time.Time{
		time.Now().UTC(),
		time.Now().UTC().Add(-12 * time.Hour),
		parseT("2006-01-02T00:00:00Z"),
		parseT("2006-01-02T12:00:00Z"),
		parseT("2006-01-02T03:01:00Z"),
		parseT("2006-01-02T13:00:02Z"),
		parseT("2006-01-02T03:03:00+01:00"),
		parseT("2006-01-02T03:00:04-01:00"),
	}

	// Each formatter carries its own clipped flag, so the flag cannot drift
	// out of sync with the formatter's position in the slice.
	type timeFormatter struct {
		format  func(time.Time) string
		clipped bool
	}

	formatters := []timeFormatter{
		{format: common.FormatLegacyTime},
		{format: common.FormatSimpleDateTime},
		{format: common.FormatTime},
		{format: common.FormatTabularDisplayTime},
		{
			// The simple format with its last three bytes (the ":ss"
			// seconds field) removed.
			format: func(t time.Time) string {
				s := common.FormatSimpleDateTime(t)
				return s[:len(s)-3]
			},
			clipped: true,
		},
	}

	type presuf struct {
		prefix string
		suffix string
	}

	pss := []presuf{
		{"foo", "bar"},
		{"", "bar"},
		{"foo", ""},
		{"", ""},
	}

	type testable struct {
		input   string
		expect  time.Time
		clipped bool
	}

	// test matrix: for each input, in each format, with each prefix/suffix, run the test.
	table := make([]testable, 0, len(inputs)*len(formatters)*len(pss))

	for _, in := range inputs {
		for _, f := range formatters {
			v := f.format(in)

			for _, ps := range pss {
				table = append(table, testable{
					input:   ps.prefix + v + ps.suffix,
					expect:  normalize(suite.T(), in, f.clipped),
					clipped: f.clipped,
				})
			}
		}
	}

	for _, test := range table {
		suite.T().Run(test.input, func(t *testing.T) {
			result, err := common.ExtractTime(test.input)
			require.NoError(t, err)
			assert.Equal(t, test.expect, normalize(t, result, test.clipped))
		})
	}
}