Add other decode methods

This commit is contained in:
Ashlie Martinez 2023-03-31 15:25:29 -07:00
parent 57454cb00b
commit ed43203544
5 changed files with 363 additions and 1 deletions

View File

@ -0,0 +1,126 @@
package decoder
import (
"encoding/json"
"fmt"
"io"
"io/ioutil"
"time"
"github.com/buger/jsonparser"
"github.com/hashicorp/go-multierror"
"github.com/pkg/errors"
"github.com/alcionai/corso/src/cmd/jsondebug/common"
)
// Compile-time assertion that JsonParser satisfies common.ManifestDecoder.
var _ common.ManifestDecoder = JsonParser{}
// JsonParser is a stateless manifest decoder. Its Decode method reads the whole
// input into memory and walks it with the github.com/buger/jsonparser package.
//
//revive:disable-next-line:var-naming
type JsonParser struct{}
// Decode reads all of r into memory and hands the bytes to parseManifestData.
// When gcStats is set, memory usage is printed before the input is read.
// A read failure is returned wrapped with the message "reading data".
func (d JsonParser) Decode(r io.Reader, gcStats bool) error {
	if gcStats {
		common.PrintMemUsage()
	}

	raw, readErr := ioutil.ReadAll(r)
	if readErr == nil {
		return parseManifestData(raw, gcStats)
	}

	return errors.Wrap(readErr, "reading data")
}
// parseManifestData walks the "entries" array in data and decodes every
// element with getManifestEntry.
//
// Errors are accumulated rather than aborting on the first failure, so one bad
// entry does not hide problems in later entries. Entries that fail to decode
// are not added to the result. A document without an "entries" key is treated
// as an empty manifest; any other iteration failure reported by jsonparser is
// returned.
//
// When gcStats is set, memory usage is printed before and after decoding along
// with the number of entries that decoded successfully.
func parseManifestData(data []byte, gcStats bool) error {
	if gcStats {
		common.PrintMemUsage()
	}

	var (
		errs   *multierror.Error
		output = common.Manifest{
			Entries: []*common.ManifestEntry{},
		}
	)

	_, err := jsonparser.ArrayEach(
		data,
		func(value []byte, _ jsonparser.ValueType, _ int, iterErr error) {
			if iterErr != nil {
				errs = multierror.Append(errs, errors.Wrap(iterErr, "iterating entries"))
				return
			}

			e, errInner := getManifestEntry(value)
			if errInner != nil {
				// Record the entry's own decode error. The error passed into the
				// callback is a different value and is handled above.
				errs = multierror.Append(errs, errInner)
				return
			}

			output.Entries = append(output.Entries, e)
		},
		"entries")
	// jsonparser returns this sentinel unwrapped, so a direct comparison is
	// enough to tell "no entries field" apart from malformed input.
	if err != nil && err != jsonparser.KeyPathNotFoundError {
		errs = multierror.Append(errs, errors.Wrap(err, "iterating entries"))
	}

	if gcStats {
		common.PrintMemUsage()
		fmt.Printf("Decoded %d entries\n", len(output.Entries))
	}

	return errs.ErrorOrNil()
}
// getManifestEntry decodes a single manifest entry object from data.
//
// The fields id, labels, modified, deleted, and data are extracted in one pass
// with jsonparser.EachKey. Every field-level failure is collected, so the
// returned error may describe several problems at once. The entry is returned
// alongside the error and may be only partially populated when the error is
// non-nil.
func getManifestEntry(data []byte) (*common.ManifestEntry, error) {
	var (
		errs *multierror.Error
		e    = &common.ManifestEntry{}
		// The index of each path is the idx value handed to the callback below.
		paths = [][]string{
			{"id"},
			{"labels"},
			{"modified"},
			{"deleted"},
			{"data"},
		}
	)

	jsonparser.EachKey(data, func(idx int, value []byte, _ jsonparser.ValueType, iterErr error) {
		if iterErr != nil {
			errs = multierror.Append(errs, errors.Wrapf(iterErr, "reading field %v", idx))
			return
		}

		// err is scoped to this invocation so that a failure on one field is
		// never reported again when a later field is processed.
		var err error

		switch idx {
		case 0:
			e.ID = string(value)

		case 1:
			err = json.Unmarshal(value, &e.Labels)
			if err != nil {
				err = errors.Wrap(err, "unmarshalling labels")
			}

		case 2:
			e.ModTime, err = time.Parse(time.RFC3339, string(value))
			if err != nil {
				err = errors.Wrap(err, "unmarshalling modtime")
			}

		case 3:
			err = json.Unmarshal(value, &e.Deleted)
			if err != nil {
				err = errors.Wrap(err, "unmarshalling deleted")
			}

		case 4:
			// Copy so the entry does not alias the caller's input buffer.
			e.Content = make([]byte, len(value))
			copy(e.Content, value)

		default:
			err = errors.Errorf("unexpected input %v", idx)
		}

		if err != nil {
			errs = multierror.Append(errs, err)
		}
	}, paths...)

	return e, errs.ErrorOrNil()
}

View File

@ -11,6 +11,8 @@ import (
// Compile-time assertions that each decoder type in this file satisfies
// common.ManifestDecoder.
var (
_ common.ManifestDecoder = Array{}
_ common.ManifestDecoder = ArrayFull{}
_ common.ManifestDecoder = Map{}
)
// Array is a stateless decoder type; the assertion above requires it to
// implement common.ManifestDecoder.
type Array struct{}
@ -68,3 +70,206 @@ func parseManifestFields(dec *json.Decoder, res *common.Manifest) error {
return nil
}
// ArrayFull is a manifest decoder that walks the input token by token with
// encoding/json and materializes every entry.
type ArrayFull struct{}

// Decode parses the manifest read from r and discards the decoded result,
// returning only the error, if any. gcStats is accepted to satisfy the decoder
// interface and is not used here.
func (d ArrayFull) Decode(r io.Reader, gcStats bool) error {
	if _, err := d.decodeManifestArray(r); err != nil {
		return err
	}

	return nil
}
// decodeManifestArray reads one manifest object from r: the opening curly
// brace, the manifest fields, and the closing curly brace. The manifest built
// so far is returned together with the first error encountered.
func (d ArrayFull) decodeManifestArray(r io.Reader) (common.Manifest, error) {
	var manifest common.Manifest

	dec := json.NewDecoder(r)

	err := expectDelimToken(dec, objectOpen)
	if err == nil {
		// Fields are decoded by hand because the stdlib decoder can't be reused
		// here due to memory issues.
		err = d.parseManifestEntries(dec, &manifest)
	}

	if err == nil {
		// Consume the closing curly brace. There is no EOF check afterwards:
		// json.Decode only guarantees decoding the next JSON item in the stream,
		// and this follows the same contract.
		err = expectDelimToken(dec, objectClose)
	}

	return manifest, err
}
// parseManifestEntries consumes the fields of the top-level manifest object
// and appends every decoded entry to res.Entries.
//
// Only the `entries` field is accepted, and it may appear at most once. This
// is stricter than the current JSON decoder in the stdlib.
func (d ArrayFull) parseManifestEntries(dec *json.Decoder, res *common.Manifest) error {
	sawEntries := false

	for dec.More() {
		name, err := stringToken(dec)
		if err != nil {
			return err
		}

		switch {
		case name != "entries":
			return errors.Errorf("unexpected field name %s", name)
		case sawEntries:
			return errors.New("repeated Entries field")
		}

		sawEntries = true

		if err := expectDelimToken(dec, arrayOpen); err != nil {
			return err
		}

		for dec.More() {
			entry, err := d.parseManifestFields(dec)
			if err != nil {
				return err
			}

			res.Entries = append(res.Entries, entry)
		}

		if err := expectDelimToken(dec, arrayClose); err != nil {
			return err
		}
	}

	return nil
}
// parseManifestFields decodes one manifest entry object from dec, starting at
// its opening curly brace and consuming through the matching closing brace.
//
// The recognized fields (id, labels, modified, deleted, data) are decoded into
// the returned entry and may each appear at most once; a repeat is an error.
// Unrecognized fields are skipped in their entirety, including nested objects
// and arrays.
func (d ArrayFull) parseManifestFields(dec *json.Decoder) (*common.ManifestEntry, error) {
	if err := expectDelimToken(dec, objectOpen); err != nil {
		return nil, err
	}

	var (
		seen = map[string]struct{}{}
		res  = &common.ManifestEntry{}
	)

	for dec.More() {
		l, err := stringToken(dec)
		if err != nil {
			return nil, err
		}

		if _, ok := seen[l]; ok {
			return nil, errors.Errorf("repeated field %s", l)
		}

		switch l {
		case "id":
			err = dec.Decode(&res.ID)

		case "labels":
			err = dec.Decode(&res.Labels)

		case "modified":
			err = dec.Decode(&res.ModTime)

		case "deleted":
			err = dec.Decode(&res.Deleted)

		case "data":
			err = dec.Decode(&res.Content)

		default:
			// One Token call would consume only the opening delimiter of an
			// object or array value, leaving its contents to be misread as
			// fields of this entry. Skip the whole value instead.
			if err := skipJSONValue(dec); err != nil {
				return nil, errors.Wrapf(err, "consuming value for unexpected field %s", l)
			}

			continue
		}

		seen[l] = struct{}{}

		if err != nil {
			return nil, errors.Wrapf(err, "decoding value for field %s", l)
		}
	}

	if err := expectDelimToken(dec, objectClose); err != nil {
		return nil, err
	}

	return res, nil
}

// skipJSONValue consumes exactly one complete JSON value from dec. Scalars take
// a single token; objects and arrays are read through to their matching
// closing delimiter by tracking nesting depth.
func skipJSONValue(dec *json.Decoder) error {
	depth := 0

	for {
		tok, err := dec.Token()
		if err != nil {
			return err
		}

		if delim, ok := tok.(json.Delim); ok {
			switch delim {
			case '{', '[':
				depth++
			case '}', ']':
				depth--
			}
		}

		if depth <= 0 {
			return nil
		}
	}
}
// Map is a manifest decoder that walks the input token by token with
// encoding/json and decodes each entry into a generic map.
type Map struct{}

// Decode parses the manifest read from r and discards the decoded result,
// returning only the error, if any. gcStats is accepted to satisfy the decoder
// interface and is not used here.
func (d Map) Decode(r io.Reader, gcStats bool) error {
	if _, err := d.decodeManifestArray(r); err != nil {
		return err
	}

	return nil
}
// decodeManifestArray reads one manifest object from r: the opening curly
// brace, the manifest fields, and the closing curly brace. The manifest value
// is returned together with the first error encountered.
func (d Map) decodeManifestArray(r io.Reader) (common.Manifest, error) {
	var manifest common.Manifest

	dec := json.NewDecoder(r)

	err := expectDelimToken(dec, objectOpen)
	if err == nil {
		// Fields are decoded by hand because the stdlib decoder can't be reused
		// here due to memory issues.
		err = d.parseManifestEntries(dec, &manifest)
	}

	if err == nil {
		// Consume the closing curly brace. There is no EOF check afterwards:
		// json.Decode only guarantees decoding the next JSON item in the stream,
		// and this follows the same contract.
		err = expectDelimToken(dec, objectClose)
	}

	return manifest, err
}
// parseManifestEntries consumes the fields of the top-level manifest object,
// decoding each element of the `entries` array into a throwaway
// map[string]any. Nothing is stored in res.
//
// Only the `entries` field is accepted, and it may appear at most once. This
// is stricter than the current JSON decoder in the stdlib.
func (d Map) parseManifestEntries(dec *json.Decoder, res *common.Manifest) error {
	sawEntries := false

	for dec.More() {
		name, err := stringToken(dec)
		if err != nil {
			return err
		}

		switch {
		case name != "entries":
			return errors.Errorf("unexpected field name %s", name)
		case sawEntries:
			return errors.New("repeated Entries field")
		}

		sawEntries = true

		if err := expectDelimToken(dec, arrayOpen); err != nil {
			return err
		}

		for dec.More() {
			// A fresh map per element. The decoded value is intentionally
			// dropped: converting it into a common.ManifestEntry was abandoned,
			// and this path only exists to check how many bytes generic map
			// decoding needs during benchmarking.
			generic := make(map[string]any)

			if err := dec.Decode(&generic); err != nil {
				return err
			}
		}

		if err := expectDelimToken(dec, arrayClose); err != nil {
			return err
		}
	}

	return nil
}

View File

@ -0,0 +1,25 @@
package decoder
import (
"encoding/json"
"io"
"github.com/pkg/errors"
"github.com/alcionai/corso/src/cmd/jsondebug/common"
)
// Compile-time assertion that Stdlib satisfies common.ManifestDecoder.
var _ common.ManifestDecoder = Stdlib{}

// Stdlib is a manifest decoder that delegates entirely to encoding/json.
type Stdlib struct{}

// Decode unmarshals the next JSON value from r into a common.Manifest and
// discards the result. A decode failure is returned wrapped with the message
// "decoding input". gcStats is not used here.
func (d Stdlib) Decode(r io.Reader, gcStats bool) error {
	var output common.Manifest

	err := json.NewDecoder(r).Decode(&output)
	if err == nil {
		return nil
	}

	return errors.Wrap(err, "decoding input")
}

View File

@ -7,8 +7,10 @@ require (
github.com/alcionai/clues v0.0.0-20230324015051-5f61be9f301e
github.com/aws/aws-sdk-go v1.44.220
github.com/aws/aws-xray-sdk-go v1.8.1
github.com/buger/jsonparser v1.1.1
github.com/cenkalti/backoff/v4 v4.2.0
github.com/google/uuid v1.3.0
github.com/hashicorp/go-multierror v1.1.1
github.com/kopia/kopia v0.12.2-0.20230327171220-747baeebdab1
github.com/microsoft/kiota-abstractions-go v0.18.0
github.com/microsoft/kiota-authentication-azure-go v0.6.0
@ -38,9 +40,9 @@ require (
github.com/VividCortex/ewma v1.2.0 // indirect
github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d // indirect
github.com/andybalholm/brotli v1.0.4 // indirect
github.com/buger/jsonparser v1.1.1 // indirect
github.com/dnaeon/go-vcr v1.2.0 // indirect
github.com/fsnotify/fsnotify v1.6.0 // indirect
github.com/hashicorp/errwrap v1.0.0 // indirect
github.com/hashicorp/hcl v1.0.0 // indirect
github.com/magiconair/properties v1.8.7 // indirect
github.com/mitchellh/mapstructure v1.5.0 // indirect

View File

@ -181,6 +181,10 @@ github.com/googleapis/google-cloud-go-testing v0.0.0-20200911160855-bcd43fbb19e8
github.com/gorilla/mux v1.8.0 h1:i40aqfkR1h2SlN9hojwV5ZA91wcXFOvkdNIeFDP5koI=
github.com/grpc-ecosystem/go-grpc-middleware v1.3.0 h1:+9834+KizmvFV7pXQGSXQTsaWhq2GjuNUt0aUU0YBYw=
github.com/hanwen/go-fuse/v2 v2.2.0 h1:jo5QZYmBLNcl9ovypWaQ5yXMSSV+Ch68xoC3rtZvvBM=
github.com/hashicorp/errwrap v1.0.0 h1:hLrqtEDnRye3+sgx6z4qVLNuviH3MR5aQ0ykNJa/UYA=
github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4=
github.com/hashicorp/go-multierror v1.1.1 h1:H5DkEtf6CXdFp0N0Em5UCwQpXMWke8IA0+lD48awMYo=
github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM=
github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
github.com/hashicorp/golang-lru v0.5.4 h1:YDjusn29QI/Das2iO9M0BHnIbxPeyuCHsjMW+lJfyTc=