diff --git a/src/cmd/jsondebug/decoder/jsonparser_decode.go b/src/cmd/jsondebug/decoder/jsonparser_decode.go
new file mode 100644
index 000000000..7971f1e86
--- /dev/null
+++ b/src/cmd/jsondebug/decoder/jsonparser_decode.go
@@ -0,0 +1,133 @@
+package decoder
+
+import (
+	"encoding/json"
+	"fmt"
+	"io"
+	"time"
+
+	"github.com/buger/jsonparser"
+	"github.com/hashicorp/go-multierror"
+	"github.com/pkg/errors"
+
+	"github.com/alcionai/corso/src/cmd/jsondebug/common"
+)
+
+var _ common.ManifestDecoder = JsonParser{}
+
+// JsonParser decodes a manifest with github.com/buger/jsonparser. It reads the
+// whole input into memory before parsing.
+//
+//revive:disable-next-line:var-naming
+type JsonParser struct{}
+
+// Decode reads all of r and parses it as a manifest. When gcStats is set,
+// memory usage is printed before the read and before and after the parse.
+func (d JsonParser) Decode(r io.Reader, gcStats bool) error {
+	if gcStats {
+		common.PrintMemUsage()
+	}
+
+	data, err := io.ReadAll(r)
+	if err != nil {
+		return errors.Wrap(err, "reading data")
+	}
+
+	return parseManifestData(data, gcStats)
+}
+
+// parseManifestData decodes every element of the top-level "entries" array in
+// data. Entries that fail to decode are skipped and their errors are collected
+// and returned together.
+func parseManifestData(data []byte, gcStats bool) error {
+	if gcStats {
+		common.PrintMemUsage()
+	}
+
+	var (
+		errs   *multierror.Error
+		output = common.Manifest{
+			Entries: []*common.ManifestEntry{},
+		}
+	)
+
+	// The error returned by ArrayEach itself is still deliberately ignored; only
+	// per-entry decode errors are reported.
+	//nolint:errcheck
+	jsonparser.ArrayEach(data, func(value []byte, _ jsonparser.ValueType, _ int, _ error) {
+		e, err := getManifestEntry(value)
+		if err != nil {
+			errs = multierror.Append(errs, err)
+			return
+		}
+
+		output.Entries = append(output.Entries, e)
+	}, "entries")
+
+	if gcStats {
+		common.PrintMemUsage()
+
+		fmt.Printf("Decoded %d entries\n", len(output.Entries))
+	}
+
+	return errs.ErrorOrNil()
+}
+
+// getManifestEntry decodes a single manifest entry object from data. Errors
+// from all fields are collected rather than stopping at the first one. The
+// returned entry may be partially populated when the error is non-nil.
+func getManifestEntry(data []byte) (*common.ManifestEntry, error) {
+	var (
+		errs  *multierror.Error
+		e     = &common.ManifestEntry{}
+		paths = [][]string{
+			{"id"},
+			{"labels"},
+			{"modified"},
+			{"deleted"},
+			{"data"},
+		}
+	)
+
+	// The switch cases below are indexes into paths.
+	jsonparser.EachKey(data, func(idx int, value []byte, _ jsonparser.ValueType, _ error) {
+		// err is scoped to a single callback so a failure on one field is not
+		// reported again for every field visited after it.
+		var err error
+
+		switch idx {
+		case 0:
+			e.ID = string(value)
+
+		case 1:
+			err = json.Unmarshal(value, &e.Labels)
+			if err != nil {
+				err = errors.Wrap(err, "unmarshalling labels")
+			}
+
+		case 2:
+			e.ModTime, err = time.Parse(time.RFC3339, string(value))
+			if err != nil {
+				err = errors.Wrap(err, "unmarshalling modtime")
+			}
+
+		case 3:
+			err = json.Unmarshal(value, &e.Deleted)
+			if err != nil {
+				err = errors.Wrap(err, "unmarshalling deleted")
+			}
+
+		case 4:
+			// Copy instead of retaining value, which may alias the input buffer.
+			e.Content = make([]byte, len(value))
+			copy(e.Content, value)
+
+		default:
+			err = errors.Errorf("unexpected input %v", idx)
+		}
+
+		errs = multierror.Append(errs, err)
+	}, paths...)
+
+	return e, errs.ErrorOrNil()
+}
diff --git a/src/cmd/jsondebug/decoder/manifst_decoder.go b/src/cmd/jsondebug/decoder/manifst_decoder.go
index 5c2c5c832..7dc4f4dc0 100644
--- a/src/cmd/jsondebug/decoder/manifst_decoder.go
+++ b/src/cmd/jsondebug/decoder/manifst_decoder.go
@@ -11,6 +11,8 @@ import (
 
 var (
 	_ common.ManifestDecoder = Array{}
+	_ common.ManifestDecoder = ArrayFull{}
+	_ common.ManifestDecoder = Map{}
 )
 
 type Array struct{}
@@ -68,3 +70,228 @@ func parseManifestFields(dec *json.Decoder, res *common.Manifest) error {
 
 	return nil
 }
+
+// ArrayFull decodes a manifest by walking the JSON token stream and decoding
+// each entry field by field into a common.ManifestEntry.
+type ArrayFull struct{}
+
+// Decode parses the manifest in r and discards the result. gcStats is unused.
+func (d ArrayFull) Decode(r io.Reader, gcStats bool) error {
+	_, err := d.decodeManifestArray(r)
+	return err
+}
+
+// decodeManifestArray decodes the top-level manifest object from r. On error
+// the returned manifest holds whatever entries were decoded so far.
+func (d ArrayFull) decodeManifestArray(r io.Reader) (common.Manifest, error) {
+	var (
+		dec = json.NewDecoder(r)
+		res = common.Manifest{}
+	)
+
+	if err := expectDelimToken(dec, objectOpen); err != nil {
+		return res, err
+	}
+
+	// Need to manually decode fields here since we can't reuse the stdlib
+	// decoder due to memory issues.
+	if err := d.parseManifestEntries(dec, &res); err != nil {
+		return res, err
+	}
+
+	// Consumes closing object curly brace after we're done. Don't need to check
+	// for EOF because json.Decode only guarantees decoding the next JSON item in
+	// the stream so this follows that.
+	return res, expectDelimToken(dec, objectClose)
+}
+
+// parseManifestEntries consumes the fields of the top-level object. Only a
+// single "entries" array is accepted; each element is appended to res.Entries.
+func (d ArrayFull) parseManifestEntries(dec *json.Decoder, res *common.Manifest) error {
+	var seen bool
+
+	for dec.More() {
+		l, err := stringToken(dec)
+		if err != nil {
+			return err
+		}
+
+		// Only have `entries` field right now. This is stricter than the current
+		// JSON decoder in the stdlib.
+		if l != "entries" {
+			return errors.Errorf("unexpected field name %s", l)
+		} else if seen {
+			return errors.New("repeated Entries field")
+		}
+
+		seen = true
+
+		if err := expectDelimToken(dec, arrayOpen); err != nil {
+			return err
+		}
+
+		for dec.More() {
+			ent, err := d.parseManifestFields(dec)
+			if err != nil {
+				return err
+			}
+
+			res.Entries = append(res.Entries, ent)
+		}
+
+		if err := expectDelimToken(dec, arrayClose); err != nil {
+			return err
+		}
+	}
+
+	return nil
+}
+
+// parseManifestFields decodes one entry object. Known fields may appear at most
+// once; unknown fields are skipped.
+func (d ArrayFull) parseManifestFields(dec *json.Decoder) (*common.ManifestEntry, error) {
+	if err := expectDelimToken(dec, objectOpen); err != nil {
+		return nil, err
+	}
+
+	var (
+		seen = map[string]struct{}{}
+		res  = &common.ManifestEntry{}
+	)
+
+	for dec.More() {
+		l, err := stringToken(dec)
+		if err != nil {
+			return nil, err
+		}
+
+		if _, ok := seen[l]; ok {
+			return nil, errors.Errorf("repeated field %s", l)
+		}
+
+		switch l {
+		case "id":
+			err = dec.Decode(&res.ID)
+
+		case "labels":
+			err = dec.Decode(&res.Labels)
+
+		case "modified":
+			err = dec.Decode(&res.ModTime)
+
+		case "deleted":
+			err = dec.Decode(&res.Deleted)
+
+		case "data":
+			err = dec.Decode(&res.Content)
+
+		default:
+			// Decode the whole value, not just its first token, so that unknown
+			// object or array values are skipped in full.
+			var skipped json.RawMessage
+
+			if err := dec.Decode(&skipped); err != nil {
+				return nil, errors.Wrapf(err, "consuming value for unexpected field %s", l)
+			}
+
+			continue
+		}
+
+		seen[l] = struct{}{}
+
+		if err != nil {
+			return nil, errors.Wrapf(err, "decoding value for field %s", l)
+		}
+	}
+
+	if err := expectDelimToken(dec, objectClose); err != nil {
+		return nil, err
+	}
+
+	return res, nil
+}
+
+// Map decodes a manifest by walking the JSON token stream and decoding each
+// entry into a generic map[string]any. It exists to measure that approach;
+// decoded entries are discarded.
+type Map struct{}
+
+// Decode parses the manifest in r and discards the result. gcStats is unused.
+func (d Map) Decode(r io.Reader, gcStats bool) error {
+	_, err := d.decodeManifestArray(r)
+	return err
+}
+
+// decodeManifestArray decodes the top-level manifest object from r. The
+// returned manifest is always empty because Map does not keep entries.
+func (d Map) decodeManifestArray(r io.Reader) (common.Manifest, error) {
+	var (
+		dec = json.NewDecoder(r)
+		res = common.Manifest{}
+	)
+
+	if err := expectDelimToken(dec, objectOpen); err != nil {
+		return res, err
+	}
+
+	// Need to manually decode fields here since we can't reuse the stdlib
+	// decoder due to memory issues.
+	if err := d.parseManifestEntries(dec, &res); err != nil {
+		return res, err
+	}
+
+	// Consumes closing object curly brace after we're done. Don't need to check
+	// for EOF because json.Decode only guarantees decoding the next JSON item in
+	// the stream so this follows that.
+	return res, expectDelimToken(dec, objectClose)
+}
+
+// parseManifestEntries consumes the fields of the top-level object. Only a
+// single "entries" array is accepted; its elements are decoded and dropped, so
+// res is left untouched.
+func (d Map) parseManifestEntries(dec *json.Decoder, res *common.Manifest) error {
+	var seen bool
+
+	for dec.More() {
+		l, err := stringToken(dec)
+		if err != nil {
+			return err
+		}
+
+		// Only have `entries` field right now. This is stricter than the current
+		// JSON decoder in the stdlib.
+		if l != "entries" {
+			return errors.Errorf("unexpected field name %s", l)
+		} else if seen {
+			return errors.New("repeated Entries field")
+		}
+
+		seen = true
+
+		if err := expectDelimToken(dec, arrayOpen); err != nil {
+			return err
+		}
+
+		for dec.More() {
+			ent := map[string]any{}
+
+			if err := dec.Decode(&ent); err != nil {
+				return err
+			}
+
+			// Give up here, just check how many bytes it needs during benchmarking.
+			// fmt.Printf("%+v\n", ent)
+			// return errors.New("exit early")
+
+			// me := &common.ManifestEntry{
+			// ModTime:
+			// }
+		} //nolint: wsl
+
+		if err := expectDelimToken(dec, arrayClose); err != nil {
+			return err
+		}
+	}
+
+	return nil
+}
diff --git a/src/cmd/jsondebug/decoder/stdlib_decode.go b/src/cmd/jsondebug/decoder/stdlib_decode.go
new file mode 100644
index 000000000..01bef812f
--- /dev/null
+++ b/src/cmd/jsondebug/decoder/stdlib_decode.go
@@ -0,0 +1,27 @@
+package decoder
+
+import (
+	"encoding/json"
+	"io"
+
+	"github.com/pkg/errors"
+
+	"github.com/alcionai/corso/src/cmd/jsondebug/common"
+)
+
+var _ common.ManifestDecoder = Stdlib{}
+
+// Stdlib decodes a manifest with a single encoding/json Decoder.Decode call.
+type Stdlib struct{}
+
+// Decode parses the manifest in r and discards the result. gcStats is unused.
+func (d Stdlib) Decode(r io.Reader, gcStats bool) error {
+	dec := json.NewDecoder(r)
+	output := common.Manifest{}
+
+	if err := dec.Decode(&output); err != nil {
+		return errors.Wrap(err, "decoding input")
+	}
+
+	return nil
+}
diff --git a/src/go.mod b/src/go.mod
index ca027d4a9..a7ad3eb45 100644
--- a/src/go.mod
+++ b/src/go.mod
@@ -7,8 +7,10 @@ require (
 	github.com/alcionai/clues v0.0.0-20230324015051-5f61be9f301e
 	github.com/aws/aws-sdk-go v1.44.220
 	github.com/aws/aws-xray-sdk-go v1.8.1
+	github.com/buger/jsonparser v1.1.1
 	github.com/cenkalti/backoff/v4 v4.2.0
 	github.com/google/uuid v1.3.0
+	github.com/hashicorp/go-multierror v1.1.1
 	github.com/kopia/kopia v0.12.2-0.20230327171220-747baeebdab1
 	github.com/microsoft/kiota-abstractions-go v0.18.0
 	github.com/microsoft/kiota-authentication-azure-go v0.6.0
@@ -38,9 +40,9 @@ require (
 	github.com/VividCortex/ewma v1.2.0 // indirect
 	github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d // indirect
 	github.com/andybalholm/brotli v1.0.4 // indirect
-	github.com/buger/jsonparser v1.1.1 // indirect
 	github.com/dnaeon/go-vcr v1.2.0 // indirect
 	github.com/fsnotify/fsnotify v1.6.0 // indirect
+	github.com/hashicorp/errwrap v1.0.0 // indirect
 	github.com/hashicorp/hcl v1.0.0 // indirect
 	github.com/magiconair/properties v1.8.7 // indirect
 	github.com/mitchellh/mapstructure v1.5.0 // indirect
diff --git a/src/go.sum b/src/go.sum
index 5a77ebd43..cc83721e6 100644
--- a/src/go.sum
+++ b/src/go.sum
@@ -181,6 +181,10 @@ github.com/googleapis/google-cloud-go-testing v0.0.0-20200911160855-bcd43fbb19e8
 github.com/gorilla/mux v1.8.0 h1:i40aqfkR1h2SlN9hojwV5ZA91wcXFOvkdNIeFDP5koI=
 github.com/grpc-ecosystem/go-grpc-middleware v1.3.0 h1:+9834+KizmvFV7pXQGSXQTsaWhq2GjuNUt0aUU0YBYw=
 github.com/hanwen/go-fuse/v2 v2.2.0 h1:jo5QZYmBLNcl9ovypWaQ5yXMSSV+Ch68xoC3rtZvvBM=
+github.com/hashicorp/errwrap v1.0.0 h1:hLrqtEDnRye3+sgx6z4qVLNuviH3MR5aQ0ykNJa/UYA=
+github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4=
+github.com/hashicorp/go-multierror v1.1.1 h1:H5DkEtf6CXdFp0N0Em5UCwQpXMWke8IA0+lD48awMYo=
+github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM=
 github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
 github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
 github.com/hashicorp/golang-lru v0.5.4 h1:YDjusn29QI/Das2iO9M0BHnIbxPeyuCHsjMW+lJfyTc=