Compare commits

...

19 Commits

Author SHA1 Message Date
Ashlie Martinez
7b901ce186 Add benchmarks for just byte buffers
Don't add the time/memory it takes to get the input data to the
benchmark results.
2023-03-31 16:51:17 -07:00
Ashlie Martinez
1cc3247721 Interface/code for running benchmarks on buffers
Setup things so we can remove the time it takes to read the input data.
2023-03-31 16:50:09 -07:00
Ashlie Martinez
51570be16c Setup some benchmarks 2023-03-31 15:27:44 -07:00
Ashlie Martinez
ed43203544 Add other decode methods 2023-03-31 15:27:44 -07:00
Ashlie Martinez
57454cb00b Interface for benchmarking 2023-03-31 15:27:44 -07:00
Ashlie Martinez
fda1488ae2 Fix lint errors 2023-03-31 15:27:44 -07:00
Ashlie Martinez
7dddc97bbc Fixup file names 2023-03-31 15:11:33 -07:00
Ashlie Martinez
8a7e607474 Function to decode manifests with custom code 2023-03-31 14:33:29 -07:00
Ashlie Martinez
e3dbbc546a Just use errors package for portability 2023-03-31 12:41:17 -07:00
Ashlie Martinez
e02cbfdb73 Code to generate manifest entries
Uses random data for most fields.
2023-03-31 12:40:34 -07:00
Ashlie Martinez
0e74d15259 Add structs for kopia manifests 2023-03-31 12:32:10 -07:00
Vaibhav Kamra
fb08c2374e Add benchmark tests 2023-03-31 10:29:40 -07:00
Vaibhav Kamra
7f91344fda Handle all manifest entry fields 2023-03-31 00:34:28 -07:00
Vaibhav Kamra
62bfed94d6 Change struct to include RawMessage and update jsonparser 2023-03-30 20:47:43 -07:00
Ashlie Martinez
dddbd36969 Update to new struct layout
Also cleanup the code so it's a bit more linear.
2023-03-30 14:01:46 -07:00
Vaibhav Kamra
2bc40b4a39 Jsonparser prototype 2023-03-30 13:39:18 -07:00
Ashlie Martinez
f1b65c9f8b Test program of custom JSON array decoder 2023-03-30 12:43:33 -07:00
Ashlie Martinez
4316136de8 Basic helper to stream a json array
Uses regular decode to decode actual object data for items in the array.
2023-03-30 12:42:53 -07:00
Ashlie Martinez
f00970493d Helper programs to repro JSON deserialize mem use
JSON deserialize of arrays is inefficient for memory. This is a minimal
reproducer to show that it uses lots more memory than expected.

Build each program separately, run gen and then you can run goread for
the repro and some memory usage stats.
2023-03-30 10:41:56 -07:00
16 changed files with 1327 additions and 0 deletions

View File

@ -0,0 +1,33 @@
package main
import (
"fmt"
"os"
"github.com/alcionai/corso/src/cmd/jsondebug/common"
"github.com/alcionai/corso/src/cmd/jsondebug/decoder"
)
// main performs a single streaming decode of the generated input file.
func main() {
	readData()
}

// readData opens common.FileName, decodes it with decoder.DecodeFooArray and
// then prints memory statistics followed by the number of decoded entries.
// Any failure is reported on stdout and ends the run early.
func readData() {
	input, openErr := os.Open(common.FileName)
	if openErr != nil {
		fmt.Printf("Error opening input file: %v\n", openErr)
		return
	}

	defer input.Close()

	decoded, decodeErr := decoder.DecodeFooArray(input)
	if decodeErr != nil {
		fmt.Printf("Error decoding input: %v\n", decodeErr)
		return
	}

	common.PrintMemUsage()

	entryCount := len(decoded.Entries)
	fmt.Printf("got array with %d items\n", entryCount)
}

View File

@ -0,0 +1,9 @@
package main
import "testing"
// Benchmark_readData runs one complete readData call per benchmark iteration.
func Benchmark_readData(b *testing.B) {
	for remaining := b.N; remaining > 0; remaining-- {
		readData()
	}
}

View File

@ -0,0 +1,123 @@
package benchmark
import (
"compress/gzip"
"io"
"os"
"testing"
"github.com/alcionai/corso/src/cmd/jsondebug/common"
"github.com/alcionai/corso/src/cmd/jsondebug/decoder"
)
// runBenchmarkByteInput benchmarks d.DecodeBytes against the contents of
// common.ManifestFileName.
//
// The input file is read exactly once, before the timer is reset, so neither
// the file I/O nor the input buffer allocation is attributed to the decoder.
// Resetting the timer inside the b.N loop (as was done previously) throws away
// every iteration but the last and makes the reported ns/op meaningless.
func runBenchmarkByteInput(b *testing.B, d common.ByteManifestDecoder) {
	data, err := os.ReadFile(common.ManifestFileName)
	if err != nil {
		b.Fatalf("Error reading input data: %v", err)
	}

	b.ResetTimer()

	for i := 0; i < b.N; i++ {
		if err := d.DecodeBytes(data, false); err != nil {
			b.Fatalf("Error decoding json: %v", err)
		}
	}
}
// runBenchmark benchmarks d.Decode reading from the manifest file on disk,
// once for the plain file ("NotZipped") and once for the gzip-compressed copy
// ("Zipped").
//
// Opening the file and constructing the gzip reader happen with the timer
// stopped; only the Decode call (including any decompression it triggers while
// reading) is measured.
func runBenchmark(b *testing.B, d common.ManifestDecoder) {
	for _, unzip := range []string{"NotZipped", "Zipped"} {
		zipped := unzip == "Zipped"

		fn := common.ManifestFileName
		if zipped {
			fn += common.GzipSuffix
		}

		b.Run(unzip, func(b *testing.B) {
			for i := 0; i < b.N; i++ {
				b.StopTimer()
				decodeManifestFile(b, d, fn, zipped)
			}
		})
	}
}

// decodeManifestFile runs a single timed Decode of the file fn. It lives in
// its own function so the deferred Close calls fire at the end of every
// iteration instead of piling up open descriptors until the benchmark ends.
// The timer is expected to be stopped on entry and is left stopped on return.
func decodeManifestFile(
	b *testing.B,
	d common.ManifestDecoder,
	fn string,
	zipped bool,
) {
	b.Helper()

	f, err := os.Open(fn)
	if err != nil {
		b.Fatalf("Error opening input file: %v", err)
	}

	defer f.Close()

	var r io.Reader = f

	if zipped {
		gz, err := gzip.NewReader(f)
		if err != nil {
			b.Fatalf("Error getting gzip reader: %v", err)
		}

		defer gz.Close()

		r = gz
	}

	b.StartTimer()
	err = d.Decode(r, false)
	b.StopTimer()

	if err != nil {
		b.Fatalf("Error decoding json: %v", err)
	}
}
// benchmarkInfo pairs a decoder implementation with the name used for its
// sub-benchmark.
type benchmarkInfo struct {
name string
dec common.Decoder
}
// decoderTable lists every decoder that Benchmark_FromFile and
// Benchmark_FromBytes run.
var decoderTable = []benchmarkInfo{
{
name: "Stdlib",
dec: decoder.Stdlib{},
},
{
name: "JsonParser",
dec: decoder.JsonParser{},
},
{
name: "Array",
dec: decoder.Array{},
},
{
name: "ArrayFull",
dec: decoder.ArrayFull{},
},
{
name: "Map",
dec: decoder.Map{},
},
}
// Benchmark_FromFile runs the file-backed benchmark for each decoder in
// decoderTable, one named sub-benchmark per decoder.
func Benchmark_FromFile(b *testing.B) {
	for idx := range decoderTable {
		entry := decoderTable[idx]

		b.Run(entry.name, func(sub *testing.B) {
			runBenchmark(sub, entry.dec)
		})
	}
}
// Benchmark_FromBytes runs the in-memory benchmark for each decoder in
// decoderTable, one named sub-benchmark per decoder.
func Benchmark_FromBytes(b *testing.B) {
	for idx := range decoderTable {
		entry := decoderTable[idx]

		b.Run(entry.name, func(sub *testing.B) {
			runBenchmarkByteInput(sub, entry.dec)
		})
	}
}

View File

@ -0,0 +1,129 @@
package common
import (
"encoding/json"
"fmt"
"io"
"runtime"
"time"
)
// Shared settings for the generator and reader programs in this directory.
const (
// NumItems is the number of entries the generators write.
NumItems = 300000
// ItemSize is the size, in bytes, of the random payload buffer the Foo
// generator fills for each entry.
ItemSize = 1024
// GzipSuffix is appended to ManifestFileName to name the compressed copy.
GzipSuffix = ".gz"
// FileName is the file the Foo generator writes and the Foo readers open.
FileName = "input.json"
)
// ManifestFileName is the manifest input file name; it embeds NumItems.
var ManifestFileName = fmt.Sprintf("manifest-input.%d.json", NumItems)
// FooArray is the top-level JSON document: an object with a single "entries"
// array.
type FooArray struct {
Entries []*Foo `json:"entries"`
}
// Foo is one element of FooArray.Entries. Content holds the raw JSON stored
// under the "data" key and is not decoded further by this type.
type Foo struct {
ID string `json:"id"`
Labels map[string]string `json:"labels"`
ModTime time.Time `json:"modified"`
Deleted bool `json:"deleted,omitempty"`
Content json.RawMessage `json:"data"`
}
// Content is the payload the Foo generator serializes into Foo.Content.
type Content struct {
ID string `json:"id"`
Data []byte `json:"data"`
}
// Manifest is the top-level manifest document: an object with a single
// "entries" array.
type Manifest struct {
Entries []*ManifestEntry `json:"entries"`
}
// ManifestEntry is one element of Manifest.Entries. Content holds the raw JSON
// stored under the "data" key; the manifest generator fills it with a
// serialized SnapManifest.
type ManifestEntry struct {
ID string `json:"id"`
Labels map[string]string `json:"labels"`
ModTime time.Time `json:"modified"`
Deleted bool `json:"deleted,omitempty"`
Content json.RawMessage `json:"data"`
}
// SnapManifest is the structure the manifest generator serializes into
// ManifestEntry.Content.
// NOTE(review): the commit log describes these as structs for kopia manifests;
// confirm field parity against kopia before relying on it.
type SnapManifest struct {
ID string `json:"id"`
Source SourceInfo `json:"source"`
Description string `json:"description"`
StartTime int64 `json:"startTime"`
EndTime int64 `json:"endTime"`
Stats StatsS `json:"stats,omitempty"`
IncompleteReason string `json:"incomplete,omitempty"`
RootEntry *DirEntry `json:"rootEntry"`
Tags map[string]string `json:"tags,omitempty"`
}
// SourceInfo is the host/user/path triple stored in SnapManifest.Source.
type SourceInfo struct {
Host string `json:"host"`
UserName string `json:"userName"`
Path string `json:"path"`
}
// StatsS holds the size and count fields stored in SnapManifest.Stats.
type StatsS struct {
TotalFileSize int64 `json:"totalSize"`
ExcludedTotalFileSize int64 `json:"excludedTotalSize"`
TotalFileCount int32 `json:"fileCount"`
CachedFiles int32 `json:"cachedFiles"`
NonCachedFiles int32 `json:"nonCachedFiles"`
TotalDirectoryCount int32 `json:"dirCount"`
ExcludedFileCount int32 `json:"excludedFileCount"`
ExcludedDirCount int32 `json:"excludedDirCount"`
IgnoredErrorCount int32 `json:"ignoredErrorCount"`
ErrorCount int32 `json:"errorCount"`
}
// DirEntry is the entry stored in SnapManifest.RootEntry. Every field is
// omitted from the JSON output when it holds its zero value.
type DirEntry struct {
Name string `json:"name,omitempty"`
EntryType string `json:"type,omitempty"`
Permissions int `json:"mode,omitempty"`
FileSize int64 `json:"size,omitempty"`
ModTime int64 `json:"mtime,omitempty"`
UserID int32 `json:"uid,omitempty"`
GroupID int32 `json:"gid,omitempty"`
ObjectID string `json:"obj,omitempty"`
DirSummary *DirectorySummary `json:"summ,omitempty"`
}
// DirectorySummary holds the aggregate fields stored in DirEntry.DirSummary.
type DirectorySummary struct {
TotalFileSize int64 `json:"size"`
TotalFileCount int64 `json:"files"`
TotalSymlinkCount int64 `json:"symlinks"`
TotalDirCount int64 `json:"dirs"`
MaxModTime int64 `json:"maxTime"`
IncompleteReason string `json:"incomplete,omitempty"`
FatalErrorCount int `json:"numFailed"`
IgnoredErrorCount int `json:"numIgnoredErrors,omitempty"`
}
// Decoder is implemented by decoders that accept both stream and byte-slice
// input.
type Decoder interface {
ManifestDecoder
ByteManifestDecoder
}
// ManifestDecoder decodes a manifest read from r. gcStats is a flag some
// implementations use to print memory statistics while decoding.
type ManifestDecoder interface {
Decode(r io.Reader, gcStats bool) error
}
// ByteManifestDecoder decodes a manifest already held in memory. gcStats has
// the same meaning as for ManifestDecoder.
type ByteManifestDecoder interface {
DecodeBytes(data []byte, gcStats bool) error
}
// PrintMemUsage writes a one-line snapshot of the Go runtime's memory
// statistics (Alloc, TotalAlloc and Sys in MiB, plus the GC cycle count) to
// stdout.
func PrintMemUsage() {
	var stats runtime.MemStats

	runtime.ReadMemStats(&stats)

	// For info on each, see: https://golang.org/pkg/runtime/#MemStats
	fmt.Printf(
		"Alloc = %v MiB\tTotalAlloc = %v MiB\tSys = %v MiB\tNumGC = %v\n",
		bToMb(stats.Alloc),
		bToMb(stats.TotalAlloc),
		bToMb(stats.Sys),
		stats.NumGC,
	)
}

// bToMb converts a byte count to whole mebibytes, rounding down.
func bToMb(b uint64) uint64 {
	const bytesPerMiB = 1024 * 1024

	return b / bytesPerMiB
}

View File

@ -0,0 +1,131 @@
package decoder
import (
"encoding/json"
"io"
"github.com/pkg/errors"
"github.com/alcionai/corso/src/cmd/jsondebug/common"
)
// String forms of the JSON delimiters, compared against json.Delim.String().
const (
objectOpen = "{"
objectClose = "}"
arrayOpen = "["
arrayClose = "]"
)
// errEOF is returned when the input ends where another token was required.
var errEOF = errors.New("unexpected end of input")
// expectDelimToken reads the next token from dec and checks that it is the
// JSON delimiter whose string form equals expectedToken (one of objectOpen,
// objectClose, arrayOpen, arrayClose).
//
// It returns errEOF (with a stack trace) if the input ends, a wrapped error if
// the token cannot be read, and a descriptive error if the token is not a
// delimiter or is a different delimiter.
func expectDelimToken(dec *json.Decoder, expectedToken string) error {
	t, err := dec.Token()

	// errors.Is keeps EOF detection consistent with stringToken.
	if errors.Is(err, io.EOF) {
		return errors.WithStack(errEOF)
	}

	if err != nil {
		return errors.Wrap(err, "reading JSON token")
	}

	d, ok := t.(json.Delim)
	if !ok {
		return errors.Errorf("unexpected token: (%T) %v", t, t)
	}

	if d.String() != expectedToken {
		return errors.Errorf(
			"unexpected token; wanted %s, got %s",
			expectedToken,
			d,
		)
	}

	return nil
}
// stringToken reads the next token from dec and returns it if it is a string,
// which is how object field names are delivered. End of input yields errEOF;
// any other token type yields an error.
func stringToken(dec *json.Decoder) (string, error) {
	tok, tokErr := dec.Token()

	switch {
	case errors.Is(tokErr, io.EOF):
		return "", errors.WithStack(errEOF)
	case tokErr != nil:
		return "", errors.Wrap(tokErr, "reading JSON token")
	}

	if name, isString := tok.(string); isString {
		return name, nil
	}

	return "", errors.Errorf("unexpected token (%T) %v; wanted field name", tok, tok)
}
// DecodeFooArray reads a single JSON object from r and decodes it into a
// common.FooArray, handling the "entries" array one element at a time rather
// than handing the whole document to the stdlib decoder. On error the
// partially filled result is returned alongside the error.
func DecodeFooArray(r io.Reader) (common.FooArray, error) {
	var out common.FooArray

	stream := json.NewDecoder(r)

	if openErr := expectDelimToken(stream, objectOpen); openErr != nil {
		return out, openErr
	}

	// Fields are walked by hand; decoding the whole object with the stdlib
	// decoder is what causes the memory issues this code works around.
	if fieldErr := parseFields(stream, &out); fieldErr != nil {
		return out, fieldErr
	}

	// Consume the closing curly brace. Trailing data is not checked, which
	// matches json.Decode only decoding the next JSON item in the stream.
	closeErr := expectDelimToken(stream, objectClose)

	return out, closeErr
}
// parseFields consumes the fields of the top-level object from dec (whose
// opening brace has already been read) and fills res.
//
// Only the "entries" field is accepted; any other field name is an error. The
// closing brace is left for the caller to consume.
func parseFields(dec *json.Decoder, res *common.FooArray) error {
	for dec.More() {
		// Share the field-name handling with the manifest decoders instead of
		// re-implementing token reading and type checking here.
		l, err := stringToken(dec)
		if err != nil {
			return err
		}

		// Only have `entries` field right now. Needs to match the JSON tag for
		// the struct.
		if l != "entries" {
			return errors.Errorf("unexpected field name %s", l)
		}

		if err := decodeArray(dec, &res.Entries); err != nil {
			return err
		}
	}

	return nil
}
// decodeArray consumes one JSON array from dec, decoding each element with the
// stdlib decoder and appending it to *output. Elements decoded before a
// failure stay in *output.
func decodeArray[T any](dec *json.Decoder, output *[]T) error {
	// Opening bracket.
	if openErr := expectDelimToken(dec, arrayOpen); openErr != nil {
		return openErr
	}

	// Elements, one at a time.
	for dec.More() {
		var elem T

		decodeErr := dec.Decode(&elem)
		if decodeErr != nil {
			return errors.Wrap(decodeErr, "decoding array element")
		}

		*output = append(*output, elem)
	}

	// Closing bracket.
	return expectDelimToken(dec, arrayClose)
}

View File

@ -0,0 +1,134 @@
package decoder
import (
"encoding/json"
"fmt"
"io"
"io/ioutil"
"time"
"github.com/buger/jsonparser"
"github.com/hashicorp/go-multierror"
"github.com/pkg/errors"
"github.com/alcionai/corso/src/cmd/jsondebug/common"
)
var _ common.ManifestDecoder = JsonParser{}

// JsonParser decodes manifests with github.com/buger/jsonparser.
//
//revive:disable-next-line:var-naming
type JsonParser struct{}

// Decode reads all of r into memory and parses it as a manifest. When gcStats
// is set, memory usage is printed before the read.
func (d JsonParser) Decode(r io.Reader, gcStats bool) error {
	if gcStats {
		common.PrintMemUsage()
	}

	raw, readErr := ioutil.ReadAll(r)
	if readErr == nil {
		return parseManifestData(raw, gcStats)
	}

	return errors.Wrap(readErr, "reading data")
}

// DecodeBytes parses data as a manifest. When gcStats is set, memory usage is
// printed before parsing.
func (d JsonParser) DecodeBytes(data []byte, gcStats bool) error {
	if gcStats {
		common.PrintMemUsage()
	}

	parseErr := parseManifestData(data, gcStats)

	return parseErr
}
// parseManifestData walks the "entries" array in data and decodes every
// element into a common.ManifestEntry.
//
// Entries that fail to decode are skipped and their errors collected; a
// failure of the array iteration itself (e.g. missing or malformed "entries")
// is collected as well. The combined error, or nil, is returned. When gcStats
// is set, memory usage is printed before and after parsing along with the
// number of entries decoded.
func parseManifestData(data []byte, gcStats bool) error {
	if gcStats {
		common.PrintMemUsage()
	}

	var (
		errs   *multierror.Error
		output common.Manifest
	)

	_, err := jsonparser.ArrayEach(
		data,
		func(value []byte, _ jsonparser.ValueType, _ int, cbErr error) {
			if cbErr != nil {
				errs = multierror.Append(errs, cbErr)
				return
			}

			e, errInner := getManifestEntry(value)
			if errInner != nil {
				// Record the entry's own error (not the callback argument) and
				// don't keep a half-populated entry.
				errs = multierror.Append(errs, errInner)
				return
			}

			output.Entries = append(output.Entries, e)
		},
		"entries",
	)
	if err != nil {
		errs = multierror.Append(errs, errors.Wrap(err, "iterating entries"))
	}

	if gcStats {
		common.PrintMemUsage()
		fmt.Printf("Decoded %d entries\n", len(output.Entries))
	}

	return errs.ErrorOrNil()
}
// getManifestEntry decodes one manifest entry object from data by looking up
// the "id", "labels", "modified", "deleted" and "data" keys.
//
// All per-field failures are collected and returned together. The entry is
// returned even when an error occurred; fields that failed keep their zero
// value.
func getManifestEntry(data []byte) (*common.ManifestEntry, error) {
	var (
		errs  *multierror.Error
		e     = &common.ManifestEntry{}
		paths = [][]string{
			{"id"},
			{"labels"},
			{"modified"},
			{"deleted"},
			{"data"},
		}
	)

	jsonparser.EachKey(data, func(idx int, value []byte, vt jsonparser.ValueType, iterErr error) {
		if iterErr != nil {
			errs = multierror.Append(errs, errors.Wrapf(iterErr, "reading path %d", idx))
			return
		}

		// err is scoped to this invocation so that a failure on one key is not
		// reported again when a later key is processed.
		var err error

		switch idx {
		case 0:
			e.ID = string(value)

		case 1:
			if err = json.Unmarshal(value, &e.Labels); err != nil {
				err = errors.Wrap(err, "unmarshalling labels")
			}

		case 2:
			// Parsed directly from the raw value, as the original code did.
			e.ModTime, err = time.Parse(time.RFC3339, string(value))
			if err != nil {
				err = errors.Wrap(err, "unmarshalling modtime")
			}

		case 3:
			if err = json.Unmarshal(value, &e.Deleted); err != nil {
				err = errors.Wrap(err, "unmarshalling deleted")
			}

		case 4:
			// Copy so the entry does not alias (and pin) the input buffer.
			e.Content = make(json.RawMessage, len(value))
			copy(e.Content, value)

		default:
			err = errors.Errorf("unexpected input %v", idx)
		}

		// Appending a nil error is a no-op.
		errs = multierror.Append(errs, err)
	}, paths...)

	return e, errs.ErrorOrNil()
}

View File

@ -0,0 +1,300 @@
package decoder
import (
"bytes"
"encoding/json"
"io"
"github.com/pkg/errors"
"github.com/alcionai/corso/src/cmd/jsondebug/common"
)
// Compile-time checks that each decoder in this file implements both decoder
// interfaces.
var (
_ common.ManifestDecoder = Array{}
_ common.ByteManifestDecoder = Array{}
_ common.ManifestDecoder = ArrayFull{}
_ common.ByteManifestDecoder = ArrayFull{}
_ common.ManifestDecoder = Map{}
_ common.ByteManifestDecoder = Map{}
)
// Array walks the top-level object by hand and decodes each element of the
// "entries" array with the stdlib decoder. The decoded manifest is discarded.
type Array struct{}

// Decode decodes a manifest from r. gcStats is ignored.
func (d Array) Decode(r io.Reader, gcStats bool) error {
	if _, err := DecodeManifestArray(r); err != nil {
		return err
	}

	return nil
}

// DecodeBytes decodes a manifest from data. gcStats is ignored.
func (d Array) DecodeBytes(data []byte, gcStats bool) error {
	return d.Decode(bytes.NewReader(data), gcStats)
}
// DecodeManifestArray reads a single JSON object from r and decodes it into a
// common.Manifest, handling the "entries" array one element at a time. On
// error the partially filled result is returned alongside the error.
func DecodeManifestArray(r io.Reader) (common.Manifest, error) {
	var out common.Manifest

	stream := json.NewDecoder(r)

	if openErr := expectDelimToken(stream, objectOpen); openErr != nil {
		return out, openErr
	}

	// Fields are walked by hand; decoding the whole object with the stdlib
	// decoder is what causes the memory issues this code works around.
	if fieldErr := parseManifestFields(stream, &out); fieldErr != nil {
		return out, fieldErr
	}

	// Consume the closing curly brace. Trailing data is not checked, which
	// matches json.Decode only decoding the next JSON item in the stream.
	closeErr := expectDelimToken(stream, objectClose)

	return out, closeErr
}
// parseManifestFields consumes the fields of the top-level manifest object
// from dec and fills res. Exactly one field name, "entries", is accepted and
// it may appear only once.
func parseManifestFields(dec *json.Decoder, res *common.Manifest) error {
	haveEntries := false

	for dec.More() {
		field, err := stringToken(dec)
		if err != nil {
			return err
		}

		// Only have `entries` field right now. This is stricter than the
		// current JSON decoder in the stdlib.
		switch {
		case field != "entries":
			return errors.Errorf("unexpected field name %s", field)
		case haveEntries:
			return errors.New("repeated Entries field")
		}

		haveEntries = true

		if arrErr := decodeArray(dec, &res.Entries); arrErr != nil {
			return arrErr
		}
	}

	return nil
}
// ArrayFull walks both the top-level object and every entry object by hand,
// using the stdlib decoder only for individual field values. The decoded
// manifest is discarded.
type ArrayFull struct{}

// Decode decodes a manifest from r. gcStats is ignored.
func (d ArrayFull) Decode(r io.Reader, gcStats bool) error {
	if _, err := d.decodeManifestArray(r); err != nil {
		return err
	}

	return nil
}

// DecodeBytes decodes a manifest from data. gcStats is ignored.
func (d ArrayFull) DecodeBytes(data []byte, gcStats bool) error {
	return d.Decode(bytes.NewReader(data), gcStats)
}
// decodeManifestArray reads a single JSON object from r and decodes it into a
// common.Manifest using the hand-written entry parser. On error the partially
// filled result is returned alongside the error.
func (d ArrayFull) decodeManifestArray(r io.Reader) (common.Manifest, error) {
	var out common.Manifest

	stream := json.NewDecoder(r)

	if openErr := expectDelimToken(stream, objectOpen); openErr != nil {
		return out, openErr
	}

	// Fields are walked by hand; decoding the whole object with the stdlib
	// decoder is what causes the memory issues this code works around.
	if entriesErr := d.parseManifestEntries(stream, &out); entriesErr != nil {
		return out, entriesErr
	}

	// Consume the closing curly brace. Trailing data is not checked, which
	// matches json.Decode only decoding the next JSON item in the stream.
	closeErr := expectDelimToken(stream, objectClose)

	return out, closeErr
}
// parseManifestEntries consumes the fields of the top-level manifest object
// from dec. Only a single "entries" field is accepted; each element of its
// array is parsed with d.parseManifestFields and appended to res.Entries.
func (d ArrayFull) parseManifestEntries(dec *json.Decoder, res *common.Manifest) error {
	entriesSeen := false

	for dec.More() {
		name, err := stringToken(dec)
		if err != nil {
			return err
		}

		// Only have `entries` field right now. This is stricter than the
		// current JSON decoder in the stdlib.
		if name != "entries" {
			return errors.Errorf("unexpected field name %s", name)
		}

		if entriesSeen {
			return errors.New("repeated Entries field")
		}

		entriesSeen = true

		if openErr := expectDelimToken(dec, arrayOpen); openErr != nil {
			return openErr
		}

		for dec.More() {
			entry, entryErr := d.parseManifestFields(dec)
			if entryErr != nil {
				return entryErr
			}

			res.Entries = append(res.Entries, entry)
		}

		if closeErr := expectDelimToken(dec, arrayClose); closeErr != nil {
			return closeErr
		}
	}

	return nil
}
// parseManifestFields consumes one complete entry object from dec, including
// both braces, and returns it as a common.ManifestEntry.
//
// Known fields ("id", "labels", "modified", "deleted", "data") are decoded
// into the entry and may appear at most once. Unknown fields are skipped in
// their entirety, whatever the type of their value.
func (d ArrayFull) parseManifestFields(dec *json.Decoder) (*common.ManifestEntry, error) {
	if err := expectDelimToken(dec, objectOpen); err != nil {
		return nil, err
	}

	var (
		seen = map[string]struct{}{}
		res  = &common.ManifestEntry{}
	)

	for dec.More() {
		l, err := stringToken(dec)
		if err != nil {
			return nil, err
		}

		if _, ok := seen[l]; ok {
			return nil, errors.Errorf("repeated field %s", l)
		}

		switch l {
		case "id":
			err = dec.Decode(&res.ID)
		case "labels":
			err = dec.Decode(&res.Labels)
		case "modified":
			err = dec.Decode(&res.ModTime)
		case "deleted":
			err = dec.Decode(&res.Deleted)
		case "data":
			err = dec.Decode(&res.Content)
		default:
			// Decode the whole value rather than a single token: for an object
			// or array value, Token would only consume the opening delimiter
			// and leave the decoder positioned inside the nested value.
			var skipped json.RawMessage
			if err := dec.Decode(&skipped); err != nil {
				return nil, errors.Wrapf(err, "consuming value for unexpected field %s", l)
			}

			continue
		}

		seen[l] = struct{}{}

		if err != nil {
			return nil, errors.Wrapf(err, "decoding value for field %s", l)
		}
	}

	if err := expectDelimToken(dec, objectClose); err != nil {
		return nil, err
	}

	return res, nil
}
// Map walks the top-level object by hand and decodes every entry into a
// generic map[string]any that is then thrown away.
type Map struct{}

// Decode decodes a manifest from r. gcStats is ignored.
func (d Map) Decode(r io.Reader, gcStats bool) error {
	if _, err := d.decodeManifestArray(r); err != nil {
		return err
	}

	return nil
}

// DecodeBytes decodes a manifest from data. gcStats is ignored.
func (d Map) DecodeBytes(data []byte, gcStats bool) error {
	return d.Decode(bytes.NewReader(data), gcStats)
}
// decodeManifestArray reads a single JSON object from r, running every entry
// through the map-based parser. The returned manifest carries no entries
// because that parser does not store what it decodes.
func (d Map) decodeManifestArray(r io.Reader) (common.Manifest, error) {
	var out common.Manifest

	stream := json.NewDecoder(r)

	if openErr := expectDelimToken(stream, objectOpen); openErr != nil {
		return out, openErr
	}

	// Fields are walked by hand; decoding the whole object with the stdlib
	// decoder is what causes the memory issues this code works around.
	if entriesErr := d.parseManifestEntries(stream, &out); entriesErr != nil {
		return out, entriesErr
	}

	// Consume the closing curly brace. Trailing data is not checked, which
	// matches json.Decode only decoding the next JSON item in the stream.
	closeErr := expectDelimToken(stream, objectClose)

	return out, closeErr
}
// parseManifestEntries consumes the fields of the top-level manifest object
// from dec. Only a single "entries" field is accepted. Each array element is
// decoded into a fresh map[string]any and dropped; res is never written to.
func (d Map) parseManifestEntries(dec *json.Decoder, res *common.Manifest) error {
	entriesSeen := false

	for dec.More() {
		name, err := stringToken(dec)
		if err != nil {
			return err
		}

		// Only have `entries` field right now. This is stricter than the
		// current JSON decoder in the stdlib.
		if name != "entries" {
			return errors.Errorf("unexpected field name %s", name)
		}

		if entriesSeen {
			return errors.New("repeated Entries field")
		}

		entriesSeen = true

		if openErr := expectDelimToken(dec, arrayOpen); openErr != nil {
			return openErr
		}

		for dec.More() {
			generic := map[string]any{}

			if decodeErr := dec.Decode(&generic); decodeErr != nil {
				return decodeErr
			}

			// Give up here, just check how many bytes it needs during
			// benchmarking. Converting the map into a common.ManifestEntry was
			// never implemented.
		}

		if closeErr := expectDelimToken(dec, arrayClose); closeErr != nil {
			return closeErr
		}
	}

	return nil
}

View File

@ -0,0 +1,38 @@
package decoder
import (
"bytes"
"encoding/json"
"io"
"github.com/pkg/errors"
"github.com/alcionai/corso/src/cmd/jsondebug/common"
)
var _ common.ManifestDecoder = Stdlib{}

// Stdlib decodes the whole manifest in one call to the encoding/json stream
// decoder. The decoded manifest is discarded.
type Stdlib struct{}

// Decode decodes a manifest from r. gcStats is ignored.
func (d Stdlib) Decode(r io.Reader, gcStats bool) error {
	var manifest common.Manifest

	if err := json.NewDecoder(r).Decode(&manifest); err != nil {
		return errors.Wrap(err, "decoding input")
	}

	return nil
}

// DecodeBytes decodes a manifest from data by wrapping it in a reader and
// using the same stream decoder as Decode. gcStats is ignored.
func (d Stdlib) DecodeBytes(data []byte, gcStats bool) error {
	return d.Decode(bytes.NewReader(data), gcStats)
}

View File

@ -0,0 +1,62 @@
package main
import (
"crypto/rand"
"encoding/json"
"fmt"
"os"
"time"
"github.com/alcionai/corso/src/cmd/jsondebug/common"
"github.com/google/uuid"
)
// main generates common.NumItems Foo entries, each carrying common.ItemSize
// bytes of random data, and writes them as one JSON document to
// common.FileName. Every failure, including serialization and close errors, is
// reported on stdout and stops the run.
func main() {
	buf := make([]byte, common.ItemSize)
	data := &common.FooArray{
		Entries: make([]*common.Foo, 0, common.NumItems),
	}

	for i := 0; i < common.NumItems; i++ {
		n, err := rand.Read(buf)
		if err != nil {
			fmt.Printf("Error reading random data: %v\n", err)
			return
		} else if n != common.ItemSize {
			fmt.Printf(
				"Short read for item data: wanted %d, got %d\n",
				common.ItemSize,
				n,
			)

			return
		}

		content := common.Content{
			ID:   uuid.NewString(),
			Data: buf,
		}

		// buf is reused across iterations; that is safe because the content is
		// serialized here, before the next read overwrites it.
		payload, err := json.Marshal(content)
		if err != nil {
			fmt.Printf("Error serializing item content: %v\n", err)
			return
		}

		item := common.Foo{
			ID:      uuid.NewString(),
			Labels:  map[string]string{"foo": "bar"},
			ModTime: time.Now(),
			Content: payload,
		}
		data.Entries = append(data.Entries, &item)
	}

	f, err := os.Create(common.FileName)
	if err != nil {
		fmt.Printf("Error making output file: %v\n", err)
		return
	}

	if err := json.NewEncoder(f).Encode(data); err != nil {
		// Best-effort close; the encode failure is the error worth reporting.
		f.Close()
		fmt.Printf("Error writing json to file: %v\n", err)

		return
	}

	// A failed Close on a freshly written file can mean lost data.
	if err := f.Close(); err != nil {
		fmt.Printf("Error closing output file: %v\n", err)
	}
}

View File

@ -0,0 +1,34 @@
package main
import (
"encoding/json"
"fmt"
"os"
"github.com/alcionai/corso/src/cmd/jsondebug/common"
)
// main performs a single stdlib decode of the generated input file.
func main() {
	readData()
}

// readData opens common.FileName, decodes the whole document into a
// common.FooArray with the encoding/json stream decoder and then prints
// memory statistics. Any failure is reported on stdout and ends the run early.
func readData() {
	input, openErr := os.Open(common.FileName)
	if openErr != nil {
		fmt.Printf("Error opening input file: %v\n", openErr)
		return
	}

	defer input.Close()

	var decoded common.FooArray

	if decodeErr := json.NewDecoder(input).Decode(&decoded); decodeErr != nil {
		fmt.Printf("Error decoding input: %v\n", decodeErr)
		return
	}

	common.PrintMemUsage()
}

View File

@ -0,0 +1,9 @@
package main
import "testing"
// Benchmark_readData runs one complete readData call per benchmark iteration.
func Benchmark_readData(b *testing.B) {
	for remaining := b.N; remaining > 0; remaining-- {
		readData()
	}
}

View File

@ -0,0 +1,123 @@
package main
import (
"encoding/json"
"errors"
"fmt"
"io/ioutil"
"os"
"runtime"
"runtime/pprof"
"github.com/alcionai/corso/src/cmd/jsondebug/common"
"github.com/buger/jsonparser"
)
func main() {
defer func() {
common.PrintMemUsage()
f, err := os.Create("mem.prof")
if err != nil {
fmt.Print("could not create memory profile: ", err)
return
}
defer f.Close() // error handling omitted for example
runtime.GC() // get up-to-date statistics
if err := pprof.WriteHeapProfile(f); err != nil {
fmt.Print("could not write memory profile: ", err)
return
}
}()
d, err := readFile()
if err != nil {
return
}
parseData(d)
}
// readFile prints the current memory statistics and then loads all of
// common.FileName into memory. A read failure is printed and returned.
func readFile() ([]byte, error) {
	common.PrintMemUsage()

	contents, readErr := ioutil.ReadFile(common.FileName)
	if readErr == nil {
		return contents, nil
	}

	fmt.Printf("Error reading file: %v\n", readErr)

	return nil, readErr
}
// parseData walks the "entries" array in data with jsonparser and decodes each
// element with getEntry. Memory statistics are printed before and after, and
// the number of decoded entries at the end.
//
// Entries that fail to decode are reported and skipped. A failure of the array
// iteration itself (e.g. missing or malformed "entries") is reported too,
// rather than showing up only as an unexplained entry count of zero.
func parseData(data []byte) {
	common.PrintMemUsage()

	var output common.FooArray

	_, err := jsonparser.ArrayEach(
		data,
		func(value []byte, _ jsonparser.ValueType, _ int, cbErr error) {
			if cbErr != nil {
				fmt.Printf("Error decoding input2: %v\n", cbErr)
				return
			}

			e, errInner := getEntry(value)
			if errInner != nil {
				fmt.Printf("Error decoding input2: %v\n", errInner)
				return
			}

			output.Entries = append(output.Entries, e)
		},
		"entries",
	)
	if err != nil {
		fmt.Printf("Error iterating entries: %v\n", err)
	}

	common.PrintMemUsage()
	fmt.Printf("Decoded %d entries\n", len(output.Entries))
}
// getEntry decodes one entry object from data into a common.Foo by visiting
// every key with jsonparser.ObjectEach.
//
// It returns an error, and no entry, if the object cannot be iterated, a field
// fails to decode, or an unknown key is encountered.
func getEntry(data []byte) (*common.Foo, error) {
	e := &common.Foo{}

	err := jsonparser.ObjectEach(data, func(key []byte, value []byte, dataType jsonparser.ValueType, offset int) error {
		switch string(key) {
		case "id":
			e.ID = string(value)

		case "labels":
			if err := json.Unmarshal(value, &e.Labels); err != nil {
				return fmt.Errorf("unmarshalling labels: %w", err)
			}

		case "modified":
			// jsonparser hands string values over without their surrounding
			// quotes, so json.Unmarshal rejects them. UnmarshalText parses the
			// bare RFC 3339 text.
			if err := e.ModTime.UnmarshalText(value); err != nil {
				return fmt.Errorf("unmarshalling modtime: %w", err)
			}

		case "deleted":
			if err := json.Unmarshal(value, &e.Deleted); err != nil {
				return fmt.Errorf("unmarshalling deleted: %w", err)
			}

		case "data":
			// Copy so the entry does not alias (and pin) the input buffer.
			cpBuf := make([]byte, len(value))
			copy(cpBuf, value)
			e.Content = cpBuf

		default:
			return errors.New("Unexpected Input: " + string(key))
		}

		return nil
	})
	if err != nil {
		return nil, err
	}

	return e, nil
}

View File

@ -0,0 +1,16 @@
package main
import "testing"
// Benchmark_parseData measures parseData on the contents of the input file.
// The file is read once, outside the timed region. A failure to read it fails
// the benchmark instead of returning silently with no measurement.
func Benchmark_parseData(b *testing.B) {
	d, err := readFile()
	if err != nil {
		b.Fatalf("reading input file: %v", err)
	}

	b.ResetTimer()

	for i := 0; i < b.N; i++ {
		parseData(d)
	}
}

View File

@ -0,0 +1,177 @@
package main
import (
"compress/gzip"
"encoding/json"
"fmt"
"math/rand"
"os"
"time"
"github.com/pkg/errors"
"golang.org/x/exp/maps"
"github.com/alcionai/corso/src/cmd/jsondebug/common"
)
// Fixed host and user values placed in every generated SnapManifest's Source.
const (
hostName = "host-name"
userName = "user-name"
)
// generateManifestEntry builds one random manifest entry. It generates a
// SnapManifest, serializes it into the entry's Content, and labels the entry
// with a base tag set (type, hostname, username, path) merged with the
// snapshot's own tags. ID and Deleted are random; ModTime is the current time.
func generateManifestEntry() (*common.ManifestEntry, error) {
	snapshot := generateSnapManifest()

	// Base tag set for all snapshots.
	labels := map[string]string{
		"type":     "snapshot",
		"hostname": snapshot.Source.Host,
		"username": snapshot.Source.UserName,
		"path":     snapshot.Source.Path,
	}

	maps.Copy(labels, snapshot.Tags)

	content, marshalErr := json.Marshal(snapshot)
	if marshalErr != nil {
		return nil, errors.Wrap(marshalErr, "serializing inner struct")
	}

	entry := &common.ManifestEntry{}
	entry.ID = randStringLen(32)
	entry.ModTime = time.Now()
	entry.Deleted = rand.Uint32()&1 != 0
	entry.Labels = labels
	entry.Content = content

	return entry, nil
}
// generateSnapManifest returns a SnapManifest filled with random values.
// Source uses the fixed hostName/userName and a random path, which is also
// used as the root entry's name. ID and Description are left empty.
func generateSnapManifest() common.SnapManifest {
var incomplete string
// Roughly 1/4 incomplete.
if rand.Intn(100) < 25 {
incomplete = "checkpoint"
}
path := randString()
res := common.SnapManifest{
Source: common.SourceInfo{
Host: hostName,
UserName: userName,
Path: path,
},
StartTime: rand.Int63(),
EndTime: rand.Int63(),
Stats: common.StatsS{
TotalFileSize: rand.Int63(),
ExcludedTotalFileSize: int64(rand.Uint32()),
TotalFileCount: rand.Int31(),
CachedFiles: rand.Int31(),
NonCachedFiles: rand.Int31(),
TotalDirectoryCount: rand.Int31(),
ExcludedFileCount: rand.Int31(),
ExcludedDirCount: rand.Int31(),
IgnoredErrorCount: rand.Int31(),
ErrorCount: rand.Int31(),
},
// The same incomplete marker is used here and in the directory summary.
IncompleteReason: incomplete,
RootEntry: &common.DirEntry{
Name: path,
EntryType: randStringLen(1),
Permissions: rand.Intn(512),
FileSize: rand.Int63(),
ModTime: rand.Int63(),
UserID: rand.Int31(),
GroupID: rand.Int31(),
ObjectID: randStringLen(32),
DirSummary: &common.DirectorySummary{
TotalFileSize: rand.Int63(),
TotalFileCount: rand.Int63(),
TotalSymlinkCount: rand.Int63(),
TotalDirCount: rand.Int63(),
MaxModTime: rand.Int63(),
IncompleteReason: incomplete,
FatalErrorCount: rand.Int(),
IgnoredErrorCount: rand.Int(),
},
},
Tags: map[string]string{
// User stand-in.
"tag:" + randStringLen(40): "0",
"tag:backup-id": randStringLen(36),
"tag:is-canon-backup": "0",
// Service/data type stand-in.
"tag:" + randStringLen(20): "0",
},
}
return res
}
// charSet is the alphabet random strings are drawn from.
var charSet = []rune("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ123456789")

// randString returns a random string whose length is chosen uniformly from
// [10, 128].
func randString() string {
	const (
		minLen = 10
		span   = 119
	)

	// String lengths between [10, 128] bytes.
	return randStringLen(rand.Intn(span) + minLen)
}

// randStringLen returns a string of exactly length characters, each picked at
// random from charSet.
func randStringLen(length int) string {
	out := make([]rune, 0, length)

	for len(out) < length {
		out = append(out, charSet[rand.Intn(len(charSet))])
	}

	return string(out)
}
// main generates common.NumItems random manifest entries and writes them as
// one JSON document twice: uncompressed to common.ManifestFileName and
// gzip-compressed to the same name plus common.GzipSuffix. Failures are
// reported on stdout and stop the run.
func main() {
	data := &common.Manifest{
		Entries: make([]*common.ManifestEntry, 0, common.NumItems),
	}

	for i := 0; i < common.NumItems; i++ {
		entry, err := generateManifestEntry()
		if err != nil {
			fmt.Printf("Error generating random entry: %v\n", err)
			return
		}

		data.Entries = append(data.Entries, entry)
	}

	if err := writeManifest(common.ManifestFileName, data, false); err != nil {
		fmt.Printf("Error writing json to regular file: %v\n", err)
		return
	}

	gzName := common.ManifestFileName + common.GzipSuffix
	if err := writeManifest(gzName, data, true); err != nil {
		fmt.Printf("Error writing json to gzip file: %v\n", err)
		return
	}
}

// writeManifest JSON-encodes data into a newly created file called name,
// gzip-compressing the stream when compress is true. Errors from closing the
// gzip stream and the file are returned, since either can mean the output is
// truncated.
func writeManifest(name string, data *common.Manifest, compress bool) (err error) {
	f, err := os.Create(name)
	if err != nil {
		return errors.Wrap(err, "making output file")
	}

	defer func() {
		// Keep the first error; only surface the close error if nothing else
		// went wrong.
		if cerr := f.Close(); err == nil && cerr != nil {
			err = errors.Wrap(cerr, "closing output file")
		}
	}()

	if !compress {
		if err := json.NewEncoder(f).Encode(data); err != nil {
			return errors.Wrap(err, "encoding json")
		}

		return nil
	}

	gz := gzip.NewWriter(f)

	if err := json.NewEncoder(gz).Encode(data); err != nil {
		// Best-effort close; the encode failure is the error worth reporting.
		gz.Close()

		return errors.Wrap(err, "encoding json")
	}

	// Close flushes the remaining compressed data and writes the gzip footer.
	if err := gz.Close(); err != nil {
		return errors.Wrap(err, "finishing gzip stream")
	}

	return nil
}

View File

@ -7,8 +7,10 @@ require (
github.com/alcionai/clues v0.0.0-20230324015051-5f61be9f301e github.com/alcionai/clues v0.0.0-20230324015051-5f61be9f301e
github.com/aws/aws-sdk-go v1.44.220 github.com/aws/aws-sdk-go v1.44.220
github.com/aws/aws-xray-sdk-go v1.8.1 github.com/aws/aws-xray-sdk-go v1.8.1
github.com/buger/jsonparser v1.1.1
github.com/cenkalti/backoff/v4 v4.2.0 github.com/cenkalti/backoff/v4 v4.2.0
github.com/google/uuid v1.3.0 github.com/google/uuid v1.3.0
github.com/hashicorp/go-multierror v1.1.1
github.com/kopia/kopia v0.12.2-0.20230327171220-747baeebdab1 github.com/kopia/kopia v0.12.2-0.20230327171220-747baeebdab1
github.com/microsoft/kiota-abstractions-go v0.18.0 github.com/microsoft/kiota-abstractions-go v0.18.0
github.com/microsoft/kiota-authentication-azure-go v0.6.0 github.com/microsoft/kiota-authentication-azure-go v0.6.0
@ -40,6 +42,7 @@ require (
github.com/andybalholm/brotli v1.0.4 // indirect github.com/andybalholm/brotli v1.0.4 // indirect
github.com/dnaeon/go-vcr v1.2.0 // indirect github.com/dnaeon/go-vcr v1.2.0 // indirect
github.com/fsnotify/fsnotify v1.6.0 // indirect github.com/fsnotify/fsnotify v1.6.0 // indirect
github.com/hashicorp/errwrap v1.0.0 // indirect
github.com/hashicorp/hcl v1.0.0 // indirect github.com/hashicorp/hcl v1.0.0 // indirect
github.com/magiconair/properties v1.8.7 // indirect github.com/magiconair/properties v1.8.7 // indirect
github.com/mitchellh/mapstructure v1.5.0 // indirect github.com/mitchellh/mapstructure v1.5.0 // indirect

View File

@ -66,6 +66,8 @@ github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
github.com/bmizerany/assert v0.0.0-20160611221934-b7ed37b82869 h1:DDGfHa7BWjL4YnC6+E63dPcxHo2sUxDIu8g3QgEJdRY= github.com/bmizerany/assert v0.0.0-20160611221934-b7ed37b82869 h1:DDGfHa7BWjL4YnC6+E63dPcxHo2sUxDIu8g3QgEJdRY=
github.com/bmizerany/assert v0.0.0-20160611221934-b7ed37b82869/go.mod h1:Ekp36dRnpXw/yCqJaO+ZrUyxD+3VXMFFr56k5XYrpB4= github.com/bmizerany/assert v0.0.0-20160611221934-b7ed37b82869/go.mod h1:Ekp36dRnpXw/yCqJaO+ZrUyxD+3VXMFFr56k5XYrpB4=
github.com/buger/jsonparser v1.1.1 h1:2PnMjfWD7wBILjqQbt530v576A/cAbQvEW9gGIpYMUs=
github.com/buger/jsonparser v1.1.1/go.mod h1:6RYKKt7H4d4+iWqouImQ9R2FZql3VbhNgx27UK13J/0=
github.com/cenkalti/backoff/v4 v4.2.0 h1:HN5dHm3WBOgndBH6E8V0q2jIYIR3s9yglV8k/+MN3u4= github.com/cenkalti/backoff/v4 v4.2.0 h1:HN5dHm3WBOgndBH6E8V0q2jIYIR3s9yglV8k/+MN3u4=
github.com/cenkalti/backoff/v4 v4.2.0/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE= github.com/cenkalti/backoff/v4 v4.2.0/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE=
github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
@ -179,6 +181,10 @@ github.com/googleapis/google-cloud-go-testing v0.0.0-20200911160855-bcd43fbb19e8
github.com/gorilla/mux v1.8.0 h1:i40aqfkR1h2SlN9hojwV5ZA91wcXFOvkdNIeFDP5koI= github.com/gorilla/mux v1.8.0 h1:i40aqfkR1h2SlN9hojwV5ZA91wcXFOvkdNIeFDP5koI=
github.com/grpc-ecosystem/go-grpc-middleware v1.3.0 h1:+9834+KizmvFV7pXQGSXQTsaWhq2GjuNUt0aUU0YBYw= github.com/grpc-ecosystem/go-grpc-middleware v1.3.0 h1:+9834+KizmvFV7pXQGSXQTsaWhq2GjuNUt0aUU0YBYw=
github.com/hanwen/go-fuse/v2 v2.2.0 h1:jo5QZYmBLNcl9ovypWaQ5yXMSSV+Ch68xoC3rtZvvBM= github.com/hanwen/go-fuse/v2 v2.2.0 h1:jo5QZYmBLNcl9ovypWaQ5yXMSSV+Ch68xoC3rtZvvBM=
github.com/hashicorp/errwrap v1.0.0 h1:hLrqtEDnRye3+sgx6z4qVLNuviH3MR5aQ0ykNJa/UYA=
github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4=
github.com/hashicorp/go-multierror v1.1.1 h1:H5DkEtf6CXdFp0N0Em5UCwQpXMWke8IA0+lD48awMYo=
github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM=
github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
github.com/hashicorp/golang-lru v0.5.4 h1:YDjusn29QI/Das2iO9M0BHnIbxPeyuCHsjMW+lJfyTc= github.com/hashicorp/golang-lru v0.5.4 h1:YDjusn29QI/Das2iO9M0BHnIbxPeyuCHsjMW+lJfyTc=