From 953f03839518cf500f2af83803a4a6c8067d4ca4 Mon Sep 17 00:00:00 2001 From: Keepers Date: Thu, 27 Oct 2022 11:55:50 -0600 Subject: [PATCH] Add AWS X-Ray support (#1111) ## Description Add aws xray observability tooling, plus ways to quickly and easily run the tools/load tests. Next steps are 1/ running the daemon container alongside the load tests within github, and 2/ creating client credentials in aws's xray service so that gathered trace info is propagated from the daemon proxy to the xray service. ## Type of change - [x] :robot: Test - [x] :computer: CI/Deployment ## Issue(s) * #902 ## Test Plan - [x] :muscle: Manual - [x] :green_heart: E2E --- src/Makefile | 37 +++- src/go.mod | 4 + src/go.sum | 15 ++ .../connector/exchange/service_restore.go | 5 +- src/internal/connector/graph_connector.go | 10 +- src/internal/connector/onedrive/restore.go | 7 +- src/internal/diagnostics/diagnostics.go | 170 ++++++++++++++++++ src/internal/kopia/wrapper.go | 10 +- src/internal/operations/backup.go | 5 +- src/internal/operations/restore.go | 5 +- src/pkg/repository/repository_load_test.go | 47 ++++- src/pkg/selectors/scopes.go | 5 +- src/testfiles/otel_daemon/Dockerfile | 7 + 13 files changed, 305 insertions(+), 22 deletions(-) create mode 100644 src/internal/diagnostics/diagnostics.go create mode 100644 src/testfiles/otel_daemon/Dockerfile diff --git a/src/Makefile b/src/Makefile index c7f7c656b..32d24267b 100644 --- a/src/Makefile +++ b/src/Makefile @@ -6,7 +6,7 @@ HAS_LINT := $(shell which golangci-lint) INSTALL_LINT_PAGE := "https://golangci-lint.run/usage/install/" BAD_LINT_MSG := "Missing golangci-lint version $(WANTED_LINT_VERSION). Visit $(INSTALL_LINT_PAGE) for instructions on how to install" -.PHONY: check-lint check-lint-version lint +.PHONY: check-lint check-lint-version lint load-test build: go build -o corso -ldflags \ @@ -26,3 +26,38 @@ check-lint: echo >&2 $(BAD_LINT_MSG); \ false; \ fi + +build-otel-daemon: + cd testfiles/otel_daemon; \ + docker build -t xray-daemon . + +otel-daemon: + results_dir=$$PWD/test_results; \ + cd ./testfiles/otel_daemon; \ + docker run \ + --attach STDOUT \ + -e AWS_REGION \ + -v ~/.aws/:/root/.aws/:ro \ + --name xray-daemon \ + -p 2000:2000/udp \ + --rm \ + xray-daemon \ + --local-mode \ + --log-level debug + +# --net=host \ + +load-test: + AWS_XRAY_NOOP_ID=False \ + CORSO_LOAD_TESTS=y \ + go test \ + -v \ + -count=1 \ + -timeout 1h \ + -blockprofile=block.prof \ + -cpuprofile=cpu.prof \ + -memprofile=mem.prof \ + -mutexprofile=mutex.prof \ + -trace=trace.out \ + -outputdir=test_results \ + ./pkg/repository/repository_load_test.go \ No newline at end of file diff --git a/src/go.mod b/src/go.mod index 74008329f..349310c61 100644 --- a/src/go.mod +++ b/src/go.mod @@ -5,6 +5,7 @@ go 1.18 require ( github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.1.0 github.com/aws/aws-sdk-go v1.44.107 + github.com/aws/aws-xray-sdk-go v1.7.1 github.com/google/uuid v1.3.0 github.com/hashicorp/go-multierror v1.1.1 github.com/kopia/kopia v0.12.0 @@ -31,6 +32,7 @@ require ( require ( github.com/VividCortex/ewma v1.2.0 // indirect github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d // indirect + github.com/andybalholm/brotli v1.0.4 // indirect github.com/fsnotify/fsnotify v1.5.4 // indirect github.com/hashicorp/hcl v1.0.0 // indirect github.com/magiconair/properties v1.8.6 // indirect @@ -41,6 +43,8 @@ require ( github.com/spf13/cast v1.5.0 // indirect github.com/spf13/jwalterweatherman v1.1.0 // indirect github.com/subosito/gotenv v1.3.0 // indirect + github.com/valyala/bytebufferpool v1.0.0 // indirect + github.com/valyala/fasthttp v1.34.0 // indirect gopkg.in/yaml.v2 v2.4.0 // indirect ) diff --git a/src/go.sum b/src/go.sum index c6f8364cc..6c564791a 100644 --- a/src/go.sum +++ b/src/go.sum @@ -46,6 +46,7 @@ github.com/AzureAD/microsoft-authentication-library-for-go v0.7.0 h1:VgSJlZH5u0k github.com/AzureAD/microsoft-authentication-library-for-go v0.7.0/go.mod h1:BDJ5qMFKx9DugEg3+uQSDCdbYPr5s9vBTrL9P8TpqOU= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= +github.com/DATA-DOG/go-sqlmock v1.4.1 h1:ThlnYciV1iM/V0OSF/dtkqWb6xo5qITT1TJBG1MRDJM= github.com/VividCortex/ewma v1.2.0 h1:f58SaIzcDXrSy3kWaHNvuJgJ3Nmz59Zji6XoJR/q1ow= github.com/VividCortex/ewma v1.2.0/go.mod h1:nz4BbCtbLyFDeC9SUHbtcT5644juEuWfUAUnGx7j5l4= github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d h1:licZJFw2RwpHMqeKTCYkitsPqHNxTmd4SNR5r94FGM8= @@ -55,8 +56,12 @@ github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuy github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= github.com/alecthomas/units v0.0.0-20190717042225-c3de453c63f4/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= github.com/alecthomas/units v0.0.0-20190924025748-f65c72e2690d/go.mod h1:rBZYJk541a8SKzHPHnH3zbiI+7dagKZ0cgpgrD7Fyho= +github.com/andybalholm/brotli v1.0.4 h1:V7DdXeJtZscaqfNuAdSRuRFzuiKlHSC/Zh3zl9qY3JY= +github.com/andybalholm/brotli v1.0.4/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig= github.com/aws/aws-sdk-go v1.44.107 h1:VP7Rq3wzsOV7wrfHqjAAKRksD4We58PaoVSDPKhm8nw= github.com/aws/aws-sdk-go v1.44.107/go.mod h1:y4AeaBuwd2Lk+GepC1E9v0qOiTws0MIWAX4oIKwKHZo= +github.com/aws/aws-xray-sdk-go v1.7.1 h1:mji68Db4oWipJ6SiQQuFiWBYWI8sUvPfcv86mLFVKHQ= +github.com/aws/aws-xray-sdk-go v1.7.1/go.mod h1:aNQo1pqFaaeKaf18CSWCkoaXUI+PQZ7yfNE28YyE2CI= github.com/benbjohnson/clock v1.1.0 h1:Q92kusRqC1XV2MjkWETPvjJVqKetz1OzxZB7mHJLju8= github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8= @@ -179,6 +184,7 @@ github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+ github.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+vpHVxEJEs9eg= github.com/googleapis/gax-go/v2 v2.0.5/go.mod h1:DWXyrwAJ9X0FpwwEdw+IPEYBICEFu5mhpdKc/us6bOk= github.com/googleapis/google-cloud-go-testing v0.0.0-20200911160855-bcd43fbb19e8/go.mod h1:dvDLG8qkwmyD9a/MJJN3XJcT3xFxOKAvTZGvuZmac9g= +github.com/grpc-ecosystem/go-grpc-middleware v1.3.0 h1:+9834+KizmvFV7pXQGSXQTsaWhq2GjuNUt0aUU0YBYw= github.com/hashicorp/errwrap v1.0.0 h1:hLrqtEDnRye3+sgx6z4qVLNuviH3MR5aQ0ykNJa/UYA= github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= github.com/hashicorp/go-multierror v1.1.1 h1:H5DkEtf6CXdFp0N0Em5UCwQpXMWke8IA0+lD48awMYo= @@ -210,6 +216,7 @@ github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/X github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w= github.com/julienschmidt/httprouter v1.3.0/go.mod h1:JR6WtHb+2LUe8TCKY3cZOxFyyO8IZAc4RVcycCCAKdM= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= +github.com/klauspost/compress v1.15.0/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk= github.com/klauspost/compress v1.15.11 h1:Lcadnb3RKGin4FYM/orgq0qde+nc15E5Cbqg4B9Sx9c= github.com/klauspost/compress v1.15.11/go.mod h1:QPwzmACJjUTFsnSHH934V6woptycfrDDJnH7hvFVbGM= github.com/klauspost/cpuid/v2 v2.0.1/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= @@ -372,6 +379,11 @@ github.com/tidwall/pretty v1.2.0 h1:RWIZEg2iJ8/g6fDDYzMpobmaoGh5OLl4AXtGUGPcqCs= github.com/tidwall/pretty v1.2.0/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU= github.com/tomlazar/table v0.1.2 h1:DP8f62FzZAZk8oavepm1v/oyf4ni3/LMHWNlOinmleg= github.com/tomlazar/table v0.1.2/go.mod h1:IecZnpep9f/BatHacfh+++ftE+lFONN8BVPi9nx5U1w= +github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6KllzawFIhcdPw= +github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc= +github.com/valyala/fasthttp v1.34.0 h1:d3AAQJ2DRcxJYHm7OXNXtXt2as1vMDfxeIcFvhmGGm4= +github.com/valyala/fasthttp v1.34.0/go.mod h1:epZA5N+7pY6ZaEKRmstzOuYJx9HI8DI1oaCGZpdH4h0= +github.com/valyala/tcplisten v1.0.0/go.mod h1:T0xQ8SeCZGxckz9qRXTfG43PvQ/mcWh7FwZEA7Ioqkc= github.com/vbauerster/mpb/v8 v8.1.4 h1:MOcLTIbbAA892wVjRiuFHa1nRlNvifQMDVh12Bq/xIs= github.com/vbauerster/mpb/v8 v8.1.4/go.mod h1:2fRME8lCLU9gwJwghZb1bO9A3Plc8KPeQ/ayGj+Ek4I= github.com/xtgo/uuid v0.0.0-20140804021211-a0b114877d4c h1:3lbZUMbMiGUW/LMkfsEABsc5zNT9+b1CvsJx47JzJ8g= @@ -414,6 +426,7 @@ golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8U golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.0.0-20210421170649-83a5a9bb288b/go.mod h1:T9bdIzuCu7OtxOm1hfPfRQxPLYneinmdGuTeoZ9dtd4= golang.org/x/crypto v0.0.0-20211108221036-ceb1ce70b4fa/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= +golang.org/x/crypto v0.0.0-20220214200702-86341886e292/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= golang.org/x/crypto v0.0.0-20220926161630-eccd6366d1be h1:fmw3UbQh+nxngCAHrDCCztao/kbYFnWjoqop8dHx05A= golang.org/x/crypto v0.0.0-20220926161630-eccd6366d1be/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= @@ -486,6 +499,7 @@ golang.org/x/net v0.0.0-20201209123823-ac852fbbde11/go.mod h1:m0MpNAwzfU5UDzcl9v golang.org/x/net v0.0.0-20201224014010-6772e930b67b/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net v0.0.0-20210525063256-abc453219eb5/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= +golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20220127200216-cd36cc0744dd/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk= golang.org/x/net v0.0.0-20220225172249-27dd8689420f/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk= golang.org/x/net v0.0.0-20220927171203-f486391704dc h1:FxpXZdoBqT8RjqTy6i1E8nXHhW21wK7ptQ/EPIGxzPQ= @@ -560,6 +574,7 @@ golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20210616045830-e2b7044e8c71/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220114195835-da31bd327af9/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220227234510-4e6760a101f9/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220412211240-33da011f77ad/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220704084225-05e143d24a9e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= diff --git a/src/internal/connector/exchange/service_restore.go b/src/internal/connector/exchange/service_restore.go index eabf8d3d0..973a5c66c 100644 --- a/src/internal/connector/exchange/service_restore.go +++ b/src/internal/connector/exchange/service_restore.go @@ -14,6 +14,7 @@ import ( "github.com/alcionai/corso/src/internal/connector/graph" "github.com/alcionai/corso/src/internal/connector/support" "github.com/alcionai/corso/src/internal/data" + D "github.com/alcionai/corso/src/internal/diagnostics" "github.com/alcionai/corso/src/internal/observe" "github.com/alcionai/corso/src/pkg/backup/details" "github.com/alcionai/corso/src/pkg/control" @@ -313,8 +314,8 @@ func restoreCollection( deets *details.Details, errUpdater func(string, error), ) (support.CollectionMetrics, bool) { - defer trace.StartRegion(ctx, "gc:exchange:restoreCollection").End() - trace.Log(ctx, "gc:exchange:restoreCollection", dc.FullPath().String()) + ctx, end := D.Span(ctx, "gc:exchange:restoreCollection", D.Label("path", dc.FullPath())) + defer end() var ( metrics support.CollectionMetrics diff --git a/src/internal/connector/graph_connector.go b/src/internal/connector/graph_connector.go index 634aa6f46..feabd63a7 100644 --- a/src/internal/connector/graph_connector.go +++ b/src/internal/connector/graph_connector.go @@ -20,6 +20,7 @@ import ( "github.com/alcionai/corso/src/internal/connector/onedrive" "github.com/alcionai/corso/src/internal/connector/support" "github.com/alcionai/corso/src/internal/data" + D "github.com/alcionai/corso/src/internal/diagnostics" "github.com/alcionai/corso/src/internal/observe" "github.com/alcionai/corso/src/pkg/account" "github.com/alcionai/corso/src/pkg/backup/details" @@ -127,7 +128,8 @@ func (gs *graphService) EnableFailFast() { // workspace. The users field is updated during this method // iff the return value is true func (gc *GraphConnector) setTenantUsers(ctx context.Context) error { - defer trace.StartRegion(ctx, "gc:setTenantUsers").End() + ctx, end := D.Span(ctx, "gc:setTenantUsers") + defer end() response, err := exchange.GetAllUsersForTenant(ctx, gc.graphService, "") if err != nil { @@ -252,7 +254,8 @@ func (gc *GraphConnector) RestoreDataCollections( dest control.RestoreDestination, dcs []data.Collection, ) (*details.Details, error) { - gc.region = trace.StartRegion(ctx, "connector:restore") + ctx, end := D.Span(ctx, "connector:restore") + defer end() var ( status *support.ConnectorOperationStatus @@ -389,7 +392,8 @@ func IsNonRecoverableError(e error) bool { // DataCollections utility function to launch backup operations for exchange and onedrive func (gc *GraphConnector) DataCollections(ctx context.Context, sels selectors.Selector) ([]data.Collection, error) { - defer trace.StartRegion(ctx, "gc:dataCollections:"+sels.Service.String()).End() + ctx, end := D.Span(ctx, "gc:dataCollections", D.Index("service", sels.Service.String())) + defer end() err := verifyBackupInputs(sels, gc.Users) if err != nil { diff --git a/src/internal/connector/onedrive/restore.go b/src/internal/connector/onedrive/restore.go index 8aecbd84d..4aca460ac 100644 --- a/src/internal/connector/onedrive/restore.go +++ b/src/internal/connector/onedrive/restore.go @@ -10,6 +10,7 @@ import ( "github.com/alcionai/corso/src/internal/connector/graph" "github.com/alcionai/corso/src/internal/connector/support" "github.com/alcionai/corso/src/internal/data" + D "github.com/alcionai/corso/src/internal/diagnostics" "github.com/alcionai/corso/src/internal/observe" "github.com/alcionai/corso/src/pkg/backup/details" "github.com/alcionai/corso/src/pkg/control" @@ -95,7 +96,8 @@ func restoreCollection( deets *details.Details, errUpdater func(string, error), ) (support.CollectionMetrics, bool) { - defer trace.StartRegion(ctx, "gc:oneDrive:restoreCollection").End() + ctx, end := D.Span(ctx, "gc:oneDrive:restoreCollection", D.Label("path", dc.FullPath())) + defer end() var ( metrics = support.CollectionMetrics{} @@ -228,7 +230,8 @@ func restoreItem( driveID, parentFolderID string, copyBuffer []byte, ) (*details.OneDriveInfo, error) { - defer trace.StartRegion(ctx, "gc:oneDrive:restoreItem").End() + ctx, end := D.Span(ctx, "gc:oneDrive:restoreItem", D.Label("item_uuid", itemData.UUID())) + defer end() itemName := itemData.UUID() trace.Log(ctx, "gc:oneDrive:restoreItem", itemName) diff --git a/src/internal/diagnostics/diagnostics.go b/src/internal/diagnostics/diagnostics.go new file mode 100644 index 000000000..771729bd1 --- /dev/null +++ b/src/internal/diagnostics/diagnostics.go @@ -0,0 +1,170 @@ +package diagnostics + +import ( + "context" + "os" + "runtime/trace" + "strings" + + "github.com/aws/aws-xray-sdk-go/xray" + "github.com/aws/aws-xray-sdk-go/xraylog" + "github.com/pkg/errors" + + "github.com/alcionai/corso/src/pkg/logger" +) + +var localRun bool + +/* +Currently using AWS x-ray for observability: +https://docs.aws.amazon.com/xray/latest/devguide/xray-concepts.html#xray-concepts-annotations + +runtime/trace is also collected for load_test metrics gathering. +*/ + +// Initialize trace and span collection and emission. +// Should only be called as an initialization step in the context of a local run +// (such as load testing). SDK users need not initialize the xray, as they should +// already be running their own collector. +func InitCollector() error { + cfg := xray.Config{ + DaemonAddr: "0.0.0.0:2000", + ServiceVersion: "3.3.5", + } + + if err := xray.Configure(cfg); err != nil { + return errors.Wrap(err, "initializing observability tooling") + } + + // TODO: feed in the corso logger + xray.SetLogger(xraylog.NewDefaultLogger(os.Stderr, xraylog.LogLevelInfo)) + + return nil +} + +// Start kicks off a parent segment for tracking. Start should only be called +// internally, and only once per corso execution. SDK users may provide contexts +// with existing segments rather calling Start. +// The returned context will contain the parent segment (you should only ever have +// one) for all child spans to collect within; adding a span to any context besides +// this one (and its descendants) will slice that span from observation. +// The returned func closes and flushes the parent segment. +func Start(ctx context.Context, name string) (context.Context, func()) { + ctx, seg := xray.BeginSegment(ctx, name) + seg.TraceID = xray.NewTraceID() + + rgn := trace.StartRegion(ctx, name) + localRun = true + + return ctx, func() { + seg.Close(nil) + rgn.End() + } +} + +type extender interface { + extend(context.Context, *xray.Segment) +} + +type annotation struct { + k string + v any +} + +func (a annotation) extend(ctx context.Context, span *xray.Segment) { + if err := span.AddAnnotation(a.k, a.v); err != nil { + logger.Ctx(ctx).Errorw( + "diagnostics annotation addition", + "error", err, + "key", a.k, + "value", a.v, + "span", span.Name) + } +} + +// Index annotates spans with filterable, groupable properties. +// Index values must be of type key, value, or boolean. +func Index(k string, v any) extender { + return annotation{k, v} +} + +type metadata struct { + k string + v any +} + +func (m metadata) extend(ctx context.Context, span *xray.Segment) { + if err := span.AddMetadata(m.k, m.v); err != nil { + logger.Ctx(ctx).Errorw( + "diagnostics metadata addition", + "error", err, + "key", m.k, + "value", m.v, + "span", span.Name) + } +} + +// Label tags spans with non-filterable, purely informational data. +// Label values can be any type. +func Label(k string, v any) extender { + return metadata{k, v} +} + +// Adds a Span to the given context. Spans may be extended with indexes +// for filtering and grouping, or with labels for contextual info. +// Named variable returns are necessary here to prevent nil responses +// during panic handling. +func Span(ctx context.Context, name string, ext ...extender) (_ctx context.Context, _fn func()) { + // no-op if no parent segment exists + if xray.GetSegment(ctx) == nil { + return ctx, func() {} + } + + // spans created without an existing parent segment in the ctx will panic. + defer func() { + if r := recover(); r != nil { + _ctx = ctx + _fn = func() {} + + var rmsg string + + if s, ok := r.(string); ok { + rmsg = s + } else if e, ok := r.(error); ok { + rmsg = e.Error() + } + + if strings.Contains(rmsg, "segment cannot be found") { + return + } + + logger.Ctx(ctx).Errorw("diagnostics.Span", "panic", r) + } + }() + + ctx, span := xray.BeginSubsegment(ctx, name) + rgn := trace.StartRegion(ctx, name) + + for _, e := range ext { + e.extend(ctx, span) + } + + _ctx = ctx + _fn = func() { + rgn.End() + + if span != nil { + return + } + + // during a local run we always deliver segment info to the daemon + if localRun { + span.CloseAndStream(nil) + return + } + + span.Close(nil) + } + + return _ctx, _fn +} diff --git a/src/internal/kopia/wrapper.go b/src/internal/kopia/wrapper.go index 388f298a2..885e7974c 100644 --- a/src/internal/kopia/wrapper.go +++ b/src/internal/kopia/wrapper.go @@ -20,6 +20,7 @@ import ( "github.com/pkg/errors" "github.com/alcionai/corso/src/internal/data" + D "github.com/alcionai/corso/src/internal/diagnostics" "github.com/alcionai/corso/src/internal/model" "github.com/alcionai/corso/src/internal/stats" "github.com/alcionai/corso/src/pkg/backup/details" @@ -277,7 +278,8 @@ func getStreamItemFunc( progress *corsoProgress, ) func(context.Context, func(context.Context, fs.Entry) error) error { return func(ctx context.Context, cb func(context.Context, fs.Entry) error) error { - defer trace.StartRegion(ctx, "kopia:getStreamItemFunc").End() + ctx, end := D.Span(ctx, "kopia:getStreamItemFunc") + defer end() // Collect all errors and return them at the end so that iteration for this // directory doesn't end early. @@ -479,7 +481,8 @@ func (w Wrapper) BackupCollections( return nil, nil, errNotConnected } - defer trace.StartRegion(ctx, "kopia:backupCollections").End() + ctx, end := D.Span(ctx, "kopia:backupCollections") + defer end() if len(collections) == 0 { return &BackupStats{}, &details.Details{}, nil @@ -665,7 +668,8 @@ func (w Wrapper) RestoreMultipleItems( paths []path.Path, bcounter byteCounter, ) ([]data.Collection, error) { - defer trace.StartRegion(ctx, "kopia:restore:multiple").End() + ctx, end := D.Span(ctx, "kopia:restoreMultipleItems") + defer end() if len(paths) == 0 { return nil, errors.WithStack(errNoRestorePath) diff --git a/src/internal/operations/backup.go b/src/internal/operations/backup.go index aa756940f..1deb95809 100644 --- a/src/internal/operations/backup.go +++ b/src/internal/operations/backup.go @@ -2,7 +2,6 @@ package operations import ( "context" - "runtime/trace" "time" "github.com/google/uuid" @@ -12,6 +11,7 @@ import ( "github.com/alcionai/corso/src/internal/connector" "github.com/alcionai/corso/src/internal/connector/support" "github.com/alcionai/corso/src/internal/data" + D "github.com/alcionai/corso/src/internal/diagnostics" "github.com/alcionai/corso/src/internal/events" "github.com/alcionai/corso/src/internal/kopia" "github.com/alcionai/corso/src/internal/model" @@ -85,7 +85,8 @@ type backupStats struct { // Run begins a synchronous backup operation. func (op *BackupOperation) Run(ctx context.Context) (err error) { - defer trace.StartRegion(ctx, "operations:backup:run").End() + ctx, end := D.Span(ctx, "operations:backup:run") + defer end() var ( opStats backupStats diff --git a/src/internal/operations/restore.go b/src/internal/operations/restore.go index fe94826c3..dfb07b396 100644 --- a/src/internal/operations/restore.go +++ b/src/internal/operations/restore.go @@ -3,7 +3,6 @@ package operations import ( "context" "fmt" - "runtime/trace" "time" "github.com/google/uuid" @@ -13,6 +12,7 @@ import ( "github.com/alcionai/corso/src/internal/connector" "github.com/alcionai/corso/src/internal/connector/support" "github.com/alcionai/corso/src/internal/data" + D "github.com/alcionai/corso/src/internal/diagnostics" "github.com/alcionai/corso/src/internal/events" "github.com/alcionai/corso/src/internal/kopia" "github.com/alcionai/corso/src/internal/model" @@ -96,7 +96,8 @@ type restoreStats struct { // Run begins a synchronous restore operation. func (op *RestoreOperation) Run(ctx context.Context) (restoreDetails *details.Details, err error) { - defer trace.StartRegion(ctx, "operations:restore:run").End() + ctx, end := D.Span(ctx, "operations:restore:run") + defer end() var ( opStats = restoreStats{ diff --git a/src/pkg/repository/repository_load_test.go b/src/pkg/repository/repository_load_test.go index 8559748e3..1c858acf7 100644 --- a/src/pkg/repository/repository_load_test.go +++ b/src/pkg/repository/repository_load_test.go @@ -2,6 +2,8 @@ package repository_test import ( "context" + "fmt" + "os" "runtime/pprof" "sort" "testing" @@ -10,6 +12,7 @@ import ( "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" + D "github.com/alcionai/corso/src/internal/diagnostics" "github.com/alcionai/corso/src/internal/operations" "github.com/alcionai/corso/src/internal/tester" "github.com/alcionai/corso/src/pkg/account" @@ -56,6 +59,40 @@ func singleUserSet(t *testing.T) []string { return []string{tester.LoadTestM365UserID(t)} } +var loadCtx context.Context + +func TestMain(m *testing.M) { + ctx, logFlush := tester.NewContext() + loadCtx = ctx + flush := func() { + logFlush() + } + + if err := tester.RunOnAny(tester.CorsoLoadTests); err == nil { + if err := D.InitCollector(); err != nil { + fmt.Println("initializing load tests:", err) + os.Exit(1) + } + + ctx, spanFlush := D.Start(ctx, "Load_Testing_Main") + loadCtx = ctx + flush = func() { + spanFlush() + logFlush() + } + } + + exitVal := m.Run() + + flush() + + os.Exit(exitVal) +} + +// ------------------------------------------------------------------------------------------------ +// Common +// ------------------------------------------------------------------------------------------------ + func initM365Repo(t *testing.T) ( context.Context, repository.Repository, account.Account, storage.Storage, ) { @@ -65,7 +102,7 @@ func initM365Repo(t *testing.T) ( ) require.NoError(t, err) - ctx, flush := tester.NewContext() + ctx, flush := tester.WithContext(loadCtx) defer flush() st := tester.NewPrefixedS3Storage(t) @@ -374,7 +411,7 @@ func (suite *RepositoryLoadTestExchangeSuite) TeardownSuite() { } func (suite *RepositoryLoadTestExchangeSuite) TestExchange() { - ctx, flush := tester.NewContext() + ctx, flush := tester.WithContext(suite.ctx) defer flush() bsel := selectors.NewExchangeBackup() @@ -424,7 +461,7 @@ func (suite *RepositoryIndividualLoadTestExchangeSuite) TeardownSuite() { } func (suite *RepositoryIndividualLoadTestExchangeSuite) TestExchange() { - ctx, flush := tester.NewContext() + ctx, flush := tester.WithContext(suite.ctx) defer flush() bsel := selectors.NewExchangeBackup() @@ -477,7 +514,7 @@ func (suite *RepositoryLoadTestOneDriveSuite) TeardownSuite() { } func (suite *RepositoryLoadTestOneDriveSuite) TestOneDrive() { - ctx, flush := tester.NewContext() + ctx, flush := tester.WithContext(suite.ctx) defer flush() bsel := selectors.NewOneDriveBackup() @@ -524,7 +561,7 @@ func (suite *RepositoryIndividualLoadTestOneDriveSuite) TeardownSuite() { } func (suite *RepositoryIndividualLoadTestOneDriveSuite) TestOneDrive() { - ctx, flush := tester.NewContext() + ctx, flush := tester.WithContext(suite.ctx) defer flush() bsel := selectors.NewOneDriveBackup() diff --git a/src/pkg/selectors/scopes.go b/src/pkg/selectors/scopes.go index 779355516..794f28b70 100644 --- a/src/pkg/selectors/scopes.go +++ b/src/pkg/selectors/scopes.go @@ -2,8 +2,8 @@ package selectors import ( "context" - "runtime/trace" + D "github.com/alcionai/corso/src/internal/diagnostics" "github.com/alcionai/corso/src/pkg/backup/details" "github.com/alcionai/corso/src/pkg/filters" "github.com/alcionai/corso/src/pkg/logger" @@ -252,7 +252,8 @@ func reduce[T scopeT, C categoryT]( s Selector, dataCategories map[path.CategoryType]C, ) *details.Details { - defer trace.StartRegion(ctx, "selectors:reduce").End() + ctx, end := D.Span(ctx, "selectors:reduce") + defer end() if deets == nil { return nil diff --git a/src/testfiles/otel_daemon/Dockerfile b/src/testfiles/otel_daemon/Dockerfile new file mode 100644 index 000000000..85afeb757 --- /dev/null +++ b/src/testfiles/otel_daemon/Dockerfile @@ -0,0 +1,7 @@ +FROM amazonlinux +RUN yum install -y unzip +RUN curl -o daemon.zip https://s3.us-east-2.amazonaws.com/aws-xray-assets.us-east-2/xray-daemon/aws-xray-daemon-linux-3.x.zip +RUN unzip daemon.zip && cp xray /usr/bin/xray +ENTRYPOINT ["/usr/bin/xray", "-t", "0.0.0.0:2000", "-b", "0.0.0.0:2000"] +EXPOSE 2000/udp +EXPOSE 2000/tcp \ No newline at end of file