some quick logging and error naming updates (#3348)

#### Does this PR need a docs update or release note?

- [x]  No

#### Type of change

- [x] 🧹 Tech Debt/Cleanup

#### Issue(s)

* #3344

#### Test Plan

- [x]  Unit test
- [x] 💚 E2E
This commit is contained in:
Keepers 2023-05-10 20:03:31 -06:00 committed by GitHub
parent 522d6d2206
commit c0725b9cf9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 68 additions and 36 deletions

View File

@ -207,7 +207,7 @@ func runBackups(
var ( var (
owner = discSel.DiscreteOwner owner = discSel.DiscreteOwner
ictx = clues.Add(ctx, "resource_owner", owner) ictx = clues.Add(ctx, "resource_owner_selected", owner)
) )
bo, err := r.NewBackupWithLookup(ictx, discSel, ins) bo, err := r.NewBackupWithLookup(ictx, discSel, ins)
@ -218,6 +218,11 @@ func runBackups(
continue continue
} }
ictx = clues.Add(
ctx,
"resource_owner_id", bo.ResourceOwner.ID(),
"resource_owner_name", bo.ResourceOwner.Name())
err = bo.Run(ictx) err = bo.Run(ictx)
if err != nil { if err != nil {
errs = append(errs, clues.Wrap(err, owner).WithClues(ictx)) errs = append(errs, clues.Wrap(err, owner).WithClues(ictx))

View File

@ -2,7 +2,6 @@ package graph
import ( import (
"context" "context"
"fmt"
"io" "io"
"net/http" "net/http"
"net/http/httputil" "net/http/httputil"
@ -101,6 +100,9 @@ func LoggableURL(url string) pii.SafeURL {
} }
} }
// 1 MB
const logMBLimit = 1 * 1048576
func (mw *LoggingMiddleware) Intercept( func (mw *LoggingMiddleware) Intercept(
pipeline khttp.Pipeline, pipeline khttp.Pipeline,
middlewareIndex int, middlewareIndex int,
@ -123,42 +125,61 @@ func (mw *LoggingMiddleware) Intercept(
return resp, err return resp, err
} }
ctx = clues.Add(ctx, "status", resp.Status, "statusCode", resp.StatusCode) ctx = clues.Add(
log := logger.Ctx(ctx) ctx,
"status", resp.Status,
"statusCode", resp.StatusCode,
"content_len", resp.ContentLength)
// Return immediately if the response is good (2xx). var (
// If api logging is toggled, log a body-less dump of the request/resp. log = logger.Ctx(ctx)
if (resp.StatusCode / 100) == 2 { respClass = resp.StatusCode / 100
if logger.DebugAPIFV || os.Getenv(log2xxGraphRequestsEnvKey) != "" { logExtra = logger.DebugAPIFV || os.Getenv(logGraphRequestsEnvKey) != ""
log.Debugw("2xx graph api resp", "response", getRespDump(ctx, resp, os.Getenv(log2xxGraphResponseEnvKey) != "")) )
}
return resp, err // special case: always info log 429 responses
}
// Log errors according to api debugging configurations.
// When debugging is toggled, every non-2xx is recorded with a response dump.
// Otherwise, throttling cases and other non-2xx responses are logged
// with a slimmer reference for telemetry/supportability purposes.
if logger.DebugAPIFV || os.Getenv(logGraphRequestsEnvKey) != "" {
log.Errorw("non-2xx graph api response", "response", getRespDump(ctx, resp, true))
return resp, err
}
msg := fmt.Sprintf("graph api error: %s", resp.Status)
// special case for supportability: log all throttling cases.
if resp.StatusCode == http.StatusTooManyRequests { if resp.StatusCode == http.StatusTooManyRequests {
log = log.With( log.Infow(
"graph api throttling",
"limit", resp.Header.Get(rateLimitHeader), "limit", resp.Header.Get(rateLimitHeader),
"remaining", resp.Header.Get(rateRemainingHeader), "remaining", resp.Header.Get(rateRemainingHeader),
"reset", resp.Header.Get(rateResetHeader), "reset", resp.Header.Get(rateResetHeader),
"retry-after", resp.Header.Get(retryAfterHeader)) "retry-after", resp.Header.Get(retryAfterHeader))
} else if resp.StatusCode/100 == 4 || resp.StatusCode == http.StatusServiceUnavailable {
log = log.With("response", getRespDump(ctx, resp, true)) return resp, err
} }
log.Info(msg) // special case: always dump status-400-bad-request
if resp.StatusCode == http.StatusBadRequest {
log.With("response", getRespDump(ctx, resp, true)).
Error("graph api error: " + resp.Status)
return resp, err
}
// Log api calls according to api debugging configurations.
switch respClass {
case 2:
if logExtra {
// only dump the body if it's under a size limit. We don't want to copy gigs into memory for a log.
dump := getRespDump(ctx, resp, os.Getenv(log2xxGraphResponseEnvKey) != "" && resp.ContentLength < logMBLimit)
log.Infow("2xx graph api resp", "response", dump)
}
case 3:
log.With("redirect_location", LoggableURL(resp.Header.Get(locationHeader)))
if logExtra {
log.With("response", getRespDump(ctx, resp, false))
}
log.Info("graph api redirect: " + resp.Status)
default:
if logExtra {
log.With("response", getRespDump(ctx, resp, true))
}
log.Error("graph api error: " + resp.Status)
}
return resp, err return resp, err
} }

View File

@ -21,13 +21,14 @@ const (
logGraphRequestsEnvKey = "LOG_GRAPH_REQUESTS" logGraphRequestsEnvKey = "LOG_GRAPH_REQUESTS"
log2xxGraphRequestsEnvKey = "LOG_2XX_GRAPH_REQUESTS" log2xxGraphRequestsEnvKey = "LOG_2XX_GRAPH_REQUESTS"
log2xxGraphResponseEnvKey = "LOG_2XX_GRAPH_RESPONSES" log2xxGraphResponseEnvKey = "LOG_2XX_GRAPH_RESPONSES"
retryAttemptHeader = "Retry-Attempt"
retryAfterHeader = "Retry-After"
defaultMaxRetries = 3 defaultMaxRetries = 3
defaultDelay = 3 * time.Second defaultDelay = 3 * time.Second
locationHeader = "Location"
rateLimitHeader = "RateLimit-Limit" rateLimitHeader = "RateLimit-Limit"
rateRemainingHeader = "RateLimit-Remaining" rateRemainingHeader = "RateLimit-Remaining"
rateResetHeader = "RateLimit-Reset" rateResetHeader = "RateLimit-Reset"
retryAfterHeader = "Retry-After"
retryAttemptHeader = "Retry-Attempt"
defaultHTTPClientTimeout = 1 * time.Hour defaultHTTPClientTimeout = 1 * time.Hour
) )

View File

@ -512,11 +512,16 @@ func consumeBackupCollections(
"kopia_ignored_errors", kopiaStats.IgnoredErrorCount) "kopia_ignored_errors", kopiaStats.IgnoredErrorCount)
} }
if kopiaStats.ErrorCount > 0 || ctx = clues.Add(
(kopiaStats.IgnoredErrorCount > kopiaStats.ExpectedIgnoredErrorCount) { ctx,
err = clues.New("building kopia snapshot").With( "kopia_errors", kopiaStats.ErrorCount,
"kopia_errors", kopiaStats.ErrorCount, "kopia_ignored_errors", kopiaStats.IgnoredErrorCount,
"kopia_ignored_errors", kopiaStats.IgnoredErrorCount) "kopia_expected_ignored_errors", kopiaStats.ExpectedIgnoredErrorCount)
if kopiaStats.ErrorCount > 0 {
err = clues.New("building kopia snapshot").WithClues(ctx)
} else if kopiaStats.IgnoredErrorCount > kopiaStats.ExpectedIgnoredErrorCount {
err = clues.New("downloading items for persistence").WithClues(ctx)
} }
return kopiaStats, deets, itemsSourcedFromBase, err return kopiaStats, deets, itemsSourcedFromBase, err