some quick logging and error naming updates (#3348)
#### Does this PR need a docs update or release note? - [x] ⛔ No #### Type of change - [x] 🧹 Tech Debt/Cleanup #### Issue(s) * #3344 #### Test Plan - [x] ⚡ Unit test - [x] 💚 E2E
This commit is contained in:
parent
522d6d2206
commit
c0725b9cf9
@ -207,7 +207,7 @@ func runBackups(
|
|||||||
|
|
||||||
var (
|
var (
|
||||||
owner = discSel.DiscreteOwner
|
owner = discSel.DiscreteOwner
|
||||||
ictx = clues.Add(ctx, "resource_owner", owner)
|
ictx = clues.Add(ctx, "resource_owner_selected", owner)
|
||||||
)
|
)
|
||||||
|
|
||||||
bo, err := r.NewBackupWithLookup(ictx, discSel, ins)
|
bo, err := r.NewBackupWithLookup(ictx, discSel, ins)
|
||||||
@ -218,6 +218,11 @@ func runBackups(
|
|||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ictx = clues.Add(
|
||||||
|
ctx,
|
||||||
|
"resource_owner_id", bo.ResourceOwner.ID(),
|
||||||
|
"resource_owner_name", bo.ResourceOwner.Name())
|
||||||
|
|
||||||
err = bo.Run(ictx)
|
err = bo.Run(ictx)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
errs = append(errs, clues.Wrap(err, owner).WithClues(ictx))
|
errs = append(errs, clues.Wrap(err, owner).WithClues(ictx))
|
||||||
|
|||||||
@ -2,7 +2,6 @@ package graph
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"fmt"
|
|
||||||
"io"
|
"io"
|
||||||
"net/http"
|
"net/http"
|
||||||
"net/http/httputil"
|
"net/http/httputil"
|
||||||
@ -101,6 +100,9 @@ func LoggableURL(url string) pii.SafeURL {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// 1 MB
|
||||||
|
const logMBLimit = 1 * 1048576
|
||||||
|
|
||||||
func (mw *LoggingMiddleware) Intercept(
|
func (mw *LoggingMiddleware) Intercept(
|
||||||
pipeline khttp.Pipeline,
|
pipeline khttp.Pipeline,
|
||||||
middlewareIndex int,
|
middlewareIndex int,
|
||||||
@ -123,42 +125,61 @@ func (mw *LoggingMiddleware) Intercept(
|
|||||||
return resp, err
|
return resp, err
|
||||||
}
|
}
|
||||||
|
|
||||||
ctx = clues.Add(ctx, "status", resp.Status, "statusCode", resp.StatusCode)
|
ctx = clues.Add(
|
||||||
log := logger.Ctx(ctx)
|
ctx,
|
||||||
|
"status", resp.Status,
|
||||||
|
"statusCode", resp.StatusCode,
|
||||||
|
"content_len", resp.ContentLength)
|
||||||
|
|
||||||
// Return immediately if the response is good (2xx).
|
var (
|
||||||
// If api logging is toggled, log a body-less dump of the request/resp.
|
log = logger.Ctx(ctx)
|
||||||
if (resp.StatusCode / 100) == 2 {
|
respClass = resp.StatusCode / 100
|
||||||
if logger.DebugAPIFV || os.Getenv(log2xxGraphRequestsEnvKey) != "" {
|
logExtra = logger.DebugAPIFV || os.Getenv(logGraphRequestsEnvKey) != ""
|
||||||
log.Debugw("2xx graph api resp", "response", getRespDump(ctx, resp, os.Getenv(log2xxGraphResponseEnvKey) != ""))
|
)
|
||||||
}
|
|
||||||
|
|
||||||
return resp, err
|
// special case: always info log 429 responses
|
||||||
}
|
|
||||||
|
|
||||||
// Log errors according to api debugging configurations.
|
|
||||||
// When debugging is toggled, every non-2xx is recorded with a response dump.
|
|
||||||
// Otherwise, throttling cases and other non-2xx responses are logged
|
|
||||||
// with a slimmer reference for telemetry/supportability purposes.
|
|
||||||
if logger.DebugAPIFV || os.Getenv(logGraphRequestsEnvKey) != "" {
|
|
||||||
log.Errorw("non-2xx graph api response", "response", getRespDump(ctx, resp, true))
|
|
||||||
return resp, err
|
|
||||||
}
|
|
||||||
|
|
||||||
msg := fmt.Sprintf("graph api error: %s", resp.Status)
|
|
||||||
|
|
||||||
// special case for supportability: log all throttling cases.
|
|
||||||
if resp.StatusCode == http.StatusTooManyRequests {
|
if resp.StatusCode == http.StatusTooManyRequests {
|
||||||
log = log.With(
|
log.Infow(
|
||||||
|
"graph api throttling",
|
||||||
"limit", resp.Header.Get(rateLimitHeader),
|
"limit", resp.Header.Get(rateLimitHeader),
|
||||||
"remaining", resp.Header.Get(rateRemainingHeader),
|
"remaining", resp.Header.Get(rateRemainingHeader),
|
||||||
"reset", resp.Header.Get(rateResetHeader),
|
"reset", resp.Header.Get(rateResetHeader),
|
||||||
"retry-after", resp.Header.Get(retryAfterHeader))
|
"retry-after", resp.Header.Get(retryAfterHeader))
|
||||||
} else if resp.StatusCode/100 == 4 || resp.StatusCode == http.StatusServiceUnavailable {
|
|
||||||
log = log.With("response", getRespDump(ctx, resp, true))
|
return resp, err
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Info(msg)
|
// special case: always dump status-400-bad-request
|
||||||
|
if resp.StatusCode == http.StatusBadRequest {
|
||||||
|
log.With("response", getRespDump(ctx, resp, true)).
|
||||||
|
Error("graph api error: " + resp.Status)
|
||||||
|
|
||||||
|
return resp, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Log api calls according to api debugging configurations.
|
||||||
|
switch respClass {
|
||||||
|
case 2:
|
||||||
|
if logExtra {
|
||||||
|
// only dump the body if it's under a size limit. We don't want to copy gigs into memory for a log.
|
||||||
|
dump := getRespDump(ctx, resp, os.Getenv(log2xxGraphResponseEnvKey) != "" && resp.ContentLength < logMBLimit)
|
||||||
|
log.Infow("2xx graph api resp", "response", dump)
|
||||||
|
}
|
||||||
|
case 3:
|
||||||
|
log.With("redirect_location", LoggableURL(resp.Header.Get(locationHeader)))
|
||||||
|
|
||||||
|
if logExtra {
|
||||||
|
log.With("response", getRespDump(ctx, resp, false))
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Info("graph api redirect: " + resp.Status)
|
||||||
|
default:
|
||||||
|
if logExtra {
|
||||||
|
log.With("response", getRespDump(ctx, resp, true))
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Error("graph api error: " + resp.Status)
|
||||||
|
}
|
||||||
|
|
||||||
return resp, err
|
return resp, err
|
||||||
}
|
}
|
||||||
|
|||||||
@ -21,13 +21,14 @@ const (
|
|||||||
logGraphRequestsEnvKey = "LOG_GRAPH_REQUESTS"
|
logGraphRequestsEnvKey = "LOG_GRAPH_REQUESTS"
|
||||||
log2xxGraphRequestsEnvKey = "LOG_2XX_GRAPH_REQUESTS"
|
log2xxGraphRequestsEnvKey = "LOG_2XX_GRAPH_REQUESTS"
|
||||||
log2xxGraphResponseEnvKey = "LOG_2XX_GRAPH_RESPONSES"
|
log2xxGraphResponseEnvKey = "LOG_2XX_GRAPH_RESPONSES"
|
||||||
retryAttemptHeader = "Retry-Attempt"
|
|
||||||
retryAfterHeader = "Retry-After"
|
|
||||||
defaultMaxRetries = 3
|
defaultMaxRetries = 3
|
||||||
defaultDelay = 3 * time.Second
|
defaultDelay = 3 * time.Second
|
||||||
|
locationHeader = "Location"
|
||||||
rateLimitHeader = "RateLimit-Limit"
|
rateLimitHeader = "RateLimit-Limit"
|
||||||
rateRemainingHeader = "RateLimit-Remaining"
|
rateRemainingHeader = "RateLimit-Remaining"
|
||||||
rateResetHeader = "RateLimit-Reset"
|
rateResetHeader = "RateLimit-Reset"
|
||||||
|
retryAfterHeader = "Retry-After"
|
||||||
|
retryAttemptHeader = "Retry-Attempt"
|
||||||
defaultHTTPClientTimeout = 1 * time.Hour
|
defaultHTTPClientTimeout = 1 * time.Hour
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@ -512,11 +512,16 @@ func consumeBackupCollections(
|
|||||||
"kopia_ignored_errors", kopiaStats.IgnoredErrorCount)
|
"kopia_ignored_errors", kopiaStats.IgnoredErrorCount)
|
||||||
}
|
}
|
||||||
|
|
||||||
if kopiaStats.ErrorCount > 0 ||
|
ctx = clues.Add(
|
||||||
(kopiaStats.IgnoredErrorCount > kopiaStats.ExpectedIgnoredErrorCount) {
|
ctx,
|
||||||
err = clues.New("building kopia snapshot").With(
|
"kopia_errors", kopiaStats.ErrorCount,
|
||||||
"kopia_errors", kopiaStats.ErrorCount,
|
"kopia_ignored_errors", kopiaStats.IgnoredErrorCount,
|
||||||
"kopia_ignored_errors", kopiaStats.IgnoredErrorCount)
|
"kopia_expected_ignored_errors", kopiaStats.ExpectedIgnoredErrorCount)
|
||||||
|
|
||||||
|
if kopiaStats.ErrorCount > 0 {
|
||||||
|
err = clues.New("building kopia snapshot").WithClues(ctx)
|
||||||
|
} else if kopiaStats.IgnoredErrorCount > kopiaStats.ExpectedIgnoredErrorCount {
|
||||||
|
err = clues.New("downloading items for persistence").WithClues(ctx)
|
||||||
}
|
}
|
||||||
|
|
||||||
return kopiaStats, deets, itemsSourcedFromBase, err
|
return kopiaStats, deets, itemsSourcedFromBase, err
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user