Skip to content

Commit

Permalink
Merge pull request #121970 from pohly/log-apimachinery-runtime
Browse files Browse the repository at this point in the history
apimachinery runtime: support contextual logging
  • Loading branch information
k8s-ci-robot authored Apr 18, 2024
2 parents a2abca1 + 5a130d2 commit d35ba36
Show file tree
Hide file tree
Showing 12 changed files with 203 additions and 42 deletions.
1 change: 1 addition & 0 deletions hack/golangci-hints.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,7 @@ linters-settings: # please keep this alphabetized
# The following packages have been migrated to contextual logging.
# Packages matched here do not have to be listed above because
# "contextual" implies "structured".
contextual k8s.io/apimachinery/pkg/util/runtime/.*
contextual k8s.io/client-go/metadata/.*
contextual k8s.io/client-go/tools/events/.*
contextual k8s.io/client-go/tools/record/.*
Expand Down
1 change: 1 addition & 0 deletions hack/golangci-strict.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,7 @@ linters-settings: # please keep this alphabetized
# The following packages have been migrated to contextual logging.
# Packages matched here do not have to be listed above because
# "contextual" implies "structured".
contextual k8s.io/apimachinery/pkg/util/runtime/.*
contextual k8s.io/client-go/metadata/.*
contextual k8s.io/client-go/tools/events/.*
contextual k8s.io/client-go/tools/record/.*
Expand Down
1 change: 1 addition & 0 deletions hack/golangci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,7 @@ linters-settings: # please keep this alphabetized
# The following packages have been migrated to contextual logging.
# Packages matched here do not have to be listed above because
# "contextual" implies "structured".
contextual k8s.io/apimachinery/pkg/util/runtime/.*
contextual k8s.io/client-go/metadata/.*
contextual k8s.io/client-go/tools/events/.*
contextual k8s.io/client-go/tools/record/.*
Expand Down
1 change: 1 addition & 0 deletions hack/logcheck.conf
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ structured k8s.io/apiserver/pkg/server/options/encryptionconfig/.*
# The following packages have been migrated to contextual logging.
# Packages matched here do not have to be listed above because
# "contextual" implies "structured".
contextual k8s.io/apimachinery/pkg/util/runtime/.*
contextual k8s.io/client-go/metadata/.*
contextual k8s.io/client-go/tools/events/.*
contextual k8s.io/client-go/tools/record/.*
Expand Down
135 changes: 109 additions & 26 deletions staging/src/k8s.io/apimachinery/pkg/util/runtime/runtime.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ limitations under the License.
package runtime

import (
"context"
"fmt"
"net/http"
"runtime"
Expand All @@ -35,31 +36,62 @@ var (
)

// PanicHandlers is a list of functions which will be invoked when a panic happens.
var PanicHandlers = []func(interface{}){logPanic}
var PanicHandlers = []func(context.Context, interface{}){logPanic}

// HandleCrash simply catches a crash and logs an error. Meant to be called via
// defer. Additional context-specific handlers can be provided, and will be
// called in case of panic. HandleCrash actually crashes, after calling the
// handlers and logging the panic message.
//
// E.g., you can provide one or more additional handlers for something like shutting down go routines gracefully.
//
// TODO(pohly): logcheck:context // HandleCrashWithContext should be used instead of HandleCrash in code which supports contextual logging.
func HandleCrash(additionalHandlers ...func(interface{})) {
if r := recover(); r != nil {
for _, fn := range PanicHandlers {
fn(r)
}
for _, fn := range additionalHandlers {
fn(r)
}
if ReallyCrash {
// Actually proceed to panic.
panic(r)
additionalHandlersWithContext := make([]func(context.Context, interface{}), len(additionalHandlers))
for i, handler := range additionalHandlers {
handler := handler // capture loop variable
additionalHandlersWithContext[i] = func(_ context.Context, r interface{}) {
handler(r)
}
}

handleCrash(context.Background(), r, additionalHandlersWithContext...)
}
}

// HandleCrashWithContext simply catches a crash and logs an error. Meant to be called via
// defer. Additional context-specific handlers can be provided, and will be
// called in case of panic. HandleCrash actually crashes, after calling the
// handlers and logging the panic message.
//
// E.g., you can provide one or more additional handlers for something like shutting down go routines gracefully.
//
// The context is used to determine how to log.
func HandleCrashWithContext(ctx context.Context, additionalHandlers ...func(context.Context, interface{})) {
if r := recover(); r != nil {
handleCrash(ctx, r, additionalHandlers...)
}
}

// handleCrash is the common implementation of HandleCrash and HandleCrash.
// Having those call a common implementation ensures that the stack depth
// is the same regardless through which path the handlers get invoked.
func handleCrash(ctx context.Context, r any, additionalHandlers ...func(context.Context, interface{})) {
for _, fn := range PanicHandlers {
fn(ctx, r)
}
for _, fn := range additionalHandlers {
fn(ctx, r)
}
if ReallyCrash {
// Actually proceed to panic.
panic(r)
}
}

// logPanic logs the caller tree when a panic occurs (except in the special case of http.ErrAbortHandler).
func logPanic(r interface{}) {
func logPanic(ctx context.Context, r interface{}) {
if r == http.ErrAbortHandler {
// honor the http.ErrAbortHandler sentinel panic value:
// ErrAbortHandler is a sentinel panic value to abort a handler.
Expand All @@ -73,46 +105,97 @@ func logPanic(r interface{}) {
const size = 64 << 10
stacktrace := make([]byte, size)
stacktrace = stacktrace[:runtime.Stack(stacktrace, false)]

// We don't really know how many call frames to skip because the Go
// panic handler is between us and the code where the panic occurred.
// If it's one function (as in Go 1.21), then skipping four levels
// gets us to the function which called the `defer HandleCrashWithontext(...)`.
logger := klog.FromContext(ctx).WithCallDepth(4)

// For backwards compatibility, conversion to string
// is handled here instead of defering to the logging
// backend.
if _, ok := r.(string); ok {
klog.Errorf("Observed a panic: %s\n%s", r, stacktrace)
logger.Error(nil, "Observed a panic", "panic", r, "stacktrace", string(stacktrace))
} else {
klog.Errorf("Observed a panic: %#v (%v)\n%s", r, r, stacktrace)
logger.Error(nil, "Observed a panic", "panic", fmt.Sprintf("%v", r), "panicGoValue", fmt.Sprintf("%#v", r), "stacktrace", string(stacktrace))
}
}

// ErrorHandlers is a list of functions which will be invoked when a nonreturnable
// error occurs.
// TODO(lavalamp): for testability, this and the below HandleError function
// should be packaged up into a testable and reusable object.
var ErrorHandlers = []func(error){
var ErrorHandlers = []ErrorHandler{
logError,
(&rudimentaryErrorBackoff{
lastErrorTime: time.Now(),
// 1ms was the number folks were able to stomach as a global rate limit.
// If you need to log errors more than 1000 times a second you
// should probably consider fixing your code instead. :)
minPeriod: time.Millisecond,
}).OnError,
func(_ context.Context, _ error, _ string, _ ...interface{}) {
(&rudimentaryErrorBackoff{
lastErrorTime: time.Now(),
// 1ms was the number folks were able to stomach as a global rate limit.
// If you need to log errors more than 1000 times a second you
// should probably consider fixing your code instead. :)
minPeriod: time.Millisecond,
}).OnError()
},
}

type ErrorHandler func(ctx context.Context, err error, msg string, keysAndValues ...interface{})

// HandlerError is a method to invoke when a non-user facing piece of code cannot
// return an error and needs to indicate it has been ignored. Invoking this method
// is preferable to logging the error - the default behavior is to log but the
// errors may be sent to a remote server for analysis.
//
// TODO(pohly): logcheck:context // HandleErrorWithContext should be used instead of HandleError in code which supports contextual logging.
func HandleError(err error) {
// this is sometimes called with a nil error. We probably shouldn't fail and should do nothing instead
if err == nil {
return
}

handleError(context.Background(), err, "Unhandled Error")
}

// HandlerErrorWithContext is a method to invoke when a non-user facing piece of code cannot
// return an error and needs to indicate it has been ignored. Invoking this method
// is preferable to logging the error - the default behavior is to log but the
// errors may be sent to a remote server for analysis. The context is used to
// determine how to log the error.
//
// If contextual logging is enabled, the default log output is equivalent to
//
// logr.FromContext(ctx).WithName("UnhandledError").Error(err, msg, keysAndValues...)
//
// Without contextual logging, it is equivalent to:
//
// klog.ErrorS(err, msg, keysAndValues...)
//
// In contrast to HandleError, passing nil for the error is still going to
// trigger a log entry. Don't construct a new error or wrap an error
// with fmt.Errorf. Instead, add additional information via the mssage
// and key/value pairs.
//
// This variant should be used instead of HandleError because it supports
// structured, contextual logging.
func HandleErrorWithContext(ctx context.Context, err error, msg string, keysAndValues ...interface{}) {
handleError(ctx, err, msg, keysAndValues...)
}

// handleError is the common implementation of HandleError and HandleErrorWithContext.
// Using this common implementation ensures that the stack depth
// is the same regardless through which path the handlers get invoked.
func handleError(ctx context.Context, err error, msg string, keysAndValues ...interface{}) {
for _, fn := range ErrorHandlers {
fn(err)
fn(ctx, err, msg, keysAndValues...)
}
}

// logError prints an error with the call stack of the location it was reported
func logError(err error) {
klog.ErrorDepth(2, err)
// logError prints an error with the call stack of the location it was reported.
// It expects to be called as <caller> -> HandleError[WithContext] -> handleError -> logError.
func logError(ctx context.Context, err error, msg string, keysAndValues ...interface{}) {
logger := klog.FromContext(ctx).WithCallDepth(3)
logger = klog.LoggerWithName(logger, "UnhandledError")
logger.Error(err, msg, keysAndValues...) //nolint:logcheck // logcheck complains about unknown key/value pairs.
}

type rudimentaryErrorBackoff struct {
Expand All @@ -125,7 +208,7 @@ type rudimentaryErrorBackoff struct {

// OnError will block if it is called more often than the embedded period time.
// This will prevent overly tight hot error loops.
func (r *rudimentaryErrorBackoff) OnError(error) {
func (r *rudimentaryErrorBackoff) OnError() {
now := time.Now() // start the timer before acquiring the lock
r.lastErrorTimeLock.Lock()
d := now.Sub(r.lastErrorTime)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
/*
Copyright 2014 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package runtime

import (
"bytes"
"context"
"errors"
"flag"
"fmt"
"regexp"

"k8s.io/klog/v2"
)

//nolint:logcheck // Several functions are normally not okay in a package.
func ExampleHandleErrorWithContext() {
state := klog.CaptureState()
defer state.Restore()
var fs flag.FlagSet
klog.InitFlags(&fs)
for flag, value := range map[string]string{
"one_output": "true",
"logtostderr": "false",
} {
if err := fs.Set(flag, value); err != nil {
fmt.Printf("Unexpected error configuring klog: %v", err)
return
}
}
var buffer bytes.Buffer
klog.SetOutput(&buffer)

logger := klog.Background()
logger = klog.LoggerWithValues(logger, "request", 42)
ctx := klog.NewContext(context.Background(), logger)

// The line number of the next call must be at line 60. Here are some
// blank lines that can be removed to keep the line unchanged.
//
//
//
//
//
//
HandleErrorWithContext(ctx, errors.New("fake error"), "test")

klog.Flush()
// Strip varying header. Code location should be constant and something
// that needs to be tested.
output := buffer.String()
output = regexp.MustCompile(`^.* ([^[:space:]]*.go:[[:digit:]]*)\] `).ReplaceAllString(output, `xxx $1] `)
fmt.Print(output)

// Output:
// xxx runtime_stack_test.go:60] "test" err="fake error" logger="UnhandledError" request=42
}
14 changes: 8 additions & 6 deletions staging/src/k8s.io/apimachinery/pkg/util/runtime/runtime_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ package runtime

import (
"bytes"
"context"
"fmt"
"io"
"net/http"
Expand All @@ -43,8 +44,8 @@ func TestCustomHandleCrash(t *testing.T) {
old := PanicHandlers
defer func() { PanicHandlers = old }()
var result interface{}
PanicHandlers = []func(interface{}){
func(r interface{}) {
PanicHandlers = []func(context.Context, interface{}){
func(_ context.Context, r interface{}) {
result = r
},
}
Expand All @@ -66,8 +67,8 @@ func TestCustomHandleError(t *testing.T) {
old := ErrorHandlers
defer func() { ErrorHandlers = old }()
var result error
ErrorHandlers = []func(error){
func(err error) {
ErrorHandlers = []ErrorHandler{
func(_ context.Context, err error, msg string, keysAndValues ...interface{}) {
result = err
},
}
Expand Down Expand Up @@ -101,7 +102,8 @@ func TestHandleCrashLog(t *testing.T) {
if len(lines) < 4 {
t.Fatalf("panic log should have 1 line of message, 1 line per goroutine and 2 lines per function call")
}
if match, _ := regexp.MatchString("Observed a panic: test panic", lines[0]); !match {
t.Logf("Got log output:\n%s", strings.Join(lines, "\n"))
if match, _ := regexp.MatchString(`"Observed a panic" panic="test panic"`, lines[0]); !match {
t.Errorf("mismatch panic message: %s", lines[0])
}
// The following regexp's verify that Kubernetes panic log matches Golang stdlib
Expand Down Expand Up @@ -170,7 +172,7 @@ func Test_rudimentaryErrorBackoff_OnError_ParallelSleep(t *testing.T) {
wg.Add(1)
go func() {
<-start
r.OnError(nil) // input error is ignored
r.OnError()
wg.Done()
}()
}
Expand Down
4 changes: 2 additions & 2 deletions staging/src/k8s.io/apimachinery/pkg/util/wait/wait_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -194,8 +194,8 @@ func TestJitterUntilRecoversPanic(t *testing.T) {

// Hook up a custom crash handler to ensure it is called when a jitter function panics
runtime.ReallyCrash = false
runtime.PanicHandlers = []func(interface{}){
func(p interface{}) {
runtime.PanicHandlers = []func(context.Context, interface{}){
func(_ context.Context, p interface{}) {
handled++
},
}
Expand Down
4 changes: 1 addition & 3 deletions staging/src/k8s.io/apiserver/pkg/server/filters/timeout.go
Original file line number Diff line number Diff line change
Expand Up @@ -137,9 +137,7 @@ func (t *timeoutHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
}

metrics.RecordRequestPostTimeout(metrics.PostTimeoutSourceTimeoutHandler, status)
err := fmt.Errorf("post-timeout activity - time-elapsed: %s, %v %q result: %v",
time.Since(timedOutAt), r.Method, r.URL.Path, res)
utilruntime.HandleError(err)
utilruntime.HandleErrorWithContext(r.Context(), nil, "Post-timeout activity", "timeElapsed", time.Since(timedOutAt), "method", r.Method, "path", r.URL.Path, "result", res)
}()
}()
httplog.SetStacktracePredicate(r.Context(), func(status int) bool {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -408,7 +408,9 @@ func TestErrConnKilled(t *testing.T) {
if isStackTraceLoggedByRuntime(capturedOutput) {
t.Errorf("unexpected stack trace in log, actual = %v", capturedOutput)
}
if !strings.Contains(capturedOutput, `timeout or abort while handling: method=GET URI="/" audit-ID=""`) {
// For the sake of simplicity and clarity this matches the full log line.
// This is not part of the Kubernetes API and could change.
if !strings.Contains(capturedOutput, `"Timeout or abort while handling" logger="UnhandledError" method="GET" URI="/" auditID=""`) {
t.Errorf("unexpected output captured actual = %v", capturedOutput)
}
}
Expand Down Expand Up @@ -501,7 +503,9 @@ func TestErrConnKilledHTTP2(t *testing.T) {
if isStackTraceLoggedByRuntime(capturedOutput) {
t.Errorf("unexpected stack trace in log, actual = %v", capturedOutput)
}
if !strings.Contains(capturedOutput, `timeout or abort while handling: method=GET URI="/" audit-ID=""`) {
// For the sake of simplicity and clarity this matches the full log line.
// This is not part of the Kubernetes API and could change.
if !strings.Contains(capturedOutput, `"Timeout or abort while handling" logger="UnhandledError" method="GET" URI="/" auditID=""`) {
t.Errorf("unexpected output captured actual = %v", capturedOutput)
}

Expand Down
Loading

0 comments on commit d35ba36

Please sign in to comment.