diff --git a/pkg/cli/agent/agent.go b/pkg/cli/agent/agent.go index 1768e10b83..608209ffd8 100644 --- a/pkg/cli/agent/agent.go +++ b/pkg/cli/agent/agent.go @@ -23,12 +23,13 @@ func Run(ctx *cli.Context) error { // database credentials or other secrets. gspt.SetProcTitle(os.Args[0] + " agent") - // Do init stuff if pid 1. - // This must be done before InitLogging as that may reexec in order to capture log output - if err := cmds.HandleInit(); err != nil { + // Evacuate cgroup v2 before doing anything else that may fork. + if err := cmds.EvacuateCgroup2(); err != nil { return err } + // Initialize logging, and subprocess reaping if necessary. + // Log output redirection and subprocess reaping both require forking. if err := cmds.InitLogging(); err != nil { return err } diff --git a/pkg/cli/cmds/init_default.go b/pkg/cli/cmds/init_default.go index d86906ac6c..12da597a5f 100644 --- a/pkg/cli/cmds/init_default.go +++ b/pkg/cli/cmds/init_default.go @@ -2,6 +2,6 @@ package cmds -func HandleInit() error { +func EvacuateCgroup2() error { return nil } diff --git a/pkg/cli/cmds/init_linux.go b/pkg/cli/cmds/init_linux.go index 6d82fd4fe4..42ae052f86 100644 --- a/pkg/cli/cmds/init_linux.go +++ b/pkg/cli/cmds/init_linux.go @@ -4,83 +4,21 @@ package cmds import ( "os" - "os/signal" - "syscall" "github.com/containerd/containerd/sys" - "github.com/erikdubbelboer/gspt" "github.com/pkg/errors" - "github.com/rancher/k3s/pkg/version" "github.com/rootless-containers/rootlesskit/pkg/parent/cgrouputil" ) -// HandleInit takes care of things that need to be done when running as process 1, usually in a -// Docker container. This includes evacuating the root cgroup and reaping child pids. -func HandleInit() error { - if os.Getpid() != 1 { - return nil - } - - if !sys.RunningInUserNS() { +// EvacuateCgroup2 will handle evacuating the root cgroup in order to enable subtree_control, +// if running as pid 1 without rootless support. +func EvacuateCgroup2() error { + if os.Getpid() == 1 && !sys.RunningInUserNS() { // The root cgroup has to be empty to enable subtree_control, so evacuate it by placing // ourselves in the init cgroup. if err := cgrouputil.EvacuateCgroup2("init"); err != nil { return errors.Wrap(err, "failed to evacuate root cgroup") } } - - pwd, err := os.Getwd() - if err != nil { - return errors.Wrap(err, "failed to get working directory for init process") - } - - go reapChildren() - - // fork the main process to do work so that this init process can handle reaping pids - // without interfering with any other exec's that the rest of the codebase may do. - var wstatus syscall.WaitStatus - pattrs := &syscall.ProcAttr{ - Dir: pwd, - Env: os.Environ(), - Sys: &syscall.SysProcAttr{Setsid: true}, - Files: []uintptr{ - uintptr(syscall.Stdin), - uintptr(syscall.Stdout), - uintptr(syscall.Stderr), - }, - } - pid, err := syscall.ForkExec(os.Args[0], os.Args, pattrs) - if err != nil { - return errors.Wrap(err, "failed to fork/exec "+version.Program) - } - - gspt.SetProcTitle(os.Args[0] + " init") - // wait for main process to exit, and return its status when it does - _, err = syscall.Wait4(pid, &wstatus, 0, nil) - for err == syscall.EINTR { - _, err = syscall.Wait4(pid, &wstatus, 0, nil) - } - os.Exit(wstatus.ExitStatus()) return nil } - -//reapChildren calls Wait4 whenever SIGCHLD is received -func reapChildren() { - sigs := make(chan os.Signal, 1) - signal.Notify(sigs, syscall.SIGCHLD) - for { - select { - case <-sigs: - } - for { - var wstatus syscall.WaitStatus - _, err := syscall.Wait4(-1, &wstatus, 0, nil) - for err == syscall.EINTR { - _, err = syscall.Wait4(-1, &wstatus, 0, nil) - } - if err == nil || err == syscall.ECHILD { - break - } - } - } -} diff --git a/pkg/cli/cmds/log.go b/pkg/cli/cmds/log.go index 44f6bd8d01..3f896fcc11 100644 --- a/pkg/cli/cmds/log.go +++ b/pkg/cli/cmds/log.go @@ -3,15 +3,10 @@ package cmds import ( "flag" "fmt" - "io" - "os" "strconv" "sync" "time" - "github.com/docker/docker/pkg/reexec" - "github.com/natefinch/lumberjack" - "github.com/rancher/k3s/pkg/version" "github.com/urfave/cli" ) @@ -52,8 +47,8 @@ var ( func InitLogging() error { var rErr error logSetupOnce.Do(func() { - if LogConfig.LogFile != "" && os.Getenv("_K3S_LOG_REEXEC_") == "" { - rErr = runWithLogging() + if err := forkIfLoggingOrReaping(); err != nil { + rErr = err return } @@ -76,31 +71,6 @@ func checkUnixTimestamp() error { return nil } -func runWithLogging() error { - var ( - l io.Writer - ) - l = &lumberjack.Logger{ - Filename: LogConfig.LogFile, - MaxSize: 50, - MaxBackups: 3, - MaxAge: 28, - Compress: true, - } - if LogConfig.AlsoLogToStderr { - l = io.MultiWriter(l, os.Stderr) - } - - args := append([]string{version.Program}, os.Args[1:]...) - cmd := reexec.Command(args...) - cmd.Env = os.Environ() - cmd.Env = append(cmd.Env, "_K3S_LOG_REEXEC_=true") - cmd.Stderr = l - cmd.Stdout = l - cmd.Stdin = os.Stdin - return cmd.Run() -} - func setupLogging() { flag.Set("v", strconv.Itoa(LogConfig.VLevel)) flag.Set("vmodule", LogConfig.VModule) diff --git a/pkg/cli/cmds/log_default.go b/pkg/cli/cmds/log_default.go new file mode 100644 index 0000000000..b056c2cfed --- /dev/null +++ b/pkg/cli/cmds/log_default.go @@ -0,0 +1,7 @@ +// +build !linux !cgo + +package cmds + +func forkIfLoggingOrReaping() error { + return nil +} diff --git a/pkg/cli/cmds/log_linux.go b/pkg/cli/cmds/log_linux.go new file mode 100644 index 0000000000..4a95cb012d --- /dev/null +++ b/pkg/cli/cmds/log_linux.go @@ -0,0 +1,104 @@ +// +build linux,cgo + +package cmds + +import ( + "io" + "os" + "os/exec" + "os/signal" + "syscall" + + systemd "github.com/coreos/go-systemd/daemon" + "github.com/erikdubbelboer/gspt" + "github.com/natefinch/lumberjack" + "github.com/pkg/errors" + "github.com/rancher/k3s/pkg/version" + "golang.org/x/sys/unix" +) + +// forkIfLoggingOrReaping handles forking off the actual k3s process if it is necessary to +// capture log output, or reap child processes. Reaping is only necessary when running +// as pid 1. +func forkIfLoggingOrReaping() error { + var stdout, stderr io.Writer = os.Stdout, os.Stderr + enableLogRedirect := LogConfig.LogFile != "" && os.Getenv("_K3S_LOG_REEXEC_") == "" + enableReaping := os.Getpid() == 1 + + if enableLogRedirect { + var l io.Writer = &lumberjack.Logger{ + Filename: LogConfig.LogFile, + MaxSize: 50, + MaxBackups: 3, + MaxAge: 28, + Compress: true, + } + if LogConfig.AlsoLogToStderr { + l = io.MultiWriter(l, os.Stderr) + } + stdout = l + stderr = l + } + + if enableLogRedirect || enableReaping { + gspt.SetProcTitle(os.Args[0] + " init") + + pwd, err := os.Getwd() + if err != nil { + return errors.Wrap(err, "failed to get working directory") + } + + if enableReaping { + // If we're running as pid 1 we need to reap child processes or defunct containerd-shim + // child processes will accumulate. + unix.Prctl(unix.PR_SET_CHILD_SUBREAPER, uintptr(1), 0, 0, 0) + go reapChildren() + } + + args := append([]string{version.Program}, os.Args[1:]...) + env := append(os.Environ(), "_K3S_LOG_REEXEC_=true", "NOTIFY_SOCKET=") + cmd := &exec.Cmd{ + Path: os.Args[0], + Dir: pwd, + Args: args, + Env: env, + Stdin: os.Stdin, + Stdout: stdout, + Stderr: stderr, + SysProcAttr: &syscall.SysProcAttr{ + Setsid: true, + }, + } + if err := cmd.Start(); err != nil { + return err + } + + // The child process won't be allowed to notify, so we send one for it as soon as it's started, + // and then wait for it to exit and pass along the exit code. + systemd.SdNotify(true, "READY=1\n") + cmd.Wait() + os.Exit(cmd.ProcessState.ExitCode()) + } + return nil +} + +//reapChildren calls Wait4 whenever SIGCHLD is received +func reapChildren() { + sigs := make(chan os.Signal, 1) + signal.Notify(sigs, syscall.SIGCHLD) + for { + select { + case <-sigs: + } + for { + var wstatus syscall.WaitStatus + _, err := syscall.Wait4(-1, &wstatus, 0, nil) + for err == syscall.EINTR { + _, err = syscall.Wait4(-1, &wstatus, 0, nil) + } + if err == nil || err == syscall.ECHILD { + break + } + } + } +} diff --git a/pkg/cli/server/server.go b/pkg/cli/server/server.go index e344fd80d7..a520de3623 100644 --- a/pkg/cli/server/server.go +++ b/pkg/cli/server/server.go @@ -55,12 +55,13 @@ func run(app *cli.Context, cfg *cmds.Server, leaderControllers server.CustomCont // database credentials or other secrets. gspt.SetProcTitle(os.Args[0] + " server") - // Do init stuff if pid 1. - // This must be done before InitLogging as that may reexec in order to capture log output - if err := cmds.HandleInit(); err != nil { + // Evacuate cgroup v2 before doing anything else that may fork. + if err := cmds.EvacuateCgroup2(); err != nil { return err } + // Initialize logging, and subprocess reaping if necessary. + // Log output redirection and subprocess reaping both require forking. if err := cmds.InitLogging(); err != nil { return err }