Refactor log and reaper exec to omit MAINPID

Using MAINPID breaks systemd's exit detection, as it stops watching the
original pid, but is unable to watch the new pid as it is not a child
of systemd itself. The best we can do is just notify when execing the child
process.

We also need to consolidate forking into a single place so that we don't
end up with multiple levels of child processes if both redirecting log
output and reaping child processes.

Signed-off-by: Brad Davidson <brad.davidson@rancher.com>
(cherry picked from commit dc18ef2e51)
This commit is contained in:
Brad Davidson 2021-10-08 12:47:20 -07:00 committed by Brad Davidson
parent 504e249a5e
commit a18c2efb4c
7 changed files with 126 additions and 105 deletions

View File

@ -23,12 +23,13 @@ func Run(ctx *cli.Context) error {
// database credentials or other secrets.
gspt.SetProcTitle(os.Args[0] + " agent")
// Do init stuff if pid 1.
// This must be done before InitLogging as that may reexec in order to capture log output
if err := cmds.HandleInit(); err != nil {
// Evacuate cgroup v2 before doing anything else that may fork.
if err := cmds.EvacuateCgroup2(); err != nil {
return err
}
// Initialize logging, and subprocess reaping if necessary.
// Log output redirection and subprocess reaping both require forking.
if err := cmds.InitLogging(); err != nil {
return err
}

View File

@ -2,6 +2,6 @@
package cmds
func HandleInit() error {
func EvacuateCgroup2() error {
return nil
}

View File

@ -4,83 +4,21 @@ package cmds
import (
"os"
"os/signal"
"syscall"
"github.com/containerd/containerd/sys"
"github.com/erikdubbelboer/gspt"
"github.com/pkg/errors"
"github.com/rancher/k3s/pkg/version"
"github.com/rootless-containers/rootlesskit/pkg/parent/cgrouputil"
)
// HandleInit takes care of things that need to be done when running as process 1, usually in a
// Docker container. This includes evacuating the root cgroup and reaping child pids.
func HandleInit() error {
if os.Getpid() != 1 {
return nil
}
if !sys.RunningInUserNS() {
// EvacuateCgroup2 will handle evacuating the root cgroup in order to enable subtree_control,
// if running as pid 1 without rootless support.
func EvacuateCgroup2() error {
if os.Getpid() == 1 && !sys.RunningInUserNS() {
// The root cgroup has to be empty to enable subtree_control, so evacuate it by placing
// ourselves in the init cgroup.
if err := cgrouputil.EvacuateCgroup2("init"); err != nil {
return errors.Wrap(err, "failed to evacuate root cgroup")
}
}
pwd, err := os.Getwd()
if err != nil {
return errors.Wrap(err, "failed to get working directory for init process")
}
go reapChildren()
// fork the main process to do work so that this init process can handle reaping pids
// without interfering with any other exec's that the rest of the codebase may do.
var wstatus syscall.WaitStatus
pattrs := &syscall.ProcAttr{
Dir: pwd,
Env: os.Environ(),
Sys: &syscall.SysProcAttr{Setsid: true},
Files: []uintptr{
uintptr(syscall.Stdin),
uintptr(syscall.Stdout),
uintptr(syscall.Stderr),
},
}
pid, err := syscall.ForkExec(os.Args[0], os.Args, pattrs)
if err != nil {
return errors.Wrap(err, "failed to fork/exec "+version.Program)
}
gspt.SetProcTitle(os.Args[0] + " init")
// wait for main process to exit, and return its status when it does
_, err = syscall.Wait4(pid, &wstatus, 0, nil)
for err == syscall.EINTR {
_, err = syscall.Wait4(pid, &wstatus, 0, nil)
}
os.Exit(wstatus.ExitStatus())
return nil
}
// reapChildren blocks forever, reaping a child process each time SIGCHLD is
// delivered. It is meant to be run in its own goroutine.
func reapChildren() {
	sigCh := make(chan os.Signal, 1)
	signal.Notify(sigCh, syscall.SIGCHLD)

	// The channel is never closed, so this loop never terminates.
	for range sigCh {
		// Keep calling Wait4 for any child until it either succeeds or
		// reports that there are no children left; every other error
		// (including EINTR) is simply retried.
		for {
			var status syscall.WaitStatus
			_, err := syscall.Wait4(-1, &status, 0, nil)
			if err == nil || err == syscall.ECHILD {
				break
			}
		}
	}
}

View File

@ -3,15 +3,10 @@ package cmds
import (
"flag"
"fmt"
"io"
"os"
"strconv"
"sync"
"time"
"github.com/docker/docker/pkg/reexec"
"github.com/natefinch/lumberjack"
"github.com/rancher/k3s/pkg/version"
"github.com/urfave/cli"
)
@ -52,8 +47,8 @@ var (
func InitLogging() error {
var rErr error
logSetupOnce.Do(func() {
if LogConfig.LogFile != "" && os.Getenv("_K3S_LOG_REEXEC_") == "" {
rErr = runWithLogging()
if err := forkIfLoggingOrReaping(); err != nil {
rErr = err
return
}
@ -76,31 +71,6 @@ func checkUnixTimestamp() error {
return nil
}
// runWithLogging re-executes the program with its stdout and stderr routed to
// the file named by LogConfig.LogFile, and returns the result of running that
// child to completion.
//
// The log file is written through a lumberjack.Logger with fixed rotation
// settings (MaxSize 50, MaxBackups 3, MaxAge 28, compression enabled). When
// LogConfig.AlsoLogToStderr is set, output is duplicated to this process's
// stderr as well. The child is started with _K3S_LOG_REEXEC_=true added to its
// environment.
func runWithLogging() error {
	var logOut io.Writer = &lumberjack.Logger{
		Filename:   LogConfig.LogFile,
		MaxSize:    50,
		MaxBackups: 3,
		MaxAge:     28,
		Compress:   true,
	}
	if LogConfig.AlsoLogToStderr {
		logOut = io.MultiWriter(logOut, os.Stderr)
	}

	// argv[0] is replaced with the program name; the remaining arguments are
	// passed through unchanged.
	argv := make([]string, 0, len(os.Args))
	argv = append(argv, version.Program)
	argv = append(argv, os.Args[1:]...)

	child := reexec.Command(argv...)
	child.Env = append(os.Environ(), "_K3S_LOG_REEXEC_=true")
	child.Stdin = os.Stdin
	child.Stdout = logOut
	child.Stderr = logOut
	return child.Run()
}
func setupLogging() {
flag.Set("v", strconv.Itoa(LogConfig.VLevel))
flag.Set("vmodule", LogConfig.VModule)

View File

@ -0,0 +1,7 @@
// +build !linux !cgo
package cmds
// forkIfLoggingOrReaping is the stub for builds that are not linux with cgo.
// It never forks and always returns nil.
func forkIfLoggingOrReaping() error {
	return nil
}

104
pkg/cli/cmds/log_linux.go Normal file
View File

@ -0,0 +1,104 @@
// +build linux,cgo
package cmds
import (
"io"
"os"
"os/exec"
"os/signal"
"syscall"
systemd "github.com/coreos/go-systemd/daemon"
"github.com/erikdubbelboer/gspt"
"github.com/natefinch/lumberjack"
"github.com/pkg/errors"
"github.com/rancher/k3s/pkg/version"
"golang.org/x/sys/unix"
)
// forkIfLoggingOrReaping handles forking off the actual k3s process if it is necessary to
// capture log output, or reap child processes. Reaping is only necessary when running
// as pid 1.
//
// When neither condition applies it returns nil and the caller continues in the current
// process. When a child is started successfully, this function does not return: it waits
// for the child and then calls os.Exit with the child's exit code. An error is returned
// only if the working directory cannot be determined or the child cannot be started.
func forkIfLoggingOrReaping() error {
var stdout, stderr io.Writer = os.Stdout, os.Stderr
// Redirect only if a log file is configured and _K3S_LOG_REEXEC_ is not already set;
// the variable is set on the child below, so the child does not redirect again.
enableLogRedirect := LogConfig.LogFile != "" && os.Getenv("_K3S_LOG_REEXEC_") == ""
enableReaping := os.Getpid() == 1
if enableLogRedirect {
// Send the child's output to the configured log file via lumberjack.
var l io.Writer = &lumberjack.Logger{
Filename: LogConfig.LogFile,
MaxSize: 50,
MaxBackups: 3,
MaxAge: 28,
Compress: true,
}
if LogConfig.AlsoLogToStderr {
// Duplicate everything written to the log file onto this process's stderr.
l = io.MultiWriter(l, os.Stderr)
}
stdout = l
stderr = l
}
if enableLogRedirect || enableReaping {
// From here on this process only supervises the child, so retitle it accordingly.
gspt.SetProcTitle(os.Args[0] + " init")
pwd, err := os.Getwd()
if err != nil {
return errors.Wrap(err, "failed to get working directory")
}
if enableReaping {
// If we're running as pid 1 we need to reap child processes or defunct containerd-shim
// child processes will accumulate.
// NOTE(review): the return value of Prctl is discarded, so a failure here is silent.
unix.Prctl(unix.PR_SET_CHILD_SUBREAPER, uintptr(1), 0, 0, 0)
go reapChildren()
}
// The child is this same executable (os.Args[0]) with argv[0] replaced by the program name.
args := append([]string{version.Program}, os.Args[1:]...)
// Mark the child as re-executed and give it an empty NOTIFY_SOCKET.
env := append(os.Environ(), "_K3S_LOG_REEXEC_=true", "NOTIFY_SOCKET=")
cmd := &exec.Cmd{
Path: os.Args[0],
Dir: pwd,
Args: args,
Env: env,
Stdin: os.Stdin,
Stdout: stdout,
Stderr: stderr,
SysProcAttr: &syscall.SysProcAttr{
Setsid: true,
},
}
if err := cmd.Start(); err != nil {
return err
}
// The child process won't be allowed to notify, so we send one for it as soon as it's started,
// and then wait for it to exit and pass along the exit code.
// The results of SdNotify and Wait are intentionally not checked; only the exit code
// recorded in ProcessState is propagated.
systemd.SdNotify(true, "READY=1\n")
cmd.Wait()
os.Exit(cmd.ProcessState.ExitCode())
}
return nil
}
// reapChildren blocks forever, reaping a child process each time SIGCHLD is
// delivered. It is meant to be run in its own goroutine.
func reapChildren() {
	sigCh := make(chan os.Signal, 1)
	signal.Notify(sigCh, syscall.SIGCHLD)

	// The channel is never closed, so this loop never terminates.
	for range sigCh {
		// Keep calling Wait4 for any child until it either succeeds or
		// reports that there are no children left; every other error
		// (including EINTR) is simply retried.
		for {
			var status syscall.WaitStatus
			_, err := syscall.Wait4(-1, &status, 0, nil)
			if err == nil || err == syscall.ECHILD {
				break
			}
		}
	}
}

View File

@ -55,12 +55,13 @@ func run(app *cli.Context, cfg *cmds.Server, leaderControllers server.CustomCont
// database credentials or other secrets.
gspt.SetProcTitle(os.Args[0] + " server")
// Do init stuff if pid 1.
// This must be done before InitLogging as that may reexec in order to capture log output
if err := cmds.HandleInit(); err != nil {
// Evacuate cgroup v2 before doing anything else that may fork.
if err := cmds.EvacuateCgroup2(); err != nil {
return err
}
// Initialize logging, and subprocess reaping if necessary.
// Log output redirection and subprocess reaping both require forking.
if err := cmds.InitLogging(); err != nil {
return err
}