Refactor log and reaper exec to omit MAINPID

Using MAINPID breaks systemd's exit detection, as it stops watching the
original pid, but is unable to watch the new pid as it is not a child
of systemd itself. The best we can do is just notify when execing the child
process.

We also need to consolidate forking into a single place so that we don't
end up with multiple levels of child processes when both redirecting log
output and reaping child processes.

Signed-off-by: Brad Davidson <brad.davidson@rancher.com>
(cherry picked from commit dc18ef2e51)
This commit is contained in:
Brad Davidson 2021-10-08 12:47:20 -07:00 committed by Brad Davidson
parent 504e249a5e
commit a18c2efb4c
7 changed files with 126 additions and 105 deletions

View File

@ -23,12 +23,13 @@ func Run(ctx *cli.Context) error {
// database credentials or other secrets. // database credentials or other secrets.
gspt.SetProcTitle(os.Args[0] + " agent") gspt.SetProcTitle(os.Args[0] + " agent")
// Do init stuff if pid 1. // Evacuate cgroup v2 before doing anything else that may fork.
// This must be done before InitLogging as that may reexec in order to capture log output if err := cmds.EvacuateCgroup2(); err != nil {
if err := cmds.HandleInit(); err != nil {
return err return err
} }
// Initialize logging, and subprocess reaping if necessary.
// Log output redirection and subprocess reaping both require forking.
if err := cmds.InitLogging(); err != nil { if err := cmds.InitLogging(); err != nil {
return err return err
} }

View File

@ -2,6 +2,6 @@
package cmds package cmds
func HandleInit() error { func EvacuateCgroup2() error {
return nil return nil
} }

View File

@ -4,83 +4,21 @@ package cmds
import ( import (
"os" "os"
"os/signal"
"syscall"
"github.com/containerd/containerd/sys" "github.com/containerd/containerd/sys"
"github.com/erikdubbelboer/gspt"
"github.com/pkg/errors" "github.com/pkg/errors"
"github.com/rancher/k3s/pkg/version"
"github.com/rootless-containers/rootlesskit/pkg/parent/cgrouputil" "github.com/rootless-containers/rootlesskit/pkg/parent/cgrouputil"
) )
// HandleInit takes care of things that need to be done when running as process 1, usually in a // EvacuateCgroup2 will handle evacuating the root cgroup in order to enable subtree_control,
// Docker container. This includes evacuating the root cgroup and reaping child pids. // if running as pid 1 without rootless support.
func HandleInit() error { func EvacuateCgroup2() error {
if os.Getpid() != 1 { if os.Getpid() == 1 && !sys.RunningInUserNS() {
return nil
}
if !sys.RunningInUserNS() {
// The root cgroup has to be empty to enable subtree_control, so evacuate it by placing // The root cgroup has to be empty to enable subtree_control, so evacuate it by placing
// ourselves in the init cgroup. // ourselves in the init cgroup.
if err := cgrouputil.EvacuateCgroup2("init"); err != nil { if err := cgrouputil.EvacuateCgroup2("init"); err != nil {
return errors.Wrap(err, "failed to evacuate root cgroup") return errors.Wrap(err, "failed to evacuate root cgroup")
} }
} }
pwd, err := os.Getwd()
if err != nil {
return errors.Wrap(err, "failed to get working directory for init process")
}
go reapChildren()
// fork the main process to do work so that this init process can handle reaping pids
// without interfering with any other exec's that the rest of the codebase may do.
var wstatus syscall.WaitStatus
pattrs := &syscall.ProcAttr{
Dir: pwd,
Env: os.Environ(),
Sys: &syscall.SysProcAttr{Setsid: true},
Files: []uintptr{
uintptr(syscall.Stdin),
uintptr(syscall.Stdout),
uintptr(syscall.Stderr),
},
}
pid, err := syscall.ForkExec(os.Args[0], os.Args, pattrs)
if err != nil {
return errors.Wrap(err, "failed to fork/exec "+version.Program)
}
gspt.SetProcTitle(os.Args[0] + " init")
// wait for main process to exit, and return its status when it does
_, err = syscall.Wait4(pid, &wstatus, 0, nil)
for err == syscall.EINTR {
_, err = syscall.Wait4(pid, &wstatus, 0, nil)
}
os.Exit(wstatus.ExitStatus())
return nil return nil
} }
//reapChildren calls Wait4 whenever SIGCHLD is received
func reapChildren() {
sigs := make(chan os.Signal, 1)
signal.Notify(sigs, syscall.SIGCHLD)
for {
select {
case <-sigs:
}
for {
var wstatus syscall.WaitStatus
_, err := syscall.Wait4(-1, &wstatus, 0, nil)
for err == syscall.EINTR {
_, err = syscall.Wait4(-1, &wstatus, 0, nil)
}
if err == nil || err == syscall.ECHILD {
break
}
}
}
}

View File

@ -3,15 +3,10 @@ package cmds
import ( import (
"flag" "flag"
"fmt" "fmt"
"io"
"os"
"strconv" "strconv"
"sync" "sync"
"time" "time"
"github.com/docker/docker/pkg/reexec"
"github.com/natefinch/lumberjack"
"github.com/rancher/k3s/pkg/version"
"github.com/urfave/cli" "github.com/urfave/cli"
) )
@ -52,8 +47,8 @@ var (
func InitLogging() error { func InitLogging() error {
var rErr error var rErr error
logSetupOnce.Do(func() { logSetupOnce.Do(func() {
if LogConfig.LogFile != "" && os.Getenv("_K3S_LOG_REEXEC_") == "" { if err := forkIfLoggingOrReaping(); err != nil {
rErr = runWithLogging() rErr = err
return return
} }
@ -76,31 +71,6 @@ func checkUnixTimestamp() error {
return nil return nil
} }
func runWithLogging() error {
var (
l io.Writer
)
l = &lumberjack.Logger{
Filename: LogConfig.LogFile,
MaxSize: 50,
MaxBackups: 3,
MaxAge: 28,
Compress: true,
}
if LogConfig.AlsoLogToStderr {
l = io.MultiWriter(l, os.Stderr)
}
args := append([]string{version.Program}, os.Args[1:]...)
cmd := reexec.Command(args...)
cmd.Env = os.Environ()
cmd.Env = append(cmd.Env, "_K3S_LOG_REEXEC_=true")
cmd.Stderr = l
cmd.Stdout = l
cmd.Stdin = os.Stdin
return cmd.Run()
}
func setupLogging() { func setupLogging() {
flag.Set("v", strconv.Itoa(LogConfig.VLevel)) flag.Set("v", strconv.Itoa(LogConfig.VLevel))
flag.Set("vmodule", LogConfig.VModule) flag.Set("vmodule", LogConfig.VModule)

View File

@ -0,0 +1,7 @@
// +build !linux !cgo
package cmds
// forkIfLoggingOrReaping is the stub used when the build is not linux+cgo.
// It never forks and always reports success, so the caller simply carries on
// in the current process.
func forkIfLoggingOrReaping() error { return nil }

104
pkg/cli/cmds/log_linux.go Normal file
View File

@ -0,0 +1,104 @@
// +build linux,cgo
package cmds
import (
"io"
"os"
"os/exec"
"os/signal"
"syscall"
systemd "github.com/coreos/go-systemd/daemon"
"github.com/erikdubbelboer/gspt"
"github.com/natefinch/lumberjack"
"github.com/pkg/errors"
"github.com/rancher/k3s/pkg/version"
"golang.org/x/sys/unix"
)
// forkIfLoggingOrReaping handles forking off the actual k3s process if it is necessary to
// capture log output, or reap child processes. Reaping is only necessary when running
// as pid 1.
//
// When neither log redirection nor reaping is needed it returns nil and the caller keeps
// running in the current process. Otherwise it starts a child copy of the program, waits
// for it, and ends this process with os.Exit using the child's exit code, so in that case
// it does not return. An error is returned only when the working directory cannot be
// determined or the child process cannot be started.
func forkIfLoggingOrReaping() error {
// By default the child inherits this process's stdout and stderr.
var stdout, stderr io.Writer = os.Stdout, os.Stderr
// Redirect only when a log file is configured and _K3S_LOG_REEXEC_ is unset or empty.
// The child started below gets _K3S_LOG_REEXEC_=true, so it will not redirect again.
enableLogRedirect := LogConfig.LogFile != "" && os.Getenv("_K3S_LOG_REEXEC_") == ""
// Only the process running as pid 1 takes on reaping.
enableReaping := os.Getpid() == 1
if enableLogRedirect {
// Send the child's output to a rotating log file at the configured path.
var l io.Writer = &lumberjack.Logger{
Filename: LogConfig.LogFile,
MaxSize: 50,
MaxBackups: 3,
MaxAge: 28,
Compress: true,
}
// Optionally duplicate everything written to the log file onto this process's stderr.
if LogConfig.AlsoLogToStderr {
l = io.MultiWriter(l, os.Stderr)
}
// Both output streams of the child share the same writer.
stdout = l
stderr = l
}
if enableLogRedirect || enableReaping {
// This process becomes the supervising parent; mark it as such in the process title.
gspt.SetProcTitle(os.Args[0] + " init")
pwd, err := os.Getwd()
if err != nil {
return errors.Wrap(err, "failed to get working directory")
}
if enableReaping {
// If we're running as pid 1 we need to reap child processes or defunct containerd-shim
// child processes will accumulate.
// NOTE(review): the return value of Prctl is discarded, so a failure here is silent.
unix.Prctl(unix.PR_SET_CHILD_SUBREAPER, uintptr(1), 0, 0, 0)
// The reaper runs in the background for the remaining lifetime of this process.
go reapChildren()
}
// The child's argv[0] is version.Program, followed by this process's own arguments.
args := append([]string{version.Program}, os.Args[1:]...)
// _K3S_LOG_REEXEC_ marks the child as already re-executed (see enableLogRedirect above).
// NOTIFY_SOCKET is appended with an empty value, presumably so the child does not try to
// send its own systemd notifications — the parent sends READY on its behalf below.
env := append(os.Environ(), "_K3S_LOG_REEXEC_=true", "NOTIFY_SOCKET=")
// Run the same executable in the same working directory, in a new session (Setsid).
cmd := &exec.Cmd{
Path: os.Args[0],
Dir: pwd,
Args: args,
Env: env,
Stdin: os.Stdin,
Stdout: stdout,
Stderr: stderr,
SysProcAttr: &syscall.SysProcAttr{
Setsid: true,
},
}
if err := cmd.Start(); err != nil {
return err
}
// The child process won't be allowed to notify, so we send one for it as soon as it's started,
// and then wait for it to exit and pass along the exit code.
// NOTE(review): the results of SdNotify and Wait are both discarded; only the exit code
// recorded in cmd.ProcessState is propagated.
systemd.SdNotify(true, "READY=1\n")
cmd.Wait()
os.Exit(cmd.ProcessState.ExitCode())
}
// Neither redirection nor reaping is required: continue in this process.
return nil
}
// reapChildren subscribes to SIGCHLD and, for every notification received,
// calls Wait4 on any child (pid -1) until a call either succeeds or reports
// ECHILD. Any other error, including EINTR, causes the wait to be retried.
// The function never returns and is meant to be run in its own goroutine.
func reapChildren() {
	sigCh := make(chan os.Signal, 1)
	signal.Notify(sigCh, syscall.SIGCHLD)

	// The channel is never closed, so this loop runs for the life of the process.
	for range sigCh {
		for {
			var status syscall.WaitStatus
			_, waitErr := syscall.Wait4(-1, &status, 0, nil)
			if waitErr == nil || waitErr == syscall.ECHILD {
				// Either one child was collected or there are none left to wait for;
				// go back to waiting for the next SIGCHLD.
				break
			}
			// Interrupted (EINTR) or failed for another reason: try again.
		}
	}
}

View File

@ -55,12 +55,13 @@ func run(app *cli.Context, cfg *cmds.Server, leaderControllers server.CustomCont
// database credentials or other secrets. // database credentials or other secrets.
gspt.SetProcTitle(os.Args[0] + " server") gspt.SetProcTitle(os.Args[0] + " server")
// Do init stuff if pid 1. // Evacuate cgroup v2 before doing anything else that may fork.
// This must be done before InitLogging as that may reexec in order to capture log output if err := cmds.EvacuateCgroup2(); err != nil {
if err := cmds.HandleInit(); err != nil {
return err return err
} }
// Initialize logging, and subprocess reaping if necessary.
// Log output redirection and subprocess reaping both require forking.
if err := cmds.InitLogging(); err != nil { if err := cmds.InitLogging(); err != nil {
return err return err
} }