/*
Copyright 2014 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package kubelet

import (
	"fmt"
	"os"
	"time"

	"k8s.io/api/core/v1"
	"k8s.io/klog/v2"

	kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
	kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
	"k8s.io/kubernetes/pkg/kubelet/util/format"
)

const (
	// runOnceManifestDelay is how long RunOnce waits for a pod manifest
	// update to arrive before giving up.
	runOnceManifestDelay = 1 * time.Second
	// runOnceMaxRetries is the number of times runPod re-syncs a pod whose
	// containers are not yet running before reporting a timeout.
	runOnceMaxRetries = 10
	// runOnceRetryDelay is the initial wait between retries; it is multiplied
	// by runOnceRetryDelayBackoff after each attempt.
	runOnceRetryDelay        = 1 * time.Second
	runOnceRetryDelayBackoff = 2
)
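
// With the defaults above, a pod whose containers never reach the running
// state is re-synced with waits of 1s, 2s, 4s, ... 512s between attempts,
// i.e. roughly 2^10-1 = 1023 seconds of sleeping in total before runPod
// reports a timeout (plus the time spent on each sync).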

// RunPodResult defines the running results of a Pod.
type RunPodResult struct {
	Pod *v1.Pod
	Err error
}

// RunOnce polls from one configuration update and runs the associated pods.
func (kl *Kubelet) RunOnce(updates <-chan kubetypes.PodUpdate) ([]RunPodResult, error) {
	// Setup filesystem directories.
	if err := kl.setupDataDirs(); err != nil {
		return nil, err
	}

	// If the container logs directory does not exist, create it.
	if _, err := os.Stat(ContainerLogsDir); err != nil {
		if err := kl.os.MkdirAll(ContainerLogsDir, 0755); err != nil {
			klog.ErrorS(err, "Failed to create directory", "path", ContainerLogsDir)
		}
	}

	// Process the first pod update to arrive, or fail if none shows up
	// within runOnceManifestDelay.
	select {
	case u := <-updates:
		klog.InfoS("Processing manifest with pods", "numPods", len(u.Pods))
		result, err := kl.runOnce(u.Pods, runOnceRetryDelay)
		klog.InfoS("Finished processing pods", "numPods", len(u.Pods))
		return result, err
	case <-time.After(runOnceManifestDelay):
		return nil, fmt.Errorf("no pod manifest update after %v", runOnceManifestDelay)
	}
}
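
// The sketch below is not part of the kubelet; pods and kl are placeholders.
// It shows one way a caller might drive RunOnce with a hand-built update
// channel, whereas the real kubelet feeds this channel from its configured
// pod sources (files, HTTP endpoints, API server):
//
//	updates := make(chan kubetypes.PodUpdate, 1)
//	updates <- kubetypes.PodUpdate{Pods: pods, Op: kubetypes.SET, Source: kubetypes.FileSource}
//	results, err := kl.RunOnce(updates)
//	if err != nil {
//		// At least one admitted pod failed to start; inspect results for details.
//	}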

// runOnce runs a given set of pods and returns their results.
func (kl *Kubelet) runOnce(pods []*v1.Pod, retryDelay time.Duration) (results []RunPodResult, err error) {
	ch := make(chan RunPodResult)
	admitted := []*v1.Pod{}
	for _, pod := range pods {
		// Check if we can admit the pod.
		if ok, reason, message := kl.canAdmitPod(admitted, pod); !ok {
			kl.rejectPod(pod, reason, message)
			results = append(results, RunPodResult{pod, nil})
			continue
		}

		admitted = append(admitted, pod)
		// Run each admitted pod in its own goroutine; results are collected below.
		go func(pod *v1.Pod) {
			err := kl.runPod(pod, retryDelay)
			ch <- RunPodResult{pod, err}
		}(pod)
	}

	klog.InfoS("Waiting for pods", "numPods", len(admitted))
	failedPods := []string{}
	for i := 0; i < len(admitted); i++ {
		res := <-ch
		results = append(results, res)
		if res.Err != nil {
			failedContainerName, err := kl.getFailedContainers(res.Pod)
			if err != nil {
				klog.InfoS("Unable to get failed containers' names for pod", "pod", klog.KObj(res.Pod), "err", err)
			} else {
				klog.InfoS("Unable to start pod because container failed", "pod", klog.KObj(res.Pod), "containerName", failedContainerName)
			}
			failedPods = append(failedPods, format.Pod(res.Pod))
		} else {
			klog.InfoS("Started pod", "pod", klog.KObj(res.Pod))
		}
	}
	if len(failedPods) > 0 {
		return results, fmt.Errorf("error running pods: %v", failedPods)
	}
	klog.InfoS("Pods started", "numPods", len(pods))
	return results, err
}

// runPod runs a single pod and waits until all of its containers are running.
func (kl *Kubelet) runPod(pod *v1.Pod, retryDelay time.Duration) error {
	delay := retryDelay
	retry := 0
	for {
		status, err := kl.containerRuntime.GetPodStatus(pod.UID, pod.Name, pod.Namespace)
		if err != nil {
			return fmt.Errorf("unable to get status for pod %q: %v", format.Pod(pod), err)
		}

		if kl.isPodRunning(pod, status) {
			klog.InfoS("Pod's containers running", "pod", klog.KObj(pod))
			return nil
		}
		klog.InfoS("Pod's containers not running: syncing", "pod", klog.KObj(pod))

		klog.InfoS("Creating a mirror pod for static pod", "pod", klog.KObj(pod))
		if err := kl.podManager.CreateMirrorPod(pod); err != nil {
			klog.ErrorS(err, "Failed creating a mirror pod", "pod", klog.KObj(pod))
		}
		mirrorPod, _ := kl.podManager.GetMirrorPodByPod(pod)
		if err = kl.syncPod(syncPodOptions{
			pod:        pod,
			mirrorPod:  mirrorPod,
			podStatus:  status,
			updateType: kubetypes.SyncPodUpdate,
		}); err != nil {
			return fmt.Errorf("error syncing pod %q: %v", format.Pod(pod), err)
		}
		if retry >= runOnceMaxRetries {
			return fmt.Errorf("timeout error: pod %q containers not running after %d retries", format.Pod(pod), runOnceMaxRetries)
		}
		// TODO(proppy): health checking would be better than waiting + checking the state at the next iteration.
		klog.InfoS("Pod's containers synced, waiting", "pod", klog.KObj(pod), "duration", delay)
		time.Sleep(delay)
		retry++
		// Grow the delay for the next attempt.
		delay *= runOnceRetryDelayBackoff
	}
}

// isPodRunning returns true if all containers of the pod are running.
func (kl *Kubelet) isPodRunning(pod *v1.Pod, status *kubecontainer.PodStatus) bool {
	for _, c := range pod.Spec.Containers {
		cs := status.FindContainerStatusByName(c.Name)
		if cs == nil || cs.State != kubecontainer.ContainerStateRunning {
			klog.InfoS("Container not running", "pod", klog.KObj(pod), "containerName", c.Name)
			return false
		}
	}
	return true
}

// getFailedContainers returns the names of the pod's containers that have failed.
func (kl *Kubelet) getFailedContainers(pod *v1.Pod) ([]string, error) {
	status, err := kl.containerRuntime.GetPodStatus(pod.UID, pod.Name, pod.Namespace)
	if err != nil {
		return nil, fmt.Errorf("unable to get status for pod %q: %v", format.Pod(pod), err)
	}
	var containerNames []string
	for _, cs := range status.ContainerStatuses {
		if cs.State != kubecontainer.ContainerStateRunning && cs.ExitCode != 0 {
			containerNames = append(containerNames, cs.Name)
		}
	}
	return containerNames, nil
}