k3s/scripts/test-helpers
James Blair 86be784aa0 Replace deprecated k8s registry references.
Problem:
Previously all of Kubernetes' image hosting has been out of gcr.io. There were significant egress costs associated with this when images were pulled from entities outside gcp.  Refer to https://github.com/kubernetes/k8s.io/wiki/New-Registry-url-for-Kubernetes-(registry.k8s.io)

Solution:
As highlighted at KubeCon NA 2022 k8s infra SIG update, the replacement for k8s.gcr.io which is registry.k8s.io is now ready for mainstream use and the old k8s.gcr.io has been formally deprecated. This commit migrates all references for k3s to registry.k8s.io.

Signed-off-by: James Blair <mail@jamesblair.net>
2022-11-01 16:06:50 -07:00

649 lines
17 KiB
Bash
Executable File

#!/bin/bash
# ---
# Check whether a port number appears in the netstat socket listing.
# Arguments: $1 - port number to search for
# Returns:   0 if some netstat line contains ":<port> ", non-zero otherwise
port-used() {
  local needle=":$1 "
  netstat -tuna | grep -q -e "$needle"
}
export -f port-used
# ---
# Pick a random port that port-used reports as free and print it.
# Candidates are drawn as 10000 + RANDOM % 50000 until one is unused.
# Outputs: the selected port number on stdout
get-port() {
  local candidate
  while true; do
    candidate=$((RANDOM % 50000 + 10000))
    if ! port-used "$candidate"; then
      break
    fi
  done
  echo "$candidate"
}
export -f get-port
# ---
# Copy the k3s kubeconfig out of a server container and rewrite its API port.
# Runs in a subshell so that `set -e -o pipefail` does not leak to the caller.
# Globals:   TEST_DIR (read)
# Arguments: $1 - server index (defaults to 1)
# Outputs:   writes $TEST_DIR/servers/<index>/kubeconfig.yaml
# Returns:   non-zero if the metadata files cannot be read or any stage of the
#            docker cp | tar | sed pipeline fails
fetch-kubeconfig() {(
  set -e -o pipefail
  local num=${1:-1}
  local server_dir="$TEST_DIR/servers/$num"
  local name port
  # Declared separately from the assignment so a failing `cat` is not masked
  # by the exit status of `local`; bail out explicitly because errexit is
  # ignored when the caller invokes us inside a condition.
  name=$(cat "$server_dir/metadata/name") || exit
  port=$(cat "$server_dir/metadata/port") || exit
  # `docker cp ... -` streams a tar archive; extract it to stdout and point the
  # kubeconfig at the host port recorded in the server metadata.
  docker cp "$name:/etc/rancher/k3s/k3s.yaml" - 2>/dev/null \
    | tar -xO 2>/dev/null \
    | sed -e "s/:6443/:$port/g" >"$server_dir/kubeconfig.yaml"
)}
export -f fetch-kubeconfig
# ---
# Poll fetch-kubeconfig every 5 seconds until it succeeds.
# Arguments: $1 - server index, forwarded to fetch-kubeconfig
# Outputs:   a progress message on stderr for every failed attempt
wait-for-kubeconfig() {
  until fetch-kubeconfig $1; do
    echo 'Waiting for kubeconfig to become available...' >&2
    sleep 5
  done
}
export -f wait-for-kubeconfig
# ---
# Print how many node conditions have type "Ready" and status "True".
# Outputs: the count on stdout, with all whitespace stripped
count-ready-nodes() {
  local ready_filter='.items[].status.conditions[] | select(.type == "Ready" and .status == "True") | .type'
  kubectl get nodes -o json | jq "$ready_filter" | wc -l | tr -d '[:space:]'
}
export -f count-ready-nodes
# ---
# Poll count-ready-nodes every 5 seconds until it equals the expected number.
# Arguments: $1 - number of Ready nodes to wait for
# Outputs:   a progress message on stderr for every unsuccessful poll
wait-for-nodes() {
  until [[ $(count-ready-nodes) -eq $1 ]]; do
    echo 'Waiting for nodes to be ready...' >&2
    sleep 5
  done
}
export -f wait-for-nodes
# ---
# Print the `ready` flag of every kube-system container with the given name.
# Arguments: $1 - container name to select
# Outputs:   one jq result line per matching container status; jq errors are
#            discarded
pod-ready() {
  local container=$1
  local filter=".items[].status | select(.containerStatuses != null) | .containerStatuses[] | select(.name == \"${container}\") | .ready"
  kubectl get pods -n kube-system -o json | jq "$filter" 2>/dev/null
}
export -f pod-ready
# ---
# Block until every named kube-system container reports ready.
# A service counts as ready when the de-duplicated output of pod-ready is
# exactly "true", i.e. at least one container exists and all of them are ready.
# Arguments: $@ - container names to wait for, one per argument
# Outputs:   progress messages on stderr, one "is ready" line per service on
#            stdout
wait-for-services() {
  local service
  # Quote "$@" and "$service" so each argument is used verbatim instead of
  # being re-split or glob-expanded.
  for service in "$@"; do
    while [[ "$(pod-ready "$service" | sort -u)" != 'true' ]]; do
      echo "Waiting for service $service to be ready..." >&2
      sleep 5
    done
    echo "Service $service is ready"
  done
}
export -f wait-for-services
# ---
# Re-run the command stored in DB_CONNECTION_TEST every 5 seconds until it
# succeeds.
# Globals: DB_CONNECTION_TEST (read) - command line, expanded with word
#          splitting and run with stderr discarded
# Returns: 1 immediately when DB_CONNECTION_TEST is empty or unset
wait-for-db-connection() {
  [ -n "$DB_CONNECTION_TEST" ] || {
    echo 'DB_CONNECTION_TEST is not defined' >&2
    return 1
  }
  until $DB_CONNECTION_TEST 2>/dev/null; do
    echo 'Waiting for database to become available...' >&2
    sleep 5
  done
}
export -f wait-for-db-connection
# ---
# Run a version command via `docker exec` and reject suspicious output.
# Arguments: $@ - container name followed by the command to execute
# Outputs:   the command's combined stdout/stderr, then any offending matches
# Returns:   1 if the output contains dev/head/unknown/fail/refuse (case
#            insensitive) or a '.' inside the "+..." build metadata, else 0
verify-valid-version() {
  local output
  # Keep the output in a variable instead of a fixed ".version.tmp" file in
  # the working directory, which concurrently running tests would overwrite
  # between the write and the grep. The exit status of docker exec is
  # deliberately ignored, as before; failures are detected from the text.
  output=$(docker exec "$@" 2>&1) || true
  if [[ -n "$output" ]]; then
    printf '%s\n' "$output"
  fi
  # check for bad strings in the version output, including '.' in the build metadata
  if grep -oiE '.*(dev|head|unknown|fail|refuse|\+[^"]*\.).*' <<<"$output"; then
    return 1
  fi
}
export -f verify-valid-version
# ---
# Run verify-valid-version for kubectl, ctr and crictl inside one container.
# Arguments: $1 - container name
# Returns:   the status of the last check (crictl)
verify-valid-versions() {
  local tool
  for tool in kubectl ctr crictl; do
    verify-valid-version $1 $tool version
  done
}
export -f verify-valid-versions
# ---
# Collect diagnostics for the current test run and optionally print them.
# Globals:   TEST_DIR (read) - root directory of the test run
# Arguments: $1 - pass "skip-output" to gather the files without printing
#                 them; in that case the function returns before the
#                 log-upload step
# Outputs:   banner lines, the last 5 lines of every per-node *.log file and
#            the full content of every collected *.txt file
dump-logs() {
local testID=$(basename $TEST_DIR)
echo "#---------------------------------"
echo "#- Begin: logs for run ($testID)"
echo
# Cluster-wide state is queried through kubectl inside the first server container.
local server=$(cat $TEST_DIR/servers/1/metadata/name)
docker exec $server kubectl get pods -A -o wide >$TEST_DIR/logs/kubectl-get-pods.txt
docker exec $server kubectl get nodes -o wide >$TEST_DIR/logs/kubectl-get-nodes.txt
docker exec $server kubectl describe pods -A >$TEST_DIR/logs/kubectl-describe-pods.txt
# Visit every second-level directory of TEST_DIR; only those that carry a
# non-empty metadata/name file are treated as containers.
for node in $TEST_DIR/*/*; do
[ -d "$node" ] || continue
local name=$(cat $node/metadata/name 2>/dev/null)
[ "$name" ] || continue
mkdir -p $node/logs
local hostname=$(docker exec $name hostname)
docker logs $name >$node/logs/system.log 2>&1
# The containerd/crictl details are gathered only for containers whose name
# starts with "k3s-" and that answered the hostname query.
if [[ ! -z "$hostname" && $name == k3s-* ]]; then
docker exec $server kubectl describe node/$hostname >$node/logs/kubectl-describe-node.txt
docker cp $name:/var/lib/rancher/k3s/agent/containerd/containerd.log $node/logs/containerd.log 2>/dev/null
docker exec $name crictl pods >$node/logs/crictl-pods.txt
docker exec $name crictl ps -a >$node/logs/crictl-ps.txt
docker exec $name crictl ps -a -o json >$node/metadata/crictl-ps.json
# One log file per container listed by crictl, named <container name>-<id>.log.
for container in $(jq -r '.containers[].id' <$node/metadata/crictl-ps.json); do
local cname=$(jq -r '.containers[] | select(.id == "'$container'") | .metadata.name' <$node/metadata/crictl-ps.json)
docker exec $name crictl logs $container >$node/logs/$cname-$container.log 2>&1
done
fi
if [ "$1" == "skip-output" ]; then
continue
fi
for log in $node/logs/*.log; do
echo
echo "#- Tail: $log"
tail -5 $log
echo "#- Done: $log"
echo
done
done
# With skip-output nothing below runs: no *.txt dump, no closing banner and
# no call to log-upload.
if [ "$1" == "skip-output" ]; then
return
fi
for txt in $TEST_DIR/logs/*.txt $TEST_DIR/*/*/logs/*.txt; do
echo
echo "#- Cat: $txt"
cat $txt
echo "#- Done: $txt"
echo
done
echo
echo "#- Finish: logs for run ($testID)"
echo "#---------------------------------"
echo
# Path is relative to the current working directory.
./scripts/log-upload $TEST_DIR
}
export -f dump-logs
# ---
# Print the sonobuoy status, download and unpack the results tarball, and
# show the tail of the e2e log.
# Globals: TEST_DIR (read), LOG_OUTPUT (read) - optional copy destination in
#          which "-STATUS-" is replaced by "-passed-" or "-failed-"
# Outputs: the status report, the last 11 lines of e2e.log and everything
#          from its "Summarizing ... Failures:" line onwards
# Returns: 1 when the e2e log is missing or empty, or when the status report
#          does not show the e2e plugin as complete and passed; otherwise 0
retrieve-sonobuoy-logs() {
  local verdict=passed rc=0
  local report=$(sonobuoy status 2>&1)
  local outDir=$TEST_DIR/sonobuoy
  cat <<< $report
  grep -q -E '\s+e2e\s+complete\s+passed\s+' <<< $report || {
    verdict=failed
    rc=1
  }
  mkdir -p $outDir
  sonobuoy retrieve $outDir 2>/dev/null || true
  # The glob is stored unexpanded and resolved at each unquoted use below.
  local archive=$outDir/*_sonobuoy_*.tar.gz
  if [ -f $archive ]; then
    tar -xz -f $archive -C $outDir
    rm $archive
  else
    rm -rf $outDir
  fi
  local e2eLog=$outDir/plugins/e2e/results/global/e2e.log
  if [ ! -s $e2eLog ]; then
    return 1
  fi
  if [ -n "$LOG_OUTPUT" ]; then
    cp $e2eLog $(sed -e "s/-STATUS-/-$verdict-/g" <<< "$LOG_OUTPUT")
  fi
  tail -11 $e2eLog
  awk '/^Summarizing .* Failures?:$/,0' $e2eLog
  return $rc
}
export -f retrieve-sonobuoy-logs
# ---
# Wait for a sonobuoy run to leave the "running" state and report the result.
# Arguments: $1 - PID passed to `wait`; a non-zero wait status is only logged
# Returns:   0 if `sonobuoy status` shows the e2e plugin as complete and
#            passed once it is no longer running, 1 otherwise
test-wait() {
  local delay=15
  wait $1 || echo "test wait exit code $?"
  sleep $delay
  while true; do
    sonobuoy status | grep -q -E ' +e2e +running +' || break
    sleep $delay
  done
  sleep $delay
  if ! sonobuoy status | grep -q -E ' +e2e +complete +passed +'; then
    return 1
  fi
  return 0
}
export -f test-wait
# ---
# Launch a sonobuoy run in the background, wait up to 60 minutes for it via
# test-wait, then collect the logs.
# Globals:   ARCH (read), VERSION_K8S (read)
# Arguments: $@ - extra arguments appended to `sonobuoy run`
# Returns:   0 without running anything when ARCH is "arm"; otherwise the
#            status of the timed test-wait invocation
sonobuoy-test() {
  if [ "$ARCH" = 'arm' ]; then
    echo "Aborting sonobuoy tests, images not available for $ARCH"
    return 0
  fi
  echo 'Starting sonobuoy tests'
  # "$@" is quoted so arguments containing spaces or glob characters (e.g.
  # bracketed e2e focus/skip expressions) reach sonobuoy unmodified.
  sonobuoy run \
    --config=scripts/sonobuoy-config.json \
    --plugin-env=e2e.E2E_USE_GO_RUNNER=true \
    --kubernetes-version="${VERSION_K8S}" \
    --wait=90 \
    "$@" &
  local sonobuoyPID=$!
  local code=0
  # NOTE(review): with `bash -c test-wait PID` the PID becomes $0 of the child
  # shell, so test-wait itself receives no positional argument - confirm
  # whether that is intended before changing it.
  time timeout --foreground 60m bash -c test-wait $sonobuoyPID || code=$?
  echo "Sonobuoy finished with code $code"
  retrieve-sonobuoy-logs
  return $code
}
export -f sonobuoy-test
# ---
# Trap handler that tears a test run down and exits with the final status.
# The status of the command preceding the call is taken as the test result;
# an optional test-post-hook receives it and its own status replaces it.
# Globals: TEST_DIR, TEST_CLEANUP, PROVISION_LOCK (read)
# Outputs: progress messages and a final "Test <id> passed."/"failed." line
# Exits:   with the final status code
test-cleanup() {
  # Must stay the first statement so $? still holds the caller's status.
  local code=$?
  set +e -x
  echo 'Cleaning up...'
  # Drop the traps so the `exit` at the end does not re-enter this handler.
  trap - EXIT INT TERM
  if has-function test-post-hook; then
    test-post-hook $code
    code=$?
  fi
  [[ $code -eq 0 ]] || dump-logs
  # Every container recorded under TEST_DIR is force-removed with its volumes.
  for name in $TEST_DIR/*/*/metadata/name; do
    if [ -f "$name" ]; then
      local container=$(cat $name)
      echo "Removing container $container"
      docker rm -f -v $container
    fi
  done
  has-function test-cleanup-hook && test-cleanup-hook
  echo
  if [ "$TEST_CLEANUP" = true ]; then
    echo "Removing test directory $TEST_DIR"
    rm -rf $TEST_DIR
  fi
  if [ -f "$PROVISION_LOCK" ]; then
    rm $PROVISION_LOCK
  fi
  local verdict="failed."
  [ $code -eq 0 ] && verdict="passed."
  echo "Test $(basename $TEST_DIR) $verdict"
  echo
  exit $code
}
export -f test-cleanup
# ---
# Prepare the environment for a single test run.
# Globals: TEST_DIR, K3S_IMAGE (exported); TAG, REPO, IMAGE_NAME,
#          SERVER_MINOR, AGENT_MINOR (written); LABEL, K3S_IMAGE_SERVER,
#          K3S_IMAGE_AGENT, NUM_AGENTS, VERSION_TAG, SUFFIX (read)
# Side effects: installs test-cleanup as the EXIT/INT/TERM trap and
#          redirects the shell's stdout and stderr for the rest of the run
# Returns: 1 if K3S_IMAGE is still empty after the defaulting step; exits 0
#          when the agent minor version is higher than the server's
test-setup() {
export TEST_DIR=$(mktemp -d '/tmp/XXXXXX')
# From here on any exit path runs test-cleanup.
trap test-cleanup EXIT INT TERM
mkdir -p $TEST_DIR/metadata
if [ "$LABEL" ]; then
# Prefix every stdout line with the label in one colour and every stderr
# line with the label in another, flushing after each line.
exec > >(awk "{ printf \"[\033[36m${LABEL}\033[m] %s\n\", \$0; fflush() }") \
2> >(awk "{ printf \"[\033[35m${LABEL}\033[m] %s\n\", \$0; fflush() }" >&2)
echo "$LABEL" >$TEST_DIR/metadata/label
fi
mkdir -p $TEST_DIR/logs
# Duplicate both streams into logs/test.log while still passing them through.
exec > >(tee -a $TEST_DIR/logs/test.log) \
2> >(tee -a $TEST_DIR/logs/test.log >&2)
# Without an explicit image, build the reference from version.sh plus the
# REPO / IMAGE_NAME / TAG overrides.
if [ -z "$K3S_IMAGE" ]; then
. ./scripts/version.sh
TAG=${TAG:-${VERSION_TAG}${SUFFIX}}
REPO=${REPO:-rancher}
IMAGE_NAME=${IMAGE_NAME:-k3s}
export K3S_IMAGE=${REPO}/${IMAGE_NAME}:${TAG}
fi
# NOTE(review): the branch above always assigns a string containing "/" and
# ":", so this check looks unreachable - confirm before relying on it.
if [ -z "$K3S_IMAGE" ]; then
echo 'K3S_IMAGE environment variable should be defined'
return 1
fi
# The second dot-separated field of the image reference is taken as the
# minor version.
SERVER_MINOR=$(awk -F. '{print $2}' <<<${K3S_IMAGE_SERVER:-$K3S_IMAGE})
AGENT_MINOR=$(awk -F. '{print $2}' <<<${K3S_IMAGE_AGENT:-$K3S_IMAGE})
if [ $NUM_AGENTS -gt 0 ] && [ $AGENT_MINOR -gt $SERVER_MINOR ]; then
echo "Agent minor version cannot be higher than server - not supported by Kubernetes version skew policy"
# Exits with status 0; the EXIT trap installed above still runs.
exit 0
fi
# Random value that provision-server and provision-agent pass as K3S_TOKEN.
echo ${RANDOM}${RANDOM}${RANDOM} >$TEST_DIR/metadata/secret
}
export -f test-setup
# ---
# Allocate the next numbered directory under $TEST_DIR/<kind>.
# Finds the highest purely numeric entry name, adds one, creates
# $TEST_DIR/<kind>/<n>/metadata and prints <n>. Runs in a subshell.
# Globals:   TEST_DIR (read)
# Arguments: $1 - kind of node, used as the sub-directory name
# Outputs:   the newly allocated index on stdout
inc-count() {(
  local entry base count=0
  # Iterate with a plain glob and filter by regex instead of parsing `ls`
  # output through an extglob pattern; this also survives whitespace in
  # TEST_DIR. With no entries the literal pattern fails the regex and is
  # skipped.
  for entry in "$TEST_DIR/$1"/*; do
    base=${entry##*/}
    [[ $base =~ ^[0-9]+$ ]] || continue
    # 10# forces base 10 so a name such as "08" is not read as octal.
    if (( 10#$base > count )); then
      count=$((10#$base))
    fi
  done
  count=$((count + 1))
  mkdir -p "$TEST_DIR/$1/$count/metadata"
  echo "$count"
)}
export -f inc-count
# ---
# Succeed only when the given name is non-empty and resolves to a shell
# function. Anything written to stderr is discarded.
# Arguments: $1 - name to look up
has-function() {
  [[ -n "$1" ]] || return 1
  [[ $(type -t $1) == "function" ]]
} 2> /dev/null
export -f has-function
# ---
# Call an optional hook if it is defined as a shell function.
# Arguments: $1 - function name; remaining arguments are passed to it
# Returns:   0 when the function does not exist, otherwise its exit status
run-function() {
  has-function "$1" || return 0
  # "$@" keeps every argument intact instead of re-splitting it on whitespace.
  "$@"
}
export -f run-function
# ---
# Start one k3s server container and record its metadata.
# Writes name, port, ip and url files under $TEST_DIR/servers/<n>/metadata
# and runs the optional server-pre-hook / server-post-hook around the start.
# Globals: TEST_DIR, K3S_IMAGE, K3S_IMAGE_SERVER, ARGS, SERVER_ARGS,
#          SERVER_<n>_ARGS, SERVER_DOCKER_ARGS, REGISTRY_CLUSTER_ARGS (read)
# Outputs: whatever `docker run` prints, then "Started <name> @ <url>"
provision-server() {
  local count=$(inc-count servers)
  local testID=$(basename $TEST_DIR)
  local metadata=$TEST_DIR/servers/$count/metadata
  local name="k3s-server-${count}-${testID,,}"
  echo "$name" >$metadata/name
  local port=$(timeout --foreground 5s bash -c get-port | tee $metadata/port)
  # Name of the per-instance argument variable, dereferenced below.
  local SERVER_INSTANCE_ARGS="SERVER_${count}_ARGS"
  run-function server-pre-hook $count
  # Assembled only after the pre-hook so changes it makes to the argument
  # variables are picked up. The *_ARGS variables are expanded unquoted on
  # purpose: each may hold several space-separated options.
  local -a dockerArgs=(
    -d --name $name
    --hostname $name
    --privileged
    -p 127.0.0.1:$port:6443
    -p 6443
    -e K3S_TOKEN=$(cat $TEST_DIR/metadata/secret)
    -e K3S_DEBUG=true
    ${SERVER_DOCKER_ARGS:-}
    ${REGISTRY_CLUSTER_ARGS:-}
    ${K3S_IMAGE_SERVER:-$K3S_IMAGE} server ${ARGS} ${SERVER_ARGS} ${!SERVER_INSTANCE_ARGS}
  )
  docker run "${dockerArgs[@]}"
  local ip=$(docker inspect --format '{{ .NetworkSettings.IPAddress }}' $name | tee $metadata/ip)
  local url="https://$ip:6443"
  echo "$url" >$metadata/url
  echo "Started $name @ $url"
  run-function server-post-hook $count
}
export -f provision-server
# ---
# Start one k3s agent container and record its metadata.
# Writes name and ip files under $TEST_DIR/agents/<n>/metadata and runs the
# optional agent-pre-hook / agent-post-hook around the start.
# Globals: TEST_DIR, K3S_URL (defaults to the first server's url file),
#          K3S_IMAGE, K3S_IMAGE_AGENT, ARGS, AGENT_ARGS, AGENT_<n>_ARGS,
#          AGENT_DOCKER_ARGS, REGISTRY_CLUSTER_ARGS (read)
# Outputs: whatever `docker run` prints, then "Started <name>"
provision-agent() {
  local K3S_URL=${K3S_URL:-"$(cat $TEST_DIR/servers/1/metadata/url)"}
  local count=$(inc-count agents)
  local testID=$(basename $TEST_DIR)
  local metadata=$TEST_DIR/agents/$count/metadata
  local name="k3s-agent-${count}-${testID,,}"
  echo "$name" >$metadata/name
  # Name of the per-instance argument variable, dereferenced below.
  local AGENT_INSTANCE_ARGS="AGENT_${count}_ARGS"
  run-function agent-pre-hook $count
  # Assembled only after the pre-hook so changes it makes to the argument
  # variables are picked up. The *_ARGS variables are expanded unquoted on
  # purpose: each may hold several space-separated options.
  local -a dockerArgs=(
    -d --name $name
    --hostname $name
    --privileged
    -e K3S_TOKEN=$(cat $TEST_DIR/metadata/secret)
    -e K3S_URL=$K3S_URL
    ${AGENT_DOCKER_ARGS:-}
    ${REGISTRY_CLUSTER_ARGS:-}
    ${K3S_IMAGE_AGENT:-$K3S_IMAGE} agent ${ARGS} ${AGENT_ARGS} ${!AGENT_INSTANCE_ARGS}
  )
  docker run "${dockerArgs[@]}"
  local ip=$(docker inspect --format '{{ .NetworkSettings.IPAddress }}' $name | tee $metadata/ip)
  echo "Started $name"
  run-function agent-post-hook $count
}
export -f provision-agent
# ---
# Bring up the whole test cluster: optional registry proxy, NUM_SERVERS
# servers (waiting up to 120s for each kubeconfig), NUM_AGENTS agents, then
# wait for all nodes (2m) and the WAIT_SERVICES containers (4m).
# Globals: ENABLE_REGISTRY, NUM_SERVERS, NUM_AGENTS, WAIT_SERVICES,
#          PROVISION_LOCK, TEST_DIR (read); KUBECONFIG (exported)
provision-cluster() {
  run-function cluster-pre-hook
  if [[ "${ENABLE_REGISTRY}" == 'true' ]]; then
    provision-registry-proxy
  fi
  for i in $(seq 1 $NUM_SERVERS); do
    provision-server
    timeout --foreground 120s bash -c "wait-for-kubeconfig $i"
  done
  # All later kubectl calls talk to the first server.
  export KUBECONFIG=$TEST_DIR/servers/1/kubeconfig.yaml
  if [ $NUM_AGENTS -gt 0 ]; then
    for _ in $(seq 1 $NUM_AGENTS); do
      provision-agent
    done
  fi
  # Removing the lock file is what ends run-test's wait loop.
  if [ -f "$PROVISION_LOCK" ]; then
    rm $PROVISION_LOCK
  fi
  local expectedNodes=$(( NUM_SERVERS + NUM_AGENTS ))
  timeout --foreground 2m bash -c "wait-for-nodes $expectedNodes"
  timeout --foreground 4m bash -c "wait-for-services $WAIT_SERVICES"
  run-function cluster-post-hook
}
export -f provision-cluster
# ---
# Start (or reuse) one pull-through registry cache container per upstream
# registry and generate a k3s registries.yaml that points at them.
# Globals:   TEST_DIR (read); REGISTRY_CLUSTER_ARGS (exported) - docker
#            volume argument mounting the generated file into k3s containers
# Outputs:   the generated registries.yaml on stdout
# Note:      `set -e -o pipefail` is applied in the calling shell, because the
#            function body is not a subshell.
provision-registry-proxy() {
  set -e -o pipefail
  local image="docker.io/library/registry:2.8.1"
  # Entries are "<mirror name>[:<upstream endpoint>]"; the endpoint defaults
  # to the mirror name.
  local registries="docker.io:registry-1.docker.io registry.k8s.io gcr.io quay.io ghcr.io"
  local registries_yaml="$TEST_DIR/registries.yaml"
  local registry registry_name registry_endpoint name status
  local state_status config_image hostport
  echo "mirrors:" >"$registries_yaml"
  for registry in $registries; do
    IFS=: read -r registry_name registry_endpoint <<<"$registry"
    registry_endpoint=${registry_endpoint:-$registry_name}
    name="registry_${registry_name//./_}"
    # Empty when the container does not exist; otherwise
    # "<state> <image> <host port bound to 5000/tcp>".
    status=$(docker inspect "$name" --format '{{ .State.Status }} {{ .Config.Image }} {{ (index .HostConfig.PortBindings "5000/tcp" 0).HostPort }}' 2>/dev/null || true)
    read -r state_status config_image hostport <<<"$status"
    # Recreate the proxy unless it is already running the expected image; a
    # reused container keeps the host port reported by docker inspect.
    if [ "$state_status" != "running" ] || [ "$config_image" != "$image" ]; then
      hostport=$(timeout --foreground 5s bash -c get-port)
      docker rm --force "$name" 2>/dev/null || true
      docker run \
        -d --name "$name" \
        -p "0.0.0.0:$hostport:5000" \
        -v "registry-cache:/var/lib/registry" \
        -e "REGISTRY_HTTP_SECRET=shared-secret" \
        -e "REGISTRY_PROXY_REMOTEURL=https://$registry_endpoint" \
        -e "REGISTRY_STORAGE_CACHE_BLOBDESCRIPTOR=inmemory" \
        -e "REGISTRY_STORAGE_FILESYSTEM_ROOTDIRECTORY=/var/lib/registry/$registry_name" \
        "$image"
    fi
    # Nested indentation matters: each mirror is a key under "mirrors" and
    # its endpoint list is a child of that mirror.
    printf '  %s:\n    endpoint:\n      - http://172.17.0.1:%s\n' \
      "$registry_name" "$hostport" >>"$registries_yaml"
  done
  echo "Using registry mirror with cluster registries.yaml:"
  cat "$registries_yaml"
  export REGISTRY_CLUSTER_ARGS="-v $registries_yaml:/etc/rancher/k3s/registries.yaml"
}
export -f provision-registry-proxy
# ---
# Print a message in yellow and terminate the shell.
# Arguments: $1 - message; backslash escapes in it are still interpreted
#            $2 - exit status
early-exit() {
  # The message is passed as data (%b) rather than as the printf format, so a
  # literal '%' in it can no longer be misread as a conversion specifier.
  printf '\033[33m%b\033[m\n' "$1"
  exit $2
}
export -f early-exit
# ---
# Start ./scripts/test-runner in the background, throttled by the number of
# tests already running.
# Globals:   MAX_CONCURRENT_TESTS (read, default 4); pids (appended);
#            PROVISION_LOCK (exported) - temp file the started test is
#            expected to delete once provisioning is done
# Arguments: $@ - arguments forwarded to the test runner
run-test() {
  local delay=15
  (
    set +x
    while [ "$(count-running-tests)" -ge "${MAX_CONCURRENT_TESTS:-4}" ]; do
      sleep $delay
    done
  )
  # Assign before exporting so a failing mktemp is not hidden behind the
  # exit status of `export`.
  PROVISION_LOCK=$(mktemp)
  export PROVISION_LOCK
  # "$@" keeps each runner argument intact (no re-splitting or globbing).
  ./scripts/test-runner "$@" &
  pids+=($!)
  (
    set +x
    # busy-wait on the provisioning lock before imposing a final inter-test delay
    while [ -f "$PROVISION_LOCK" ]; do
      sleep 1
    done
    sleep $delay
  )
}
export -f run-test
# ---
# Reset the per-test configuration to its defaults: one server, one agent,
# no extra arguments, all services awaited, and no hook functions defined.
# Globals: all_services (read); NUM_SERVERS, NUM_AGENTS, AGENT_ARGS,
#          SERVER_ARGS, WAIT_SERVICES (exported); per-instance and docker
#          argument variables (unset)
cleanup-test-env(){
  local idx
  export NUM_SERVERS=1 NUM_AGENTS=1
  export AGENT_ARGS='' SERVER_ARGS=''
  export WAIT_SERVICES="${all_services[@]}"
  for idx in 1 2 3; do
    unset "AGENT_${idx}_ARGS" "SERVER_${idx}_ARGS"
  done
  unset AGENT_DOCKER_ARGS SERVER_DOCKER_ARGS
  unset -f server-pre-hook server-post-hook \
    agent-pre-hook agent-post-hook \
    cluster-pre-hook cluster-post-hook \
    test-post-hook test-cleanup-hook
}
# ---
# Count how many recorded test-runner PIDs still have child processes.
# Globals: pids (read) - array of background test-runner PIDs
# Outputs: a progress message on stderr and the bare count on stdout
count-running-tests(){
  local count=0 pid children
  for pid in "${pids[@]}"; do
    # pgrep -c prints the number of matches but exits non-zero when there are
    # none; tolerate that so the function also works under `set -e`.
    children=$(pgrep -c -P "$pid") || true
    if [ "${children:-0}" -gt 0 ]; then
      # Plain arithmetic assignment: ((count++)) returns status 1 when the
      # old value is 0, which would abort the shell under errexit.
      count=$((count + 1))
    fi
  done
  echo "Currently running ${count} tests" 1>&2
  echo "${count}"
}
export -f count-running-tests
# ---
# Launch one e2e run through run-test with a label and log destination.
# Globals:   label, logName (read; set by the caller for this call),
#            LABEL_SUFFIX, E2E_OUTPUT (read)
# Arguments: $@ - arguments forwarded to run-test
e2e-test() {
  local label=$label
  if [ -n "$LABEL_SUFFIX" ]; then
    label="$label-$LABEL_SUFFIX"
  fi
  local logOutput=
  if [ -n "$E2E_OUTPUT" ]; then
    logOutput=$E2E_OUTPUT/$logName
  fi
  # Labels starting with PARALLEL cap the number of concurrent tests at 3;
  # others keep whatever limit is already in effect. "$@" is quoted so the
  # arguments are forwarded without re-splitting or globbing.
  if [[ $label =~ ^PARALLEL.* ]]; then
    LABEL=$label LOG_OUTPUT=$logOutput MAX_CONCURRENT_TESTS=3 run-test "$@"
  else
    LABEL=$label LOG_OUTPUT=$logOutput run-test "$@"
  fi
}
# ---
# Reset the test environment, source the sonobuoy setup script and start the
# requested parallel and/or serial e2e runs.
# Arguments: $1 - "serial", "parallel", or a setup-script suffix
#            $2 - "serial" or "parallel" when $1 is a suffix (or the second
#                 mode when $1 is already a mode)
# Globals:   ARCH (read); LABEL_SUFFIX (exported or unset);
#            sonobuoyParallelArgs / sonobuoySerialArgs (read; expected to be
#            set by the sourced setup script)
test-run-sonobuoy() {
  local suffix
  # Anything other than a mode keyword selects ./scripts/test-setup-sonobuoy-<suffix>.
  if [ "$1" ] && [[ ! $1 =~ ^(serial|parallel)$ ]] ; then
    suffix="-$1"
    export LABEL_SUFFIX=$1
  else
    unset LABEL_SUFFIX
  fi
  cleanup-test-env
  . "./scripts/test-setup-sonobuoy$suffix"
  # The arrays are expanded quoted so every configured argument is passed on
  # as exactly one word.
  if [ "$1" = "parallel" ] || [ "$2" = "parallel" ]; then
    label=PARALLEL \
    logName=e2e-STATUS-${ARCH}-parallel.log \
    e2e-test "${sonobuoyParallelArgs[@]}"
    echo "Exit code $? for parallel start"
  fi
  if [ "$1" = "serial" ] || [ "$2" = "serial" ]; then
    label=SERIAL \
    logName=e2e-STATUS-${ARCH}-serial.log \
    e2e-test "${sonobuoySerialArgs[@]}"
    echo "Exit code $? for serial start"
  fi
}
export -f test-run-sonobuoy
# ---
# Trap handler for the top-level script: wait for the background tests,
# stop the rest once something has failed, print a summary and exit.
# Globals: pids (read), DRONE_BUILD_EVENT (read)
# Outputs: a coloured summary line surrounded by blank lines
# Exits:   with the incoming/collected status when nothing was counted as
#          failed; otherwise 1, or 0 when DRONE_BUILD_EVENT is "tag"
pid-cleanup() {
  # Must stay the first statement so $? still holds the caller's status.
  local code=$?
  local failCount=0
  set +e
  if (( code == 0 )); then
    for pid in ${pids[@]}; do
      wait $pid || code=$?
    done
  fi
  # Either we were entered with an error or one of the waits above failed:
  # kill the children of every runner and count each failing wait.
  if (( code != 0 )); then
    for pid in ${pids[@]}; do
      pkill -P $pid
      wait $pid || failCount=$((failCount+1))
    done
  fi
  trap - EXIT INT TERM
  set +x
  echo
  if (( failCount == 0 )); then
    printf '\033[32mAll tests passed.\033[m\n'
  else
    printf "\033[31m$failCount tests failed.\033[m\n"
    case "$DRONE_BUILD_EVENT" in
      tag)
        printf "\033[31mIgnoring test failures on tag.\033[m\n"
        code=0
        ;;
      *)
        code=1
        ;;
    esac
  fi
  echo
  exit $code
}
export -f pid-cleanup
# ---
# PIDs of the background test-runner processes: run-test appends to this
# array, count-running-tests and pid-cleanup iterate over it.
pids=()
# Run pid-cleanup when the shell exits or receives INT/TERM.
trap pid-cleanup EXIT INT TERM