Add E2E Split Server to Drone, support parallel testing in Drone (#9940)

* Fix old secrets encryption (SE) test name
* E2E: support running multiple sets of VMs at once in CI via a timestamp prefix
* Add local binary support to the split server test and add it to Drone CI
* Clean up old VMs in Drone

Signed-off-by: Derek Nola <derek.nola@suse.com>
Derek Nola 2024-04-29 13:57:22 -07:00 committed by GitHub
parent 5c94ce2cf8
commit 0981f0069d
GPG Key ID: B5690EEEBB952194
21 changed files with 165 additions and 41 deletions

View File

@ -617,33 +617,24 @@ steps:
- mkdir -p dist/artifacts
- cp /tmp/artifacts/* dist/artifacts/
- docker stop registry && docker rm registry
# Cleanup VMs running, happens if a previous test panics
# Cleanup inactive domains, happens if previous test is canceled
- |
VMS=$(virsh list --name | grep '_server-\|_agent-' || true)
if [ -n "$VMS" ]; then
for vm in $VMS
do
virsh destroy $vm
virsh undefine $vm --remove-all-storage
done
fi
VMS=$(virsh list --name --inactive | grep '_server-\|_agent-' || true)
if [ -n "$VMS" ]; then
for vm in $VMS
do
virsh undefine $vm
done
fi
# Cleanup VMs that are older than 2h. Happens if a previous test panics or is canceled
- tests/e2e/scripts/cleanup_vms.sh
- docker run -d -p 5000:5000 -e REGISTRY_PROXY_REMOTEURL=https://registry-1.docker.io --name registry registry:2
- cd tests/e2e/validatecluster
- vagrant destroy -f
- go test -v -timeout=45m ./validatecluster_test.go -ci -local
- cp ./coverage.out /tmp/artifacts/validate-coverage.out
- cd ../secretsencryption
- vagrant destroy -f
- go test -v -timeout=30m ./secretsencryption_test.go -ci -local
- cp ./coverage.out /tmp/artifacts/se-coverage.out
- |
cd tests/e2e/validatecluster
vagrant destroy -f
go test -v -timeout=45m ./validatecluster_test.go -ci -local
cp ./coverage.out /tmp/artifacts/validate-coverage.out
- |
cd ../secretsencryption
vagrant destroy -f
go test -v -timeout=30m ./secretsencryption_test.go -ci -local
cp ./coverage.out /tmp/artifacts/se-coverage.out
- |
cd ../splitserver
vagrant destroy -f
go test -v -timeout=30m ./splitserver_test.go -ci -local
cp ./coverage.out /tmp/artifacts/split-coverage.out
- |
if [ "$DRONE_BUILD_EVENT" = "pull_request" ]; then
cd ../upgradecluster
@ -669,6 +660,7 @@ steps:
files:
- /tmp/artifacts/validate-coverage.out
- /tmp/artifacts/se-coverage.out
- /tmp/artifacts/split-coverage.out
- /tmp/artifacts/upgrade-coverage.out
flags:
- e2etests

View File

@ -90,6 +90,8 @@ Vagrant.configure("2") do |config|
config.vm.provider "libvirt" do |v|
v.cpus = NODE_CPUS
v.memory = NODE_MEMORY
# We replicate the default prefix, but add a timestamp to enable parallel runs and cleanup of old VMs
v.default_prefix = File.basename(Dir.getwd) + "_" + Time.now.to_i.to_s + "_"
end
if NODE_ROLES.kind_of?(String)
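With this prefix, each libvirt domain name encodes both the test directory and the epoch seconds at which vagrant brought it up, e.g. splitserver_1714424242_server-etcd-0 (timestamp illustrative). The cleanup script added later in this commit parses that embedded timestamp to identify and remove stale VMs.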

View File

@ -97,6 +97,8 @@ Vagrant.configure("2") do |config|
config.vm.provider "libvirt" do |v|
v.cpus = NODE_CPUS
v.memory = NODE_MEMORY
# We replicate the default prefix, but add a timestamp to enable parallel runs and cleanup of old VMs
v.default_prefix = File.basename(Dir.getwd) + "_" + Time.now.to_i.to_s + "_"
end
config.vm.provider "virtualbox" do |v|
v.cpus = NODE_CPUS

View File

@ -65,6 +65,8 @@ Vagrant.configure("2") do |config|
config.vm.provider "libvirt" do |v|
v.cpus = NODE_CPUS
v.memory = NODE_MEMORY
# We replicate the default prefix, but add a timestamp to enable parallel runs and cleanup of old VMs
v.default_prefix = File.basename(Dir.getwd) + "_" + Time.now.to_i.to_s + "_"
end
if NODE_ROLES.kind_of?(String)

View File

@ -103,6 +103,8 @@ Vagrant.configure("2") do |config|
config.vm.provider "libvirt" do |v|
v.cpus = NODE_CPUS
v.memory = NODE_MEMORY
# We replicate the default prefix, but add a timestamp to enable parallel runs and cleanup of old VMs
v.default_prefix = File.basename(Dir.getwd) + "_" + Time.now.to_i.to_s + "_"
end
config.vm.provider "virtualbox" do |v|
v.cpus = NODE_CPUS

View File

@ -45,6 +45,8 @@ Vagrant.configure("2") do |config|
config.vm.provider "libvirt" do |v|
v.cpus = NODE_CPUS
v.memory = NODE_MEMORY
# We replicate the default prefix, but add a timestamp to enable parallel runs and cleanup of old VMs
v.default_prefix = File.basename(Dir.getwd) + "_" + Time.now.to_i.to_s + "_"
end
config.vm.provider "virtualbox" do |v|
v.cpus = NODE_CPUS

View File

@ -65,6 +65,8 @@ Vagrant.configure("2") do |config|
config.vm.provider "libvirt" do |v|
v.cpus = NODE_CPUS
v.memory = NODE_MEMORY
# We replicate the default prefix, but add a timestamp to enable parallel runs and cleanup of old VMs
v.default_prefix = File.basename(Dir.getwd) + "_" + Time.now.to_i.to_s + "_"
end
config.vm.provider "virtualbox" do |v|
v.cpus = NODE_CPUS

View File

@ -69,6 +69,8 @@ Vagrant.configure("2") do |config|
config.vm.provider "libvirt" do |v|
v.cpus = NODE_CPUS
v.memory = NODE_MEMORY
# We replicate the default prefix, but add a timestamp to enable parallel runs and cleanup of old VMs
v.default_prefix = File.basename(Dir.getwd) + "_" + Time.now.to_i.to_s + "_"
end
config.vm.provider "virtualbox" do |v|
v.cpus = NODE_CPUS

View File

@ -0,0 +1,35 @@
#!/bin/bash
# Clean up any VMs that are older than 2 hours.
#
# We embed the creation time in the VM name, so old VMs can easily be filtered out.
# Get the current time in seconds since the epoch
current_time=$(date +%s)
# Get the list of VMs
vms=$(virsh list --name --all)
time_regex="_([0-9]+)_(server|agent)"
# Cleanup running VMs, happens if a previous test panics
for vm in $vms; do
if [[ $vm =~ $time_regex ]]; then
vm_time="${BASH_REMATCH[1]}"
age=$((current_time - vm_time))
if [ $age -gt 7200 ]; then
virsh destroy $vm
virsh undefine $vm --remove-all-storage
fi
fi
done
# Cleanup inactive domains, happens if previous test is canceled
vms=$(virsh list --name --inactive)
for vm in $vms; do
if [[ $vm =~ $time_regex ]]; then
vm_time="${BASH_REMATCH[1]}"
age=$((current_time - vm_time))
if [ $age -gt 7200 ]; then
virsh undefine $vm --remove-all-storage
fi
fi
done
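As a worked example (using a hypothetical domain name): for splitserver_1714424242_agent-0, time_regex captures 1714424242 into BASH_REMATCH[1]; once current_time minus 1714424242 exceeds 7200 seconds (2 hours), the domain is removed along with its storage.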

View File

@ -9,7 +9,7 @@ GOCOVER = (ENV['E2E_GOCOVER'] || "")
NODE_CPUS = (ENV['E2E_NODE_CPUS'] || 2).to_i
NODE_MEMORY = (ENV['E2E_NODE_MEMORY'] || 2048).to_i
# VirtualBox >= 6.1.28 requires `/etc/vbox/network.conf` for expanded private networks
NETWORK_PREFIX = "10.10.10"
NETWORK_PREFIX = "10.10.13"
install_type = ""
def provision(vm, role, role_num, node_num)
@ -53,6 +53,8 @@ Vagrant.configure("2") do |config|
config.vm.provider "libvirt" do |v|
v.cpus = NODE_CPUS
v.memory = NODE_MEMORY
# We replicate the default prefix, but add a timestamp to enable parallel runs and cleanup of old VMs
v.default_prefix = File.basename(Dir.getwd) + "_" + Time.now.to_i.to_s + "_"
end
config.vm.provider "virtualbox" do |v|
v.cpus = NODE_CPUS

View File

@ -52,6 +52,8 @@ Vagrant.configure("2") do |config|
config.vm.provider "libvirt" do |v|
v.cpus = NODE_CPUS
v.memory = NODE_MEMORY
# We replicate the default prefix, but add a timestamp to enable parallel runs and cleanup of old VMs
v.default_prefix = File.basename(Dir.getwd) + "_" + Time.now.to_i.to_s + "_"
end
config.vm.provider "virtualbox" do |v|
v.cpus = NODE_CPUS

View File

@ -22,7 +22,7 @@ var local = flag.Bool("local", false, "deploy a locally built K3s binary")
// Environment Variables Info:
// E2E_RELEASE_VERSION=v1.23.1+k3s2 or nil for latest commit from master
func Test_E2ESecretsEncryption(t *testing.T) {
func Test_E2ESecretsEncryptionOld(t *testing.T) {
RegisterFailHandler(Fail)
flag.Parse()
suiteConfig, reporterConfig := GinkgoConfiguration()

View File

@ -82,6 +82,8 @@ Vagrant.configure("2") do |config|
config.vm.provider "libvirt" do |v|
v.cpus = NODE_CPUS
v.memory = NODE_MEMORY
# We replicate the default prefix, but add a timestamp to enable parallel runs and cleanup of old VMs
v.default_prefix = File.basename(Dir.getwd) + "_" + Time.now.to_i.to_s + "_"
end
config.vm.provider "virtualbox" do |v|
v.cpus = NODE_CPUS

View File

@ -1,14 +1,15 @@
ENV['VAGRANT_NO_PARALLEL'] = 'no'
NODE_ROLES = (ENV['E2E_NODE_ROLES'] ||
["server-etcd-0", "server-etcd-1", "server-etcd-2", "server-cp-0", "server-cp-1", "agent-0"])
["server-etcd-0", "server-etcd-1", "server-etcd-2", "server-cp-0", "server-cp-1", "agent-0", "agent-1"])
NODE_BOXES = (ENV['E2E_NODE_BOXES'] ||
['generic/ubuntu2310', 'generic/ubuntu2310', 'generic/ubuntu2310', 'generic/ubuntu2310', 'generic/ubuntu2310'])
GITHUB_BRANCH = (ENV['E2E_GITHUB_BRANCH'] || "master")
RELEASE_VERSION = (ENV['E2E_RELEASE_VERSION'] || "")
GOCOVER = (ENV['E2E_GOCOVER'] || "")
NODE_CPUS = (ENV['E2E_NODE_CPUS'] || 2).to_i
NODE_MEMORY = (ENV['E2E_NODE_MEMORY'] || 2048).to_i
# VirtualBox >= 6.1.28 requires `/etc/vbox/network.conf` for expanded private networks
NETWORK_PREFIX = "10.10.10"
NETWORK_PREFIX = "10.10.12"
def provision(vm, role, role_num, node_num)
vm.box = NODE_BOXES[node_num]
@ -22,12 +23,14 @@ def provision(vm, role, role_num, node_num)
defaultOSConfigure(vm)
install_type = getInstallType(vm, RELEASE_VERSION, GITHUB_BRANCH)
addCoverageDir(vm, role, GOCOVER)
vm.provision "ping k3s.io", type: "shell", inline: "ping -c 2 k3s.io"
if node_num == 0 && !role.include?("server") && !role.include?("etcd")
puts "first node must be a etcd server"
abort
if ARGV.include?("up") || (ARGV.include?("reload") && ARGV.include?("--provision"))
puts "Error: first node provisioned must be a etcd server"
abort
end
elsif role.include?("server") && role.include?("etcd") && role_num == 0
vm.provision 'k3s-install', type: 'k3s', run: 'once' do |k3s|
k3s.args = "server"
@ -102,6 +105,8 @@ Vagrant.configure("2") do |config|
config.vm.provider "libvirt" do |v|
v.cpus = NODE_CPUS
v.memory = NODE_MEMORY
# We replicate the default prefix, but add a timestamp to enable parallel runs and cleanup of old VMs
v.default_prefix = File.basename(Dir.getwd) + "_" + Time.now.to_i.to_s + "_"
end
config.vm.provider "virtualbox" do |v|
v.cpus = NODE_CPUS
@ -116,6 +121,7 @@ Vagrant.configure("2") do |config|
end
NODE_ROLES.each_with_index do |role, i|
# Find the first number in the role name
role_num = role.split("-", -1).pop.to_i
config.vm.define role do |node|
provision(node.vm, role, role_num, i)

View File

@ -1,16 +1,19 @@
package splitserver
import (
"context"
"flag"
"fmt"
"os"
"strconv"
"strings"
"testing"
"time"
"github.com/k3s-io/k3s/tests/e2e"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
"golang.org/x/sync/errgroup"
)
// Valid nodeOS: generic/ubuntu2310, opensuse/Leap-15.3.x86_64
@ -19,21 +22,25 @@ var etcdCount = flag.Int("etcdCount", 3, "number of server nodes only deploying
var controlPlaneCount = flag.Int("controlPlaneCount", 1, "number of server nodes acting as control plane")
var agentCount = flag.Int("agentCount", 1, "number of agent nodes")
var ci = flag.Bool("ci", false, "running on CI")
var local = flag.Bool("local", false, "deploy a locally built K3s binary")
var hardened = flag.Bool("hardened", false, "true or false")
// Environment Variables Info:
// E2E_RELEASE_VERSION=v1.23.1+k3s2 or nil for latest commit from master
func createSplitCluster(nodeOS string, etcdCount, controlPlaneCount, agentCount int) ([]string, []string, []string, error) {
// createSplitCluster creates a split server cluster with the given nodeOS, etcdCount, controlPlaneCount, and agentCount.
// It duplicates and merges functionality found in the e2e.CreateCluster and e2e.CreateLocalCluster functions.
func createSplitCluster(nodeOS string, etcdCount, controlPlaneCount, agentCount int, local bool) ([]string, []string, []string, error) {
etcdNodeNames := make([]string, etcdCount)
cpNodeNames := make([]string, controlPlaneCount)
agentNodeNames := make([]string, agentCount)
for i := 0; i < etcdCount; i++ {
etcdNodeNames[i] = "server-etcd-" + strconv.Itoa(i)
}
cpNodeNames := make([]string, controlPlaneCount)
for i := 0; i < controlPlaneCount; i++ {
cpNodeNames[i] = "server-cp-" + strconv.Itoa(i)
}
agentNodeNames := make([]string, agentCount)
for i := 0; i < agentCount; i++ {
agentNodeNames[i] = "agent-" + strconv.Itoa(i)
}
@ -43,6 +50,9 @@ func createSplitCluster(nodeOS string, etcdCount, controlPlaneCount, agentCount
nodeBoxes := strings.Repeat(nodeOS+" ", etcdCount+controlPlaneCount+agentCount)
nodeBoxes = strings.TrimSpace(nodeBoxes)
allNodes := append(etcdNodeNames, cpNodeNames...)
allNodes = append(allNodes, agentNodeNames...)
var testOptions string
for _, env := range os.Environ() {
if strings.HasPrefix(env, "E2E_") {
@ -50,14 +60,58 @@ func createSplitCluster(nodeOS string, etcdCount, controlPlaneCount, agentCount
}
}
cmd := fmt.Sprintf(`E2E_NODE_ROLES="%s" E2E_NODE_BOXES="%s" %s vagrant up &> vagrant.log`, nodeRoles, nodeBoxes, testOptions)
// Bring up (but do not provision) the first etcd node. In GitHub Actions, this also imports the VM image into libvirt, which
// takes time and can cause the next vagrant up to fail if it is not given enough time to complete.
cmd := fmt.Sprintf(`E2E_NODE_ROLES="%s" E2E_NODE_BOXES="%s" vagrant up --no-provision %s &> vagrant.log`, nodeRoles, nodeBoxes, etcdNodeNames[0])
fmt.Println(cmd)
if _, err := e2e.RunCommand(cmd); err != nil {
fmt.Println("Error Creating Cluster", err)
return nil, nil, nil, err
return etcdNodeNames, cpNodeNames, agentNodeNames, err
}
// Bring up the rest of the nodes in parallel
errg, _ := errgroup.WithContext(context.Background())
for _, node := range allNodes[1:] {
cmd := fmt.Sprintf(`E2E_NODE_ROLES="%s" E2E_NODE_BOXES="%s" vagrant up --no-provision %s &>> vagrant.log`, nodeRoles, nodeBoxes, node)
errg.Go(func() error {
_, err := e2e.RunCommand(cmd)
return err
})
// libVirt/Virtualbox needs some time between provisioning nodes
time.Sleep(10 * time.Second)
}
if err := errg.Wait(); err != nil {
return etcdNodeNames, cpNodeNames, agentNodeNames, err
}
if local {
testOptions += " E2E_RELEASE_VERSION=skip"
for _, node := range allNodes {
cmd := fmt.Sprintf(`E2E_NODE_ROLES=%s vagrant scp ../../../dist/artifacts/k3s %s:/tmp/`, node, node)
if _, err := e2e.RunCommand(cmd); err != nil {
return etcdNodeNames, cpNodeNames, agentNodeNames, fmt.Errorf("failed to scp k3s binary to %s: %v", node, err)
}
if _, err := e2e.RunCmdOnNode("mv /tmp/k3s /usr/local/bin/", node); err != nil {
return etcdNodeNames, cpNodeNames, agentNodeNames, err
}
}
}
// Install K3s on all nodes in parallel
errg, _ = errgroup.WithContext(context.Background())
for _, node := range allNodes {
cmd = fmt.Sprintf(`E2E_NODE_ROLES="%s" E2E_NODE_BOXES="%s" %s vagrant provision %s &>> vagrant.log`, nodeRoles, nodeBoxes, testOptions, node)
errg.Go(func() error {
_, err := e2e.RunCommand(cmd)
return err
})
// K3s needs some time between joining nodes to avoid learner issues
time.Sleep(10 * time.Second)
}
if err := errg.Wait(); err != nil {
return etcdNodeNames, cpNodeNames, agentNodeNames, err
}
return etcdNodeNames, cpNodeNames, agentNodeNames, nil
}
func Test_E2ESplitServer(t *testing.T) {
RegisterFailHandler(Fail)
flag.Parse()
@ -78,7 +132,7 @@ var _ = Describe("Verify Create", Ordered, func() {
Context("Cluster :", func() {
It("Starts up with no issues", func() {
var err error
etcdNodeNames, cpNodeNames, agentNodeNames, err = createSplitCluster(*nodeOS, *etcdCount, *controlPlaneCount, *agentCount)
etcdNodeNames, cpNodeNames, agentNodeNames, err = createSplitCluster(*nodeOS, *etcdCount, *controlPlaneCount, *agentCount, *local)
Expect(err).NotTo(HaveOccurred(), e2e.GetVagrantLog(err))
fmt.Println("CLUSTER CONFIG")
fmt.Println("OS:", *nodeOS)
@ -229,6 +283,11 @@ var _ = AfterEach(func() {
})
var _ = AfterSuite(func() {
if !failed {
allNodes := append(cpNodeNames, etcdNodeNames...)
allNodes = append(allNodes, agentNodeNames...)
Expect(e2e.GetCoverageReport(allNodes)).To(Succeed())
}
if !failed || *ci {
Expect(e2e.DestroyCluster()).To(Succeed())
Expect(os.Remove(kubeConfigFile)).To(Succeed())
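The staged bring-up above relies on golang.org/x/sync/errgroup to run several vagrant commands concurrently while staggering their start times. A minimal, self-contained sketch of that pattern, with a hypothetical runCommand helper standing in for e2e.RunCommand:

package main

import (
	"fmt"
	"os/exec"
	"time"

	"golang.org/x/sync/errgroup"
)

// runCommand is a stand-in for the test's e2e.RunCommand helper.
func runCommand(cmd string) (string, error) {
	out, err := exec.Command("bash", "-c", cmd).CombinedOutput()
	return string(out), err
}

func main() {
	nodes := []string{"server-etcd-1", "server-etcd-2", "server-cp-0", "agent-0"}
	var errg errgroup.Group
	for _, node := range nodes {
		// cmd is declared inside the loop so each goroutine captures its own copy.
		cmd := fmt.Sprintf("vagrant up --no-provision %s >> vagrant.log 2>&1", node)
		errg.Go(func() error {
			_, err := runCommand(cmd)
			return err
		})
		// Stagger launches; libvirt needs some time between node creations.
		time.Sleep(10 * time.Second)
	}
	if err := errg.Wait(); err != nil {
		fmt.Println("parallel vagrant up failed:", err)
	}
}

The zero-value errgroup.Group shown here collects the first error from any goroutine; the test itself uses errgroup.WithContext, but the staggered launches and the per-iteration cmd variable are the parts that matter.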

View File

@ -67,6 +67,8 @@ Vagrant.configure("2") do |config|
config.vm.provider "libvirt" do |v|
v.cpus = NODE_CPUS
v.memory = NODE_MEMORY
# We replicate the default prefix, but add a timestamp to enable parallel runs and cleanup of old VMs
v.default_prefix = File.basename(Dir.getwd) + "_" + Time.now.to_i.to_s + "_"
end
config.vm.provider "virtualbox" do |v|
v.cpus = NODE_CPUS

View File

@ -63,6 +63,8 @@ Vagrant.configure("2") do |config|
config.vm.provider "libvirt" do |v|
v.cpus = NODE_CPUS
v.memory = NODE_MEMORY
# We replicate the default prefix, but add a timestamp to enable parallel runs and cleanup of old VMs
v.default_prefix = File.basename(Dir.getwd) + "_" + Time.now.to_i.to_s + "_"
end
if NODE_ROLES.kind_of?(String)

View File

@ -72,6 +72,8 @@ Vagrant.configure("2") do |config|
config.vm.provider "libvirt" do |v|
v.cpus = NODE_CPUS
v.memory = NODE_MEMORY
# We replicate the default prefix, but add a timestamp to enable parallel runs and cleanup of old VMs
v.default_prefix = File.basename(Dir.getwd) + "_" + Time.now.to_i.to_s + "_"
end
if NODE_ROLES.kind_of?(String)

View File

@ -11,7 +11,7 @@ GOCOVER = (ENV['E2E_GOCOVER'] || "")
NODE_CPUS = (ENV['E2E_NODE_CPUS'] || 2).to_i
NODE_MEMORY = (ENV['E2E_NODE_MEMORY'] || 2048).to_i
# VirtualBox >= 6.1.28 requires `/etc/vbox/network.conf` for expanded private networks
NETWORK_PREFIX = "10.10.10"
NETWORK_PREFIX = "10.10.11"
install_type = ""
def provision(vm, role, role_num, node_num)
@ -141,6 +141,8 @@ Vagrant.configure("2") do |config|
config.vm.provider "libvirt" do |v|
v.cpus = NODE_CPUS
v.memory = NODE_MEMORY
# We replicate the default prefix, but add a timestamp to enable parallel runs and cleanup of old VMs
v.default_prefix = File.basename(Dir.getwd) + "_" + Time.now.to_i.to_s + "_"
end
config.vm.provider "virtualbox" do |v|
v.cpus = NODE_CPUS

View File

@ -139,6 +139,8 @@ Vagrant.configure("2") do |config|
config.vm.provider "libvirt" do |v|
v.cpus = NODE_CPUS
v.memory = NODE_MEMORY
# We replicate the default prefix, but add a timestamp to enable parallel runs and cleanup of old VMs
v.default_prefix = File.basename(Dir.getwd) + "_" + Time.now.to_i.to_s + "_"
end
config.vm.provider "virtualbox" do |v|
v.cpus = NODE_CPUS

View File

@ -69,6 +69,8 @@ Vagrant.configure("2") do |config|
config.vm.provider "libvirt" do |v|
v.cpus = NODE_CPUS
v.memory = NODE_MEMORY
# We replicate the default prefix, but add a timestamp to enable parallel runs and cleanup of old VMs
v.default_prefix = File.basename(Dir.getwd) + "_" + Time.now.to_i.to_s + "_"
end
config.vm.provider "virtualbox" do |v|
v.cpus = NODE_CPUS