From 677cc1a191356054bf21cc52cd719862757897e1 Mon Sep 17 00:00:00 2001
From: Taylor Price
Date: Tue, 10 Sep 2019 16:19:32 -0700
Subject: [PATCH 1/7] Add initial k3s load testing framework

---
 load_testing/k3s/.gitignore | 1 +
 load_testing/k3s/cluster-loader/.gitignore | 1 +
 .../k3s/cluster-loader/large/config.yaml | 471 ++++++++++++++++++
 .../k3s/cluster-loader/large/configmap.yaml | 9 +
 .../k3s/cluster-loader/large/deployment.yaml | 62 +++
 .../k3s/cluster-loader/large/run-test.sh | 3 +
 .../k3s/cluster-loader/large/secret.yaml | 7 +
 .../k3s/cluster-loader/large/service.yaml | 16 +
 .../k3s/cluster-loader/large/statefulset.yaml | 30 ++
 .../large/statefulset_service.yaml | 10 +
 .../k3s/cluster-loader/small/config.yaml | 471 ++++++++++++++++++
 .../k3s/cluster-loader/small/configmap.yaml | 9 +
 .../k3s/cluster-loader/small/deployment.yaml | 62 +++
 .../k3s/cluster-loader/small/run-test.sh | 3 +
 .../k3s/cluster-loader/small/secret.yaml | 7 +
 .../k3s/cluster-loader/small/service.yaml | 16 +
 .../k3s/cluster-loader/small/statefulset.yaml | 30 ++
 .../small/statefulset_service.yaml | 10 +
 load_testing/k3s/pool/data.tf | 55 ++
 .../k3s/pool/files/pool_worker_userdata.tmpl | 32 ++
 load_testing/k3s/pool/main.tf | 80 +++
 load_testing/k3s/pool/outputs.tf | 0
 load_testing/k3s/pool/variables.tf | 16 +
 load_testing/k3s/pool/versions.tf | 4 +
 load_testing/k3s/readme.MD | 47 ++
 load_testing/k3s/server/data.tf | 83 +++
 load_testing/k3s/server/files/metrics.yaml | 227 +++++++++
 load_testing/k3s/server/files/prom.yaml | 86 ++++
 .../k3s/server/files/server_userdata.tmpl | 45 ++
 .../k3s/server/files/worker_userdata.tmpl | 26 +
 load_testing/k3s/server/iam.tf | 58 +++
 load_testing/k3s/server/main.tf | 135 +++++
 load_testing/k3s/server/outputs.tf | 11 +
 load_testing/k3s/server/variables.tf | 12 +
 load_testing/k3s/server/versions.tf | 4 +
 35 files changed, 2139 insertions(+)
 create mode 100644 load_testing/k3s/.gitignore
 create mode 100644 load_testing/k3s/cluster-loader/.gitignore
 create mode 100644 load_testing/k3s/cluster-loader/large/config.yaml
 create mode 100644 load_testing/k3s/cluster-loader/large/configmap.yaml
 create mode 100644 load_testing/k3s/cluster-loader/large/deployment.yaml
 create mode 100644 load_testing/k3s/cluster-loader/large/run-test.sh
 create mode 100644 load_testing/k3s/cluster-loader/large/secret.yaml
 create mode 100644 load_testing/k3s/cluster-loader/large/service.yaml
 create mode 100644 load_testing/k3s/cluster-loader/large/statefulset.yaml
 create mode 100644 load_testing/k3s/cluster-loader/large/statefulset_service.yaml
 create mode 100644 load_testing/k3s/cluster-loader/small/config.yaml
 create mode 100644 load_testing/k3s/cluster-loader/small/configmap.yaml
 create mode 100644 load_testing/k3s/cluster-loader/small/deployment.yaml
 create mode 100644 load_testing/k3s/cluster-loader/small/run-test.sh
 create mode 100644 load_testing/k3s/cluster-loader/small/secret.yaml
 create mode 100644 load_testing/k3s/cluster-loader/small/service.yaml
 create mode 100644 load_testing/k3s/cluster-loader/small/statefulset.yaml
 create mode 100644 load_testing/k3s/cluster-loader/small/statefulset_service.yaml
 create mode 100644 load_testing/k3s/pool/data.tf
 create mode 100644 load_testing/k3s/pool/files/pool_worker_userdata.tmpl
 create mode 100644 load_testing/k3s/pool/main.tf
 create mode 100644 load_testing/k3s/pool/outputs.tf
 create mode 100644 load_testing/k3s/pool/variables.tf
 create mode 100644 load_testing/k3s/pool/versions.tf
 create mode 100644 load_testing/k3s/readme.MD
 create mode
100644 load_testing/k3s/server/data.tf create mode 100644 load_testing/k3s/server/files/metrics.yaml create mode 100644 load_testing/k3s/server/files/prom.yaml create mode 100644 load_testing/k3s/server/files/server_userdata.tmpl create mode 100644 load_testing/k3s/server/files/worker_userdata.tmpl create mode 100644 load_testing/k3s/server/iam.tf create mode 100644 load_testing/k3s/server/main.tf create mode 100644 load_testing/k3s/server/outputs.tf create mode 100644 load_testing/k3s/server/variables.tf create mode 100644 load_testing/k3s/server/versions.tf diff --git a/load_testing/k3s/.gitignore b/load_testing/k3s/.gitignore new file mode 100644 index 0000000000..e79eb23105 --- /dev/null +++ b/load_testing/k3s/.gitignore @@ -0,0 +1 @@ +.terraform* diff --git a/load_testing/k3s/cluster-loader/.gitignore b/load_testing/k3s/cluster-loader/.gitignore new file mode 100644 index 0000000000..ee5ab6892d --- /dev/null +++ b/load_testing/k3s/cluster-loader/.gitignore @@ -0,0 +1 @@ +kubeConfig.yaml diff --git a/load_testing/k3s/cluster-loader/large/config.yaml b/load_testing/k3s/cluster-loader/large/config.yaml new file mode 100644 index 0000000000..2b9f23f23a --- /dev/null +++ b/load_testing/k3s/cluster-loader/large/config.yaml @@ -0,0 +1,471 @@ +# ASSUMPTIONS: +# - Underlying cluster should have 100+ nodes. +# - Number of nodes should be divisible by NODES_PER_NAMESPACE (default 100). +# - The number of created SVCs is half the number of created Deployments. +# - Only half of Deployments will be assigned 1-1 to existing SVCs. + +#Constants +{{$NODE_MODE := DefaultParam .NODE_MODE "allnodes"}} +{{$NODES_PER_NAMESPACE := DefaultParam .NODES_PER_NAMESPACE 100}} +{{$PODS_PER_NODE := DefaultParam .PODS_PER_NODE 30}} +{{$LOAD_TEST_THROUGHPUT := DefaultParam .LOAD_TEST_THROUGHPUT 10}} +{{$BIG_GROUP_SIZE := 25}} +{{$MEDIUM_GROUP_SIZE := 15}} +{{$SMALL_GROUP_SIZE := 1}} +{{$SMALL_STATEFUL_SETS_PER_NAMESPACE := 1}} +{{$MEDIUM_STATEFUL_SETS_PER_NAMESPACE := 1}} +{{$ENABLE_CHAOSMONKEY := DefaultParam .ENABLE_CHAOSMONKEY false}} +{{$ENABLE_PROMETHEUS_API_RESPONSIVENESS := DefaultParam .ENABLE_PROMETHEUS_API_RESPONSIVENESS false}} +{{$ENABLE_CONFIGMAPS := DefaultParam .ENABLE_CONFIGMAPS false}} +{{$ENABLE_SECRETS := DefaultParam .ENABLE_SECRETS false}} +{{$ENABLE_STATEFULSETS := DefaultParam .ENABLE_STATEFULSETS false}} +#Variables +{{$namespaces := DivideInt .Nodes $NODES_PER_NAMESPACE}} +{{$totalPods := MultiplyInt $namespaces $NODES_PER_NAMESPACE $PODS_PER_NODE}} +{{$podsPerNamespace := DivideInt $totalPods $namespaces}} +{{$saturationTime := DivideInt $totalPods $LOAD_TEST_THROUGHPUT}} +# bigDeployments - 1/4 of namespace pods should be in big Deployments. +{{$bigDeploymentsPerNamespace := DivideInt $podsPerNamespace (MultiplyInt 4 $BIG_GROUP_SIZE)}} +# mediumDeployments - 1/4 of namespace pods should be in medium Deployments. +{{$mediumDeploymentsPerNamespace := DivideInt $podsPerNamespace (MultiplyInt 4 $MEDIUM_GROUP_SIZE)}} +# smallDeployments - 1/2 of namespace pods should be in small Deployments. 
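+# Worked example of these derivations (a sketch assuming a 100-node cluster and
+# the defaults above): namespaces = 100/100 = 1, totalPods = 1*100*30 = 3000,
+# podsPerNamespace = 3000, saturationTime = 3000/10 = 300s, so
+# bigDeploymentsPerNamespace = 3000/(4*25) = 30, mediumDeploymentsPerNamespace
+# = 3000/(4*15) = 50, and smallDeploymentsPerNamespace = 3000/(2*1) = 1500
+# (before the StatefulSet adjustment below).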
+{{$smallDeploymentsPerNamespace := DivideInt $podsPerNamespace (MultiplyInt 2 $SMALL_GROUP_SIZE)}} +# If StatefulSets are enabled reduce the number of small and medium deployments per namespace +{{$smallDeploymentsPerNamespace := SubtractInt $smallDeploymentsPerNamespace (IfThenElse $ENABLE_STATEFULSETS $SMALL_STATEFUL_SETS_PER_NAMESPACE 0)}} +{{$mediumDeploymentsPerNamespace := SubtractInt $mediumDeploymentsPerNamespace (IfThenElse $ENABLE_STATEFULSETS $MEDIUM_STATEFUL_SETS_PER_NAMESPACE 0)}} + + +name: load +automanagedNamespaces: {{$namespaces}} +tuningSets: +- name: Sequence + parallelismLimitedLoad: + parallelismLimit: 1 +- name: RandomizedSaturationTimeLimited + RandomizedTimeLimitedLoad: + timeLimit: {{$saturationTime}}s +- name: RandomizedScalingTimeLimited + RandomizedTimeLimitedLoad: + # The expected number of created/deleted pods is totalPods/4 when scaling, + # as each RS changes its size from X to a uniform random value in [X/2, 3X/2]. + # To match 10 [pods/s] requirement, we need to divide saturationTime by 4. + timeLimit: {{DivideInt $saturationTime 4}}s +{{if $ENABLE_CHAOSMONKEY}} +chaosMonkey: + nodeFailure: + failureRate: 0.01 + interval: 1m + jitterFactor: 10.0 + simulatedDowntime: 10m +{{end}} +steps: +- name: Starting measurements + measurements: + - Identifier: APIResponsiveness + Method: APIResponsiveness + Params: + action: reset + - Identifier: APIResponsivenessPrometheus + Method: APIResponsivenessPrometheus + Params: + action: start + - Identifier: PodStartupLatency + Method: PodStartupLatency + Params: + action: start + labelSelector: group = load + threshold: 1h + - Identifier: InClusterNetworkLatency + Method: InClusterNetworkLatency + Params: + action: start + replicasPerProbe: {{DivideInt .Nodes 100}} + - Identifier: DnsLookupLatency + Method: DnsLookupLatency + Params: + action: start + replicasPerProbe: {{DivideInt .Nodes 100}} + - Identifier: NetworkProgrammingLatency + Method: NetworkProgrammingLatency + Params: + action: start + - Identifier: TestMetrics + Method: TestMetrics + Params: + action: start + nodeMode: {{$NODE_MODE}} + +- name: Creating SVCs + phases: + - namespaceRange: + min: 1 + max: {{$namespaces}} + replicasPerNamespace: {{DivideInt (AddInt $bigDeploymentsPerNamespace 1) 2}} + tuningSet: Sequence + objectBundle: + - basename: big-service + objectTemplatePath: service.yaml + - namespaceRange: + min: 1 + max: {{$namespaces}} + replicasPerNamespace: {{DivideInt (AddInt $mediumDeploymentsPerNamespace 1) 2}} + tuningSet: Sequence + objectBundle: + - basename: medium-service + objectTemplatePath: service.yaml + - namespaceRange: + min: 1 + max: {{$namespaces}} + replicasPerNamespace: {{DivideInt (AddInt $smallDeploymentsPerNamespace 1) 2}} + tuningSet: Sequence + objectBundle: + - basename: small-service + objectTemplatePath: service.yaml + +- name: Starting measurement for waiting for pods + measurements: + - Identifier: WaitForRunningDeployments + Method: WaitForControlledPodsRunning + Params: + action: start + apiVersion: apps/v1 + kind: Deployment + labelSelector: group = load + operationTimeout: 15m + {{if $ENABLE_STATEFULSETS}} + - Identifier: WaitForRunningStatefulSets + Method: WaitForControlledPodsRunning + Params: + action: start + apiVersion: apps/v1 + kind: StatefulSet + labelSelector: group = load + operationTimeout: 15m + {{end}} + +- name: Creating objects + phases: + - namespaceRange: + min: 1 + max: {{$namespaces}} + replicasPerNamespace: {{$bigDeploymentsPerNamespace}} + tuningSet: RandomizedSaturationTimeLimited + 
objectBundle: + {{if $ENABLE_CONFIGMAPS}} + - basename: big-deployment + objectTemplatePath: configmap.yaml + {{end}} + {{if $ENABLE_SECRETS}} + - basename: big-deployment + objectTemplatePath: secret.yaml + {{end}} + - basename: big-deployment + objectTemplatePath: deployment.yaml + templateFillMap: + ReplicasMin: {{$BIG_GROUP_SIZE}} + ReplicasMax: {{$BIG_GROUP_SIZE}} + SvcName: big-service + - namespaceRange: + min: 1 + max: {{$namespaces}} + replicasPerNamespace: {{$mediumDeploymentsPerNamespace}} + tuningSet: RandomizedSaturationTimeLimited + objectBundle: + {{if $ENABLE_CONFIGMAPS}} + - basename: medium-deployment + objectTemplatePath: configmap.yaml + {{end}} + {{if $ENABLE_SECRETS}} + - basename: medium-deployment + objectTemplatePath: secret.yaml + {{end}} + - basename: medium-deployment + objectTemplatePath: deployment.yaml + templateFillMap: + ReplicasMin: {{$MEDIUM_GROUP_SIZE}} + ReplicasMax: {{$MEDIUM_GROUP_SIZE}} + SvcName: medium-service + - namespaceRange: + min: 1 + max: {{$namespaces}} + replicasPerNamespace: {{$smallDeploymentsPerNamespace}} + tuningSet: RandomizedSaturationTimeLimited + objectBundle: + {{if $ENABLE_CONFIGMAPS}} + - basename: small-deployment + objectTemplatePath: configmap.yaml + {{end}} + {{if $ENABLE_SECRETS}} + - basename: small-deployment + objectTemplatePath: secret.yaml + {{end}} + - basename: small-deployment + objectTemplatePath: deployment.yaml + templateFillMap: + ReplicasMin: {{$SMALL_GROUP_SIZE}} + ReplicasMax: {{$SMALL_GROUP_SIZE}} + SvcName: small-service + {{if $ENABLE_STATEFULSETS}} + - namespaceRange: + min: 1 + max: {{$namespaces}} + replicasPerNamespace: {{$SMALL_STATEFUL_SETS_PER_NAMESPACE}} + tuningSet: RandomizedSaturationTimeLimited + objectBundle: + - basename: small-statefulset + objectTemplatePath: statefulset_service.yaml + - basename: small-statefulset + objectTemplatePath: statefulset.yaml + templateFillMap: + ReplicasMin: {{$SMALL_GROUP_SIZE}} + ReplicasMax: {{$SMALL_GROUP_SIZE}} + - namespaceRange: + min: 1 + max: {{$namespaces}} + replicasPerNamespace: {{$MEDIUM_STATEFUL_SETS_PER_NAMESPACE}} + tuningSet: RandomizedSaturationTimeLimited + objectBundle: + - basename: medium-statefulset + objectTemplatePath: statefulset_service.yaml + - basename: medium-statefulset + objectTemplatePath: statefulset.yaml + templateFillMap: + ReplicasMin: {{$MEDIUM_GROUP_SIZE}} + ReplicasMax: {{$MEDIUM_GROUP_SIZE}} + {{end}} + +- name: Waiting for pods to be running + measurements: + - Identifier: WaitForRunningDeployments + Method: WaitForControlledPodsRunning + Params: + action: gather + {{if $ENABLE_STATEFULSETS}} + - Identifier: WaitForRunningStatefulSets + Method: WaitForControlledPodsRunning + Params: + action: gather + {{end}} + +- name: Scaling objects + phases: + - namespaceRange: + min: 1 + max: {{$namespaces}} + replicasPerNamespace: {{$bigDeploymentsPerNamespace}} + tuningSet: RandomizedScalingTimeLimited + objectBundle: + - basename: big-deployment + objectTemplatePath: deployment.yaml + templateFillMap: + ReplicasMin: {{MultiplyInt $BIG_GROUP_SIZE 0.5}} + ReplicasMax: {{MultiplyInt $BIG_GROUP_SIZE 1.5}} + SvcName: big-service + - namespaceRange: + min: 1 + max: {{$namespaces}} + replicasPerNamespace: {{$mediumDeploymentsPerNamespace}} + tuningSet: RandomizedScalingTimeLimited + objectBundle: + - basename: medium-deployment + objectTemplatePath: deployment.yaml + templateFillMap: + ReplicasMin: {{MultiplyInt $MEDIUM_GROUP_SIZE 0.5}} + ReplicasMax: {{MultiplyInt $MEDIUM_GROUP_SIZE 1.5}} + SvcName: medium-service + - namespaceRange: 
+ min: 1 + max: {{$namespaces}} + replicasPerNamespace: {{$smallDeploymentsPerNamespace}} + tuningSet: RandomizedScalingTimeLimited + objectBundle: + - basename: small-deployment + objectTemplatePath: deployment.yaml + templateFillMap: + ReplicasMin: {{MultiplyInt $SMALL_GROUP_SIZE 0.5}} + ReplicasMax: {{MultiplyInt $SMALL_GROUP_SIZE 1.5}} + SvcName: small-service + {{if $ENABLE_STATEFULSETS}} + - namespaceRange: + min: 1 + max: {{$namespaces}} + replicasPerNamespace: {{$SMALL_STATEFUL_SETS_PER_NAMESPACE}} + tuningSet: RandomizedScalingTimeLimited + objectBundle: + - basename: small-statefulset + objectTemplatePath: statefulset.yaml + templateFillMap: + ReplicasMin: {{MultiplyInt $SMALL_GROUP_SIZE 0.5}} + ReplicasMax: {{MultiplyInt $SMALL_GROUP_SIZE 1.5}} + - namespaceRange: + min: 1 + max: {{$namespaces}} + replicasPerNamespace: {{$MEDIUM_STATEFUL_SETS_PER_NAMESPACE}} + tuningSet: RandomizedScalingTimeLimited + objectBundle: + - basename: medium-statefulset + objectTemplatePath: statefulset.yaml + templateFillMap: + ReplicasMin: {{MultiplyInt $MEDIUM_GROUP_SIZE 0.5}} + ReplicasMax: {{MultiplyInt $MEDIUM_GROUP_SIZE 1.5}} + {{end}} + +- name: Waiting for objects to become scaled + measurements: + - Identifier: WaitForRunningDeployments + Method: WaitForControlledPodsRunning + Params: + action: gather + {{if $ENABLE_STATEFULSETS}} + - Identifier: WaitForRunningStatefulSets + Method: WaitForControlledPodsRunning + Params: + action: gather + {{end}} + +- name: Deleting objects + phases: + - namespaceRange: + min: 1 + max: {{$namespaces}} + replicasPerNamespace: 0 + tuningSet: RandomizedSaturationTimeLimited + objectBundle: + - basename: big-deployment + objectTemplatePath: deployment.yaml + {{if $ENABLE_CONFIGMAPS}} + - basename: big-deployment + objectTemplatePath: configmap.yaml + {{end}} + {{if $ENABLE_SECRETS}} + - basename: big-deployment + objectTemplatePath: secret.yaml + {{end}} + - namespaceRange: + min: 1 + max: {{$namespaces}} + replicasPerNamespace: 0 + tuningSet: RandomizedSaturationTimeLimited + objectBundle: + - basename: medium-deployment + objectTemplatePath: deployment.yaml + {{if $ENABLE_CONFIGMAPS}} + - basename: medium-deployment + objectTemplatePath: configmap.yaml + {{end}} + {{if $ENABLE_SECRETS}} + - basename: medium-deployment + objectTemplatePath: secret.yaml + {{end}} + - namespaceRange: + min: 1 + max: {{$namespaces}} + replicasPerNamespace: 0 + tuningSet: RandomizedSaturationTimeLimited + objectBundle: + - basename: small-deployment + objectTemplatePath: deployment.yaml + {{if $ENABLE_CONFIGMAPS}} + - basename: small-deployment + objectTemplatePath: configmap.yaml + {{end}} + {{if $ENABLE_SECRETS}} + - basename: small-deployment + objectTemplatePath: secret.yaml + {{end}} + {{if $ENABLE_STATEFULSETS}} + - namespaceRange: + min: 1 + max: {{$namespaces}} + replicasPerNamespace: 0 + tuningSet: RandomizedSaturationTimeLimited + objectBundle: + - basename: small-statefulset + objectTemplatePath: statefulset.yaml + - basename: small-statefulset + objectTemplatePath: statefulset_service.yaml + - namespaceRange: + min: 1 + max: {{$namespaces}} + replicasPerNamespace: 0 + tuningSet: RandomizedSaturationTimeLimited + objectBundle: + - basename: medium-statefulset + objectTemplatePath: statefulset.yaml + - basename: medium-statefulset + objectTemplatePath: statefulset_service.yaml + {{end}} + +- name: Waiting for pods to be deleted + measurements: + - Identifier: WaitForRunningDeployments + Method: WaitForControlledPodsRunning + Params: + action: gather + {{if 
$ENABLE_STATEFULSETS}} + - Identifier: WaitForRunningStatefulSets + Method: WaitForControlledPodsRunning + Params: + action: gather + {{end}} + +- name: Deleting SVCs + phases: + - namespaceRange: + min: 1 + max: {{$namespaces}} + replicasPerNamespace: 0 + tuningSet: Sequence + objectBundle: + - basename: big-service + objectTemplatePath: service.yaml + - namespaceRange: + min: 1 + max: {{$namespaces}} + replicasPerNamespace: 0 + tuningSet: Sequence + objectBundle: + - basename: medium-service + objectTemplatePath: service.yaml + - namespaceRange: + min: 1 + max: {{$namespaces}} + replicasPerNamespace: 0 + tuningSet: Sequence + objectBundle: + - basename: small-service + objectTemplatePath: service.yaml + +- name: Collecting measurements + measurements: + - Identifier: APIResponsiveness + Method: APIResponsiveness + Params: + action: gather + - Identifier: APIResponsivenessPrometheus + Method: APIResponsivenessPrometheus + Params: + action: gather + {{if $ENABLE_PROMETHEUS_API_RESPONSIVENESS}} + enableViolations: true + {{end}} + - Identifier: PodStartupLatency + Method: PodStartupLatency + Params: + action: gather + - Identifier: InClusterNetworkLatency + Method: InClusterNetworkLatency + Params: + action: gather + - Identifier: DnsLookupLatency + Method: DnsLookupLatency + Params: + action: gather + - Identifier: NetworkProgrammingLatency + Method: NetworkProgrammingLatency + Params: + action: gather + - Identifier: TestMetrics + Method: TestMetrics + Params: + action: gather diff --git a/load_testing/k3s/cluster-loader/large/configmap.yaml b/load_testing/k3s/cluster-loader/large/configmap.yaml new file mode 100644 index 0000000000..b249a39143 --- /dev/null +++ b/load_testing/k3s/cluster-loader/large/configmap.yaml @@ -0,0 +1,9 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{.Name}} +data: + data.yaml: |- + a: 1 + b: 2 + c: 3 diff --git a/load_testing/k3s/cluster-loader/large/deployment.yaml b/load_testing/k3s/cluster-loader/large/deployment.yaml new file mode 100644 index 0000000000..dcd581914a --- /dev/null +++ b/load_testing/k3s/cluster-loader/large/deployment.yaml @@ -0,0 +1,62 @@ +{{$EnableConfigMaps := DefaultParam .ENABLE_CONFIGMAPS false}} +{{$EnableSecrets := DefaultParam .ENABLE_SECRETS false}} + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{.Name}} + labels: + group: load + svc: {{.SvcName}}-{{.Index}} +spec: + replicas: {{RandIntRange .ReplicasMin .ReplicasMax}} + selector: + matchLabels: + name: {{.Name}} + template: + metadata: + labels: + group: load + name: {{.Name}} + svc: {{.SvcName}}-{{.Index}} + spec: + containers: + - image: k8s.gcr.io/pause:3.1 + name: {{.Name}} + resources: + requests: + cpu: 10m + memory: "10M" + volumeMounts: + {{if and $EnableConfigMaps (eq (Mod .Index 20) 0 19) }} # .Index % 20 in {0,19} - 10% deployments will have ConfigMap + - name: configmap + mountPath: /var/configmap + {{end}} + {{if and $EnableSecrets (eq (Mod .Index 20) 10 19) }} # .Index % 20 in {10,19} - 10% deployments will have Secret + - name: secret + mountPath: /var/secret + {{end}} + dnsPolicy: Default + terminationGracePeriodSeconds: 1 + # Add not-ready/unreachable tolerations for 15 minutes so that node + # failure doesn't trigger pod deletion. 
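+      # (The tolerationSeconds of 900 below is 15 minutes; without these
+      # explicit entries, the DefaultTolerationSeconds admission plugin would
+      # add not-ready/unreachable tolerations of only 300 seconds.)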
+ tolerations: + - key: "node.kubernetes.io/not-ready" + operator: "Exists" + effect: "NoExecute" + tolerationSeconds: 900 + - key: "node.kubernetes.io/unreachable" + operator: "Exists" + effect: "NoExecute" + tolerationSeconds: 900 + volumes: + {{if and $EnableConfigMaps (eq (Mod .Index 20) 0 19) }} # .Index % 20 in {0,19} - 10% deployments will have ConfigMap + - name: configmap + configMap: + name: {{.BaseName}}-{{.Index}} + {{end}} + {{if and $EnableSecrets (eq (Mod .Index 20) 10 19) }} # .Index % 20 in {10,19} - 10% deployments will have Secret + - name: secret + secret: + secretName: {{.BaseName}}-{{.Index}} + {{end}} diff --git a/load_testing/k3s/cluster-loader/large/run-test.sh b/load_testing/k3s/cluster-loader/large/run-test.sh new file mode 100644 index 0000000000..b190c99ef3 --- /dev/null +++ b/load_testing/k3s/cluster-loader/large/run-test.sh @@ -0,0 +1,3 @@ +#!/bin/sh + +clusterloader --kubeconfig=../kubeConfig.yaml --testconfig=config.yaml diff --git a/load_testing/k3s/cluster-loader/large/secret.yaml b/load_testing/k3s/cluster-loader/large/secret.yaml new file mode 100644 index 0000000000..67134b355f --- /dev/null +++ b/load_testing/k3s/cluster-loader/large/secret.yaml @@ -0,0 +1,7 @@ +apiVersion: v1 +kind: Secret +metadata: + name: {{.Name}} +type: Opaque +data: + password: c2NhbGFiaWxpdHkK diff --git a/load_testing/k3s/cluster-loader/large/service.yaml b/load_testing/k3s/cluster-loader/large/service.yaml new file mode 100644 index 0000000000..ed6a22c8cf --- /dev/null +++ b/load_testing/k3s/cluster-loader/large/service.yaml @@ -0,0 +1,16 @@ +{{$SetServiceProxyLabel := DefaultParam .SetServiceProxyLabel false}} + +apiVersion: v1 +kind: Service +metadata: + name: {{.Name}} +{{if and $SetServiceProxyLabel (eq (Mod .Index 2) 0)}} + labels: + service.kubernetes.io/service-proxy-name: foo +{{end}} +spec: + selector: + svc: {{.Name}} + ports: + - port: 80 + targetPort: 80 diff --git a/load_testing/k3s/cluster-loader/large/statefulset.yaml b/load_testing/k3s/cluster-loader/large/statefulset.yaml new file mode 100644 index 0000000000..bb97bfce9a --- /dev/null +++ b/load_testing/k3s/cluster-loader/large/statefulset.yaml @@ -0,0 +1,30 @@ +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: {{.Name}} + labels: + group: load +spec: + podManagementPolicy: Parallel + selector: + matchLabels: + name: {{.Name}} + serviceName: {{.Name}} + replicas: {{RandIntRange .ReplicasMin .ReplicasMax}} + template: + metadata: + labels: + group: statefulset + name: {{.Name}} + spec: + terminationGracePeriodSeconds: 1 + containers: + - name: {{.Name}} + image: k8s.gcr.io/pause:3.1 + ports: + - containerPort: 80 + name: web + resources: + requests: + cpu: 10m + memory: "10M" diff --git a/load_testing/k3s/cluster-loader/large/statefulset_service.yaml b/load_testing/k3s/cluster-loader/large/statefulset_service.yaml new file mode 100644 index 0000000000..5e16a47a19 --- /dev/null +++ b/load_testing/k3s/cluster-loader/large/statefulset_service.yaml @@ -0,0 +1,10 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{.Name}} + labels: + name: {{.Name}} +spec: + clusterIP: None + selector: + name: {{.Name}} diff --git a/load_testing/k3s/cluster-loader/small/config.yaml b/load_testing/k3s/cluster-loader/small/config.yaml new file mode 100644 index 0000000000..3c999b5069 --- /dev/null +++ b/load_testing/k3s/cluster-loader/small/config.yaml @@ -0,0 +1,471 @@ +# ASSUMPTIONS: +# - Underlying cluster should have 100+ nodes. +# - Number of nodes should be divisible by NODES_PER_NAMESPACE (default 100). 
+# - The number of created SVCs is half the number of created Deployments. +# - Only half of Deployments will be assigned 1-1 to existing SVCs. + +#Constants +{{$NODE_MODE := DefaultParam .NODE_MODE "allnodes"}} +{{$NODES_PER_NAMESPACE := DefaultParam .NODES_PER_NAMESPACE 1}} +{{$PODS_PER_NODE := DefaultParam .PODS_PER_NODE 5}} +{{$LOAD_TEST_THROUGHPUT := DefaultParam .LOAD_TEST_THROUGHPUT 10}} +{{$BIG_GROUP_SIZE := 25}} +{{$MEDIUM_GROUP_SIZE := 15}} +{{$SMALL_GROUP_SIZE := 1}} +{{$SMALL_STATEFUL_SETS_PER_NAMESPACE := 1}} +{{$MEDIUM_STATEFUL_SETS_PER_NAMESPACE := 1}} +{{$ENABLE_CHAOSMONKEY := DefaultParam .ENABLE_CHAOSMONKEY false}} +{{$ENABLE_PROMETHEUS_API_RESPONSIVENESS := DefaultParam .ENABLE_PROMETHEUS_API_RESPONSIVENESS false}} +{{$ENABLE_CONFIGMAPS := DefaultParam .ENABLE_CONFIGMAPS false}} +{{$ENABLE_SECRETS := DefaultParam .ENABLE_SECRETS false}} +{{$ENABLE_STATEFULSETS := DefaultParam .ENABLE_STATEFULSETS false}} +#Variables +{{$namespaces := DivideInt .Nodes $NODES_PER_NAMESPACE}} +{{$totalPods := MultiplyInt $namespaces $NODES_PER_NAMESPACE $PODS_PER_NODE}} +{{$podsPerNamespace := DivideInt $totalPods $namespaces}} +{{$saturationTime := DivideInt $totalPods $LOAD_TEST_THROUGHPUT}} +# bigDeployments - 1/4 of namespace pods should be in big Deployments. +{{$bigDeploymentsPerNamespace := DivideInt $podsPerNamespace (MultiplyInt 4 $BIG_GROUP_SIZE)}} +# mediumDeployments - 1/4 of namespace pods should be in medium Deployments. +{{$mediumDeploymentsPerNamespace := DivideInt $podsPerNamespace (MultiplyInt 4 $MEDIUM_GROUP_SIZE)}} +# smallDeployments - 1/2 of namespace pods should be in small Deployments. +{{$smallDeploymentsPerNamespace := DivideInt $podsPerNamespace (MultiplyInt 2 $SMALL_GROUP_SIZE)}} +# If StatefulSets are enabled reduce the number of small and medium deployments per namespace +{{$smallDeploymentsPerNamespace := SubtractInt $smallDeploymentsPerNamespace (IfThenElse $ENABLE_STATEFULSETS $SMALL_STATEFUL_SETS_PER_NAMESPACE 0)}} +{{$mediumDeploymentsPerNamespace := SubtractInt $mediumDeploymentsPerNamespace (IfThenElse $ENABLE_STATEFULSETS $MEDIUM_STATEFUL_SETS_PER_NAMESPACE 0)}} + + +name: load +automanagedNamespaces: {{$namespaces}} +tuningSets: +- name: Sequence + parallelismLimitedLoad: + parallelismLimit: 1 +- name: RandomizedSaturationTimeLimited + RandomizedTimeLimitedLoad: + timeLimit: {{$saturationTime}}s +- name: RandomizedScalingTimeLimited + RandomizedTimeLimitedLoad: + # The expected number of created/deleted pods is totalPods/4 when scaling, + # as each RS changes its size from X to a uniform random value in [X/2, 3X/2]. + # To match 10 [pods/s] requirement, we need to divide saturationTime by 4. 
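+    # (Derivation, assuming the uniform resize described above: E|X' - X| with
+    # X' ~ Uniform[X/2, 3X/2] is X/4 per ReplicaSet; summed over all ReplicaSets
+    # this gives totalPods/4 pod creations/deletions, and spreading them over
+    # saturationTime/4 seconds keeps the rate at ~LOAD_TEST_THROUGHPUT pods/s.)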
+ timeLimit: {{DivideInt $saturationTime 4}}s +{{if $ENABLE_CHAOSMONKEY}} +chaosMonkey: + nodeFailure: + failureRate: 0.01 + interval: 1m + jitterFactor: 10.0 + simulatedDowntime: 10m +{{end}} +steps: +- name: Starting measurements + measurements: + - Identifier: APIResponsiveness + Method: APIResponsiveness + Params: + action: reset + - Identifier: APIResponsivenessPrometheus + Method: APIResponsivenessPrometheus + Params: + action: start + - Identifier: PodStartupLatency + Method: PodStartupLatency + Params: + action: start + labelSelector: group = load + threshold: 1h + - Identifier: InClusterNetworkLatency + Method: InClusterNetworkLatency + Params: + action: start + replicasPerProbe: {{DivideInt .Nodes 100}} + - Identifier: DnsLookupLatency + Method: DnsLookupLatency + Params: + action: start + replicasPerProbe: {{DivideInt .Nodes 100}} + - Identifier: NetworkProgrammingLatency + Method: NetworkProgrammingLatency + Params: + action: start + - Identifier: TestMetrics + Method: TestMetrics + Params: + action: start + nodeMode: {{$NODE_MODE}} + +- name: Creating SVCs + phases: + - namespaceRange: + min: 1 + max: {{$namespaces}} + replicasPerNamespace: {{DivideInt (AddInt $bigDeploymentsPerNamespace 1) 2}} + tuningSet: Sequence + objectBundle: + - basename: big-service + objectTemplatePath: service.yaml + - namespaceRange: + min: 1 + max: {{$namespaces}} + replicasPerNamespace: {{DivideInt (AddInt $mediumDeploymentsPerNamespace 1) 2}} + tuningSet: Sequence + objectBundle: + - basename: medium-service + objectTemplatePath: service.yaml + - namespaceRange: + min: 1 + max: {{$namespaces}} + replicasPerNamespace: {{DivideInt (AddInt $smallDeploymentsPerNamespace 1) 2}} + tuningSet: Sequence + objectBundle: + - basename: small-service + objectTemplatePath: service.yaml + +- name: Starting measurement for waiting for pods + measurements: + - Identifier: WaitForRunningDeployments + Method: WaitForControlledPodsRunning + Params: + action: start + apiVersion: apps/v1 + kind: Deployment + labelSelector: group = load + operationTimeout: 15m + {{if $ENABLE_STATEFULSETS}} + - Identifier: WaitForRunningStatefulSets + Method: WaitForControlledPodsRunning + Params: + action: start + apiVersion: apps/v1 + kind: StatefulSet + labelSelector: group = load + operationTimeout: 15m + {{end}} + +- name: Creating objects + phases: + - namespaceRange: + min: 1 + max: {{$namespaces}} + replicasPerNamespace: {{$bigDeploymentsPerNamespace}} + tuningSet: RandomizedSaturationTimeLimited + objectBundle: + {{if $ENABLE_CONFIGMAPS}} + - basename: big-deployment + objectTemplatePath: configmap.yaml + {{end}} + {{if $ENABLE_SECRETS}} + - basename: big-deployment + objectTemplatePath: secret.yaml + {{end}} + - basename: big-deployment + objectTemplatePath: deployment.yaml + templateFillMap: + ReplicasMin: {{$BIG_GROUP_SIZE}} + ReplicasMax: {{$BIG_GROUP_SIZE}} + SvcName: big-service + - namespaceRange: + min: 1 + max: {{$namespaces}} + replicasPerNamespace: {{$mediumDeploymentsPerNamespace}} + tuningSet: RandomizedSaturationTimeLimited + objectBundle: + {{if $ENABLE_CONFIGMAPS}} + - basename: medium-deployment + objectTemplatePath: configmap.yaml + {{end}} + {{if $ENABLE_SECRETS}} + - basename: medium-deployment + objectTemplatePath: secret.yaml + {{end}} + - basename: medium-deployment + objectTemplatePath: deployment.yaml + templateFillMap: + ReplicasMin: {{$MEDIUM_GROUP_SIZE}} + ReplicasMax: {{$MEDIUM_GROUP_SIZE}} + SvcName: medium-service + - namespaceRange: + min: 1 + max: {{$namespaces}} + replicasPerNamespace: 
{{$smallDeploymentsPerNamespace}} + tuningSet: RandomizedSaturationTimeLimited + objectBundle: + {{if $ENABLE_CONFIGMAPS}} + - basename: small-deployment + objectTemplatePath: configmap.yaml + {{end}} + {{if $ENABLE_SECRETS}} + - basename: small-deployment + objectTemplatePath: secret.yaml + {{end}} + - basename: small-deployment + objectTemplatePath: deployment.yaml + templateFillMap: + ReplicasMin: {{$SMALL_GROUP_SIZE}} + ReplicasMax: {{$SMALL_GROUP_SIZE}} + SvcName: small-service + {{if $ENABLE_STATEFULSETS}} + - namespaceRange: + min: 1 + max: {{$namespaces}} + replicasPerNamespace: {{$SMALL_STATEFUL_SETS_PER_NAMESPACE}} + tuningSet: RandomizedSaturationTimeLimited + objectBundle: + - basename: small-statefulset + objectTemplatePath: statefulset_service.yaml + - basename: small-statefulset + objectTemplatePath: statefulset.yaml + templateFillMap: + ReplicasMin: {{$SMALL_GROUP_SIZE}} + ReplicasMax: {{$SMALL_GROUP_SIZE}} + - namespaceRange: + min: 1 + max: {{$namespaces}} + replicasPerNamespace: {{$MEDIUM_STATEFUL_SETS_PER_NAMESPACE}} + tuningSet: RandomizedSaturationTimeLimited + objectBundle: + - basename: medium-statefulset + objectTemplatePath: statefulset_service.yaml + - basename: medium-statefulset + objectTemplatePath: statefulset.yaml + templateFillMap: + ReplicasMin: {{$MEDIUM_GROUP_SIZE}} + ReplicasMax: {{$MEDIUM_GROUP_SIZE}} + {{end}} + +- name: Waiting for pods to be running + measurements: + - Identifier: WaitForRunningDeployments + Method: WaitForControlledPodsRunning + Params: + action: gather + {{if $ENABLE_STATEFULSETS}} + - Identifier: WaitForRunningStatefulSets + Method: WaitForControlledPodsRunning + Params: + action: gather + {{end}} + +- name: Scaling objects + phases: + - namespaceRange: + min: 1 + max: {{$namespaces}} + replicasPerNamespace: {{$bigDeploymentsPerNamespace}} + tuningSet: RandomizedScalingTimeLimited + objectBundle: + - basename: big-deployment + objectTemplatePath: deployment.yaml + templateFillMap: + ReplicasMin: {{MultiplyInt $BIG_GROUP_SIZE 0.5}} + ReplicasMax: {{MultiplyInt $BIG_GROUP_SIZE 1.5}} + SvcName: big-service + - namespaceRange: + min: 1 + max: {{$namespaces}} + replicasPerNamespace: {{$mediumDeploymentsPerNamespace}} + tuningSet: RandomizedScalingTimeLimited + objectBundle: + - basename: medium-deployment + objectTemplatePath: deployment.yaml + templateFillMap: + ReplicasMin: {{MultiplyInt $MEDIUM_GROUP_SIZE 0.5}} + ReplicasMax: {{MultiplyInt $MEDIUM_GROUP_SIZE 1.5}} + SvcName: medium-service + - namespaceRange: + min: 1 + max: {{$namespaces}} + replicasPerNamespace: {{$smallDeploymentsPerNamespace}} + tuningSet: RandomizedScalingTimeLimited + objectBundle: + - basename: small-deployment + objectTemplatePath: deployment.yaml + templateFillMap: + ReplicasMin: {{MultiplyInt $SMALL_GROUP_SIZE 0.5}} + ReplicasMax: {{MultiplyInt $SMALL_GROUP_SIZE 1.5}} + SvcName: small-service + {{if $ENABLE_STATEFULSETS}} + - namespaceRange: + min: 1 + max: {{$namespaces}} + replicasPerNamespace: {{$SMALL_STATEFUL_SETS_PER_NAMESPACE}} + tuningSet: RandomizedScalingTimeLimited + objectBundle: + - basename: small-statefulset + objectTemplatePath: statefulset.yaml + templateFillMap: + ReplicasMin: {{MultiplyInt $SMALL_GROUP_SIZE 0.5}} + ReplicasMax: {{MultiplyInt $SMALL_GROUP_SIZE 1.5}} + - namespaceRange: + min: 1 + max: {{$namespaces}} + replicasPerNamespace: {{$MEDIUM_STATEFUL_SETS_PER_NAMESPACE}} + tuningSet: RandomizedScalingTimeLimited + objectBundle: + - basename: medium-statefulset + objectTemplatePath: statefulset.yaml + templateFillMap: + 
ReplicasMin: {{MultiplyInt $MEDIUM_GROUP_SIZE 0.5}} + ReplicasMax: {{MultiplyInt $MEDIUM_GROUP_SIZE 1.5}} + {{end}} + +- name: Waiting for objects to become scaled + measurements: + - Identifier: WaitForRunningDeployments + Method: WaitForControlledPodsRunning + Params: + action: gather + {{if $ENABLE_STATEFULSETS}} + - Identifier: WaitForRunningStatefulSets + Method: WaitForControlledPodsRunning + Params: + action: gather + {{end}} + +- name: Deleting objects + phases: + - namespaceRange: + min: 1 + max: {{$namespaces}} + replicasPerNamespace: 0 + tuningSet: RandomizedSaturationTimeLimited + objectBundle: + - basename: big-deployment + objectTemplatePath: deployment.yaml + {{if $ENABLE_CONFIGMAPS}} + - basename: big-deployment + objectTemplatePath: configmap.yaml + {{end}} + {{if $ENABLE_SECRETS}} + - basename: big-deployment + objectTemplatePath: secret.yaml + {{end}} + - namespaceRange: + min: 1 + max: {{$namespaces}} + replicasPerNamespace: 0 + tuningSet: RandomizedSaturationTimeLimited + objectBundle: + - basename: medium-deployment + objectTemplatePath: deployment.yaml + {{if $ENABLE_CONFIGMAPS}} + - basename: medium-deployment + objectTemplatePath: configmap.yaml + {{end}} + {{if $ENABLE_SECRETS}} + - basename: medium-deployment + objectTemplatePath: secret.yaml + {{end}} + - namespaceRange: + min: 1 + max: {{$namespaces}} + replicasPerNamespace: 0 + tuningSet: RandomizedSaturationTimeLimited + objectBundle: + - basename: small-deployment + objectTemplatePath: deployment.yaml + {{if $ENABLE_CONFIGMAPS}} + - basename: small-deployment + objectTemplatePath: configmap.yaml + {{end}} + {{if $ENABLE_SECRETS}} + - basename: small-deployment + objectTemplatePath: secret.yaml + {{end}} + {{if $ENABLE_STATEFULSETS}} + - namespaceRange: + min: 1 + max: {{$namespaces}} + replicasPerNamespace: 0 + tuningSet: RandomizedSaturationTimeLimited + objectBundle: + - basename: small-statefulset + objectTemplatePath: statefulset.yaml + - basename: small-statefulset + objectTemplatePath: statefulset_service.yaml + - namespaceRange: + min: 1 + max: {{$namespaces}} + replicasPerNamespace: 0 + tuningSet: RandomizedSaturationTimeLimited + objectBundle: + - basename: medium-statefulset + objectTemplatePath: statefulset.yaml + - basename: medium-statefulset + objectTemplatePath: statefulset_service.yaml + {{end}} + +- name: Waiting for pods to be deleted + measurements: + - Identifier: WaitForRunningDeployments + Method: WaitForControlledPodsRunning + Params: + action: gather + {{if $ENABLE_STATEFULSETS}} + - Identifier: WaitForRunningStatefulSets + Method: WaitForControlledPodsRunning + Params: + action: gather + {{end}} + +- name: Deleting SVCs + phases: + - namespaceRange: + min: 1 + max: {{$namespaces}} + replicasPerNamespace: 0 + tuningSet: Sequence + objectBundle: + - basename: big-service + objectTemplatePath: service.yaml + - namespaceRange: + min: 1 + max: {{$namespaces}} + replicasPerNamespace: 0 + tuningSet: Sequence + objectBundle: + - basename: medium-service + objectTemplatePath: service.yaml + - namespaceRange: + min: 1 + max: {{$namespaces}} + replicasPerNamespace: 0 + tuningSet: Sequence + objectBundle: + - basename: small-service + objectTemplatePath: service.yaml + +- name: Collecting measurements + measurements: + - Identifier: APIResponsiveness + Method: APIResponsiveness + Params: + action: gather + - Identifier: APIResponsivenessPrometheus + Method: APIResponsivenessPrometheus + Params: + action: gather + {{if $ENABLE_PROMETHEUS_API_RESPONSIVENESS}} + enableViolations: true + {{end}} + - 
Identifier: PodStartupLatency + Method: PodStartupLatency + Params: + action: gather + - Identifier: InClusterNetworkLatency + Method: InClusterNetworkLatency + Params: + action: gather + - Identifier: DnsLookupLatency + Method: DnsLookupLatency + Params: + action: gather + - Identifier: NetworkProgrammingLatency + Method: NetworkProgrammingLatency + Params: + action: gather + - Identifier: TestMetrics + Method: TestMetrics + Params: + action: gather diff --git a/load_testing/k3s/cluster-loader/small/configmap.yaml b/load_testing/k3s/cluster-loader/small/configmap.yaml new file mode 100644 index 0000000000..b249a39143 --- /dev/null +++ b/load_testing/k3s/cluster-loader/small/configmap.yaml @@ -0,0 +1,9 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{.Name}} +data: + data.yaml: |- + a: 1 + b: 2 + c: 3 diff --git a/load_testing/k3s/cluster-loader/small/deployment.yaml b/load_testing/k3s/cluster-loader/small/deployment.yaml new file mode 100644 index 0000000000..dcd581914a --- /dev/null +++ b/load_testing/k3s/cluster-loader/small/deployment.yaml @@ -0,0 +1,62 @@ +{{$EnableConfigMaps := DefaultParam .ENABLE_CONFIGMAPS false}} +{{$EnableSecrets := DefaultParam .ENABLE_SECRETS false}} + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{.Name}} + labels: + group: load + svc: {{.SvcName}}-{{.Index}} +spec: + replicas: {{RandIntRange .ReplicasMin .ReplicasMax}} + selector: + matchLabels: + name: {{.Name}} + template: + metadata: + labels: + group: load + name: {{.Name}} + svc: {{.SvcName}}-{{.Index}} + spec: + containers: + - image: k8s.gcr.io/pause:3.1 + name: {{.Name}} + resources: + requests: + cpu: 10m + memory: "10M" + volumeMounts: + {{if and $EnableConfigMaps (eq (Mod .Index 20) 0 19) }} # .Index % 20 in {0,19} - 10% deployments will have ConfigMap + - name: configmap + mountPath: /var/configmap + {{end}} + {{if and $EnableSecrets (eq (Mod .Index 20) 10 19) }} # .Index % 20 in {10,19} - 10% deployments will have Secret + - name: secret + mountPath: /var/secret + {{end}} + dnsPolicy: Default + terminationGracePeriodSeconds: 1 + # Add not-ready/unreachable tolerations for 15 minutes so that node + # failure doesn't trigger pod deletion. 
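+      # (The tolerationSeconds of 900 below is 15 minutes; without these
+      # explicit entries, the DefaultTolerationSeconds admission plugin would
+      # add not-ready/unreachable tolerations of only 300 seconds.)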
+ tolerations: + - key: "node.kubernetes.io/not-ready" + operator: "Exists" + effect: "NoExecute" + tolerationSeconds: 900 + - key: "node.kubernetes.io/unreachable" + operator: "Exists" + effect: "NoExecute" + tolerationSeconds: 900 + volumes: + {{if and $EnableConfigMaps (eq (Mod .Index 20) 0 19) }} # .Index % 20 in {0,19} - 10% deployments will have ConfigMap + - name: configmap + configMap: + name: {{.BaseName}}-{{.Index}} + {{end}} + {{if and $EnableSecrets (eq (Mod .Index 20) 10 19) }} # .Index % 20 in {10,19} - 10% deployments will have Secret + - name: secret + secret: + secretName: {{.BaseName}}-{{.Index}} + {{end}} diff --git a/load_testing/k3s/cluster-loader/small/run-test.sh b/load_testing/k3s/cluster-loader/small/run-test.sh new file mode 100644 index 0000000000..b190c99ef3 --- /dev/null +++ b/load_testing/k3s/cluster-loader/small/run-test.sh @@ -0,0 +1,3 @@ +#!/bin/sh + +clusterloader --kubeconfig=../kubeConfig.yaml --testconfig=config.yaml diff --git a/load_testing/k3s/cluster-loader/small/secret.yaml b/load_testing/k3s/cluster-loader/small/secret.yaml new file mode 100644 index 0000000000..67134b355f --- /dev/null +++ b/load_testing/k3s/cluster-loader/small/secret.yaml @@ -0,0 +1,7 @@ +apiVersion: v1 +kind: Secret +metadata: + name: {{.Name}} +type: Opaque +data: + password: c2NhbGFiaWxpdHkK diff --git a/load_testing/k3s/cluster-loader/small/service.yaml b/load_testing/k3s/cluster-loader/small/service.yaml new file mode 100644 index 0000000000..ed6a22c8cf --- /dev/null +++ b/load_testing/k3s/cluster-loader/small/service.yaml @@ -0,0 +1,16 @@ +{{$SetServiceProxyLabel := DefaultParam .SetServiceProxyLabel false}} + +apiVersion: v1 +kind: Service +metadata: + name: {{.Name}} +{{if and $SetServiceProxyLabel (eq (Mod .Index 2) 0)}} + labels: + service.kubernetes.io/service-proxy-name: foo +{{end}} +spec: + selector: + svc: {{.Name}} + ports: + - port: 80 + targetPort: 80 diff --git a/load_testing/k3s/cluster-loader/small/statefulset.yaml b/load_testing/k3s/cluster-loader/small/statefulset.yaml new file mode 100644 index 0000000000..bb97bfce9a --- /dev/null +++ b/load_testing/k3s/cluster-loader/small/statefulset.yaml @@ -0,0 +1,30 @@ +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: {{.Name}} + labels: + group: load +spec: + podManagementPolicy: Parallel + selector: + matchLabels: + name: {{.Name}} + serviceName: {{.Name}} + replicas: {{RandIntRange .ReplicasMin .ReplicasMax}} + template: + metadata: + labels: + group: statefulset + name: {{.Name}} + spec: + terminationGracePeriodSeconds: 1 + containers: + - name: {{.Name}} + image: k8s.gcr.io/pause:3.1 + ports: + - containerPort: 80 + name: web + resources: + requests: + cpu: 10m + memory: "10M" diff --git a/load_testing/k3s/cluster-loader/small/statefulset_service.yaml b/load_testing/k3s/cluster-loader/small/statefulset_service.yaml new file mode 100644 index 0000000000..5e16a47a19 --- /dev/null +++ b/load_testing/k3s/cluster-loader/small/statefulset_service.yaml @@ -0,0 +1,10 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{.Name}} + labels: + name: {{.Name}} +spec: + clusterIP: None + selector: + name: {{.Name}} diff --git a/load_testing/k3s/pool/data.tf b/load_testing/k3s/pool/data.tf new file mode 100644 index 0000000000..0da33a9128 --- /dev/null +++ b/load_testing/k3s/pool/data.tf @@ -0,0 +1,55 @@ +data "terraform_remote_state" "server" { + backend = "local" + + config = { + path = "${path.module}/../server/server.tfstate" + } +} + +data "aws_vpc" "default" { + default = true +} + +data "aws_subnet_ids" 
"available" { + vpc_id = data.aws_vpc.default.id +} + +data "aws_subnet" "selected" { + id = "${tolist(data.aws_subnet_ids.available.ids)[1]}" +} + +data "aws_ami" "ubuntu" { + most_recent = true + owners = ["099720109477"] + + filter { + name = "name" + values = ["ubuntu-minimal/images/*/ubuntu-bionic-18.04-*"] + } + + filter { + name = "virtualization-type" + values = ["hvm"] + } + + filter { + name = "root-device-type" + values = ["ebs"] + } + + filter { + name = "architecture" + values = ["x86_64"] + } +} + +data "template_file" "k3s-pool-worker-user_data" { + template = file("${path.module}/files/pool_worker_userdata.tmpl") + + vars = { + k3s_url = data.terraform_remote_state.server.outputs.public_ip[0] + k3s_cluster_secret = local.k3s_cluster_secret + install_k3s_version = local.install_k3s_version + k3s_per_node = var.k3s_per_node + } +} diff --git a/load_testing/k3s/pool/files/pool_worker_userdata.tmpl b/load_testing/k3s/pool/files/pool_worker_userdata.tmpl new file mode 100644 index 0000000000..c2d83f7343 --- /dev/null +++ b/load_testing/k3s/pool/files/pool_worker_userdata.tmpl @@ -0,0 +1,32 @@ +#cloud-config +ssh_authorized_keys: +- ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQC06Qvs+Y9JiyOTeYNGAN/Ukq7SmeCTr7EreD1K8Lwu5VuOmo+SBZh685tNTEGV044HgFvGEOBVreDlO2ArYuwHjUBGnpQGV8/abjoeLrmZBdREAUzBQ1h2GFE/WssKUfum81cnigRK1J3tWP7emq/Y2h/Zw5F09yiCIlXMBX2auKWUCXqwG3xKTi1NVSF9N6BGyFolrAR0LZJ6k7UBXPRc/QDTclI427gSJNbnmn8LVym6YxacV/V9Y7s23iR5zYbhLPe9VJWYNk1brVvfUVb3mILVVYz76KGEq8SHdWlPQPCOp+fSJ+PezDRklnex/MmvhNrBOmMSNcpj7wSLA3hD wmaxwell@wmaxwell-laptop +- ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIN5O7k6gRYCU7YPkCH6dyXVW10izMAkDAQtQxNxdRE22 drpebcak +- ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQC2TBZGjE+J8ag11dzkFT58J3XPONDrVmalCNrKxsfADfyy0eqdZrG8hAcxAR/5zuj90Gin2uB4RSw6Cn4VHsPZcFpXyQCj1KQDADj+WcuhpXOIOY3AB0LZBly9NI0ll+8lo3QtEaoyRLtrMBhQ6Mooy2M3MTG4JNwU9o3yInuqZWf9PvtW6KxMl+ygg1xZkljhemGZ9k0wSrjqif+8usNbzVlCOVQmZwZA+BZxbdcLNwkg7zWJSXzDIXyqM6iWPGXQDEbWLq3+HR1qKucTCSxjbqoe0FD5xcW7NHIME5XKX84yH92n6yn+rxSsyUfhJWYqJd+i0fKf5UbN6qLrtd/D darren@darrens +runcmd: + - echo "net.ipv4.neigh.default.gc_interval = 3600" >> /etc/sysctl.conf + - echo "net.ipv4.neigh.default.gc_stale_time = 3600" >> /etc/sysctl.conf + - echo "net.ipv4.neigh.default.gc_thresh3 = 16384" >> /etc/sysctl.conf + - echo "net.ipv4.neigh.default.gc_thresh2 = 8192" >> /etc/sysctl.conf + - echo "net.ipv4.neigh.default.gc_thresh1 = 4096" >> /etc/sysctl.conf + - echo "fs.file-max = 12000500" >> /etc/sysctl.conf + - echo "fs.nr_open = 20000500" >> /etc/sysctl.conf + - echo "net.ipv4.tcp_mem = '10000000 10000000 10000000'" >> /etc/sysctl.conf + - echo "net.ipv4.tcp_rmem = '1024 4096 16384'" >> /etc/sysctl.conf + - echo "net.ipv4.tcp_wmem = '1024 4096 16384'" >> /etc/sysctl.conf + - echo "net.core.rmem_max = 16384" >> /etc/sysctl.conf + - echo "net.core.wmem_max = 16384" >> /etc/sysctl.conf + - ulimit -n 20000000 + - echo "# " >> /etc/security/limits.d/limits.conf + - echo " * soft nofile 20000" >> /etc/security/limits.d/limits.conf + - echo " * hard nofile 20000" >> /etc/security/limits.d/limits.conf + - sysctl -p + - apt-get update + - apt-get install -y software-properties-common + - curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add - + - add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" + - apt-get update + - apt-get -y install docker-ce + - apt-get install -y resolvconf linux-headers-$(uname -r) && echo "nameserver 1.1.1.1" > /etc/resolvconf/resolv.conf.d/tail && systemctl 
start resolvconf
+ - DEBIAN_FRONTEND=noninteractive apt-get upgrade -y
+ - n=1; while [ $n -le ${k3s_per_node} ]; do docker run -d --restart=unless-stopped -e K3S_URL=https://${k3s_url}:6443 -e K3S_CLUSTER_SECRET="${k3s_cluster_secret}" --privileged --mount type=tmpfs,destination=/var/run --mount type=tmpfs,destination=/run -m 1g --cpus=".7" rancher/k3s:${install_k3s_version}; n=$(( n + 1 )); done
diff --git a/load_testing/k3s/pool/main.tf b/load_testing/k3s/pool/main.tf
new file mode 100644
index 0000000000..6e85a15742
--- /dev/null
+++ b/load_testing/k3s/pool/main.tf
@@ -0,0 +1,80 @@
+terraform {
+  backend "local" {
+    path = "pool.tfstate"
+  }
+}
+
+locals {
+  name                = "load-test-pool"
+  k3s_cluster_secret  = "pvc-6476dcaf-73a0-11e9-b8e5-06943b744282"
+  install_k3s_version = "v0.9.0-rc2"
+}
+
+provider "aws" {
+  region  = "us-west-2"
+  profile = "rancher-eng"
+}
+
+resource "aws_security_group" "k3s" {
+  name   = "${local.name}-pool"
+  vpc_id = data.aws_vpc.default.id
+
+  ingress {
+    from_port   = 22
+    to_port     = 22
+    protocol    = "TCP"
+    cidr_blocks = ["0.0.0.0/0"]
+  }
+
+  ingress {
+    from_port   = 0
+    to_port     = 0
+    protocol    = "-1"
+    cidr_blocks = ["0.0.0.0/0"]
+  }
+
+  ingress {
+    from_port = 0
+    to_port   = 0
+    protocol  = "-1"
+    self      = true
+  }
+
+  egress {
+    from_port   = 0
+    to_port     = 0
+    protocol    = "-1"
+    cidr_blocks = ["0.0.0.0/0"]
+  }
+}
+
+module "k3s-pool-worker-asg" {
+  source        = "terraform-aws-modules/autoscaling/aws"
+  version       = "3.0.0"
+  name          = local.name
+  asg_name      = local.name
+  instance_type = var.worker_instance_type
+  image_id      = data.aws_ami.ubuntu.id
+  user_data     = data.template_file.k3s-pool-worker-user_data.rendered
+  ebs_optimized = true
+
+  desired_capacity    = var.node_count
+  health_check_type   = "EC2"
+  max_size            = var.node_count
+  min_size            = var.node_count
+  vpc_zone_identifier = [data.aws_subnet.selected.id]
+  spot_price          = "0.680"
+
+  security_groups = [
+    aws_security_group.k3s.id,
+  ]
+
+  lc_name = local.name
+
+  root_block_device = [
+    {
+      volume_size = "100"
+      volume_type = "gp2"
+    },
+  ]
+}
diff --git a/load_testing/k3s/pool/outputs.tf b/load_testing/k3s/pool/outputs.tf
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/load_testing/k3s/pool/variables.tf b/load_testing/k3s/pool/variables.tf
new file mode 100644
index 0000000000..0fef717b51
--- /dev/null
+++ b/load_testing/k3s/pool/variables.tf
@@ -0,0 +1,16 @@
+variable "node_count" {
+  description = "Number of nodes to run k3s agents on."
+  type        = number
+  # default = 10
+}
+
+variable "k3s_per_node" {
+  description = "Number of k3s agent docker containers to run per EC2 instance."
+  type        = number
+  default     = 10
+}
+
+variable "worker_instance_type" {
+  type    = string
+  default = "c5.4xlarge"
+}
diff --git a/load_testing/k3s/pool/versions.tf b/load_testing/k3s/pool/versions.tf
new file mode 100644
index 0000000000..ac97c6ac8e
--- /dev/null
+++ b/load_testing/k3s/pool/versions.tf
@@ -0,0 +1,4 @@
+
+terraform {
+  required_version = ">= 0.12"
+}
diff --git a/load_testing/k3s/readme.MD b/load_testing/k3s/readme.MD
new file mode 100644
index 0000000000..3b3d1154ea
--- /dev/null
+++ b/load_testing/k3s/readme.MD
@@ -0,0 +1,47 @@
+# K3S Load Testing
+
+This directory contains tooling for spinning up k3s clusters for scale testing (load testing the k3s server).
+
+## Usage
+
+You will need access to an AWS account; the configs here default to the `rancher-eng` profile. First, stand up the k3s server:
+
+```
+cd server
+terraform init
+terraform apply
+```
+
+You will be asked to specify an instance type for the k3s server. For a `large` cluster test, use a `c4.8xlarge`; for a `small` cluster test, use a `t3.micro`.
+
+When the server terraform completes, go to the `pool` directory and run:
+
+```
+cd pool
+terraform init
+terraform apply
+```
+
+You will be asked how many EC2 instances to create (the `node_count` variable). You can also set the `k3s_per_node` and `worker_instance_type` variables when you run apply to override their defaults, as shown below.
+
+For the `large` cluster test, specify `node_count=100`. That will get you 100 EC2 instances with 10 k3s agents each - for a total of 1000 nodes in your k3s cluster.
+
+For the `small` test, specify `node_count=1` and override `k3s_per_node=5`. That will get you 1 EC2 instance with 5 agents on it - for a total of 5 nodes in your k3s cluster.
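+For example, a non-interactive `small` run can set all three pool variables on the command line (`worker_instance_type` is shown here with its default from `variables.tf`; adjust as needed):
+
+```
+cd pool
+terraform init
+terraform apply -var="node_count=1" -var="k3s_per_node=5" -var="worker_instance_type=c5.4xlarge"
+```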
+Once `pool` is finished, you can run the cluster-loader scenarios using the `run-test.sh` script in the directory matching the scenario you deployed (`large` or `small`). Copy your cluster's kubeconfig to `cluster-loader/kubeConfig.yaml` first; `run-test.sh` expects it one directory up from where it runs:
+
+```
+cd cluster-loader/large   # or cluster-loader/small
+./run-test.sh
+```
+
+* The `run-test.sh` script assumes you have [cluster-loader](https://github.com/kubernetes/perf-tests/tree/master/clusterloader2) installed on your machine.
+
+
+### TODO
+
+* Investigate cluster-loader failures.
+* Simplify this process.
+* Add organized reporting on SLOs after cluster-loader completes.
diff --git a/load_testing/k3s/server/data.tf b/load_testing/k3s/server/data.tf
new file mode 100644
index 0000000000..2a8bb32025
--- /dev/null
+++ b/load_testing/k3s/server/data.tf
@@ -0,0 +1,83 @@
+data "aws_vpc" "default" {
+  default = true
+}
+
+data "aws_subnet_ids" "available" {
+  vpc_id = data.aws_vpc.default.id
+}
+
+data "aws_subnet" "selected" {
+  id = "${tolist(data.aws_subnet_ids.available.ids)[1]}"
+}
+
+data "aws_ami" "ubuntu" {
+  most_recent = true
+  owners      = ["099720109477"]
+
+  filter {
+    name   = "name"
+    values = ["ubuntu-minimal/images/*/ubuntu-bionic-18.04-*"]
+  }
+
+  filter {
+    name   = "virtualization-type"
+    values = ["hvm"]
+  }
+
+  filter {
+    name   = "root-device-type"
+    values = ["ebs"]
+  }
+
+  filter {
+    name   = "architecture"
+    values = ["x86_64"]
+  }
+}
+
+data "template_file" "metrics" {
+  template = file("${path.module}/files/metrics.yaml")
+}
+data "template_file" "k3s-prom-yaml" {
+  template = file("${path.module}/files/prom.yaml")
+  vars = {
+    prom_host = var.prom_host
+    graf_host = var.graf_host
+  }
+}
+
+data "template_file" "k3s-server-user_data" {
+  template = file("${path.module}/files/server_userdata.tmpl")
+
+  vars = {
+    create_eip          = 1
+    metrics_yaml        = base64encode(data.template_file.metrics.rendered)
+    prom_yaml           = base64encode(data.template_file.k3s-prom-yaml.rendered)
+    eip                 = join(",", aws_eip.k3s-server.*.public_ip)
+    k3s_cluster_secret  = local.k3s_cluster_secret
+    install_k3s_version = local.install_k3s_version
+    k3s_server_args     = var.k3s_server_args
+  }
+}
+
+data "template_file" "k3s-prom-worker-user_data" {
+  template = file("${path.module}/files/worker_userdata.tmpl")
+
+  vars = {
+    k3s_url             = aws_eip.k3s-server.0.public_ip
+    k3s_cluster_secret  = local.k3s_cluster_secret
+    install_k3s_version = local.install_k3s_version
+    k3s_exec            = "--node-label prom=true"
+  }
+}
+
+data "template_file" "k3s-worker-user_data" {
+  template = file("${path.module}/files/worker_userdata.tmpl")
+
+  vars = {
+    k3s_url             = aws_eip.k3s-server.0.public_ip
+    k3s_cluster_secret  = local.k3s_cluster_secret
+    install_k3s_version = local.install_k3s_version
+ k3s_exec = "" + } +} diff --git a/load_testing/k3s/server/files/metrics.yaml b/load_testing/k3s/server/files/metrics.yaml new file mode 100644 index 0000000000..3b35b737d6 --- /dev/null +++ b/load_testing/k3s/server/files/metrics.yaml @@ -0,0 +1,227 @@ +%{ if local.prom_worker_node_count != 0 } +--- +apiVersion: rbac.authorization.k8s.io/v1 +# kubernetes versions before 1.8.0 should use rbac.authorization.k8s.io/v1beta1 +kind: ClusterRoleBinding +metadata: + name: kube-state-metrics +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: kube-state-metrics +subjects: +- kind: ServiceAccount + name: kube-state-metrics + namespace: kube-system +--- +apiVersion: rbac.authorization.k8s.io/v1 +# kubernetes versions before 1.8.0 should use rbac.authorization.k8s.io/v1beta1 +kind: ClusterRole +metadata: + name: kube-state-metrics +rules: +- apiGroups: [""] + resources: + - configmaps + - secrets + - nodes + - pods + - services + - resourcequotas + - replicationcontrollers + - limitranges + - persistentvolumeclaims + - persistentvolumes + - namespaces + - endpoints + verbs: ["list", "watch"] +- apiGroups: ["extensions"] + resources: + - daemonsets + - deployments + - replicasets + - ingresses + verbs: ["list", "watch"] +- apiGroups: ["apps"] + resources: + - daemonsets + - deployments + - replicasets + - statefulsets + verbs: ["list", "watch"] +- apiGroups: ["batch"] + resources: + - cronjobs + - jobs + verbs: ["list", "watch"] +- apiGroups: ["autoscaling"] + resources: + - horizontalpodautoscalers + verbs: ["list", "watch"] +- apiGroups: ["policy"] + resources: + - poddisruptionbudgets + verbs: ["list", "watch"] +- apiGroups: ["certificates.k8s.io"] + resources: + - certificatesigningrequests + verbs: ["list", "watch"] +- apiGroups: ["storage.k8s.io"] + resources: + - storageclasses + verbs: ["list", "watch"] +- apiGroups: ["autoscaling.k8s.io"] + resources: + - verticalpodautoscalers + verbs: ["list", "watch"] +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + labels: + k8s-app: kube-state-metrics + name: kube-state-metrics + namespace: kube-system +spec: + selector: + matchLabels: + k8s-app: kube-state-metrics + replicas: 1 + template: + metadata: + labels: + k8s-app: kube-state-metrics + spec: + serviceAccountName: kube-state-metrics + containers: + - name: kube-state-metrics + image: quay.io/coreos/kube-state-metrics:v1.7.2 + ports: + - name: http-metrics + containerPort: 8080 + - name: telemetry + containerPort: 8081 + livenessProbe: + httpGet: + path: /healthz + port: 8080 + initialDelaySeconds: 5 + timeoutSeconds: 5 + readinessProbe: + httpGet: + path: / + port: 8080 + initialDelaySeconds: 5 + timeoutSeconds: 5 +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: kube-state-metrics + namespace: kube-system +--- +apiVersion: v1 +kind: Service +metadata: + name: kube-state-metrics + namespace: kube-system + labels: + k8s-app: kube-state-metrics + annotations: + prometheus.io/scrape: 'true' +spec: + ports: + - name: http-metrics + port: 8080 + targetPort: http-metrics + protocol: TCP + - name: telemetry + port: 8081 + targetPort: telemetry + protocol: TCP + selector: + k8s-app: kube-state-metrics +--- +kind: ClusterRoleBinding +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: slo-monitor +subjects: +- kind: ServiceAccount + name: slo-monitor + namespace: kube-system +roleRef: + kind: ClusterRole + name: slo-monitor + apiGroup: rbac.authorization.k8s.io +--- +kind: ClusterRole +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: 
slo-monitor + namespace: kube-system +rules: +- apiGroups: [""] + resources: ["pods", "events"] + verbs: ["get", "watch", "list"] +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: slo-monitor + namespace: kube-system +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: slo-monitor + namespace: kube-system + labels: + app: slo-monitor +spec: + selector: + matchLabels: + app: slo-monitor + template: + metadata: + labels: + app: slo-monitor + annotations: + prometheus.io/scrape: "true" + spec: + containers: + - name: slo-monitor + image: gcr.io/google-containers/slo-monitor:0.12.0 + command: + - /slo-monitor + - --alsologtostderr=true + imagePullPolicy: Always + ports: + - name: metrics + containerPort: 8080 + resources: + requests: + cpu: 300m + memory: 100Mi + limits: + cpu: 300m + memory: 100Mi + restartPolicy: Always + serviceAccountName: slo-monitor +--- +apiVersion: v1 +kind: Service +metadata: + name: slo-monitor + namespace: kube-system + labels: + app: slo-monitor +spec: + selector: + app: slo-monitor + ports: + - name: metrics + port: 80 + targetPort: metrics + type: LoadBalancer +%{ endif } diff --git a/load_testing/k3s/server/files/prom.yaml b/load_testing/k3s/server/files/prom.yaml new file mode 100644 index 0000000000..9c780b2d36 --- /dev/null +++ b/load_testing/k3s/server/files/prom.yaml @@ -0,0 +1,86 @@ +%{ if local.prom_worker_node_count != 0 } +--- +apiVersion: v1 +kind: Namespace +metadata: + name: monitoring + +--- +apiVersion: helm.cattle.io/v1 +kind: HelmChart +metadata: + name: prometheus + namespace: kube-system +spec: + chart: https://raw.githubusercontent.com/drpebcak/charts/master/prometheus-9.1.0.tgz + targetNamespace: monitoring + valuesContent: |- + alertmanager: + nodeSelector: + prom: "true" + persistentVolume: + enabled: false + kubeStateMetrics: + nodeSelector: + prom: "true" + nodeExporter: + nodeSelector: + prom: "true" + server: + nodeSelector: + prom: "true" + ingress: + enabled: true + hosts: + - ${prom_host} + persistentVolume: + enabled: false + pushgateway: + nodeSelector: + prom: "true" + persistentVolume: + enabled: false + serverFiles: + prometheus.yml: + scrape_configs: + - job_name: prometheus + static_configs: + - targets: + - localhost:9090 + - job_name: kubernetes-apiservers + scrape_interval: 10s + scrape_timeout: 10s + metrics_path: /metrics + scheme: https + kubernetes_sd_configs: + - api_server: null + role: endpoints + namespaces: + names: [] + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + insecure_skip_verify: true + relabel_configs: + - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name] + separator: ; + regex: default;kubernetes;https + replacement: $1 + action: keep +--- +apiVersion: helm.cattle.io/v1 +kind: HelmChart +metadata: + name: grafana + namespace: kube-system +spec: + chart: stable/grafana + targetNamespace: monitoring + valuesContent: |- + ingress: + enabled: true + hosts: + - ${graf_host} + nodeSelector: + prom: "true" +%{ endif } diff --git a/load_testing/k3s/server/files/server_userdata.tmpl b/load_testing/k3s/server/files/server_userdata.tmpl new file mode 100644 index 0000000000..74e1cbf41e --- /dev/null +++ b/load_testing/k3s/server/files/server_userdata.tmpl @@ -0,0 +1,45 @@ +#cloud-config +ssh_authorized_keys: +- ssh-rsa 
AAAAB3NzaC1yc2EAAAADAQABAAABAQC06Qvs+Y9JiyOTeYNGAN/Ukq7SmeCTr7EreD1K8Lwu5VuOmo+SBZh685tNTEGV044HgFvGEOBVreDlO2ArYuwHjUBGnpQGV8/abjoeLrmZBdREAUzBQ1h2GFE/WssKUfum81cnigRK1J3tWP7emq/Y2h/Zw5F09yiCIlXMBX2auKWUCXqwG3xKTi1NVSF9N6BGyFolrAR0LZJ6k7UBXPRc/QDTclI427gSJNbnmn8LVym6YxacV/V9Y7s23iR5zYbhLPe9VJWYNk1brVvfUVb3mILVVYz76KGEq8SHdWlPQPCOp+fSJ+PezDRklnex/MmvhNrBOmMSNcpj7wSLA3hD wmaxwell@wmaxwell-laptop +- ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIN5O7k6gRYCU7YPkCH6dyXVW10izMAkDAQtQxNxdRE22 drpebcak +- ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQC2TBZGjE+J8ag11dzkFT58J3XPONDrVmalCNrKxsfADfyy0eqdZrG8hAcxAR/5zuj90Gin2uB4RSw6Cn4VHsPZcFpXyQCj1KQDADj+WcuhpXOIOY3AB0LZBly9NI0ll+8lo3QtEaoyRLtrMBhQ6Mooy2M3MTG4JNwU9o3yInuqZWf9PvtW6KxMl+ygg1xZkljhemGZ9k0wSrjqif+8usNbzVlCOVQmZwZA+BZxbdcLNwkg7zWJSXzDIXyqM6iWPGXQDEbWLq3+HR1qKucTCSxjbqoe0FD5xcW7NHIME5XKX84yH92n6yn+rxSsyUfhJWYqJd+i0fKf5UbN6qLrtd/D darren@darrens +write_files: + - path: /var/lib/rancher/k3s/server/manifests/metrics.yaml + permissions: "0755" + owner: root:root + encoding: b64 + content: ${metrics_yaml} + - path: /var/lib/rancher/k3s/server/manifests/prom.yaml + permissions: "0755" + owner: root:root + encoding: b64 + content: ${prom_yaml} +runcmd: + - echo "net.ipv4.neigh.default.gc_interval = 3600" >> /etc/sysctl.conf + - echo "net.ipv4.neigh.default.gc_stale_time = 3600" >> /etc/sysctl.conf + - echo "net.ipv4.neigh.default.gc_thresh3 = 16384" >> /etc/sysctl.conf + - echo "net.ipv4.neigh.default.gc_thresh2 = 8192" >> /etc/sysctl.conf + - echo "net.ipv4.neigh.default.gc_thresh1 = 4096" >> /etc/sysctl.conf + - echo "fs.file-max = 12000500" >> /etc/sysctl.conf + - echo "fs.nr_open = 20000500" >> /etc/sysctl.conf + - echo "net.ipv4.tcp_mem = '10000000 10000000 10000000'" >> /etc/sysctl.conf + - echo "net.ipv4.tcp_rmem = '1024 4096 16384'" >> /etc/sysctl.conf + - echo "net.ipv4.tcp_wmem = '1024 4096 16384'" >> /etc/sysctl.conf + - echo "net.core.rmem_max = 16384" >> /etc/sysctl.conf + - echo "net.core.wmem_max = 16384" >> /etc/sysctl.conf + - ulimit -n 20000000 + - echo "# " >> /etc/security/limits.d/limits.conf + - echo " * soft nofile 20000" >> /etc/security/limits.d/limits.conf + - echo " * hard nofile 20000" >> /etc/security/limits.d/limits.conf + - sysctl -p + - apt-get update + - apt-get install -y software-properties-common resolvconf linux-headers-$(uname -r) + - echo "nameserver 1.1.1.1" > /etc/resolvconf/resolv.conf.d/tail + - systemctl start resolvconf + - curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add - + - add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" + - apt-get update + - apt-get -y install docker-ce + - DEBIAN_FRONTEND=noninteractive apt-get upgrade -y + - if [ "${create_eip}" = "1" ]; then docker run -e "EIP=${eip}" cloudnautique/eip-autoassign:latest; fi + - until (curl -sfL https://get.k3s.io | INSTALL_K3S_EXEC="${k3s_server_args} --disable-agent --no-deploy traefik --no-deploy servicelb --cluster-cidr=10.0.0.0/8 --service-cidr=192.168.0.0/16 --cluster-dns=192.168.0.10 --tls-san ${eip}" K3S_CLUSTER_SECRET="${k3s_cluster_secret}" INSTALL_K3S_VERSION=${install_k3s_version} sh -); do echo 'Error installing k3s'; sleep 1; done diff --git a/load_testing/k3s/server/files/worker_userdata.tmpl b/load_testing/k3s/server/files/worker_userdata.tmpl new file mode 100644 index 0000000000..8d5e7a55fd --- /dev/null +++ b/load_testing/k3s/server/files/worker_userdata.tmpl @@ -0,0 +1,26 @@ +#cloud-config +ssh_authorized_keys: +- ssh-rsa 
AAAAB3NzaC1yc2EAAAADAQABAAABAQC06Qvs+Y9JiyOTeYNGAN/Ukq7SmeCTr7EreD1K8Lwu5VuOmo+SBZh685tNTEGV044HgFvGEOBVreDlO2ArYuwHjUBGnpQGV8/abjoeLrmZBdREAUzBQ1h2GFE/WssKUfum81cnigRK1J3tWP7emq/Y2h/Zw5F09yiCIlXMBX2auKWUCXqwG3xKTi1NVSF9N6BGyFolrAR0LZJ6k7UBXPRc/QDTclI427gSJNbnmn8LVym6YxacV/V9Y7s23iR5zYbhLPe9VJWYNk1brVvfUVb3mILVVYz76KGEq8SHdWlPQPCOp+fSJ+PezDRklnex/MmvhNrBOmMSNcpj7wSLA3hD wmaxwell@wmaxwell-laptop +- ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIN5O7k6gRYCU7YPkCH6dyXVW10izMAkDAQtQxNxdRE22 drpebcak +runcmd: + - echo "net.ipv4.neigh.default.gc_interval = 3600" >> /etc/sysctl.conf + - echo "net.ipv4.neigh.default.gc_stale_time = 3600" >> /etc/sysctl.conf + - echo "net.ipv4.neigh.default.gc_thresh3 = 16384" >> /etc/sysctl.conf + - echo "net.ipv4.neigh.default.gc_thresh2 = 8192" >> /etc/sysctl.conf + - echo "net.ipv4.neigh.default.gc_thresh1 = 4096" >> /etc/sysctl.conf + - echo "fs.file-max = 12000500" >> /etc/sysctl.conf + - echo "fs.nr_open = 20000500" >> /etc/sysctl.conf + - echo "net.ipv4.tcp_mem = '10000000 10000000 10000000'" >> /etc/sysctl.conf + - echo "net.ipv4.tcp_rmem = '1024 4096 16384'" >> /etc/sysctl.conf + - echo "net.ipv4.tcp_wmem = '1024 4096 16384'" >> /etc/sysctl.conf + - echo "net.core.rmem_max = 16384" >> /etc/sysctl.conf + - echo "net.core.wmem_max = 16384" >> /etc/sysctl.conf + - ulimit -n 20000000 + - echo "# " >> /etc/security/limits.d/limits.conf + - echo " * soft nofile 20000" >> /etc/security/limits.d/limits.conf + - echo " * hard nofile 20000" >> /etc/security/limits.d/limits.conf + - sysctl -p + - apt-get update + - apt-get install -y software-properties-common + - DEBIAN_FRONTEND=noninteractive apt-get upgrade -y + - until (curl -sfL https://get.k3s.io | INSTALL_K3S_VERSION=${install_k3s_version} INSTALL_K3S_EXEC="${k3s_exec}" K3S_URL=https://${k3s_url}:6443 K3S_CLUSTER_SECRET="${k3s_cluster_secret}" sh -); do echo 'k3s did not install correctly'; sleep 1; done diff --git a/load_testing/k3s/server/iam.tf b/load_testing/k3s/server/iam.tf new file mode 100644 index 0000000000..3b7c786860 --- /dev/null +++ b/load_testing/k3s/server/iam.tf @@ -0,0 +1,58 @@ +resource "aws_iam_instance_profile" "k3s-server" { + name_prefix = "load-testing-k3s-server" + role = aws_iam_role.k3s-server.name + + lifecycle { + create_before_destroy = true + } +} + +resource "aws_iam_role" "k3s-server" { + name_prefix = "load-testing-k3s-server" + + assume_role_policy = <| ../cluster-loader/kubeConfig.yaml; do sleep 5; done" + } +} diff --git a/load_testing/k3s/server/outputs.tf b/load_testing/k3s/server/outputs.tf new file mode 100644 index 0000000000..a994aafb68 --- /dev/null +++ b/load_testing/k3s/server/outputs.tf @@ -0,0 +1,11 @@ +output "public_ip" { + value = aws_eip.k3s-server.*.public_ip +} + +output "install_k3s_version" { + value = local.install_k3s_version +} + +output "k3s_cluster_secret" { + value = local.k3s_cluster_secret +} diff --git a/load_testing/k3s/server/variables.tf b/load_testing/k3s/server/variables.tf new file mode 100644 index 0000000000..779ac4ffcb --- /dev/null +++ b/load_testing/k3s/server/variables.tf @@ -0,0 +1,12 @@ +variable "server_instance_type" { + # default = "c4.8xlarge" +} +variable "k3s_server_args" { + default = "" +} +variable "prom_host" { + default = "" +} +variable "graf_host" { + default = "" +} diff --git a/load_testing/k3s/server/versions.tf b/load_testing/k3s/server/versions.tf new file mode 100644 index 0000000000..ac97c6ac8e --- /dev/null +++ b/load_testing/k3s/server/versions.tf @@ -0,0 +1,4 @@ + +terraform { + required_version = ">= 0.12" +} From 
bf6ca8b2d8850c41a3ffa9a9ddf2af5345162e4b Mon Sep 17 00:00:00 2001 From: Taylor Price Date: Thu, 17 Oct 2019 16:21:42 -0700 Subject: [PATCH 2/7] general improvements, dont require iam --- load_testing/k3s/pool/data.tf | 11 --- .../k3s/pool/files/pool_worker_userdata.tmpl | 60 +++++++------- load_testing/k3s/pool/main.tf | 2 +- load_testing/k3s/pool/variables.tf | 6 ++ load_testing/k3s/server/data.tf | 36 --------- .../k3s/server/files/server_userdata.tmpl | 78 +++++++++---------- .../k3s/server/files/worker_userdata.tmpl | 46 +++++------ load_testing/k3s/server/iam.tf | 58 -------------- load_testing/k3s/server/main.tf | 75 +++++++----------- load_testing/k3s/server/outputs.tf | 2 +- load_testing/k3s/server/variables.tf | 26 +++++++ 11 files changed, 153 insertions(+), 247 deletions(-) delete mode 100644 load_testing/k3s/server/iam.tf diff --git a/load_testing/k3s/pool/data.tf b/load_testing/k3s/pool/data.tf index 0da33a9128..bff5eb3ea3 100644 --- a/load_testing/k3s/pool/data.tf +++ b/load_testing/k3s/pool/data.tf @@ -42,14 +42,3 @@ data "aws_ami" "ubuntu" { values = ["x86_64"] } } - -data "template_file" "k3s-pool-worker-user_data" { - template = file("${path.module}/files/pool_worker_userdata.tmpl") - - vars = { - k3s_url = data.terraform_remote_state.server.outputs.public_ip[0] - k3s_cluster_secret = local.k3s_cluster_secret - install_k3s_version = local.install_k3s_version - k3s_per_node = var.k3s_per_node - } -} diff --git a/load_testing/k3s/pool/files/pool_worker_userdata.tmpl b/load_testing/k3s/pool/files/pool_worker_userdata.tmpl index c2d83f7343..b47b2fb344 100644 --- a/load_testing/k3s/pool/files/pool_worker_userdata.tmpl +++ b/load_testing/k3s/pool/files/pool_worker_userdata.tmpl @@ -1,32 +1,34 @@ #cloud-config +%{ if length(extra_ssh_keys) > 0 } ssh_authorized_keys: -- ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQC06Qvs+Y9JiyOTeYNGAN/Ukq7SmeCTr7EreD1K8Lwu5VuOmo+SBZh685tNTEGV044HgFvGEOBVreDlO2ArYuwHjUBGnpQGV8/abjoeLrmZBdREAUzBQ1h2GFE/WssKUfum81cnigRK1J3tWP7emq/Y2h/Zw5F09yiCIlXMBX2auKWUCXqwG3xKTi1NVSF9N6BGyFolrAR0LZJ6k7UBXPRc/QDTclI427gSJNbnmn8LVym6YxacV/V9Y7s23iR5zYbhLPe9VJWYNk1brVvfUVb3mILVVYz76KGEq8SHdWlPQPCOp+fSJ+PezDRklnex/MmvhNrBOmMSNcpj7wSLA3hD wmaxwell@wmaxwell-laptop -- ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIN5O7k6gRYCU7YPkCH6dyXVW10izMAkDAQtQxNxdRE22 drpebcak -- ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQC2TBZGjE+J8ag11dzkFT58J3XPONDrVmalCNrKxsfADfyy0eqdZrG8hAcxAR/5zuj90Gin2uB4RSw6Cn4VHsPZcFpXyQCj1KQDADj+WcuhpXOIOY3AB0LZBly9NI0ll+8lo3QtEaoyRLtrMBhQ6Mooy2M3MTG4JNwU9o3yInuqZWf9PvtW6KxMl+ygg1xZkljhemGZ9k0wSrjqif+8usNbzVlCOVQmZwZA+BZxbdcLNwkg7zWJSXzDIXyqM6iWPGXQDEbWLq3+HR1qKucTCSxjbqoe0FD5xcW7NHIME5XKX84yH92n6yn+rxSsyUfhJWYqJd+i0fKf5UbN6qLrtd/D darren@darrens +%{ for ssh_key in extra_ssh_keys } +- ${ssh_key} +%{ endfor } +%{ endif } runcmd: - - echo "net.ipv4.neigh.default.gc_interval = 3600" >> /etc/sysctl.conf - - echo "net.ipv4.neigh.default.gc_stale_time = 3600" >> /etc/sysctl.conf - - echo "net.ipv4.neigh.default.gc_thresh3 = 16384" >> /etc/sysctl.conf - - echo "net.ipv4.neigh.default.gc_thresh2 = 8192" >> /etc/sysctl.conf - - echo "net.ipv4.neigh.default.gc_thresh1 = 4096" >> /etc/sysctl.conf - - echo "fs.file-max = 12000500" >> /etc/sysctl.conf - - echo "fs.nr_open = 20000500" >> /etc/sysctl.conf - - echo "net.ipv4.tcp_mem = '10000000 10000000 10000000'" >> /etc/sysctl.conf - - echo "net.ipv4.tcp_rmem = '1024 4096 16384'" >> /etc/sysctl.conf - - echo "net.ipv4.tcp_wmem = '1024 4096 16384'" >> /etc/sysctl.conf - - echo "net.core.rmem_max = 16384" >> /etc/sysctl.conf - - echo 
"net.core.wmem_max = 16384" >> /etc/sysctl.conf - - ulimit -n 20000000 - - echo "# " >> /etc/security/limits.d/limits.conf - - echo " * soft nofile 20000" >> /etc/security/limits.d/limits.conf - - echo " * hard nofile 20000" >> /etc/security/limits.d/limits.conf - - sysctl -p - - apt-get update - - apt-get install -y software-properties-common - - curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add - - - add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" - - apt-get update - - apt-get -y install docker-ce - - apt-get install -y resolvconf linux-headers-$(uname -r) && echo "nameserver 1.1.1.1" > /etc/resolvconf/resolv.conf.d/tail && systemctl start resolvconf - - DEBIAN_FRONTEND=noninteractive apt-get upgrade -y - - n=1; while [ $n -le ${k3s_per_node} ]; do docker run -d --restart=unless-stopped -e K3S_URL=https://${k3s_url}:6443 -e K3S_CLUSTER_SECRET="${k3s_cluster_secret}" --privileged --mount type=tmpfs,destination=/var/run --mount type=tmpfs,destination=/run -m 1g --cpus=".7" rancher/k3s:${install_k3s_version}; n=$(( n + 1 )); done +- echo "net.ipv4.neigh.default.gc_interval = 3600" >> /etc/sysctl.conf +- echo "net.ipv4.neigh.default.gc_stale_time = 3600" >> /etc/sysctl.conf +- echo "net.ipv4.neigh.default.gc_thresh3 = 16384" >> /etc/sysctl.conf +- echo "net.ipv4.neigh.default.gc_thresh2 = 8192" >> /etc/sysctl.conf +- echo "net.ipv4.neigh.default.gc_thresh1 = 4096" >> /etc/sysctl.conf +- echo "fs.file-max = 12000500" >> /etc/sysctl.conf +- echo "fs.nr_open = 20000500" >> /etc/sysctl.conf +- echo "net.ipv4.tcp_mem = '10000000 10000000 10000000'" >> /etc/sysctl.conf +- echo "net.ipv4.tcp_rmem = '1024 4096 16384'" >> /etc/sysctl.conf +- echo "net.ipv4.tcp_wmem = '1024 4096 16384'" >> /etc/sysctl.conf +- echo "net.core.rmem_max = 16384" >> /etc/sysctl.conf +- echo "net.core.wmem_max = 16384" >> /etc/sysctl.conf +- ulimit -n 20000000 +- echo "# " >> /etc/security/limits.d/limits.conf +- echo " * soft nofile 20000" >> /etc/security/limits.d/limits.conf +- echo " * hard nofile 20000" >> /etc/security/limits.d/limits.conf +- sysctl -p +- apt-get update +- apt-get install -y software-properties-common +- curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add - +- add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" +- apt-get update +- apt-get -y install docker-ce +- apt-get install -y resolvconf linux-headers-$(uname -r) && echo "nameserver 1.1.1.1" > /etc/resolvconf/resolv.conf.d/tail && systemctl start resolvconf +- DEBIAN_FRONTEND=noninteractive apt-get upgrade -y +- n=1; while [ $n -le ${k3s_per_node} ]; do docker run -d --restart=unless-stopped -e K3S_URL=https://${k3s_url}:6443 -e K3S_CLUSTER_SECRET="${k3s_cluster_secret}" --privileged --mount type=tmpfs,destination=/var/run --mount type=tmpfs,destination=/run -m 1g --cpus=".7" rancher/k3s:${install_k3s_version}; n=$(( n + 1 )); done diff --git a/load_testing/k3s/pool/main.tf b/load_testing/k3s/pool/main.tf index 6e85a15742..6bfc0c2a0e 100644 --- a/load_testing/k3s/pool/main.tf +++ b/load_testing/k3s/pool/main.tf @@ -55,7 +55,7 @@ module "k3s-pool-worker-asg" { asg_name = local.name instance_type = var.worker_instance_type image_id = data.aws_ami.ubuntu.id - user_data = data.template_file.k3s-pool-worker-user_data.rendered + user_data = base64encode(templatefile("${path.module}/files/pool_worker_userdata.tmpl", { k3s_url = data.terraform_remote_state.server.outputs.public_ip[0], 
k3s_cluster_secret = local.k3s_cluster_secret, install_k3s_version = local.install_k3s_version, k3s_per_node = var.k3s_per_node, extra_ssh_keys = var.extra_ssh_keys })) ebs_optimized = true desired_capacity = var.node_count diff --git a/load_testing/k3s/pool/variables.tf b/load_testing/k3s/pool/variables.tf index 0fef717b51..87ac1c0c0f 100644 --- a/load_testing/k3s/pool/variables.tf +++ b/load_testing/k3s/pool/variables.tf @@ -14,3 +14,9 @@ variable "worker_instance_type" { type = string default = "c5.4xlarge" } + +variable "extra_ssh_keys" { + type = list + default = [] + description = "Extra ssh keys to inject into Rancher instances" +} diff --git a/load_testing/k3s/server/data.tf b/load_testing/k3s/server/data.tf index 2a8bb32025..aa0843d511 100644 --- a/load_testing/k3s/server/data.tf +++ b/load_testing/k3s/server/data.tf @@ -45,39 +45,3 @@ data "template_file" "k3s-prom-yaml" { graf_host = var.graf_host } } - -data "template_file" "k3s-server-user_data" { - template = file("${path.module}/files/server_userdata.tmpl") - - vars = { - create_eip = 1 - metrics_yaml = base64encode(data.template_file.metrics.rendered) - prom_yaml = base64encode(data.template_file.k3s-prom-yaml.rendered) - eip = join(",", aws_eip.k3s-server.*.public_ip) - k3s_cluster_secret = local.k3s_cluster_secret - install_k3s_version = local.install_k3s_version - k3s_server_args = var.k3s_server_args - } -} - -data "template_file" "k3s-prom-worker-user_data" { - template = file("${path.module}/files/worker_userdata.tmpl") - - vars = { - k3s_url = aws_eip.k3s-server.0.public_ip - k3s_cluster_secret = local.k3s_cluster_secret - install_k3s_version = local.install_k3s_version - k3s_exec = "--node-label prom=true" - } -} - -data "template_file" "k3s-worker-user_data" { - template = file("${path.module}/files/worker_userdata.tmpl") - - vars = { - k3s_url = aws_eip.k3s-server.0.public_ip - k3s_cluster_secret = local.k3s_cluster_secret - install_k3s_version = local.install_k3s_version - k3s_exec = "" - } -} diff --git a/load_testing/k3s/server/files/server_userdata.tmpl b/load_testing/k3s/server/files/server_userdata.tmpl index 74e1cbf41e..f9e195315c 100644 --- a/load_testing/k3s/server/files/server_userdata.tmpl +++ b/load_testing/k3s/server/files/server_userdata.tmpl @@ -1,45 +1,41 @@ #cloud-config +%{ if length(extra_ssh_keys) > 0 } ssh_authorized_keys: -- ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQC06Qvs+Y9JiyOTeYNGAN/Ukq7SmeCTr7EreD1K8Lwu5VuOmo+SBZh685tNTEGV044HgFvGEOBVreDlO2ArYuwHjUBGnpQGV8/abjoeLrmZBdREAUzBQ1h2GFE/WssKUfum81cnigRK1J3tWP7emq/Y2h/Zw5F09yiCIlXMBX2auKWUCXqwG3xKTi1NVSF9N6BGyFolrAR0LZJ6k7UBXPRc/QDTclI427gSJNbnmn8LVym6YxacV/V9Y7s23iR5zYbhLPe9VJWYNk1brVvfUVb3mILVVYz76KGEq8SHdWlPQPCOp+fSJ+PezDRklnex/MmvhNrBOmMSNcpj7wSLA3hD wmaxwell@wmaxwell-laptop -- ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIN5O7k6gRYCU7YPkCH6dyXVW10izMAkDAQtQxNxdRE22 drpebcak -- ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQC2TBZGjE+J8ag11dzkFT58J3XPONDrVmalCNrKxsfADfyy0eqdZrG8hAcxAR/5zuj90Gin2uB4RSw6Cn4VHsPZcFpXyQCj1KQDADj+WcuhpXOIOY3AB0LZBly9NI0ll+8lo3QtEaoyRLtrMBhQ6Mooy2M3MTG4JNwU9o3yInuqZWf9PvtW6KxMl+ygg1xZkljhemGZ9k0wSrjqif+8usNbzVlCOVQmZwZA+BZxbdcLNwkg7zWJSXzDIXyqM6iWPGXQDEbWLq3+HR1qKucTCSxjbqoe0FD5xcW7NHIME5XKX84yH92n6yn+rxSsyUfhJWYqJd+i0fKf5UbN6qLrtd/D darren@darrens +%{ for ssh_key in extra_ssh_keys } +- ${ssh_key} +%{ endfor } +%{ endif } write_files: - - path: /var/lib/rancher/k3s/server/manifests/metrics.yaml - permissions: "0755" - owner: root:root - encoding: b64 - content: ${metrics_yaml} - - path: /var/lib/rancher/k3s/server/manifests/prom.yaml - 
permissions: "0755" - owner: root:root - encoding: b64 - content: ${prom_yaml} +- path: /var/lib/rancher/k3s/server/manifests/metrics.yaml + permissions: "0755" + owner: root:root + encoding: b64 + content: ${metrics_yaml} +- path: /var/lib/rancher/k3s/server/manifests/prom.yaml + permissions: "0755" + owner: root:root + encoding: b64 + content: ${prom_yaml} runcmd: - - echo "net.ipv4.neigh.default.gc_interval = 3600" >> /etc/sysctl.conf - - echo "net.ipv4.neigh.default.gc_stale_time = 3600" >> /etc/sysctl.conf - - echo "net.ipv4.neigh.default.gc_thresh3 = 16384" >> /etc/sysctl.conf - - echo "net.ipv4.neigh.default.gc_thresh2 = 8192" >> /etc/sysctl.conf - - echo "net.ipv4.neigh.default.gc_thresh1 = 4096" >> /etc/sysctl.conf - - echo "fs.file-max = 12000500" >> /etc/sysctl.conf - - echo "fs.nr_open = 20000500" >> /etc/sysctl.conf - - echo "net.ipv4.tcp_mem = '10000000 10000000 10000000'" >> /etc/sysctl.conf - - echo "net.ipv4.tcp_rmem = '1024 4096 16384'" >> /etc/sysctl.conf - - echo "net.ipv4.tcp_wmem = '1024 4096 16384'" >> /etc/sysctl.conf - - echo "net.core.rmem_max = 16384" >> /etc/sysctl.conf - - echo "net.core.wmem_max = 16384" >> /etc/sysctl.conf - - ulimit -n 20000000 - - echo "# " >> /etc/security/limits.d/limits.conf - - echo " * soft nofile 20000" >> /etc/security/limits.d/limits.conf - - echo " * hard nofile 20000" >> /etc/security/limits.d/limits.conf - - sysctl -p - - apt-get update - - apt-get install -y software-properties-common resolvconf linux-headers-$(uname -r) - - echo "nameserver 1.1.1.1" > /etc/resolvconf/resolv.conf.d/tail - - systemctl start resolvconf - - curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add - - - add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" - - apt-get update - - apt-get -y install docker-ce - - DEBIAN_FRONTEND=noninteractive apt-get upgrade -y - - if [ "${create_eip}" = "1" ]; then docker run -e "EIP=${eip}" cloudnautique/eip-autoassign:latest; fi - - until (curl -sfL https://get.k3s.io | INSTALL_K3S_EXEC="${k3s_server_args} --disable-agent --no-deploy traefik --no-deploy servicelb --cluster-cidr=10.0.0.0/8 --service-cidr=192.168.0.0/16 --cluster-dns=192.168.0.10 --tls-san ${eip}" K3S_CLUSTER_SECRET="${k3s_cluster_secret}" INSTALL_K3S_VERSION=${install_k3s_version} sh -); do echo 'Error installing k3s'; sleep 1; done +- echo "net.ipv4.neigh.default.gc_interval = 3600" >> /etc/sysctl.conf +- echo "net.ipv4.neigh.default.gc_stale_time = 3600" >> /etc/sysctl.conf +- echo "net.ipv4.neigh.default.gc_thresh3 = 16384" >> /etc/sysctl.conf +- echo "net.ipv4.neigh.default.gc_thresh2 = 8192" >> /etc/sysctl.conf +- echo "net.ipv4.neigh.default.gc_thresh1 = 4096" >> /etc/sysctl.conf +- echo "fs.file-max = 12000500" >> /etc/sysctl.conf +- echo "fs.nr_open = 20000500" >> /etc/sysctl.conf +- echo "net.ipv4.tcp_mem = '10000000 10000000 10000000'" >> /etc/sysctl.conf +- echo "net.ipv4.tcp_rmem = '1024 4096 16384'" >> /etc/sysctl.conf +- echo "net.ipv4.tcp_wmem = '1024 4096 16384'" >> /etc/sysctl.conf +- echo "net.core.rmem_max = 16384" >> /etc/sysctl.conf +- echo "net.core.wmem_max = 16384" >> /etc/sysctl.conf +- ulimit -n 20000000 +- echo "# " >> /etc/security/limits.d/limits.conf +- echo " * soft nofile 20000" >> /etc/security/limits.d/limits.conf +- echo " * hard nofile 20000" >> /etc/security/limits.d/limits.conf +- sysctl -p +- apt-get update +- apt-get install -y software-properties-common resolvconf linux-headers-$(uname -r) +- echo "nameserver 1.1.1.1" > 
/etc/resolvconf/resolv.conf.d/tail +- systemctl start resolvconf +- until (curl -sfL https://get.k3s.io | INSTALL_K3S_EXEC="${k3s_server_args} --disable-agent --no-deploy traefik --no-deploy servicelb --cluster-cidr=10.0.0.0/8 --service-cidr=192.168.0.0/16 --cluster-dns=192.168.0.10 --tls-san ${public_ip}" K3S_CLUSTER_SECRET="${k3s_cluster_secret}" INSTALL_K3S_VERSION=${install_k3s_version} sh -); do echo 'Error installing k3s'; sleep 1; done diff --git a/load_testing/k3s/server/files/worker_userdata.tmpl b/load_testing/k3s/server/files/worker_userdata.tmpl index 8d5e7a55fd..e451a6d0ce 100644 --- a/load_testing/k3s/server/files/worker_userdata.tmpl +++ b/load_testing/k3s/server/files/worker_userdata.tmpl @@ -1,26 +1,26 @@ #cloud-config +%{ if length(extra_ssh_keys) > 0 } ssh_authorized_keys: -- ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQC06Qvs+Y9JiyOTeYNGAN/Ukq7SmeCTr7EreD1K8Lwu5VuOmo+SBZh685tNTEGV044HgFvGEOBVreDlO2ArYuwHjUBGnpQGV8/abjoeLrmZBdREAUzBQ1h2GFE/WssKUfum81cnigRK1J3tWP7emq/Y2h/Zw5F09yiCIlXMBX2auKWUCXqwG3xKTi1NVSF9N6BGyFolrAR0LZJ6k7UBXPRc/QDTclI427gSJNbnmn8LVym6YxacV/V9Y7s23iR5zYbhLPe9VJWYNk1brVvfUVb3mILVVYz76KGEq8SHdWlPQPCOp+fSJ+PezDRklnex/MmvhNrBOmMSNcpj7wSLA3hD wmaxwell@wmaxwell-laptop -- ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIN5O7k6gRYCU7YPkCH6dyXVW10izMAkDAQtQxNxdRE22 drpebcak +%{ for ssh_key in extra_ssh_keys } +- ${ssh_key} +%{ endfor } +%{ endif } runcmd: - - echo "net.ipv4.neigh.default.gc_interval = 3600" >> /etc/sysctl.conf - - echo "net.ipv4.neigh.default.gc_stale_time = 3600" >> /etc/sysctl.conf - - echo "net.ipv4.neigh.default.gc_thresh3 = 16384" >> /etc/sysctl.conf - - echo "net.ipv4.neigh.default.gc_thresh2 = 8192" >> /etc/sysctl.conf - - echo "net.ipv4.neigh.default.gc_thresh1 = 4096" >> /etc/sysctl.conf - - echo "fs.file-max = 12000500" >> /etc/sysctl.conf - - echo "fs.nr_open = 20000500" >> /etc/sysctl.conf - - echo "net.ipv4.tcp_mem = '10000000 10000000 10000000'" >> /etc/sysctl.conf - - echo "net.ipv4.tcp_rmem = '1024 4096 16384'" >> /etc/sysctl.conf - - echo "net.ipv4.tcp_wmem = '1024 4096 16384'" >> /etc/sysctl.conf - - echo "net.core.rmem_max = 16384" >> /etc/sysctl.conf - - echo "net.core.wmem_max = 16384" >> /etc/sysctl.conf - - ulimit -n 20000000 - - echo "# " >> /etc/security/limits.d/limits.conf - - echo " * soft nofile 20000" >> /etc/security/limits.d/limits.conf - - echo " * hard nofile 20000" >> /etc/security/limits.d/limits.conf - - sysctl -p - - apt-get update - - apt-get install -y software-properties-common - - DEBIAN_FRONTEND=noninteractive apt-get upgrade -y - - until (curl -sfL https://get.k3s.io | INSTALL_K3S_VERSION=${install_k3s_version} INSTALL_K3S_EXEC="${k3s_exec}" K3S_URL=https://${k3s_url}:6443 K3S_CLUSTER_SECRET="${k3s_cluster_secret}" sh -); do echo 'k3s did not install correctly'; sleep 1; done +- echo "net.ipv4.neigh.default.gc_interval = 3600" >> /etc/sysctl.conf +- echo "net.ipv4.neigh.default.gc_stale_time = 3600" >> /etc/sysctl.conf +- echo "net.ipv4.neigh.default.gc_thresh3 = 16384" >> /etc/sysctl.conf +- echo "net.ipv4.neigh.default.gc_thresh2 = 8192" >> /etc/sysctl.conf +- echo "net.ipv4.neigh.default.gc_thresh1 = 4096" >> /etc/sysctl.conf +- echo "fs.file-max = 12000500" >> /etc/sysctl.conf +- echo "fs.nr_open = 20000500" >> /etc/sysctl.conf +- echo "net.ipv4.tcp_mem = '10000000 10000000 10000000'" >> /etc/sysctl.conf +- echo "net.ipv4.tcp_rmem = '1024 4096 16384'" >> /etc/sysctl.conf +- echo "net.ipv4.tcp_wmem = '1024 4096 16384'" >> /etc/sysctl.conf +- echo "net.core.rmem_max = 16384" >> /etc/sysctl.conf +- echo 
"net.core.wmem_max = 16384" >> /etc/sysctl.conf +- ulimit -n 20000 +- echo "# " >> /etc/security/limits.d/limits.conf +- echo " * soft nofile 20000" >> /etc/security/limits.d/limits.conf +- echo " * hard nofile 20000" >> /etc/security/limits.d/limits.conf +- sysctl -p +- until (curl -sfL https://get.k3s.io | INSTALL_K3S_VERSION=${install_k3s_version} INSTALL_K3S_EXEC="${k3s_exec}" K3S_URL=https://${k3s_url}:6443 K3S_CLUSTER_SECRET="${k3s_cluster_secret}" sh -); do echo 'k3s did not install correctly'; sleep 1; done diff --git a/load_testing/k3s/server/iam.tf b/load_testing/k3s/server/iam.tf deleted file mode 100644 index 3b7c786860..0000000000 --- a/load_testing/k3s/server/iam.tf +++ /dev/null @@ -1,58 +0,0 @@ -resource "aws_iam_instance_profile" "k3s-server" { - name_prefix = "load-testing-k3s-server" - role = aws_iam_role.k3s-server.name - - lifecycle { - create_before_destroy = true - } -} - -resource "aws_iam_role" "k3s-server" { - name_prefix = "load-testing-k3s-server" - - assume_role_policy = <| ../cluster-loader/kubeConfig.yaml; do sleep 5; done" + command = "until ssh ubuntu@${aws_spot_instance_request.k3s-server.public_ip} 'sudo sed \"s/localhost/$aws_spot_instance_request.k3s-server.public_ip}/g;s/127.0.0.1/${aws_spot_instance_request.k3s-server.public_ip}/g\" /etc/rancher/k3s/k3s.yaml' >| ../cluster-loader/kubeConfig.yaml; do sleep 5; done" } } diff --git a/load_testing/k3s/server/outputs.tf b/load_testing/k3s/server/outputs.tf index a994aafb68..7f0cac2753 100644 --- a/load_testing/k3s/server/outputs.tf +++ b/load_testing/k3s/server/outputs.tf @@ -1,5 +1,5 @@ output "public_ip" { - value = aws_eip.k3s-server.*.public_ip + value = aws_spot_instance_request.k3s-server.public_ip } output "install_k3s_version" { diff --git a/load_testing/k3s/server/variables.tf b/load_testing/k3s/server/variables.tf index 779ac4ffcb..b45abc7211 100644 --- a/load_testing/k3s/server/variables.tf +++ b/load_testing/k3s/server/variables.tf @@ -1,12 +1,38 @@ variable "server_instance_type" { # default = "c4.8xlarge" } +variable "k3s_version" { + default = "v0.9.1" + type = string + description = "Version of K3S to install" +} variable "k3s_server_args" { default = "" } +variable "prom_worker_node_count" { + default = 0 + type = number + description = "The number of workers to create labeled for prometheus" +} +variable "k3s_cluster_secret" { + default = "pvc-6476dcaf-73a0-11e9-b8e5-06943b744282" + type = string + description = "Cluster secret for k3s cluster registration" +} variable "prom_host" { default = "" } variable "graf_host" { default = "" } +variable "name" { + default = "k3s-loadtest" + type = string + description = "Name to identify this cluster" +} + +variable "extra_ssh_keys" { + type = list + default = [] + description = "Extra ssh keys to inject into Rancher instances" +} From 31a615fc0f2677e9f19a1c9976379c0f6c17dee0 Mon Sep 17 00:00:00 2001 From: galal-hussein Date: Thu, 31 Oct 2019 23:06:10 +0200 Subject: [PATCH 3/7] delete load testsing --- load_testing/k3s/.gitignore | 1 - load_testing/k3s/cluster-loader/.gitignore | 1 - .../k3s/cluster-loader/large/config.yaml | 471 ------------------ .../k3s/cluster-loader/large/configmap.yaml | 9 - .../k3s/cluster-loader/large/deployment.yaml | 62 --- .../k3s/cluster-loader/large/run-test.sh | 3 - .../k3s/cluster-loader/large/secret.yaml | 7 - .../k3s/cluster-loader/large/service.yaml | 16 - .../k3s/cluster-loader/large/statefulset.yaml | 30 -- .../large/statefulset_service.yaml | 10 - .../k3s/cluster-loader/small/config.yaml | 471 
------------------ .../k3s/cluster-loader/small/configmap.yaml | 9 - .../k3s/cluster-loader/small/deployment.yaml | 62 --- .../k3s/cluster-loader/small/run-test.sh | 3 - .../k3s/cluster-loader/small/secret.yaml | 7 - .../k3s/cluster-loader/small/service.yaml | 16 - .../k3s/cluster-loader/small/statefulset.yaml | 30 -- .../small/statefulset_service.yaml | 10 - load_testing/k3s/pool/data.tf | 44 -- .../k3s/pool/files/pool_worker_userdata.tmpl | 34 -- load_testing/k3s/pool/main.tf | 80 --- load_testing/k3s/pool/outputs.tf | 0 load_testing/k3s/pool/variables.tf | 22 - load_testing/k3s/pool/versions.tf | 4 - load_testing/k3s/readme.MD | 47 -- load_testing/k3s/server/data.tf | 47 -- load_testing/k3s/server/files/metrics.yaml | 227 --------- load_testing/k3s/server/files/prom.yaml | 86 ---- .../k3s/server/files/server_userdata.tmpl | 41 -- .../k3s/server/files/worker_userdata.tmpl | 26 - load_testing/k3s/server/main.tf | 116 ----- load_testing/k3s/server/outputs.tf | 11 - load_testing/k3s/server/variables.tf | 38 -- load_testing/k3s/server/versions.tf | 4 - 34 files changed, 2045 deletions(-) delete mode 100644 load_testing/k3s/.gitignore delete mode 100644 load_testing/k3s/cluster-loader/.gitignore delete mode 100644 load_testing/k3s/cluster-loader/large/config.yaml delete mode 100644 load_testing/k3s/cluster-loader/large/configmap.yaml delete mode 100644 load_testing/k3s/cluster-loader/large/deployment.yaml delete mode 100644 load_testing/k3s/cluster-loader/large/run-test.sh delete mode 100644 load_testing/k3s/cluster-loader/large/secret.yaml delete mode 100644 load_testing/k3s/cluster-loader/large/service.yaml delete mode 100644 load_testing/k3s/cluster-loader/large/statefulset.yaml delete mode 100644 load_testing/k3s/cluster-loader/large/statefulset_service.yaml delete mode 100644 load_testing/k3s/cluster-loader/small/config.yaml delete mode 100644 load_testing/k3s/cluster-loader/small/configmap.yaml delete mode 100644 load_testing/k3s/cluster-loader/small/deployment.yaml delete mode 100644 load_testing/k3s/cluster-loader/small/run-test.sh delete mode 100644 load_testing/k3s/cluster-loader/small/secret.yaml delete mode 100644 load_testing/k3s/cluster-loader/small/service.yaml delete mode 100644 load_testing/k3s/cluster-loader/small/statefulset.yaml delete mode 100644 load_testing/k3s/cluster-loader/small/statefulset_service.yaml delete mode 100644 load_testing/k3s/pool/data.tf delete mode 100644 load_testing/k3s/pool/files/pool_worker_userdata.tmpl delete mode 100644 load_testing/k3s/pool/main.tf delete mode 100644 load_testing/k3s/pool/outputs.tf delete mode 100644 load_testing/k3s/pool/variables.tf delete mode 100644 load_testing/k3s/pool/versions.tf delete mode 100644 load_testing/k3s/readme.MD delete mode 100644 load_testing/k3s/server/data.tf delete mode 100644 load_testing/k3s/server/files/metrics.yaml delete mode 100644 load_testing/k3s/server/files/prom.yaml delete mode 100644 load_testing/k3s/server/files/server_userdata.tmpl delete mode 100644 load_testing/k3s/server/files/worker_userdata.tmpl delete mode 100644 load_testing/k3s/server/main.tf delete mode 100644 load_testing/k3s/server/outputs.tf delete mode 100644 load_testing/k3s/server/variables.tf delete mode 100644 load_testing/k3s/server/versions.tf diff --git a/load_testing/k3s/.gitignore b/load_testing/k3s/.gitignore deleted file mode 100644 index e79eb23105..0000000000 --- a/load_testing/k3s/.gitignore +++ /dev/null @@ -1 +0,0 @@ -.terraform* diff --git a/load_testing/k3s/cluster-loader/.gitignore 
b/load_testing/k3s/cluster-loader/.gitignore deleted file mode 100644 index ee5ab6892d..0000000000 --- a/load_testing/k3s/cluster-loader/.gitignore +++ /dev/null @@ -1 +0,0 @@ -kubeConfig.yaml diff --git a/load_testing/k3s/cluster-loader/large/config.yaml b/load_testing/k3s/cluster-loader/large/config.yaml deleted file mode 100644 index 2b9f23f23a..0000000000 --- a/load_testing/k3s/cluster-loader/large/config.yaml +++ /dev/null @@ -1,471 +0,0 @@ -# ASSUMPTIONS: -# - Underlying cluster should have 100+ nodes. -# - Number of nodes should be divisible by NODES_PER_NAMESPACE (default 100). -# - The number of created SVCs is half the number of created Deployments. -# - Only half of Deployments will be assigned 1-1 to existing SVCs. - -#Constants -{{$NODE_MODE := DefaultParam .NODE_MODE "allnodes"}} -{{$NODES_PER_NAMESPACE := DefaultParam .NODES_PER_NAMESPACE 100}} -{{$PODS_PER_NODE := DefaultParam .PODS_PER_NODE 30}} -{{$LOAD_TEST_THROUGHPUT := DefaultParam .LOAD_TEST_THROUGHPUT 10}} -{{$BIG_GROUP_SIZE := 25}} -{{$MEDIUM_GROUP_SIZE := 15}} -{{$SMALL_GROUP_SIZE := 1}} -{{$SMALL_STATEFUL_SETS_PER_NAMESPACE := 1}} -{{$MEDIUM_STATEFUL_SETS_PER_NAMESPACE := 1}} -{{$ENABLE_CHAOSMONKEY := DefaultParam .ENABLE_CHAOSMONKEY false}} -{{$ENABLE_PROMETHEUS_API_RESPONSIVENESS := DefaultParam .ENABLE_PROMETHEUS_API_RESPONSIVENESS false}} -{{$ENABLE_CONFIGMAPS := DefaultParam .ENABLE_CONFIGMAPS false}} -{{$ENABLE_SECRETS := DefaultParam .ENABLE_SECRETS false}} -{{$ENABLE_STATEFULSETS := DefaultParam .ENABLE_STATEFULSETS false}} -#Variables -{{$namespaces := DivideInt .Nodes $NODES_PER_NAMESPACE}} -{{$totalPods := MultiplyInt $namespaces $NODES_PER_NAMESPACE $PODS_PER_NODE}} -{{$podsPerNamespace := DivideInt $totalPods $namespaces}} -{{$saturationTime := DivideInt $totalPods $LOAD_TEST_THROUGHPUT}} -# bigDeployments - 1/4 of namespace pods should be in big Deployments. -{{$bigDeploymentsPerNamespace := DivideInt $podsPerNamespace (MultiplyInt 4 $BIG_GROUP_SIZE)}} -# mediumDeployments - 1/4 of namespace pods should be in medium Deployments. -{{$mediumDeploymentsPerNamespace := DivideInt $podsPerNamespace (MultiplyInt 4 $MEDIUM_GROUP_SIZE)}} -# smallDeployments - 1/2 of namespace pods should be in small Deployments. -{{$smallDeploymentsPerNamespace := DivideInt $podsPerNamespace (MultiplyInt 2 $SMALL_GROUP_SIZE)}} -# If StatefulSets are enabled reduce the number of small and medium deployments per namespace -{{$smallDeploymentsPerNamespace := SubtractInt $smallDeploymentsPerNamespace (IfThenElse $ENABLE_STATEFULSETS $SMALL_STATEFUL_SETS_PER_NAMESPACE 0)}} -{{$mediumDeploymentsPerNamespace := SubtractInt $mediumDeploymentsPerNamespace (IfThenElse $ENABLE_STATEFULSETS $MEDIUM_STATEFUL_SETS_PER_NAMESPACE 0)}} - - -name: load -automanagedNamespaces: {{$namespaces}} -tuningSets: -- name: Sequence - parallelismLimitedLoad: - parallelismLimit: 1 -- name: RandomizedSaturationTimeLimited - RandomizedTimeLimitedLoad: - timeLimit: {{$saturationTime}}s -- name: RandomizedScalingTimeLimited - RandomizedTimeLimitedLoad: - # The expected number of created/deleted pods is totalPods/4 when scaling, - # as each RS changes its size from X to a uniform random value in [X/2, 3X/2]. - # To match 10 [pods/s] requirement, we need to divide saturationTime by 4. 
- timeLimit: {{DivideInt $saturationTime 4}}s -{{if $ENABLE_CHAOSMONKEY}} -chaosMonkey: - nodeFailure: - failureRate: 0.01 - interval: 1m - jitterFactor: 10.0 - simulatedDowntime: 10m -{{end}} -steps: -- name: Starting measurements - measurements: - - Identifier: APIResponsiveness - Method: APIResponsiveness - Params: - action: reset - - Identifier: APIResponsivenessPrometheus - Method: APIResponsivenessPrometheus - Params: - action: start - - Identifier: PodStartupLatency - Method: PodStartupLatency - Params: - action: start - labelSelector: group = load - threshold: 1h - - Identifier: InClusterNetworkLatency - Method: InClusterNetworkLatency - Params: - action: start - replicasPerProbe: {{DivideInt .Nodes 100}} - - Identifier: DnsLookupLatency - Method: DnsLookupLatency - Params: - action: start - replicasPerProbe: {{DivideInt .Nodes 100}} - - Identifier: NetworkProgrammingLatency - Method: NetworkProgrammingLatency - Params: - action: start - - Identifier: TestMetrics - Method: TestMetrics - Params: - action: start - nodeMode: {{$NODE_MODE}} - -- name: Creating SVCs - phases: - - namespaceRange: - min: 1 - max: {{$namespaces}} - replicasPerNamespace: {{DivideInt (AddInt $bigDeploymentsPerNamespace 1) 2}} - tuningSet: Sequence - objectBundle: - - basename: big-service - objectTemplatePath: service.yaml - - namespaceRange: - min: 1 - max: {{$namespaces}} - replicasPerNamespace: {{DivideInt (AddInt $mediumDeploymentsPerNamespace 1) 2}} - tuningSet: Sequence - objectBundle: - - basename: medium-service - objectTemplatePath: service.yaml - - namespaceRange: - min: 1 - max: {{$namespaces}} - replicasPerNamespace: {{DivideInt (AddInt $smallDeploymentsPerNamespace 1) 2}} - tuningSet: Sequence - objectBundle: - - basename: small-service - objectTemplatePath: service.yaml - -- name: Starting measurement for waiting for pods - measurements: - - Identifier: WaitForRunningDeployments - Method: WaitForControlledPodsRunning - Params: - action: start - apiVersion: apps/v1 - kind: Deployment - labelSelector: group = load - operationTimeout: 15m - {{if $ENABLE_STATEFULSETS}} - - Identifier: WaitForRunningStatefulSets - Method: WaitForControlledPodsRunning - Params: - action: start - apiVersion: apps/v1 - kind: StatefulSet - labelSelector: group = load - operationTimeout: 15m - {{end}} - -- name: Creating objects - phases: - - namespaceRange: - min: 1 - max: {{$namespaces}} - replicasPerNamespace: {{$bigDeploymentsPerNamespace}} - tuningSet: RandomizedSaturationTimeLimited - objectBundle: - {{if $ENABLE_CONFIGMAPS}} - - basename: big-deployment - objectTemplatePath: configmap.yaml - {{end}} - {{if $ENABLE_SECRETS}} - - basename: big-deployment - objectTemplatePath: secret.yaml - {{end}} - - basename: big-deployment - objectTemplatePath: deployment.yaml - templateFillMap: - ReplicasMin: {{$BIG_GROUP_SIZE}} - ReplicasMax: {{$BIG_GROUP_SIZE}} - SvcName: big-service - - namespaceRange: - min: 1 - max: {{$namespaces}} - replicasPerNamespace: {{$mediumDeploymentsPerNamespace}} - tuningSet: RandomizedSaturationTimeLimited - objectBundle: - {{if $ENABLE_CONFIGMAPS}} - - basename: medium-deployment - objectTemplatePath: configmap.yaml - {{end}} - {{if $ENABLE_SECRETS}} - - basename: medium-deployment - objectTemplatePath: secret.yaml - {{end}} - - basename: medium-deployment - objectTemplatePath: deployment.yaml - templateFillMap: - ReplicasMin: {{$MEDIUM_GROUP_SIZE}} - ReplicasMax: {{$MEDIUM_GROUP_SIZE}} - SvcName: medium-service - - namespaceRange: - min: 1 - max: {{$namespaces}} - replicasPerNamespace: 
{{$smallDeploymentsPerNamespace}} - tuningSet: RandomizedSaturationTimeLimited - objectBundle: - {{if $ENABLE_CONFIGMAPS}} - - basename: small-deployment - objectTemplatePath: configmap.yaml - {{end}} - {{if $ENABLE_SECRETS}} - - basename: small-deployment - objectTemplatePath: secret.yaml - {{end}} - - basename: small-deployment - objectTemplatePath: deployment.yaml - templateFillMap: - ReplicasMin: {{$SMALL_GROUP_SIZE}} - ReplicasMax: {{$SMALL_GROUP_SIZE}} - SvcName: small-service - {{if $ENABLE_STATEFULSETS}} - - namespaceRange: - min: 1 - max: {{$namespaces}} - replicasPerNamespace: {{$SMALL_STATEFUL_SETS_PER_NAMESPACE}} - tuningSet: RandomizedSaturationTimeLimited - objectBundle: - - basename: small-statefulset - objectTemplatePath: statefulset_service.yaml - - basename: small-statefulset - objectTemplatePath: statefulset.yaml - templateFillMap: - ReplicasMin: {{$SMALL_GROUP_SIZE}} - ReplicasMax: {{$SMALL_GROUP_SIZE}} - - namespaceRange: - min: 1 - max: {{$namespaces}} - replicasPerNamespace: {{$MEDIUM_STATEFUL_SETS_PER_NAMESPACE}} - tuningSet: RandomizedSaturationTimeLimited - objectBundle: - - basename: medium-statefulset - objectTemplatePath: statefulset_service.yaml - - basename: medium-statefulset - objectTemplatePath: statefulset.yaml - templateFillMap: - ReplicasMin: {{$MEDIUM_GROUP_SIZE}} - ReplicasMax: {{$MEDIUM_GROUP_SIZE}} - {{end}} - -- name: Waiting for pods to be running - measurements: - - Identifier: WaitForRunningDeployments - Method: WaitForControlledPodsRunning - Params: - action: gather - {{if $ENABLE_STATEFULSETS}} - - Identifier: WaitForRunningStatefulSets - Method: WaitForControlledPodsRunning - Params: - action: gather - {{end}} - -- name: Scaling objects - phases: - - namespaceRange: - min: 1 - max: {{$namespaces}} - replicasPerNamespace: {{$bigDeploymentsPerNamespace}} - tuningSet: RandomizedScalingTimeLimited - objectBundle: - - basename: big-deployment - objectTemplatePath: deployment.yaml - templateFillMap: - ReplicasMin: {{MultiplyInt $BIG_GROUP_SIZE 0.5}} - ReplicasMax: {{MultiplyInt $BIG_GROUP_SIZE 1.5}} - SvcName: big-service - - namespaceRange: - min: 1 - max: {{$namespaces}} - replicasPerNamespace: {{$mediumDeploymentsPerNamespace}} - tuningSet: RandomizedScalingTimeLimited - objectBundle: - - basename: medium-deployment - objectTemplatePath: deployment.yaml - templateFillMap: - ReplicasMin: {{MultiplyInt $MEDIUM_GROUP_SIZE 0.5}} - ReplicasMax: {{MultiplyInt $MEDIUM_GROUP_SIZE 1.5}} - SvcName: medium-service - - namespaceRange: - min: 1 - max: {{$namespaces}} - replicasPerNamespace: {{$smallDeploymentsPerNamespace}} - tuningSet: RandomizedScalingTimeLimited - objectBundle: - - basename: small-deployment - objectTemplatePath: deployment.yaml - templateFillMap: - ReplicasMin: {{MultiplyInt $SMALL_GROUP_SIZE 0.5}} - ReplicasMax: {{MultiplyInt $SMALL_GROUP_SIZE 1.5}} - SvcName: small-service - {{if $ENABLE_STATEFULSETS}} - - namespaceRange: - min: 1 - max: {{$namespaces}} - replicasPerNamespace: {{$SMALL_STATEFUL_SETS_PER_NAMESPACE}} - tuningSet: RandomizedScalingTimeLimited - objectBundle: - - basename: small-statefulset - objectTemplatePath: statefulset.yaml - templateFillMap: - ReplicasMin: {{MultiplyInt $SMALL_GROUP_SIZE 0.5}} - ReplicasMax: {{MultiplyInt $SMALL_GROUP_SIZE 1.5}} - - namespaceRange: - min: 1 - max: {{$namespaces}} - replicasPerNamespace: {{$MEDIUM_STATEFUL_SETS_PER_NAMESPACE}} - tuningSet: RandomizedScalingTimeLimited - objectBundle: - - basename: medium-statefulset - objectTemplatePath: statefulset.yaml - templateFillMap: - 
ReplicasMin: {{MultiplyInt $MEDIUM_GROUP_SIZE 0.5}} - ReplicasMax: {{MultiplyInt $MEDIUM_GROUP_SIZE 1.5}} - {{end}} - -- name: Waiting for objects to become scaled - measurements: - - Identifier: WaitForRunningDeployments - Method: WaitForControlledPodsRunning - Params: - action: gather - {{if $ENABLE_STATEFULSETS}} - - Identifier: WaitForRunningStatefulSets - Method: WaitForControlledPodsRunning - Params: - action: gather - {{end}} - -- name: Deleting objects - phases: - - namespaceRange: - min: 1 - max: {{$namespaces}} - replicasPerNamespace: 0 - tuningSet: RandomizedSaturationTimeLimited - objectBundle: - - basename: big-deployment - objectTemplatePath: deployment.yaml - {{if $ENABLE_CONFIGMAPS}} - - basename: big-deployment - objectTemplatePath: configmap.yaml - {{end}} - {{if $ENABLE_SECRETS}} - - basename: big-deployment - objectTemplatePath: secret.yaml - {{end}} - - namespaceRange: - min: 1 - max: {{$namespaces}} - replicasPerNamespace: 0 - tuningSet: RandomizedSaturationTimeLimited - objectBundle: - - basename: medium-deployment - objectTemplatePath: deployment.yaml - {{if $ENABLE_CONFIGMAPS}} - - basename: medium-deployment - objectTemplatePath: configmap.yaml - {{end}} - {{if $ENABLE_SECRETS}} - - basename: medium-deployment - objectTemplatePath: secret.yaml - {{end}} - - namespaceRange: - min: 1 - max: {{$namespaces}} - replicasPerNamespace: 0 - tuningSet: RandomizedSaturationTimeLimited - objectBundle: - - basename: small-deployment - objectTemplatePath: deployment.yaml - {{if $ENABLE_CONFIGMAPS}} - - basename: small-deployment - objectTemplatePath: configmap.yaml - {{end}} - {{if $ENABLE_SECRETS}} - - basename: small-deployment - objectTemplatePath: secret.yaml - {{end}} - {{if $ENABLE_STATEFULSETS}} - - namespaceRange: - min: 1 - max: {{$namespaces}} - replicasPerNamespace: 0 - tuningSet: RandomizedSaturationTimeLimited - objectBundle: - - basename: small-statefulset - objectTemplatePath: statefulset.yaml - - basename: small-statefulset - objectTemplatePath: statefulset_service.yaml - - namespaceRange: - min: 1 - max: {{$namespaces}} - replicasPerNamespace: 0 - tuningSet: RandomizedSaturationTimeLimited - objectBundle: - - basename: medium-statefulset - objectTemplatePath: statefulset.yaml - - basename: medium-statefulset - objectTemplatePath: statefulset_service.yaml - {{end}} - -- name: Waiting for pods to be deleted - measurements: - - Identifier: WaitForRunningDeployments - Method: WaitForControlledPodsRunning - Params: - action: gather - {{if $ENABLE_STATEFULSETS}} - - Identifier: WaitForRunningStatefulSets - Method: WaitForControlledPodsRunning - Params: - action: gather - {{end}} - -- name: Deleting SVCs - phases: - - namespaceRange: - min: 1 - max: {{$namespaces}} - replicasPerNamespace: 0 - tuningSet: Sequence - objectBundle: - - basename: big-service - objectTemplatePath: service.yaml - - namespaceRange: - min: 1 - max: {{$namespaces}} - replicasPerNamespace: 0 - tuningSet: Sequence - objectBundle: - - basename: medium-service - objectTemplatePath: service.yaml - - namespaceRange: - min: 1 - max: {{$namespaces}} - replicasPerNamespace: 0 - tuningSet: Sequence - objectBundle: - - basename: small-service - objectTemplatePath: service.yaml - -- name: Collecting measurements - measurements: - - Identifier: APIResponsiveness - Method: APIResponsiveness - Params: - action: gather - - Identifier: APIResponsivenessPrometheus - Method: APIResponsivenessPrometheus - Params: - action: gather - {{if $ENABLE_PROMETHEUS_API_RESPONSIVENESS}} - enableViolations: true - {{end}} - - 
Identifier: PodStartupLatency - Method: PodStartupLatency - Params: - action: gather - - Identifier: InClusterNetworkLatency - Method: InClusterNetworkLatency - Params: - action: gather - - Identifier: DnsLookupLatency - Method: DnsLookupLatency - Params: - action: gather - - Identifier: NetworkProgrammingLatency - Method: NetworkProgrammingLatency - Params: - action: gather - - Identifier: TestMetrics - Method: TestMetrics - Params: - action: gather diff --git a/load_testing/k3s/cluster-loader/large/configmap.yaml b/load_testing/k3s/cluster-loader/large/configmap.yaml deleted file mode 100644 index b249a39143..0000000000 --- a/load_testing/k3s/cluster-loader/large/configmap.yaml +++ /dev/null @@ -1,9 +0,0 @@ -apiVersion: v1 -kind: ConfigMap -metadata: - name: {{.Name}} -data: - data.yaml: |- - a: 1 - b: 2 - c: 3 diff --git a/load_testing/k3s/cluster-loader/large/deployment.yaml b/load_testing/k3s/cluster-loader/large/deployment.yaml deleted file mode 100644 index dcd581914a..0000000000 --- a/load_testing/k3s/cluster-loader/large/deployment.yaml +++ /dev/null @@ -1,62 +0,0 @@ -{{$EnableConfigMaps := DefaultParam .ENABLE_CONFIGMAPS false}} -{{$EnableSecrets := DefaultParam .ENABLE_SECRETS false}} - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: {{.Name}} - labels: - group: load - svc: {{.SvcName}}-{{.Index}} -spec: - replicas: {{RandIntRange .ReplicasMin .ReplicasMax}} - selector: - matchLabels: - name: {{.Name}} - template: - metadata: - labels: - group: load - name: {{.Name}} - svc: {{.SvcName}}-{{.Index}} - spec: - containers: - - image: k8s.gcr.io/pause:3.1 - name: {{.Name}} - resources: - requests: - cpu: 10m - memory: "10M" - volumeMounts: - {{if and $EnableConfigMaps (eq (Mod .Index 20) 0 19) }} # .Index % 20 in {0,19} - 10% deployments will have ConfigMap - - name: configmap - mountPath: /var/configmap - {{end}} - {{if and $EnableSecrets (eq (Mod .Index 20) 10 19) }} # .Index % 20 in {10,19} - 10% deployments will have Secret - - name: secret - mountPath: /var/secret - {{end}} - dnsPolicy: Default - terminationGracePeriodSeconds: 1 - # Add not-ready/unreachable tolerations for 15 minutes so that node - # failure doesn't trigger pod deletion. 
- tolerations: - - key: "node.kubernetes.io/not-ready" - operator: "Exists" - effect: "NoExecute" - tolerationSeconds: 900 - - key: "node.kubernetes.io/unreachable" - operator: "Exists" - effect: "NoExecute" - tolerationSeconds: 900 - volumes: - {{if and $EnableConfigMaps (eq (Mod .Index 20) 0 19) }} # .Index % 20 in {0,19} - 10% deployments will have ConfigMap - - name: configmap - configMap: - name: {{.BaseName}}-{{.Index}} - {{end}} - {{if and $EnableSecrets (eq (Mod .Index 20) 10 19) }} # .Index % 20 in {10,19} - 10% deployments will have Secret - - name: secret - secret: - secretName: {{.BaseName}}-{{.Index}} - {{end}} diff --git a/load_testing/k3s/cluster-loader/large/run-test.sh b/load_testing/k3s/cluster-loader/large/run-test.sh deleted file mode 100644 index b190c99ef3..0000000000 --- a/load_testing/k3s/cluster-loader/large/run-test.sh +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/sh - -clusterloader --kubeconfig=../kubeConfig.yaml --testconfig=config.yaml diff --git a/load_testing/k3s/cluster-loader/large/secret.yaml b/load_testing/k3s/cluster-loader/large/secret.yaml deleted file mode 100644 index 67134b355f..0000000000 --- a/load_testing/k3s/cluster-loader/large/secret.yaml +++ /dev/null @@ -1,7 +0,0 @@ -apiVersion: v1 -kind: Secret -metadata: - name: {{.Name}} -type: Opaque -data: - password: c2NhbGFiaWxpdHkK diff --git a/load_testing/k3s/cluster-loader/large/service.yaml b/load_testing/k3s/cluster-loader/large/service.yaml deleted file mode 100644 index ed6a22c8cf..0000000000 --- a/load_testing/k3s/cluster-loader/large/service.yaml +++ /dev/null @@ -1,16 +0,0 @@ -{{$SetServiceProxyLabel := DefaultParam .SetServiceProxyLabel false}} - -apiVersion: v1 -kind: Service -metadata: - name: {{.Name}} -{{if and $SetServiceProxyLabel (eq (Mod .Index 2) 0)}} - labels: - service.kubernetes.io/service-proxy-name: foo -{{end}} -spec: - selector: - svc: {{.Name}} - ports: - - port: 80 - targetPort: 80 diff --git a/load_testing/k3s/cluster-loader/large/statefulset.yaml b/load_testing/k3s/cluster-loader/large/statefulset.yaml deleted file mode 100644 index bb97bfce9a..0000000000 --- a/load_testing/k3s/cluster-loader/large/statefulset.yaml +++ /dev/null @@ -1,30 +0,0 @@ -apiVersion: apps/v1 -kind: StatefulSet -metadata: - name: {{.Name}} - labels: - group: load -spec: - podManagementPolicy: Parallel - selector: - matchLabels: - name: {{.Name}} - serviceName: {{.Name}} - replicas: {{RandIntRange .ReplicasMin .ReplicasMax}} - template: - metadata: - labels: - group: statefulset - name: {{.Name}} - spec: - terminationGracePeriodSeconds: 1 - containers: - - name: {{.Name}} - image: k8s.gcr.io/pause:3.1 - ports: - - containerPort: 80 - name: web - resources: - requests: - cpu: 10m - memory: "10M" diff --git a/load_testing/k3s/cluster-loader/large/statefulset_service.yaml b/load_testing/k3s/cluster-loader/large/statefulset_service.yaml deleted file mode 100644 index 5e16a47a19..0000000000 --- a/load_testing/k3s/cluster-loader/large/statefulset_service.yaml +++ /dev/null @@ -1,10 +0,0 @@ -apiVersion: v1 -kind: Service -metadata: - name: {{.Name}} - labels: - name: {{.Name}} -spec: - clusterIP: None - selector: - name: {{.Name}} diff --git a/load_testing/k3s/cluster-loader/small/config.yaml b/load_testing/k3s/cluster-loader/small/config.yaml deleted file mode 100644 index 3c999b5069..0000000000 --- a/load_testing/k3s/cluster-loader/small/config.yaml +++ /dev/null @@ -1,471 +0,0 @@ -# ASSUMPTIONS: -# - Underlying cluster should have 100+ nodes. 
-# - Number of nodes should be divisible by NODES_PER_NAMESPACE (default 100). -# - The number of created SVCs is half the number of created Deployments. -# - Only half of Deployments will be assigned 1-1 to existing SVCs. - -#Constants -{{$NODE_MODE := DefaultParam .NODE_MODE "allnodes"}} -{{$NODES_PER_NAMESPACE := DefaultParam .NODES_PER_NAMESPACE 1}} -{{$PODS_PER_NODE := DefaultParam .PODS_PER_NODE 5}} -{{$LOAD_TEST_THROUGHPUT := DefaultParam .LOAD_TEST_THROUGHPUT 10}} -{{$BIG_GROUP_SIZE := 25}} -{{$MEDIUM_GROUP_SIZE := 15}} -{{$SMALL_GROUP_SIZE := 1}} -{{$SMALL_STATEFUL_SETS_PER_NAMESPACE := 1}} -{{$MEDIUM_STATEFUL_SETS_PER_NAMESPACE := 1}} -{{$ENABLE_CHAOSMONKEY := DefaultParam .ENABLE_CHAOSMONKEY false}} -{{$ENABLE_PROMETHEUS_API_RESPONSIVENESS := DefaultParam .ENABLE_PROMETHEUS_API_RESPONSIVENESS false}} -{{$ENABLE_CONFIGMAPS := DefaultParam .ENABLE_CONFIGMAPS false}} -{{$ENABLE_SECRETS := DefaultParam .ENABLE_SECRETS false}} -{{$ENABLE_STATEFULSETS := DefaultParam .ENABLE_STATEFULSETS false}} -#Variables -{{$namespaces := DivideInt .Nodes $NODES_PER_NAMESPACE}} -{{$totalPods := MultiplyInt $namespaces $NODES_PER_NAMESPACE $PODS_PER_NODE}} -{{$podsPerNamespace := DivideInt $totalPods $namespaces}} -{{$saturationTime := DivideInt $totalPods $LOAD_TEST_THROUGHPUT}} -# bigDeployments - 1/4 of namespace pods should be in big Deployments. -{{$bigDeploymentsPerNamespace := DivideInt $podsPerNamespace (MultiplyInt 4 $BIG_GROUP_SIZE)}} -# mediumDeployments - 1/4 of namespace pods should be in medium Deployments. -{{$mediumDeploymentsPerNamespace := DivideInt $podsPerNamespace (MultiplyInt 4 $MEDIUM_GROUP_SIZE)}} -# smallDeployments - 1/2 of namespace pods should be in small Deployments. -{{$smallDeploymentsPerNamespace := DivideInt $podsPerNamespace (MultiplyInt 2 $SMALL_GROUP_SIZE)}} -# If StatefulSets are enabled reduce the number of small and medium deployments per namespace -{{$smallDeploymentsPerNamespace := SubtractInt $smallDeploymentsPerNamespace (IfThenElse $ENABLE_STATEFULSETS $SMALL_STATEFUL_SETS_PER_NAMESPACE 0)}} -{{$mediumDeploymentsPerNamespace := SubtractInt $mediumDeploymentsPerNamespace (IfThenElse $ENABLE_STATEFULSETS $MEDIUM_STATEFUL_SETS_PER_NAMESPACE 0)}} - - -name: load -automanagedNamespaces: {{$namespaces}} -tuningSets: -- name: Sequence - parallelismLimitedLoad: - parallelismLimit: 1 -- name: RandomizedSaturationTimeLimited - RandomizedTimeLimitedLoad: - timeLimit: {{$saturationTime}}s -- name: RandomizedScalingTimeLimited - RandomizedTimeLimitedLoad: - # The expected number of created/deleted pods is totalPods/4 when scaling, - # as each RS changes its size from X to a uniform random value in [X/2, 3X/2]. - # To match 10 [pods/s] requirement, we need to divide saturationTime by 4. 
- timeLimit: {{DivideInt $saturationTime 4}}s -{{if $ENABLE_CHAOSMONKEY}} -chaosMonkey: - nodeFailure: - failureRate: 0.01 - interval: 1m - jitterFactor: 10.0 - simulatedDowntime: 10m -{{end}} -steps: -- name: Starting measurements - measurements: - - Identifier: APIResponsiveness - Method: APIResponsiveness - Params: - action: reset - - Identifier: APIResponsivenessPrometheus - Method: APIResponsivenessPrometheus - Params: - action: start - - Identifier: PodStartupLatency - Method: PodStartupLatency - Params: - action: start - labelSelector: group = load - threshold: 1h - - Identifier: InClusterNetworkLatency - Method: InClusterNetworkLatency - Params: - action: start - replicasPerProbe: {{DivideInt .Nodes 100}} - - Identifier: DnsLookupLatency - Method: DnsLookupLatency - Params: - action: start - replicasPerProbe: {{DivideInt .Nodes 100}} - - Identifier: NetworkProgrammingLatency - Method: NetworkProgrammingLatency - Params: - action: start - - Identifier: TestMetrics - Method: TestMetrics - Params: - action: start - nodeMode: {{$NODE_MODE}} - -- name: Creating SVCs - phases: - - namespaceRange: - min: 1 - max: {{$namespaces}} - replicasPerNamespace: {{DivideInt (AddInt $bigDeploymentsPerNamespace 1) 2}} - tuningSet: Sequence - objectBundle: - - basename: big-service - objectTemplatePath: service.yaml - - namespaceRange: - min: 1 - max: {{$namespaces}} - replicasPerNamespace: {{DivideInt (AddInt $mediumDeploymentsPerNamespace 1) 2}} - tuningSet: Sequence - objectBundle: - - basename: medium-service - objectTemplatePath: service.yaml - - namespaceRange: - min: 1 - max: {{$namespaces}} - replicasPerNamespace: {{DivideInt (AddInt $smallDeploymentsPerNamespace 1) 2}} - tuningSet: Sequence - objectBundle: - - basename: small-service - objectTemplatePath: service.yaml - -- name: Starting measurement for waiting for pods - measurements: - - Identifier: WaitForRunningDeployments - Method: WaitForControlledPodsRunning - Params: - action: start - apiVersion: apps/v1 - kind: Deployment - labelSelector: group = load - operationTimeout: 15m - {{if $ENABLE_STATEFULSETS}} - - Identifier: WaitForRunningStatefulSets - Method: WaitForControlledPodsRunning - Params: - action: start - apiVersion: apps/v1 - kind: StatefulSet - labelSelector: group = load - operationTimeout: 15m - {{end}} - -- name: Creating objects - phases: - - namespaceRange: - min: 1 - max: {{$namespaces}} - replicasPerNamespace: {{$bigDeploymentsPerNamespace}} - tuningSet: RandomizedSaturationTimeLimited - objectBundle: - {{if $ENABLE_CONFIGMAPS}} - - basename: big-deployment - objectTemplatePath: configmap.yaml - {{end}} - {{if $ENABLE_SECRETS}} - - basename: big-deployment - objectTemplatePath: secret.yaml - {{end}} - - basename: big-deployment - objectTemplatePath: deployment.yaml - templateFillMap: - ReplicasMin: {{$BIG_GROUP_SIZE}} - ReplicasMax: {{$BIG_GROUP_SIZE}} - SvcName: big-service - - namespaceRange: - min: 1 - max: {{$namespaces}} - replicasPerNamespace: {{$mediumDeploymentsPerNamespace}} - tuningSet: RandomizedSaturationTimeLimited - objectBundle: - {{if $ENABLE_CONFIGMAPS}} - - basename: medium-deployment - objectTemplatePath: configmap.yaml - {{end}} - {{if $ENABLE_SECRETS}} - - basename: medium-deployment - objectTemplatePath: secret.yaml - {{end}} - - basename: medium-deployment - objectTemplatePath: deployment.yaml - templateFillMap: - ReplicasMin: {{$MEDIUM_GROUP_SIZE}} - ReplicasMax: {{$MEDIUM_GROUP_SIZE}} - SvcName: medium-service - - namespaceRange: - min: 1 - max: {{$namespaces}} - replicasPerNamespace: 
{{$smallDeploymentsPerNamespace}} - tuningSet: RandomizedSaturationTimeLimited - objectBundle: - {{if $ENABLE_CONFIGMAPS}} - - basename: small-deployment - objectTemplatePath: configmap.yaml - {{end}} - {{if $ENABLE_SECRETS}} - - basename: small-deployment - objectTemplatePath: secret.yaml - {{end}} - - basename: small-deployment - objectTemplatePath: deployment.yaml - templateFillMap: - ReplicasMin: {{$SMALL_GROUP_SIZE}} - ReplicasMax: {{$SMALL_GROUP_SIZE}} - SvcName: small-service - {{if $ENABLE_STATEFULSETS}} - - namespaceRange: - min: 1 - max: {{$namespaces}} - replicasPerNamespace: {{$SMALL_STATEFUL_SETS_PER_NAMESPACE}} - tuningSet: RandomizedSaturationTimeLimited - objectBundle: - - basename: small-statefulset - objectTemplatePath: statefulset_service.yaml - - basename: small-statefulset - objectTemplatePath: statefulset.yaml - templateFillMap: - ReplicasMin: {{$SMALL_GROUP_SIZE}} - ReplicasMax: {{$SMALL_GROUP_SIZE}} - - namespaceRange: - min: 1 - max: {{$namespaces}} - replicasPerNamespace: {{$MEDIUM_STATEFUL_SETS_PER_NAMESPACE}} - tuningSet: RandomizedSaturationTimeLimited - objectBundle: - - basename: medium-statefulset - objectTemplatePath: statefulset_service.yaml - - basename: medium-statefulset - objectTemplatePath: statefulset.yaml - templateFillMap: - ReplicasMin: {{$MEDIUM_GROUP_SIZE}} - ReplicasMax: {{$MEDIUM_GROUP_SIZE}} - {{end}} - -- name: Waiting for pods to be running - measurements: - - Identifier: WaitForRunningDeployments - Method: WaitForControlledPodsRunning - Params: - action: gather - {{if $ENABLE_STATEFULSETS}} - - Identifier: WaitForRunningStatefulSets - Method: WaitForControlledPodsRunning - Params: - action: gather - {{end}} - -- name: Scaling objects - phases: - - namespaceRange: - min: 1 - max: {{$namespaces}} - replicasPerNamespace: {{$bigDeploymentsPerNamespace}} - tuningSet: RandomizedScalingTimeLimited - objectBundle: - - basename: big-deployment - objectTemplatePath: deployment.yaml - templateFillMap: - ReplicasMin: {{MultiplyInt $BIG_GROUP_SIZE 0.5}} - ReplicasMax: {{MultiplyInt $BIG_GROUP_SIZE 1.5}} - SvcName: big-service - - namespaceRange: - min: 1 - max: {{$namespaces}} - replicasPerNamespace: {{$mediumDeploymentsPerNamespace}} - tuningSet: RandomizedScalingTimeLimited - objectBundle: - - basename: medium-deployment - objectTemplatePath: deployment.yaml - templateFillMap: - ReplicasMin: {{MultiplyInt $MEDIUM_GROUP_SIZE 0.5}} - ReplicasMax: {{MultiplyInt $MEDIUM_GROUP_SIZE 1.5}} - SvcName: medium-service - - namespaceRange: - min: 1 - max: {{$namespaces}} - replicasPerNamespace: {{$smallDeploymentsPerNamespace}} - tuningSet: RandomizedScalingTimeLimited - objectBundle: - - basename: small-deployment - objectTemplatePath: deployment.yaml - templateFillMap: - ReplicasMin: {{MultiplyInt $SMALL_GROUP_SIZE 0.5}} - ReplicasMax: {{MultiplyInt $SMALL_GROUP_SIZE 1.5}} - SvcName: small-service - {{if $ENABLE_STATEFULSETS}} - - namespaceRange: - min: 1 - max: {{$namespaces}} - replicasPerNamespace: {{$SMALL_STATEFUL_SETS_PER_NAMESPACE}} - tuningSet: RandomizedScalingTimeLimited - objectBundle: - - basename: small-statefulset - objectTemplatePath: statefulset.yaml - templateFillMap: - ReplicasMin: {{MultiplyInt $SMALL_GROUP_SIZE 0.5}} - ReplicasMax: {{MultiplyInt $SMALL_GROUP_SIZE 1.5}} - - namespaceRange: - min: 1 - max: {{$namespaces}} - replicasPerNamespace: {{$MEDIUM_STATEFUL_SETS_PER_NAMESPACE}} - tuningSet: RandomizedScalingTimeLimited - objectBundle: - - basename: medium-statefulset - objectTemplatePath: statefulset.yaml - templateFillMap: - 
ReplicasMin: {{MultiplyInt $MEDIUM_GROUP_SIZE 0.5}} - ReplicasMax: {{MultiplyInt $MEDIUM_GROUP_SIZE 1.5}} - {{end}} - -- name: Waiting for objects to become scaled - measurements: - - Identifier: WaitForRunningDeployments - Method: WaitForControlledPodsRunning - Params: - action: gather - {{if $ENABLE_STATEFULSETS}} - - Identifier: WaitForRunningStatefulSets - Method: WaitForControlledPodsRunning - Params: - action: gather - {{end}} - -- name: Deleting objects - phases: - - namespaceRange: - min: 1 - max: {{$namespaces}} - replicasPerNamespace: 0 - tuningSet: RandomizedSaturationTimeLimited - objectBundle: - - basename: big-deployment - objectTemplatePath: deployment.yaml - {{if $ENABLE_CONFIGMAPS}} - - basename: big-deployment - objectTemplatePath: configmap.yaml - {{end}} - {{if $ENABLE_SECRETS}} - - basename: big-deployment - objectTemplatePath: secret.yaml - {{end}} - - namespaceRange: - min: 1 - max: {{$namespaces}} - replicasPerNamespace: 0 - tuningSet: RandomizedSaturationTimeLimited - objectBundle: - - basename: medium-deployment - objectTemplatePath: deployment.yaml - {{if $ENABLE_CONFIGMAPS}} - - basename: medium-deployment - objectTemplatePath: configmap.yaml - {{end}} - {{if $ENABLE_SECRETS}} - - basename: medium-deployment - objectTemplatePath: secret.yaml - {{end}} - - namespaceRange: - min: 1 - max: {{$namespaces}} - replicasPerNamespace: 0 - tuningSet: RandomizedSaturationTimeLimited - objectBundle: - - basename: small-deployment - objectTemplatePath: deployment.yaml - {{if $ENABLE_CONFIGMAPS}} - - basename: small-deployment - objectTemplatePath: configmap.yaml - {{end}} - {{if $ENABLE_SECRETS}} - - basename: small-deployment - objectTemplatePath: secret.yaml - {{end}} - {{if $ENABLE_STATEFULSETS}} - - namespaceRange: - min: 1 - max: {{$namespaces}} - replicasPerNamespace: 0 - tuningSet: RandomizedSaturationTimeLimited - objectBundle: - - basename: small-statefulset - objectTemplatePath: statefulset.yaml - - basename: small-statefulset - objectTemplatePath: statefulset_service.yaml - - namespaceRange: - min: 1 - max: {{$namespaces}} - replicasPerNamespace: 0 - tuningSet: RandomizedSaturationTimeLimited - objectBundle: - - basename: medium-statefulset - objectTemplatePath: statefulset.yaml - - basename: medium-statefulset - objectTemplatePath: statefulset_service.yaml - {{end}} - -- name: Waiting for pods to be deleted - measurements: - - Identifier: WaitForRunningDeployments - Method: WaitForControlledPodsRunning - Params: - action: gather - {{if $ENABLE_STATEFULSETS}} - - Identifier: WaitForRunningStatefulSets - Method: WaitForControlledPodsRunning - Params: - action: gather - {{end}} - -- name: Deleting SVCs - phases: - - namespaceRange: - min: 1 - max: {{$namespaces}} - replicasPerNamespace: 0 - tuningSet: Sequence - objectBundle: - - basename: big-service - objectTemplatePath: service.yaml - - namespaceRange: - min: 1 - max: {{$namespaces}} - replicasPerNamespace: 0 - tuningSet: Sequence - objectBundle: - - basename: medium-service - objectTemplatePath: service.yaml - - namespaceRange: - min: 1 - max: {{$namespaces}} - replicasPerNamespace: 0 - tuningSet: Sequence - objectBundle: - - basename: small-service - objectTemplatePath: service.yaml - -- name: Collecting measurements - measurements: - - Identifier: APIResponsiveness - Method: APIResponsiveness - Params: - action: gather - - Identifier: APIResponsivenessPrometheus - Method: APIResponsivenessPrometheus - Params: - action: gather - {{if $ENABLE_PROMETHEUS_API_RESPONSIVENESS}} - enableViolations: true - {{end}} - - 
Identifier: PodStartupLatency - Method: PodStartupLatency - Params: - action: gather - - Identifier: InClusterNetworkLatency - Method: InClusterNetworkLatency - Params: - action: gather - - Identifier: DnsLookupLatency - Method: DnsLookupLatency - Params: - action: gather - - Identifier: NetworkProgrammingLatency - Method: NetworkProgrammingLatency - Params: - action: gather - - Identifier: TestMetrics - Method: TestMetrics - Params: - action: gather diff --git a/load_testing/k3s/cluster-loader/small/configmap.yaml b/load_testing/k3s/cluster-loader/small/configmap.yaml deleted file mode 100644 index b249a39143..0000000000 --- a/load_testing/k3s/cluster-loader/small/configmap.yaml +++ /dev/null @@ -1,9 +0,0 @@ -apiVersion: v1 -kind: ConfigMap -metadata: - name: {{.Name}} -data: - data.yaml: |- - a: 1 - b: 2 - c: 3 diff --git a/load_testing/k3s/cluster-loader/small/deployment.yaml b/load_testing/k3s/cluster-loader/small/deployment.yaml deleted file mode 100644 index dcd581914a..0000000000 --- a/load_testing/k3s/cluster-loader/small/deployment.yaml +++ /dev/null @@ -1,62 +0,0 @@ -{{$EnableConfigMaps := DefaultParam .ENABLE_CONFIGMAPS false}} -{{$EnableSecrets := DefaultParam .ENABLE_SECRETS false}} - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: {{.Name}} - labels: - group: load - svc: {{.SvcName}}-{{.Index}} -spec: - replicas: {{RandIntRange .ReplicasMin .ReplicasMax}} - selector: - matchLabels: - name: {{.Name}} - template: - metadata: - labels: - group: load - name: {{.Name}} - svc: {{.SvcName}}-{{.Index}} - spec: - containers: - - image: k8s.gcr.io/pause:3.1 - name: {{.Name}} - resources: - requests: - cpu: 10m - memory: "10M" - volumeMounts: - {{if and $EnableConfigMaps (eq (Mod .Index 20) 0 19) }} # .Index % 20 in {0,19} - 10% deployments will have ConfigMap - - name: configmap - mountPath: /var/configmap - {{end}} - {{if and $EnableSecrets (eq (Mod .Index 20) 10 19) }} # .Index % 20 in {10,19} - 10% deployments will have Secret - - name: secret - mountPath: /var/secret - {{end}} - dnsPolicy: Default - terminationGracePeriodSeconds: 1 - # Add not-ready/unreachable tolerations for 15 minutes so that node - # failure doesn't trigger pod deletion. 
- tolerations: - - key: "node.kubernetes.io/not-ready" - operator: "Exists" - effect: "NoExecute" - tolerationSeconds: 900 - - key: "node.kubernetes.io/unreachable" - operator: "Exists" - effect: "NoExecute" - tolerationSeconds: 900 - volumes: - {{if and $EnableConfigMaps (eq (Mod .Index 20) 0 19) }} # .Index % 20 in {0,19} - 10% deployments will have ConfigMap - - name: configmap - configMap: - name: {{.BaseName}}-{{.Index}} - {{end}} - {{if and $EnableSecrets (eq (Mod .Index 20) 10 19) }} # .Index % 20 in {10,19} - 10% deployments will have Secret - - name: secret - secret: - secretName: {{.BaseName}}-{{.Index}} - {{end}} diff --git a/load_testing/k3s/cluster-loader/small/run-test.sh b/load_testing/k3s/cluster-loader/small/run-test.sh deleted file mode 100644 index b190c99ef3..0000000000 --- a/load_testing/k3s/cluster-loader/small/run-test.sh +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/sh - -clusterloader --kubeconfig=../kubeConfig.yaml --testconfig=config.yaml diff --git a/load_testing/k3s/cluster-loader/small/secret.yaml b/load_testing/k3s/cluster-loader/small/secret.yaml deleted file mode 100644 index 67134b355f..0000000000 --- a/load_testing/k3s/cluster-loader/small/secret.yaml +++ /dev/null @@ -1,7 +0,0 @@ -apiVersion: v1 -kind: Secret -metadata: - name: {{.Name}} -type: Opaque -data: - password: c2NhbGFiaWxpdHkK diff --git a/load_testing/k3s/cluster-loader/small/service.yaml b/load_testing/k3s/cluster-loader/small/service.yaml deleted file mode 100644 index ed6a22c8cf..0000000000 --- a/load_testing/k3s/cluster-loader/small/service.yaml +++ /dev/null @@ -1,16 +0,0 @@ -{{$SetServiceProxyLabel := DefaultParam .SetServiceProxyLabel false}} - -apiVersion: v1 -kind: Service -metadata: - name: {{.Name}} -{{if and $SetServiceProxyLabel (eq (Mod .Index 2) 0)}} - labels: - service.kubernetes.io/service-proxy-name: foo -{{end}} -spec: - selector: - svc: {{.Name}} - ports: - - port: 80 - targetPort: 80 diff --git a/load_testing/k3s/cluster-loader/small/statefulset.yaml b/load_testing/k3s/cluster-loader/small/statefulset.yaml deleted file mode 100644 index bb97bfce9a..0000000000 --- a/load_testing/k3s/cluster-loader/small/statefulset.yaml +++ /dev/null @@ -1,30 +0,0 @@ -apiVersion: apps/v1 -kind: StatefulSet -metadata: - name: {{.Name}} - labels: - group: load -spec: - podManagementPolicy: Parallel - selector: - matchLabels: - name: {{.Name}} - serviceName: {{.Name}} - replicas: {{RandIntRange .ReplicasMin .ReplicasMax}} - template: - metadata: - labels: - group: statefulset - name: {{.Name}} - spec: - terminationGracePeriodSeconds: 1 - containers: - - name: {{.Name}} - image: k8s.gcr.io/pause:3.1 - ports: - - containerPort: 80 - name: web - resources: - requests: - cpu: 10m - memory: "10M" diff --git a/load_testing/k3s/cluster-loader/small/statefulset_service.yaml b/load_testing/k3s/cluster-loader/small/statefulset_service.yaml deleted file mode 100644 index 5e16a47a19..0000000000 --- a/load_testing/k3s/cluster-loader/small/statefulset_service.yaml +++ /dev/null @@ -1,10 +0,0 @@ -apiVersion: v1 -kind: Service -metadata: - name: {{.Name}} - labels: - name: {{.Name}} -spec: - clusterIP: None - selector: - name: {{.Name}} diff --git a/load_testing/k3s/pool/data.tf b/load_testing/k3s/pool/data.tf deleted file mode 100644 index bff5eb3ea3..0000000000 --- a/load_testing/k3s/pool/data.tf +++ /dev/null @@ -1,44 +0,0 @@ -data "terraform_remote_state" "server" { - backend = "local" - - config = { - path = "${path.module}/../server/server.tfstate" - } -} - -data "aws_vpc" "default" { - default = true -} - 
-data "aws_subnet_ids" "available" { - vpc_id = data.aws_vpc.default.id -} - -data "aws_subnet" "selected" { - id = "${tolist(data.aws_subnet_ids.available.ids)[1]}" -} - -data "aws_ami" "ubuntu" { - most_recent = true - owners = ["099720109477"] - - filter { - name = "name" - values = ["ubuntu-minimal/images/*/ubuntu-bionic-18.04-*"] - } - - filter { - name = "virtualization-type" - values = ["hvm"] - } - - filter { - name = "root-device-type" - values = ["ebs"] - } - - filter { - name = "architecture" - values = ["x86_64"] - } -} diff --git a/load_testing/k3s/pool/files/pool_worker_userdata.tmpl b/load_testing/k3s/pool/files/pool_worker_userdata.tmpl deleted file mode 100644 index b47b2fb344..0000000000 --- a/load_testing/k3s/pool/files/pool_worker_userdata.tmpl +++ /dev/null @@ -1,34 +0,0 @@ -#cloud-config -%{ if length(extra_ssh_keys) > 0 } -ssh_authorized_keys: -%{ for ssh_key in extra_ssh_keys } -- ${ssh_key} -%{ endfor } -%{ endif } -runcmd: -- echo "net.ipv4.neigh.default.gc_interval = 3600" >> /etc/sysctl.conf -- echo "net.ipv4.neigh.default.gc_stale_time = 3600" >> /etc/sysctl.conf -- echo "net.ipv4.neigh.default.gc_thresh3 = 16384" >> /etc/sysctl.conf -- echo "net.ipv4.neigh.default.gc_thresh2 = 8192" >> /etc/sysctl.conf -- echo "net.ipv4.neigh.default.gc_thresh1 = 4096" >> /etc/sysctl.conf -- echo "fs.file-max = 12000500" >> /etc/sysctl.conf -- echo "fs.nr_open = 20000500" >> /etc/sysctl.conf -- echo "net.ipv4.tcp_mem = '10000000 10000000 10000000'" >> /etc/sysctl.conf -- echo "net.ipv4.tcp_rmem = '1024 4096 16384'" >> /etc/sysctl.conf -- echo "net.ipv4.tcp_wmem = '1024 4096 16384'" >> /etc/sysctl.conf -- echo "net.core.rmem_max = 16384" >> /etc/sysctl.conf -- echo "net.core.wmem_max = 16384" >> /etc/sysctl.conf -- ulimit -n 20000000 -- echo "# " >> /etc/security/limits.d/limits.conf -- echo " * soft nofile 20000" >> /etc/security/limits.d/limits.conf -- echo " * hard nofile 20000" >> /etc/security/limits.d/limits.conf -- sysctl -p -- apt-get update -- apt-get install -y software-properties-common -- curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add - -- add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" -- apt-get update -- apt-get -y install docker-ce -- apt-get install -y resolvconf linux-headers-$(uname -r) && echo "nameserver 1.1.1.1" > /etc/resolvconf/resolv.conf.d/tail && systemctl start resolvconf -- DEBIAN_FRONTEND=noninteractive apt-get upgrade -y -- n=1; while [ $n -le ${k3s_per_node} ]; do docker run -d --restart=unless-stopped -e K3S_URL=https://${k3s_url}:6443 -e K3S_CLUSTER_SECRET="${k3s_cluster_secret}" --privileged --mount type=tmpfs,destination=/var/run --mount type=tmpfs,destination=/run -m 1g --cpus=".7" rancher/k3s:${install_k3s_version}; n=$(( n + 1 )); done diff --git a/load_testing/k3s/pool/main.tf b/load_testing/k3s/pool/main.tf deleted file mode 100644 index 6bfc0c2a0e..0000000000 --- a/load_testing/k3s/pool/main.tf +++ /dev/null @@ -1,80 +0,0 @@ -terraform { - backend "local" { - path = "pool.tfstate" - } -} - -locals { - name = "load-test-pool" - k3s_cluster_secret = "pvc-6476dcaf-73a0-11e9-b8e5-06943b744282" - install_k3s_version = "v0.9.0-rc2" -} - -provider "aws" { - region = "us-west-2" - profile = "rancher-eng" -} - -resource "aws_security_group" "k3s" { - name = "${local.name}-pool" - vpc_id = data.aws_vpc.default.id - - ingress { - from_port = 22 - to_port = 22 - protocol = "TCP" - cidr_blocks = ["0.0.0.0/0"] - } - - ingress { - from_port = 0 - to_port = 0 - 
protocol = "-1" - cidr_blocks = ["0.0.0.0/0"] - } - - ingress { - from_port = 0 - to_port = 0 - protocol = "-1" - self = true - } - - egress { - from_port = 0 - to_port = 0 - protocol = "-1" - cidr_blocks = ["0.0.0.0/0"] - } -} - -module "k3s-pool-worker-asg" { - source = "terraform-aws-modules/autoscaling/aws" - version = "3.0.0" - name = local.name - asg_name = local.name - instance_type = var.worker_instance_type - image_id = data.aws_ami.ubuntu.id - user_data = base64encode(templatefile("${path.module}/files/pool_worker_userdata.tmpl", { k3s_url = data.terraform_remote_state.server.outputs.public_ip[0], k3s_cluster_secret = local.k3s_cluster_secret, install_k3s_version = local.install_k3s_version, k3s_per_node = var.k3s_per_node, extra_ssh_keys = var.extra_ssh_keys })) - ebs_optimized = true - - desired_capacity = var.node_count - health_check_type = "EC2" - max_size = var.node_count - min_size = var.node_count - vpc_zone_identifier = [data.aws_subnet.selected.id] - spot_price = "0.680" - - security_groups = [ - aws_security_group.k3s.id, - ] - - lc_name = local.name - - root_block_device = [ - { - volume_size = "100" - volume_type = "gp2" - }, - ] -} diff --git a/load_testing/k3s/pool/outputs.tf b/load_testing/k3s/pool/outputs.tf deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/load_testing/k3s/pool/variables.tf b/load_testing/k3s/pool/variables.tf deleted file mode 100644 index 87ac1c0c0f..0000000000 --- a/load_testing/k3s/pool/variables.tf +++ /dev/null @@ -1,22 +0,0 @@ -variable "node_count" { - description = "Number of nodes to run k3s agents on." - type = number - # default = 10 -} - -variable "k3s_per_node" { - description = "Number of k3s agent docker containers to run per ec2 instance" - type = number - default = 10 -} - -variable "worker_instance_type" { - type = string - default = "c5.4xlarge" -} - -variable "extra_ssh_keys" { - type = list - default = [] - description = "Extra ssh keys to inject into Rancher instances" -} diff --git a/load_testing/k3s/pool/versions.tf b/load_testing/k3s/pool/versions.tf deleted file mode 100644 index ac97c6ac8e..0000000000 --- a/load_testing/k3s/pool/versions.tf +++ /dev/null @@ -1,4 +0,0 @@ - -terraform { - required_version = ">= 0.12" -} diff --git a/load_testing/k3s/readme.MD b/load_testing/k3s/readme.MD deleted file mode 100644 index 3b3d1154ea..0000000000 --- a/load_testing/k3s/readme.MD +++ /dev/null @@ -1,47 +0,0 @@ -# K3S Load Testing - -This directory contains tooling to spin up k3s clusters for scale testing (load testing the k3s server). - -## Usage - -From this directory, run the following commands: - -``` -cd server -terraform init -terraform apply -``` - -You will be asked to specify an instance type for the k3s server. For a `large` cluster test, use a `c4.8xlarge`. For a `small` cluster test, use a `t3.micro`. -To run these commands, you will need access to an AWS account (the `rancher-eng` profile is configured by default). - -When the server terraform completes, go to the `pool` directory and run: - -``` -cd pool -terraform init -terraform apply -``` - -You will be asked to specify how many ec2 instances to create (the `node_count` variable). You can also set the `k3s_per_node` and `worker_instance_type` variables when you run `apply` to override their defaults. - -For the `large` cluster test, specify `node_count=100`. That will get you 100 ec2 instances running 10 k3s agents each, for a total of 1000 nodes in your k3s cluster.
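- -For example, you can pass those values on the command line instead of answering the prompts (a hypothetical invocation; the variable names are defined in `pool/variables.tf`): - -``` -terraform apply -var="node_count=100" -var="k3s_per_node=10" -```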
- -For the `small` test, specify `node_count=1` and override `k3s_per_node=5`. That will get you 1 ec2 instance running 5 agents, for a total of 5 nodes in your k3s cluster. - - -Once `pool` is finished, you can run through the cluster-loader scenarios using the `run-test.sh` script in the corresponding directory (depending on whether you are running the `large` or `small` scenario). - -``` -cd cluster-loader/<large or small> -./run-test.sh -``` - -* The `run-test.sh` script assumes you have [cluster-loader](https://github.com/kubernetes/perf-tests/tree/master/clusterloader2) installed on your machine. - - -### TODO - -* Investigate cluster-loader failures. -* Simplify this process. -* Organize reporting on SLOs after cluster-loader completes. diff --git a/load_testing/k3s/server/data.tf b/load_testing/k3s/server/data.tf deleted file mode 100644 index aa0843d511..0000000000 --- a/load_testing/k3s/server/data.tf +++ /dev/null @@ -1,47 +0,0 @@ -data "aws_vpc" "default" { - default = true -} - -data "aws_subnet_ids" "available" { - vpc_id = data.aws_vpc.default.id -} - -data "aws_subnet" "selected" { - id = "${tolist(data.aws_subnet_ids.available.ids)[1]}" -} - -data "aws_ami" "ubuntu" { - most_recent = true - owners = ["099720109477"] - - filter { - name = "name" - values = ["ubuntu-minimal/images/*/ubuntu-bionic-18.04-*"] - } - - filter { - name = "virtualization-type" - values = ["hvm"] - } - - filter { - name = "root-device-type" - values = ["ebs"] - } - - filter { - name = "architecture" - values = ["x86_64"] - } -} - -data "template_file" "metrics" { - template = file("${path.module}/files/metrics.yaml") -} -data "template_file" "k3s-prom-yaml" { - template = file("${path.module}/files/prom.yaml") - vars = { - prom_host = var.prom_host - graf_host = var.graf_host - } -} diff --git a/load_testing/k3s/server/files/metrics.yaml b/load_testing/k3s/server/files/metrics.yaml deleted file mode 100644 index 3b35b737d6..0000000000 --- a/load_testing/k3s/server/files/metrics.yaml +++ /dev/null @@ -1,227 +0,0 @@ -%{ if local.prom_worker_node_count != 0 } ---- -apiVersion: rbac.authorization.k8s.io/v1 -# kubernetes versions before 1.8.0 should use rbac.authorization.k8s.io/v1beta1 -kind: ClusterRoleBinding -metadata: - name: kube-state-metrics -roleRef: - apiGroup: rbac.authorization.k8s.io - kind: ClusterRole - name: kube-state-metrics -subjects: -- kind: ServiceAccount - name: kube-state-metrics - namespace: kube-system ---- -apiVersion: rbac.authorization.k8s.io/v1 -# kubernetes versions before 1.8.0 should use rbac.authorization.k8s.io/v1beta1 -kind: ClusterRole -metadata: - name: kube-state-metrics -rules: -- apiGroups: [""] - resources: - - configmaps - - secrets - - nodes - - pods - - services - - resourcequotas - - replicationcontrollers - - limitranges - - persistentvolumeclaims - - persistentvolumes - - namespaces - - endpoints - verbs: ["list", "watch"] -- apiGroups: ["extensions"] - resources: - - daemonsets - - deployments - - replicasets - - ingresses - verbs: ["list", "watch"] -- apiGroups: ["apps"] - resources: - - daemonsets - - deployments - - replicasets - - statefulsets - verbs: ["list", "watch"] -- apiGroups: ["batch"] - resources: - - cronjobs - - jobs - verbs: ["list", "watch"] -- apiGroups: ["autoscaling"] - resources: - - horizontalpodautoscalers - verbs: ["list", "watch"] -- apiGroups: ["policy"] - resources: - - poddisruptionbudgets - verbs: ["list", "watch"] -- apiGroups: ["certificates.k8s.io"] - resources: - - certificatesigningrequests - verbs: ["list", "watch"] --
apiGroups: ["storage.k8s.io"] - resources: - - storageclasses - verbs: ["list", "watch"] -- apiGroups: ["autoscaling.k8s.io"] - resources: - - verticalpodautoscalers - verbs: ["list", "watch"] ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - labels: - k8s-app: kube-state-metrics - name: kube-state-metrics - namespace: kube-system -spec: - selector: - matchLabels: - k8s-app: kube-state-metrics - replicas: 1 - template: - metadata: - labels: - k8s-app: kube-state-metrics - spec: - serviceAccountName: kube-state-metrics - containers: - - name: kube-state-metrics - image: quay.io/coreos/kube-state-metrics:v1.7.2 - ports: - - name: http-metrics - containerPort: 8080 - - name: telemetry - containerPort: 8081 - livenessProbe: - httpGet: - path: /healthz - port: 8080 - initialDelaySeconds: 5 - timeoutSeconds: 5 - readinessProbe: - httpGet: - path: / - port: 8080 - initialDelaySeconds: 5 - timeoutSeconds: 5 ---- -apiVersion: v1 -kind: ServiceAccount -metadata: - name: kube-state-metrics - namespace: kube-system ---- -apiVersion: v1 -kind: Service -metadata: - name: kube-state-metrics - namespace: kube-system - labels: - k8s-app: kube-state-metrics - annotations: - prometheus.io/scrape: 'true' -spec: - ports: - - name: http-metrics - port: 8080 - targetPort: http-metrics - protocol: TCP - - name: telemetry - port: 8081 - targetPort: telemetry - protocol: TCP - selector: - k8s-app: kube-state-metrics ---- -kind: ClusterRoleBinding -apiVersion: rbac.authorization.k8s.io/v1 -metadata: - name: slo-monitor -subjects: -- kind: ServiceAccount - name: slo-monitor - namespace: kube-system -roleRef: - kind: ClusterRole - name: slo-monitor - apiGroup: rbac.authorization.k8s.io ---- -kind: ClusterRole -apiVersion: rbac.authorization.k8s.io/v1 -metadata: - name: slo-monitor - namespace: kube-system -rules: -- apiGroups: [""] - resources: ["pods", "events"] - verbs: ["get", "watch", "list"] ---- -apiVersion: v1 -kind: ServiceAccount -metadata: - name: slo-monitor - namespace: kube-system ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: slo-monitor - namespace: kube-system - labels: - app: slo-monitor -spec: - selector: - matchLabels: - app: slo-monitor - template: - metadata: - labels: - app: slo-monitor - annotations: - prometheus.io/scrape: "true" - spec: - containers: - - name: slo-monitor - image: gcr.io/google-containers/slo-monitor:0.12.0 - command: - - /slo-monitor - - --alsologtostderr=true - imagePullPolicy: Always - ports: - - name: metrics - containerPort: 8080 - resources: - requests: - cpu: 300m - memory: 100Mi - limits: - cpu: 300m - memory: 100Mi - restartPolicy: Always - serviceAccountName: slo-monitor ---- -apiVersion: v1 -kind: Service -metadata: - name: slo-monitor - namespace: kube-system - labels: - app: slo-monitor -spec: - selector: - app: slo-monitor - ports: - - name: metrics - port: 80 - targetPort: metrics - type: LoadBalancer -%{ endif } diff --git a/load_testing/k3s/server/files/prom.yaml b/load_testing/k3s/server/files/prom.yaml deleted file mode 100644 index 9c780b2d36..0000000000 --- a/load_testing/k3s/server/files/prom.yaml +++ /dev/null @@ -1,86 +0,0 @@ -%{ if local.prom_worker_node_count != 0 } ---- -apiVersion: v1 -kind: Namespace -metadata: - name: monitoring - ---- -apiVersion: helm.cattle.io/v1 -kind: HelmChart -metadata: - name: prometheus - namespace: kube-system -spec: - chart: https://raw.githubusercontent.com/drpebcak/charts/master/prometheus-9.1.0.tgz - targetNamespace: monitoring - valuesContent: |- - alertmanager: - nodeSelector: - prom: "true" 
- persistentVolume: - enabled: false - kubeStateMetrics: - nodeSelector: - prom: "true" - nodeExporter: - nodeSelector: - prom: "true" - server: - nodeSelector: - prom: "true" - ingress: - enabled: true - hosts: - - ${prom_host} - persistentVolume: - enabled: false - pushgateway: - nodeSelector: - prom: "true" - persistentVolume: - enabled: false - serverFiles: - prometheus.yml: - scrape_configs: - - job_name: prometheus - static_configs: - - targets: - - localhost:9090 - - job_name: kubernetes-apiservers - scrape_interval: 10s - scrape_timeout: 10s - metrics_path: /metrics - scheme: https - kubernetes_sd_configs: - - api_server: null - role: endpoints - namespaces: - names: [] - bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token - tls_config: - ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt - insecure_skip_verify: true - relabel_configs: - - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name] - separator: ; - regex: default;kubernetes;https - replacement: $1 - action: keep ---- -apiVersion: helm.cattle.io/v1 -kind: HelmChart -metadata: - name: grafana - namespace: kube-system -spec: - chart: stable/grafana - targetNamespace: monitoring - valuesContent: |- - ingress: - enabled: true - hosts: - - ${graf_host} - nodeSelector: - prom: "true" -%{ endif } diff --git a/load_testing/k3s/server/files/server_userdata.tmpl b/load_testing/k3s/server/files/server_userdata.tmpl deleted file mode 100644 index f9e195315c..0000000000 --- a/load_testing/k3s/server/files/server_userdata.tmpl +++ /dev/null @@ -1,41 +0,0 @@ -#cloud-config -%{ if length(extra_ssh_keys) > 0 } -ssh_authorized_keys: -%{ for ssh_key in extra_ssh_keys } -- ${ssh_key} -%{ endfor } -%{ endif } -write_files: -- path: /var/lib/rancher/k3s/server/manifests/metrics.yaml - permissions: "0755" - owner: root:root - encoding: b64 - content: ${metrics_yaml} -- path: /var/lib/rancher/k3s/server/manifests/prom.yaml - permissions: "0755" - owner: root:root - encoding: b64 - content: ${prom_yaml} -runcmd: -- echo "net.ipv4.neigh.default.gc_interval = 3600" >> /etc/sysctl.conf -- echo "net.ipv4.neigh.default.gc_stale_time = 3600" >> /etc/sysctl.conf -- echo "net.ipv4.neigh.default.gc_thresh3 = 16384" >> /etc/sysctl.conf -- echo "net.ipv4.neigh.default.gc_thresh2 = 8192" >> /etc/sysctl.conf -- echo "net.ipv4.neigh.default.gc_thresh1 = 4096" >> /etc/sysctl.conf -- echo "fs.file-max = 12000500" >> /etc/sysctl.conf -- echo "fs.nr_open = 20000500" >> /etc/sysctl.conf -- echo "net.ipv4.tcp_mem = '10000000 10000000 10000000'" >> /etc/sysctl.conf -- echo "net.ipv4.tcp_rmem = '1024 4096 16384'" >> /etc/sysctl.conf -- echo "net.ipv4.tcp_wmem = '1024 4096 16384'" >> /etc/sysctl.conf -- echo "net.core.rmem_max = 16384" >> /etc/sysctl.conf -- echo "net.core.wmem_max = 16384" >> /etc/sysctl.conf -- ulimit -n 20000000 -- echo "# " >> /etc/security/limits.d/limits.conf -- echo " * soft nofile 20000" >> /etc/security/limits.d/limits.conf -- echo " * hard nofile 20000" >> /etc/security/limits.d/limits.conf -- sysctl -p -- apt-get update -- apt-get install -y software-properties-common resolvconf linux-headers-$(uname -r) -- echo "nameserver 1.1.1.1" > /etc/resolvconf/resolv.conf.d/tail -- systemctl start resolvconf -- until (curl -sfL https://get.k3s.io | INSTALL_K3S_EXEC="${k3s_server_args} --disable-agent --no-deploy traefik --no-deploy servicelb --cluster-cidr=10.0.0.0/8 --service-cidr=192.168.0.0/16 --cluster-dns=192.168.0.10 --tls-san ${public_ip}" 
K3S_CLUSTER_SECRET="${k3s_cluster_secret}" INSTALL_K3S_VERSION=${install_k3s_version} sh -); do echo 'Error installing k3s'; sleep 1; done diff --git a/load_testing/k3s/server/files/worker_userdata.tmpl b/load_testing/k3s/server/files/worker_userdata.tmpl deleted file mode 100644 index e451a6d0ce..0000000000 --- a/load_testing/k3s/server/files/worker_userdata.tmpl +++ /dev/null @@ -1,26 +0,0 @@ -#cloud-config -%{ if length(extra_ssh_keys) > 0 } -ssh_authorized_keys: -%{ for ssh_key in extra_ssh_keys } -- ${ssh_key} -%{ endfor } -%{ endif } -runcmd: -- echo "net.ipv4.neigh.default.gc_interval = 3600" >> /etc/sysctl.conf -- echo "net.ipv4.neigh.default.gc_stale_time = 3600" >> /etc/sysctl.conf -- echo "net.ipv4.neigh.default.gc_thresh3 = 16384" >> /etc/sysctl.conf -- echo "net.ipv4.neigh.default.gc_thresh2 = 8192" >> /etc/sysctl.conf -- echo "net.ipv4.neigh.default.gc_thresh1 = 4096" >> /etc/sysctl.conf -- echo "fs.file-max = 12000500" >> /etc/sysctl.conf -- echo "fs.nr_open = 20000500" >> /etc/sysctl.conf -- echo "net.ipv4.tcp_mem = '10000000 10000000 10000000'" >> /etc/sysctl.conf -- echo "net.ipv4.tcp_rmem = '1024 4096 16384'" >> /etc/sysctl.conf -- echo "net.ipv4.tcp_wmem = '1024 4096 16384'" >> /etc/sysctl.conf -- echo "net.core.rmem_max = 16384" >> /etc/sysctl.conf -- echo "net.core.wmem_max = 16384" >> /etc/sysctl.conf -- ulimit -n 20000 -- echo "# " >> /etc/security/limits.d/limits.conf -- echo " * soft nofile 20000" >> /etc/security/limits.d/limits.conf -- echo " * hard nofile 20000" >> /etc/security/limits.d/limits.conf -- sysctl -p -- until (curl -sfL https://get.k3s.io | INSTALL_K3S_VERSION=${install_k3s_version} INSTALL_K3S_EXEC="${k3s_exec}" K3S_URL=https://${k3s_url}:6443 K3S_CLUSTER_SECRET="${k3s_cluster_secret}" sh -); do echo 'k3s did not install correctly'; sleep 1; done diff --git a/load_testing/k3s/server/main.tf b/load_testing/k3s/server/main.tf deleted file mode 100644 index 391ef3d1fb..0000000000 --- a/load_testing/k3s/server/main.tf +++ /dev/null @@ -1,116 +0,0 @@ -terraform { - backend "local" { - path = "server.tfstate" - } -} - -locals { - name = var.name - k3s_cluster_secret = var.k3s_cluster_secret - install_k3s_version = var.k3s_version - prom_worker_node_count = var.prom_worker_node_count -} - -provider "aws" { - region = "us-west-2" - profile = "rancher-eng" -} - -resource "aws_security_group" "k3s" { - name = "${local.name}-sg" - vpc_id = data.aws_vpc.default.id - - ingress { - from_port = 22 - to_port = 22 - protocol = "TCP" - cidr_blocks = ["0.0.0.0/0"] - } - - ingress { - from_port = 6443 - to_port = 6443 - protocol = "TCP" - cidr_blocks = ["0.0.0.0/0"] - } - - ingress { - from_port = 0 - to_port = 0 - protocol = "-1" - cidr_blocks = ["0.0.0.0/0"] - } - - ingress { - from_port = 0 - to_port = 0 - protocol = "-1" - self = true - } - - egress { - from_port = 0 - to_port = 0 - protocol = "-1" - cidr_blocks = ["0.0.0.0/0"] - } -} - -resource "aws_spot_instance_request" "k3s-server" { - instance_type = var.server_instance_type - ami = data.aws_ami.ubuntu.id - user_data = base64encode(templatefile("${path.module}/files/server_userdata.tmpl", { extra_ssh_keys = var.extra_ssh_keys, public_ip = aws_spot_instance_request.k3s-server.public_ip, metrics_yaml = base64encode(data.template_file.metrics.rendered), prom_yaml = base64encode(data.template_file.k3s-prom-yaml.rendered), k3s_cluster_secret = local.k3s_cluster_secret, install_k3s_version = local.install_k3s_version, k3s_server_args = var.k3s_server_args })) - - ebs_optimized = true - wait_for_fulfillment = true 
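- # wait_for_fulfillment makes Terraform block until AWS actually grants the spot request, so attributes such as public_ip are populated before the prom-worker ASG and kubeconfig provisioner below read them.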
- security_groups = [ - aws_security_group.k3s.id, - ] - - root_block_device { - volume_size = "1000" - volume_type = "gp2" - } - - tags = { - Name = "${local.name}-server" - } -} - -module "k3s-prom-worker-asg" { - source = "terraform-aws-modules/autoscaling/aws" - version = "3.0.0" - name = "${local.name}-prom-worker" - asg_name = "${local.name}-prom-worker" - instance_type = "m5.large" - image_id = data.aws_ami.ubuntu.id - user_data = base64encode(templatefile("${path.module}/files/worker_userdata.tmpl", { extra_ssh_keys = var.extra_ssh_keys, k3s_url = aws_spot_instance_request.k3s-server.public_ip, k3s_cluster_secret = local.k3s_cluster_secret, install_k3s_version = local.install_k3s_version, k3s_exec = "--node-label prom=true" })) - ebs_optimized = true - - desired_capacity = local.prom_worker_node_count - health_check_type = "EC2" - max_size = local.prom_worker_node_count - min_size = local.prom_worker_node_count - vpc_zone_identifier = [data.aws_subnet.selected.id] - spot_price = "0.340" - - security_groups = [ - aws_security_group.k3s.id, - ] - - lc_name = "${local.name}-prom-worker" - - root_block_device = [ - { - volume_size = "100" - volume_type = "gp2" - }, - ] -} - -resource "null_resource" "get-kubeconfig" { - provisioner "local-exec" { - interpreter = ["bash", "-c"] - command = "until ssh ubuntu@${aws_spot_instance_request.k3s-server.public_ip} 'sudo sed \"s/localhost/${aws_spot_instance_request.k3s-server.public_ip}/g;s/127.0.0.1/${aws_spot_instance_request.k3s-server.public_ip}/g\" /etc/rancher/k3s/k3s.yaml' >| ../cluster-loader/kubeConfig.yaml; do sleep 5; done" - } -} diff --git a/load_testing/k3s/server/outputs.tf b/load_testing/k3s/server/outputs.tf deleted file mode 100644 index 7f0cac2753..0000000000 --- a/load_testing/k3s/server/outputs.tf +++ /dev/null @@ -1,11 +0,0 @@ -output "public_ip" { - value = aws_spot_instance_request.k3s-server.public_ip -} - -output "install_k3s_version" { - value = local.install_k3s_version -} - -output "k3s_cluster_secret" { - value = local.k3s_cluster_secret -} diff --git a/load_testing/k3s/server/variables.tf b/load_testing/k3s/server/variables.tf deleted file mode 100644 index b45abc7211..0000000000 --- a/load_testing/k3s/server/variables.tf +++ /dev/null @@ -1,38 +0,0 @@ -variable "server_instance_type" { - # default = "c4.8xlarge" -} -variable "k3s_version" { - default = "v0.9.1" - type = string - description = "Version of K3S to install" -} -variable "k3s_server_args" { - default = "" -} -variable "prom_worker_node_count" { - default = 0 - type = number - description = "The number of workers to create labeled for prometheus" -} -variable "k3s_cluster_secret" { - default = "pvc-6476dcaf-73a0-11e9-b8e5-06943b744282" - type = string - description = "Cluster secret for k3s cluster registration" -} -variable "prom_host" { - default = "" -} -variable "graf_host" { - default = "" -} -variable "name" { - default = "k3s-loadtest" - type = string - description = "Name to identify this cluster" -} - -variable "extra_ssh_keys" { - type = list - default = [] - description = "Extra ssh keys to inject into Rancher instances" -} diff --git a/load_testing/k3s/server/versions.tf b/load_testing/k3s/server/versions.tf deleted file mode 100644 index ac97c6ac8e..0000000000 --- a/load_testing/k3s/server/versions.tf +++ /dev/null @@ -1,4 +0,0 @@ - -terraform { - required_version = ">= 0.12" -} From 128eff9b0e219b5dc154a9fabd877012ac2cccd8 Mon Sep 17 00:00:00 2001 From: galal-hussein Date: Thu, 31 Oct 2019 23:06:54 +0200 Subject: [PATCH 4/7] Add perf test
automation --- tests/perf/.gitignore | 7 + tests/perf/Makefile | 21 + tests/perf/agents/data.tf | 44 + .../agents/files/pool_worker_userdata.tmpl | 33 + tests/perf/agents/main.tf | 79 ++ tests/perf/agents/outputs.tf | 0 tests/perf/agents/variables.tf | 28 + tests/perf/agents/versions.tf | 4 + tests/perf/scripts/config | 28 + tests/perf/scripts/perf | 83 ++ tests/perf/scripts/test | 48 ++ tests/perf/server/data.tf | 52 ++ tests/perf/server/files/metrics.yaml | 227 ++++++ tests/perf/server/files/prom.yaml | 86 ++ tests/perf/server/files/server_userdata.tmpl | 55 ++ tests/perf/server/files/worker_userdata.tmpl | 29 + tests/perf/server/main.tf | 188 +++++ tests/perf/server/outputs.tf | 15 + tests/perf/server/variables.tf | 78 ++ tests/perf/server/versions.tf | 4 + .../tests/density/2000_nodes/override.yaml | 1 + .../tests/density/5000_nodes/override.yaml | 1 + .../600_nodes/high_density_override.yaml | 1 + tests/perf/tests/density/config.yaml | 248 ++++++ tests/perf/tests/density/deployment.yaml | 37 + tests/perf/tests/load/config.yaml | 765 ++++++++++++++++++ tests/perf/tests/load/configmap.yaml | 9 + .../tests/load/daemonset-priorityclass.yaml | 9 + tests/perf/tests/load/daemonset.yaml | 41 + tests/perf/tests/load/deployment.yaml | 63 ++ tests/perf/tests/load/job.yaml | 39 + tests/perf/tests/load/networkpolicy.yaml | 19 + tests/perf/tests/load/pvc.yaml | 4 + tests/perf/tests/load/secret.yaml | 7 + tests/perf/tests/load/service.yaml | 16 + tests/perf/tests/load/statefulset.yaml | 61 ++ .../perf/tests/load/statefulset_service.yaml | 10 + 37 files changed, 2440 insertions(+) create mode 100644 tests/perf/.gitignore create mode 100644 tests/perf/Makefile create mode 100644 tests/perf/agents/data.tf create mode 100644 tests/perf/agents/files/pool_worker_userdata.tmpl create mode 100644 tests/perf/agents/main.tf create mode 100644 tests/perf/agents/outputs.tf create mode 100644 tests/perf/agents/variables.tf create mode 100644 tests/perf/agents/versions.tf create mode 100755 tests/perf/scripts/config create mode 100755 tests/perf/scripts/perf create mode 100755 tests/perf/scripts/test create mode 100644 tests/perf/server/data.tf create mode 100644 tests/perf/server/files/metrics.yaml create mode 100644 tests/perf/server/files/prom.yaml create mode 100644 tests/perf/server/files/server_userdata.tmpl create mode 100644 tests/perf/server/files/worker_userdata.tmpl create mode 100644 tests/perf/server/main.tf create mode 100644 tests/perf/server/outputs.tf create mode 100644 tests/perf/server/variables.tf create mode 100644 tests/perf/server/versions.tf create mode 100644 tests/perf/tests/density/2000_nodes/override.yaml create mode 100644 tests/perf/tests/density/5000_nodes/override.yaml create mode 100644 tests/perf/tests/density/600_nodes/high_density_override.yaml create mode 100644 tests/perf/tests/density/config.yaml create mode 100644 tests/perf/tests/density/deployment.yaml create mode 100644 tests/perf/tests/load/config.yaml create mode 100644 tests/perf/tests/load/configmap.yaml create mode 100644 tests/perf/tests/load/daemonset-priorityclass.yaml create mode 100644 tests/perf/tests/load/daemonset.yaml create mode 100644 tests/perf/tests/load/deployment.yaml create mode 100644 tests/perf/tests/load/job.yaml create mode 100644 tests/perf/tests/load/networkpolicy.yaml create mode 100644 tests/perf/tests/load/pvc.yaml create mode 100644 tests/perf/tests/load/secret.yaml create mode 100644 tests/perf/tests/load/service.yaml create mode 100644 tests/perf/tests/load/statefulset.yaml create mode 
100644 tests/perf/tests/load/statefulset_service.yaml diff --git a/tests/perf/.gitignore b/tests/perf/.gitignore new file mode 100644 index 0000000000..99829f7ce8 --- /dev/null +++ b/tests/perf/.gitignore @@ -0,0 +1,7 @@ +.terraform* +*.tfstate* +*.tfvars* +*.plan* +*tests_results* +*junit.xml +*kubeconfig.yaml diff --git a/tests/perf/Makefile b/tests/perf/Makefile new file mode 100644 index 0000000000..a1d63a52ad --- /dev/null +++ b/tests/perf/Makefile @@ -0,0 +1,21 @@ +MODULE := $(shell basename $$PWD) + +.PHONY: init config apply destroy clean test + +init: + @scripts/perf init + +config: + @scripts/perf config + +apply: + @scripts/perf apply + +destroy: + @scripts/perf destroy + +clean: + @scripts/perf clean + +test: + @scripts/test test_load diff --git a/tests/perf/agents/data.tf b/tests/perf/agents/data.tf new file mode 100644 index 0000000000..bff5eb3ea3 --- /dev/null +++ b/tests/perf/agents/data.tf @@ -0,0 +1,44 @@ +data "terraform_remote_state" "server" { + backend = "local" + + config = { + path = "${path.module}/../server/server.tfstate" + } +} + +data "aws_vpc" "default" { + default = true +} + +data "aws_subnet_ids" "available" { + vpc_id = data.aws_vpc.default.id +} + +data "aws_subnet" "selected" { + id = "${tolist(data.aws_subnet_ids.available.ids)[1]}" +} + +data "aws_ami" "ubuntu" { + most_recent = true + owners = ["099720109477"] + + filter { + name = "name" + values = ["ubuntu-minimal/images/*/ubuntu-bionic-18.04-*"] + } + + filter { + name = "virtualization-type" + values = ["hvm"] + } + + filter { + name = "root-device-type" + values = ["ebs"] + } + + filter { + name = "architecture" + values = ["x86_64"] + } +} diff --git a/tests/perf/agents/files/pool_worker_userdata.tmpl b/tests/perf/agents/files/pool_worker_userdata.tmpl new file mode 100644 index 0000000000..6e08a5d300 --- /dev/null +++ b/tests/perf/agents/files/pool_worker_userdata.tmpl @@ -0,0 +1,33 @@ +#cloud-config +%{ if length(extra_ssh_keys) > 0 } +ssh_authorized_keys: +%{ for ssh_key in extra_ssh_keys } +- ${ssh_key} +%{ endfor } +%{ endif } +runcmd: +- echo "net.ipv4.neigh.default.gc_interval = 3600" >> /etc/sysctl.conf +- echo "net.ipv4.neigh.default.gc_stale_time = 3600" >> /etc/sysctl.conf +- echo "net.ipv4.neigh.default.gc_thresh3 = 16384" >> /etc/sysctl.conf +- echo "net.ipv4.neigh.default.gc_thresh2 = 8192" >> /etc/sysctl.conf +- echo "net.ipv4.neigh.default.gc_thresh1 = 4096" >> /etc/sysctl.conf +- echo "fs.file-max = 12000500" >> /etc/sysctl.conf +- echo "fs.nr_open = 20000500" >> /etc/sysctl.conf +- echo "net.ipv4.tcp_mem = '10000000 10000000 10000000'" >> /etc/sysctl.conf +- echo "net.ipv4.tcp_rmem = '1024 4096 16384'" >> /etc/sysctl.conf +- echo "net.ipv4.tcp_wmem = '1024 4096 16384'" >> /etc/sysctl.conf +- echo "net.core.rmem_max = 16384" >> /etc/sysctl.conf +- echo "net.core.wmem_max = 16384" >> /etc/sysctl.conf +- ulimit -n 20000000 +- echo "# " >> /etc/security/limits.d/limits.conf +- echo " * soft nofile 20000" >> /etc/security/limits.d/limits.conf +- echo " * hard nofile 20000" >> /etc/security/limits.d/limits.conf +- sysctl -p +- apt-get update +- apt-get install -y software-properties-common +- apt-get install -y resolvconf linux-headers-$(uname -r) && echo "nameserver 1.1.1.1" > /etc/resolvconf/resolv.conf.d/tail && systemctl start resolvconf +- DEBIAN_FRONTEND=noninteractive apt-get upgrade -y +- wget https://raw.githubusercontent.com/galal-hussein/k3s/k3s_with_kine_fix/k3s +- cp k3s /usr/local/bin/k3s +- chmod +x /usr/local/bin/k3s +- until (curl -sfL https://get.k3s.io | 
K3S_URL=https://${k3s_url}:6443 K3S_CLUSTER_SECRET="${k3s_cluster_secret}" INSTALL_K3S_VERSION="${install_k3s_version}" sh -); do echo 'Error installing k3s agent'; sleep 1; done diff --git a/tests/perf/agents/main.tf b/tests/perf/agents/main.tf new file mode 100644 index 0000000000..975117cddd --- /dev/null +++ b/tests/perf/agents/main.tf @@ -0,0 +1,79 @@ +terraform { + backend "local" { + path = "pool.tfstate" + } +} + +locals { + name = var.name + k3s_cluster_secret = "pvc-6476dcaf-73a0-11e9-b8e5-06943b744282" +} + +provider "aws" { + region = "us-west-2" + profile = "rancher-eng" +} + +resource "aws_security_group" "k3s" { + name = "${local.name}-pool" + vpc_id = data.aws_vpc.default.id + + ingress { + from_port = 22 + to_port = 22 + protocol = "TCP" + cidr_blocks = ["0.0.0.0/0"] + } + + ingress { + from_port = 0 + to_port = 0 + protocol = "-1" + cidr_blocks = ["0.0.0.0/0"] + } + + ingress { + from_port = 0 + to_port = 0 + protocol = "-1" + self = true + } + + egress { + from_port = 0 + to_port = 0 + protocol = "-1" + cidr_blocks = ["0.0.0.0/0"] + } +} + +module "k3s-pool-worker-asg" { + source = "terraform-aws-modules/autoscaling/aws" + version = "3.0.0" + name = "${local.name}-pool" + asg_name = "${local.name}-pool" + instance_type = var.worker_instance_type + image_id = data.aws_ami.ubuntu.id + user_data = base64encode(templatefile("${path.module}/files/pool_worker_userdata.tmpl", { k3s_url = data.terraform_remote_state.server.outputs.public_ip, k3s_cluster_secret = local.k3s_cluster_secret, extra_ssh_keys = var.extra_ssh_keys, install_k3s_version = var.k3s_version })) + ebs_optimized = true + + desired_capacity = var.node_count + health_check_type = "EC2" + max_size = var.node_count + min_size = var.node_count + vpc_zone_identifier = [data.aws_subnet.selected.id] + spot_price = "0.680" + + security_groups = [ + aws_security_group.k3s.id, + ] + + lc_name = "${local.name}-pool" + + root_block_device = [ + { + volume_size = "100" + volume_type = "gp2" + }, + ] +} diff --git a/tests/perf/agents/outputs.tf b/tests/perf/agents/outputs.tf new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/perf/agents/variables.tf b/tests/perf/agents/variables.tf new file mode 100644 index 0000000000..37a587d413 --- /dev/null +++ b/tests/perf/agents/variables.tf @@ -0,0 +1,28 @@ +variable "node_count" { + description = "Number of nodes to run k3s agents on."
+ type = number + # default = 10 +} + +variable "worker_instance_type" { + type = string + default = "t3.2xlarge" +} + +variable "extra_ssh_keys" { + type = list + default = [] + description = "Extra ssh keys to inject into Rancher instances" +} + +variable "k3s_version" { + default = "v0.9.1" + type = string + description = "Version of K3S to install" +} + +variable "name" { + default = "k3s-loadtest" + type = string + description = "Name to identify this cluster" +} diff --git a/tests/perf/agents/versions.tf b/tests/perf/agents/versions.tf new file mode 100644 index 0000000000..ac97c6ac8e --- /dev/null +++ b/tests/perf/agents/versions.tf @@ -0,0 +1,4 @@ + +terraform { + required_version = ">= 0.12" +} diff --git a/tests/perf/scripts/config b/tests/perf/scripts/config new file mode 100755 index 0000000000..8e5f09a3fd --- /dev/null +++ b/tests/perf/scripts/config @@ -0,0 +1,28 @@ +## MAIN VARIABLES ## +#################### +CLUSTER_NAME="hgalal-k3s" +K3S_VERSION="v0.10.0" +EXTRA_SSH_KEYS="ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDZBAE6I9J733HJfCBVu7iWSUuJ7th0U4P4IFfpFDca52n/Hk4yFFr8SPR8JJc1n42c3vEVCbExp/MD4ihqEBy9+pLewxA+fkb7UAT4cT2eLfvZdTTVe8KSiw6lVN6tWSoNXmNqY+wH7zWQ04lfjXPa/c01L1n2XwV/O+5xii9vEuSxN9YhfQ/s61SdLFqQ5yS8gPsM0qQW+bFt5KGGbapqztDO+h9lxGbZRcRAKbCzZ5kF1mhjI/+VubTWKtoVLCumjzjYqILYyx9g/mLSo26qjDEZvtwBQB9KLugDAtnalLVp0HgivC5YfLHr8PxViVSHfIIKS2DhUpn07jr8eKi9" +PRIVATE_KEY_PATH="/home/hussein/.ssh/id_rsa" #this has to be a full path + + +## K3S SERVER VARIABLES ## +########################## +K3S_HA=1 +MASTER_COUNT=3 +DB_INSTANCE_TYPE="db.m4.4xlarge" +SERVER_INSTANCE_TYPE="m5.2xlarge" +DEBUG=1 + + +## PROMETHEUS SERVER VARIABLES ## +################################# +PROM_WORKER_NODE_COUNT=1 +PROM_HOST="prometheus-load.eng.rancher.space" +GRAF_HOST="prometheus-load.eng.rancher.space" + + +## K3S AGENTS VARIABLES ## +########################## +AGENT_NODE_COUNT=100 +WORKER_INSTANCE_TYPE="m5.xlarge" diff --git a/tests/perf/scripts/perf b/tests/perf/scripts/perf new file mode 100755 index 0000000000..bf60a715e0 --- /dev/null +++ b/tests/perf/scripts/perf @@ -0,0 +1,83 @@ +#!/bin/bash -ex + +TERRAFORM_PLAN_CMD="terraform plan --var-file variables.tfvars --out k3s.plan" +TERRAFORM_APPLY_CMD="terraform apply k3s.plan" +TERRAFORM_DESTROY_CMD="terraform destroy --var-file variables.tfvars --force" + +for bin in docker kubectl terraform; do + type $bin >/dev/null 2>&1 || (echo "$bin is not in the path. Please make sure it is installed and in PATH."; exit 1) +done + +init() { + for i in server agents; do + pushd $i + terraform init + popd + done +} + +apply() { + # init terraform + config + # Run apply for server and agents + for i in server agents; do + if [ $i == "agents" ]; then + echo "Sleeping 1 minute until server(s) is initialized" + sleep 60 + fi + pushd $i + $TERRAFORM_PLAN_CMD + $TERRAFORM_APPLY_CMD + popd + done +} + +config() { + source scripts/config + pushd ./server +cat <
<MAIN > variables.tfvars +name = "${CLUSTER_NAME}" +db_instance_type = "${DB_INSTANCE_TYPE}" +server_instance_type = "${SERVER_INSTANCE_TYPE}" +extra_ssh_keys = ["${EXTRA_SSH_KEYS}"] +master_count = ${MASTER_COUNT} +k3s_ha = ${K3S_HA} +k3s_version = "${K3S_VERSION}" +prom_worker_node_count = ${PROM_WORKER_NODE_COUNT} +prom_host = "${PROM_HOST}" +graf_host = "${GRAF_HOST}" +ssh_key_path = "${PRIVATE_KEY_PATH}" +debug = ${DEBUG} +MAIN +popd + +pushd ./agents +cat <<MAIN >
variables.tfvars +name = "${CLUSTER_NAME}" +node_count = ${AGENT_NODE_COUNT} +extra_ssh_keys = ["${EXTRA_SSH_KEYS}"] +k3s_version = "${K3S_VERSION}" +worker_instance_type = "${WORKER_INSTANCE_TYPE}" +MAIN +popd +} + +clean() { + # clean server and agents + for i in server agents; do + pushd $i + rm -f *.plan *.tfvars *.tfstate* + popd + done +} + +destroy() { + for i in agents server; do + pushd $i + terraform destroy --var-file variables.tfvars --force + popd + done + clean +} + +$@ diff --git a/tests/perf/scripts/test b/tests/perf/scripts/test new file mode 100755 index 0000000000..150bd9eff9 --- /dev/null +++ b/tests/perf/scripts/test @@ -0,0 +1,48 @@ +#!/bin/bash -ex + +test_load() { + source scripts/config + masterips=`terraform output -state=server/server.tfstate | grep k3s_server_ips | cut -d "=" -f 2` + pushd tests/ + docker run -v $PRIVATE_KEY_PATH:/opt/priv_key \ + -e KUBE_SSH_USER=ubuntu \ + -e LOCAL_SSH_KEY=/opt/priv_key \ + -it -v $PWD/:/opt/k3s/perf-tests husseingalal/clusterloader:dev \ + clusterloader --testconfig /opt/k3s/perf-tests/load/config.yaml \ + --kubeconfig /opt/k3s/perf-tests/kubeconfig.yaml \ + --masterip $masterips \ + --provider=local \ + --report-dir /opt/k3s/perf-tests/load_tests_results \ + --enable-prometheus-server \ + --tear-down-prometheus-server=0 + popd +} + +test_density() { + source scripts/config + masterips=`terraform output -state=server/server.tfstate | grep k3s_server_ips | cut -d "=" -f 2` + pushd tests/ + docker run -e KUBE_SSH_USER=ubuntu \ + -v $PRIVATE_KEY_PATH:/opt/priv_key \ + -e LOCAL_SSH_KEY=/opt/priv_key \ + -it -v $PWD/:/opt/k3s/perf-tests husseingalal/clusterloader:dev \ + clusterloader --testconfig /opt/k3s/perf-tests/density/config.yaml \ + --kubeconfig /opt/k3s/perf-tests/kubeconfig.yaml \ + --masterip $masterips \ + --provider=local \ + --report-dir /opt/k3s/perf-tests/density_tests_results \ + --enable-prometheus-server \ + --tear-down-prometheus-server=0 + popd +} + +clean() { + # clean kubeconfig + pushd tests/ + rm -f kubeconfig + rm -rf load_tests_results/ + rm -rf density_tests_results/ + popd +} + +$@ diff --git a/tests/perf/server/data.tf b/tests/perf/server/data.tf new file mode 100644 index 0000000000..9a269d4e1e --- /dev/null +++ b/tests/perf/server/data.tf @@ -0,0 +1,52 @@ +data "aws_vpc" "default" { + default = true +} + +data "aws_subnet_ids" "available" { + vpc_id = data.aws_vpc.default.id +} + +data "aws_subnet" "selected" { + id = "${tolist(data.aws_subnet_ids.available.ids)[1]}" +} + +data "aws_ami" "ubuntu" { + most_recent = true + owners = ["099720109477"] + + filter { + name = "name" + values = ["ubuntu-minimal/images/*/ubuntu-bionic-18.04-*"] + } + + filter { + name = "virtualization-type" + values = ["hvm"] + } + + filter { + name = "root-device-type" + values = ["ebs"] + } + + filter { + name = "architecture" + values = ["x86_64"] + } +} + +data "template_file" "metrics" { + template = file("${path.module}/files/metrics.yaml") + vars = { + prom_worker_node_count = local.prom_worker_node_count + + } +} +data "template_file" "k3s-prom-yaml" { + template = file("${path.module}/files/prom.yaml") + vars = { + prom_host = var.prom_host + graf_host = var.graf_host + prom_worker_node_count = local.prom_worker_node_count + } +} diff --git a/tests/perf/server/files/metrics.yaml b/tests/perf/server/files/metrics.yaml new file mode 100644 index 0000000000..d3cfb79659 --- /dev/null +++ b/tests/perf/server/files/metrics.yaml @@ -0,0 +1,227 @@ +%{ if prom_worker_node_count != 0 } +--- +apiVersion: 
rbac.authorization.k8s.io/v1 +# kubernetes versions before 1.8.0 should use rbac.authorization.k8s.io/v1beta1 +kind: ClusterRoleBinding +metadata: + name: kube-state-metrics +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: kube-state-metrics +subjects: +- kind: ServiceAccount + name: kube-state-metrics + namespace: kube-system +--- +apiVersion: rbac.authorization.k8s.io/v1 +# kubernetes versions before 1.8.0 should use rbac.authorization.k8s.io/v1beta1 +kind: ClusterRole +metadata: + name: kube-state-metrics +rules: +- apiGroups: [""] + resources: + - configmaps + - secrets + - nodes + - pods + - services + - resourcequotas + - replicationcontrollers + - limitranges + - persistentvolumeclaims + - persistentvolumes + - namespaces + - endpoints + verbs: ["list", "watch"] +- apiGroups: ["extensions"] + resources: + - daemonsets + - deployments + - replicasets + - ingresses + verbs: ["list", "watch"] +- apiGroups: ["apps"] + resources: + - daemonsets + - deployments + - replicasets + - statefulsets + verbs: ["list", "watch"] +- apiGroups: ["batch"] + resources: + - cronjobs + - jobs + verbs: ["list", "watch"] +- apiGroups: ["autoscaling"] + resources: + - horizontalpodautoscalers + verbs: ["list", "watch"] +- apiGroups: ["policy"] + resources: + - poddisruptionbudgets + verbs: ["list", "watch"] +- apiGroups: ["certificates.k8s.io"] + resources: + - certificatesigningrequests + verbs: ["list", "watch"] +- apiGroups: ["storage.k8s.io"] + resources: + - storageclasses + verbs: ["list", "watch"] +- apiGroups: ["autoscaling.k8s.io"] + resources: + - verticalpodautoscalers + verbs: ["list", "watch"] +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + labels: + k8s-app: kube-state-metrics + name: kube-state-metrics + namespace: kube-system +spec: + selector: + matchLabels: + k8s-app: kube-state-metrics + replicas: 1 + template: + metadata: + labels: + k8s-app: kube-state-metrics + spec: + serviceAccountName: kube-state-metrics + containers: + - name: kube-state-metrics + image: quay.io/coreos/kube-state-metrics:v1.7.2 + ports: + - name: http-metrics + containerPort: 8080 + - name: telemetry + containerPort: 8081 + livenessProbe: + httpGet: + path: /healthz + port: 8080 + initialDelaySeconds: 5 + timeoutSeconds: 5 + readinessProbe: + httpGet: + path: / + port: 8080 + initialDelaySeconds: 5 + timeoutSeconds: 5 +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: kube-state-metrics + namespace: kube-system +--- +apiVersion: v1 +kind: Service +metadata: + name: kube-state-metrics + namespace: kube-system + labels: + k8s-app: kube-state-metrics + annotations: + prometheus.io/scrape: 'true' +spec: + ports: + - name: http-metrics + port: 8080 + targetPort: http-metrics + protocol: TCP + - name: telemetry + port: 8081 + targetPort: telemetry + protocol: TCP + selector: + k8s-app: kube-state-metrics +--- +kind: ClusterRoleBinding +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: slo-monitor +subjects: +- kind: ServiceAccount + name: slo-monitor + namespace: kube-system +roleRef: + kind: ClusterRole + name: slo-monitor + apiGroup: rbac.authorization.k8s.io +--- +kind: ClusterRole +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: slo-monitor + namespace: kube-system +rules: +- apiGroups: [""] + resources: ["pods", "events"] + verbs: ["get", "watch", "list"] +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: slo-monitor + namespace: kube-system +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: slo-monitor + namespace: 
kube-system + labels: + app: slo-monitor +spec: + selector: + matchLabels: + app: slo-monitor + template: + metadata: + labels: + app: slo-monitor + annotations: + prometheus.io/scrape: "true" + spec: + containers: + - name: slo-monitor + image: gcr.io/google-containers/slo-monitor:0.12.0 + command: + - /slo-monitor + - --alsologtostderr=true + imagePullPolicy: Always + ports: + - name: metrics + containerPort: 8080 + resources: + requests: + cpu: 300m + memory: 100Mi + limits: + cpu: 300m + memory: 100Mi + restartPolicy: Always + serviceAccountName: slo-monitor +--- +apiVersion: v1 +kind: Service +metadata: + name: slo-monitor + namespace: kube-system + labels: + app: slo-monitor +spec: + selector: + app: slo-monitor + ports: + - name: metrics + port: 80 + targetPort: metrics + type: ClusterIP +%{ endif } diff --git a/tests/perf/server/files/prom.yaml b/tests/perf/server/files/prom.yaml new file mode 100644 index 0000000000..369a922548 --- /dev/null +++ b/tests/perf/server/files/prom.yaml @@ -0,0 +1,86 @@ +%{ if prom_worker_node_count != 0 } +--- +apiVersion: v1 +kind: Namespace +metadata: + name: monitoring + +--- +apiVersion: helm.cattle.io/v1 +kind: HelmChart +metadata: + name: prometheus + namespace: kube-system +spec: + chart: https://raw.githubusercontent.com/galal-hussein/charts/master/prometheus-9.2.0.tgz + targetNamespace: monitoring + valuesContent: |- + alertmanager: + nodeSelector: + prom: "true" + persistentVolume: + enabled: false + kubeStateMetrics: + nodeSelector: + prom: "true" + nodeExporter: + nodeSelector: + prom: "true" + server: + nodeSelector: + prom: "true" + ingress: + enabled: true + hosts: + - ${prom_host} + persistentVolume: + enabled: false + pushgateway: + nodeSelector: + prom: "true" + persistentVolume: + enabled: false + serverFiles: + prometheus.yml: + scrape_configs: + - job_name: prometheus + static_configs: + - targets: + - localhost:9090 + - job_name: kubernetes-apiservers + scrape_interval: 10s + scrape_timeout: 10s + metrics_path: /metrics + scheme: https + kubernetes_sd_configs: + - api_server: null + role: endpoints + namespaces: + names: [] + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + insecure_skip_verify: true + relabel_configs: + - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name] + separator: ; + regex: default;kubernetes;https + replacement: $1 + action: keep +--- +apiVersion: helm.cattle.io/v1 +kind: HelmChart +metadata: + name: grafana + namespace: kube-system +spec: + chart: stable/grafana + targetNamespace: monitoring + valuesContent: |- + ingress: + enabled: true + hosts: + - ${graf_host} + nodeSelector: + prom: "true" +%{ endif } diff --git a/tests/perf/server/files/server_userdata.tmpl b/tests/perf/server/files/server_userdata.tmpl new file mode 100644 index 0000000000..17cad50b88 --- /dev/null +++ b/tests/perf/server/files/server_userdata.tmpl @@ -0,0 +1,55 @@ +#cloud-config +%{ if length(extra_ssh_keys) > 0 } +ssh_authorized_keys: +%{ for ssh_key in extra_ssh_keys } +- ${ssh_key} +%{ endfor } +%{ endif } +write_files: +- path: /var/lib/rancher/k3s/server/manifests/metrics.yaml + permissions: "0755" + owner: root:root + encoding: b64 + content: ${metrics_yaml} +- path: /var/lib/rancher/k3s/server/manifests/prom.yaml + permissions: "0755" + owner: root:root + encoding: b64 + content: ${prom_yaml} +runcmd: +- echo "net.ipv4.neigh.default.gc_interval = 3600" >> 
/etc/sysctl.conf +- echo "net.ipv4.neigh.default.gc_stale_time = 3600" >> /etc/sysctl.conf +- echo "net.ipv4.neigh.default.gc_thresh3 = 16384" >> /etc/sysctl.conf +- echo "net.ipv4.neigh.default.gc_thresh2 = 8192" >> /etc/sysctl.conf +- echo "net.ipv4.neigh.default.gc_thresh1 = 4096" >> /etc/sysctl.conf +- echo "fs.file-max = 12000500" >> /etc/sysctl.conf +- echo "fs.nr_open = 20000500" >> /etc/sysctl.conf +- echo "net.ipv4.tcp_mem = '10000000 10000000 10000000'" >> /etc/sysctl.conf +- echo "net.ipv4.tcp_rmem = '1024 4096 16384'" >> /etc/sysctl.conf +- echo "net.ipv4.tcp_wmem = '1024 4096 16384'" >> /etc/sysctl.conf +- echo "net.core.rmem_max = 16384" >> /etc/sysctl.conf +- echo "net.core.wmem_max = 16384" >> /etc/sysctl.conf +- ulimit -n 20000000 +- echo "# " >> /etc/security/limits.d/limits.conf +- echo " * soft nofile 20000" >> /etc/security/limits.d/limits.conf +- echo " * hard nofile 20000" >> /etc/security/limits.d/limits.conf +- sysctl -p +- apt-get update +- apt-get install -y git vim software-properties-common resolvconf linux-headers-$(uname -r) +- echo "nameserver 1.1.1.1" > /etc/resolvconf/resolv.conf.d/tail +- echo "RateLimitIntervalSec=0" >> /etc/systemd/journald.conf +- echo "RateLimitBurst=0" >> /etc/systemd/journald.conf +- systemctl restart systemd-journald.service +- systemctl start resolvconf +- wget https://raw.githubusercontent.com/galal-hussein/k3s/k3s_with_kine_fix/k3s +- cp k3s /usr/local/bin/k3s +- chmod +x /usr/local/bin/k3s +%{if master_index != 0 } +- sleep 20 +%{ endif } +- until (curl -sfL https://get.k3s.io | INSTALL_K3S_SKIP_DOWNLOAD=true K3S_CLUSTER_SECRET="${k3s_cluster_secret}" INSTALL_K3S_VERSION="${install_k3s_version}" INSTALL_K3S_EXEC="${k3s_server_args} --cluster-cidr=10.0.0.0/8 --no-deploy traefik --no-deploy servicelb --tls-san ${lb_address} %{ if use_ha == "true" } --storage-endpoint="postgres://${db_username}:${db_password}@${db_address}:5432/${db_name}" %{ if master_index == 0 }--bootstrap-save%{ endif } %{ endif }" sh -); do echo 'Error installing k3s'; sleep 1; done +%{if debug != 0 } +- sed -i 's/bin\/k3s/bin\/k3s --debug/g' /etc/systemd/system/k3s.service +- systemctl daemon-reload +- systemctl restart k3s +%{ endif } diff --git a/tests/perf/server/files/worker_userdata.tmpl b/tests/perf/server/files/worker_userdata.tmpl new file mode 100644 index 0000000000..90712c0bdc --- /dev/null +++ b/tests/perf/server/files/worker_userdata.tmpl @@ -0,0 +1,29 @@ +#cloud-config +%{ if length(extra_ssh_keys) > 0 } +ssh_authorized_keys: +%{ for ssh_key in extra_ssh_keys } +- ${ssh_key} +%{ endfor } +%{ endif } +runcmd: +- echo "net.ipv4.neigh.default.gc_interval = 3600" >> /etc/sysctl.conf +- echo "net.ipv4.neigh.default.gc_stale_time = 3600" >> /etc/sysctl.conf +- echo "net.ipv4.neigh.default.gc_thresh3 = 16384" >> /etc/sysctl.conf +- echo "net.ipv4.neigh.default.gc_thresh2 = 8192" >> /etc/sysctl.conf +- echo "net.ipv4.neigh.default.gc_thresh1 = 4096" >> /etc/sysctl.conf +- echo "fs.file-max = 12000500" >> /etc/sysctl.conf +- echo "fs.nr_open = 20000500" >> /etc/sysctl.conf +- echo "net.ipv4.tcp_mem = '10000000 10000000 10000000'" >> /etc/sysctl.conf +- echo "net.ipv4.tcp_rmem = '1024 4096 16384'" >> /etc/sysctl.conf +- echo "net.ipv4.tcp_wmem = '1024 4096 16384'" >> /etc/sysctl.conf +- echo "net.core.rmem_max = 16384" >> /etc/sysctl.conf +- echo "net.core.wmem_max = 16384" >> /etc/sysctl.conf +- ulimit -n 20000 +- echo "# " >> /etc/security/limits.d/limits.conf +- echo " * soft nofile 20000" >> /etc/security/limits.d/limits.conf +- echo " * hard nofile 
20000" >> /etc/security/limits.d/limits.conf +- sysctl -p +- wget https://raw.githubusercontent.com/galal-hussein/k3s/k3s_with_kine_fix/k3s +- cp k3s /usr/local/bin/k3s +- chmod +x /usr/local/bin/k3s +- until (curl -sfL https://get.k3s.io | INSTALL_K3S_VERSION=${install_k3s_version} INSTALL_K3S_EXEC="${k3s_exec}" K3S_URL=https://${k3s_url}:6443 K3S_CLUSTER_SECRET="${k3s_cluster_secret}" sh -); do echo 'k3s did not install correctly'; sleep 1; done diff --git a/tests/perf/server/main.tf b/tests/perf/server/main.tf new file mode 100644 index 0000000000..bffc863b2a --- /dev/null +++ b/tests/perf/server/main.tf @@ -0,0 +1,188 @@ +terraform { + backend "local" { + path = "server.tfstate" + } +} + +locals { + name = var.name + k3s_cluster_secret = var.k3s_cluster_secret + install_k3s_version = var.k3s_version + prom_worker_node_count = var.prom_worker_node_count +} + +provider "aws" { + region = "us-west-2" + profile = "rancher-eng" +} + +resource "aws_security_group" "k3s" { + name = "${local.name}-sg" + vpc_id = data.aws_vpc.default.id + + ingress { + from_port = 22 + to_port = 22 + protocol = "TCP" + cidr_blocks = ["0.0.0.0/0"] + } + + ingress { + from_port = 6443 + to_port = 6443 + protocol = "TCP" + cidr_blocks = ["0.0.0.0/0"] + } + + ingress { + from_port = 0 + to_port = 0 + protocol = "-1" + cidr_blocks = ["0.0.0.0/0"] + } + + ingress { + from_port = 0 + to_port = 0 + protocol = "-1" + self = true + } + + egress { + from_port = 0 + to_port = 0 + protocol = "-1" + cidr_blocks = ["0.0.0.0/0"] + } +} + +resource "aws_db_instance" "k3s_db" { + count = "${var.k3s_ha}" + allocated_storage = 100 #baseline iops is 300 with gp2 + storage_type = "io1" + iops = "3000" + engine = "postgres" + engine_version = "11.5" + instance_class = "${var.db_instance_type}" + name = "${var.db_name}" + username = "${var.db_username}" + password = "${var.db_password}" + skip_final_snapshot = true + multi_az = false +} + +resource "aws_lb" "k3s-master-nlb" { + name = "${local.name}-nlb" + internal = false + load_balancer_type = "network" + subnets = [data.aws_subnet.selected.id] +} + +resource "aws_lb_target_group" "k3s-master-nlb-tg" { + name = "${local.name}-nlb-tg" + port = "6443" + protocol = "TCP" + vpc_id = data.aws_vpc.default.id + deregistration_delay = "300" + health_check { + interval = "30" + port = "6443" + protocol = "TCP" + healthy_threshold = "10" + unhealthy_threshold= "10" + } +} + +resource "aws_lb_listener" "k3s-master-nlb-tg" { + load_balancer_arn = "${aws_lb.k3s-master-nlb.arn}" + port = "6443" + protocol = "TCP" + default_action { + target_group_arn = "${aws_lb_target_group.k3s-master-nlb-tg.arn}" + type = "forward" + } +} + +resource "aws_lb_target_group_attachment" "test" { + count = "${var.master_count}" + target_group_arn = "${aws_lb_target_group.k3s-master-nlb-tg.arn}" + target_id = "${aws_spot_instance_request.k3s-server[count.index].spot_instance_id}" + port = 6443 +} + +resource "aws_spot_instance_request" "k3s-server" { + count = "${var.master_count}" + instance_type = var.server_instance_type + ami = data.aws_ami.ubuntu.id + user_data = base64encode(templatefile("${path.module}/files/server_userdata.tmpl", + { + extra_ssh_keys = var.extra_ssh_keys, + metrics_yaml = base64encode(data.template_file.metrics.rendered), + prom_yaml = base64encode(data.template_file.k3s-prom-yaml.rendered), + k3s_cluster_secret = local.k3s_cluster_secret, + install_k3s_version = local.install_k3s_version, + k3s_server_args = var.k3s_server_args, + db_address = aws_db_instance.k3s_db[0].address, + db_name = 
aws_db_instance.k3s_db[0].name, + db_username = aws_db_instance.k3s_db[0].username, + db_password = aws_db_instance.k3s_db[0].password, + use_ha = "${var.k3s_ha == 1 ? "true": "false"}", + master_index = count.index, + lb_address = aws_lb.k3s-master-nlb.dns_name, + prom_worker_node_count = local.prom_worker_node_count, + debug = var.debug,})) + + wait_for_fulfillment = true + security_groups = [ + aws_security_group.k3s.name, + ] + + root_block_device { + volume_size = "100" + volume_type = "gp2" + } + + tags = { + Name = "${local.name}-server-${count.index}" + } + provisioner "local-exec" { + command = "sleep 10" + } +} + +module "k3s-prom-worker-asg" { + source = "terraform-aws-modules/autoscaling/aws" + version = "3.0.0" + name = "${local.name}-prom-worker" + asg_name = "${local.name}-prom-worker" + instance_type = "m5.large" + image_id = data.aws_ami.ubuntu.id + user_data = base64encode(templatefile("${path.module}/files/worker_userdata.tmpl", { extra_ssh_keys = var.extra_ssh_keys, k3s_url = aws_lb.k3s-master-nlb.dns_name, k3s_cluster_secret = local.k3s_cluster_secret, install_k3s_version = local.install_k3s_version, k3s_exec = "--node-label prom=true" })) + + desired_capacity = local.prom_worker_node_count + health_check_type = "EC2" + max_size = local.prom_worker_node_count + min_size = local.prom_worker_node_count + vpc_zone_identifier = [data.aws_subnet.selected.id] + spot_price = "0.340" + + security_groups = [ + aws_security_group.k3s.id, + ] + + lc_name = "${local.name}-prom-worker" + + root_block_device = [ + { + volume_size = "100" + volume_type = "gp2" + }, + ] +} + +resource "null_resource" "get-kubeconfig" { + provisioner "local-exec" { + interpreter = ["bash", "-c"] + command = "until ssh -i ${var.ssh_key_path} ubuntu@${aws_spot_instance_request.k3s-server[0].public_ip} 'sudo sed \"s/localhost/${aws_lb.k3s-master-nlb.dns_name}/g;s/127.0.0.1/${aws_lb.k3s-master-nlb.dns_name}/g\" /etc/rancher/k3s/k3s.yaml' >| ../tests/kubeconfig.yaml; do sleep 5; done" + } +} diff --git a/tests/perf/server/outputs.tf b/tests/perf/server/outputs.tf new file mode 100644 index 0000000000..6e2ffd61ea --- /dev/null +++ b/tests/perf/server/outputs.tf @@ -0,0 +1,15 @@ +output "public_ip" { + value = aws_lb.k3s-master-nlb.dns_name +} + +output "install_k3s_version" { + value = local.install_k3s_version +} + +output "k3s_cluster_secret" { + value = local.k3s_cluster_secret +} + +output "k3s_server_ips" { + value = join(",", aws_spot_instance_request.k3s-server.*.public_ip) +} diff --git a/tests/perf/server/variables.tf b/tests/perf/server/variables.tf new file mode 100644 index 0000000000..0a7209ed42 --- /dev/null +++ b/tests/perf/server/variables.tf @@ -0,0 +1,78 @@ +variable "server_instance_type" { + # default = "c4.8xlarge" +} + +variable "k3s_version" { + default = "v0.9.1" + type = string + description = "Version of K3S to install" +} + +variable "k3s_server_args" { + default = "" +} + +variable "prom_worker_node_count" { + default = 0 + type = number + description = "The number of workers to create labeled for prometheus" +} + +variable "k3s_cluster_secret" { + default = "pvc-6476dcaf-73a0-11e9-b8e5-06943b744282" + type = string + description = "Cluster secret for k3s cluster registration" +} +variable "prom_host" { + default = "" +} +variable "graf_host" { + default = "" +} +variable "name" { + default = "k3s-loadtest" + type = string + description = "Name to identify this cluster" +} + +variable "ssh_key_path" { + default = "~/.ssh/id_rsa" + type = string + description = "Path of the private
key to ssh to the nodes" +} + +variable "extra_ssh_keys" { + type = list + default = [] + description = "Extra ssh keys to inject into Rancher instances" +} + +variable "k3s_ha" { + default = 0 + description = "Enable k3s in HA mode" +} + +variable "db_instance_type" { +} + +variable "db_name" { + default = "k3s" +} + +variable "db_username" { + default = "postgres" +} + +variable "db_password" { + default = "b58bf234c4bd0133fc7a92b782e498a6" +} + +variable "master_count" { + default = 1 + description = "Count of k3s master servers" +} + +variable "debug" { + default = 0 + description = "Enable Debug log" +} diff --git a/tests/perf/server/versions.tf b/tests/perf/server/versions.tf new file mode 100644 index 0000000000..ac97c6ac8e --- /dev/null +++ b/tests/perf/server/versions.tf @@ -0,0 +1,4 @@ + +terraform { + required_version = ">= 0.12" +} diff --git a/tests/perf/tests/density/2000_nodes/override.yaml b/tests/perf/tests/density/2000_nodes/override.yaml new file mode 100644 index 0000000000..8d38cbac56 --- /dev/null +++ b/tests/perf/tests/density/2000_nodes/override.yaml @@ -0,0 +1 @@ +NODE_MODE: masteranddns diff --git a/tests/perf/tests/density/5000_nodes/override.yaml b/tests/perf/tests/density/5000_nodes/override.yaml new file mode 100644 index 0000000000..8d38cbac56 --- /dev/null +++ b/tests/perf/tests/density/5000_nodes/override.yaml @@ -0,0 +1 @@ +NODE_MODE: masteranddns diff --git a/tests/perf/tests/density/600_nodes/high_density_override.yaml b/tests/perf/tests/density/600_nodes/high_density_override.yaml new file mode 100644 index 0000000000..56d78a0775 --- /dev/null +++ b/tests/perf/tests/density/600_nodes/high_density_override.yaml @@ -0,0 +1 @@ +PODS_PER_NODE: 95 diff --git a/tests/perf/tests/density/config.yaml b/tests/perf/tests/density/config.yaml new file mode 100644 index 0000000000..802d47acde --- /dev/null +++ b/tests/perf/tests/density/config.yaml @@ -0,0 +1,248 @@ +# ASSUMPTIONS: +# - Underlying cluster should have 100+ nodes. +# - Number of nodes should be divisible by NODES_PER_NAMESPACE (default 100). + +#Constants +{{$DENSITY_RESOURCE_CONSTRAINTS_FILE := DefaultParam .DENSITY_RESOURCE_CONSTRAINTS_FILE ""}} +{{$NODE_MODE := DefaultParam .NODE_MODE "allnodes"}} +{{$NODES_PER_NAMESPACE := DefaultParam .NODES_PER_NAMESPACE 100}} +{{$PODS_PER_NODE := DefaultParam .PODS_PER_NODE 30}} +{{$DENSITY_TEST_THROUGHPUT := DefaultParam .DENSITY_TEST_THROUGHPUT 20}} +# LATENCY_POD_MEMORY and LATENCY_POD_CPU are calculated for 1-core 4GB node. +# Increasing allocation of both memory and cpu by 10% +# decreases the value of priority function in scheduler by one point. +# This results in decreased probability of choosing the same node again. 
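+# Illustrative sizing with the defaults in this file (100 nodes, 30 pods/node,
+# 100 nodes per namespace): namespaces = 100/100 = 1, podsPerNamespace = 30*100
+# = 3000 saturation pods, and latencyReplicas = max(500, 100)/1 = 500 latency pods.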
+{{$LATENCY_POD_CPU := DefaultParam .LATENCY_POD_CPU 100}} +{{$LATENCY_POD_MEMORY := DefaultParam .LATENCY_POD_MEMORY 350}} +{{$MIN_LATENCY_PODS := 500}} +{{$MIN_SATURATION_PODS_TIMEOUT := 180}} +{{$ENABLE_CHAOSMONKEY := DefaultParam .ENABLE_CHAOSMONKEY false}} +{{$ENABLE_PROMETHEUS_API_RESPONSIVENESS := DefaultParam .ENABLE_PROMETHEUS_API_RESPONSIVENESS false}} +{{$ENABLE_SYSTEM_POD_METRICS:= DefaultParam .ENABLE_SYSTEM_POD_METRICS true}} +{{$USE_SIMPLE_LATENCY_QUERY := DefaultParam .USE_SIMPLE_LATENCY_QUERY false}} +#Variables +{{$namespaces := DivideInt .Nodes $NODES_PER_NAMESPACE}} +{{$podsPerNamespace := MultiplyInt $PODS_PER_NODE $NODES_PER_NAMESPACE}} +{{$totalPods := MultiplyInt $podsPerNamespace $namespaces}} +{{$latencyReplicas := DivideInt (MaxInt $MIN_LATENCY_PODS .Nodes) $namespaces}} +{{$totalLatencyPods := MultiplyInt $namespaces $latencyReplicas}} +{{$saturationDeploymentTimeout := DivideFloat $totalPods $DENSITY_TEST_THROUGHPUT | AddInt $MIN_SATURATION_PODS_TIMEOUT}} +# saturationDeploymentHardTimeout must be at least 20m to make sure that ~10m node +# failure won't fail the test. See https://github.com/kubernetes/kubernetes/issues/73461#issuecomment-467338711 +{{$saturationDeploymentHardTimeout := MaxInt $saturationDeploymentTimeout 1200}} + +name: density +automanagedNamespaces: {{$namespaces}} +tuningSets: +- name: Uniform5qps + qpsLoad: + qps: 5 +{{if $ENABLE_CHAOSMONKEY}} +chaosMonkey: + nodeFailure: + failureRate: 0.01 + interval: 1m + jitterFactor: 10.0 + simulatedDowntime: 10m +{{end}} +steps: +- name: Starting measurements + measurements: + - Identifier: APIResponsiveness + Method: APIResponsiveness + Params: + action: reset + - Identifier: APIResponsivenessPrometheus + Method: APIResponsivenessPrometheus + Params: + action: start + # TODO(oxddr): figure out how many probers to run in function of cluster + - Identifier: InClusterNetworkLatency + Method: InClusterNetworkLatency + Params: + action: start + replicasPerProbe: {{AddInt 2 (DivideInt .Nodes 100)}} + - Identifier: DnsLookupLatency + Method: DnsLookupLatency + Params: + action: start + replicasPerProbe: {{AddInt 2 (DivideInt .Nodes 100)}} + - Identifier: TestMetrics + Method: TestMetrics + Params: + action: start + nodeMode: {{$NODE_MODE}} + resourceConstraints: {{$DENSITY_RESOURCE_CONSTRAINTS_FILE}} + systemPodMetricsEnabled: {{$ENABLE_SYSTEM_POD_METRICS}} + +- name: Starting saturation pod measurements + measurements: + - Identifier: SaturationPodStartupLatency + Method: PodStartupLatency + Params: + action: start + labelSelector: group = saturation + threshold: {{$saturationDeploymentTimeout}}s + - Identifier: WaitForRunningSaturationDeployments + Method: WaitForControlledPodsRunning + Params: + action: start + apiVersion: apps/v1 + kind: Deployment + labelSelector: group = saturation + operationTimeout: {{$saturationDeploymentHardTimeout}}s + - Identifier: SchedulingThroughput + Method: SchedulingThroughput + Params: + action: start + labelSelector: group = saturation + +- name: Creating saturation pods + phases: + - namespaceRange: + min: 1 + max: {{$namespaces}} + replicasPerNamespace: 1 + tuningSet: Uniform5qps + objectBundle: + - basename: saturation-deployment + objectTemplatePath: deployment.yaml + templateFillMap: + Replicas: {{$podsPerNamespace}} + Group: saturation + CpuRequest: 1m + MemoryRequest: 10M + +- name: Collecting saturation pod measurements + measurements: + - Identifier: WaitForRunningSaturationDeployments + Method: WaitForControlledPodsRunning + Params: + action: gather +- 
measurements: + - Identifier: SaturationPodStartupLatency + Method: PodStartupLatency + Params: + action: gather +- measurements: + - Identifier: SchedulingThroughput + Method: SchedulingThroughput + Params: + action: gather + +- name: Starting latency pod measurements + measurements: + - Identifier: PodStartupLatency + Method: PodStartupLatency + Params: + action: start + labelSelector: group = latency + - Identifier: WaitForRunningLatencyDeployments + Method: WaitForControlledPodsRunning + Params: + action: start + apiVersion: apps/v1 + kind: Deployment + labelSelector: group = latency + operationTimeout: 15m + +- name: Creating latency pods + phases: + - namespaceRange: + min: 1 + max: {{$namespaces}} + replicasPerNamespace: {{$latencyReplicas}} + tuningSet: Uniform5qps + objectBundle: + - basename: latency-deployment + objectTemplatePath: deployment.yaml + templateFillMap: + Replicas: 1 + Group: latency + CpuRequest: {{$LATENCY_POD_CPU}}m + MemoryRequest: {{$LATENCY_POD_MEMORY}}M + +- name: Waiting for latency pods to be running + measurements: + - Identifier: WaitForRunningLatencyDeployments + Method: WaitForControlledPodsRunning + Params: + action: gather + +- name: Deleting latency pods + phases: + - namespaceRange: + min: 1 + max: {{$namespaces}} + replicasPerNamespace: 0 + tuningSet: Uniform5qps + objectBundle: + - basename: latency-deployment + objectTemplatePath: deployment.yaml + +- name: Waiting for latency pods to be deleted + measurements: + - Identifier: WaitForRunningLatencyDeployments + Method: WaitForControlledPodsRunning + Params: + action: gather + +- name: Collecting pod startup latency + measurements: + - Identifier: PodStartupLatency + Method: PodStartupLatency + Params: + action: gather + +- name: Deleting saturation pods + phases: + - namespaceRange: + min: 1 + max: {{$namespaces}} + replicasPerNamespace: 0 + tuningSet: Uniform5qps + objectBundle: + - basename: saturation-deployment + objectTemplatePath: deployment.yaml + +- name: Waiting for saturation pods to be deleted + measurements: + - Identifier: WaitForRunningSaturationDeployments + Method: WaitForControlledPodsRunning + Params: + action: gather + +- name: Collecting measurements + measurements: + - Identifier: APIResponsiveness + Method: APIResponsiveness + Params: + action: gather + - Identifier: APIResponsivenessPrometheusSimple + Method: APIResponsivenessPrometheus + Params: + action: gather + {{if $ENABLE_PROMETHEUS_API_RESPONSIVENESS}} + enableViolations: true + {{end}} + useSimpleLatencyQuery: true + summaryName: APIResponsivenessPrometheus_simple + {{if not $USE_SIMPLE_LATENCY_QUERY}} + - Identifier: APIResponsivenessPrometheus + Method: APIResponsivenessPrometheus + Params: + action: gather + {{end}} + - Identifier: InClusterNetworkLatency + Method: InClusterNetworkLatency + Params: + action: gather + - Identifier: DnsLookupLatency + Method: DnsLookupLatency + Params: + action: gather + - Identifier: TestMetrics + Method: TestMetrics + Params: + action: gather + systemPodMetricsEnabled: {{$ENABLE_SYSTEM_POD_METRICS}} diff --git a/tests/perf/tests/density/deployment.yaml b/tests/perf/tests/density/deployment.yaml new file mode 100644 index 0000000000..1903dbaf89 --- /dev/null +++ b/tests/perf/tests/density/deployment.yaml @@ -0,0 +1,37 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{.Name}} + labels: + group: {{.Group}} +spec: + replicas: {{.Replicas}} + selector: + matchLabels: + name: {{.Name}} + template: + metadata: + labels: + name: {{.Name}} + group: {{.Group}} + spec: + 
containers: + - image: k8s.gcr.io/pause:3.1 + imagePullPolicy: IfNotPresent + name: {{.Name}} + ports: + resources: + requests: + cpu: {{.CpuRequest}} + memory: {{.MemoryRequest}} + # Add not-ready/unreachable tolerations for 15 minutes so that node + # failure doesn't trigger pod deletion. + tolerations: + - key: "node.kubernetes.io/not-ready" + operator: "Exists" + effect: "NoExecute" + tolerationSeconds: 900 + - key: "node.kubernetes.io/unreachable" + operator: "Exists" + effect: "NoExecute" + tolerationSeconds: 900 diff --git a/tests/perf/tests/load/config.yaml b/tests/perf/tests/load/config.yaml new file mode 100644 index 0000000000..413fd81eec --- /dev/null +++ b/tests/perf/tests/load/config.yaml @@ -0,0 +1,765 @@ +# ASSUMPTIONS: +# - Underlying cluster should have 100+ nodes. +# - Number of nodes should be divisible by NODES_PER_NAMESPACE (default 100). +# - The number of created SVCs is half the number of created Deployments. +# - Only half of Deployments will be assigned 1-1 to existing SVCs. + +#Constants +{{$NODE_MODE := DefaultParam .NODE_MODE "allnodes"}} +{{$NODES_PER_NAMESPACE := DefaultParam .NODES_PER_NAMESPACE 100}} +{{$PODS_PER_NODE := DefaultParam .PODS_PER_NODE 30}} +{{$LOAD_TEST_THROUGHPUT := DefaultParam .LOAD_TEST_THROUGHPUT 10}} +{{$BIG_GROUP_SIZE := 1000}} +{{$MEDIUM_GROUP_SIZE := 500}} +{{$SMALL_GROUP_SIZE := 50}} +{{$SMALL_STATEFUL_SETS_PER_NAMESPACE := 1}} +{{$MEDIUM_STATEFUL_SETS_PER_NAMESPACE := 1}} +{{$ENABLE_CHAOSMONKEY := DefaultParam .ENABLE_CHAOSMONKEY false}} +{{$PROMETHEUS_SCRAPE_KUBE_PROXY := DefaultParam .PROMETHEUS_SCRAPE_KUBE_PROXY true}} +{{$ENABLE_PROMETHEUS_API_RESPONSIVENESS := DefaultParam .ENABLE_PROMETHEUS_API_RESPONSIVENESS false}} +{{$ENABLE_CONFIGMAPS := DefaultParam .ENABLE_CONFIGMAPS false}} +{{$ENABLE_DAEMONSETS := DefaultParam .ENABLE_DAEMONSETS false}} +{{$ENABLE_JOBS := DefaultParam .ENABLE_JOBS false}} +{{$ENABLE_PVS := DefaultParam .ENABLE_PVS false}} +{{$ENABLE_SECRETS := DefaultParam .ENABLE_SECRETS false}} +{{$ENABLE_STATEFULSETS := DefaultParam .ENABLE_STATEFULSETS false}} +{{$ENABLE_NETWORKPOLICIES := DefaultParam .ENABLE_NETWORKPOLICIES false}} +{{$ENABLE_SYSTEM_POD_METRICS:= DefaultParam .ENABLE_SYSTEM_POD_METRICS true}} +{{$USE_SIMPLE_LATENCY_QUERY := DefaultParam .USE_SIMPLE_LATENCY_QUERY false}} +#Variables +{{$namespaces := DivideInt .Nodes $NODES_PER_NAMESPACE}} +{{$totalPods := MultiplyInt $namespaces $NODES_PER_NAMESPACE $PODS_PER_NODE}} +{{$podsPerNamespace := DivideInt $totalPods $namespaces}} +{{$saturationTime := DivideInt $totalPods $LOAD_TEST_THROUGHPUT}} +# bigDeployments - 1/4 of namespace pods should be in big Deployments. +{{$bigDeploymentsPerNamespace := DivideInt $podsPerNamespace (MultiplyInt 4 $BIG_GROUP_SIZE)}} +# mediumDeployments - 1/4 of namespace pods should be in medium Deployments. +{{$mediumDeploymentsPerNamespace := DivideInt $podsPerNamespace (MultiplyInt 4 $MEDIUM_GROUP_SIZE)}} +# smallDeployments - 1/2 of namespace pods should be in small Deployments. 
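+# Worked example with the defaults above (30 pods/node * 100 nodes/namespace =
+# 3000 pods per namespace): big = 3000/(4*1000) = 0, medium = 3000/(4*500) = 1,
+# small = 3000/(2*50) = 30 Deployments per namespace, before the reductions below.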
+{{$smallDeploymentsPerNamespace := DivideInt $podsPerNamespace (MultiplyInt 2 $SMALL_GROUP_SIZE)}} +# If StatefulSets are enabled reduce the number of small and medium deployments per namespace +{{$smallDeploymentsPerNamespace := SubtractInt $smallDeploymentsPerNamespace (IfThenElse $ENABLE_STATEFULSETS $SMALL_STATEFUL_SETS_PER_NAMESPACE 0)}} +{{$mediumDeploymentsPerNamespace := SubtractInt $mediumDeploymentsPerNamespace (IfThenElse $ENABLE_STATEFULSETS $MEDIUM_STATEFUL_SETS_PER_NAMESPACE 0)}} + +# If Jobs are enabled reduce the number of small, medium, big deployments per namespace. +{{$smallDeploymentsPerNamespace := SubtractInt $smallDeploymentsPerNamespace (IfThenElse $ENABLE_JOBS 1 0)}} +{{$mediumDeploymentsPerNamespace := SubtractInt $mediumDeploymentsPerNamespace (IfThenElse $ENABLE_JOBS 1 0)}} +{{$bigDeploymentsPerNamespace := SubtractInt $bigDeploymentsPerNamespace (IfThenElse $ENABLE_JOBS 1 0)}} + +name: load +automanagedNamespaces: {{$namespaces}} +tuningSets: +- name: Sequence + parallelismLimitedLoad: + parallelismLimit: 1 +- name: RandomizedSaturationTimeLimited + RandomizedTimeLimitedLoad: + timeLimit: {{$saturationTime}}s +- name: RandomizedScalingTimeLimited + RandomizedTimeLimitedLoad: + # The expected number of created/deleted pods is totalPods/4 when scaling, + # as each RS changes its size from X to a uniform random value in [X/2, 3X/2]. + # To match 10 [pods/s] requirement, we need to divide saturationTime by 4. + timeLimit: {{DivideInt $saturationTime 4}}s +{{if $ENABLE_CHAOSMONKEY}} +chaosMonkey: + nodeFailure: + failureRate: 0.01 + interval: 1m + jitterFactor: 10.0 + simulatedDowntime: 10m +{{end}} +steps: +- name: Starting measurements + measurements: + - Identifier: APIResponsiveness + Method: APIResponsiveness + Params: + action: reset + - Identifier: APIResponsivenessPrometheus + Method: APIResponsivenessPrometheus + Params: + action: start + - Identifier: PodStartupLatency + Method: PodStartupLatency + Params: + action: start + labelSelector: group = load + threshold: 1h + - Identifier: InClusterNetworkLatency + Method: InClusterNetworkLatency + Params: + action: start + replicasPerProbe: {{AddInt 2 (DivideInt .Nodes 100)}} + - Identifier: DnsLookupLatency + Method: DnsLookupLatency + Params: + action: start + replicasPerProbe: {{AddInt 2 (DivideInt .Nodes 100)}} + {{if $PROMETHEUS_SCRAPE_KUBE_PROXY}} + - Identifier: NetworkProgrammingLatency + Method: NetworkProgrammingLatency + Params: + action: start + {{end}} + - Identifier: TestMetrics + Method: TestMetrics + Params: + action: start + nodeMode: {{$NODE_MODE}} + systemPodMetricsEnabled: {{$ENABLE_SYSTEM_POD_METRICS}} + +- name: Creating SVCs + phases: + - namespaceRange: + min: 1 + max: {{$namespaces}} + replicasPerNamespace: {{DivideInt (AddInt $bigDeploymentsPerNamespace 1) 2}} + tuningSet: Sequence + objectBundle: + - basename: big-service + objectTemplatePath: service.yaml + - namespaceRange: + min: 1 + max: {{$namespaces}} + replicasPerNamespace: {{DivideInt (AddInt $mediumDeploymentsPerNamespace 1) 2}} + tuningSet: Sequence + objectBundle: + - basename: medium-service + objectTemplatePath: service.yaml + - namespaceRange: + min: 1 + max: {{$namespaces}} + replicasPerNamespace: {{DivideInt (AddInt $smallDeploymentsPerNamespace 1) 2}} + tuningSet: Sequence + objectBundle: + - basename: small-service + objectTemplatePath: service.yaml + +{{if $ENABLE_DAEMONSETS}} +- name: Creating PriorityClass for DaemonSets + phases: + - replicasPerNamespace: 1 + tuningSet: Sequence + objectBundle: + - basename: 
daemonset-priorityclass + objectTemplatePath: daemonset-priorityclass.yaml +{{end}} + +- name: Starting measurement for waiting for pods + measurements: + - Identifier: WaitForRunningDeployments + Method: WaitForControlledPodsRunning + Params: + action: start + apiVersion: apps/v1 + kind: Deployment + labelSelector: group = load + operationTimeout: 15m + {{if $ENABLE_STATEFULSETS}} + - Identifier: WaitForRunningStatefulSets + Method: WaitForControlledPodsRunning + Params: + action: start + apiVersion: apps/v1 + kind: StatefulSet + labelSelector: group = load + operationTimeout: 15m + {{end}} + {{if $ENABLE_DAEMONSETS}} + - Identifier: WaitForRunningDaemonSets + Method: WaitForControlledPodsRunning + Params: + action: start + apiVersion: apps/v1 + kind: DaemonSet + labelSelector: group = load + operationTimeout: 15m + {{end}} + {{if $ENABLE_JOBS}} + - Identifier: WaitForRunningJobs + Method: WaitForControlledPodsRunning + Params: + action: start + apiVersion: batch/v1 + kind: Job + labelSelector: group = load + operationTimeout: 15m + {{end}} + +- name: Creating objects + phases: + {{if $ENABLE_DAEMONSETS}} + - namespaceRange: + min: 1 + max: 1 + replicasPerNamespace: 1 + tuningSet: RandomizedSaturationTimeLimited + objectBundle: + - basename: daemonset + objectTemplatePath: daemonset.yaml + templateFillMap: + Image: k8s.gcr.io/pause:3.0 + {{end}} + - namespaceRange: + min: 1 + max: {{$namespaces}} + replicasPerNamespace: {{$bigDeploymentsPerNamespace}} + tuningSet: RandomizedSaturationTimeLimited + objectBundle: + {{if $ENABLE_CONFIGMAPS}} + - basename: big-deployment + objectTemplatePath: configmap.yaml + {{end}} + {{if $ENABLE_SECRETS}} + - basename: big-deployment + objectTemplatePath: secret.yaml + {{end}} + {{if $ENABLE_NETWORKPOLICIES}} + - basename: big-deployment + objectTemplatePath: networkpolicy.yaml + {{end}} + - basename: big-deployment + objectTemplatePath: deployment.yaml + templateFillMap: + ReplicasMin: {{$BIG_GROUP_SIZE}} + ReplicasMax: {{$BIG_GROUP_SIZE}} + SvcName: big-service + - namespaceRange: + min: 1 + max: {{$namespaces}} + replicasPerNamespace: {{$mediumDeploymentsPerNamespace}} + tuningSet: RandomizedSaturationTimeLimited + objectBundle: + {{if $ENABLE_CONFIGMAPS}} + - basename: medium-deployment + objectTemplatePath: configmap.yaml + {{end}} + {{if $ENABLE_SECRETS}} + - basename: medium-deployment + objectTemplatePath: secret.yaml + {{end}} + {{if $ENABLE_NETWORKPOLICIES}} + - basename: medium-deployment + objectTemplatePath: networkpolicy.yaml + {{end}} + - basename: medium-deployment + objectTemplatePath: deployment.yaml + templateFillMap: + ReplicasMin: {{$MEDIUM_GROUP_SIZE}} + ReplicasMax: {{$MEDIUM_GROUP_SIZE}} + SvcName: medium-service + - namespaceRange: + min: 1 + max: {{$namespaces}} + replicasPerNamespace: {{$smallDeploymentsPerNamespace}} + tuningSet: RandomizedSaturationTimeLimited + objectBundle: + {{if $ENABLE_CONFIGMAPS}} + - basename: small-deployment + objectTemplatePath: configmap.yaml + {{end}} + {{if $ENABLE_SECRETS}} + - basename: small-deployment + objectTemplatePath: secret.yaml + {{end}} + {{if $ENABLE_NETWORKPOLICIES}} + - basename: small-deployment + objectTemplatePath: networkpolicy.yaml + {{end}} + - basename: small-deployment + objectTemplatePath: deployment.yaml + templateFillMap: + ReplicasMin: {{$SMALL_GROUP_SIZE}} + ReplicasMax: {{$SMALL_GROUP_SIZE}} + SvcName: small-service + {{if $ENABLE_STATEFULSETS}} + - namespaceRange: + min: 1 + max: {{$namespaces}} + replicasPerNamespace: {{$SMALL_STATEFUL_SETS_PER_NAMESPACE}} + 
tuningSet: RandomizedSaturationTimeLimited + objectBundle: + - basename: small-statefulset + objectTemplatePath: statefulset_service.yaml + - basename: small-statefulset + objectTemplatePath: statefulset.yaml + templateFillMap: + ReplicasMin: {{$SMALL_GROUP_SIZE}} + ReplicasMax: {{$SMALL_GROUP_SIZE}} + - namespaceRange: + min: 1 + max: {{$namespaces}} + replicasPerNamespace: {{$MEDIUM_STATEFUL_SETS_PER_NAMESPACE}} + tuningSet: RandomizedSaturationTimeLimited + objectBundle: + - basename: medium-statefulset + objectTemplatePath: statefulset_service.yaml + - basename: medium-statefulset + objectTemplatePath: statefulset.yaml + templateFillMap: + ReplicasMin: {{$MEDIUM_GROUP_SIZE}} + ReplicasMax: {{$MEDIUM_GROUP_SIZE}} + {{end}} + {{if $ENABLE_JOBS}} + - namespaceRange: + min: 1 + max: {{$namespaces}} + replicasPerNamespace: 1 + tuningSet: RandomizedSaturationTimeLimited + objectBundle: + - basename: small-job + objectTemplatePath: job.yaml + templateFillMap: + ReplicasMin: {{$SMALL_GROUP_SIZE}} + ReplicasMax: {{$SMALL_GROUP_SIZE}} + - namespaceRange: + min: 1 + max: {{$namespaces}} + replicasPerNamespace: 1 + tuningSet: RandomizedSaturationTimeLimited + objectBundle: + - basename: medium-job + objectTemplatePath: job.yaml + templateFillMap: + ReplicasMin: {{$MEDIUM_GROUP_SIZE}} + ReplicasMax: {{$MEDIUM_GROUP_SIZE}} + - namespaceRange: + min: 1 + max: {{$namespaces}} + replicasPerNamespace: 1 + tuningSet: RandomizedSaturationTimeLimited + objectBundle: + - basename: big-job + objectTemplatePath: job.yaml + templateFillMap: + ReplicasMin: {{$BIG_GROUP_SIZE}} + ReplicasMax: {{$BIG_GROUP_SIZE}} + {{end}} + +- name: Waiting for pods to be running + measurements: + - Identifier: WaitForRunningDeployments + Method: WaitForControlledPodsRunning + Params: + action: gather + {{if $ENABLE_STATEFULSETS}} + - Identifier: WaitForRunningStatefulSets + Method: WaitForControlledPodsRunning + Params: + action: gather + {{end}} + {{if $ENABLE_DAEMONSETS}} + - Identifier: WaitForRunningDaemonSets + Method: WaitForControlledPodsRunning + Params: + action: gather + {{end}} + {{if $ENABLE_JOBS}} + - Identifier: WaitForRunningJobs + Method: WaitForControlledPodsRunning + Params: + action: gather + {{end}} + +- name: Scaling and updating objects + phases: + - namespaceRange: + min: 1 + max: {{$namespaces}} + replicasPerNamespace: {{$bigDeploymentsPerNamespace}} + tuningSet: RandomizedScalingTimeLimited + objectBundle: + - basename: big-deployment + objectTemplatePath: deployment.yaml + templateFillMap: + ReplicasMin: {{MultiplyInt $BIG_GROUP_SIZE 0.5}} + ReplicasMax: {{MultiplyInt $BIG_GROUP_SIZE 1.5}} + SvcName: big-service + - namespaceRange: + min: 1 + max: {{$namespaces}} + replicasPerNamespace: {{$mediumDeploymentsPerNamespace}} + tuningSet: RandomizedScalingTimeLimited + objectBundle: + - basename: medium-deployment + objectTemplatePath: deployment.yaml + templateFillMap: + ReplicasMin: {{MultiplyInt $MEDIUM_GROUP_SIZE 0.5}} + ReplicasMax: {{MultiplyInt $MEDIUM_GROUP_SIZE 1.5}} + SvcName: medium-service + - namespaceRange: + min: 1 + max: {{$namespaces}} + replicasPerNamespace: {{$smallDeploymentsPerNamespace}} + tuningSet: RandomizedScalingTimeLimited + objectBundle: + - basename: small-deployment + objectTemplatePath: deployment.yaml + templateFillMap: + ReplicasMin: {{MultiplyInt $SMALL_GROUP_SIZE 0.5}} + ReplicasMax: {{MultiplyInt $SMALL_GROUP_SIZE 1.5}} + SvcName: small-service + {{if $ENABLE_STATEFULSETS}} + - namespaceRange: + min: 1 + max: {{$namespaces}} + replicasPerNamespace: 
{{$SMALL_STATEFUL_SETS_PER_NAMESPACE}} + tuningSet: RandomizedScalingTimeLimited + objectBundle: + - basename: small-statefulset + objectTemplatePath: statefulset.yaml + templateFillMap: + ReplicasMin: {{MultiplyInt $SMALL_GROUP_SIZE 0.5}} + ReplicasMax: {{MultiplyInt $SMALL_GROUP_SIZE 1.5}} + - namespaceRange: + min: 1 + max: {{$namespaces}} + replicasPerNamespace: {{$MEDIUM_STATEFUL_SETS_PER_NAMESPACE}} + tuningSet: RandomizedScalingTimeLimited + objectBundle: + - basename: medium-statefulset + objectTemplatePath: statefulset.yaml + templateFillMap: + ReplicasMin: {{MultiplyInt $MEDIUM_GROUP_SIZE 0.5}} + ReplicasMax: {{MultiplyInt $MEDIUM_GROUP_SIZE 1.5}} + {{end}} + {{if $ENABLE_DAEMONSETS}} + - namespaceRange: + min: 1 + max: 1 + replicasPerNamespace: 1 + tuningSet: RandomizedScalingTimeLimited + objectBundle: + - basename: daemonset + objectTemplatePath: daemonset.yaml + templateFillMap: + Image: k8s.gcr.io/pause:3.1 + {{end}} + {{if $ENABLE_JOBS}} + - namespaceRange: + min: 1 + max: {{$namespaces}} + replicasPerNamespace: 1 + tuningSet: RandomizedScalingTimeLimited + objectBundle: + - basename: small-job + objectTemplatePath: job.yaml + templateFillMap: + ReplicasMin: {{MultiplyInt $SMALL_GROUP_SIZE 0.5}} + ReplicasMax: {{MultiplyInt $SMALL_GROUP_SIZE 1.5}} + - namespaceRange: + min: 1 + max: {{$namespaces}} + replicasPerNamespace: 1 + tuningSet: RandomizedScalingTimeLimited + objectBundle: + - basename: medium-job + objectTemplatePath: job.yaml + templateFillMap: + ReplicasMin: {{MultiplyInt $MEDIUM_GROUP_SIZE 0.5}} + ReplicasMax: {{MultiplyInt $MEDIUM_GROUP_SIZE 1.5}} + - namespaceRange: + min: 1 + max: {{$namespaces}} + replicasPerNamespace: 1 + tuningSet: RandomizedScalingTimeLimited + objectBundle: + - basename: big-job + objectTemplatePath: job.yaml + templateFillMap: + ReplicasMin: {{MultiplyInt $BIG_GROUP_SIZE 0.5}} + ReplicasMax: {{MultiplyInt $BIG_GROUP_SIZE 1.5}} + {{end}} + +- name: Waiting for objects to become scaled + measurements: + - Identifier: WaitForRunningDeployments + Method: WaitForControlledPodsRunning + Params: + action: gather + {{if $ENABLE_STATEFULSETS}} + - Identifier: WaitForRunningStatefulSets + Method: WaitForControlledPodsRunning + Params: + action: gather + {{end}} + {{if $ENABLE_DAEMONSETS}} + - Identifier: WaitForRunningDaemonSets + Method: WaitForControlledPodsRunning + Params: + action: gather + {{end}} + {{if $ENABLE_JOBS}} + - Identifier: WaitForRunningJobs + Method: WaitForControlledPodsRunning + Params: + action: gather + {{end}} + +- name: Deleting objects + phases: + - namespaceRange: + min: 1 + max: {{$namespaces}} + replicasPerNamespace: 0 + tuningSet: RandomizedSaturationTimeLimited + objectBundle: + - basename: big-deployment + objectTemplatePath: deployment.yaml + {{if $ENABLE_CONFIGMAPS}} + - basename: big-deployment + objectTemplatePath: configmap.yaml + {{end}} + {{if $ENABLE_SECRETS}} + - basename: big-deployment + objectTemplatePath: secret.yaml + {{end}} + {{if $ENABLE_NETWORKPOLICIES}} + - basename: big-deployment + objectTemplatePath: networkpolicy.yaml + {{end}} + - namespaceRange: + min: 1 + max: {{$namespaces}} + replicasPerNamespace: 0 + tuningSet: RandomizedSaturationTimeLimited + objectBundle: + - basename: medium-deployment + objectTemplatePath: deployment.yaml + {{if $ENABLE_CONFIGMAPS}} + - basename: medium-deployment + objectTemplatePath: configmap.yaml + {{end}} + {{if $ENABLE_SECRETS}} + - basename: medium-deployment + objectTemplatePath: secret.yaml + {{end}} + {{if $ENABLE_NETWORKPOLICIES}} + - basename: 
medium-deployment + objectTemplatePath: networkpolicy.yaml + {{end}} + - namespaceRange: + min: 1 + max: {{$namespaces}} + replicasPerNamespace: 0 + tuningSet: RandomizedSaturationTimeLimited + objectBundle: + - basename: small-deployment + objectTemplatePath: deployment.yaml + {{if $ENABLE_CONFIGMAPS}} + - basename: small-deployment + objectTemplatePath: configmap.yaml + {{end}} + {{if $ENABLE_SECRETS}} + - basename: small-deployment + objectTemplatePath: secret.yaml + {{end}} + {{if $ENABLE_NETWORKPOLICIES}} + - basename: small-deployment + objectTemplatePath: networkpolicy.yaml + {{end}} + {{if $ENABLE_STATEFULSETS}} + - namespaceRange: + min: 1 + max: {{$namespaces}} + replicasPerNamespace: 0 + tuningSet: RandomizedSaturationTimeLimited + objectBundle: + - basename: small-statefulset + objectTemplatePath: statefulset.yaml + - basename: small-statefulset + objectTemplatePath: statefulset_service.yaml + - namespaceRange: + min: 1 + max: {{$namespaces}} + replicasPerNamespace: 0 + tuningSet: RandomizedSaturationTimeLimited + objectBundle: + - basename: medium-statefulset + objectTemplatePath: statefulset.yaml + - basename: medium-statefulset + objectTemplatePath: statefulset_service.yaml + {{end}} + {{if $ENABLE_DAEMONSETS}} + - namespaceRange: + min: 1 + max: 1 + replicasPerNamespace: 0 + tuningSet: RandomizedSaturationTimeLimited + objectBundle: + - basename: daemonset + objectTemplatePath: daemonset.yaml + {{end}} + {{if $ENABLE_JOBS}} + - namespaceRange: + min: 1 + max: {{$namespaces}} + replicasPerNamespace: 0 + tuningSet: RandomizedSaturationTimeLimited + objectBundle: + - basename: small-job + objectTemplatePath: job.yaml + - namespaceRange: + min: 1 + max: {{$namespaces}} + replicasPerNamespace: 0 + tuningSet: RandomizedSaturationTimeLimited + objectBundle: + - basename: medium-job + objectTemplatePath: job.yaml + - namespaceRange: + min: 1 + max: {{$namespaces}} + replicasPerNamespace: 0 + tuningSet: RandomizedSaturationTimeLimited + objectBundle: + - basename: big-job + objectTemplatePath: job.yaml + {{end}} + # If both StatefulSets and PVs were enabled we need to delete PVs manually. 
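+  # (volumeClaimTemplates PVCs survive StatefulSet deletion, so the phases below
+  # delete each pvc.yaml object explicitly, one basename per StatefulSet index.)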
+ {{if and $ENABLE_STATEFULSETS $ENABLE_PVS}} + - namespaceRange: + min: 1 + max: {{$namespaces}} + replicasPerNamespace: 0 + tuningSet: RandomizedSaturationTimeLimited + objectBundle: + {{range $ssIndex := Seq $SMALL_STATEFUL_SETS_PER_NAMESPACE}} + - basename: pv-small-statefulset-{{$ssIndex}} + objectTemplatePath: pvc.yaml + listUnknownObjectOptions: + labelSelector: + matchLabels: + name: small-statefulset-{{$ssIndex}} + {{end}} + - namespaceRange: + min: 1 + max: {{$namespaces}} + replicasPerNamespace: 0 + tuningSet: RandomizedSaturationTimeLimited + objectBundle: + {{range $ssIndex := Seq $MEDIUM_STATEFUL_SETS_PER_NAMESPACE}} + - basename: pv-medium-statefulset-{{$ssIndex}} + objectTemplatePath: pvc.yaml + listUnknownObjectOptions: + labelSelector: + matchLabels: + name: medium-statefulset-{{$ssIndex}} + {{end}} + {{end}} + +- name: Waiting for pods to be deleted + measurements: + - Identifier: WaitForRunningDeployments + Method: WaitForControlledPodsRunning + Params: + action: gather + {{if $ENABLE_STATEFULSETS}} + - Identifier: WaitForRunningStatefulSets + Method: WaitForControlledPodsRunning + Params: + action: gather + {{end}} + {{if $ENABLE_DAEMONSETS}} + - Identifier: WaitForRunningDaemonSets + Method: WaitForControlledPodsRunning + Params: + action: gather + {{end}} + {{if $ENABLE_JOBS}} + - Identifier: WaitForRunningJobs + Method: WaitForControlledPodsRunning + Params: + action: gather + {{end}} + {{if and $ENABLE_STATEFULSETS $ENABLE_PVS}} + - Identifier: WaitForPVCsToBeDeleted + Method: WaitForBoundPVCs + Params: + desiredPVCCount: 0 + labelSelector: group = load + timeout: 15m + {{end}} + +{{if $ENABLE_DAEMONSETS}} +- name: Deleting PriorityClass for DaemonSets + phases: + - replicasPerNamespace: 0 + tuningSet: Sequence + objectBundle: + - basename: daemonset-priorityclass + objectTemplatePath: daemonset-priorityclass.yaml +{{end}} + +- name: Deleting SVCs + phases: + - namespaceRange: + min: 1 + max: {{$namespaces}} + replicasPerNamespace: 0 + tuningSet: Sequence + objectBundle: + - basename: big-service + objectTemplatePath: service.yaml + - namespaceRange: + min: 1 + max: {{$namespaces}} + replicasPerNamespace: 0 + tuningSet: Sequence + objectBundle: + - basename: medium-service + objectTemplatePath: service.yaml + - namespaceRange: + min: 1 + max: {{$namespaces}} + replicasPerNamespace: 0 + tuningSet: Sequence + objectBundle: + - basename: small-service + objectTemplatePath: service.yaml + +- name: Collecting measurements + measurements: + - Identifier: APIResponsiveness + Method: APIResponsiveness + Params: + action: gather + - Identifier: APIResponsivenessPrometheusSimple + Method: APIResponsivenessPrometheus + Params: + action: gather + {{if $ENABLE_PROMETHEUS_API_RESPONSIVENESS}} + enableViolations: true + {{end}} + useSimpleLatencyQuery: true + summaryName: APIResponsivenessPrometheus_simple + {{if not $USE_SIMPLE_LATENCY_QUERY}} + - Identifier: APIResponsivenessPrometheus + Method: APIResponsivenessPrometheus + Params: + action: gather + {{end}} + - Identifier: PodStartupLatency + Method: PodStartupLatency + Params: + action: gather + - Identifier: InClusterNetworkLatency + Method: InClusterNetworkLatency + Params: + action: gather + - Identifier: DnsLookupLatency + Method: DnsLookupLatency + Params: + action: gather + {{if $PROMETHEUS_SCRAPE_KUBE_PROXY}} + - Identifier: NetworkProgrammingLatency + Method: NetworkProgrammingLatency + Params: + action: gather + {{end}} + - Identifier: TestMetrics + Method: TestMetrics + Params: + action: gather + 
systemPodMetricsEnabled: {{$ENABLE_SYSTEM_POD_METRICS}} diff --git a/tests/perf/tests/load/configmap.yaml b/tests/perf/tests/load/configmap.yaml new file mode 100644 index 0000000000..b249a39143 --- /dev/null +++ b/tests/perf/tests/load/configmap.yaml @@ -0,0 +1,9 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{.Name}} +data: + data.yaml: |- + a: 1 + b: 2 + c: 3 diff --git a/tests/perf/tests/load/daemonset-priorityclass.yaml b/tests/perf/tests/load/daemonset-priorityclass.yaml new file mode 100644 index 0000000000..e264a740d5 --- /dev/null +++ b/tests/perf/tests/load/daemonset-priorityclass.yaml @@ -0,0 +1,9 @@ +apiVersion: scheduling.k8s.io/v1 +kind: PriorityClass +metadata: + name: {{.Name}} +value: 1000000 +globalDefault: false +description: "Designated priority class to be used for DaemonSet pods. This is + to make sure they have higher priority than other test pods and there is always + place for them on each node, see kubernetes/kubernetes#82818." diff --git a/tests/perf/tests/load/daemonset.yaml b/tests/perf/tests/load/daemonset.yaml new file mode 100644 index 0000000000..68acfefaec --- /dev/null +++ b/tests/perf/tests/load/daemonset.yaml @@ -0,0 +1,41 @@ +{{$Image := DefaultParam .Image "k8s.gcr.io/pause:3.1"}} + +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: {{.Name}} + labels: + group: load +spec: + updateStrategy: + rollingUpdate: + maxUnavailable: {{MaxInt 10 (DivideInt .Nodes 20)}} # 5% of nodes, but not less than 10 + selector: + matchLabels: + name: {{.Name}} + template: + metadata: + labels: + group: load + name: {{.Name}} + spec: + containers: + - name: {{.Name}} + image: {{$Image}} + resources: + requests: + cpu: 10m + memory: "10M" + priorityClassName: daemonset-priorityclass-0 # Name is autogenerated, hence the -0 prefix. + terminationGracePeriodSeconds: 1 + # Add not-ready/unreachable tolerations for 15 minutes so that node + # failure doesn't trigger pod deletion. + tolerations: + - key: "node.kubernetes.io/not-ready" + operator: "Exists" + effect: "NoExecute" + tolerationSeconds: 900 + - key: "node.kubernetes.io/unreachable" + operator: "Exists" + effect: "NoExecute" + tolerationSeconds: 900 diff --git a/tests/perf/tests/load/deployment.yaml b/tests/perf/tests/load/deployment.yaml new file mode 100644 index 0000000000..8a2f3a798b --- /dev/null +++ b/tests/perf/tests/load/deployment.yaml @@ -0,0 +1,63 @@ +{{$EnableConfigMaps := DefaultParam .ENABLE_CONFIGMAPS false}} +{{$EnableSecrets := DefaultParam .ENABLE_SECRETS false}} + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{.Name}} + labels: + group: load + svc: {{.SvcName}}-{{.Index}} +spec: + replicas: {{RandIntRange .ReplicasMin .ReplicasMax}} + selector: + matchLabels: + name: {{.Name}} + template: + metadata: + labels: + group: load + name: {{.Name}} + svc: {{.SvcName}}-{{.Index}} + spec: + containers: + - image: k8s.gcr.io/pause:3.1 + name: {{.Name}} + resources: + requests: + cpu: 10m + memory: "10M" + volumeMounts: + {{if and $EnableConfigMaps (eq (Mod .Index 20) 0 19) }} # .Index % 20 in {0,19} - 10% deployments will have ConfigMap + - name: configmap + mountPath: /var/configmap + {{end}} + {{if and $EnableSecrets (eq (Mod .Index 20) 10 19) }} # .Index % 20 in {10,19} - 10% deployments will have Secret + - name: secret + mountPath: /var/secret + {{end}} + dnsPolicy: Default + terminationGracePeriodSeconds: 1 + # Add not-ready/unreachable tolerations for 15 minutes so that node + # failure doesn't trigger pod deletion. 
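+      # (An illustrative cross-check: 900s = 15m, which outlives the optional
+      # chaosMonkey simulatedDowntime of 10m configured in config.yaml, so a
+      # simulated node outage alone should not evict these pods.)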
+ tolerations: + - key: "node.kubernetes.io/not-ready" + operator: "Exists" + effect: "NoExecute" + tolerationSeconds: 900 + - key: "node.kubernetes.io/unreachable" + operator: "Exists" + effect: "NoExecute" + tolerationSeconds: 900 + volumes: + {{if and $EnableConfigMaps (eq (Mod .Index 20) 0 19) }} # .Index % 20 in {0,19} - 10% deployments will have ConfigMap + - name: configmap + configMap: + name: {{.BaseName}}-{{.Index}} + {{end}} + {{if and $EnableSecrets (eq (Mod .Index 20) 10 19) }} # .Index % 20 in {10,19} - 10% deployments will have Secret + - name: secret + secret: + secretName: {{.BaseName}}-{{.Index}} + {{end}} + diff --git a/tests/perf/tests/load/job.yaml b/tests/perf/tests/load/job.yaml new file mode 100644 index 0000000000..f28e1b3ee2 --- /dev/null +++ b/tests/perf/tests/load/job.yaml @@ -0,0 +1,39 @@ +apiVersion: batch/v1 +kind: Job +metadata: + name: {{.Name}} + labels: + group: load +spec: + manualSelector: true + parallelism: {{RandIntRange .ReplicasMin .ReplicasMax}} + selector: + matchLabels: + name: {{.Name}} + template: + metadata: + labels: + group: load + name: {{.Name}} + spec: + containers: + - name: {{.Name}} + # TODO(#799): We should test the "run-to-completion" workflow and hence don't use pause pods. + image: k8s.gcr.io/pause:3.1 + resources: + requests: + cpu: 10m + memory: "10M" + restartPolicy: Never + terminationGracePeriodSeconds: 1 + # Add not-ready/unreachable tolerations for 15 minutes so that node + # failure doesn't trigger pod deletion. + tolerations: + - key: "node.kubernetes.io/not-ready" + operator: "Exists" + effect: "NoExecute" + tolerationSeconds: 900 + - key: "node.kubernetes.io/unreachable" + operator: "Exists" + effect: "NoExecute" + tolerationSeconds: 900 diff --git a/tests/perf/tests/load/networkpolicy.yaml b/tests/perf/tests/load/networkpolicy.yaml new file mode 100644 index 0000000000..1aae9b23c0 --- /dev/null +++ b/tests/perf/tests/load/networkpolicy.yaml @@ -0,0 +1,19 @@ +{{if eq (Mod .Index 10) 0}} # Create for only 10% of deployments +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: {{.Name}} +spec: + podSelector: + matchLabels: + name: {{.BaseName}}-{{.Index}} + policyTypes: + - Egress + egress: + - to: + - ipBlock: + cidr: 10.0.0.0/24 + ports: + - protocol: TCP + port: 8080 +{{end}} diff --git a/tests/perf/tests/load/pvc.yaml b/tests/perf/tests/load/pvc.yaml new file mode 100644 index 0000000000..d19d23053e --- /dev/null +++ b/tests/perf/tests/load/pvc.yaml @@ -0,0 +1,4 @@ +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: {{.Name}} diff --git a/tests/perf/tests/load/secret.yaml b/tests/perf/tests/load/secret.yaml new file mode 100644 index 0000000000..67134b355f --- /dev/null +++ b/tests/perf/tests/load/secret.yaml @@ -0,0 +1,7 @@ +apiVersion: v1 +kind: Secret +metadata: + name: {{.Name}} +type: Opaque +data: + password: c2NhbGFiaWxpdHkK diff --git a/tests/perf/tests/load/service.yaml b/tests/perf/tests/load/service.yaml new file mode 100644 index 0000000000..ed6a22c8cf --- /dev/null +++ b/tests/perf/tests/load/service.yaml @@ -0,0 +1,16 @@ +{{$SetServiceProxyLabel := DefaultParam .SetServiceProxyLabel false}} + +apiVersion: v1 +kind: Service +metadata: + name: {{.Name}} +{{if and $SetServiceProxyLabel (eq (Mod .Index 2) 0)}} + labels: + service.kubernetes.io/service-proxy-name: foo +{{end}} +spec: + selector: + svc: {{.Name}} + ports: + - port: 80 + targetPort: 80 diff --git a/tests/perf/tests/load/statefulset.yaml b/tests/perf/tests/load/statefulset.yaml new file mode 100644 index 
0000000000..43157b7928 --- /dev/null +++ b/tests/perf/tests/load/statefulset.yaml @@ -0,0 +1,61 @@ +{{$EnablePVs := DefaultParam .ENABLE_PVS false}} + +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: {{.Name}} + labels: + group: load +spec: + podManagementPolicy: Parallel + selector: + matchLabels: + group: load + name: {{.Name}} + serviceName: {{.Name}} + replicas: {{RandIntRange .ReplicasMin .ReplicasMax}} + template: + metadata: + labels: + group: load + name: {{.Name}} + spec: + containers: + - name: {{.Name}} + image: k8s.gcr.io/pause:3.1 + ports: + - containerPort: 80 + name: web + resources: + requests: + cpu: 10m + memory: "10M" + {{if $EnablePVs}} + volumeMounts: + - name: pv + mountPath: /var/pv + {{end}} + terminationGracePeriodSeconds: 1 + # Add not-ready/unreachable tolerations for 15 minutes so that node + # failure doesn't trigger pod deletion. + tolerations: + - key: "node.kubernetes.io/not-ready" + operator: "Exists" + effect: "NoExecute" + tolerationSeconds: 900 + - key: "node.kubernetes.io/unreachable" + operator: "Exists" + effect: "NoExecute" + tolerationSeconds: 900 + {{if $EnablePVs}} + # NOTE: PVs created this way should be cleaned-up manually, as deleting the StatefulSet doesn't automatically delete PVs. + # To avoid deleting all the PVs at once during namespace deletion, they should be deleted explicitly via Phase. + volumeClaimTemplates: + - metadata: + name: pv + spec: + accessModes: [ "ReadWriteOnce" ] + resources: + requests: + storage: 100Mi + {{end}} diff --git a/tests/perf/tests/load/statefulset_service.yaml b/tests/perf/tests/load/statefulset_service.yaml new file mode 100644 index 0000000000..5e16a47a19 --- /dev/null +++ b/tests/perf/tests/load/statefulset_service.yaml @@ -0,0 +1,10 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{.Name}} + labels: + name: {{.Name}} +spec: + clusterIP: None + selector: + name: {{.Name}} From 2165464af6296080a0c29232b9a6d8df5812899b Mon Sep 17 00:00:00 2001 From: galal-hussein Date: Mon, 4 Nov 2019 18:09:54 +0200 Subject: [PATCH 5/7] more changes and Add readme --- tests/perf/Makefile | 8 +- tests/perf/README.md | 116 ++++++++++ tests/perf/agents/main.tf | 12 +- tests/perf/agents/variables.tf | 4 +- tests/perf/scripts/config | 32 +-- tests/perf/scripts/perf | 57 ++++- tests/perf/scripts/test | 14 +- tests/perf/server/data.tf | 16 -- tests/perf/server/files/etcd.tmpl | 31 +++ tests/perf/server/files/etcd_build.sh | 22 ++ tests/perf/server/files/metrics.yaml | 227 ------------------- tests/perf/server/files/prom.yaml | 86 ------- tests/perf/server/files/server_userdata.tmpl | 42 ++-- tests/perf/server/files/worker_userdata.tmpl | 2 +- tests/perf/server/main.tf | 99 +++++--- tests/perf/server/outputs.tf | 4 +- tests/perf/server/variables.tf | 34 ++- tests/perf/tests/load/config.yaml | 2 +- 18 files changed, 395 insertions(+), 413 deletions(-) create mode 100644 tests/perf/README.md create mode 100644 tests/perf/server/files/etcd.tmpl create mode 100755 tests/perf/server/files/etcd_build.sh delete mode 100644 tests/perf/server/files/metrics.yaml delete mode 100644 tests/perf/server/files/prom.yaml diff --git a/tests/perf/Makefile b/tests/perf/Makefile index a1d63a52ad..9b216482d2 100644 --- a/tests/perf/Makefile +++ b/tests/perf/Makefile @@ -1,6 +1,6 @@ MODULE := $(shell basename $$PWD) -.PHONY: init config apply destroy clean test +.PHONY: init config apply destroy clean test info init: @scripts/perf init @@ -8,6 +8,9 @@ init: config: @scripts/perf config +plan: + @scripts/perf plan + apply: @scripts/perf 
apply
@@ -19,3 +22,6 @@ clean:
 test:
 	@scripts/test test_load
+
+info:
+	@scripts/perf info
diff --git a/tests/perf/README.md b/tests/perf/README.md
new file mode 100644
index 0000000000..60ae23a2c5
--- /dev/null
+++ b/tests/perf/README.md
@@ -0,0 +1,116 @@
+## K3S Performance Tests
+---
+
+These scripts use Terraform to automate building and testing k3s clusters on AWS. They support both regular and HA clusters with N master nodes, N worker nodes, and multiple storage backends, including:
+
+- MySQL RDS
+- Postgres RDS
+- etcd
+- SQLite
+
+The scripts are divided into three sections:
+
+- server
+- agents
+- tests
+
+### Server
+
+The server section deploys the storage backend and then N master nodes. It can be customized to run in HA mode or as a single-node cluster with the SQLite backend, and it also supports a single master node with an external DB. The instance type and k3s version can be customized as well; all available options are described in the variables section below.
+
+The server section also creates one or more agent nodes dedicated to the Prometheus deployment; clusterloader2 will deploy Prometheus and Grafana onto them.
+
+### Agents
+
+The agents section deploys the k3s agents. It can be customized with options that control the agent node count and the instance type.
+
+### Tests
+
+The tests section uses a fork of the [clusterloader2](https://github.com/kubernetes/perf-tests/tree/master/clusterloader2) tool; the fork only modifies the logging and removes the etcd metrics probes.
+
+This section uses a dockerized version of the tool, which runs the tests and saves the report under `tests/` (e.g. a `load_tests_results-<random>` directory).
+
+The currently available tests are:
+
+- load test
+- density test
+
+## Variables
+
+The scripts can be adjusted by customizing the variables in `scripts/config`:
+
+**Main Vars**
+
+| Name | Description |
+|:----------------:|:------------------------------------------------------------------------------:|
+| CLUSTER_NAME | The cluster name on AWS; this prefixes each component in the cluster |
+| DOMAIN_NAME | DNS name of the load balancer for the k3s master(s) |
+| ZONE_ID | AWS Route53 zone ID used to register the DNS name |
+| K3S_VERSION | k3s version that will be used for the cluster |
+| EXTRA_SSH_KEYS | Public SSH keys that will be added to the servers |
+| PRIVATE_KEY_PATH | Private SSH key used by clusterloader2 to SSH in and collect metrics |
+| DEBUG | Debug mode for k3s servers |
+
+**Database Variables**
+
+| Name | Description |
+|:----------------:|:---------------------------------------------------------------------------------------------------:|
+| DB_ENGINE | The database type; this can be "mysql", "postgres", or "etcd" |
+| DB_INSTANCE_TYPE | The RDS instance type for mysql and postgres; etcd uses the db.* class as well, since the prefix is parsed out internally |
+| DB_NAME | Database name, created only for postgres and mysql |
+| DB_USERNAME | Database username, created only for postgres and mysql |
+| DB_PASSWORD | Database password for the user, created only for postgres and mysql |
+| DB_VERSION | Database version |
+
+**K3S Server Variables**
+
+| Name | Description |
+|:--------------------:|:---------------------------------------------------------------------------------:|
+| SERVER_HA | Whether to use HA mode; if not, SQLite is used as the storage backend |
+| SERVER_COUNT | k3s master node count |
+| SERVER_INSTANCE_TYPE | EC2 instance type created for the k3s server(s) |
+
+**K3S Agent Variables**
+
+| Name | Description |
+|:-------------------:|:-----------------------------------------:|
+| AGENT_NODE_COUNT | Number of k3s agents that will be created |
+| AGENT_INSTANCE_TYPE | EC2 instance type created for k3s agents |
+
+**Prometheus server Variables**
+
+| Name | Description |
+|:-------------------------:|:-------------------------------------------------------------------:|
+| PROM_WORKER_NODE_COUNT | Number of k3s agents that will be created for the Prometheus deployment |
+| PROM_WORKER_INSTANCE_TYPE | EC2 instance type created for the k3s Prometheus agents |
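For illustration only, a filled-in `scripts/config` for a sizeable HA run might look like the sketch below (abridged; the domain, zone, and instance sizes are placeholder values, not recommendations):

```sh
## MAIN VARIABLES ##
CLUSTER_NAME="loadtest-k3s"
DOMAIN_NAME="loadtest.example.com"   # placeholder FQDN
ZONE_ID="Z0000000000000"             # placeholder Route53 zone ID
K3S_VERSION="v1.0.0"
PRIVATE_KEY_PATH="~/.ssh/id_rsa"
DEBUG=1

## K3S DB VARIABLES ##
DB_ENGINE="postgres"
DB_INSTANCE_TYPE="db.m4.4xlarge"
DB_NAME="k3s"
DB_USERNAME="k3suser"
DB_VERSION=11.5

## K3S SERVER VARIABLES ##
SERVER_HA=1
SERVER_COUNT=3
SERVER_INSTANCE_TYPE="m5.2xlarge"

## PROMETHEUS SERVER VARIABLES ##
PROM_WORKER_NODE_COUNT=1
PROM_WORKER_INSTANCE_TYPE="m5.large"

## K3S AGENTS VARIABLES ##
AGENT_NODE_COUNT=100
AGENT_INSTANCE_TYPE="m5.large"
```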
+
+
+## Usage
+
+### build
+
+The repo includes a Makefile that runs the different sections. To build the master and worker nodes, adjust the config file in `tests/perf/scripts/config` and then run the following:
+
+```
+cd tests/perf
+make apply
+```
+
+This builds the db, server, and agent layers, and also writes a kubeconfig file to `tests/kubeconfig.yaml`.
+
+### test
+
+To start the clusterloader2 load test, you can modify `tests/perf/tests/load/config.yaml` and then run the following:
+
+```
+cd tests/perf
+make test
+```
+
+### destroy
+
+To destroy the cluster, just run the following:
+```
+make destroy
+make clean
+```
diff --git a/tests/perf/agents/main.tf b/tests/perf/agents/main.tf
index 975117cddd..f62c432fd3 100644
--- a/tests/perf/agents/main.tf
+++ b/tests/perf/agents/main.tf
@@ -52,15 +52,19 @@ module "k3s-pool-worker-asg" {
   version = "3.0.0"
   name = "${local.name}-pool"
   asg_name = "${local.name}-pool"
-  instance_type = var.worker_instance_type
+  instance_type = var.agent_instance_type
   image_id = data.aws_ami.ubuntu.id
   user_data = base64encode(templatefile("${path.module}/files/pool_worker_userdata.tmpl", { k3s_url = data.terraform_remote_state.server.outputs.public_ip, k3s_cluster_secret = local.k3s_cluster_secret, extra_ssh_keys = var.extra_ssh_keys, install_k3s_version = var.k3s_version }))
   ebs_optimized = true
-  desired_capacity = var.node_count
+  default_cooldown = 10
+  health_check_grace_period = 30
+  wait_for_capacity_timeout = "60m"
+
+  desired_capacity = var.agent_node_count
   health_check_type = "EC2"
-  max_size = var.node_count
-  min_size = var.node_count
+  max_size = var.agent_node_count
+  min_size = var.agent_node_count
   vpc_zone_identifier = [data.aws_subnet.selected.id]
   spot_price = "0.680"
diff --git a/tests/perf/agents/variables.tf b/tests/perf/agents/variables.tf
index 37a587d413..f0924930fe 100644
--- a/tests/perf/agents/variables.tf
+++ b/tests/perf/agents/variables.tf
@@ -1,10 +1,10 @@
-variable "node_count" {
+variable "agent_node_count" {
   description = "Number of nodes to run k3s agents on."
type = number # default = 10 } -variable "worker_instance_type" { +variable "agent_instance_type" { type = string default = "t3.2xlarge" } diff --git a/tests/perf/scripts/config b/tests/perf/scripts/config index 8e5f09a3fd..3505846732 100755 --- a/tests/perf/scripts/config +++ b/tests/perf/scripts/config @@ -1,28 +1,34 @@ ## MAIN VARIABLES ## #################### -CLUSTER_NAME="hgalal-k3s" -K3S_VERSION="v0.10.0" -EXTRA_SSH_KEYS="ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDZBAE6I9J733HJfCBVu7iWSUuJ7th0U4P4IFfpFDca52n/Hk4yFFr8SPR8JJc1n42c3vEVCbExp/MD4ihqEBy9+pLewxA+fkb7UAT4cT2eLfvZdTTVe8KSiw6lVN6tWSoNXmNqY+wH7zWQ04lfjXPa/c01L1n2XwV/O+5xii9vEuSxN9YhfQ/s61SdLFqQ5yS8gPsM0qQW+bFt5KGGbapqztDO+h9lxGbZRcRAKbCzZ5kF1mhjI/+VubTWKtoVLCumjzjYqILYyx9g/mLSo26qjDEZvtwBQB9KLugDAtnalLVp0HgivC5YfLHr8PxViVSHfIIKS2DhUpn07jr8eKi9" -PRIVATE_KEY_PATH="/home/hussein/.ssh/id_rsa" #this has to be a full path +CLUSTER_NAME="loadtest-k3s" +DOMAIN_NAME="" +ZONE_ID="" +K3S_VERSION="v0.11.0-alpha2" +EXTRA_SSH_KEYS="" # comma separated public keys +PRIVATE_KEY_PATH="~/.ssh/id_rsa" +DEBUG=1 +## K3S DB VARIABLES ## +########################## +DB_ENGINE="postgres" +DB_INSTANCE_TYPE="db.m4.4xlarge" +DB_NAME="k3s" +DB_USERNAME="k3suser" +DB_PASSWORD="024d9442b3add64b7ef90655bc302cd8" +DB_VERSION=11.5 ## K3S SERVER VARIABLES ## ########################## -K3S_HA=1 -MASTER_COUNT=3 -DB_INSTANCE_TYPE="db.m4.4xlarge" +SERVER_HA=1 +SERVER_COUNT=3 SERVER_INSTANCE_TYPE="m5.2xlarge" -DEBUG=1 - ## PROMETHEUS SERVER VARIABLES ## ################################# PROM_WORKER_NODE_COUNT=1 -PROM_HOST="prometheus-load.eng.rancher.space" -GRAF_HOST="prometheus-load.eng.rancher.space" - +PROM_WORKER_INSTANCE_TYPE="m5.large" ## K3S AGENTS VARIABLES ## ########################## AGENT_NODE_COUNT=100 -WORKER_INSTANCE_TYPE="m5.xlarge" +AGENT_INSTANCE_TYPE="m5.large" diff --git a/tests/perf/scripts/perf b/tests/perf/scripts/perf index bf60a715e0..9dbae96166 100755 --- a/tests/perf/scripts/perf +++ b/tests/perf/scripts/perf @@ -18,6 +18,8 @@ init() { apply() { # init terraform + init + # configure variables config # Run apply for server and agents for i in server agents; do @@ -32,32 +34,52 @@ apply() { done } +plan() { + # init terraform + config + # Run apply for server and agents + for i in server agents; do + pushd $i + $TERRAFORM_PLAN_CMD + popd + done +} + + config() { source scripts/config pushd ./server + eval PRIVATE_KEY_PATH=$PRIVATE_KEY_PATH + EXPANDED_PRIV_KEY_PATH=`readlink -f $PRIVATE_KEY_PATH` cat <
<MAIN >variables.tfvars
 name = "${CLUSTER_NAME}"
 db_instance_type = "${DB_INSTANCE_TYPE}"
+db_name = "${DB_NAME}"
+db_username = "${DB_USERNAME}"
+db_password = "${DB_PASSWORD}"
+db_engine = "${DB_ENGINE}"
+db_version = "${DB_VERSION}"
 server_instance_type = "${SERVER_INSTANCE_TYPE}"
 extra_ssh_keys = ["${EXTRA_SSH_KEYS}"]
-master_count = ${MASTER_COUNT}
-k3s_ha = ${K3S_HA}
+server_count = ${SERVER_COUNT}
+server_ha = ${SERVER_HA}
 k3s_version = "${K3S_VERSION}"
 prom_worker_node_count = ${PROM_WORKER_NODE_COUNT}
-prom_host = "${PROM_HOST}"
-graf_host = "${GRAF_HOST}"
-ssh_key_path = "${PRIVATE_KEY_PATH}"
+prom_worker_instance_type = "${PROM_WORKER_INSTANCE_TYPE}"
+ssh_key_path = "${EXPANDED_PRIV_KEY_PATH}"
 debug = ${DEBUG}
+domain_name = "${DOMAIN_NAME}"
+zone_id = "${ZONE_ID}"
 MAIN
 popd
 pushd ./agents
 cat <<MAIN >
variables.tfvars name = "${CLUSTER_NAME}" -node_count = ${AGENT_NODE_COUNT} extra_ssh_keys = ["${EXTRA_SSH_KEYS}"] k3s_version = "${K3S_VERSION}" -worker_instance_type = "${WORKER_INSTANCE_TYPE}" +agent_node_count = ${AGENT_NODE_COUNT} +agent_instance_type = "${AGENT_INSTANCE_TYPE}" MAIN popd } @@ -71,6 +93,16 @@ clean() { done } +cleanall() { + clean + # clean kubeconfig + pushd tests/ + rm -f kubeconfig + rm -rf load_tests_results* + rm -rf density_tests_results* + popd +} + destroy() { for i in agents server; do pushd $i @@ -80,4 +112,15 @@ destroy() { clean } +info() { + set +x + for i in agents server; do + pushd $i + if [ -f $i.tfstate ]; then + terraform output --state=$i.tfstate + fi + popd + done +} + $@ diff --git a/tests/perf/scripts/test b/tests/perf/scripts/test index 150bd9eff9..5866907849 100755 --- a/tests/perf/scripts/test +++ b/tests/perf/scripts/test @@ -2,9 +2,11 @@ test_load() { source scripts/config + eval PRIVATE_KEY_PATH=$PRIVATE_KEY_PATH + EXPANDED_PRIV_KEY_PATH=`readlink -f $PRIVATE_KEY_PATH` masterips=`terraform output -state=server/server.tfstate | grep k3s_server_ips | cut -d "=" -f 2` pushd tests/ - docker run -v $PRIVATE_KEY_PATH:/opt/priv_key \ + docker run -v $EXPANDED_PRIV_KEY_PATH:/opt/priv_key \ -e KUBE_SSH_USER=ubuntu \ -e LOCAL_SSH_KEY=/opt/priv_key \ -it -v $PWD/:/opt/k3s/perf-tests husseingalal/clusterloader:dev \ @@ -12,7 +14,7 @@ test_load() { --kubeconfig /opt/k3s/perf-tests/kubeconfig.yaml \ --masterip $masterips \ --provider=local \ - --report-dir /opt/k3s/perf-tests/load_tests_results \ + --report-dir /opt/k3s/perf-tests/load_tests_results-$RANDOM \ --enable-prometheus-server \ --tear-down-prometheus-server=0 popd @@ -20,17 +22,19 @@ test_load() { test_density() { source scripts/config + eval PRIVATE_KEY_PATH=$PRIVATE_KEY_PATH + EXPANDED_PRIV_KEY_PATH=`readlink -f $PRIVATE_KEY_PATH` masterips=`terraform output -state=server/server.tfstate | grep k3s_server_ips | cut -d "=" -f 2` pushd tests/ docker run -e KUBE_SSH_USER=ubuntu \ - -v $PRIVATE_KEY_PATH:/opt/priv_key \ + -v $EXPANDED_PRIV_KEY_PATH:/opt/priv_key \ -e LOCAL_SSH_KEY=/opt/priv_key \ -it -v $PWD/:/opt/k3s/perf-tests husseingalal/clusterloader:dev \ clusterloader --testconfig /opt/k3s/perf-tests/density/config.yaml \ --kubeconfig /opt/k3s/perf-tests/kubeconfig.yaml \ --masterip $masterips \ --provider=local \ - --report-dir /opt/k3s/perf-tests/density_tests_results \ + --report-dir /opt/k3s/perf-tests/density_tests_results-$RANDOM \ --enable-prometheus-server \ --tear-down-prometheus-server=0 popd @@ -40,7 +44,7 @@ clean() { # clean kubeconfig pushd tests/ rm -f kubeconfig - rm -rf load_tests_results/ + rm -rf load_tests_results* rm -rf density_tests_results/ popd } diff --git a/tests/perf/server/data.tf b/tests/perf/server/data.tf index 9a269d4e1e..240c9f225e 100644 --- a/tests/perf/server/data.tf +++ b/tests/perf/server/data.tf @@ -34,19 +34,3 @@ data "aws_ami" "ubuntu" { values = ["x86_64"] } } - -data "template_file" "metrics" { - template = file("${path.module}/files/metrics.yaml") - vars = { - prom_worker_node_count = local.prom_worker_node_count - - } -} -data "template_file" "k3s-prom-yaml" { - template = file("${path.module}/files/prom.yaml") - vars = { - prom_host = var.prom_host - graf_host = var.graf_host - prom_worker_node_count = local.prom_worker_node_count - } -} diff --git a/tests/perf/server/files/etcd.tmpl b/tests/perf/server/files/etcd.tmpl new file mode 100644 index 0000000000..41727d6708 --- /dev/null +++ b/tests/perf/server/files/etcd.tmpl @@ -0,0 +1,31 @@ 
+#cloud-config +%{ if length(extra_ssh_keys) > 0 } +ssh_authorized_keys: +%{ for ssh_key in extra_ssh_keys } +- ${ssh_key} +%{ endfor } +%{ endif } +runcmd: +- echo "net.ipv4.neigh.default.gc_interval = 3600" >> /etc/sysctl.conf +- echo "net.ipv4.neigh.default.gc_stale_time = 3600" >> /etc/sysctl.conf +- echo "net.ipv4.neigh.default.gc_thresh3 = 16384" >> /etc/sysctl.conf +- echo "net.ipv4.neigh.default.gc_thresh2 = 8192" >> /etc/sysctl.conf +- echo "net.ipv4.neigh.default.gc_thresh1 = 4096" >> /etc/sysctl.conf +- echo "fs.file-max = 12000500" >> /etc/sysctl.conf +- echo "fs.nr_open = 20000500" >> /etc/sysctl.conf +- echo "net.ipv4.tcp_mem = '10000000 10000000 10000000'" >> /etc/sysctl.conf +- echo "net.ipv4.tcp_rmem = '1024 4096 16384'" >> /etc/sysctl.conf +- echo "net.ipv4.tcp_wmem = '1024 4096 16384'" >> /etc/sysctl.conf +- echo "net.core.rmem_max = 16384" >> /etc/sysctl.conf +- echo "net.core.wmem_max = 16384" >> /etc/sysctl.conf +- ulimit -n 20000000 +- echo "# " >> /etc/security/limits.d/limits.conf +- echo " * soft nofile 20000" >> /etc/security/limits.d/limits.conf +- echo " * hard nofile 20000" >> /etc/security/limits.d/limits.conf +- sysctl -p +- apt-get update +- apt-get install -y git vim software-properties-common resolvconf linux-headers-$(uname -r) +- echo "nameserver 1.1.1.1" > /etc/resolvconf/resolv.conf.d/tail +- echo "RateLimitIntervalSec=0" >> /etc/systemd/journald.conf +- echo "RateLimitBurst=0" >> /etc/systemd/journald.conf +- curl -sSL https://releases.rancher.com/install-docker/19.03.sh | sh diff --git a/tests/perf/server/files/etcd_build.sh b/tests/perf/server/files/etcd_build.sh new file mode 100755 index 0000000000..51d3074668 --- /dev/null +++ b/tests/perf/server/files/etcd_build.sh @@ -0,0 +1,22 @@ +#!/bin/bash +set -x + +IFS=',' read -r -a public_ips <<< "$PUBLIC_IPS" +IFS=',' read -r -a private_ips <<< "$PRIVATE_IPS" + +conn_string="" +for i in "${!private_ips[@]}"; do + conn_string=$conn_string"etcd-$i=http://${private_ips[i]}:2380," +done +conn_string=${conn_string%?} +for i in "${!public_ips[@]}"; do + while true; do + ssh -i $SSH_KEY_PATH -l ubuntu ${public_ips[i]} "sudo docker run -v /etcd-data:/etcd-data -d -p ${private_ips[i]}:2379:2379 -p ${private_ips[i]}:2380:2380 quay.io/coreos/etcd:$DB_VERSION etcd --initial-advertise-peer-urls http://${private_ips[i]}:2380 --name=etcd-$i --data-dir=/etcd-data --advertise-client-urls=http://0.0.0.0:2379 --listen-peer-urls=http://0.0.0.0:2380 --listen-client-urls=http://0.0.0.0:2379 --initial-cluster-token=etcd-cluster-1 --initial-cluster-state new --initial-cluster $conn_string" + if [ $? 
== 0 ]; then + break + fi + sleep 10 + done +done + +# diff --git a/tests/perf/server/files/metrics.yaml b/tests/perf/server/files/metrics.yaml deleted file mode 100644 index d3cfb79659..0000000000 --- a/tests/perf/server/files/metrics.yaml +++ /dev/null @@ -1,227 +0,0 @@ -%{ if prom_worker_node_count != 0 } ---- -apiVersion: rbac.authorization.k8s.io/v1 -# kubernetes versions before 1.8.0 should use rbac.authorization.k8s.io/v1beta1 -kind: ClusterRoleBinding -metadata: - name: kube-state-metrics -roleRef: - apiGroup: rbac.authorization.k8s.io - kind: ClusterRole - name: kube-state-metrics -subjects: -- kind: ServiceAccount - name: kube-state-metrics - namespace: kube-system ---- -apiVersion: rbac.authorization.k8s.io/v1 -# kubernetes versions before 1.8.0 should use rbac.authorization.k8s.io/v1beta1 -kind: ClusterRole -metadata: - name: kube-state-metrics -rules: -- apiGroups: [""] - resources: - - configmaps - - secrets - - nodes - - pods - - services - - resourcequotas - - replicationcontrollers - - limitranges - - persistentvolumeclaims - - persistentvolumes - - namespaces - - endpoints - verbs: ["list", "watch"] -- apiGroups: ["extensions"] - resources: - - daemonsets - - deployments - - replicasets - - ingresses - verbs: ["list", "watch"] -- apiGroups: ["apps"] - resources: - - daemonsets - - deployments - - replicasets - - statefulsets - verbs: ["list", "watch"] -- apiGroups: ["batch"] - resources: - - cronjobs - - jobs - verbs: ["list", "watch"] -- apiGroups: ["autoscaling"] - resources: - - horizontalpodautoscalers - verbs: ["list", "watch"] -- apiGroups: ["policy"] - resources: - - poddisruptionbudgets - verbs: ["list", "watch"] -- apiGroups: ["certificates.k8s.io"] - resources: - - certificatesigningrequests - verbs: ["list", "watch"] -- apiGroups: ["storage.k8s.io"] - resources: - - storageclasses - verbs: ["list", "watch"] -- apiGroups: ["autoscaling.k8s.io"] - resources: - - verticalpodautoscalers - verbs: ["list", "watch"] ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - labels: - k8s-app: kube-state-metrics - name: kube-state-metrics - namespace: kube-system -spec: - selector: - matchLabels: - k8s-app: kube-state-metrics - replicas: 1 - template: - metadata: - labels: - k8s-app: kube-state-metrics - spec: - serviceAccountName: kube-state-metrics - containers: - - name: kube-state-metrics - image: quay.io/coreos/kube-state-metrics:v1.7.2 - ports: - - name: http-metrics - containerPort: 8080 - - name: telemetry - containerPort: 8081 - livenessProbe: - httpGet: - path: /healthz - port: 8080 - initialDelaySeconds: 5 - timeoutSeconds: 5 - readinessProbe: - httpGet: - path: / - port: 8080 - initialDelaySeconds: 5 - timeoutSeconds: 5 ---- -apiVersion: v1 -kind: ServiceAccount -metadata: - name: kube-state-metrics - namespace: kube-system ---- -apiVersion: v1 -kind: Service -metadata: - name: kube-state-metrics - namespace: kube-system - labels: - k8s-app: kube-state-metrics - annotations: - prometheus.io/scrape: 'true' -spec: - ports: - - name: http-metrics - port: 8080 - targetPort: http-metrics - protocol: TCP - - name: telemetry - port: 8081 - targetPort: telemetry - protocol: TCP - selector: - k8s-app: kube-state-metrics ---- -kind: ClusterRoleBinding -apiVersion: rbac.authorization.k8s.io/v1 -metadata: - name: slo-monitor -subjects: -- kind: ServiceAccount - name: slo-monitor - namespace: kube-system -roleRef: - kind: ClusterRole - name: slo-monitor - apiGroup: rbac.authorization.k8s.io ---- -kind: ClusterRole -apiVersion: rbac.authorization.k8s.io/v1 -metadata: - 
name: slo-monitor - namespace: kube-system -rules: -- apiGroups: [""] - resources: ["pods", "events"] - verbs: ["get", "watch", "list"] ---- -apiVersion: v1 -kind: ServiceAccount -metadata: - name: slo-monitor - namespace: kube-system ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: slo-monitor - namespace: kube-system - labels: - app: slo-monitor -spec: - selector: - matchLabels: - app: slo-monitor - template: - metadata: - labels: - app: slo-monitor - annotations: - prometheus.io/scrape: "true" - spec: - containers: - - name: slo-monitor - image: gcr.io/google-containers/slo-monitor:0.12.0 - command: - - /slo-monitor - - --alsologtostderr=true - imagePullPolicy: Always - ports: - - name: metrics - containerPort: 8080 - resources: - requests: - cpu: 300m - memory: 100Mi - limits: - cpu: 300m - memory: 100Mi - restartPolicy: Always - serviceAccountName: slo-monitor ---- -apiVersion: v1 -kind: Service -metadata: - name: slo-monitor - namespace: kube-system - labels: - app: slo-monitor -spec: - selector: - app: slo-monitor - ports: - - name: metrics - port: 80 - targetPort: metrics - type: ClusterIP -%{ endif } diff --git a/tests/perf/server/files/prom.yaml b/tests/perf/server/files/prom.yaml deleted file mode 100644 index 369a922548..0000000000 --- a/tests/perf/server/files/prom.yaml +++ /dev/null @@ -1,86 +0,0 @@ -%{ if prom_worker_node_count != 0 } ---- -apiVersion: v1 -kind: Namespace -metadata: - name: monitoring - ---- -apiVersion: helm.cattle.io/v1 -kind: HelmChart -metadata: - name: prometheus - namespace: kube-system -spec: - chart: https://raw.githubusercontent.com/galal-hussein/charts/master/prometheus-9.2.0.tgz - targetNamespace: monitoring - valuesContent: |- - alertmanager: - nodeSelector: - prom: "true" - persistentVolume: - enabled: false - kubeStateMetrics: - nodeSelector: - prom: "true" - nodeExporter: - nodeSelector: - prom: "true" - server: - nodeSelector: - prom: "true" - ingress: - enabled: true - hosts: - - ${prom_host} - persistentVolume: - enabled: false - pushgateway: - nodeSelector: - prom: "true" - persistentVolume: - enabled: false - serverFiles: - prometheus.yml: - scrape_configs: - - job_name: prometheus - static_configs: - - targets: - - localhost:9090 - - job_name: kubernetes-apiservers - scrape_interval: 10s - scrape_timeout: 10s - metrics_path: /metrics - scheme: https - kubernetes_sd_configs: - - api_server: null - role: endpoints - namespaces: - names: [] - bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token - tls_config: - ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt - insecure_skip_verify: true - relabel_configs: - - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name] - separator: ; - regex: default;kubernetes;https - replacement: $1 - action: keep ---- -apiVersion: helm.cattle.io/v1 -kind: HelmChart -metadata: - name: grafana - namespace: kube-system -spec: - chart: stable/grafana - targetNamespace: monitoring - valuesContent: |- - ingress: - enabled: true - hosts: - - ${graf_host} - nodeSelector: - prom: "true" -%{ endif } diff --git a/tests/perf/server/files/server_userdata.tmpl b/tests/perf/server/files/server_userdata.tmpl index 17cad50b88..65145c7c68 100644 --- a/tests/perf/server/files/server_userdata.tmpl +++ b/tests/perf/server/files/server_userdata.tmpl @@ -6,16 +6,33 @@ ssh_authorized_keys: %{ endfor } %{ endif } write_files: -- path: /var/lib/rancher/k3s/server/manifests/metrics.yaml +- path: /opt/k3s/run_k3s.sh permissions: 
"0755" owner: root:root - encoding: b64 - content: ${metrics_yaml} -- path: /var/lib/rancher/k3s/server/manifests/prom.yaml - permissions: "0755" - owner: root:root - encoding: b64 - content: ${prom_yaml} + content: | + #!/bin/bash + set -x + if [ ${db_engine} == "postgres" ]; then + STORAGE_ENDPOINT="postgres://${db_username}:${db_password}@${db_address}:5432/${db_name}" + elif [ ${db_engine} == "mysql" ]; then + STORAGE_ENDPOINT="mysql://${db_username}:${db_password}@(${db_address})/${db_name}" + else + IFS=',' read -r -a private_ips <<< "${db_address}" + for i in "$${!private_ips[@]}"; do + STORAGE_ENDPOINT=$STORAGE_ENDPOINT"http://$${private_ips[i]}:2379", + done + STORAGE_ENDPOINT=$${STORAGE_ENDPOINT%?} + echo hello + fi + while true; do + curl -sfL https://get.k3s.io | K3S_CLUSTER_SECRET="${k3s_cluster_secret}" \ + INSTALL_K3S_VERSION="${install_k3s_version}" \ + INSTALL_K3S_EXEC="${k3s_server_args} --cluster-cidr=10.0.0.0/8 --no-deploy traefik --no-deploy servicelb --tls-san ${lb_address} %{ if use_ha == "true" } --storage-endpoint=$STORAGE_ENDPOINT %{ endif }" sh - + if [ $? -eq 0 ]; then + break + fi + sleep 1 + done runcmd: - echo "net.ipv4.neigh.default.gc_interval = 3600" >> /etc/sysctl.conf - echo "net.ipv4.neigh.default.gc_stale_time = 3600" >> /etc/sysctl.conf @@ -41,14 +58,11 @@ runcmd: - echo "RateLimitBurst=0" >> /etc/systemd/journald.conf - systemctl restart systemd-journald.service - systemctl start resolvconf -- wget https://raw.githubusercontent.com/galal-hussein/k3s/k3s_with_kine_fix/k3s -- cp k3s /usr/local/bin/k3s -- chmod +x /usr/local/bin/k3s -%{if master_index != 0 } +%{ if master_index != 0 } - sleep 20 %{ endif } -- until (curl -sfL https://get.k3s.io | INSTALL_K3S_SKIP_DOWNLOAD=true K3S_CLUSTER_SECRET="${k3s_cluster_secret}" INSTALL_K3S_VERSION="${install_k3s_version}" INSTALL_K3S_EXEC="${k3s_server_args} --cluster-cidr=10.0.0.0/8 --no-deploy traefik --no-deploy servicelb --tls-san ${lb_address} %{ if use_ha == "true" } --storage-endpoint="postgres://${db_username}:${db_password}@${db_address}:5432/${db_name}" %{ if master_index == 0 }--bootstrap-save%{ endif } %{ endif }" sh -); do echo 'Error installing k3s'; sleep 1; done -%{if debug != 0 } +- /opt/k3s/run_k3s.sh +%{ if debug != 0 } - sed -i 's/bin\/k3s/bin\/k3s --debug/g' /etc/systemd/system/k3s.service - systemctl daemon-reload - systemctl restart k3s diff --git a/tests/perf/server/files/worker_userdata.tmpl b/tests/perf/server/files/worker_userdata.tmpl index 90712c0bdc..ae2aaa9609 100644 --- a/tests/perf/server/files/worker_userdata.tmpl +++ b/tests/perf/server/files/worker_userdata.tmpl @@ -23,7 +23,7 @@ runcmd: - echo " * soft nofile 20000" >> /etc/security/limits.d/limits.conf - echo " * hard nofile 20000" >> /etc/security/limits.d/limits.conf - sysctl -p -- wget https://raw.githubusercontent.com/galal-hussein/k3s/k3s_with_kine_fix/k3s +- wget https://raw.githubusercontent.com/galal-hussein/k3s/scale_test/k3s - cp k3s /usr/local/bin/k3s - chmod +x /usr/local/bin/k3s - until (curl -sfL https://get.k3s.io | INSTALL_K3S_VERSION=${install_k3s_version} INSTALL_K3S_EXEC="${k3s_exec}" K3S_URL=https://${k3s_url}:6443 K3S_CLUSTER_SECRET="${k3s_cluster_secret}" sh -); do echo 'k3s did not install correctly'; sleep 1; done diff --git a/tests/perf/server/main.tf b/tests/perf/server/main.tf index bffc863b2a..9c5bdcceda 100644 --- a/tests/perf/server/main.tf +++ b/tests/perf/server/main.tf @@ -5,10 +5,11 @@ terraform { } locals { - name = var.name - k3s_cluster_secret = var.k3s_cluster_secret - 
install_k3s_version = var.k3s_version
-  prom_worker_node_count = var.prom_worker_node_count
+  name = var.name
+  k3s_cluster_secret = var.k3s_cluster_secret
+  install_k3s_version = var.k3s_version
+  prom_worker_node_count = var.prom_worker_node_count
+  prom_worker_instance_type = var.prom_worker_instance_type
 }
 
 provider "aws" {
@@ -57,12 +58,12 @@ resource "aws_security_group" "k3s" {
 }
 
 resource "aws_db_instance" "k3s_db" {
-  count = "${var.k3s_ha}"
+  count = "${var.db_engine == "etcd" ? 0 : var.server_ha}"
   allocated_storage = 100 #baseline iops is 300 with gp2
   storage_type = "io1"
   iops = "3000"
-  engine = "postgres"
-  engine_version = "11.5"
+  engine = "${var.db_engine}"
+  engine_version = "${var.db_version}"
   instance_class = "${var.db_instance_type}"
   name = "${var.db_name}"
   username = "${var.db_username}"
@@ -71,13 +72,48 @@
   multi_az = false
 }
 
+resource "aws_instance" "k3s_etcd" {
+  count = "${var.etcd_count * (var.db_engine == "etcd" ? 1 * var.server_ha : 0)}"
+  instance_type = replace(var.db_instance_type, "/db./", "")
+  ami = data.aws_ami.ubuntu.id
+  user_data = base64encode(templatefile("${path.module}/files/etcd.tmpl",
+  {
+    extra_ssh_keys = var.extra_ssh_keys,
+    db_version = var.db_version
+    etcd_count = var.etcd_count
+  }))
+  security_groups = [
+    aws_security_group.k3s.name,
+  ]
+
+  root_block_device {
+    volume_size = "100"
+    volume_type = "gp2"
+  }
+
+  tags = {
+    Name = "${local.name}-etcd-${count.index}"
+  }
+}
+
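As an aside on the etcd branch above: `instance_type = replace(var.db_instance_type, "/db./", "")` reuses the RDS-style `db.*` class from the config for the etcd EC2 nodes by stripping the `db.` prefix (this is the parsing the README's variables table alludes to). A quick sketch of the mapping, using a placeholder value:

```sh
# "db.m4.4xlarge" in DB_INSTANCE_TYPE becomes the EC2 type "m4.4xlarge"
echo "db.m4.4xlarge" | sed 's/db\.//'   # prints: m4.4xlarge
```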
 resource "aws_lb" "k3s-master-nlb" {
   name = "${local.name}-nlb"
   internal = false
   load_balancer_type = "network"
-  subnets = [data.aws_subnet.selected.id]
+  subnets = data.aws_subnet_ids.available.ids
 }
 
+resource "aws_route53_record" "www" {
+  # A Route53 CNAME is currently the only way to hand k3s the NLB address,
+  # because the NLB's real DNS name is too long and causes an issue.
+  zone_id = "${var.zone_id}"
+  name = "${var.domain_name}"
+  type = "CNAME"
+  ttl = "30"
+  records = ["${aws_lb.k3s-master-nlb.dns_name}"]
+}
+
+
 resource "aws_lb_target_group" "k3s-master-nlb-tg" {
   name = "${local.name}-nlb-tg"
   port = "6443"
@@ -104,35 +140,33 @@
 }
 
 resource "aws_lb_target_group_attachment" "test" {
-  count = "${var.master_count}"
+  count = "${var.server_count}"
   target_group_arn = "${aws_lb_target_group.k3s-master-nlb-tg.arn}"
-  target_id = "${aws_spot_instance_request.k3s-server[count.index].spot_instance_id}"
+  target_id = "${aws_instance.k3s-server[count.index].id}"
   port = 6443
 }
 
-resource "aws_spot_instance_request" "k3s-server" {
-  count = "${var.master_count}"
+resource "aws_instance" "k3s-server" {
+  count = "${var.server_count}"
   instance_type = var.server_instance_type
   ami = data.aws_ami.ubuntu.id
   user_data = base64encode(templatefile("${path.module}/files/server_userdata.tmpl",
   {
     extra_ssh_keys = var.extra_ssh_keys,
-    metrics_yaml = base64encode(data.template_file.metrics.rendered),
-    prom_yaml = base64encode(data.template_file.k3s-prom-yaml.rendered),
     k3s_cluster_secret = local.k3s_cluster_secret,
     install_k3s_version = local.install_k3s_version,
     k3s_server_args = var.k3s_server_args,
-    db_address = aws_db_instance.k3s_db[0].address,
-    db_name = aws_db_instance.k3s_db[0].name,
-    db_username = aws_db_instance.k3s_db[0].username,
-    db_password = aws_db_instance.k3s_db[0].password,
-    use_ha = "${var.k3s_ha == 1 ? "true": "false"}",
+    db_engine = var.db_engine
+    db_address = "${var.db_engine == "etcd" ? join(",",aws_instance.k3s_etcd.*.private_ip) : aws_db_instance.k3s_db[0].address}",
+    db_name = var.db_name,
+    db_username = var.db_username,
+    db_password = var.db_password,
+    use_ha = "${var.server_ha == 1 ? "true": "false"}",
     master_index = count.index,
-    lb_address = aws_lb.k3s-master-nlb.dns_name,
+    lb_address = var.domain_name,
     prom_worker_node_count = local.prom_worker_node_count,
-    debug = var.debug,}))
-
-  wait_for_fulfillment = true
+    debug = var.debug,
+    k3s_cluster_secret = local.k3s_cluster_secret,}))
   security_groups = [
     aws_security_group.k3s.name,
   ]
@@ -155,9 +189,9 @@ module "k3s-prom-worker-asg" {
   version = "3.0.0"
   name = "${local.name}-prom-worker"
   asg_name = "${local.name}-prom-worker"
-  instance_type = "m5.large"
+  instance_type = local.prom_worker_instance_type
   image_id = data.aws_ami.ubuntu.id
-  user_data = base64encode(templatefile("${path.module}/files/worker_userdata.tmpl", { extra_ssh_keys = var.extra_ssh_keys, k3s_url = aws_lb.k3s-master-nlb.dns_name, k3s_cluster_secret = local.k3s_cluster_secret, install_k3s_version = local.install_k3s_version, k3s_exec = "--node-label prom=true" }))
+  user_data = base64encode(templatefile("${path.module}/files/worker_userdata.tmpl", { extra_ssh_keys = var.extra_ssh_keys, k3s_url = var.domain_name, k3s_cluster_secret = local.k3s_cluster_secret, install_k3s_version = local.install_k3s_version, k3s_exec = "--node-label prom=true" }))
 
   desired_capacity = local.prom_worker_node_count
   health_check_type = "EC2"
@@ -180,9 +214,22 @@
   ]
 }
 
+resource "null_resource" "run_etcd" {
+  count = "${var.db_engine == "etcd" ? 1 : 0}"
+
+  triggers = {
+    etcd_instance_ids = "${join(",", aws_instance.k3s_etcd.*.id)}"
+  }
+
+  provisioner "local-exec" {
+    interpreter = ["bash", "-c"]
+    command = "DB_VERSION=${var.db_version} SSH_KEY_PATH=${var.ssh_key_path} PUBLIC_IPS=${join(",",aws_instance.k3s_etcd.*.public_ip)} PRIVATE_IPS=${join(",",aws_instance.k3s_etcd.*.private_ip)} files/etcd_build.sh"
+  }
+}
+
 resource "null_resource" "get-kubeconfig" {
   provisioner "local-exec" {
     interpreter = ["bash", "-c"]
-    command = "until ssh -i ${var.ssh_key_path} ubuntu@${aws_spot_instance_request.k3s-server[0].public_ip} 'sudo sed \"s/localhost/$aws_lb.k3s-master-nlb.dns_name}/g;s/127.0.0.1/${aws_lb.k3s-master-nlb.dns_name}/g\" /etc/rancher/k3s/k3s.yaml' >| ../tests/kubeconfig.yaml; do sleep 5; done"
+    command = "until ssh -i ${var.ssh_key_path} ubuntu@${aws_instance.k3s-server[0].public_ip} 'sudo sed \"s/localhost/${var.domain_name}/g;s/127.0.0.1/${var.domain_name}/g\" /etc/rancher/k3s/k3s.yaml' >| ../tests/kubeconfig.yaml; do sleep 5; done"
   }
 }
diff --git a/tests/perf/server/outputs.tf b/tests/perf/server/outputs.tf
index 6e2ffd61ea..7c5c84fd2a 100644
--- a/tests/perf/server/outputs.tf
+++ b/tests/perf/server/outputs.tf
@@ -1,5 +1,5 @@
 output "public_ip" {
-  value = aws_lb.k3s-master-nlb.dns_name
+  value = var.domain_name
 }
 
 output "install_k3s_version" {
@@ -11,5 +11,5 @@
 }
 
 output "k3s_server_ips" {
-  value = join(",", aws_spot_instance_request.k3s-server.*.public_ip)
+  value = join(",", aws_instance.k3s-server.*.public_ip)
 }
diff --git a/tests/perf/server/variables.tf b/tests/perf/server/variables.tf
index 0a7209ed42..cbe680d24a 100644
--- a/tests/perf/server/variables.tf
+++ b/tests/perf/server/variables.tf
@@ -23,12 +23,7 @@ variable "k3s_cluster_secret" {
   type = string
   description = "Cluster secret for k3s cluster registration"
 }
-variable "prom_host" {
-  default = ""
-}
-variable "graf_host" {
-  default =
"" -} + variable "name" { default = "k3s-loadtest" type = string @@ -47,11 +42,19 @@ variable "extra_ssh_keys" { description = "Extra ssh keys to inject into Rancher instances" } -variable "k3s_ha" { +variable "server_ha" { default = 0 description = "Enable k3s in HA mode" } +variable "etcd_count" { + default = 3 +} + +variable "db_engine" { + default = "postgres" +} + variable "db_instance_type" { } @@ -67,7 +70,9 @@ variable "db_password" { default = "b58bf234c4bd0133fc7a92b782e498a6" } -variable "master_count" { +variable "db_version" {} + +variable "server_count" { default = 1 description = "Count of k3s master servers" } @@ -76,3 +81,16 @@ variable "debug" { default = 0 description = "Enable Debug log" } + +variable "prom_worker_instance_type" { + default = "m5.large" + description = "Prometheus instance type" +} + +variable "domain_name" { + description = "FQDN of the cluster" +} + +variable "zone_id" { + description = "route53 zone id to register the domain name" +} diff --git a/tests/perf/tests/load/config.yaml b/tests/perf/tests/load/config.yaml index 413fd81eec..a0612bc371 100644 --- a/tests/perf/tests/load/config.yaml +++ b/tests/perf/tests/load/config.yaml @@ -7,7 +7,7 @@ #Constants {{$NODE_MODE := DefaultParam .NODE_MODE "allnodes"}} {{$NODES_PER_NAMESPACE := DefaultParam .NODES_PER_NAMESPACE 100}} -{{$PODS_PER_NODE := DefaultParam .PODS_PER_NODE 30}} +{{$PODS_PER_NODE := DefaultParam .PODS_PER_NODE 100}} {{$LOAD_TEST_THROUGHPUT := DefaultParam .LOAD_TEST_THROUGHPUT 10}} {{$BIG_GROUP_SIZE := 1000}} {{$MEDIUM_GROUP_SIZE := 500}} From 9969dc9cff92b65dc0ac5a37c4921cbf18d01222 Mon Sep 17 00:00:00 2001 From: galal-hussein Date: Fri, 15 Nov 2019 13:14:28 +0200 Subject: [PATCH 6/7] add dqlite --- .../agents/files/pool_worker_userdata.tmpl | 3 --- tests/perf/agents/main.tf | 4 ++-- tests/perf/scripts/config | 10 +++++----- tests/perf/server/files/server_userdata.tmpl | 12 +++++++++--- tests/perf/server/files/worker_userdata.tmpl | 3 --- tests/perf/server/main.tf | 19 ++++++++++--------- tests/perf/tests/load/config.yaml | 8 ++++---- 7 files changed, 30 insertions(+), 29 deletions(-) diff --git a/tests/perf/agents/files/pool_worker_userdata.tmpl b/tests/perf/agents/files/pool_worker_userdata.tmpl index 6e08a5d300..b117a5635b 100644 --- a/tests/perf/agents/files/pool_worker_userdata.tmpl +++ b/tests/perf/agents/files/pool_worker_userdata.tmpl @@ -27,7 +27,4 @@ runcmd: - apt-get install -y software-properties-common - apt-get install -y resolvconf linux-headers-$(uname -r) && echo "nameserver 1.1.1.1" > /etc/resolvconf/resolv.conf.d/tail && systemctl start resolvconf - DEBIAN_FRONTEND=noninteractive apt-get upgrade -y -- wget https://raw.githubusercontent.com/galal-hussein/k3s/k3s_with_kine_fix/k3s -- cp k3s /usr/local/bin/k3s -- chmod +x /usr/local/bin/k3s - until (curl -sfL https://get.k3s.io | K3S_URL=https://${k3s_url}:6443 K3S_CLUSTER_SECRET="${k3s_cluster_secret}" K3S_CLUSTER_SECRET="${k3s_cluster_secret}" INSTALL_K3S_VERSION="${install_k3s_version}" sh -); do echo 'Error installing k3s agent'; sleep 1; done diff --git a/tests/perf/agents/main.tf b/tests/perf/agents/main.tf index f62c432fd3..106b7e8dcb 100644 --- a/tests/perf/agents/main.tf +++ b/tests/perf/agents/main.tf @@ -10,7 +10,7 @@ locals { } provider "aws" { - region = "us-west-2" + region = "us-east-2" profile = "rancher-eng" } @@ -76,7 +76,7 @@ module "k3s-pool-worker-asg" { root_block_device = [ { - volume_size = "100" + volume_size = "30" volume_type = "gp2" }, ] diff --git a/tests/perf/scripts/config 
b/tests/perf/scripts/config index 3505846732..d85bf41426 100755 --- a/tests/perf/scripts/config +++ b/tests/perf/scripts/config @@ -1,21 +1,21 @@ ## MAIN VARIABLES ## #################### CLUSTER_NAME="loadtest-k3s" -DOMAIN_NAME="" +DOMAIN_NAME="loadtest.eng.rancher.space" ZONE_ID="" -K3S_VERSION="v0.11.0-alpha2" +K3S_VERSION="v1.0.0" EXTRA_SSH_KEYS="" # comma separated public keys PRIVATE_KEY_PATH="~/.ssh/id_rsa" DEBUG=1 ## K3S DB VARIABLES ## ########################## -DB_ENGINE="postgres" +DB_ENGINE="dqlite" DB_INSTANCE_TYPE="db.m4.4xlarge" DB_NAME="k3s" DB_USERNAME="k3suser" DB_PASSWORD="024d9442b3add64b7ef90655bc302cd8" -DB_VERSION=11.5 +DB_VERSION=5.7 ## K3S SERVER VARIABLES ## ########################## @@ -30,5 +30,5 @@ PROM_WORKER_INSTANCE_TYPE="m5.large" ## K3S AGENTS VARIABLES ## ########################## -AGENT_NODE_COUNT=100 +AGENT_NODE_COUNT=10 AGENT_INSTANCE_TYPE="m5.large" diff --git a/tests/perf/server/files/server_userdata.tmpl b/tests/perf/server/files/server_userdata.tmpl index 65145c7c68..e831429736 100644 --- a/tests/perf/server/files/server_userdata.tmpl +++ b/tests/perf/server/files/server_userdata.tmpl @@ -16,18 +16,24 @@ write_files: STORAGE_ENDPOINT="postgres://${db_username}:${db_password}@${db_address}:5432/${db_name}" elif [ ${db_engine} == "mysql" ]; then STORAGE_ENDPOINT="mysql://${db_username}:${db_password}@(${db_address})/${db_name}" - else + elif [ ${db_engine} == "etcd" ]; then IFS=',' read -r -a private_ips <<< "${db_address}" for i in "$${!private_ips[@]}"; do STORAGE_ENDPOINT=$STORAGE_ENDPOINT"http://$${private_ips[i]}:2379", done STORAGE_ENDPOINT=$${STORAGE_ENDPOINT%?} - echo hello fi + while true; do + if [ ${db_engine} == "dqlite" ]; then + curl -sfL https://get.k3s.io | K3S_CLUSTER_SECRET="${k3s_cluster_secret}" \ + INSTALL_K3S_VERSION="${install_k3s_version}" \ + INSTALL_K3S_EXEC="${k3s_server_args} --cluster-cidr=10.0.0.0/8 --no-deploy traefik --no-deploy servicelb --tls-san ${lb_address} %{ if master_index != 0 } --server https://${lb_address}:6443 %{ else } --cluster-init %{ endif }" sh - + else curl -sfL https://get.k3s.io | K3S_CLUSTER_SECRET="${k3s_cluster_secret}" \ INSTALL_K3S_VERSION="${install_k3s_version}" \ - INSTALL_K3S_EXEC="${k3s_server_args} --cluster-cidr=10.0.0.0/8 --no-deploy traefik --no-deploy servicelb --tls-san ${lb_address} %{ if use_ha == "true" } --storage-endpoint=$STORAGE_ENDPOINT %{ endif }" sh - + INSTALL_K3S_EXEC="${k3s_server_args} --cluster-cidr=10.0.0.0/8 --no-deploy traefik --no-deploy servicelb --tls-san ${lb_address} %{ if use_ha == "true" } --datastore-endpoint=$STORAGE_ENDPOINT %{ endif }" sh - + fi if [ $? 
-eq 0 ]; then break fi diff --git a/tests/perf/server/files/worker_userdata.tmpl b/tests/perf/server/files/worker_userdata.tmpl index ae2aaa9609..e451a6d0ce 100644 --- a/tests/perf/server/files/worker_userdata.tmpl +++ b/tests/perf/server/files/worker_userdata.tmpl @@ -23,7 +23,4 @@ runcmd: - echo " * soft nofile 20000" >> /etc/security/limits.d/limits.conf - echo " * hard nofile 20000" >> /etc/security/limits.d/limits.conf - sysctl -p -- wget https://raw.githubusercontent.com/galal-hussein/k3s/scale_test/k3s -- cp k3s /usr/local/bin/k3s -- chmod +x /usr/local/bin/k3s - until (curl -sfL https://get.k3s.io | INSTALL_K3S_VERSION=${install_k3s_version} INSTALL_K3S_EXEC="${k3s_exec}" K3S_URL=https://${k3s_url}:6443 K3S_CLUSTER_SECRET="${k3s_cluster_secret}" sh -); do echo 'k3s did not install correctly'; sleep 1; done diff --git a/tests/perf/server/main.tf b/tests/perf/server/main.tf index 9c5bdcceda..0e5e1895c8 100644 --- a/tests/perf/server/main.tf +++ b/tests/perf/server/main.tf @@ -13,7 +13,7 @@ locals { } provider "aws" { - region = "us-west-2" + region = "us-east-2" profile = "rancher-eng" } @@ -58,10 +58,9 @@ resource "aws_security_group" "k3s" { } resource "aws_db_instance" "k3s_db" { - count = "${var.db_engine == "etcd" ? 0 : var.server_ha}" + count = "${var.db_engine == "postgres" || var.db_engine == "mysql" ? 1 : 0 }" allocated_storage = 100 #baseline iops is 300 with gp2 - storage_type = "io1" - iops = "3000" + storage_type = "gp2" engine = "${var.db_engine}" engine_version = "${var.db_version}" instance_class = "${var.db_instance_type}" @@ -87,7 +86,7 @@ resource "aws_instance" "k3s_etcd" { ] root_block_device { - volume_size = "100" + volume_size = "30" volume_type = "gp2" } @@ -156,8 +155,8 @@ resource "aws_instance" "k3s-server" { k3s_cluster_secret = local.k3s_cluster_secret, install_k3s_version = local.install_k3s_version, k3s_server_args = var.k3s_server_args, - db_engine = var.db_engine - db_address = "${var.db_engine == "etcd" ? join(",",aws_instance.k3s_etcd.*.private_ip) : aws_db_instance.k3s_db[0].address}", + db_engine = var.db_engine, + db_address = "${var.db_engine == "etcd" ? join(",",aws_instance.k3s_etcd.*.private_ip) : var.db_engine == "dqlite" ? "null" : aws_db_instance.k3s_db[0].address}", db_name = var.db_name, db_username = var.db_username, db_password = var.db_password, @@ -172,12 +171,14 @@ resource "aws_instance" "k3s-server" { ] root_block_device { - volume_size = "100" + volume_size = "30" volume_type = "gp2" } tags = { Name = "${local.name}-server-${count.index}" + Role = "master" + Leader = "${count.index == 0 ? 
"true" : "false"}" } provisioner "local-exec" { command = "sleep 10" @@ -208,7 +209,7 @@ module "k3s-prom-worker-asg" { root_block_device = [ { - volume_size = "100" + volume_size = "30" volume_type = "gp2" }, ] diff --git a/tests/perf/tests/load/config.yaml b/tests/perf/tests/load/config.yaml index a0612bc371..a70765330a 100644 --- a/tests/perf/tests/load/config.yaml +++ b/tests/perf/tests/load/config.yaml @@ -6,11 +6,11 @@ #Constants {{$NODE_MODE := DefaultParam .NODE_MODE "allnodes"}} -{{$NODES_PER_NAMESPACE := DefaultParam .NODES_PER_NAMESPACE 100}} -{{$PODS_PER_NODE := DefaultParam .PODS_PER_NODE 100}} +{{$NODES_PER_NAMESPACE := DefaultParam .NODES_PER_NAMESPACE 10}} +{{$PODS_PER_NODE := DefaultParam .PODS_PER_NODE 30}} {{$LOAD_TEST_THROUGHPUT := DefaultParam .LOAD_TEST_THROUGHPUT 10}} -{{$BIG_GROUP_SIZE := 1000}} -{{$MEDIUM_GROUP_SIZE := 500}} +{{$BIG_GROUP_SIZE := 300}} +{{$MEDIUM_GROUP_SIZE := 150}} {{$SMALL_GROUP_SIZE := 50}} {{$SMALL_STATEFUL_SETS_PER_NAMESPACE := 1}} {{$MEDIUM_STATEFUL_SETS_PER_NAMESPACE := 1}} From 93e28126aabc9c329a46ebb675d934fe46098d82 Mon Sep 17 00:00:00 2001 From: galal-hussein Date: Wed, 27 Nov 2019 20:30:56 +0200 Subject: [PATCH 7/7] randomize the secrets --- tests/perf/agents/main.tf | 2 +- tests/perf/agents/variables.tf | 5 +++++ tests/perf/scripts/config | 3 ++- tests/perf/scripts/perf | 10 ++++++++++ tests/perf/server/variables.tf | 5 +---- 5 files changed, 19 insertions(+), 6 deletions(-) diff --git a/tests/perf/agents/main.tf b/tests/perf/agents/main.tf index 106b7e8dcb..ece3c1a8e9 100644 --- a/tests/perf/agents/main.tf +++ b/tests/perf/agents/main.tf @@ -6,7 +6,7 @@ terraform { locals { name = var.name - k3s_cluster_secret = "pvc-6476dcaf-73a0-11e9-b8e5-06943b744282" + k3s_cluster_secret = var.k3s_cluster_secret } provider "aws" { diff --git a/tests/perf/agents/variables.tf b/tests/perf/agents/variables.tf index f0924930fe..8cf3b28a9f 100644 --- a/tests/perf/agents/variables.tf +++ b/tests/perf/agents/variables.tf @@ -26,3 +26,8 @@ variable "name" { type = string description = "Name to identify this cluster" } + +variable "k3s_cluster_secret" { + type = string + description = "Cluster secret for k3s cluster registration" +} \ No newline at end of file diff --git a/tests/perf/scripts/config b/tests/perf/scripts/config index d85bf41426..5467439d05 100755 --- a/tests/perf/scripts/config +++ b/tests/perf/scripts/config @@ -1,6 +1,7 @@ ## MAIN VARIABLES ## #################### CLUSTER_NAME="loadtest-k3s" +CLUSTER_SECRET="" DOMAIN_NAME="loadtest.eng.rancher.space" ZONE_ID="" K3S_VERSION="v1.0.0" @@ -14,7 +15,7 @@ DB_ENGINE="dqlite" DB_INSTANCE_TYPE="db.m4.4xlarge" DB_NAME="k3s" DB_USERNAME="k3suser" -DB_PASSWORD="024d9442b3add64b7ef90655bc302cd8" +DB_PASSWORD="" DB_VERSION=5.7 ## K3S SERVER VARIABLES ## diff --git a/tests/perf/scripts/perf b/tests/perf/scripts/perf index 9dbae96166..e5e2f4f6a1 100755 --- a/tests/perf/scripts/perf +++ b/tests/perf/scripts/perf @@ -51,8 +51,17 @@ config() { pushd ./server eval PRIVATE_KEY_PATH=$PRIVATE_KEY_PATH EXPANDED_PRIV_KEY_PATH=`readlink -f $PRIVATE_KEY_PATH` + if [ -z "$DB_PASSWORD" ]; then + # randomize database password + DB_PASSWORD=$(cat /dev/urandom | tr -dc 'a-zA-Z0-9' | fold -w 32 | head -n 1) + fi + if [ -z "$CLUSTER_SECRET" ]; then + # randomize cluster secret + CLUSTER_SECRET=$(cat /dev/urandom | tr -dc 'a-zA-Z0-9' | fold -w 32 | head -n 1) + fi cat <
<MAIN >variables.tfvars
 name = "${CLUSTER_NAME}"
+k3s_cluster_secret = "${CLUSTER_SECRET}"
 db_instance_type = "${DB_INSTANCE_TYPE}"
 db_name = "${DB_NAME}"
 db_username = "${DB_USERNAME}"
@@ -80,6 +89,7 @@
 extra_ssh_keys = ["${EXTRA_SSH_KEYS}"]
 k3s_version = "${K3S_VERSION}"
 agent_node_count = ${AGENT_NODE_COUNT}
 agent_instance_type = "${AGENT_INSTANCE_TYPE}"
+k3s_cluster_secret = "${CLUSTER_SECRET}"
 MAIN
 popd
 }
diff --git a/tests/perf/server/variables.tf b/tests/perf/server/variables.tf
index cbe680d24a..3ae6f96fea 100644
--- a/tests/perf/server/variables.tf
+++ b/tests/perf/server/variables.tf
@@ -19,7 +19,6 @@ variable "prom_worker_node_count" {
 }
 
 variable "k3s_cluster_secret" {
-  default = "pvc-6476dcaf-73a0-11e9-b8e5-06943b744282"
   type = string
   description = "Cluster secret for k3s cluster registration"
 }
@@ -66,9 +65,7 @@ variable "db_username" {
   default = "postgres"
 }
 
-variable "db_password" {
-  default = "b58bf234c4bd0133fc7a92b782e498a6"
-}
+variable "db_password" {}
 
 variable "db_version" {}
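To summarize what the server userdata ends up doing across these patches: the `--datastore-endpoint` handed to k3s takes a different shape per `DB_ENGINE`. A rough sketch, with placeholder hosts and credentials:

```sh
# postgres
STORAGE_ENDPOINT="postgres://k3suser:<password>@<rds-address>:5432/k3s"

# mysql
STORAGE_ENDPOINT="mysql://k3suser:<password>@(<rds-address>)/k3s"

# etcd: comma-separated client URLs, one per etcd node
STORAGE_ENDPOINT="http://10.0.0.10:2379,http://10.0.0.11:2379,http://10.0.0.12:2379"

# dqlite: no datastore endpoint at all; the first server starts with
# --cluster-init and the remaining servers join it with
# --server https://<domain-name>:6443
```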