delete load testing

galal-hussein 2019-10-31 23:06:10 +02:00
parent bf6ca8b2d8
commit 31a615fc0f
34 changed files with 0 additions and 2045 deletions


@@ -1 +0,0 @@
.terraform*


@@ -1 +0,0 @@
kubeConfig.yaml


@@ -1,471 +0,0 @@
# ASSUMPTIONS:
# - Underlying cluster should have 100+ nodes.
# - Number of nodes should be divisible by NODES_PER_NAMESPACE (default 100).
# - The number of created SVCs is half the number of created Deployments.
# - Only half of Deployments will be assigned 1-1 to existing SVCs.

# Constants
{{$NODE_MODE := DefaultParam .NODE_MODE "allnodes"}}
{{$NODES_PER_NAMESPACE := DefaultParam .NODES_PER_NAMESPACE 100}}
{{$PODS_PER_NODE := DefaultParam .PODS_PER_NODE 30}}
{{$LOAD_TEST_THROUGHPUT := DefaultParam .LOAD_TEST_THROUGHPUT 10}}
{{$BIG_GROUP_SIZE := 25}}
{{$MEDIUM_GROUP_SIZE := 15}}
{{$SMALL_GROUP_SIZE := 1}}
{{$SMALL_STATEFUL_SETS_PER_NAMESPACE := 1}}
{{$MEDIUM_STATEFUL_SETS_PER_NAMESPACE := 1}}
{{$ENABLE_CHAOSMONKEY := DefaultParam .ENABLE_CHAOSMONKEY false}}
{{$ENABLE_PROMETHEUS_API_RESPONSIVENESS := DefaultParam .ENABLE_PROMETHEUS_API_RESPONSIVENESS false}}
{{$ENABLE_CONFIGMAPS := DefaultParam .ENABLE_CONFIGMAPS false}}
{{$ENABLE_SECRETS := DefaultParam .ENABLE_SECRETS false}}
{{$ENABLE_STATEFULSETS := DefaultParam .ENABLE_STATEFULSETS false}}

# Variables
{{$namespaces := DivideInt .Nodes $NODES_PER_NAMESPACE}}
{{$totalPods := MultiplyInt $namespaces $NODES_PER_NAMESPACE $PODS_PER_NODE}}
{{$podsPerNamespace := DivideInt $totalPods $namespaces}}
{{$saturationTime := DivideInt $totalPods $LOAD_TEST_THROUGHPUT}}
# bigDeployments - 1/4 of namespace pods should be in big Deployments.
{{$bigDeploymentsPerNamespace := DivideInt $podsPerNamespace (MultiplyInt 4 $BIG_GROUP_SIZE)}}
# mediumDeployments - 1/4 of namespace pods should be in medium Deployments.
{{$mediumDeploymentsPerNamespace := DivideInt $podsPerNamespace (MultiplyInt 4 $MEDIUM_GROUP_SIZE)}}
# smallDeployments - 1/2 of namespace pods should be in small Deployments.
{{$smallDeploymentsPerNamespace := DivideInt $podsPerNamespace (MultiplyInt 2 $SMALL_GROUP_SIZE)}}
# If StatefulSets are enabled, reduce the number of small and medium Deployments per namespace.
{{$smallDeploymentsPerNamespace := SubtractInt $smallDeploymentsPerNamespace (IfThenElse $ENABLE_STATEFULSETS $SMALL_STATEFUL_SETS_PER_NAMESPACE 0)}}
{{$mediumDeploymentsPerNamespace := SubtractInt $mediumDeploymentsPerNamespace (IfThenElse $ENABLE_STATEFULSETS $MEDIUM_STATEFUL_SETS_PER_NAMESPACE 0)}}

name: load
automanagedNamespaces: {{$namespaces}}
tuningSets:
- name: Sequence
  parallelismLimitedLoad:
    parallelismLimit: 1
- name: RandomizedSaturationTimeLimited
  RandomizedTimeLimitedLoad:
    timeLimit: {{$saturationTime}}s
- name: RandomizedScalingTimeLimited
  RandomizedTimeLimitedLoad:
    # The expected number of created/deleted pods is totalPods/4 when scaling,
    # as each RS changes its size from X to a uniform random value in [X/2, 3X/2].
    # To match the 10 [pods/s] requirement, we need to divide saturationTime by 4.
    timeLimit: {{DivideInt $saturationTime 4}}s
{{if $ENABLE_CHAOSMONKEY}}
chaosMonkey:
  nodeFailure:
    failureRate: 0.01
    interval: 1m
    jitterFactor: 10.0
    simulatedDowntime: 10m
{{end}}
steps:
- name: Starting measurements
  measurements:
  - Identifier: APIResponsiveness
    Method: APIResponsiveness
    Params:
      action: reset
  - Identifier: APIResponsivenessPrometheus
    Method: APIResponsivenessPrometheus
    Params:
      action: start
  - Identifier: PodStartupLatency
    Method: PodStartupLatency
    Params:
      action: start
      labelSelector: group = load
      threshold: 1h
  - Identifier: InClusterNetworkLatency
    Method: InClusterNetworkLatency
    Params:
      action: start
      replicasPerProbe: {{DivideInt .Nodes 100}}
  - Identifier: DnsLookupLatency
    Method: DnsLookupLatency
    Params:
      action: start
      replicasPerProbe: {{DivideInt .Nodes 100}}
  - Identifier: NetworkProgrammingLatency
    Method: NetworkProgrammingLatency
    Params:
      action: start
  - Identifier: TestMetrics
    Method: TestMetrics
    Params:
      action: start
      nodeMode: {{$NODE_MODE}}
- name: Creating SVCs
  phases:
  - namespaceRange:
      min: 1
      max: {{$namespaces}}
    replicasPerNamespace: {{DivideInt (AddInt $bigDeploymentsPerNamespace 1) 2}}
    tuningSet: Sequence
    objectBundle:
    - basename: big-service
      objectTemplatePath: service.yaml
  - namespaceRange:
      min: 1
      max: {{$namespaces}}
    replicasPerNamespace: {{DivideInt (AddInt $mediumDeploymentsPerNamespace 1) 2}}
    tuningSet: Sequence
    objectBundle:
    - basename: medium-service
      objectTemplatePath: service.yaml
  - namespaceRange:
      min: 1
      max: {{$namespaces}}
    replicasPerNamespace: {{DivideInt (AddInt $smallDeploymentsPerNamespace 1) 2}}
    tuningSet: Sequence
    objectBundle:
    - basename: small-service
      objectTemplatePath: service.yaml
- name: Starting measurement for waiting for pods
  measurements:
  - Identifier: WaitForRunningDeployments
    Method: WaitForControlledPodsRunning
    Params:
      action: start
      apiVersion: apps/v1
      kind: Deployment
      labelSelector: group = load
      operationTimeout: 15m
{{if $ENABLE_STATEFULSETS}}
  - Identifier: WaitForRunningStatefulSets
    Method: WaitForControlledPodsRunning
    Params:
      action: start
      apiVersion: apps/v1
      kind: StatefulSet
      labelSelector: group = load
      operationTimeout: 15m
{{end}}
- name: Creating objects
  phases:
  - namespaceRange:
      min: 1
      max: {{$namespaces}}
    replicasPerNamespace: {{$bigDeploymentsPerNamespace}}
    tuningSet: RandomizedSaturationTimeLimited
    objectBundle:
{{if $ENABLE_CONFIGMAPS}}
    - basename: big-deployment
      objectTemplatePath: configmap.yaml
{{end}}
{{if $ENABLE_SECRETS}}
    - basename: big-deployment
      objectTemplatePath: secret.yaml
{{end}}
    - basename: big-deployment
      objectTemplatePath: deployment.yaml
      templateFillMap:
        ReplicasMin: {{$BIG_GROUP_SIZE}}
        ReplicasMax: {{$BIG_GROUP_SIZE}}
        SvcName: big-service
  - namespaceRange:
      min: 1
      max: {{$namespaces}}
    replicasPerNamespace: {{$mediumDeploymentsPerNamespace}}
    tuningSet: RandomizedSaturationTimeLimited
    objectBundle:
{{if $ENABLE_CONFIGMAPS}}
    - basename: medium-deployment
      objectTemplatePath: configmap.yaml
{{end}}
{{if $ENABLE_SECRETS}}
    - basename: medium-deployment
      objectTemplatePath: secret.yaml
{{end}}
    - basename: medium-deployment
      objectTemplatePath: deployment.yaml
      templateFillMap:
        ReplicasMin: {{$MEDIUM_GROUP_SIZE}}
        ReplicasMax: {{$MEDIUM_GROUP_SIZE}}
        SvcName: medium-service
  - namespaceRange:
      min: 1
      max: {{$namespaces}}
    replicasPerNamespace: {{$smallDeploymentsPerNamespace}}
    tuningSet: RandomizedSaturationTimeLimited
    objectBundle:
{{if $ENABLE_CONFIGMAPS}}
    - basename: small-deployment
      objectTemplatePath: configmap.yaml
{{end}}
{{if $ENABLE_SECRETS}}
    - basename: small-deployment
      objectTemplatePath: secret.yaml
{{end}}
    - basename: small-deployment
      objectTemplatePath: deployment.yaml
      templateFillMap:
        ReplicasMin: {{$SMALL_GROUP_SIZE}}
        ReplicasMax: {{$SMALL_GROUP_SIZE}}
        SvcName: small-service
{{if $ENABLE_STATEFULSETS}}
  - namespaceRange:
      min: 1
      max: {{$namespaces}}
    replicasPerNamespace: {{$SMALL_STATEFUL_SETS_PER_NAMESPACE}}
    tuningSet: RandomizedSaturationTimeLimited
    objectBundle:
    - basename: small-statefulset
      objectTemplatePath: statefulset_service.yaml
    - basename: small-statefulset
      objectTemplatePath: statefulset.yaml
      templateFillMap:
        ReplicasMin: {{$SMALL_GROUP_SIZE}}
        ReplicasMax: {{$SMALL_GROUP_SIZE}}
  - namespaceRange:
      min: 1
      max: {{$namespaces}}
    replicasPerNamespace: {{$MEDIUM_STATEFUL_SETS_PER_NAMESPACE}}
    tuningSet: RandomizedSaturationTimeLimited
    objectBundle:
    - basename: medium-statefulset
      objectTemplatePath: statefulset_service.yaml
    - basename: medium-statefulset
      objectTemplatePath: statefulset.yaml
      templateFillMap:
        ReplicasMin: {{$MEDIUM_GROUP_SIZE}}
        ReplicasMax: {{$MEDIUM_GROUP_SIZE}}
{{end}}
- name: Waiting for pods to be running
  measurements:
  - Identifier: WaitForRunningDeployments
    Method: WaitForControlledPodsRunning
    Params:
      action: gather
{{if $ENABLE_STATEFULSETS}}
  - Identifier: WaitForRunningStatefulSets
    Method: WaitForControlledPodsRunning
    Params:
      action: gather
{{end}}
- name: Scaling objects
  phases:
  - namespaceRange:
      min: 1
      max: {{$namespaces}}
    replicasPerNamespace: {{$bigDeploymentsPerNamespace}}
    tuningSet: RandomizedScalingTimeLimited
    objectBundle:
    - basename: big-deployment
      objectTemplatePath: deployment.yaml
      templateFillMap:
        ReplicasMin: {{MultiplyInt $BIG_GROUP_SIZE 0.5}}
        ReplicasMax: {{MultiplyInt $BIG_GROUP_SIZE 1.5}}
        SvcName: big-service
  - namespaceRange:
      min: 1
      max: {{$namespaces}}
    replicasPerNamespace: {{$mediumDeploymentsPerNamespace}}
    tuningSet: RandomizedScalingTimeLimited
    objectBundle:
    - basename: medium-deployment
      objectTemplatePath: deployment.yaml
      templateFillMap:
        ReplicasMin: {{MultiplyInt $MEDIUM_GROUP_SIZE 0.5}}
        ReplicasMax: {{MultiplyInt $MEDIUM_GROUP_SIZE 1.5}}
        SvcName: medium-service
  - namespaceRange:
      min: 1
      max: {{$namespaces}}
    replicasPerNamespace: {{$smallDeploymentsPerNamespace}}
    tuningSet: RandomizedScalingTimeLimited
    objectBundle:
    - basename: small-deployment
      objectTemplatePath: deployment.yaml
      templateFillMap:
        ReplicasMin: {{MultiplyInt $SMALL_GROUP_SIZE 0.5}}
        ReplicasMax: {{MultiplyInt $SMALL_GROUP_SIZE 1.5}}
        SvcName: small-service
{{if $ENABLE_STATEFULSETS}}
  - namespaceRange:
      min: 1
      max: {{$namespaces}}
    replicasPerNamespace: {{$SMALL_STATEFUL_SETS_PER_NAMESPACE}}
    tuningSet: RandomizedScalingTimeLimited
    objectBundle:
    - basename: small-statefulset
      objectTemplatePath: statefulset.yaml
      templateFillMap:
        ReplicasMin: {{MultiplyInt $SMALL_GROUP_SIZE 0.5}}
        ReplicasMax: {{MultiplyInt $SMALL_GROUP_SIZE 1.5}}
  - namespaceRange:
      min: 1
      max: {{$namespaces}}
    replicasPerNamespace: {{$MEDIUM_STATEFUL_SETS_PER_NAMESPACE}}
    tuningSet: RandomizedScalingTimeLimited
    objectBundle:
    - basename: medium-statefulset
      objectTemplatePath: statefulset.yaml
      templateFillMap:
        ReplicasMin: {{MultiplyInt $MEDIUM_GROUP_SIZE 0.5}}
        ReplicasMax: {{MultiplyInt $MEDIUM_GROUP_SIZE 1.5}}
{{end}}
- name: Waiting for objects to become scaled
  measurements:
  - Identifier: WaitForRunningDeployments
    Method: WaitForControlledPodsRunning
    Params:
      action: gather
{{if $ENABLE_STATEFULSETS}}
  - Identifier: WaitForRunningStatefulSets
    Method: WaitForControlledPodsRunning
    Params:
      action: gather
{{end}}
- name: Deleting objects
  phases:
  - namespaceRange:
      min: 1
      max: {{$namespaces}}
    replicasPerNamespace: 0
    tuningSet: RandomizedSaturationTimeLimited
    objectBundle:
    - basename: big-deployment
      objectTemplatePath: deployment.yaml
{{if $ENABLE_CONFIGMAPS}}
    - basename: big-deployment
      objectTemplatePath: configmap.yaml
{{end}}
{{if $ENABLE_SECRETS}}
    - basename: big-deployment
      objectTemplatePath: secret.yaml
{{end}}
  - namespaceRange:
      min: 1
      max: {{$namespaces}}
    replicasPerNamespace: 0
    tuningSet: RandomizedSaturationTimeLimited
    objectBundle:
    - basename: medium-deployment
      objectTemplatePath: deployment.yaml
{{if $ENABLE_CONFIGMAPS}}
    - basename: medium-deployment
      objectTemplatePath: configmap.yaml
{{end}}
{{if $ENABLE_SECRETS}}
    - basename: medium-deployment
      objectTemplatePath: secret.yaml
{{end}}
  - namespaceRange:
      min: 1
      max: {{$namespaces}}
    replicasPerNamespace: 0
    tuningSet: RandomizedSaturationTimeLimited
    objectBundle:
    - basename: small-deployment
      objectTemplatePath: deployment.yaml
{{if $ENABLE_CONFIGMAPS}}
    - basename: small-deployment
      objectTemplatePath: configmap.yaml
{{end}}
{{if $ENABLE_SECRETS}}
    - basename: small-deployment
      objectTemplatePath: secret.yaml
{{end}}
{{if $ENABLE_STATEFULSETS}}
  - namespaceRange:
      min: 1
      max: {{$namespaces}}
    replicasPerNamespace: 0
    tuningSet: RandomizedSaturationTimeLimited
    objectBundle:
    - basename: small-statefulset
      objectTemplatePath: statefulset.yaml
    - basename: small-statefulset
      objectTemplatePath: statefulset_service.yaml
  - namespaceRange:
      min: 1
      max: {{$namespaces}}
    replicasPerNamespace: 0
    tuningSet: RandomizedSaturationTimeLimited
    objectBundle:
    - basename: medium-statefulset
      objectTemplatePath: statefulset.yaml
    - basename: medium-statefulset
      objectTemplatePath: statefulset_service.yaml
{{end}}
- name: Waiting for pods to be deleted
  measurements:
  - Identifier: WaitForRunningDeployments
    Method: WaitForControlledPodsRunning
    Params:
      action: gather
{{if $ENABLE_STATEFULSETS}}
  - Identifier: WaitForRunningStatefulSets
    Method: WaitForControlledPodsRunning
    Params:
      action: gather
{{end}}
- name: Deleting SVCs
  phases:
  - namespaceRange:
      min: 1
      max: {{$namespaces}}
    replicasPerNamespace: 0
    tuningSet: Sequence
    objectBundle:
    - basename: big-service
      objectTemplatePath: service.yaml
  - namespaceRange:
      min: 1
      max: {{$namespaces}}
    replicasPerNamespace: 0
    tuningSet: Sequence
    objectBundle:
    - basename: medium-service
      objectTemplatePath: service.yaml
  - namespaceRange:
      min: 1
      max: {{$namespaces}}
    replicasPerNamespace: 0
    tuningSet: Sequence
    objectBundle:
    - basename: small-service
      objectTemplatePath: service.yaml
- name: Collecting measurements
  measurements:
  - Identifier: APIResponsiveness
    Method: APIResponsiveness
    Params:
      action: gather
  - Identifier: APIResponsivenessPrometheus
    Method: APIResponsivenessPrometheus
    Params:
      action: gather
{{if $ENABLE_PROMETHEUS_API_RESPONSIVENESS}}
      enableViolations: true
{{end}}
  - Identifier: PodStartupLatency
    Method: PodStartupLatency
    Params:
      action: gather
  - Identifier: InClusterNetworkLatency
    Method: InClusterNetworkLatency
    Params:
      action: gather
  - Identifier: DnsLookupLatency
    Method: DnsLookupLatency
    Params:
      action: gather
  - Identifier: NetworkProgrammingLatency
    Method: NetworkProgrammingLatency
    Params:
      action: gather
  - Identifier: TestMetrics
    Method: TestMetrics
    Params:
      action: gather
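For reference, the `# Variables` block above is plain integer arithmetic on the node count. A minimal shell sketch of that math, assuming the large-config defaults and a hypothetical 100-node cluster:

```sh
#!/bin/sh
# Mirrors the DivideInt/MultiplyInt template math for NODES=100,
# NODES_PER_NAMESPACE=100, PODS_PER_NODE=30, LOAD_TEST_THROUGHPUT=10.
NODES=100
NAMESPACES=$((NODES / 100))               # 1 namespace
TOTAL_PODS=$((NAMESPACES * 100 * 30))     # 3000 pods
PODS_PER_NS=$((TOTAL_PODS / NAMESPACES))  # 3000 pods per namespace
SATURATION_TIME=$((TOTAL_PODS / 10))      # 300s at 10 pods/s
BIG=$((PODS_PER_NS / (4 * 25)))           # 30 big Deployments (1/4 of pods)
MEDIUM=$((PODS_PER_NS / (4 * 15)))        # 50 medium Deployments (1/4 of pods)
SMALL=$((PODS_PER_NS / (2 * 1)))          # 1500 small Deployments (1/2 of pods)
echo "$NAMESPACES $TOTAL_PODS $SATURATION_TIME $BIG $MEDIUM $SMALL"
```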


@@ -1,9 +0,0 @@
apiVersion: v1
kind: ConfigMap
metadata:
  name: {{.Name}}
data:
  data.yaml: |-
    a: 1
    b: 2
    c: 3


@@ -1,62 +0,0 @@
{{$EnableConfigMaps := DefaultParam .ENABLE_CONFIGMAPS false}}
{{$EnableSecrets := DefaultParam .ENABLE_SECRETS false}}

apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{.Name}}
  labels:
    group: load
    svc: {{.SvcName}}-{{.Index}}
spec:
  replicas: {{RandIntRange .ReplicasMin .ReplicasMax}}
  selector:
    matchLabels:
      name: {{.Name}}
  template:
    metadata:
      labels:
        group: load
        name: {{.Name}}
        svc: {{.SvcName}}-{{.Index}}
    spec:
      containers:
      - image: k8s.gcr.io/pause:3.1
        name: {{.Name}}
        resources:
          requests:
            cpu: 10m
            memory: "10M"
        volumeMounts:
        {{if and $EnableConfigMaps (eq (Mod .Index 20) 0 19) }} # .Index % 20 in {0,19} - 10% deployments will have ConfigMap
        - name: configmap
          mountPath: /var/configmap
        {{end}}
        {{if and $EnableSecrets (eq (Mod .Index 20) 10 19) }} # .Index % 20 in {10,19} - 10% deployments will have Secret
        - name: secret
          mountPath: /var/secret
        {{end}}
      dnsPolicy: Default
      terminationGracePeriodSeconds: 1
      # Add not-ready/unreachable tolerations for 15 minutes so that node
      # failure doesn't trigger pod deletion.
      tolerations:
      - key: "node.kubernetes.io/not-ready"
        operator: "Exists"
        effect: "NoExecute"
        tolerationSeconds: 900
      - key: "node.kubernetes.io/unreachable"
        operator: "Exists"
        effect: "NoExecute"
        tolerationSeconds: 900
      volumes:
      {{if and $EnableConfigMaps (eq (Mod .Index 20) 0 19) }} # .Index % 20 in {0,19} - 10% deployments will have ConfigMap
      - name: configmap
        configMap:
          name: {{.BaseName}}-{{.Index}}
      {{end}}
      {{if and $EnableSecrets (eq (Mod .Index 20) 10 19) }} # .Index % 20 in {10,19} - 10% deployments will have Secret
      - name: secret
        secret:
          secretName: {{.BaseName}}-{{.Index}}
      {{end}}
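The `Mod .Index 20` guards above mean that, per block of 20 deployments, indices 0 and 19 mount the ConfigMap while indices 10 and 19 mount the Secret - 10% each, as the comments state. A throwaway shell check of that selection logic:

```sh
#!/bin/sh
# Enumerate one block of 20 deployment indices and report mounts.
i=0
while [ "$i" -lt 20 ]; do
  if [ $((i % 20)) -eq 0 ] || [ $((i % 20)) -eq 19 ]; then echo "deployment-$i mounts the ConfigMap"; fi
  if [ $((i % 20)) -eq 10 ] || [ $((i % 20)) -eq 19 ]; then echo "deployment-$i mounts the Secret"; fi
  i=$((i + 1))
done
```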


@@ -1,3 +0,0 @@
#!/bin/sh
clusterloader --kubeconfig=../kubeConfig.yaml --testconfig=config.yaml
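A variant that persists results to disk might look like this - a sketch assuming a clusterloader2 build that supports `--report-dir`:

```sh
clusterloader --kubeconfig=../kubeConfig.yaml --testconfig=config.yaml --report-dir=./report
```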


@@ -1,7 +0,0 @@
apiVersion: v1
kind: Secret
metadata:
  name: {{.Name}}
type: Opaque
data:
  password: c2NhbGFiaWxpdHkK
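The opaque value is just base64 of the string `scalability`, reproducible with standard coreutils:

```sh
$ echo scalability | base64
c2NhbGFiaWxpdHkK
$ echo c2NhbGFiaWxpdHkK | base64 -d
scalability
```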


@@ -1,16 +0,0 @@
{{$SetServiceProxyLabel := DefaultParam .SetServiceProxyLabel false}}

apiVersion: v1
kind: Service
metadata:
  name: {{.Name}}
{{if and $SetServiceProxyLabel (eq (Mod .Index 2) 0)}}
  labels:
    service.kubernetes.io/service-proxy-name: foo
{{end}}
spec:
  selector:
    svc: {{.Name}}
  ports:
  - port: 80
    targetPort: 80


@@ -1,30 +0,0 @@
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: {{.Name}}
  labels:
    group: load
spec:
  podManagementPolicy: Parallel
  selector:
    matchLabels:
      name: {{.Name}}
  serviceName: {{.Name}}
  replicas: {{RandIntRange .ReplicasMin .ReplicasMax}}
  template:
    metadata:
      labels:
        group: statefulset
        name: {{.Name}}
    spec:
      terminationGracePeriodSeconds: 1
      containers:
      - name: {{.Name}}
        image: k8s.gcr.io/pause:3.1
        ports:
        - containerPort: 80
          name: web
        resources:
          requests:
            cpu: 10m
            memory: "10M"


@@ -1,10 +0,0 @@
apiVersion: v1
kind: Service
metadata:
  name: {{.Name}}
  labels:
    name: {{.Name}}
spec:
  clusterIP: None
  selector:
    name: {{.Name}}


@@ -1,471 +0,0 @@
# ASSUMPTIONS:
# - Underlying cluster should have 100+ nodes.
# - Number of nodes should be divisible by NODES_PER_NAMESPACE (default 100).
# - The number of created SVCs is half the number of created Deployments.
# - Only half of Deployments will be assigned 1-1 to existing SVCs.

# Constants
{{$NODE_MODE := DefaultParam .NODE_MODE "allnodes"}}
{{$NODES_PER_NAMESPACE := DefaultParam .NODES_PER_NAMESPACE 1}}
{{$PODS_PER_NODE := DefaultParam .PODS_PER_NODE 5}}
{{$LOAD_TEST_THROUGHPUT := DefaultParam .LOAD_TEST_THROUGHPUT 10}}
{{$BIG_GROUP_SIZE := 25}}
{{$MEDIUM_GROUP_SIZE := 15}}
{{$SMALL_GROUP_SIZE := 1}}
{{$SMALL_STATEFUL_SETS_PER_NAMESPACE := 1}}
{{$MEDIUM_STATEFUL_SETS_PER_NAMESPACE := 1}}
{{$ENABLE_CHAOSMONKEY := DefaultParam .ENABLE_CHAOSMONKEY false}}
{{$ENABLE_PROMETHEUS_API_RESPONSIVENESS := DefaultParam .ENABLE_PROMETHEUS_API_RESPONSIVENESS false}}
{{$ENABLE_CONFIGMAPS := DefaultParam .ENABLE_CONFIGMAPS false}}
{{$ENABLE_SECRETS := DefaultParam .ENABLE_SECRETS false}}
{{$ENABLE_STATEFULSETS := DefaultParam .ENABLE_STATEFULSETS false}}

# Variables
{{$namespaces := DivideInt .Nodes $NODES_PER_NAMESPACE}}
{{$totalPods := MultiplyInt $namespaces $NODES_PER_NAMESPACE $PODS_PER_NODE}}
{{$podsPerNamespace := DivideInt $totalPods $namespaces}}
{{$saturationTime := DivideInt $totalPods $LOAD_TEST_THROUGHPUT}}
# bigDeployments - 1/4 of namespace pods should be in big Deployments.
{{$bigDeploymentsPerNamespace := DivideInt $podsPerNamespace (MultiplyInt 4 $BIG_GROUP_SIZE)}}
# mediumDeployments - 1/4 of namespace pods should be in medium Deployments.
{{$mediumDeploymentsPerNamespace := DivideInt $podsPerNamespace (MultiplyInt 4 $MEDIUM_GROUP_SIZE)}}
# smallDeployments - 1/2 of namespace pods should be in small Deployments.
{{$smallDeploymentsPerNamespace := DivideInt $podsPerNamespace (MultiplyInt 2 $SMALL_GROUP_SIZE)}}
# If StatefulSets are enabled, reduce the number of small and medium Deployments per namespace.
{{$smallDeploymentsPerNamespace := SubtractInt $smallDeploymentsPerNamespace (IfThenElse $ENABLE_STATEFULSETS $SMALL_STATEFUL_SETS_PER_NAMESPACE 0)}}
{{$mediumDeploymentsPerNamespace := SubtractInt $mediumDeploymentsPerNamespace (IfThenElse $ENABLE_STATEFULSETS $MEDIUM_STATEFUL_SETS_PER_NAMESPACE 0)}}

name: load
automanagedNamespaces: {{$namespaces}}
tuningSets:
- name: Sequence
  parallelismLimitedLoad:
    parallelismLimit: 1
- name: RandomizedSaturationTimeLimited
  RandomizedTimeLimitedLoad:
    timeLimit: {{$saturationTime}}s
- name: RandomizedScalingTimeLimited
  RandomizedTimeLimitedLoad:
    # The expected number of created/deleted pods is totalPods/4 when scaling,
    # as each RS changes its size from X to a uniform random value in [X/2, 3X/2].
    # To match the 10 [pods/s] requirement, we need to divide saturationTime by 4.
    timeLimit: {{DivideInt $saturationTime 4}}s
{{if $ENABLE_CHAOSMONKEY}}
chaosMonkey:
  nodeFailure:
    failureRate: 0.01
    interval: 1m
    jitterFactor: 10.0
    simulatedDowntime: 10m
{{end}}
steps:
- name: Starting measurements
  measurements:
  - Identifier: APIResponsiveness
    Method: APIResponsiveness
    Params:
      action: reset
  - Identifier: APIResponsivenessPrometheus
    Method: APIResponsivenessPrometheus
    Params:
      action: start
  - Identifier: PodStartupLatency
    Method: PodStartupLatency
    Params:
      action: start
      labelSelector: group = load
      threshold: 1h
  - Identifier: InClusterNetworkLatency
    Method: InClusterNetworkLatency
    Params:
      action: start
      replicasPerProbe: {{DivideInt .Nodes 100}}
  - Identifier: DnsLookupLatency
    Method: DnsLookupLatency
    Params:
      action: start
      replicasPerProbe: {{DivideInt .Nodes 100}}
  - Identifier: NetworkProgrammingLatency
    Method: NetworkProgrammingLatency
    Params:
      action: start
  - Identifier: TestMetrics
    Method: TestMetrics
    Params:
      action: start
      nodeMode: {{$NODE_MODE}}
- name: Creating SVCs
  phases:
  - namespaceRange:
      min: 1
      max: {{$namespaces}}
    replicasPerNamespace: {{DivideInt (AddInt $bigDeploymentsPerNamespace 1) 2}}
    tuningSet: Sequence
    objectBundle:
    - basename: big-service
      objectTemplatePath: service.yaml
  - namespaceRange:
      min: 1
      max: {{$namespaces}}
    replicasPerNamespace: {{DivideInt (AddInt $mediumDeploymentsPerNamespace 1) 2}}
    tuningSet: Sequence
    objectBundle:
    - basename: medium-service
      objectTemplatePath: service.yaml
  - namespaceRange:
      min: 1
      max: {{$namespaces}}
    replicasPerNamespace: {{DivideInt (AddInt $smallDeploymentsPerNamespace 1) 2}}
    tuningSet: Sequence
    objectBundle:
    - basename: small-service
      objectTemplatePath: service.yaml
- name: Starting measurement for waiting for pods
  measurements:
  - Identifier: WaitForRunningDeployments
    Method: WaitForControlledPodsRunning
    Params:
      action: start
      apiVersion: apps/v1
      kind: Deployment
      labelSelector: group = load
      operationTimeout: 15m
{{if $ENABLE_STATEFULSETS}}
  - Identifier: WaitForRunningStatefulSets
    Method: WaitForControlledPodsRunning
    Params:
      action: start
      apiVersion: apps/v1
      kind: StatefulSet
      labelSelector: group = load
      operationTimeout: 15m
{{end}}
- name: Creating objects
  phases:
  - namespaceRange:
      min: 1
      max: {{$namespaces}}
    replicasPerNamespace: {{$bigDeploymentsPerNamespace}}
    tuningSet: RandomizedSaturationTimeLimited
    objectBundle:
{{if $ENABLE_CONFIGMAPS}}
    - basename: big-deployment
      objectTemplatePath: configmap.yaml
{{end}}
{{if $ENABLE_SECRETS}}
    - basename: big-deployment
      objectTemplatePath: secret.yaml
{{end}}
    - basename: big-deployment
      objectTemplatePath: deployment.yaml
      templateFillMap:
        ReplicasMin: {{$BIG_GROUP_SIZE}}
        ReplicasMax: {{$BIG_GROUP_SIZE}}
        SvcName: big-service
  - namespaceRange:
      min: 1
      max: {{$namespaces}}
    replicasPerNamespace: {{$mediumDeploymentsPerNamespace}}
    tuningSet: RandomizedSaturationTimeLimited
    objectBundle:
{{if $ENABLE_CONFIGMAPS}}
    - basename: medium-deployment
      objectTemplatePath: configmap.yaml
{{end}}
{{if $ENABLE_SECRETS}}
    - basename: medium-deployment
      objectTemplatePath: secret.yaml
{{end}}
    - basename: medium-deployment
      objectTemplatePath: deployment.yaml
      templateFillMap:
        ReplicasMin: {{$MEDIUM_GROUP_SIZE}}
        ReplicasMax: {{$MEDIUM_GROUP_SIZE}}
        SvcName: medium-service
  - namespaceRange:
      min: 1
      max: {{$namespaces}}
    replicasPerNamespace: {{$smallDeploymentsPerNamespace}}
    tuningSet: RandomizedSaturationTimeLimited
    objectBundle:
{{if $ENABLE_CONFIGMAPS}}
    - basename: small-deployment
      objectTemplatePath: configmap.yaml
{{end}}
{{if $ENABLE_SECRETS}}
    - basename: small-deployment
      objectTemplatePath: secret.yaml
{{end}}
    - basename: small-deployment
      objectTemplatePath: deployment.yaml
      templateFillMap:
        ReplicasMin: {{$SMALL_GROUP_SIZE}}
        ReplicasMax: {{$SMALL_GROUP_SIZE}}
        SvcName: small-service
{{if $ENABLE_STATEFULSETS}}
  - namespaceRange:
      min: 1
      max: {{$namespaces}}
    replicasPerNamespace: {{$SMALL_STATEFUL_SETS_PER_NAMESPACE}}
    tuningSet: RandomizedSaturationTimeLimited
    objectBundle:
    - basename: small-statefulset
      objectTemplatePath: statefulset_service.yaml
    - basename: small-statefulset
      objectTemplatePath: statefulset.yaml
      templateFillMap:
        ReplicasMin: {{$SMALL_GROUP_SIZE}}
        ReplicasMax: {{$SMALL_GROUP_SIZE}}
  - namespaceRange:
      min: 1
      max: {{$namespaces}}
    replicasPerNamespace: {{$MEDIUM_STATEFUL_SETS_PER_NAMESPACE}}
    tuningSet: RandomizedSaturationTimeLimited
    objectBundle:
    - basename: medium-statefulset
      objectTemplatePath: statefulset_service.yaml
    - basename: medium-statefulset
      objectTemplatePath: statefulset.yaml
      templateFillMap:
        ReplicasMin: {{$MEDIUM_GROUP_SIZE}}
        ReplicasMax: {{$MEDIUM_GROUP_SIZE}}
{{end}}
- name: Waiting for pods to be running
  measurements:
  - Identifier: WaitForRunningDeployments
    Method: WaitForControlledPodsRunning
    Params:
      action: gather
{{if $ENABLE_STATEFULSETS}}
  - Identifier: WaitForRunningStatefulSets
    Method: WaitForControlledPodsRunning
    Params:
      action: gather
{{end}}
- name: Scaling objects
  phases:
  - namespaceRange:
      min: 1
      max: {{$namespaces}}
    replicasPerNamespace: {{$bigDeploymentsPerNamespace}}
    tuningSet: RandomizedScalingTimeLimited
    objectBundle:
    - basename: big-deployment
      objectTemplatePath: deployment.yaml
      templateFillMap:
        ReplicasMin: {{MultiplyInt $BIG_GROUP_SIZE 0.5}}
        ReplicasMax: {{MultiplyInt $BIG_GROUP_SIZE 1.5}}
        SvcName: big-service
  - namespaceRange:
      min: 1
      max: {{$namespaces}}
    replicasPerNamespace: {{$mediumDeploymentsPerNamespace}}
    tuningSet: RandomizedScalingTimeLimited
    objectBundle:
    - basename: medium-deployment
      objectTemplatePath: deployment.yaml
      templateFillMap:
        ReplicasMin: {{MultiplyInt $MEDIUM_GROUP_SIZE 0.5}}
        ReplicasMax: {{MultiplyInt $MEDIUM_GROUP_SIZE 1.5}}
        SvcName: medium-service
  - namespaceRange:
      min: 1
      max: {{$namespaces}}
    replicasPerNamespace: {{$smallDeploymentsPerNamespace}}
    tuningSet: RandomizedScalingTimeLimited
    objectBundle:
    - basename: small-deployment
      objectTemplatePath: deployment.yaml
      templateFillMap:
        ReplicasMin: {{MultiplyInt $SMALL_GROUP_SIZE 0.5}}
        ReplicasMax: {{MultiplyInt $SMALL_GROUP_SIZE 1.5}}
        SvcName: small-service
{{if $ENABLE_STATEFULSETS}}
  - namespaceRange:
      min: 1
      max: {{$namespaces}}
    replicasPerNamespace: {{$SMALL_STATEFUL_SETS_PER_NAMESPACE}}
    tuningSet: RandomizedScalingTimeLimited
    objectBundle:
    - basename: small-statefulset
      objectTemplatePath: statefulset.yaml
      templateFillMap:
        ReplicasMin: {{MultiplyInt $SMALL_GROUP_SIZE 0.5}}
        ReplicasMax: {{MultiplyInt $SMALL_GROUP_SIZE 1.5}}
  - namespaceRange:
      min: 1
      max: {{$namespaces}}
    replicasPerNamespace: {{$MEDIUM_STATEFUL_SETS_PER_NAMESPACE}}
    tuningSet: RandomizedScalingTimeLimited
    objectBundle:
    - basename: medium-statefulset
      objectTemplatePath: statefulset.yaml
      templateFillMap:
        ReplicasMin: {{MultiplyInt $MEDIUM_GROUP_SIZE 0.5}}
        ReplicasMax: {{MultiplyInt $MEDIUM_GROUP_SIZE 1.5}}
{{end}}
- name: Waiting for objects to become scaled
  measurements:
  - Identifier: WaitForRunningDeployments
    Method: WaitForControlledPodsRunning
    Params:
      action: gather
{{if $ENABLE_STATEFULSETS}}
  - Identifier: WaitForRunningStatefulSets
    Method: WaitForControlledPodsRunning
    Params:
      action: gather
{{end}}
- name: Deleting objects
  phases:
  - namespaceRange:
      min: 1
      max: {{$namespaces}}
    replicasPerNamespace: 0
    tuningSet: RandomizedSaturationTimeLimited
    objectBundle:
    - basename: big-deployment
      objectTemplatePath: deployment.yaml
{{if $ENABLE_CONFIGMAPS}}
    - basename: big-deployment
      objectTemplatePath: configmap.yaml
{{end}}
{{if $ENABLE_SECRETS}}
    - basename: big-deployment
      objectTemplatePath: secret.yaml
{{end}}
  - namespaceRange:
      min: 1
      max: {{$namespaces}}
    replicasPerNamespace: 0
    tuningSet: RandomizedSaturationTimeLimited
    objectBundle:
    - basename: medium-deployment
      objectTemplatePath: deployment.yaml
{{if $ENABLE_CONFIGMAPS}}
    - basename: medium-deployment
      objectTemplatePath: configmap.yaml
{{end}}
{{if $ENABLE_SECRETS}}
    - basename: medium-deployment
      objectTemplatePath: secret.yaml
{{end}}
  - namespaceRange:
      min: 1
      max: {{$namespaces}}
    replicasPerNamespace: 0
    tuningSet: RandomizedSaturationTimeLimited
    objectBundle:
    - basename: small-deployment
      objectTemplatePath: deployment.yaml
{{if $ENABLE_CONFIGMAPS}}
    - basename: small-deployment
      objectTemplatePath: configmap.yaml
{{end}}
{{if $ENABLE_SECRETS}}
    - basename: small-deployment
      objectTemplatePath: secret.yaml
{{end}}
{{if $ENABLE_STATEFULSETS}}
  - namespaceRange:
      min: 1
      max: {{$namespaces}}
    replicasPerNamespace: 0
    tuningSet: RandomizedSaturationTimeLimited
    objectBundle:
    - basename: small-statefulset
      objectTemplatePath: statefulset.yaml
    - basename: small-statefulset
      objectTemplatePath: statefulset_service.yaml
  - namespaceRange:
      min: 1
      max: {{$namespaces}}
    replicasPerNamespace: 0
    tuningSet: RandomizedSaturationTimeLimited
    objectBundle:
    - basename: medium-statefulset
      objectTemplatePath: statefulset.yaml
    - basename: medium-statefulset
      objectTemplatePath: statefulset_service.yaml
{{end}}
- name: Waiting for pods to be deleted
  measurements:
  - Identifier: WaitForRunningDeployments
    Method: WaitForControlledPodsRunning
    Params:
      action: gather
{{if $ENABLE_STATEFULSETS}}
  - Identifier: WaitForRunningStatefulSets
    Method: WaitForControlledPodsRunning
    Params:
      action: gather
{{end}}
- name: Deleting SVCs
  phases:
  - namespaceRange:
      min: 1
      max: {{$namespaces}}
    replicasPerNamespace: 0
    tuningSet: Sequence
    objectBundle:
    - basename: big-service
      objectTemplatePath: service.yaml
  - namespaceRange:
      min: 1
      max: {{$namespaces}}
    replicasPerNamespace: 0
    tuningSet: Sequence
    objectBundle:
    - basename: medium-service
      objectTemplatePath: service.yaml
  - namespaceRange:
      min: 1
      max: {{$namespaces}}
    replicasPerNamespace: 0
    tuningSet: Sequence
    objectBundle:
    - basename: small-service
      objectTemplatePath: service.yaml
- name: Collecting measurements
  measurements:
  - Identifier: APIResponsiveness
    Method: APIResponsiveness
    Params:
      action: gather
  - Identifier: APIResponsivenessPrometheus
    Method: APIResponsivenessPrometheus
    Params:
      action: gather
{{if $ENABLE_PROMETHEUS_API_RESPONSIVENESS}}
      enableViolations: true
{{end}}
  - Identifier: PodStartupLatency
    Method: PodStartupLatency
    Params:
      action: gather
  - Identifier: InClusterNetworkLatency
    Method: InClusterNetworkLatency
    Params:
      action: gather
  - Identifier: DnsLookupLatency
    Method: DnsLookupLatency
    Params:
      action: gather
  - Identifier: NetworkProgrammingLatency
    Method: NetworkProgrammingLatency
    Params:
      action: gather
  - Identifier: TestMetrics
    Method: TestMetrics
    Params:
      action: gather


@@ -1,9 +0,0 @@
apiVersion: v1
kind: ConfigMap
metadata:
  name: {{.Name}}
data:
  data.yaml: |-
    a: 1
    b: 2
    c: 3


@@ -1,62 +0,0 @@
{{$EnableConfigMaps := DefaultParam .ENABLE_CONFIGMAPS false}}
{{$EnableSecrets := DefaultParam .ENABLE_SECRETS false}}

apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{.Name}}
  labels:
    group: load
    svc: {{.SvcName}}-{{.Index}}
spec:
  replicas: {{RandIntRange .ReplicasMin .ReplicasMax}}
  selector:
    matchLabels:
      name: {{.Name}}
  template:
    metadata:
      labels:
        group: load
        name: {{.Name}}
        svc: {{.SvcName}}-{{.Index}}
    spec:
      containers:
      - image: k8s.gcr.io/pause:3.1
        name: {{.Name}}
        resources:
          requests:
            cpu: 10m
            memory: "10M"
        volumeMounts:
        {{if and $EnableConfigMaps (eq (Mod .Index 20) 0 19) }} # .Index % 20 in {0,19} - 10% deployments will have ConfigMap
        - name: configmap
          mountPath: /var/configmap
        {{end}}
        {{if and $EnableSecrets (eq (Mod .Index 20) 10 19) }} # .Index % 20 in {10,19} - 10% deployments will have Secret
        - name: secret
          mountPath: /var/secret
        {{end}}
      dnsPolicy: Default
      terminationGracePeriodSeconds: 1
      # Add not-ready/unreachable tolerations for 15 minutes so that node
      # failure doesn't trigger pod deletion.
      tolerations:
      - key: "node.kubernetes.io/not-ready"
        operator: "Exists"
        effect: "NoExecute"
        tolerationSeconds: 900
      - key: "node.kubernetes.io/unreachable"
        operator: "Exists"
        effect: "NoExecute"
        tolerationSeconds: 900
      volumes:
      {{if and $EnableConfigMaps (eq (Mod .Index 20) 0 19) }} # .Index % 20 in {0,19} - 10% deployments will have ConfigMap
      - name: configmap
        configMap:
          name: {{.BaseName}}-{{.Index}}
      {{end}}
      {{if and $EnableSecrets (eq (Mod .Index 20) 10 19) }} # .Index % 20 in {10,19} - 10% deployments will have Secret
      - name: secret
        secret:
          secretName: {{.BaseName}}-{{.Index}}
      {{end}}


@@ -1,3 +0,0 @@
#!/bin/sh
clusterloader --kubeconfig=../kubeConfig.yaml --testconfig=config.yaml


@@ -1,7 +0,0 @@
apiVersion: v1
kind: Secret
metadata:
  name: {{.Name}}
type: Opaque
data:
  password: c2NhbGFiaWxpdHkK


@@ -1,16 +0,0 @@
{{$SetServiceProxyLabel := DefaultParam .SetServiceProxyLabel false}}

apiVersion: v1
kind: Service
metadata:
  name: {{.Name}}
{{if and $SetServiceProxyLabel (eq (Mod .Index 2) 0)}}
  labels:
    service.kubernetes.io/service-proxy-name: foo
{{end}}
spec:
  selector:
    svc: {{.Name}}
  ports:
  - port: 80
    targetPort: 80


@@ -1,30 +0,0 @@
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: {{.Name}}
  labels:
    group: load
spec:
  podManagementPolicy: Parallel
  selector:
    matchLabels:
      name: {{.Name}}
  serviceName: {{.Name}}
  replicas: {{RandIntRange .ReplicasMin .ReplicasMax}}
  template:
    metadata:
      labels:
        group: statefulset
        name: {{.Name}}
    spec:
      terminationGracePeriodSeconds: 1
      containers:
      - name: {{.Name}}
        image: k8s.gcr.io/pause:3.1
        ports:
        - containerPort: 80
          name: web
        resources:
          requests:
            cpu: 10m
            memory: "10M"


@@ -1,10 +0,0 @@
apiVersion: v1
kind: Service
metadata:
  name: {{.Name}}
  labels:
    name: {{.Name}}
spec:
  clusterIP: None
  selector:
    name: {{.Name}}


@@ -1,44 +0,0 @@
data "terraform_remote_state" "server" {
  backend = "local"

  config = {
    path = "${path.module}/../server/server.tfstate"
  }
}

data "aws_vpc" "default" {
  default = true
}

data "aws_subnet_ids" "available" {
  vpc_id = data.aws_vpc.default.id
}

data "aws_subnet" "selected" {
  id = "${tolist(data.aws_subnet_ids.available.ids)[1]}"
}

data "aws_ami" "ubuntu" {
  most_recent = true
  owners      = ["099720109477"]

  filter {
    name   = "name"
    values = ["ubuntu-minimal/images/*/ubuntu-bionic-18.04-*"]
  }

  filter {
    name   = "virtualization-type"
    values = ["hvm"]
  }

  filter {
    name   = "root-device-type"
    values = ["ebs"]
  }

  filter {
    name   = "architecture"
    values = ["x86_64"]
  }
}


@@ -1,34 +0,0 @@
#cloud-config
%{ if length(extra_ssh_keys) > 0 }
ssh_authorized_keys:
%{ for ssh_key in extra_ssh_keys }
- ${ssh_key}
%{ endfor }
%{ endif }
runcmd:
- echo "net.ipv4.neigh.default.gc_interval = 3600" >> /etc/sysctl.conf
- echo "net.ipv4.neigh.default.gc_stale_time = 3600" >> /etc/sysctl.conf
- echo "net.ipv4.neigh.default.gc_thresh3 = 16384" >> /etc/sysctl.conf
- echo "net.ipv4.neigh.default.gc_thresh2 = 8192" >> /etc/sysctl.conf
- echo "net.ipv4.neigh.default.gc_thresh1 = 4096" >> /etc/sysctl.conf
- echo "fs.file-max = 12000500" >> /etc/sysctl.conf
- echo "fs.nr_open = 20000500" >> /etc/sysctl.conf
- echo "net.ipv4.tcp_mem = '10000000 10000000 10000000'" >> /etc/sysctl.conf
- echo "net.ipv4.tcp_rmem = '1024 4096 16384'" >> /etc/sysctl.conf
- echo "net.ipv4.tcp_wmem = '1024 4096 16384'" >> /etc/sysctl.conf
- echo "net.core.rmem_max = 16384" >> /etc/sysctl.conf
- echo "net.core.wmem_max = 16384" >> /etc/sysctl.conf
- ulimit -n 20000000
- echo "# <domain> <type> <item> <value>" >> /etc/security/limits.d/limits.conf
- echo " * soft nofile 20000" >> /etc/security/limits.d/limits.conf
- echo " * hard nofile 20000" >> /etc/security/limits.d/limits.conf
- sysctl -p
- apt-get update
- apt-get install -y software-properties-common
- curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add -
- add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable"
- apt-get update
- apt-get -y install docker-ce
- apt-get install -y resolvconf linux-headers-$(uname -r) && echo "nameserver 1.1.1.1" > /etc/resolvconf/resolv.conf.d/tail && systemctl start resolvconf
- DEBIAN_FRONTEND=noninteractive apt-get upgrade -y
- n=1; while [ $n -le ${k3s_per_node} ]; do docker run -d --restart=unless-stopped -e K3S_URL=https://${k3s_url}:6443 -e K3S_CLUSTER_SECRET="${k3s_cluster_secret}" --privileged --mount type=tmpfs,destination=/var/run --mount type=tmpfs,destination=/run -m 1g --cpus=".7" rancher/k3s:${install_k3s_version}; n=$(( n + 1 )); done
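The final `runcmd` entry packs the agent bootstrap into one line, as cloud-init requires. A readable equivalent for reference (illustrative only; the `${...}` placeholders are filled in by `templatefile()` at apply time):

```sh
# Launch k3s_per_node containerized k3s agents, each joined to the server.
n=1
while [ "$n" -le "${k3s_per_node}" ]; do
  docker run -d --restart=unless-stopped \
    -e K3S_URL="https://${k3s_url}:6443" \
    -e K3S_CLUSTER_SECRET="${k3s_cluster_secret}" \
    --privileged \
    --mount type=tmpfs,destination=/var/run \
    --mount type=tmpfs,destination=/run \
    -m 1g --cpus=".7" \
    "rancher/k3s:${install_k3s_version}"
  n=$((n + 1))
done
```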


@@ -1,80 +0,0 @@
terraform {
  backend "local" {
    path = "pool.tfstate"
  }
}

locals {
  name                = "load-test-pool"
  k3s_cluster_secret  = "pvc-6476dcaf-73a0-11e9-b8e5-06943b744282"
  install_k3s_version = "v0.9.0-rc2"
}

provider "aws" {
  region  = "us-west-2"
  profile = "rancher-eng"
}

resource "aws_security_group" "k3s" {
  name   = "${local.name}-pool"
  vpc_id = data.aws_vpc.default.id

  ingress {
    from_port   = 22
    to_port     = 22
    protocol    = "TCP"
    cidr_blocks = ["0.0.0.0/0"]
  }

  ingress {
    from_port   = 0
    to_port     = 0
    protocol    = "-1"
    cidr_blocks = ["0.0.0.0/0"]
  }

  ingress {
    from_port = 0
    to_port   = 0
    protocol  = "-1"
    self      = true
  }

  egress {
    from_port   = 0
    to_port     = 0
    protocol    = "-1"
    cidr_blocks = ["0.0.0.0/0"]
  }
}

module "k3s-pool-worker-asg" {
  source        = "terraform-aws-modules/autoscaling/aws"
  version       = "3.0.0"
  name          = local.name
  asg_name      = local.name
  instance_type = var.worker_instance_type
  image_id      = data.aws_ami.ubuntu.id
  user_data     = base64encode(templatefile("${path.module}/files/pool_worker_userdata.tmpl", { k3s_url = data.terraform_remote_state.server.outputs.public_ip[0], k3s_cluster_secret = local.k3s_cluster_secret, install_k3s_version = local.install_k3s_version, k3s_per_node = var.k3s_per_node, extra_ssh_keys = var.extra_ssh_keys }))
  ebs_optimized = true

  desired_capacity    = var.node_count
  health_check_type   = "EC2"
  max_size            = var.node_count
  min_size            = var.node_count
  vpc_zone_identifier = [data.aws_subnet.selected.id]
  spot_price          = "0.680"

  security_groups = [
    aws_security_group.k3s.id,
  ]

  lc_name = local.name

  root_block_device = [
    {
      volume_size = "100"
      volume_type = "gp2"
    },
  ]
}


@@ -1,22 +0,0 @@
variable "node_count" {
  description = "Number of nodes to run k3s agents on."
  type        = number
  # default = 10
}

variable "k3s_per_node" {
  description = "Number of k3s agent docker containers to run per ec2 instance"
  type        = number
  default     = 10
}

variable "worker_instance_type" {
  type    = string
  default = "c5.4xlarge"
}

variable "extra_ssh_keys" {
  type        = list
  default     = []
  description = "Extra ssh keys to inject into Rancher instances"
}


@@ -1,4 +0,0 @@
terraform {
  required_version = ">= 0.12"
}


@@ -1,47 +0,0 @@
# K3S Load Testing

This directory contains tooling to help spin up k3s clusters for scale testing (load testing the k3s server).

## Usage

From inside the `server` directory, run the following commands:

```
cd server
terraform init
terraform apply
```

You will be asked to specify an instance type for the k3s server. For a `large` cluster test, use a `c4.8xlarge`; for a `small` cluster test, use a `t3.micro`.

To run these commands, you will need access to an AWS account (by default the `rancher-eng` profile is used).

When the server terraform completes, go to the `pool` directory and run:

```
cd pool
terraform init
terraform apply
```

You will be asked how many EC2 instances to create (the `node_count` variable). You can also set the `k3s_per_node` and `worker_instance_type` variables when you run apply to override their defaults, as shown below.

For the `large` cluster test, specify `node_count=100`. That gets you 100 EC2 instances running 10 k3s agents each - a total of 1000 nodes in your k3s cluster.

For the `small` test, specify `node_count=1` and override `k3s_per_node=5`. That gets you 1 EC2 instance with 5 agents on it - a total of 5 nodes in your k3s cluster.
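For example, to provision the large pool non-interactively (standard Terraform variable flags; values from the scenario above):

```
terraform apply -var="node_count=100" -var="k3s_per_node=10"
```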
Once `pool` is finished, you can run through the cluster-loader scenarios using the `run-test.sh` script in the corresponding `small` or `large` directory:

```
cd cluster-loader/<small/large>
./run-test.sh
```

* The `run-test.sh` script assumes you have [cluster-loader](https://github.com/kubernetes/perf-tests/tree/master/clusterloader2) installed on your machine.

### TODO

* Investigate cluster-loader failures.
* Simplify this process.
* Organize reporting on SLOs after cluster-loader completes.


@@ -1,47 +0,0 @@
data "aws_vpc" "default" {
  default = true
}

data "aws_subnet_ids" "available" {
  vpc_id = data.aws_vpc.default.id
}

data "aws_subnet" "selected" {
  id = "${tolist(data.aws_subnet_ids.available.ids)[1]}"
}

data "aws_ami" "ubuntu" {
  most_recent = true
  owners      = ["099720109477"]

  filter {
    name   = "name"
    values = ["ubuntu-minimal/images/*/ubuntu-bionic-18.04-*"]
  }

  filter {
    name   = "virtualization-type"
    values = ["hvm"]
  }

  filter {
    name   = "root-device-type"
    values = ["ebs"]
  }

  filter {
    name   = "architecture"
    values = ["x86_64"]
  }
}

data "template_file" "metrics" {
  template = file("${path.module}/files/metrics.yaml")
}

data "template_file" "k3s-prom-yaml" {
  template = file("${path.module}/files/prom.yaml")

  vars = {
    prom_host = var.prom_host
    graf_host = var.graf_host
  }
}


@@ -1,227 +0,0 @@
%{ if local.prom_worker_node_count != 0 }
---
apiVersion: rbac.authorization.k8s.io/v1
# kubernetes versions before 1.8.0 should use rbac.authorization.k8s.io/v1beta1
kind: ClusterRoleBinding
metadata:
  name: kube-state-metrics
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: kube-state-metrics
subjects:
- kind: ServiceAccount
  name: kube-state-metrics
  namespace: kube-system
---
apiVersion: rbac.authorization.k8s.io/v1
# kubernetes versions before 1.8.0 should use rbac.authorization.k8s.io/v1beta1
kind: ClusterRole
metadata:
  name: kube-state-metrics
rules:
- apiGroups: [""]
  resources:
  - configmaps
  - secrets
  - nodes
  - pods
  - services
  - resourcequotas
  - replicationcontrollers
  - limitranges
  - persistentvolumeclaims
  - persistentvolumes
  - namespaces
  - endpoints
  verbs: ["list", "watch"]
- apiGroups: ["extensions"]
  resources:
  - daemonsets
  - deployments
  - replicasets
  - ingresses
  verbs: ["list", "watch"]
- apiGroups: ["apps"]
  resources:
  - daemonsets
  - deployments
  - replicasets
  - statefulsets
  verbs: ["list", "watch"]
- apiGroups: ["batch"]
  resources:
  - cronjobs
  - jobs
  verbs: ["list", "watch"]
- apiGroups: ["autoscaling"]
  resources:
  - horizontalpodautoscalers
  verbs: ["list", "watch"]
- apiGroups: ["policy"]
  resources:
  - poddisruptionbudgets
  verbs: ["list", "watch"]
- apiGroups: ["certificates.k8s.io"]
  resources:
  - certificatesigningrequests
  verbs: ["list", "watch"]
- apiGroups: ["storage.k8s.io"]
  resources:
  - storageclasses
  verbs: ["list", "watch"]
- apiGroups: ["autoscaling.k8s.io"]
  resources:
  - verticalpodautoscalers
  verbs: ["list", "watch"]
---
apiVersion: apps/v1
kind: Deployment
metadata:
  labels:
    k8s-app: kube-state-metrics
  name: kube-state-metrics
  namespace: kube-system
spec:
  selector:
    matchLabels:
      k8s-app: kube-state-metrics
  replicas: 1
  template:
    metadata:
      labels:
        k8s-app: kube-state-metrics
    spec:
      serviceAccountName: kube-state-metrics
      containers:
      - name: kube-state-metrics
        image: quay.io/coreos/kube-state-metrics:v1.7.2
        ports:
        - name: http-metrics
          containerPort: 8080
        - name: telemetry
          containerPort: 8081
        livenessProbe:
          httpGet:
            path: /healthz
            port: 8080
          initialDelaySeconds: 5
          timeoutSeconds: 5
        readinessProbe:
          httpGet:
            path: /
            port: 8080
          initialDelaySeconds: 5
          timeoutSeconds: 5
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: kube-state-metrics
  namespace: kube-system
---
apiVersion: v1
kind: Service
metadata:
  name: kube-state-metrics
  namespace: kube-system
  labels:
    k8s-app: kube-state-metrics
  annotations:
    prometheus.io/scrape: 'true'
spec:
  ports:
  - name: http-metrics
    port: 8080
    targetPort: http-metrics
    protocol: TCP
  - name: telemetry
    port: 8081
    targetPort: telemetry
    protocol: TCP
  selector:
    k8s-app: kube-state-metrics
---
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: slo-monitor
subjects:
- kind: ServiceAccount
  name: slo-monitor
  namespace: kube-system
roleRef:
  kind: ClusterRole
  name: slo-monitor
  apiGroup: rbac.authorization.k8s.io
---
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: slo-monitor
  namespace: kube-system
rules:
- apiGroups: [""]
  resources: ["pods", "events"]
  verbs: ["get", "watch", "list"]
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: slo-monitor
  namespace: kube-system
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: slo-monitor
  namespace: kube-system
  labels:
    app: slo-monitor
spec:
  selector:
    matchLabels:
      app: slo-monitor
  template:
    metadata:
      labels:
        app: slo-monitor
      annotations:
        prometheus.io/scrape: "true"
    spec:
      containers:
      - name: slo-monitor
        image: gcr.io/google-containers/slo-monitor:0.12.0
        command:
        - /slo-monitor
        - --alsologtostderr=true
        imagePullPolicy: Always
        ports:
        - name: metrics
          containerPort: 8080
        resources:
          requests:
            cpu: 300m
            memory: 100Mi
          limits:
            cpu: 300m
            memory: 100Mi
      restartPolicy: Always
      serviceAccountName: slo-monitor
---
apiVersion: v1
kind: Service
metadata:
  name: slo-monitor
  namespace: kube-system
  labels:
    app: slo-monitor
spec:
  selector:
    app: slo-monitor
  ports:
  - name: metrics
    port: 80
    targetPort: metrics
  type: LoadBalancer
%{ endif }
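These manifests are applied by the k3s server from its manifests directory (see the server userdata below). A quick way to confirm both monitoring workloads came up - standard kubectl, with names taken from the manifests above:

```sh
kubectl -n kube-system get deploy kube-state-metrics slo-monitor
```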


@@ -1,86 +0,0 @@
%{ if local.prom_worker_node_count != 0 }
---
apiVersion: v1
kind: Namespace
metadata:
  name: monitoring
---
apiVersion: helm.cattle.io/v1
kind: HelmChart
metadata:
  name: prometheus
  namespace: kube-system
spec:
  chart: https://raw.githubusercontent.com/drpebcak/charts/master/prometheus-9.1.0.tgz
  targetNamespace: monitoring
  valuesContent: |-
    alertmanager:
      nodeSelector:
        prom: "true"
      persistentVolume:
        enabled: false
    kubeStateMetrics:
      nodeSelector:
        prom: "true"
    nodeExporter:
      nodeSelector:
        prom: "true"
    server:
      nodeSelector:
        prom: "true"
      ingress:
        enabled: true
        hosts:
        - ${prom_host}
      persistentVolume:
        enabled: false
    pushgateway:
      nodeSelector:
        prom: "true"
      persistentVolume:
        enabled: false
    serverFiles:
      prometheus.yml:
        scrape_configs:
        - job_name: prometheus
          static_configs:
          - targets:
            - localhost:9090
        - job_name: kubernetes-apiservers
          scrape_interval: 10s
          scrape_timeout: 10s
          metrics_path: /metrics
          scheme: https
          kubernetes_sd_configs:
          - api_server: null
            role: endpoints
            namespaces:
              names: []
          bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
          tls_config:
            ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
            insecure_skip_verify: true
          relabel_configs:
          - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
            separator: ;
            regex: default;kubernetes;https
            replacement: $1
            action: keep
---
apiVersion: helm.cattle.io/v1
kind: HelmChart
metadata:
  name: grafana
  namespace: kube-system
spec:
  chart: stable/grafana
  targetNamespace: monitoring
  valuesContent: |-
    ingress:
      enabled: true
      hosts:
      - ${graf_host}
    nodeSelector:
      prom: "true"
%{ endif }


@@ -1,41 +0,0 @@
#cloud-config
%{ if length(extra_ssh_keys) > 0 }
ssh_authorized_keys:
%{ for ssh_key in extra_ssh_keys }
- ${ssh_key}
%{ endfor }
%{ endif }
write_files:
- path: /var/lib/rancher/k3s/server/manifests/metrics.yaml
  permissions: "0755"
  owner: root:root
  encoding: b64
  content: ${metrics_yaml}
- path: /var/lib/rancher/k3s/server/manifests/prom.yaml
  permissions: "0755"
  owner: root:root
  encoding: b64
  content: ${prom_yaml}
runcmd:
- echo "net.ipv4.neigh.default.gc_interval = 3600" >> /etc/sysctl.conf
- echo "net.ipv4.neigh.default.gc_stale_time = 3600" >> /etc/sysctl.conf
- echo "net.ipv4.neigh.default.gc_thresh3 = 16384" >> /etc/sysctl.conf
- echo "net.ipv4.neigh.default.gc_thresh2 = 8192" >> /etc/sysctl.conf
- echo "net.ipv4.neigh.default.gc_thresh1 = 4096" >> /etc/sysctl.conf
- echo "fs.file-max = 12000500" >> /etc/sysctl.conf
- echo "fs.nr_open = 20000500" >> /etc/sysctl.conf
- echo "net.ipv4.tcp_mem = '10000000 10000000 10000000'" >> /etc/sysctl.conf
- echo "net.ipv4.tcp_rmem = '1024 4096 16384'" >> /etc/sysctl.conf
- echo "net.ipv4.tcp_wmem = '1024 4096 16384'" >> /etc/sysctl.conf
- echo "net.core.rmem_max = 16384" >> /etc/sysctl.conf
- echo "net.core.wmem_max = 16384" >> /etc/sysctl.conf
- ulimit -n 20000000
- echo "# <domain> <type> <item> <value>" >> /etc/security/limits.d/limits.conf
- echo " * soft nofile 20000" >> /etc/security/limits.d/limits.conf
- echo " * hard nofile 20000" >> /etc/security/limits.d/limits.conf
- sysctl -p
- apt-get update
- apt-get install -y software-properties-common resolvconf linux-headers-$(uname -r)
- echo "nameserver 1.1.1.1" > /etc/resolvconf/resolv.conf.d/tail
- systemctl start resolvconf
- until (curl -sfL https://get.k3s.io | INSTALL_K3S_EXEC="${k3s_server_args} --disable-agent --no-deploy traefik --no-deploy servicelb --cluster-cidr=10.0.0.0/8 --service-cidr=192.168.0.0/16 --cluster-dns=192.168.0.10 --tls-san ${public_ip}" K3S_CLUSTER_SECRET="${k3s_cluster_secret}" INSTALL_K3S_VERSION=${install_k3s_version} sh -); do echo 'Error installing k3s'; sleep 1; done


@@ -1,26 +0,0 @@
#cloud-config
%{ if length(extra_ssh_keys) > 0 }
ssh_authorized_keys:
%{ for ssh_key in extra_ssh_keys }
- ${ssh_key}
%{ endfor }
%{ endif }
runcmd:
- echo "net.ipv4.neigh.default.gc_interval = 3600" >> /etc/sysctl.conf
- echo "net.ipv4.neigh.default.gc_stale_time = 3600" >> /etc/sysctl.conf
- echo "net.ipv4.neigh.default.gc_thresh3 = 16384" >> /etc/sysctl.conf
- echo "net.ipv4.neigh.default.gc_thresh2 = 8192" >> /etc/sysctl.conf
- echo "net.ipv4.neigh.default.gc_thresh1 = 4096" >> /etc/sysctl.conf
- echo "fs.file-max = 12000500" >> /etc/sysctl.conf
- echo "fs.nr_open = 20000500" >> /etc/sysctl.conf
- echo "net.ipv4.tcp_mem = '10000000 10000000 10000000'" >> /etc/sysctl.conf
- echo "net.ipv4.tcp_rmem = '1024 4096 16384'" >> /etc/sysctl.conf
- echo "net.ipv4.tcp_wmem = '1024 4096 16384'" >> /etc/sysctl.conf
- echo "net.core.rmem_max = 16384" >> /etc/sysctl.conf
- echo "net.core.wmem_max = 16384" >> /etc/sysctl.conf
- ulimit -n 20000
- echo "# <domain> <type> <item> <value>" >> /etc/security/limits.d/limits.conf
- echo " * soft nofile 20000" >> /etc/security/limits.d/limits.conf
- echo " * hard nofile 20000" >> /etc/security/limits.d/limits.conf
- sysctl -p
- until (curl -sfL https://get.k3s.io | INSTALL_K3S_VERSION=${install_k3s_version} INSTALL_K3S_EXEC="${k3s_exec}" K3S_URL=https://${k3s_url}:6443 K3S_CLUSTER_SECRET="${k3s_cluster_secret}" sh -); do echo 'k3s did not install correctly'; sleep 1; done


@@ -1,116 +0,0 @@
terraform {
  backend "local" {
    path = "server.tfstate"
  }
}

locals {
  name                   = var.name
  k3s_cluster_secret     = var.k3s_cluster_secret
  install_k3s_version    = var.k3s_version
  prom_worker_node_count = var.prom_worker_node_count
}

provider "aws" {
  region  = "us-west-2"
  profile = "rancher-eng"
}

resource "aws_security_group" "k3s" {
  name   = "${local.name}-sg"
  vpc_id = data.aws_vpc.default.id

  ingress {
    from_port   = 22
    to_port     = 22
    protocol    = "TCP"
    cidr_blocks = ["0.0.0.0/0"]
  }

  ingress {
    from_port   = 6443
    to_port     = 6443
    protocol    = "TCP"
    cidr_blocks = ["0.0.0.0/0"]
  }

  ingress {
    from_port   = 0
    to_port     = 0
    protocol    = "-1"
    cidr_blocks = ["0.0.0.0/0"]
  }

  ingress {
    from_port = 0
    to_port   = 0
    protocol  = "-1"
    self      = true
  }

  egress {
    from_port   = 0
    to_port     = 0
    protocol    = "-1"
    cidr_blocks = ["0.0.0.0/0"]
  }
}

resource "aws_spot_instance_request" "k3s-server" {
  instance_type = var.server_instance_type
  ami           = data.aws_ami.ubuntu.id
  user_data     = base64encode(templatefile("${path.module}/files/server_userdata.tmpl", { extra_ssh_keys = var.extra_ssh_keys, public_ip = aws_spot_instance_request.k3s-server.public_ip, metrics_yaml = base64encode(data.template_file.metrics.rendered), prom_yaml = base64encode(data.template_file.k3s-prom-yaml.rendered), k3s_cluster_secret = local.k3s_cluster_secret, install_k3s_version = local.install_k3s_version, k3s_server_args = var.k3s_server_args }))

  ebs_optimized        = true
  wait_for_fulfillment = true

  security_groups = [
    aws_security_group.k3s.id,
  ]

  root_block_device {
    volume_size = "1000"
    volume_type = "gp2"
  }

  tags = {
    Name = "${local.name}-server"
  }
}

module "k3s-prom-worker-asg" {
  source        = "terraform-aws-modules/autoscaling/aws"
  version       = "3.0.0"
  name          = "${local.name}-prom-worker"
  asg_name      = "${local.name}-prom-worker"
  instance_type = "m5.large"
  image_id      = data.aws_ami.ubuntu.id
  user_data     = base64encode(templatefile("${path.module}/files/worker_userdata.tmpl", { extra_ssh_keys = var.extra_ssh_keys, k3s_url = aws_spot_instance_request.k3s-server.public_ip, k3s_cluster_secret = local.k3s_cluster_secret, install_k3s_version = local.install_k3s_version, k3s_exec = "--node-label prom=true" }))
  ebs_optimized = true

  desired_capacity    = local.prom_worker_node_count
  health_check_type   = "EC2"
  max_size            = local.prom_worker_node_count
  min_size            = local.prom_worker_node_count
  vpc_zone_identifier = [data.aws_subnet.selected.id]
  spot_price          = "0.340"

  security_groups = [
    aws_security_group.k3s.id,
  ]

  lc_name = "${local.name}-prom-worker"

  root_block_device = [
    {
      volume_size = "100"
      volume_type = "gp2"
    },
  ]
}

resource "null_resource" "get-kubeconfig" {
  provisioner "local-exec" {
    interpreter = ["bash", "-c"]
    command     = "until ssh ubuntu@${aws_spot_instance_request.k3s-server.public_ip} 'sudo sed \"s/localhost/${aws_spot_instance_request.k3s-server.public_ip}/g;s/127.0.0.1/${aws_spot_instance_request.k3s-server.public_ip}/g\" /etc/rancher/k3s/k3s.yaml' >| ../cluster-loader/kubeConfig.yaml; do sleep 5; done"
  }
}


@@ -1,11 +0,0 @@
output "public_ip" {
  value = aws_spot_instance_request.k3s-server.public_ip
}

output "install_k3s_version" {
  value = local.install_k3s_version
}

output "k3s_cluster_secret" {
  value = local.k3s_cluster_secret
}


@@ -1,38 +0,0 @@
variable "server_instance_type" {
  # default = "c4.8xlarge"
}

variable "k3s_version" {
  default     = "v0.9.1"
  type        = string
  description = "Version of K3S to install"
}

variable "k3s_server_args" {
  default = ""
}

variable "prom_worker_node_count" {
  default     = 0
  type        = number
  description = "The number of workers to create labeled for prometheus"
}

variable "k3s_cluster_secret" {
  default     = "pvc-6476dcaf-73a0-11e9-b8e5-06943b744282"
  type        = string
  description = "Cluster secret for k3s cluster registration"
}

variable "prom_host" {
  default = ""
}

variable "graf_host" {
  default = ""
}

variable "name" {
  default     = "k3s-loadtest"
  type        = string
  description = "Name to identify this cluster"
}

variable "extra_ssh_keys" {
  type        = list
  default     = []
  description = "Extra ssh keys to inject into Rancher instances"
}


@@ -1,4 +0,0 @@
terraform {
  required_version = ">= 0.12"
}