Remove infra for now

Tyler Perkins 2024-11-02 19:02:11 -04:00
parent b188cd0825
commit 1bba385e76
Signed by: tyler
GPG Key ID: 03B27509E17EFDC8
12 changed files with 0 additions and 1498 deletions

@@ -1,344 +0,0 @@
apiVersion: v1
kind: ConfigMap
metadata:
name: metallb-helm-chart-value-overrides
namespace: metallb-system
data:
values.yaml: |-
# Default values for metallb.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.
imagePullSecrets: []
nameOverride: ""
fullnameOverride: ""
loadBalancerClass: ""
# To configure MetalLB, you must specify ONE of the following two
# options.
rbac:
# create specifies whether to install and use RBAC rules.
create: true
prometheus:
# scrape annotations specifies whether to add Prometheus metric
# auto-collection annotations to pods. See
# https://github.com/prometheus/prometheus/blob/release-2.1/documentation/examples/prometheus-kubernetes.yml
# for a corresponding Prometheus configuration. Alternatively, you
# may want to use the Prometheus Operator
# (https://github.com/coreos/prometheus-operator) for more powerful
# monitoring configuration. If you use the Prometheus operator, this
# can be left at false.
scrapeAnnotations: false
# port both controller and speaker will listen on for metrics
metricsPort: 7472
# if set, enables rbac proxy on the controller and speaker to expose
# the metrics via tls.
# secureMetricsPort: 9120
# the name of the secret to be mounted in the speaker pod
# to expose the metrics securely. If not present, a self-signed
# certificate will be used.
speakerMetricsTLSSecret: ""
# the name of the secret to be mounted in the controller pod
# to expose the metrics securely. If not present, a self-signed
# certificate will be used.
controllerMetricsTLSSecret: ""
# prometheus doesn't have permission to scrape all namespaces, so we grant it permission to scrape metallb's namespace
rbacPrometheus: true
# the service account used by prometheus
# required when " .Values.prometheus.rbacPrometheus == true " and " .Values.prometheus.podMonitor.enabled=true or prometheus.serviceMonitor.enabled=true "
serviceAccount: ""
# the namespace where prometheus is deployed
# required when " .Values.prometheus.rbacPrometheus == true " and " .Values.prometheus.podMonitor.enabled=true or prometheus.serviceMonitor.enabled=true "
namespace: ""
# Prometheus Operator PodMonitors
podMonitor:
# enable support for Prometheus Operator
enabled: false
# optional additional labels for podMonitors
additionalLabels: {}
# optional annotations for podMonitors
annotations: {}
# Job label for scrape target
jobLabel: "app.kubernetes.io/name"
# Scrape interval. If not set, the Prometheus default scrape interval is used.
interval:
# metric relabel configs to apply to samples before ingestion.
metricRelabelings: []
# - action: keep
# regex: 'kube_(daemonset|deployment|pod|namespace|node|statefulset).+'
# sourceLabels: [__name__]
# relabel configs to apply to samples before ingestion.
relabelings: []
# - sourceLabels: [__meta_kubernetes_pod_node_name]
# separator: ;
# regex: ^(.*)$
# target_label: nodename
# replacement: $1
# action: replace
# Prometheus Operator ServiceMonitors. To be used as an alternative
# to podMonitor, supports secure metrics.
serviceMonitor:
# enable support for Prometheus Operator
enabled: false
speaker:
# optional additional labels for the speaker serviceMonitor
additionalLabels: {}
# optional additional annotations for the speaker serviceMonitor
annotations: {}
# optional tls configuration for the speaker serviceMonitor, in case
# secure metrics are enabled.
tlsConfig:
insecureSkipVerify: true
controller:
# optional additional labels for the controller serviceMonitor
additionalLabels: {}
# optional additional annotations for the controller serviceMonitor
annotations: {}
# optional tls configuration for the controller serviceMonitor, in case
# secure metrics are enabled.
tlsConfig:
insecureSkipVerify: true
# Job label for scrape target
jobLabel: "app.kubernetes.io/name"
# Scrape interval. If not set, the Prometheus default scrape interval is used.
interval:
# metric relabel configs to apply to samples before ingestion.
metricRelabelings: []
# - action: keep
# regex: 'kube_(daemonset|deployment|pod|namespace|node|statefulset).+'
# sourceLabels: [__name__]
# relabel configs to apply to samples before ingestion.
relabelings: []
# - sourceLabels: [__meta_kubernetes_pod_node_name]
# separator: ;
# regex: ^(.*)$
# target_label: nodename
# replacement: $1
# action: replace
# Prometheus Operator alertmanager alerts
prometheusRule:
# enable alertmanager alerts
enabled: false
# optional additional labels for prometheusRules
additionalLabels: {}
# optional annotations for prometheusRules
annotations: {}
# MetalLBStaleConfig
staleConfig:
enabled: true
labels:
severity: warning
# MetalLBConfigNotLoaded
configNotLoaded:
enabled: true
labels:
severity: warning
# MetalLBAddressPoolExhausted
addressPoolExhausted:
enabled: true
labels:
severity: alert
addressPoolUsage:
enabled: true
thresholds:
- percent: 75
labels:
severity: warning
- percent: 85
labels:
severity: warning
- percent: 95
labels:
severity: alert
# MetalLBBGPSessionDown
bgpSessionDown:
enabled: true
labels:
severity: alert
extraAlerts: []
# controller contains configuration specific to the MetalLB cluster
# controller.
controller:
enabled: true
# -- Controller log level. Must be one of: `all`, `debug`, `info`, `warn`, `error` or `none`
logLevel: info
# command: /controller
# webhookMode: enabled
image:
repository: quay.io/metallb/controller
tag:
pullPolicy:
## @param controller.updateStrategy.type Metallb controller deployment strategy type.
## ref: https://kubernetes.io/docs/concepts/workloads/controllers/deployment/#strategy
## e.g:
## strategy:
## type: RollingUpdate
## rollingUpdate:
## maxSurge: 25%
## maxUnavailable: 25%
##
strategy:
type: RollingUpdate
serviceAccount:
# Specifies whether a ServiceAccount should be created
create: true
# The name of the ServiceAccount to use. If not set and create is
# true, a name is generated using the fullname template
name: ""
annotations: {}
securityContext:
runAsNonRoot: true
# nobody
runAsUser: 65534
fsGroup: 65534
resources: {}
# limits:
# cpu: 100m
# memory: 100Mi
nodeSelector: {}
tolerations: []
priorityClassName: ""
runtimeClassName: ""
affinity: {}
podAnnotations: {}
labels: {}
livenessProbe:
enabled: true
failureThreshold: 3
initialDelaySeconds: 10
periodSeconds: 10
successThreshold: 1
timeoutSeconds: 1
readinessProbe:
enabled: true
failureThreshold: 3
initialDelaySeconds: 10
periodSeconds: 10
successThreshold: 1
timeoutSeconds: 1
# speaker contains configuration specific to the MetalLB speaker
# daemonset.
speaker:
enabled: true
# command: /speaker
# -- Speaker log level. Must be one of: `all`, `debug`, `info`, `warn`, `error` or `none`
logLevel: info
tolerateMaster: true
memberlist:
enabled: true
mlBindPort: 7946
mlSecretKeyPath: "/etc/ml_secret_key"
excludeInterfaces:
enabled: true
image:
repository: quay.io/metallb/speaker
tag:
pullPolicy:
## @param speaker.updateStrategy.type Speaker daemonset strategy type
## ref: https://kubernetes.io/docs/tasks/manage-daemon/update-daemon-set/
##
updateStrategy:
## StrategyType
## Can be set to RollingUpdate or OnDelete
##
type: RollingUpdate
serviceAccount:
# Specifies whether a ServiceAccount should be created
create: true
# The name of the ServiceAccount to use. If not set and create is
# true, a name is generated using the fullname template
name: ""
annotations: {}
## Defines a secret name for the controller to generate a memberlist encryption secret
## By default secretName: {{ "metallb.fullname" }}-memberlist
##
# secretName:
resources: {}
# limits:
# cpu: 100m
# memory: 100Mi
nodeSelector: {}
tolerations: []
priorityClassName: ""
affinity: {}
## Selects which runtime class will be used by the pod.
runtimeClassName: ""
podAnnotations: {}
labels: {}
livenessProbe:
enabled: true
failureThreshold: 3
initialDelaySeconds: 10
periodSeconds: 10
successThreshold: 1
timeoutSeconds: 1
readinessProbe:
enabled: true
failureThreshold: 3
initialDelaySeconds: 10
periodSeconds: 10
successThreshold: 1
timeoutSeconds: 1
startupProbe:
enabled: true
failureThreshold: 30
periodSeconds: 5
# frr contains configuration specific to the MetalLB FRR container,
# for speaker running alongside FRR.
frr:
enabled: true
image:
repository: quay.io/frrouting/frr
tag: 9.1.0
pullPolicy:
metricsPort: 7473
resources: {}
# if set, enables an rbac proxy sidecar container on the speaker to
# expose the frr metrics via tls.
# secureMetricsPort: 9121
reloader:
resources: {}
frrMetrics:
resources: {}
crds:
enabled: true
validationFailurePolicy: Fail

View File

@ -1,22 +0,0 @@
apiVersion: helm.toolkit.fluxcd.io/v2beta1
kind: HelmRelease
metadata:
name: metallb
namespace: metallb-system
spec:
chart:
spec:
chart: metallb
version: 0.14.x
sourceRef:
kind: HelmRepository
name: metallb
namespace: flux-system
interval: 15m
timeout: 5m
releaseName: metallb
valuesFrom:
- kind: ConfigMap
name: metallb-helm-chart-value-overrides
valuesKey: values.yaml
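
The HelmRelease above pulls the chart from a HelmRepository named metallb in flux-system, which is not part of this diff. A minimal sketch of what that source presumably looks like (the API version and chart URL are assumptions; the URL matches MetalLB's published Helm repo):

apiVersion: source.toolkit.fluxcd.io/v1beta2
kind: HelmRepository
metadata:
  name: metallb
  namespace: flux-system
spec:
  interval: 1h
  # assumed: MetalLB's public chart repository
  url: https://metallb.github.io/metallb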

@@ -1,9 +0,0 @@
apiVersion: metallb.io/v1beta1
kind: IPAddressPool
metadata:
name: metallb-pool-addresses
namespace: metallb-system
spec:
addresses:
- 10.0.3.64-10.0.3.253

@@ -1,9 +0,0 @@
apiVersion: metallb.io/v1beta1
kind: L2Advertisement
metadata:
name: my-l2-advertisment
namespace: metallb-system
spec:
ipAddressPools:
- metallb-pool-addresses
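
With the pool and L2 advertisement above in place, any Service of type LoadBalancer is assigned an address from 10.0.3.64-10.0.3.253 and announced over L2. A sketch of a consumer (the name and selector are hypothetical; the annotation is optional and only needed to pin a specific pool):

apiVersion: v1
kind: Service
metadata:
  name: example-lb            # hypothetical name
  namespace: default
  annotations:
    # optional: request an address from the pool defined above
    metallb.universe.tf/address-pool: metallb-pool-addresses
spec:
  type: LoadBalancer
  selector:
    app: example              # hypothetical selector
  ports:
    - port: 80
      targetPort: 8080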

@@ -1,21 +0,0 @@
apiVersion: kustomize.toolkit.fluxcd.io/v1beta1
kind: Kustomization
metadata:
name: config--metallb-system
namespace: flux-system
spec:
interval: 15m
dependsOn:
- name: metallb--metallb-system
path: "./infra/metallb"
prune: true # remove any elements later removed from the above path
timeout: 2m # if not set, this defaults to the interval duration (15m above)
sourceRef:
kind: GitRepository
name: flux-system
healthChecks:
- apiVersion: apps/v1
kind: Deployment
name: metallb-controller
namespace: metallb-system

@@ -1,19 +0,0 @@
apiVersion: kustomize.toolkit.fluxcd.io/v1beta1
kind: Kustomization
metadata:
name: metallb--metallb-system
namespace: flux-system
spec:
interval: 15m
path: "./infra/metallb"
prune: true # remove any elements later removed from the above path
timeout: 2m # if not set, this defaults to the interval duration (15m above)
sourceRef:
kind: GitRepository
name: flux-system
healthChecks:
- apiVersion: apps/v1
kind: Deployment
name: metallb-controller
namespace: metallb-system

@@ -1,556 +0,0 @@
apiVersion: helm.toolkit.fluxcd.io/v2beta1
kind: HelmRelease
metadata:
name: gpu-operator
namespace: nvidia-system
spec:
chart:
spec:
chart: gpu-operator
sourceRef:
kind: HelmRepository
name: nvidia-operator
namespace: flux-system
interval: 15m0s
timeout: 5m
releaseName: gpu-operator
values:
# Default values for gpu-operator.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.
platform:
openshift: false
nfd:
enabled: true
nodefeaturerules: false
psa:
enabled: false
cdi:
enabled: false
default: false
sandboxWorkloads:
enabled: false
defaultWorkload: "container"
daemonsets:
labels: {}
annotations: {}
priorityClassName: system-node-critical
tolerations:
- key: nvidia.com/gpu
operator: Exists
effect: NoSchedule
# configuration for controlling the update strategy ("OnDelete" or "RollingUpdate") of GPU Operands
# note that driver Daemonset is always set with OnDelete to avoid unintended disruptions
updateStrategy: "RollingUpdate"
# configuration for controlling rolling update of GPU Operands
rollingUpdate:
# maximum number of nodes to simultaneously apply pod updates on.
# can be specified either as a number or a percentage of nodes. Defaults to 1.
maxUnavailable: "1"
validator:
repository: nvcr.io/nvidia/cloud-native
image: gpu-operator-validator
# If version is not specified, then default is to use chart.AppVersion
#version: ""
imagePullPolicy: IfNotPresent
imagePullSecrets: []
env: []
args: []
resources: {}
plugin:
env:
- name: WITH_WORKLOAD
value: "false"
operator:
repository: nvcr.io/nvidia
image: gpu-operator
# If version is not specified, then default is to use chart.AppVersion
#version: ""
imagePullPolicy: IfNotPresent
imagePullSecrets: []
priorityClassName: system-node-critical
defaultRuntime: docker
runtimeClass: nvidia
use_ocp_driver_toolkit: false
# cleanup CRDs on chart uninstall
cleanupCRD: false
# upgrade CRD on chart upgrade, requires --disable-openapi-validation flag
# to be passed during helm upgrade.
upgradeCRD: false
initContainer:
image: cuda
repository: nvcr.io/nvidia
version: 12.3.2-base-ubi8
imagePullPolicy: IfNotPresent
tolerations:
- key: "node-role.kubernetes.io/master"
operator: "Equal"
value: ""
effect: "NoSchedule"
- key: "node-role.kubernetes.io/control-plane"
operator: "Equal"
value: ""
effect: "NoSchedule"
annotations:
openshift.io/scc: restricted-readonly
affinity:
nodeAffinity:
preferredDuringSchedulingIgnoredDuringExecution:
- weight: 1
preference:
matchExpressions:
- key: "node-role.kubernetes.io/master"
operator: In
values: [""]
- weight: 1
preference:
matchExpressions:
- key: "node-role.kubernetes.io/control-plane"
operator: In
values: [""]
logging:
# Zap time encoding (one of 'epoch', 'millis', 'nano', 'iso8601', 'rfc3339' or 'rfc3339nano')
timeEncoding: epoch
# Zap Level to configure the verbosity of logging. Can be one of 'debug', 'info', 'error', or any integer value > 0 which corresponds to custom debug levels of increasing verbosity
level: info
# Development Mode defaults(encoder=consoleEncoder,logLevel=Debug,stackTraceLevel=Warn)
# Production Mode defaults(encoder=jsonEncoder,logLevel=Info,stackTraceLevel=Error)
develMode: false
resources:
limits:
cpu: 500m
memory: 350Mi
requests:
cpu: 200m
memory: 100Mi
mig:
strategy: single
driver:
enabled: true
nvidiaDriverCRD:
enabled: false
deployDefaultCR: true
driverType: gpu
nodeSelector: {}
useOpenKernelModules: false
# use pre-compiled packages for NVIDIA driver installation.
# only supported as a tech-preview feature on ubuntu22.04 kernels.
usePrecompiled: false
repository: nvcr.io/nvidia
image: driver
version: "550.54.15"
imagePullPolicy: IfNotPresent
imagePullSecrets: []
startupProbe:
initialDelaySeconds: 60
periodSeconds: 10
# nvidia-smi can take longer than 30s in some cases
# ensure enough timeout is set
timeoutSeconds: 60
failureThreshold: 120
rdma:
enabled: false
useHostMofed: false
upgradePolicy:
# global switch for automatic upgrade feature
# if set to false all other options are ignored
autoUpgrade: true
# how many nodes can be upgraded in parallel
# 0 means no limit, all nodes will be upgraded in parallel
maxParallelUpgrades: 1
# maximum number of nodes with the driver installed, that can be unavailable during
# the upgrade. Value can be an absolute number (ex: 5) or
# a percentage of total nodes at the start of upgrade (ex:
# 10%). Absolute number is calculated from percentage by rounding
# up. By default, a fixed value of 25% is used.
maxUnavailable: 25%
# options for waiting on pod(job) completions
waitForCompletion:
timeoutSeconds: 0
podSelector: ""
# options for gpu pod deletion
gpuPodDeletion:
force: false
timeoutSeconds: 300
deleteEmptyDir: false
# options for node drain (`kubectl drain`) before the driver reload
# this is required only if default GPU pod deletions done by the operator
# are not sufficient to re-install the driver
drain:
enable: false
force: false
podSelector: ""
# It's recommended to set a timeout to avoid infinite drain in case non-fatal error keeps happening on retries
timeoutSeconds: 300
deleteEmptyDir: false
manager:
image: k8s-driver-manager
repository: nvcr.io/nvidia/cloud-native
# When choosing a different version of k8s-driver-manager, DO NOT downgrade to a version lower than v0.6.4
# to ensure k8s-driver-manager stays compatible with gpu-operator starting from v24.3.0
version: v0.6.7
imagePullPolicy: IfNotPresent
env:
- name: ENABLE_GPU_POD_EVICTION
value: "true"
- name: ENABLE_AUTO_DRAIN
value: "false"
- name: DRAIN_USE_FORCE
value: "false"
- name: DRAIN_POD_SELECTOR_LABEL
value: ""
- name: DRAIN_TIMEOUT_SECONDS
value: "0s"
- name: DRAIN_DELETE_EMPTYDIR_DATA
value: "false"
env: []
resources: {}
# Private mirror repository configuration
repoConfig:
configMapName: ""
# custom ssl key/certificate configuration
certConfig:
name: ""
# vGPU licensing configuration
licensingConfig:
configMapName: ""
nlsEnabled: true
# vGPU topology daemon configuration
virtualTopology:
config: ""
# kernel module configuration for NVIDIA driver
kernelModuleConfig:
name: ""
toolkit:
enabled: true
repository: nvcr.io/nvidia/k8s
image: container-toolkit
version: v1.15.0-rc.4-ubuntu20.04
imagePullPolicy: IfNotPresent
imagePullSecrets: []
env:
- name: CONTAINERD_CONFIG
value: /var/lib/rancher/k3s/agent/etc/containerd/config.toml
- name: CONTAINERD_SOCKET
value: /run/k3s/containerd/containerd.sock
resources: {}
installDir: "/usr/local/nvidia"
devicePlugin:
enabled: true
repository: nvcr.io/nvidia
image: k8s-device-plugin
version: v0.15.0-rc.2-ubi8
imagePullPolicy: IfNotPresent
imagePullSecrets: []
args: []
env:
- name: PASS_DEVICE_SPECS
value: "true"
- name: FAIL_ON_INIT_ERROR
value: "true"
- name: DEVICE_LIST_STRATEGY
value: envvar
- name: DEVICE_ID_STRATEGY
value: uuid
- name: NVIDIA_VISIBLE_DEVICES
value: all
- name: NVIDIA_DRIVER_CAPABILITIES
value: all
resources: {}
# Plugin configuration
# Use "name" to either point to an existing ConfigMap or to create a new one with a list of configurations(i.e with create=true).
# Use "data" to build an integrated ConfigMap from a set of configurations as
# part of this helm chart. An example of setting "data" might be:
# config:
# name: device-plugin-config
# create: true
# data:
# default: |-
# version: v1
# flags:
# migStrategy: none
# mig-single: |-
# version: v1
# flags:
# migStrategy: single
# mig-mixed: |-
# version: v1
# flags:
# migStrategy: mixed
config:
# Create a ConfigMap (default: false)
create: false
# ConfigMap name (either existing, or to create a new one with create=true above)
name: ""
# Default config name within the ConfigMap
default: ""
# Data section for the ConfigMap to create (i.e. only applies when create=true)
data: {}
# MPS related configuration for the plugin
mps:
# MPS root path on the host
root: "/run/nvidia/mps"
# standalone dcgm hostengine
dcgm:
# disabled by default to use embedded nv-hostengine by exporter
enabled: false
repository: nvcr.io/nvidia/cloud-native
image: dcgm
version: 3.3.3-1-ubuntu22.04
imagePullPolicy: IfNotPresent
hostPort: 5555
args: []
env: []
resources: {}
dcgmExporter:
enabled: true
repository: nvcr.io/nvidia/k8s
image: dcgm-exporter
version: 3.3.5-3.4.0-ubuntu22.04
imagePullPolicy: IfNotPresent
env:
- name: DCGM_EXPORTER_LISTEN
value: ":9400"
- name: DCGM_EXPORTER_KUBERNETES
value: "true"
- name: DCGM_EXPORTER_COLLECTORS
value: "/etc/dcgm-exporter/dcp-metrics-included.csv"
resources: {}
serviceMonitor:
enabled: false
interval: 15s
honorLabels: false
additionalLabels: {}
relabelings: []
# - source_labels:
# - __meta_kubernetes_pod_node_name
# regex: (.*)
# target_label: instance
# replacement: $1
# action: replace
gfd:
enabled: true
repository: nvcr.io/nvidia
image: k8s-device-plugin
version: v0.15.0-rc.2-ubi8
imagePullPolicy: IfNotPresent
imagePullSecrets: []
env:
- name: GFD_SLEEP_INTERVAL
value: 60s
- name: GFD_FAIL_ON_INIT_ERROR
value: "true"
resources: {}
migManager:
enabled: true
repository: nvcr.io/nvidia/cloud-native
image: k8s-mig-manager
version: v0.6.0-ubuntu20.04
imagePullPolicy: IfNotPresent
imagePullSecrets: []
env:
- name: WITH_REBOOT
value: "false"
resources: {}
config:
name: "default-mig-parted-config"
default: "all-disabled"
gpuClientsConfig:
name: ""
nodeStatusExporter:
enabled: false
repository: nvcr.io/nvidia/cloud-native
image: gpu-operator-validator
# If version is not specified, then default is to use chart.AppVersion
#version: ""
imagePullPolicy: IfNotPresent
imagePullSecrets: []
resources: {}
gds:
enabled: false
repository: nvcr.io/nvidia/cloud-native
image: nvidia-fs
version: "2.17.5"
imagePullPolicy: IfNotPresent
imagePullSecrets: []
env: []
args: []
gdrcopy:
enabled: false
repository: nvcr.io/nvidia/cloud-native
image: gdrdrv
version: "v2.4.1"
imagePullPolicy: IfNotPresent
imagePullSecrets: []
env: []
args: []
vgpuManager:
enabled: false
repository: ""
image: vgpu-manager
version: ""
imagePullPolicy: IfNotPresent
imagePullSecrets: []
env: []
resources: {}
driverManager:
image: k8s-driver-manager
repository: nvcr.io/nvidia/cloud-native
# When choosing a different version of k8s-driver-manager, DO NOT downgrade to a version lower than v0.6.4
# to ensure k8s-driver-manager stays compatible with gpu-operator starting from v24.3.0
version: v0.6.7
imagePullPolicy: IfNotPresent
env:
- name: ENABLE_GPU_POD_EVICTION
value: "false"
- name: ENABLE_AUTO_DRAIN
value: "false"
vgpuDeviceManager:
enabled: true
repository: nvcr.io/nvidia/cloud-native
image: vgpu-device-manager
version: "v0.2.5"
imagePullPolicy: IfNotPresent
imagePullSecrets: []
env: []
config:
name: ""
default: "default"
vfioManager:
enabled: true
repository: nvcr.io/nvidia
image: cuda
version: 12.3.2-base-ubi8
imagePullPolicy: IfNotPresent
imagePullSecrets: []
env: []
resources: {}
driverManager:
image: k8s-driver-manager
repository: nvcr.io/nvidia/cloud-native
# When choosing a different version of k8s-driver-manager, DO NOT downgrade to a version lower than v0.6.4
# to ensure k8s-driver-manager stays compatible with gpu-operator starting from v24.3.0
version: v0.6.7
imagePullPolicy: IfNotPresent
env:
- name: ENABLE_GPU_POD_EVICTION
value: "false"
- name: ENABLE_AUTO_DRAIN
value: "false"
kataManager:
enabled: false
config:
artifactsDir: "/opt/nvidia-gpu-operator/artifacts/runtimeclasses"
runtimeClasses:
- name: kata-qemu-nvidia-gpu
nodeSelector: {}
artifacts:
url: nvcr.io/nvidia/cloud-native/kata-gpu-artifacts:ubuntu22.04-535.54.03
pullSecret: ""
- name: kata-qemu-nvidia-gpu-snp
nodeSelector:
"nvidia.com/cc.capable": "true"
artifacts:
url: nvcr.io/nvidia/cloud-native/kata-gpu-artifacts:ubuntu22.04-535.86.10-snp
pullSecret: ""
repository: nvcr.io/nvidia/cloud-native
image: k8s-kata-manager
version: v0.1.2
imagePullPolicy: IfNotPresent
imagePullSecrets: []
env: []
resources: {}
sandboxDevicePlugin:
enabled: true
repository: nvcr.io/nvidia
image: kubevirt-gpu-device-plugin
version: v1.2.6
imagePullPolicy: IfNotPresent
imagePullSecrets: []
args: []
env: []
resources: {}
ccManager:
enabled: false
defaultMode: "off"
repository: nvcr.io/nvidia/cloud-native
image: k8s-cc-manager
version: v0.1.1
imagePullPolicy: IfNotPresent
imagePullSecrets: []
env:
- name: CC_CAPABLE_DEVICE_IDS
value: "0x2339,0x2331,0x2330,0x2324,0x2322,0x233d"
resources: {}
node-feature-discovery:
enableNodeFeatureApi: true
gc:
enable: true
replicaCount: 1
serviceAccount:
name: node-feature-discovery
create: false
worker:
serviceAccount:
name: node-feature-discovery
# disable creation to avoid duplicate serviceaccount creation by master spec below
create: false
tolerations:
- key: "node-role.kubernetes.io/master"
operator: "Equal"
value: ""
effect: "NoSchedule"
- key: "node-role.kubernetes.io/control-plane"
operator: "Equal"
value: ""
effect: "NoSchedule"
- key: nvidia.com/gpu
operator: Exists
effect: NoSchedule
config:
sources:
pci:
deviceClassWhitelist:
- "02"
- "0200"
- "0207"
- "0300"
- "0302"
deviceLabelFields:
- vendor
master:
serviceAccount:
name: node-feature-discovery
create: true
config:
extraLabelNs: ["nvidia.com"]
# noPublish: false
# resourceLabels: ["nvidia.com/feature-1","nvidia.com/feature-2"]
# enableTaints: false
# labelWhiteList: "nvidia.com/gpu"
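
Once the operator, driver, toolkit, and device plugin are healthy, GPUs are exposed as the nvidia.com/gpu extended resource. A minimal smoke-test pod, assuming at least one GPU node is present (the image tag reuses the CUDA version pinned in the values above; the pod name is hypothetical):

apiVersion: v1
kind: Pod
metadata:
  name: cuda-smoke-test
  namespace: default
spec:
  restartPolicy: Never
  runtimeClassName: nvidia    # matches operator.runtimeClass above
  containers:
    - name: cuda
      image: nvcr.io/nvidia/cuda:12.3.2-base-ubi8
      command: ["nvidia-smi"]
      resources:
        limits:
          nvidia.com/gpu: 1   # request one GPU from the device plugin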

@@ -1,13 +0,0 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: secret-full-access-rolebinding
namespace: sealed-secrets
subjects:
- kind: ServiceAccount
name: sealed-secrets
namespace: sealed-secrets
roleRef:
kind: ClusterRole
name: secret-full-access-role
apiGroup: rbac.authorization.k8s.io

@@ -1,9 +0,0 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: secret-full-access-role
namespace: sealed-secrets
rules:
- apiGroups: [""]
resources: ["secrets"]
verbs: ["get", "list", "watch", "create", "update", "patch", "delete"]

@@ -1,456 +0,0 @@
apiVersion: v1
kind: ConfigMap
metadata:
name: sealed-secrets-helm-chart-value-overrides
namespace: sealed-secrets
data:
values.yaml: |-
## @section Common parameters
## @param kubeVersion Override Kubernetes version
##
kubeVersion: ""
## @param nameOverride String to partially override sealed-secrets.fullname
##
nameOverride: ""
## @param fullnameOverride String to fully override sealed-secrets.fullname
##
fullnameOverride: ""
## @param namespace Namespace where to deploy the Sealed Secrets controller
##
namespace: ""
## @param extraDeploy [array] Array of extra objects to deploy with the release
##
extraDeploy: []
## @param commonAnnotations [object] Annotations to add to all deployed resources
## ref: https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations/
##
commonAnnotations: {}
## @section Sealed Secrets Parameters
## Sealed Secrets image
## ref: https://hub.docker.com/r/bitnami/sealed-secrets-controller/tags
## @param image.registry Sealed Secrets image registry
## @param image.repository Sealed Secrets image repository
## @param image.tag Sealed Secrets image tag (immutable tags are recommended)
## @param image.pullPolicy Sealed Secrets image pull policy
## @param image.pullSecrets [array] Sealed Secrets image pull secrets
##
image:
registry: docker.io
repository: bitnami/sealed-secrets-controller
tag: 0.27.1
## Specify an imagePullPolicy
## Defaults to 'Always' if image tag is 'latest', else set to 'IfNotPresent'
## ref: http://kubernetes.io/docs/user-guide/images/#pre-pulling-images
##
pullPolicy: IfNotPresent
## Optionally specify an array of imagePullSecrets.
## Secrets must be manually created in the namespace.
## ref: https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/
## e.g:
## pullSecrets:
## - myRegistryKeySecretName
##
pullSecrets: []
## @param revisionHistoryLimit Number of old revisions to retain to allow rollback (if not set, Kubernetes defaults to 10)
## e.g:
revisionHistoryLimit: ""
## @param createController Specifies whether the Sealed Secrets controller should be created
##
createController: true
## @param secretName The name of an existing TLS secret containing the key used to encrypt secrets
##
secretName: "sealed-secrets-key"
## @param updateStatus Specifies whether the Sealed Secrets controller should update the status subresource
##
updateStatus: true
## @param skipRecreate Specifies whether the Sealed Secrets controller should skip recreating removed secrets
## Setting it to false optionally restores backward compatibility in low-privilege
## environments where old versions of the controller did not require watch permissions on secrets
## for secret re-creation.
##
skipRecreate: true
## @param keyrenewperiod Specifies key renewal period. Default 30 days
## e.g
## keyrenewperiod: "720h30m"
##
keyrenewperiod: "720h"
## @param rateLimit Number of allowed sustained requests per second for the verify endpoint
##
rateLimit: ""
## @param rateLimitBurst Number of requests allowed to exceed the rate limit per second for the verify endpoint
##
rateLimitBurst: ""
## @param additionalNamespaces List of namespaces used to manage the Sealed Secrets
##
additionalNamespaces: []
## @param privateKeyAnnotations Map of annotations to be set on the sealing keypairs
##
privateKeyAnnotations: {}
## @param privateKeyLabels Map of labels to be set on the sealing keypairs
##
privateKeyLabels: {}
## @param logInfoStdout Specifies whether the Sealed Secrets controller will log info to stdout
##
logInfoStdout: false
## @param command Override default container command
##
command: []
## @param args Override default container args
##
args: []
## Configure extra options for Sealed Secret containers' liveness, readiness and startup probes
## ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#configure-probes
## @param livenessProbe.enabled Enable livenessProbe on Sealed Secret containers
## @param livenessProbe.initialDelaySeconds Initial delay seconds for livenessProbe
## @param livenessProbe.periodSeconds Period seconds for livenessProbe
## @param livenessProbe.timeoutSeconds Timeout seconds for livenessProbe
## @param livenessProbe.failureThreshold Failure threshold for livenessProbe
## @param livenessProbe.successThreshold Success threshold for livenessProbe
##
livenessProbe:
enabled: true
initialDelaySeconds: 0
periodSeconds: 10
timeoutSeconds: 1
failureThreshold: 3
successThreshold: 1
## @param readinessProbe.enabled Enable readinessProbe on Sealed Secret containers
## @param readinessProbe.initialDelaySeconds Initial delay seconds for readinessProbe
## @param readinessProbe.periodSeconds Period seconds for readinessProbe
## @param readinessProbe.timeoutSeconds Timeout seconds for readinessProbe
## @param readinessProbe.failureThreshold Failure threshold for readinessProbe
## @param readinessProbe.successThreshold Success threshold for readinessProbe
##
readinessProbe:
enabled: true
initialDelaySeconds: 0
periodSeconds: 10
timeoutSeconds: 1
failureThreshold: 3
successThreshold: 1
## @param startupProbe.enabled Enable startupProbe on Sealed Secret containers
## @param startupProbe.initialDelaySeconds Initial delay seconds for startupProbe
## @param startupProbe.periodSeconds Period seconds for startupProbe
## @param startupProbe.timeoutSeconds Timeout seconds for startupProbe
## @param startupProbe.failureThreshold Failure threshold for startupProbe
## @param startupProbe.successThreshold Success threshold for startupProbe
##
startupProbe:
enabled: false
initialDelaySeconds: 0
periodSeconds: 10
timeoutSeconds: 1
failureThreshold: 3
successThreshold: 1
## @param customLivenessProbe Custom livenessProbe that overrides the default one
##
customLivenessProbe: {}
## @param customReadinessProbe Custom readinessProbe that overrides the default one
##
customReadinessProbe: {}
## @param customStartupProbe Custom startupProbe that overrides the default one
##
customStartupProbe: {}
## Sealed Secret resource requests and limits
## ref: http://kubernetes.io/docs/user-guide/compute-resources/
## @param resources.limits [object] The resources limits for the Sealed Secret containers
## @param resources.requests [object] The requested resources for the Sealed Secret containers
##
resources:
limits: {}
requests: {}
## Configure Pods Security Context
## ref: https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#set-the-security-context-for-a-pod
## @param podSecurityContext.enabled Enabled Sealed Secret pods' Security Context
## @param podSecurityContext.fsGroup Set Sealed Secret pod's Security Context fsGroup
##
podSecurityContext:
enabled: true
fsGroup: 65534
## Configure Container Security Context
## ref: https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#set-the-security-context-for-a-pod
## @param containerSecurityContext.enabled Enabled Sealed Secret containers' Security Context
## @param containerSecurityContext.readOnlyRootFilesystem Whether the Sealed Secret container has a read-only root filesystem
## @param containerSecurityContext.runAsNonRoot Indicates that the Sealed Secret container must run as a non-root user
## @param containerSecurityContext.runAsUser Set Sealed Secret containers' Security Context runAsUser
## @extra containerSecurityContext.capabilities Adds and removes POSIX capabilities from running containers (see `values.yaml`)
## @skip containerSecurityContext.capabilities.drop
##
containerSecurityContext:
enabled: true
readOnlyRootFilesystem: true
runAsNonRoot: true
runAsUser: 1001
capabilities:
drop:
- ALL
## @param podLabels [object] Extra labels for Sealed Secret pods
## ref: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/
##
podLabels: {}
## @param podAnnotations [object] Annotations for Sealed Secret pods
## ref: https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations/
##
podAnnotations: {}
## @param priorityClassName Sealed Secret pods' priorityClassName
##
priorityClassName: ""
## @param runtimeClassName Sealed Secret pods' runtimeClassName
##
runtimeClassName: ""
## @param affinity [object] Affinity for Sealed Secret pods assignment
## ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#affinity-and-anti-affinity
##
affinity: {}
## @param nodeSelector [object] Node labels for Sealed Secret pods assignment
## ref: https://kubernetes.io/docs/user-guide/node-selection/
##
nodeSelector: {}
## @param tolerations [array] Tolerations for Sealed Secret pods assignment
## ref: https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/
##
tolerations: []
## @param additionalVolumes [object] Extra Volumes for the Sealed Secrets Controller Deployment
## ref: https://kubernetes.io/docs/concepts/storage/volumes/
##
additionalVolumes: []
## @param additionalVolumeMounts [object] Extra volumeMounts for the Sealed Secrets Controller container
## ref: https://kubernetes.io/docs/concepts/storage/volumes/
##
additionalVolumeMounts: []
## @param hostNetwork Sealed Secrets pods' hostNetwork
hostNetwork: false
## @param dnsPolicy Sealed Secrets pods' dnsPolicy
dnsPolicy: ""
## @section Traffic Exposure Parameters
## Sealed Secret service parameters
##
service:
## @param service.type Sealed Secret service type
##
type: ClusterIP
## @param service.port Sealed Secret service HTTP port
##
port: 8080
## @param service.nodePort Node port for HTTP
## Specify the nodePort value for the LoadBalancer and NodePort service types
## ref: https://kubernetes.io/docs/concepts/services-networking/service/#type-nodeport
## NOTE: choose port between <30000-32767>
##
nodePort: ""
## @param service.annotations [object] Additional custom annotations for Sealed Secret service
##
annotations: {}
## Sealed Secret ingress parameters
## ref: http://kubernetes.io/docs/user-guide/ingress/
##
ingress:
## @param ingress.enabled Enable ingress record generation for Sealed Secret
##
enabled: false
## @param ingress.pathType Ingress path type
##
pathType: ImplementationSpecific
## @param ingress.apiVersion Force Ingress API version (automatically detected if not set)
##
apiVersion: ""
## @param ingress.ingressClassName IngressClass that will be used to implement the Ingress
## This is supported in Kubernetes 1.18+ and required if you have more than one IngressClass marked as the default for your cluster.
## ref: https://kubernetes.io/blog/2020/04/02/improvements-to-the-ingress-api-in-kubernetes-1.18/
##
ingressClassName: ""
## @param ingress.hostname Default host for the ingress record
##
hostname: sealed-secrets.local
## @param ingress.path Default path for the ingress record
##
path: /v1/cert.pem
## @param ingress.annotations [object] Additional annotations for the Ingress resource. To enable certificate autogeneration, place here your cert-manager annotations.
## Use this parameter to set the required annotations for cert-manager, see
## ref: https://cert-manager.io/docs/usage/ingress/#supported-annotations
## e.g:
## annotations:
## kubernetes.io/ingress.class: nginx
## cert-manager.io/cluster-issuer: cluster-issuer-name
##
annotations: {}
## @param ingress.tls Enable TLS configuration for the host defined at `ingress.hostname` parameter
## TLS certificates will be retrieved from a TLS secret with name: `{{- printf "%s-tls" .Values.ingress.hostname }}`
## You can:
## - Use the `ingress.secrets` parameter to create this TLS secret
## - Rely on cert-manager to create it by setting the corresponding annotations
## - Rely on Helm to create self-signed certificates by setting `ingress.selfSigned=true`
##
tls: false
## @param ingress.selfSigned Create a TLS secret for this ingress record using self-signed certificates generated by Helm
##
selfSigned: false
## @param ingress.extraHosts [array] An array with additional hostname(s) to be covered with the ingress record
## e.g:
## extraHosts:
## - name: sealed-secrets.local
## path: /
##
extraHosts: []
## @param ingress.extraPaths [array] An array with additional arbitrary paths that may need to be added to the ingress under the main host
## e.g:
## extraPaths:
## - path: /*
## backend:
## serviceName: ssl-redirect
## servicePort: use-annotation
##
extraPaths: []
## @param ingress.extraTls [array] TLS configuration for additional hostname(s) to be covered with this ingress record
## ref: https://kubernetes.io/docs/concepts/services-networking/ingress/#tls
## e.g:
## extraTls:
## - hosts:
## - sealed-secrets.local
## secretName: sealed-secrets.local-tls
##
extraTls: []
## @param ingress.secrets [array] Custom TLS certificates as secrets
## NOTE: 'key' and 'certificate' are expected in PEM format
## NOTE: 'name' should line up with a 'secretName' set further up
## If it is not set and you're using cert-manager, this is unneeded, as it will create a secret for you with valid certificates
## If it is not set and you're NOT using cert-manager either, self-signed certificates will be created valid for 365 days
## It is also possible to create and manage the certificates outside of this helm chart
## Please see README.md for more information
## e.g:
## secrets:
## - name: sealed-secrets.local-tls
## key: |-
## -----BEGIN RSA PRIVATE KEY-----
## ...
## -----END RSA PRIVATE KEY-----
## certificate: |-
## -----BEGIN CERTIFICATE-----
## ...
## -----END CERTIFICATE-----
##
secrets: []
## Network policies
## Ref: https://kubernetes.io/docs/concepts/services-networking/network-policies/
##
networkPolicy:
## @param networkPolicy.enabled Specifies whether a NetworkPolicy should be created
##
enabled: false
## NetworkPolicy Egress configuration
##
egress:
## @param networkPolicy.egress.enabled Specifies whether an egress rule is set in the NetworkPolicy
##
enabled: false
## @param networkPolicy.egress.kubeapiCidr Specifies the kubeapiCidr, which is the only egress allowed. If not set, kubeapiCidr will be found using Helm lookup
##
kubeapiCidr: ""
## @param networkPolicy.egress.kubeapiPort Specifies the kubeapiPort, which is the only egress allowed. If not set, kubeapiPort will be found using Helm lookup
##
kubeapiPort: ""
## @section Other Parameters
## ServiceAccount configuration
##
serviceAccount:
## @param serviceAccount.annotations [object] Annotations for Sealed Secret service account
## ref: https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations/
##
annotations: {}
## @param serviceAccount.create Specifies whether a ServiceAccount should be created
##
create: true
## @param serviceAccount.labels Extra labels to be added to the ServiceAccount
##
labels: {}
## @param serviceAccount.name The name of the ServiceAccount to use.
## If not set and create is true, a name is generated using the sealed-secrets.fullname template
##
name: ""
## RBAC configuration
##
rbac:
## @param rbac.create Specifies whether RBAC resources should be created
##
create: true
## @param rbac.clusterRole Specifies whether the Cluster Role resource should be created
##
clusterRole: true
## @param rbac.clusterRoleName Specifies the name for the Cluster Role resource
##
clusterRoleName: "secrets-unsealer"
## @param rbac.namespacedRoles Specifies whether the namespaced Roles should be created (in each of the specified additionalNamespaces)
##
namespacedRoles: false
## @param rbac.namespacedRolesName Specifies the name for the namespaced Role resource
##
namespacedRolesName: "secrets-unsealer"
## @param rbac.labels Extra labels to be added to RBAC resources
##
labels: {}
## @param rbac.pspEnabled Specifies whether a PodSecurityPolicy should be created
##
pspEnabled: false
## @section Metrics parameters
metrics:
## Prometheus Operator ServiceMonitor configuration
##
serviceMonitor:
## @param metrics.serviceMonitor.enabled Specify if a ServiceMonitor will be deployed for Prometheus Operator
##
enabled: false
## @param metrics.serviceMonitor.namespace Namespace where Prometheus Operator is running in
##
namespace: ""
## @param metrics.serviceMonitor.labels Extra labels for the ServiceMonitor
##
labels: {}
## @param metrics.serviceMonitor.annotations Extra annotations for the ServiceMonitor
##
annotations: {}
## @param metrics.serviceMonitor.interval How frequently to scrape metrics
## e.g:
## interval: 10s
##
interval: ""
## @param metrics.serviceMonitor.scrapeTimeout Timeout after which the scrape is ended
## e.g:
## scrapeTimeout: 10s
##
scrapeTimeout: ""
## @param metrics.serviceMonitor.honorLabels Specify if ServiceMonitor endPoints will honor labels
##
honorLabels: true
## @param metrics.serviceMonitor.metricRelabelings [array] Specify additional relabeling of metrics
##
metricRelabelings: []
## @param metrics.serviceMonitor.relabelings [array] Specify general relabeling
##
relabelings: []
## Grafana dashboards configuration
##
dashboards:
## @param metrics.dashboards.create Specifies whether a ConfigMap with a Grafana dashboard configuration should be created
## ref https://github.com/helm/charts/tree/master/stable/grafana#configuration
##
create: false
## @param metrics.dashboards.labels Extra labels to be added to the Grafana dashboard ConfigMap
##
labels: {}
## @param metrics.dashboards.annotations Annotations to be added to the Grafana dashboard ConfigMap
##
annotations: {}
## @param metrics.dashboards.namespace Namespace where Grafana dashboard ConfigMap is deployed
##
namespace: ""

@@ -1,21 +0,0 @@
apiVersion: helm.toolkit.fluxcd.io/v2beta1
kind: HelmRelease
metadata:
name: sealed-secrets
namespace: sealed-secrets
spec:
chart:
spec:
chart: sealed-secrets
version: 2.16.x
sourceRef:
kind: HelmRepository
name: sealed-secrets
namespace: flux-system
interval: 15m
timeout: 5m
releaseName: sealed-secrets
valuesFrom:
- kind: ConfigMap
name: sealed-secrets-helm-chart-value-overrides
valuesKey: values.yaml # This is the default, but best to be explicit for clarity
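
The controller deployed above decrypts SealedSecret resources into ordinary Secrets in-cluster; the ciphertext is produced client-side with kubeseal against the controller's public key. A sketch of what a committed SealedSecret looks like (name, namespace, and ciphertext are placeholders):

apiVersion: bitnami.com/v1alpha1
kind: SealedSecret
metadata:
  name: example-credentials   # hypothetical name
  namespace: default
spec:
  encryptedData:
    # opaque ciphertext emitted by kubeseal; only the controller's
    # private key (secretName: sealed-secrets-key above) can decrypt it
    password: AgB3...
  template:
    metadata:
      name: example-credentials
      namespace: default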

@@ -1,19 +0,0 @@
apiVersion: kustomize.toolkit.fluxcd.io/v1beta1
kind: Kustomization
metadata:
name: sealed-secrets
namespace: flux-system
spec:
interval: 15m
path: "./infra/sealed-secrets"
prune: true # remove any elements later removed from the above path
timeout: 5m # if not set, this defaults to the interval duration (15m above)
sourceRef:
kind: GitRepository
name: flux-system
healthChecks:
- apiVersion: apps/v1
kind: Deployment
name: sealed-secrets
namespace: sealed-secrets