Guardrails That Stop It
- Mandatory plan -> review -> apply: Never run a direct apply in CI or production.
- Remote State Locking: Required for team workflows to prevent state corruption.
- CI Concurrency: Apply pipeline concurrency must be limited to 1 per environment.
- Destroy Discipline: `terraform destroy` is denied by default outside `develop`.
- Break-Glass Record: Any destructive action outside `develop` requires an explicit record (incident ID, scope, rollback plan).
Local Git Guardrails (Pre-Hooks)
Before Terraform changes are committed, our local hooks enforce:
- `terraform fmt -recursive -diff -check`: Ensures consistent formatting.
- `scripts/terraform-validate.sh`: Catches configuration errors.
- `scripts/terraform-security.sh`: Flags security misconfigurations using Checkov.
Safe Workflow (Step-by-Step)
- Install Hooks: `make install-hooks && pre-commit run --all-files`.
- Generate Plan: Create a plan artifact and perform a peer review.
- Apply Safely: Apply only from the reviewed/fresh plan artifact.
- Confirm State: Run a drift check to confirm the expected state after apply.
Local Development with Kind
For local testing and learning without cloud costs, we use a Kind (Kubernetes in Docker) cluster.
Kind Cluster Setup
The cluster creates a 3-node topology (1 control-plane, 2 worker nodes) and includes a local registry mirror at localhost:5001.
Port Mappings:
- 8080 (Host) -> 30080 (NodePort HTTP)
- 8443 (Host) -> 30443 (NodePort HTTPS)
Flux Operator Auto-Bootstrap
The Kind cluster uses the same Flux bootstrap path as our production Hetzner environment, ensuring your local rehearsal is provider-realistic.
Kind cluster layout
Show the Kind cluster layout
# Provider requirements for the local kind development stack.
terraform {
  required_providers {
    kind = {
      source  = "tehcyx/kind"
      version = "0.9.0"
    }
    helm = {
      source  = "hashicorp/helm"
      version = "~> 2.12"
    }
    kubernetes = {
      source  = "hashicorp/kubernetes"
      version = "~> 2.25"
    }
    null = {
      source  = "hashicorp/null"
      version = "~> 3.2"
    }
    # time_sleep.wait_for_cluster uses the time provider; declare it here so
    # `terraform init` resolves a pinned hashicorp/time release explicitly
    # instead of relying on the implicit default source.
    time = {
      source  = "hashicorp/time"
      version = "~> 0.11"
    }
  }
}
# Provider wiring. Helm and Kubernetes both authenticate against the local
# kind cluster using the endpoint and client-certificate material exported
# by kind_cluster.sre, so they become usable only after the cluster exists.
provider "kind" {}
provider "helm" {
kubernetes {
host = kind_cluster.sre.endpoint
client_certificate = kind_cluster.sre.client_certificate
client_key = kind_cluster.sre.client_key
cluster_ca_certificate = kind_cluster.sre.cluster_ca_certificate
}
}
provider "kubernetes" {
host = kind_cluster.sre.endpoint
client_certificate = kind_cluster.sre.client_certificate
client_key = kind_cluster.sre.client_key
cluster_ca_certificate = kind_cluster.sre.cluster_ca_certificate
}
locals {
# Kubeconfig written alongside this module; pathexpand only rewrites a
# leading "~", so for a module-relative path this is effectively a no-op.
kubeconfig_path = pathexpand("${path.module}/kubeconfig.yaml")
# Optional YAML fragment injected into the FluxInstance sync spec when a
# Git token is provided (the secret itself is created elsewhere in this file).
flux_pull_secret_yaml = var.flux_git_token != "" ? " pullSecret: \"flux-system\"\n" : ""
# nonsensitive() strips the sensitivity marker from the boolean so it can
# drive plan-time decisions even though the R2 credentials are sensitive.
backup_s3_secret_enabled = nonsensitive(var.r2_access_key_id != "" && var.r2_secret_access_key != "")
}
# Local kind cluster with the documented 3-node topology (1 control-plane,
# 2 workers), a containerd mirror for the local registry at localhost:5001,
# and host port mappings so ingress traffic reaches the NodePort services.
resource "kind_cluster" "sre" {
  name            = "sre-control-plane"
  wait_for_ready  = true
  kubeconfig_path = local.kubeconfig_path

  kind_config {
    api_version = "kind.x-k8s.io/v1alpha4"
    kind        = "Cluster"

    networking {
      api_server_address = "127.0.0.1"
      api_server_port    = 6443
      kube_proxy_mode    = "iptables"
    }

    # Route pulls for localhost:5001 through the co-located registry container.
    containerd_config_patches = [
      <<-EOT
      [plugins."io.containerd.grpc.v1.cri".registry.mirrors."localhost:5001"]
        endpoint = ["http://kind-registry:5000"]
      EOT
    ]

    node {
      role = "control-plane"

      # Label the node ingress-ready so the ingress controller schedules here.
      kubeadm_config_patches = [
        <<-EOT
        kind: InitConfiguration
        nodeRegistration:
          kubeletExtraArgs:
            node-labels: "ingress-ready=true"
            authorization-mode: "Webhook"
        EOT
      ]

      # Host 8080 -> NodePort 30080 (HTTP).
      extra_port_mappings {
        container_port = 30080
        host_port      = 8080
        listen_address = "127.0.0.1"
        protocol       = "TCP"
      }

      # Host 8443 -> NodePort 30443 (HTTPS).
      extra_port_mappings {
        container_port = 30443
        host_port      = 8443
        listen_address = "127.0.0.1"
        protocol       = "TCP"
      }
    }

    node {
      role = "worker"
    }

    # Second worker: the documentation above describes a 3-node topology
    # (1 control-plane, 2 workers); the original config declared only one worker.
    node {
      role = "worker"
    }
  }
}
# One-shot (create-time only) merge of the generated kubeconfig into the
# user's default kubeconfig via a helper script shipped with this module.
resource "null_resource" "merge_kubeconfig" {
depends_on = [kind_cluster.sre]
provisioner "local-exec" {
when = create
command = "${path.module}/scripts/merge-kubeconfig.sh \"${local.kubeconfig_path}\""
interpreter = ["/bin/bash", "-c"]
}
}
# Fixed 30s grace period after cluster creation before dependent resources
# (namespaces, Helm releases, Flux bootstrap) start applying. A plain delay,
# not a readiness probe — downstream steps assume the API server is up by then.
resource "time_sleep" "wait_for_cluster" {
depends_on = [null_resource.merge_kubeconfig]
create_duration = "30s"
}
# Filesystem path (not the contents) of the kubeconfig kind wrote for this cluster.
output "kubeconfig" {
description = "Path to the generated kubeconfig for the kind cluster"
value = local.kubeconfig_path
}
# Copy-pasteable shell snippet shown after apply; the heredoc is output text,
# so it is kept verbatim and documented only from out here.
output "kubeconfig_load_instructions" {
description = "How to use the generated kubeconfig"
value = <<-EOT
export KUBECONFIG="${local.kubeconfig_path}"
kubectl get nodes
# Optional: merge into your default kubeconfig
${path.module}/scripts/merge-kubeconfig.sh "${local.kubeconfig_path}"
kubectl config use-context sre-control-plane
EOT
}
# Dedicated namespace for the Traefik ingress controller; gated on the
# post-create grace period so the API server is reachable.
resource "kubernetes_namespace" "traefik" {
metadata { name = "traefik" }
depends_on = [time_sleep.wait_for_cluster]
}
# Traefik ingress controller, exposed as NodePort so the kind host port
# mappings (8080 -> 30080 HTTP, 8443 -> 30443 HTTPS) reach it.
resource "helm_release" "traefik" {
  name       = "traefik"
  repository = "https://traefik.github.io/charts"
  chart      = "traefik"
  # Reference the namespace resource instead of repeating the string literal:
  # the name then has a single source of truth and the dependency is implicit,
  # so the explicit depends_on is no longer needed.
  namespace = kubernetes_namespace.traefik.metadata[0].name
  version   = "34.5.0"

  set {
    name  = "service.type"
    value = "NodePort"
  }
  set {
    name  = "ports.web.nodePort"
    value = "30080"
  }
  set {
    name  = "ports.websecure.nodePort"
    value = "30443"
  }
  # Watch both plain Ingress objects and Traefik CRDs (IngressRoute, etc.).
  set {
    name  = "providers.kubernetesIngress.enabled"
    value = "true"
  }
  set {
    name  = "providers.kubernetesCRD.enabled"
    value = "true"
  }
}
# metrics-server for `kubectl top` / HPA support, installed into kube-system.
resource "helm_release" "metrics_server" {
name = "metrics-server"
repository = "https://kubernetes-sigs.github.io/metrics-server/"
chart = "metrics-server"
namespace = "kube-system"
version = "3.12.2"
depends_on = [time_sleep.wait_for_cluster]
set {
name = "args[0]"
# kind kubelets serve self-signed certs; skip TLS verification locally.
# Do not carry this flag to a real cluster.
value = "--kubelet-insecure-tls"
}
}
# Installs the Flux Operator by applying its upstream release manifest with
# kubectl. Re-runs when any trigger value changes (taint-by-trigger pattern).
# NOTE(review): the manifest URL floats on "latest", so installs are not
# reproducible across runs — consider pinning a release tag.
# NOTE(review): triggers set provider = "github", but the FluxInstance spec
# below uses `provider: generic` — confirm which is intended.
resource "null_resource" "flux_operator_install" {
depends_on = [time_sleep.wait_for_cluster]
triggers = {
kubeconfig_path = local.kubeconfig_path
repo_url = var.flux_git_repository_url
repo_branch = var.flux_git_repository_branch
repo_path = var.flux_kustomization_path
provider = "github"
}
provisioner "local-exec" {
when = create
interpreter = ["/bin/bash", "-c"]
command = "kubectl --kubeconfig=\"${local.kubeconfig_path}\" apply -f https://github.com/controlplaneio-fluxcd/flux-operator/releases/latest/download/install.yaml"
}
}
# Creates the FluxInstance CR that bootstraps Flux against the Git repository.
# The manifest is piped to kubectl via a heredoc; its text is runtime input,
# so all commentary stays out here. The optional pullSecret line is appended
# through local.flux_pull_secret_yaml only when a Git token was provided.
# On destroy, the CR is deleted best-effort (on_failure = continue, --wait=false)
# using the kubeconfig path captured in triggers, since locals are not
# available to destroy-time provisioners.
resource "null_resource" "flux_instance" {
depends_on = [
null_resource.flux_operator_install,
kubernetes_secret.flux_git_auth
]
triggers = {
kubeconfig_path = local.kubeconfig_path
}
provisioner "local-exec" {
when = create
command = <<-EOC
cat <<EOF | kubectl --kubeconfig="${local.kubeconfig_path}" apply -f -
apiVersion: fluxcd.controlplane.io/v1
kind: FluxInstance
metadata:
name: flux
namespace: flux-system
spec:
distribution:
version: "${var.flux_version}"
registry: ghcr.io/fluxcd
components:
- source-controller
- kustomize-controller
- helm-controller
- notification-controller
- image-reflector-controller
- image-automation-controller
cluster:
type: kubernetes
sync:
kind: GitRepository
url: "${var.flux_git_repository_url}"
ref: "refs/heads/${var.flux_git_repository_branch}"
provider: generic
path: "${var.flux_kustomization_path}"
${local.flux_pull_secret_yaml}
EOF
EOC
interpreter = ["/bin/bash", "-c"]
}
provisioner "local-exec" {
when = destroy
on_failure = continue
command = "kubectl --kubeconfig=\"${self.triggers.kubeconfig_path}\" delete fluxinstance flux -n flux-system --ignore-not-found=true --wait=false"
interpreter = ["/bin/bash", "-c"]
}
}
# Destroy-time cleanup hook: runs a helper script against the listed namespaces
# before the cluster is torn down, so Flux-managed finalizers do not wedge the
# destroy. Best-effort (on_failure = continue); inputs are snapshotted into
# triggers because destroy-time provisioners can only read self.triggers.
resource "null_resource" "flux_pre_destroy" {
depends_on = [
kind_cluster.sre,
kubernetes_namespace.traefik,
kubernetes_namespace.bootstrap,
null_resource.flux_instance,
]
triggers = {
kubeconfig_path = local.kubeconfig_path
namespaces = "develop,staging,production,observability,traefik"
}
provisioner "local-exec" {
when = destroy
on_failure = continue
command = "\"${path.module}/../scripts/flux-pre-destroy.sh\" \"${self.triggers.kubeconfig_path}\" \"${self.triggers.namespaces}\""
interpreter = ["/bin/bash", "-c"]
}
}
metadata {
name = "flux-system"
namespace = "flux-system"
}
data = {
username = "git"
password = var.flux_git_token
}
type = "Opaque"
}
data = {
cloudflare_proxied = "disabled"
cluster_name = "sre-control-plane"
image_registry = var.image_registry
git_owner = var.git_owner
}
depends_on = [null_resource.flux_operator_install]
}
type = "Opaque"
data = {
uptrace_dsn = var.uptrace_dsn
}
depends_on = [null_resource.flux_operator_install]
}
metadata {
name = each.key
}
depends_on = [time_sleep.wait_for_cluster]
lifecycle {
ignore_changes = [
metadata[0].labels,
]
}
}
metadata {
name = "ghcr-credentials-docker"
namespace = each.key
}
type = "kubernetes.io/dockerconfigjson"
data = {
".dockerconfigjson" = jsonencode({
auths = {
"ghcr.io" = {
username = var.ghcr_username
password = var.ghcr_token
auth = base64encode("${var.ghcr_username}:${var.ghcr_token}")
}
}
})
}
}
metadata {
name = "sops-age"
namespace = "flux-system"
}
type = "Opaque"
data = {
"age.agekey" = var.sops_age_key
}
}
metadata {
name = "cnpg-backup-s3"
namespace = each.key
}
type = "Opaque"
data = merge(
{
ACCESS_KEY_ID = var.r2_access_key_id
ACCESS_SECRET_KEY = var.r2_secret_access_key
BUCKET = var.r2_bucket
},
var.r2_endpoint != "" ? { ENDPOINT = var.r2_endpoint } : {},
var.r2_region != "" ? { REGION = var.r2_region } : {},
)
depends_on = [kubernetes_namespace.bootstrap]
}
# True once the install null_resource exists (its id is non-empty after create).
output "flux_operator_installed" {
description = "Indicates that Flux Operator has been installed"
value = null_resource.flux_operator_install.id != ""
}
# Constant name of the FluxInstance CR; depends_on delays the output until
# the instance has actually been applied.
output "flux_instance_created" {
description = "Indicates that FluxInstance has been created"
value = "flux"
depends_on = [null_resource.flux_instance]
}
This builds on: Context verification (Chapter 01) — same discipline, infrastructure scope. This enables: Secrets management (Chapter 03) — IaC state needs encrypted secrets.