Merge branch 'master' of code.ungleich.ch:ungleich-public/ungleich-k8s

Nico Schottelius 2021-08-03 17:43:00 +02:00
commit 9e76462418
13 changed files with 467 additions and 2 deletions


@@ -0,0 +1,52 @@
# Fnux's playground

Tests made by Timothée for ungleich.

## OpenLDAP

Simple chart based on [Osixia's OpenLDAP
image](https://github.com/osixia/docker-openldap). A TLS certificate is
automatically generated via Let's Encrypt, but renewal is not handled yet.

TODO: handle TLS certificate renewal.

NOTE: replication with the osixia image is somewhat broken, see:
https://github.com/osixia/docker-openldap/issues/203
-> Worked around the issue with https://github.com/ungleich/docker-openldap/commit/3c7c9ece1e67bce0bfe1fdb66a63f5c8c59359f4

```
kubectl create secret generic ldap1-openldap --from-literal=LDAP_ADMIN_PASSWORD=secretsecretsectet
helm install ldap1 ./openldap -f ldapN.fnux-playground.yaml
helm install ldap2 ./openldap -f ldapN.fnux-playground.yaml
```
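
To check that both releases came up (the pod labels and resource names below
follow what the chart's templates generate; adjust the namespace if needed, and
treat the `grep` as a rough way to eyeball syncrepl activity, not a proper
replication test):

```
kubectl get pods -l 'app in (ldap1-openldap,ldap2-openldap)'
kubectl logs deployment/ldap1 -c openldap | grep -i sync
```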

## Matrix Synapse

Matrix Homeserver setup based on [Ananace's Helm
charts](https://gitlab.com/ananace/charts). I exchanged a few mails with him,
he's nice!

Note: we need to wire up some network policy to firewall the various components (see the sketch after the install commands below).
Note: there's some configuration and secret management to work on!
Note: there's a missing bit for IPv6 support (https://gitlab.com/ananace/charts/-/merge_requests/15)

```
helm repo add ananace-charts https://ananace.gitlab.io/charts
helm repo update
helm install matrix ananace-charts/matrix-synapse --set serverName=matrix.fnux-playground.svc.c1.k8s.ooo --set wellknown.enabled=true -f matrix.fnux-playground.yaml
```
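
Regarding the network-policy note above, a minimal sketch of the kind of policy
we'd want. Nothing here ships with the chart, and the controller namespace
label is an assumption to adjust:

```
# Illustrative only: allow traffic between the release's own pods and from the
# ingress controller namespace (assumed to be "ingress-nginx"), drop the rest.
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
  name: matrix-restrict-ingress
spec:
  podSelector: {}            # applies to every pod in the release namespace
  policyTypes:
    - Ingress
  ingress:
    - from:
        - podSelector: {}    # synapse, workers, redis, postgres talking to each other
    - from:
        - namespaceSelector:
            matchLabels:
              kubernetes.io/metadata.name: ingress-nginx
```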

## Ingress

Ingress is used by the matrix-synapse chart to distribute requests across
synapse workers. We could do it ourselves (just generate an NGINX container
from synapse's config), but there's already ingress logic around which does
this for us...

```
helm repo add ingress-nginx https://kubernetes.github.io/ingress-nginx
helm repo update
helm install ingress-nginx ingress-nginx/ingress-nginx
```
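
To sanity-check the setup (standard labels from the ingress-nginx chart; adjust
the namespace if you installed the controller elsewhere):

```
kubectl get pods -l app.kubernetes.io/name=ingress-nginx
kubectl get ingress
```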


@@ -0,0 +1,10 @@
clusterDomain: "c1.k8s.ooo"
ldap:
  # See https://www.openldap.org/doc/admin24/slapdconf2.html section 5.2.1.2;
  logLevel: "256"
  organisation: "ungleich glarus ag"
  domain: "ungleich.ch"
  adminPasswordSecretRef: "ldap-openldap"
  enableReplication: "true"
  replicationHosts: "#PYTHON2BASH:['ldaps://ldap1.{{ .Release.Namespace }}.svc.{{ .Values.clusterDomain }}','ldaps://ldap2.{{ .Release.Namespace }}.svc.{{ .Values.clusterDomain }}']"


@@ -0,0 +1,102 @@
# Note: as of writing we can't template the variables of this file, although
# I'm pretty sure upstream would accept a patch for this.

# Shared variables.
clusterName: "c2.k8s.ooo"

# The Matrix domain name, this is what will be used for the domain part in
# your MXIDs.
serverName: "matrix.fnux-playground.svc.c2.k8s.ooo"

# The public Matrix server name, this will be used for any public URLs
# in config as well as for client API links in the ingress.
publicServerName: "matrix.fnux-playground.svc.c2.k8s.ooo"

# Generic configuration that applies to multiple components.
config:
  # Log level for Synapse and all modules.
  logLevel: INFO

# Configuration to apply to the main Synapse pod.
synapse:
  ## Only really applicable when the deployment has an RWO PV attached (e.g. when media repository
  ## is enabled for the main Synapse pod)
  ## Since replicas = 1, an update can get "stuck", as the previous pod remains attached to the
  ## PV, and the "incoming" pod can never start. Changing the strategy to "Recreate" will
  ## terminate the single previous pod, so that the new, incoming pod can attach to the PV
  ##
  strategy:
    type: RollingUpdate

  # First/initial startup is slow! The synapse pod gets killed before the
  # database is fully initialized if we don't explicitly wait.
  # XXX: we should probably use a startupProbe, but this needs to be patched
  # upstream.
  livenessProbe:
    httpGet:
      path: /health
      port: http
    initialDelaySeconds: 180
  readinessProbe:
    httpGet:
      path: /health
      port: http
    initialDelaySeconds: 180

# Configuration for handling Synapse workers, which are useful for handling
# high-load deployments.
#
# More information is available at:
# https://github.com/matrix-org/synapse/blob/master/docs/workers.md
#
# workers: ...

# Persistence configuration for the media repository function. This PVC will
# be mounted in either Synapse or a media_repo worker.
persistence:
  enabled: true
  storageClass: "rook-cephfs"
  accessMode: ReadWriteMany
  size: 10Gi

# Serve /.well-known URIs, making federation possible without adding
# SRV-records to DNS.
wellknown:
  enabled: true

  # Lighttpd does not bind to v6 by default - which doesn't play well in a
  # v6-only cluster!
  useIpv6: true

  # Data served on .well-known/matrix/server.
  # See https://matrix.org/docs/spec/server_server/latest#get-well-known-matrix-server
  server:
    m.server: "matrix.fnux-playground.svc.c2.k8s.ooo"

  # Data served on .well-known/matrix/client.
  # See https://matrix.org/docs/spec/client_server/latest#get-well-known-matrix-client
  client:
    m.homeserver:
      base_url: "https://matrix.fnux-playground.svc.c2.k8s.ooo"

# PGSQL database server configuration.
postgresql:
  enabled: true
  postgresqlPassword: "secret"
  postgresqlUsername: synapse
  postgresqlDatabase: synapse
  persistence:
    storageClass: "rook-cephfs"
    size: 16Gi

## Redis server for use with workers/sharding.
redis:
  enabled: true
  usePassword: true
  password: "secret"

# The K8s ingress configuration, this will be quite heavily used in order to
# set up all routing necessary for use with a sharded Synapse instance. If
# you're not using an Ingress compatible K8s ingress, you will need to set up
# your own routing instead.
ingress:
  enabled: true


@@ -0,0 +1,23 @@
# Patterns to ignore when building packages.
# This supports shell glob matching, relative path matching, and
# negation (prefixed with !). Only one pattern per line.
.DS_Store
# Common VCS dirs
.git/
.gitignore
.bzr/
.bzrignore
.hg/
.hgignore
.svn/
# Common backup files
*.swp
*.bak
*.tmp
*.orig
*~
# Various IDEs
.project
.idea/
*.tmproj
.vscode/


@@ -0,0 +1,24 @@
apiVersion: v2
name: openldap
description: OpenLDAP server
# A chart can be either an 'application' or a 'library' chart.
#
# Application charts are a collection of templates that can be packaged into versioned archives
# to be deployed.
#
# Library charts provide useful utilities or functions for the chart developer. They're included as
# a dependency of application charts to inject those utilities and functions into the rendering
# pipeline. Library charts do not define any templates and therefore cannot be deployed.
type: application
# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: 0.1.0
# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
# follow Semantic Versioning. They should reflect the version the application is using.
# It is recommended to use it with quotes.
appVersion: "1.5.0-serverid-hostname-fallback-2"


@@ -0,0 +1,138 @@
apiVersion: apps/v1
kind: Deployment
metadata:
  name: "{{ tpl .Values.identifier . }}"
  labels:
    app: openldap
spec:
  replicas: 1
  strategy:
    # Delete old pod before starting the new one - slapd doesn't react well
    # with two instances hitting the same database.
    type: "Recreate"
  selector:
    matchLabels:
      app: "{{ tpl .Values.identifier . }}-openldap"
  template:
    metadata:
      labels:
        app: "{{ tpl .Values.identifier . }}-openldap"
    spec:
      initContainers:
      - name: wait-for-cert
        image: busybox
        command:
        - "sh"
        - "-c"
        - "until ls /etc/letsencrypt/live/{{ tpl .Values.fqdn . }}/fullchain.pem; do sleep 5; done"
        volumeMounts:
        - name: "{{ tpl .Values.identifier . }}-letsencrypt-certs"
          mountPath: /etc/letsencrypt
      containers:
      - name: "openldap"
        image: "ungleich/openldap:{{ .Chart.AppVersion }}"
        args: ["--loglevel", "trace"]
        ports:
        - name: ldap
          containerPort: 389
          protocol: TCP
        - name: ldaps
          containerPort: 636
          protocol: TCP
        livenessProbe:
          tcpSocket:
            port: 389
          initialDelaySeconds: 10
          periodSeconds: 10
        readinessProbe:
          tcpSocket:
            port: 389
          initialDelaySeconds: 10
          periodSeconds: 10
        env:
        - name: HOSTNAME
          value: "{{ tpl .Values.fqdn . }}"
        - name: LDAP_LOG_LEVEL
          value: "{{ tpl .Values.ldap.logLevel . }}"
        - name: LDAP_ORGANISATION
          value: "{{ tpl .Values.ldap.organisation . }}"
        - name: LDAP_DOMAIN
          value: "{{ tpl .Values.ldap.domain . }}"
        - name: LDAP_ADMIN_PASSWORD
          valueFrom:
            secretKeyRef:
              name: "{{ tpl .Values.ldap.adminPasswordSecretRef . }}"
              key: "{{ tpl .Values.ldap.adminPasswordSecretKey . }}"
        - name: LDAP_CONFIG_PASSWORD
          valueFrom:
            secretKeyRef:
              name: "{{ tpl .Values.ldap.adminPasswordSecretRef . }}"
              key: "{{ tpl .Values.ldap.adminPasswordSecretKey . }}"
        - name: LDAP_TLS_CRT_FILENAME
          value: "live/{{ tpl .Values.fqdn . }}/cert.pem"
        - name: LDAP_TLS_KEY_FILENAME
          value: "live/{{ tpl .Values.fqdn . }}/privkey.pem"
        - name: LDAP_TLS_CA_CRT_FILENAME
          value: "live/{{ tpl .Values.fqdn . }}/fullchain.pem"
        - name: LDAP_TLS_VERIFY_CLIENT
          value: "try"
        - name: LDAP_REPLICATION
          value: "{{ .Values.ldap.enableReplication }}"
        - name: LDAP_REPLICATION_HOSTS
          value: "{{ tpl .Values.ldap.replicationHosts . }}"
        - name: LDAP_REPLICATION_CONFIG_SYNCPROV
          value: "{{ tpl .Values.ldap.replicationConfigSyncprov . }}"
        - name: LDAP_REPLICATION_DB_SYNCPROV
          value: "{{ tpl .Values.ldap.replicationDbSyncprov . }}"
        volumeMounts:
        - name: "{{ tpl .Values.identifier . }}-openldap-data"
          mountPath: "/etc/ldap/slapd.d"
          subPath: configuration
        - name: "{{ tpl .Values.identifier . }}-openldap-data"
          mountPath: "/var/lib/ldap"
          subPath: database
        - name: "{{ tpl .Values.identifier . }}-letsencrypt-certs"
          mountPath: /container/service/slapd/assets/certs
      volumes:
      - name: "{{ tpl .Values.identifier . }}-openldap-data"
        persistentVolumeClaim:
          claimName: "{{ tpl .Values.identifier . }}-openldap-data"
      - name: "{{ tpl .Values.identifier . }}-letsencrypt-certs"
        persistentVolumeClaim:
          claimName: {{ tpl .Values.identifier . }}-letsencrypt-certs
---
apiVersion: batch/v1
kind: Job
metadata:
  name: {{ tpl .Values.identifier . }}-getcert
spec:
  template:
    metadata:
      labels:
        app: {{ tpl .Values.identifier . }}-openldap
    spec:
      restartPolicy: Never
      containers:
      - name: certbot
        image: ungleich/ungleich-certbot
        ports:
        - containerPort: 80
        env:
        - name: ONLYGETCERT
          value: "yes"
        - name: DOMAIN
          value: "{{ tpl .Values.fqdn . }}"
        - name: EMAIL
          value: "{{ .Values.letsencryptEmail }}"
        {{ if not .Values.letsencryptStaging }}
        - name: STAGING
          value: "no"
        {{ end }}
        volumeMounts:
        - name: "{{ tpl .Values.identifier . }}-letsencrypt-certs"
          mountPath: /etc/letsencrypt
      volumes:
      - name: "{{ tpl .Values.identifier . }}-letsencrypt-certs"
        persistentVolumeClaim:
          claimName: {{ tpl .Values.identifier . }}-letsencrypt-certs
  backoffLimit: 3


@@ -0,0 +1,25 @@
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: "{{ tpl .Values.identifier . }}-openldap-data"
spec:
  accessModes:
  - ReadWriteOnce
  resources:
    requests:
      storage: 1Gi
  storageClassName: rook-cephfs
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: "{{ tpl .Values.identifier . }}-letsencrypt-certs"
spec:
  accessModes:
  - ReadWriteMany
  resources:
    requests:
      storage: 50Mi
  storageClassName: rook-cephfs


@@ -0,0 +1,18 @@
apiVersion: v1
kind: Service
metadata:
  name: "{{ .Release.Name }}"
  labels:
    app: openldap
spec:
  type: ClusterIP
  selector:
    app: "{{ tpl .Values.identifier . }}-openldap"
  ports:
  - port: 389
    name: ldap
  - port: 636
    name: ldaps
  # Required for TLS certificate generation via LetsEncrypt.
  - port: 80
    name: http


@@ -0,0 +1,19 @@
clusterDomain: "c1.k8s.ooo"
fqdn: "{{ .Release.Name }}.{{ .Release.Namespace }}.svc.{{ .Values.clusterDomain }}"
identifier: "{{ .Release.Name }}"
ldap:
  # See https://www.openldap.org/doc/admin24/slapdconf2.html section 5.2.1.2;
  logLevel: "256"
  organisation: "ungleich glarus ag"
  domain: "{{ tpl .Values.fqdn . }}"
  adminPasswordSecretRef: "{{ tpl .Values.identifier . }}-openldap"
  adminPasswordSecretKey: "LDAP_ADMIN_PASSWORD"
  enableReplication: false
  replicationHosts: ""
  replicationConfigSyncprov: 'binddn=\"cn=admin,cn=config\" bindmethod=simple credentials=$$LDAP_CONFIG_PASSWORD searchbase=\"cn=config\" type=refreshAndPersist retry=\"60 +\" timeout=1 starttls=no'
  replicationDbSyncprov: 'binddn=\"cn=admin,$$LDAP_BASE_DN\" bindmethod=simple credentials=$$LDAP_ADMIN_PASSWORD searchbase=\"$$LDAP_BASE_DN\" type=refreshAndPersist interval=00:00:00:10 retry=\"60 +\" timeout=1 starttls=no'

# TLS certificate generation.
letsencryptEmail: "technik@ungleich.ch"
letsencryptStaging: false


@@ -21,4 +21,4 @@ version: 0.1.0
# incremented each time you make changes to the application. Versions are not expected to
# follow Semantic Versioning. They should reflect the version the application is using.
# It is recommended to use it with quotes.
appVersion: "20.0.11"
appVersion: "21.0.3"


@@ -81,16 +81,51 @@ spec:
mountPath: "/etc/letsencrypt"
- name: nextcloud-data
mountPath: "/var/www/html"
# Is it ready to work?
readinessProbe:
tcpSocket:
port: 443
initialDelaySeconds: 5
periodSeconds: 10
# Is it still working?
livenessProbe:
tcpSocket:
port: 443
initialDelaySeconds: 15
periodSeconds: 20
- name: nextcloud
image: nextcloud:20.0.11-fpm
image: nextcloud:{{ .Chart.AppVersion }}-fpm-alpine
# Wait for 10 minutes to get ready
startupProbe:
httpGet:
path: /ocs/v2.php/apps/serverinfo/api/v1/info
port: fpm
failureThreshold: 20
periodSeconds: 30
# Dead if failing for 1 minute
livenessProbe:
httpGet:
path: /ocs/v2.php/apps/serverinfo/api/v1/info
port: fpm
failureThreshold: 6
periodSeconds: 10
readinessProbe:
httpGet:
path: /ocs/v2.php/apps/serverinfo/api/v1/info
port: fpm
failureThreshold: 3
periodSeconds: 30
ports:
- containerPort: 9000
name: fpm
env:
- name: POSTGRES_DB
valueFrom:
secretKeyRef:
name: {{ tpl .Values.identifier . }}-postgres-config
key: POSTGRES_DB
- name: NEXTCLOUD_TRUSTED_DOMAINS
value: "{{ tpl .Values.fqdn . }}"
- name: NEXTCLOUD_ADMIN_USER
valueFrom:
secretKeyRef:


@@ -123,3 +123,12 @@ Especially these:
## Other flux related problems
* The host is not cleared / old /var/lib/rook is persisting
## Troubleshooting: PVC stuck pending, no csi-{cephfs,rbd}provisioner-plugin pod in rook-ceph namespace
2021-07-31: it seems that the provisioner plugins tend to silently die.
Restarting the `rook-ceph-operator` deployment will get them back up:
```
kubectl rollout restart deployment/rook-ceph-operator -n rook-ceph
```
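
Afterwards the CSI provisioner pods should reappear; a quick way to check
(exact pod names vary between Rook versions):

```
kubectl -n rook-ceph get pods | grep provisioner
```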


@@ -0,0 +1,10 @@
apiVersion: v1
kind: Service
metadata:
  name: blank-service
spec:
  selector:
    app: something-that-comes-later
  ports:
  - protocol: TCP
    port: 80