diff --git a/apps/fnux-playground/README.md b/apps/fnux-playground/README.md
new file mode 100644
index 0000000..5de1ac4
--- /dev/null
+++ b/apps/fnux-playground/README.md
@@ -0,0 +1,52 @@
+# Fnux's playground
+
+Tests made by Timothée for ungleich.
+
+## OpenLDAP
+
+Simple chart based on [Osixia's OpenLDAP
+image](https://github.com/osixia/docker-openldap). A TLS certificate is
+automatically generated via Let's Encrypt, but renewal is not handled yet.
+
+TODO: handle TLS certificate renewal.
+NOTE: replication with the osixia image is somewhat broken, see:
+  https://github.com/osixia/docker-openldap/issues/203
+  -> Worked around the issue with https://github.com/ungleich/docker-openldap/commit/3c7c9ece1e67bce0bfe1fdb66a63f5c8c59359f4
+
+```
+kubectl create secret generic ldap1-openldap --from-literal=LDAP_ADMIN_PASSWORD=secretsecretsecret
+helm install ldap1 ./openldap -f ldapN.fnux-playground.yaml
+helm install ldap2 ./openldap -f ldapN.fnux-playground.yaml
+```
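+
+To check that an instance answers over TLS, an `ldapsearch` along these lines
+should do (untested sketch: the base DN follows from `LDAP_DOMAIN=ungleich.ch`,
+and `<namespace>` is wherever the releases were installed):
+
+```
+ldapsearch -x -H ldaps://ldap1.<namespace>.svc.c1.k8s.ooo \
+  -D cn=admin,dc=ungleich,dc=ch -W -b dc=ungleich,dc=ch
+```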
+
+## Matrix Synapse
+
+Matrix Homeserver setup based on [Ananace's Helm
+charts](https://gitlab.com/ananace/charts). I exchanged a few emails with him;
+he's nice!
+
+Note: we need to wire up some network policies to firewall the various components.
+Note: there's some configuration and secret management to work on!
+Note: there's a missing bit for IPv6 support (https://gitlab.com/ananace/charts/-/merge_requests/15)
+
+```
+helm repo add ananace-charts https://ananace.gitlab.io/charts
+helm repo update
+
+helm install matrix ananace-charts/matrix-synapse --set serverName=matrix.fnux-playground.svc.c1.k8s.ooo --set wellknown.enabled=true -f matrix.fnux-playground.yaml
+```
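+
+A quick federation smoke test, once the ingress answers on the server name
+(the path is the one from the Matrix spec; reachability depends on the
+cluster):
+
+```
+curl https://matrix.fnux-playground.svc.c1.k8s.ooo/.well-known/matrix/server
+```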
+
+## Ingress
+
+Ingress is used by the matrix-synapse chart to distribute requests across
+synapse workers. We could do it ourselves (just generate an NGINX container
+from synapse's config), but there's already ingress logic around which does
+this for us...
+
+```
+helm repo add ingress-nginx https://kubernetes.github.io/ingress-nginx
+helm repo update
+
+helm install ingress-nginx ingress-nginx/ingress-nginx
+```
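+
+The controller pod should show up shortly after (assuming the chart's default
+labels):
+
+```
+kubectl get pods -l app.kubernetes.io/name=ingress-nginx
+```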
+
diff --git a/apps/fnux-playground/ldapN.fnux-playground.yaml b/apps/fnux-playground/ldapN.fnux-playground.yaml
new file mode 100644
index 0000000..da29629
--- /dev/null
+++ b/apps/fnux-playground/ldapN.fnux-playground.yaml
@@ -0,0 +1,10 @@
+clusterDomain: "c1.k8s.ooo"
+
+ldap:
+  # See https://www.openldap.org/doc/admin24/slapdconf2.html section 5.2.1.2:
+  logLevel: "256"
+  organisation: "ungleich glarus ag"
+  domain: "ungleich.ch"
+  adminPasswordSecretRef: "ldap-openldap"
+  enableReplication: "true"
+  replicationHosts: "#PYTHON2BASH:['ldaps://ldap1.{{ .Release.Namespace }}.svc.{{ .Values.clusterDomain }}','ldaps://ldap2.{{ .Release.Namespace }}.svc.{{ .Values.clusterDomain }}']"
diff --git a/apps/fnux-playground/matrix.fnux-playground.yaml b/apps/fnux-playground/matrix.fnux-playground.yaml
new file mode 100644
index 0000000..5063b44
--- /dev/null
+++ b/apps/fnux-playground/matrix.fnux-playground.yaml
@@ -0,0 +1,102 @@
+# Note: as of writing we can't template the variables of this file, although
+# I'm pretty sure upstream would accept a patch for this.
+
+# Shared variables.
+clusterName: "c2.k8s.ooo"
+
+# The Matrix domain name, this is what will be used for the domain part in
+# your MXIDs.
+serverName: "matrix.fnux-playground.svc.c2.k8s.ooo"
+
+# The public Matrix server name, this will be used for any public URLs
+# in config as well as for client API links in the ingress.
+publicServerName: "matrix.fnux-playground.svc.c2.k8s.ooo"
+
+# Generic configuration that applies to multiple components.
+config:
+  # Log level for Synapse and all modules.
+  logLevel: INFO
+
+# Configuration to apply to the main Synapse pod.
+synapse:
+  ## Only really applicable when the deployment has an RWO PV attached (e.g. when media repository
+  ## is enabled for the main Synapse pod)
+  ## Since replicas = 1, an update can get "stuck", as the previous pod remains attached to the
+  ## PV, and the "incoming" pod can never start. Changing the strategy to "Recreate" will
+  ## terminate the single previous pod, so that the new, incoming pod can attach to the PV
+  ##
+  strategy:
+    type: RollingUpdate
+  # First/initial startup is slow! The synapse pod gets killed before the
+  # database is fully initialized if we don't explicitly wait.
+  # XXX: we should probably use a startupProbe (see the sketch after the
+  # probes below), but this needs to be patched upstream.
+  livenessProbe:
+    httpGet:
+      path: /health
+      port: http
+    initialDelaySeconds: 180
+  readinessProbe:
+    httpGet:
+      path: /health
+      port: http
+    initialDelaySeconds: 180
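+  # For reference, roughly what that startupProbe could look like once the
+  # chart exposes one (untested sketch; the thresholds are a guess that keeps
+  # the same 180s startup budget without delaying later liveness checks):
+  # startupProbe:
+  #   httpGet:
+  #     path: /health
+  #     port: http
+  #   failureThreshold: 36
+  #   periodSeconds: 5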
+
+# Configuration for handling Synapse workers, which are useful for handling
+# high-load deployments.
+#
+# More information is available at:
+# https://github.com/matrix-org/synapse/blob/master/docs/workers.md
+#
+# workers: ...
+
+# Persistence configuration for the media repository function. This PVC will
+# be mounted in either Synapse or a media_repo worker.
+persistence:
+  enabled: true
+  storageClass: "rook-cephfs"
+  accessMode: ReadWriteMany
+  size: 10Gi
+
+# Serve /.well-known URIs, making federation possible without adding
+# SRV-records to DNS.
+wellknown:
+  enabled: true
+
+  # Lighttpd does not bind to v6 by default - which doesn't play well in a
+  # v6-only cluster!
+  useIpv6: true
+
+  # Data served on .well-known/matrix/server.
+  # See https://matrix.org/docs/spec/server_server/latest#get-well-known-matrix-server
+  server:
+    m.server: "matrix.fnux-playground.svc.c2.k8s.ooo"
+
+  # Data served on .well-known/matrix/client.
+  # See https://matrix.org/docs/spec/client_server/latest#get-well-known-matrix-client
+  client:
+    m.homeserver:
+      base_url: "https://matrix.fnux-playground.svc.c2.k8s.ooo"
+
+# PGSQL database server configuration.
+postgresql:
+  enabled: true
+  postgresqlPassword: "secret"
+  postgresqlUsername: synapse
+  postgresqlDatabase: synapse
+  persistence:
+    storageClass: "rook-cephfs"
+    size: 16Gi
+
+## Redis server for use with workers/sharding.
+redis:
+  enabled: true
+  usePassword: true
+  password: "secret"
+
+# The K8s ingress configuration, this will be quite heavily used in order to
+# set up all routing necessary for use with a sharded Synapse instance. If
+# you're not using an Ingress-compatible K8s ingress, you will need to set up
+# your own routing instead.
+ingress:
+  enabled: true
diff --git a/apps/fnux-playground/openldap/.helmignore b/apps/fnux-playground/openldap/.helmignore
new file mode 100644
index 0000000..0e8a0eb
--- /dev/null
+++ b/apps/fnux-playground/openldap/.helmignore
@@ -0,0 +1,23 @@
+# Patterns to ignore when building packages.
+# This supports shell glob matching, relative path matching, and
+# negation (prefixed with !). Only one pattern per line.
+.DS_Store
+# Common VCS dirs
+.git/
+.gitignore
+.bzr/
+.bzrignore
+.hg/
+.hgignore
+.svn/
+# Common backup files
+*.swp
+*.bak
+*.tmp
+*.orig
+*~
+# Various IDEs
+.project
+.idea/
+*.tmproj
+.vscode/
diff --git a/apps/fnux-playground/openldap/Chart.yaml b/apps/fnux-playground/openldap/Chart.yaml
new file mode 100644
index 0000000..137d4fc
--- /dev/null
+++ b/apps/fnux-playground/openldap/Chart.yaml
@@ -0,0 +1,24 @@
+apiVersion: v2
+name: openldap
+description: OpenLDAP server
+
+# A chart can be either an 'application' or a 'library' chart.
+#
+# Application charts are a collection of templates that can be packaged into versioned archives
+# to be deployed.
+#
+# Library charts provide useful utilities or functions for the chart developer. They're included as
+# a dependency of application charts to inject those utilities and functions into the rendering
+# pipeline. Library charts do not define any templates and therefore cannot be deployed.
+type: application
+
+# This is the chart version. This version number should be incremented each time you make changes
+# to the chart and its templates, including the app version.
+# Versions are expected to follow Semantic Versioning (https://semver.org/)
+version: 0.1.0
+
+# This is the version number of the application being deployed. This version number should be
+# incremented each time you make changes to the application. Versions are not expected to
+# follow Semantic Versioning. They should reflect the version the application is using.
+# It is recommended to use it with quotes.
+appVersion: "1.5.0-serverid-hostname-fallback-2"
diff --git a/apps/fnux-playground/openldap/templates/deployment.yaml b/apps/fnux-playground/openldap/templates/deployment.yaml
new file mode 100644
index 0000000..ca9be71
--- /dev/null
+++ b/apps/fnux-playground/openldap/templates/deployment.yaml
@@ -0,0 +1,138 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: "{{ tpl .Values.identifier . }}"
+  labels:
+    app: openldap
+spec:
+  replicas: 1
+  strategy:
+    # Delete old pod before starting the new one - slapd doesn't react well
+    # to two instances hitting the same database.
+    type: "Recreate"
+  selector:
+    matchLabels:
+      app: "{{ tpl .Values.identifier . }}-openldap"
+  template:
+    metadata:
+      labels:
+        app: "{{ tpl .Values.identifier . }}-openldap"
+    spec:
+      initContainers:
+        - name: wait-for-cert
+          image: busybox
+          command:
+            - "sh"
+            - "-c"
+            - "until ls /etc/letsencrypt/live/{{ tpl .Values.fqdn . }}/fullchain.pem; do sleep 5; done"
+          volumeMounts:
+            - name: "{{ tpl .Values.identifier . }}-letsencrypt-certs"
+              mountPath: /etc/letsencrypt
+      containers:
+        - name: "openldap"
+          image: "ungleich/openldap:{{ .Chart.AppVersion }}"
+          args: ["--loglevel", "trace"]
+          ports:
+            - name: ldap
+              containerPort: 389
+              protocol: TCP
+            - name: ldaps
+              containerPort: 636
+              protocol: TCP
+          livenessProbe:
+            tcpSocket:
+              port: 389
+            initialDelaySeconds: 10
+            periodSeconds: 10
+          readinessProbe:
+            tcpSocket:
+              port: 389
+            initialDelaySeconds: 10
+            periodSeconds: 10
+          env:
+            - name: HOSTNAME
+              value: "{{ tpl .Values.fqdn . }}"
+            - name: LDAP_LOG_LEVEL
+              value: "{{ tpl .Values.ldap.logLevel . }}"
+            - name: LDAP_ORGANISATION
+              value: "{{ tpl .Values.ldap.organisation . }}"
+            - name: LDAP_DOMAIN
+              value: "{{ tpl .Values.ldap.domain . }}"
+            - name: LDAP_ADMIN_PASSWORD
+              valueFrom:
+                secretKeyRef:
+                  name: "{{ tpl .Values.ldap.adminPasswordSecretRef . }}"
+                  key: "{{ tpl .Values.ldap.adminPasswordSecretKey . }}"
+            - name: LDAP_CONFIG_PASSWORD
+              valueFrom:
+                secretKeyRef:
+                  name: "{{ tpl .Values.ldap.adminPasswordSecretRef . }}"
+                  key: "{{ tpl .Values.ldap.adminPasswordSecretKey . }}"
+            - name: LDAP_TLS_CRT_FILENAME
+              value: "live/{{ tpl .Values.fqdn . }}/cert.pem"
+            - name: LDAP_TLS_KEY_FILENAME
+              value: "live/{{ tpl .Values.fqdn . }}/privkey.pem"
+            - name: LDAP_TLS_CA_CRT_FILENAME
+              value: "live/{{ tpl .Values.fqdn . }}/fullchain.pem"
+            - name: LDAP_TLS_VERIFY_CLIENT
+              value: "try"
+            - name: LDAP_REPLICATION
+              value: "{{ .Values.ldap.enableReplication }}"
+            - name: LDAP_REPLICATION_HOSTS
+              value: "{{ tpl .Values.ldap.replicationHosts . }}"
+            - name: LDAP_REPLICATION_CONFIG_SYNCPROV
+              value: "{{ tpl .Values.ldap.replicationConfigSyncprov . }}"
+            - name: LDAP_REPLICATION_DB_SYNCPROV
+              value: "{{ tpl .Values.ldap.replicationDbSyncprov . }}"
+          volumeMounts:
+            - name: "{{ tpl .Values.identifier . }}-openldap-data"
+              mountPath: "/etc/ldap/slapd.d"
+              subPath: configuration
+            - name: "{{ tpl .Values.identifier . }}-openldap-data"
+              mountPath: "/var/lib/ldap"
+              subPath: database
+            - name: "{{ tpl .Values.identifier . }}-letsencrypt-certs"
+              mountPath: /container/service/slapd/assets/certs
+      volumes:
+        - name: "{{ tpl .Values.identifier . }}-openldap-data"
+          persistentVolumeClaim:
+            claimName: "{{ tpl .Values.identifier . }}-openldap-data"
+        - name: "{{ tpl .Values.identifier . }}-letsencrypt-certs"
+          persistentVolumeClaim:
+            claimName: {{ tpl .Values.identifier . }}-letsencrypt-certs
+---
+apiVersion: batch/v1
+kind: Job
+metadata:
+  name: {{ tpl .Values.identifier . }}-getcert
+spec:
+  template:
+    metadata:
+      labels:
+        app: {{ tpl .Values.identifier . }}-openldap
+    spec:
+      restartPolicy: Never
+      containers:
+        - name: certbot
+          image: ungleich/ungleich-certbot
+          ports:
+            - containerPort: 80
+          env:
+            - name: ONLYGETCERT
+              value: "yes"
+            - name: DOMAIN
+              value: "{{ tpl .Values.fqdn . }}"
+            - name: EMAIL
+              value: "{{ .Values.letsencryptEmail }}"
+            {{ if not .Values.letsencryptStaging }}
+            - name: STAGING
+              value: "no"
+            {{ end }}
+          volumeMounts:
+            - name: "{{ tpl .Values.identifier . }}-letsencrypt-certs"
+              mountPath: /etc/letsencrypt
+      volumes:
+        - name: "{{ tpl .Values.identifier . }}-letsencrypt-certs"
+          persistentVolumeClaim:
+            claimName: {{ tpl .Values.identifier . }}-letsencrypt-certs
+  backoffLimit: 3
diff --git a/apps/fnux-playground/openldap/templates/pvc.yaml b/apps/fnux-playground/openldap/templates/pvc.yaml
new file mode 100644
index 0000000..360eb6f
--- /dev/null
+++ b/apps/fnux-playground/openldap/templates/pvc.yaml
@@ -0,0 +1,25 @@
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: "{{ tpl .Values.identifier . }}-openldap-data"
+spec:
+  accessModes:
+    - ReadWriteOnce
+  resources:
+    requests:
+      storage: 1Gi
+  storageClassName: rook-cephfs
+---
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: "{{ tpl .Values.identifier . }}-letsencrypt-certs"
+spec:
+  accessModes:
+    - ReadWriteMany
+  resources:
+    requests:
+      storage: 50Mi
+  storageClassName: rook-cephfs
+
+
diff --git a/apps/fnux-playground/openldap/templates/service.yaml b/apps/fnux-playground/openldap/templates/service.yaml
new file mode 100644
index 0000000..43e86d2
--- /dev/null
+++ b/apps/fnux-playground/openldap/templates/service.yaml
@@ -0,0 +1,18 @@
+apiVersion: v1
+kind: Service
+metadata:
+  name: "{{ .Release.Name }}"
+  labels:
+    app: openldap
+spec:
+  type: ClusterIP
+  selector:
+    app: "{{ tpl .Values.identifier . }}-openldap"
+  ports:
+    - port: 389
+      name: ldap
+    - port: 636
+      name: ldaps
+    # Required for TLS certificate generation via Let's Encrypt.
+    - port: 80
+      name: http
diff --git a/apps/fnux-playground/openldap/values.yaml b/apps/fnux-playground/openldap/values.yaml
new file mode 100644
index 0000000..52e9196
--- /dev/null
+++ b/apps/fnux-playground/openldap/values.yaml
@@ -0,0 +1,19 @@
+clusterDomain: "c1.k8s.ooo"
+fqdn: "{{ .Release.Name }}.{{ .Release.Namespace }}.svc.{{ .Values.clusterDomain }}"
+identifier: "{{ .Release.Name }}"
+
+ldap:
+  # See https://www.openldap.org/doc/admin24/slapdconf2.html section 5.2.1.2:
+  logLevel: "256"
+  organisation: "ungleich glarus ag"
+  domain: "{{ tpl .Values.fqdn . }}"
+  adminPasswordSecretRef: "{{ tpl .Values.identifier . }}-openldap"
+  adminPasswordSecretKey: "LDAP_ADMIN_PASSWORD"
+  enableReplication: false
+  replicationHosts: ""
+  replicationConfigSyncprov: 'binddn=\"cn=admin,cn=config\" bindmethod=simple credentials=$$LDAP_CONFIG_PASSWORD searchbase=\"cn=config\" type=refreshAndPersist retry=\"60 +\" timeout=1 starttls=no'
+  replicationDbSyncprov: 'binddn=\"cn=admin,$$LDAP_BASE_DN\" bindmethod=simple credentials=$$LDAP_ADMIN_PASSWORD searchbase=\"$$LDAP_BASE_DN\" type=refreshAndPersist interval=00:00:00:10 retry=\"60 +\" timeout=1 starttls=no'
+
+# TLS certificate generation.
+letsencryptEmail: "technik@ungleich.ch"
+letsencryptStaging: false
diff --git a/apps/nextcloud/Chart.yaml b/apps/nextcloud/Chart.yaml
index 84c101b..ccb5282 100644
--- a/apps/nextcloud/Chart.yaml
+++ b/apps/nextcloud/Chart.yaml
@@ -21,4 +21,4 @@ version: 0.1.0
 # incremented each time you make changes to the application. Versions are not expected to
 # follow Semantic Versioning. They should reflect the version the application is using.
 # It is recommended to use it with quotes.
-appVersion: "20.0.11"
+appVersion: "21.0.3"
diff --git a/apps/nextcloud/templates/deployment.yaml b/apps/nextcloud/templates/deployment.yaml
index fde2e64..32d2318 100644
--- a/apps/nextcloud/templates/deployment.yaml
+++ b/apps/nextcloud/templates/deployment.yaml
@@ -81,16 +81,51 @@ spec:
           mountPath: "/etc/letsencrypt"
         - name: nextcloud-data
           mountPath: "/var/www/html"
+        # Is it ready to work?
+        readinessProbe:
+          tcpSocket:
+            port: 443
+          initialDelaySeconds: 5
+          periodSeconds: 10
+        # Is it still working?
+        livenessProbe:
+          tcpSocket:
+            port: 443
+          initialDelaySeconds: 15
+          periodSeconds: 20
       - name: nextcloud
-        image: nextcloud:20.0.11-fpm
+        image: nextcloud:{{ .Chart.AppVersion }}-fpm-alpine
+        # Allow up to 10 minutes (20 x 30s) for the first startup.
+        startupProbe:
+          httpGet:
+            path: /ocs/v2.php/apps/serverinfo/api/v1/info
+            port: fpm
+          failureThreshold: 20
+          periodSeconds: 30
+        # Dead if failing for 1 minute (6 x 10s).
+        livenessProbe:
+          httpGet:
+            path: /ocs/v2.php/apps/serverinfo/api/v1/info
+            port: fpm
+          failureThreshold: 6
+          periodSeconds: 10
+        readinessProbe:
+          httpGet:
+            path: /ocs/v2.php/apps/serverinfo/api/v1/info
+            port: fpm
+          failureThreshold: 3
+          periodSeconds: 30
         ports:
         - containerPort: 9000
+          name: fpm
         env:
         - name: POSTGRES_DB
           valueFrom:
             secretKeyRef:
               name: {{ tpl .Values.identifier . }}-postgres-config
               key: POSTGRES_DB
+        - name: NEXTCLOUD_TRUSTED_DOMAINS
+          value: "{{ tpl .Values.fqdn . }}"
         - name: NEXTCLOUD_ADMIN_USER
           valueFrom:
             secretKeyRef:
diff --git a/rook/README.md b/rook/README.md
index 58d0016..627631b 100644
--- a/rook/README.md
+++ b/rook/README.md
@@ -123,3 +123,12 @@ Especially these:
 ## Other flux related problems
 
 * The host is not cleared / old /var/lib/rook is persisting
+
+## Troubleshooting: PVC stuck pending, no csi-{cephfs,rbd}provisioner-plugin pod in rook-ceph namespace
+
+2021-07-31: it seems that the provisioner plugins tend to die silently.
+Restarting the `rook-ceph-operator` deployment will get them back up:
+
+```
+kubectl rollout restart deployment/rook-ceph-operator -n rook-ceph
+```
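+
+If in doubt, first check whether the provisioner pods are actually gone
+(they're the usual `csi-*provisioner-*` pods in the rook-ceph namespace):
+
+```
+kubectl get pods -n rook-ceph | grep csi
+```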
diff --git a/tests/service-without-endpoints.yaml b/tests/service-without-endpoints.yaml
new file mode 100644
index 0000000..1832176
--- /dev/null
+++ b/tests/service-without-endpoints.yaml
@@ -0,0 +1,10 @@
+apiVersion: v1
+kind: Service
+metadata:
+  name: blank-service
+spec:
+  selector:
+    app: something-that-comes-later
+  ports:
+    - protocol: TCP
+      port: 80
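+# The selector matches no pod yet, so the Service has no endpoints: its DNS
+# name resolves, but connections fail until a matching pod appears. Verify
+# with:
+#   kubectl get endpoints blank-service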