Merge branch 'master' of code.ungleich.ch:ungleich-public/ungleich-k8s

Nico Schottelius 2021-08-03 17:43:00 +02:00
commit 9e76462418
13 changed files with 467 additions and 2 deletions


@@ -0,0 +1,52 @@
# Fnux's playground

Tests made by Timothée for ungleich.

## OpenLDAP

Simple chart based on [Osixia's OpenLDAP
image](https://github.com/osixia/docker-openldap). A TLS certificate is
automatically generated via Let's Encrypt, but renewal is not handled yet.

TODO: handle TLS certificate renewal.

NOTE: replication with the osixia image is somewhat broken, see:
https://github.com/osixia/docker-openldap/issues/203
-> Worked around the issue with https://github.com/ungleich/docker-openldap/commit/3c7c9ece1e67bce0bfe1fdb66a63f5c8c59359f4

```
kubectl create secret generic ldap1-openldap --from-literal=LDAP_ADMIN_PASSWORD=secretsecretsectet
helm install ldap1 ./openldap -f ldapN.fnux-playground.yaml
helm install ldap2 ./openldap -f ldapN.fnux-playground.yaml
```
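
To check that both releases came up (the pod labels and resource names below
follow what the chart's templates generate; adjust the namespace if needed, and
treat the `grep` as a rough way to eyeball syncrepl activity, not a proper
replication test):

```
kubectl get pods -l 'app in (ldap1-openldap,ldap2-openldap)'
kubectl logs deployment/ldap1 -c openldap | grep -i sync
```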

## Matrix Synapse

Matrix Homeserver setup based on [Ananace's Helm
charts](https://gitlab.com/ananace/charts). I exchanged a few mails with him,
he's nice!

Note: we need to wire up some network policy to firewall the various components (see the sketch after the install commands below).
Note: there's some configuration and secret management to work on!
Note: there's a missing bit for IPv6 support (https://gitlab.com/ananace/charts/-/merge_requests/15)

```
helm repo add ananace-charts https://ananace.gitlab.io/charts
helm repo update
helm install matrix ananace-charts/matrix-synapse --set serverName=matrix.fnux-playground.svc.c1.k8s.ooo --set wellknown.enabled=true -f matrix.fnux-playground.yaml
```
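
Regarding the network-policy note above, a minimal sketch of the kind of policy
we'd want. Nothing here ships with the chart, and the controller namespace
label is an assumption to adjust:

```
# Illustrative only: allow traffic between the release's own pods and from the
# ingress controller namespace (assumed to be "ingress-nginx"), drop the rest.
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
  name: matrix-restrict-ingress
spec:
  podSelector: {}            # applies to every pod in the release namespace
  policyTypes:
    - Ingress
  ingress:
    - from:
        - podSelector: {}    # synapse, workers, redis, postgres talking to each other
    - from:
        - namespaceSelector:
            matchLabels:
              kubernetes.io/metadata.name: ingress-nginx
```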

## Ingress

Ingress is used by the matrix-synapse chart to distribute requests across
synapse workers. We could do it ourselves (just generate an NGINX container
from synapse's config), but there's already ingress logic around which does
this for us...

```
helm repo add ingress-nginx https://kubernetes.github.io/ingress-nginx
helm repo update
helm install ingress-nginx ingress-nginx/ingress-nginx
```
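
To sanity-check the setup (standard labels from the ingress-nginx chart; adjust
the namespace if you installed the controller elsewhere):

```
kubectl get pods -l app.kubernetes.io/name=ingress-nginx
kubectl get ingress
```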


@@ -0,0 +1,10 @@
clusterDomain: "c1.k8s.ooo"
ldap:
  # See https://www.openldap.org/doc/admin24/slapdconf2.html section 5.2.1.2;
  logLevel: "256"
  organisation: "ungleich glarus ag"
  domain: "ungleich.ch"
  adminPasswordSecretRef: "ldap-openldap"
  enableReplication: "true"
  replicationHosts: "#PYTHON2BASH:['ldaps://ldap1.{{ .Release.Namespace }}.svc.{{ .Values.clusterDomain }}','ldaps://ldap2.{{ .Release.Namespace }}.svc.{{ .Values.clusterDomain }}']"


@@ -0,0 +1,102 @@
# Note: as of writing we can't template the variables of this file, although
# I'm pretty sure upstream would accept a patch for this.

# Shared variables.
clusterName: "c2.k8s.ooo"

# The Matrix domain name, this is what will be used for the domain part in
# your MXIDs.
serverName: "matrix.fnux-playground.svc.c2.k8s.ooo"

# The public Matrix server name, this will be used for any public URLs
# in config as well as for client API links in the ingress.
publicServerName: "matrix.fnux-playground.svc.c2.k8s.ooo"

# Generic configuration that applies to multiple components.
config:
  # Log level for Synapse and all modules.
  logLevel: INFO

# Configuration to apply to the main Synapse pod.
synapse:
  ## Only really applicable when the deployment has an RWO PV attached (e.g. when media repository
  ## is enabled for the main Synapse pod)
  ## Since replicas = 1, an update can get "stuck", as the previous pod remains attached to the
  ## PV, and the "incoming" pod can never start. Changing the strategy to "Recreate" will
  ## terminate the single previous pod, so that the new, incoming pod can attach to the PV
  ##
  strategy:
    type: RollingUpdate

  # First/initial startup is slow! The synapse pod gets killed before the
  # database is fully initialized if we don't explicitly wait.
  # XXX: we should probably use a startupProbe, but this needs to be patched
  # upstream.
  livenessProbe:
    httpGet:
      path: /health
      port: http
    initialDelaySeconds: 180
  readinessProbe:
    httpGet:
      path: /health
      port: http
    initialDelaySeconds: 180

# Configuration for handling Synapse workers, which are useful for handling
# high-load deployments.
#
# More information is available at:
# https://github.com/matrix-org/synapse/blob/master/docs/workers.md
#
# workers: ...

# Persistence configuration for the media repository function. This PVC will
# be mounted in either Synapse or a media_repo worker.
persistence:
  enabled: true
  storageClass: "rook-cephfs"
  accessMode: ReadWriteMany
  size: 10Gi

# Serve /.well-known URIs, making federation possible without adding
# SRV-records to DNS.
wellknown:
  enabled: true

  # Lighttpd does not bind to v6 by default - which doesn't play well in a
  # v6-only cluster!
  useIpv6: true

  # Data served on .well-known/matrix/server.
  # See https://matrix.org/docs/spec/server_server/latest#get-well-known-matrix-server
  server:
    m.server: "matrix.fnux-playground.svc.c2.k8s.ooo"

  # Data served on .well-known/matrix/client.
  # See https://matrix.org/docs/spec/client_server/latest#get-well-known-matrix-client
  client:
    m.homeserver:
      base_url: "https://matrix.fnux-playground.svc.c2.k8s.ooo"

# PGSQL database server configuration.
postgresql:
  enabled: true
  postgresqlPassword: "secret"
  postgresqlUsername: synapse
  postgresqlDatabase: synapse
  persistence:
    storageClass: "rook-cephfs"
    size: 16Gi

## Redis server for use with workers/sharding.
redis:
  enabled: true
  usePassword: true
  password: "secret"

# The K8s ingress configuration, this will be quite heavily used in order to
# set up all routing necessary for use with a sharded Synapse instance. If
# you're not using an Ingress compatible K8s ingress, you will need to set up
# your own routing instead.
ingress:
  enabled: true


@@ -0,0 +1,23 @@
# Patterns to ignore when building packages.
# This supports shell glob matching, relative path matching, and
# negation (prefixed with !). Only one pattern per line.
.DS_Store
# Common VCS dirs
.git/
.gitignore
.bzr/
.bzrignore
.hg/
.hgignore
.svn/
# Common backup files
*.swp
*.bak
*.tmp
*.orig
*~
# Various IDEs
.project
.idea/
*.tmproj
.vscode/


@@ -0,0 +1,24 @@
apiVersion: v2
name: openldap
description: OpenLDAP server
# A chart can be either an 'application' or a 'library' chart.
#
# Application charts are a collection of templates that can be packaged into versioned archives
# to be deployed.
#
# Library charts provide useful utilities or functions for the chart developer. They're included as
# a dependency of application charts to inject those utilities and functions into the rendering
# pipeline. Library charts do not define any templates and therefore cannot be deployed.
type: application
# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: 0.1.0
# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
# follow Semantic Versioning. They should reflect the version the application is using.
# It is recommended to use it with quotes.
appVersion: "1.5.0-serverid-hostname-fallback-2"


@@ -0,0 +1,138 @@
apiVersion: apps/v1
kind: Deployment
metadata:
  name: "{{ tpl .Values.identifier . }}"
  labels:
    app: openldap
spec:
  replicas: 1
  strategy:
    # Delete old pod before starting the new one - slapd doesn't react well
    # with two instances hitting the same database.
    type: "Recreate"
  selector:
    matchLabels:
      app: "{{ tpl .Values.identifier . }}-openldap"
  template:
    metadata:
      labels:
        app: "{{ tpl .Values.identifier . }}-openldap"
    spec:
      initContainers:
      - name: wait-for-cert
        image: busybox
        command:
        - "sh"
        - "-c"
        - "until ls /etc/letsencrypt/live/{{ tpl .Values.fqdn . }}/fullchain.pem; do sleep 5; done"
        volumeMounts:
        - name: "{{ tpl .Values.identifier . }}-letsencrypt-certs"
          mountPath: /etc/letsencrypt
      containers:
      - name: "openldap"
        image: "ungleich/openldap:{{ .Chart.AppVersion }}"
        args: ["--loglevel", "trace"]
        ports:
        - name: ldap
          containerPort: 389
          protocol: TCP
        - name: ldaps
          containerPort: 636
          protocol: TCP
        livenessProbe:
          tcpSocket:
            port: 389
          initialDelaySeconds: 10
          periodSeconds: 10
        readinessProbe:
          tcpSocket:
            port: 389
          initialDelaySeconds: 10
          periodSeconds: 10
        env:
        - name: HOSTNAME
          value: "{{ tpl .Values.fqdn . }}"
        - name: LDAP_LOG_LEVEL
          value: "{{ tpl .Values.ldap.logLevel . }}"
        - name: LDAP_ORGANISATION
          value: "{{ tpl .Values.ldap.organisation . }}"
        - name: LDAP_DOMAIN
          value: "{{ tpl .Values.ldap.domain . }}"
        - name: LDAP_ADMIN_PASSWORD
          valueFrom:
            secretKeyRef:
              name: "{{ tpl .Values.ldap.adminPasswordSecretRef . }}"
              key: "{{ tpl .Values.ldap.adminPasswordSecretKey . }}"
        - name: LDAP_CONFIG_PASSWORD
          valueFrom:
            secretKeyRef:
              name: "{{ tpl .Values.ldap.adminPasswordSecretRef . }}"
              key: "{{ tpl .Values.ldap.adminPasswordSecretKey . }}"
        - name: LDAP_TLS_CRT_FILENAME
          value: "live/{{ tpl .Values.fqdn . }}/cert.pem"
        - name: LDAP_TLS_KEY_FILENAME
          value: "live/{{ tpl .Values.fqdn . }}/privkey.pem"
        - name: LDAP_TLS_CA_CRT_FILENAME
          value: "live/{{ tpl .Values.fqdn . }}/fullchain.pem"
        - name: LDAP_TLS_VERIFY_CLIENT
          value: "try"
        - name: LDAP_REPLICATION
          value: "{{ .Values.ldap.enableReplication }}"
        - name: LDAP_REPLICATION_HOSTS
          value: "{{ tpl .Values.ldap.replicationHosts . }}"
        - name: LDAP_REPLICATION_CONFIG_SYNCPROV
          value: "{{ tpl .Values.ldap.replicationConfigSyncprov . }}"
        - name: LDAP_REPLICATION_DB_SYNCPROV
          value: "{{ tpl .Values.ldap.replicationDbSyncprov . }}"
        volumeMounts:
        - name: "{{ tpl .Values.identifier . }}-openldap-data"
          mountPath: "/etc/ldap/slapd.d"
          subPath: configuration
        - name: "{{ tpl .Values.identifier . }}-openldap-data"
          mountPath: "/var/lib/ldap"
          subPath: database
        - name: "{{ tpl .Values.identifier . }}-letsencrypt-certs"
          mountPath: /container/service/slapd/assets/certs
      volumes:
      - name: "{{ tpl .Values.identifier . }}-openldap-data"
        persistentVolumeClaim:
          claimName: "{{ tpl .Values.identifier . }}-openldap-data"
      - name: "{{ tpl .Values.identifier . }}-letsencrypt-certs"
        persistentVolumeClaim:
          claimName: {{ tpl .Values.identifier . }}-letsencrypt-certs
---
apiVersion: batch/v1
kind: Job
metadata:
  name: {{ tpl .Values.identifier . }}-getcert
spec:
  template:
    metadata:
      labels:
        app: {{ tpl .Values.identifier . }}-openldap
    spec:
      restartPolicy: Never
      containers:
      - name: certbot
        image: ungleich/ungleich-certbot
        ports:
        - containerPort: 80
        env:
        - name: ONLYGETCERT
          value: "yes"
        - name: DOMAIN
          value: "{{ tpl .Values.fqdn . }}"
        - name: EMAIL
          value: "{{ .Values.letsencryptEmail }}"
        {{ if not .Values.letsencryptStaging }}
        - name: STAGING
          value: "no"
        {{ end }}
        volumeMounts:
        - name: "{{ tpl .Values.identifier . }}-letsencrypt-certs"
          mountPath: /etc/letsencrypt
      volumes:
      - name: "{{ tpl .Values.identifier . }}-letsencrypt-certs"
        persistentVolumeClaim:
          claimName: {{ tpl .Values.identifier . }}-letsencrypt-certs
  backoffLimit: 3


@@ -0,0 +1,25 @@
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: "{{ tpl .Values.identifier . }}-openldap-data"
spec:
  accessModes:
  - ReadWriteOnce
  resources:
    requests:
      storage: 1Gi
  storageClassName: rook-cephfs
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: "{{ tpl .Values.identifier . }}-letsencrypt-certs"
spec:
  accessModes:
  - ReadWriteMany
  resources:
    requests:
      storage: 50Mi
  storageClassName: rook-cephfs


@@ -0,0 +1,18 @@
apiVersion: v1
kind: Service
metadata:
  name: "{{ .Release.Name }}"
  labels:
    app: openldap
spec:
  type: ClusterIP
  selector:
    app: "{{ tpl .Values.identifier . }}-openldap"
  ports:
  - port: 389
    name: ldap
  - port: 636
    name: ldaps
  # Required for TLS certificate generation via LetsEncrypt.
  - port: 80
    name: http


@@ -0,0 +1,19 @@
clusterDomain: "c1.k8s.ooo"
fqdn: "{{ .Release.Name }}.{{ .Release.Namespace }}.svc.{{ .Values.clusterDomain }}"
identifier: "{{ .Release.Name }}"
ldap:
  # See https://www.openldap.org/doc/admin24/slapdconf2.html section 5.2.1.2;
  logLevel: "256"
  organisation: "ungleich glarus ag"
  domain: "{{ tpl .Values.fqdn . }}"
  adminPasswordSecretRef: "{{ tpl .Values.identifier . }}-openldap"
  adminPasswordSecretKey: "LDAP_ADMIN_PASSWORD"
  enableReplication: false
  replicationHosts: ""
  replicationConfigSyncprov: 'binddn=\"cn=admin,cn=config\" bindmethod=simple credentials=$$LDAP_CONFIG_PASSWORD searchbase=\"cn=config\" type=refreshAndPersist retry=\"60 +\" timeout=1 starttls=no'
  replicationDbSyncprov: 'binddn=\"cn=admin,$$LDAP_BASE_DN\" bindmethod=simple credentials=$$LDAP_ADMIN_PASSWORD searchbase=\"$$LDAP_BASE_DN\" type=refreshAndPersist interval=00:00:00:10 retry=\"60 +\" timeout=1 starttls=no'

# TLS certificate generation.
letsencryptEmail: "technik@ungleich.ch"
letsencryptStaging: false


@@ -21,4 +21,4 @@ version: 0.1.0
# incremented each time you make changes to the application. Versions are not expected to
# follow Semantic Versioning. They should reflect the version the application is using.
# It is recommended to use it with quotes.
appVersion: "20.0.11"
appVersion: "21.0.3"


@@ -81,16 +81,51 @@ spec:
mountPath: "/etc/letsencrypt"
- name: nextcloud-data
mountPath: "/var/www/html"
# Is it ready to work?
readinessProbe:
tcpSocket:
port: 443
initialDelaySeconds: 5
periodSeconds: 10
# Is it still working?
livenessProbe:
tcpSocket:
port: 443
initialDelaySeconds: 15
periodSeconds: 20
- name: nextcloud
image: nextcloud:20.0.11-fpm
image: nextcloud:{{ .Chart.AppVersion }}-fpm-alpine
# Wait for 10 minutes to get ready
startupProbe:
httpGet:
path: /ocs/v2.php/apps/serverinfo/api/v1/info
port: fpm
failureThreshold: 20
periodSeconds: 30
# Dead if failing for 1 minute
livenessProbe:
httpGet:
path: /ocs/v2.php/apps/serverinfo/api/v1/info
port: fpm
failureThreshold: 6
periodSeconds: 10
readinessProbe:
httpGet:
path: /ocs/v2.php/apps/serverinfo/api/v1/info
port: fpm
failureThreshold: 3
periodSeconds: 30
ports:
- containerPort: 9000
name: fpm
env:
- name: POSTGRES_DB
valueFrom:
secretKeyRef:
name: {{ tpl .Values.identifier . }}-postgres-config
key: POSTGRES_DB
- name: NEXTCLOUD_TRUSTED_DOMAINS
value: "{{ tpl .Values.fqdn . }}"
- name: NEXTCLOUD_ADMIN_USER
valueFrom:
secretKeyRef:


@@ -123,3 +123,12 @@ Especially these:
## Other flux related problems
* The host is not cleared / old /var/lib/rook is persisting
## Troubleshooting: PVC stuck pending, no csi-{cephfs,rbd}provisioner-plugin pod in rook-ceph namespace
2021-07-31: it seems that the provisioner plugins tend to silently die.
Restarting the `rook-ceph-operator` deployment will get them back up:
```
kubectl rollout restart deployment/rook-ceph-operator -n rook-ceph
```
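
Afterwards the CSI provisioner pods should reappear; a quick way to check
(exact pod names vary between Rook versions):

```
kubectl -n rook-ceph get pods | grep provisioner
```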


@@ -0,0 +1,10 @@
apiVersion: v1
kind: Service
metadata:
  name: blank-service
spec:
  selector:
    app: something-that-comes-later
  ports:
  - protocol: TCP
    port: 80