diff --git a/rook/README.md b/rook/README.md
index 627631b..7277797 100644
--- a/rook/README.md
+++ b/rook/README.md
@@ -31,6 +31,14 @@ for yaml in crds common operator cluster storageclass-cephfs storageclass-rbd to
 done
 ```

+## v3 via Helm
+
+```
+helm repo add rook-release https://charts.rook.io/release
+helm repo update
+helm install --create-namespace --namespace rook-ceph rook-ceph rook-release/rook-ceph
+```
+

 ## Debugging / ceph toolbox

diff --git a/rook/values.yaml b/rook/values.yaml
new file mode 100644
index 0000000..bed5485
--- /dev/null
+++ b/rook/values.yaml
@@ -0,0 +1,416 @@
+# Default values for a single rook-ceph cluster
+# This is a YAML-formatted file.
+# Declare variables to be passed into your templates.
+
+# Namespace of the main rook operator
+operatorNamespace: rook-ceph
+
+# The metadata.name of the CephCluster CR. The default name is the same as the namespace.
+# clusterName: rook-ceph
+
+# Ability to override ceph.conf
+# configOverride: |
+#   [global]
+#   mon_allow_pool_delete = true
+#   osd_pool_default_size = 3
+#   osd_pool_default_min_size = 2
+
+# Installs a debugging toolbox deployment
+toolbox:
+  enabled: true
+  image: rook/ceph:VERSION
+  tolerations: []
+  affinity: {}
+
+monitoring:
+  # requires Prometheus to be pre-installed
+  # enabling will also create RBAC rules to allow Operator to create ServiceMonitors
+  enabled: false
+  rulesNamespaceOverride:
+
+# The imagePullSecrets option allows pulling Docker images from a private Docker registry. The option is passed to all service accounts.
+# imagePullSecrets:
+#   - name: my-registry-secret
+
+# All values below are taken from the CephCluster CRD
+# More information can be found at [Ceph Cluster CRD](/Documentation/ceph-cluster-crd.md)
+cephClusterSpec:
+  cephVersion:
+    # The container image used to launch the Ceph daemon pods (mon, mgr, osd, mds, rgw).
+    # v14 is nautilus, v15 is octopus, and v16 is pacific.
+    # RECOMMENDATION: In production, use a specific version tag instead of the general v16 flag, which pulls the latest release and could result in different
+    # versions running within the cluster. See tags available at https://hub.docker.com/r/ceph/ceph/tags/.
+    # If you want to be more precise, you can always use a timestamp tag such as quay.io/ceph/ceph:v15.2.11-20200419.
+    # This tag might not contain a new Ceph version, just security fixes from the underlying operating system, which will reduce vulnerabilities.
+    image: quay.io/ceph/ceph:v16.2.5
+    # Whether to allow unsupported versions of Ceph. Currently `octopus` and `pacific` are supported.
+    # Future versions would require this to be set to `true`.
+    # Do not set to true in production.
+    allowUnsupported: false
+
+  # The path on the host where configuration files will be persisted. Must be specified.
+  # Important: if you reinstall the cluster, make sure you delete this directory from each host or else the mons will fail to start on the new cluster.
+  # In Minikube, the '/data' directory is configured to persist across reboots. Use "/data/rook" in a Minikube environment.
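+  # A minimal illustration of the Minikube case described above (an assumption,
+  # not the chart default): point the path at the persistent '/data' mount.
+  # dataDirHostPath: /data/rook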
+ dataDirHostPath: /var/lib/rook + + # Whether or not upgrade should continue even if a check fails + # This means Ceph's status could be degraded and we don't recommend upgrading but you might decide otherwise + # Use at your OWN risk + # To understand Rook's upgrade process of Ceph, read https://rook.io/docs/rook/master/ceph-upgrade.html#ceph-version-upgrades + skipUpgradeChecks: false + + # Whether or not continue if PGs are not clean during an upgrade + continueUpgradeAfterChecksEvenIfNotHealthy: false + + # WaitTimeoutForHealthyOSDInMinutes defines the time (in minutes) the operator would wait before an OSD can be stopped for upgrade or restart. + # If the timeout exceeds and OSD is not ok to stop, then the operator would skip upgrade for the current OSD and proceed with the next one + # if `continueUpgradeAfterChecksEvenIfNotHealthy` is `false`. If `continueUpgradeAfterChecksEvenIfNotHealthy` is `true`, then opertor would + # continue with the upgrade of an OSD even if its not ok to stop after the timeout. This timeout won't be applied if `skipUpgradeChecks` is `true`. + # The default wait timeout is 10 minutes. + waitTimeoutForHealthyOSDInMinutes: 10 + + mon: + # Set the number of mons to be started. Must be an odd number, and is generally recommended to be 3. + count: 3 + # The mons should be on unique nodes. For production, at least 3 nodes are recommended for this reason. + # Mons should only be allowed on the same node for test environments where data loss is acceptable. + allowMultiplePerNode: false + + mgr: + # When higher availability of the mgr is needed, increase the count to 2. + # In that case, one mgr will be active and one in standby. When Ceph updates which + # mgr is active, Rook will update the mgr services to match the active mgr. + count: 1 + modules: + # Several modules should not need to be included in this list. The "dashboard" and "monitoring" modules + # are already enabled by other settings in the cluster CR. + - name: pg_autoscaler + enabled: true + + # enable the ceph dashboard for viewing cluster status + dashboard: + enabled: true + # serve the dashboard under a subpath (useful when you are accessing the dashboard via a reverse proxy) + # urlPrefix: /ceph-dashboard + # serve the dashboard at the given port. + # port: 8443 + + # Network configuration, see: https://github.com/rook/rook/blob/master/Documentation/ceph-cluster-crd.md#network-configuration-settings + # network: + # # enable host networking + # provider: host + # # EXPERIMENTAL: enable the Multus network provider + # provider: multus + # selectors: + # # The selector keys are required to be `public` and `cluster`. + # # Based on the configuration, the operator will do the following: + # # 1. if only the `public` selector key is specified both public_network and cluster_network Ceph settings will listen on that interface + # # 2. if both `public` and `cluster` selector keys are specified the first one will point to 'public_network' flag and the second one to 'cluster_network' + # # + # # In order to work, each selector value must match a NetworkAttachmentDefinition object in Multus + # # + # # public: public-conf --> NetworkAttachmentDefinition object name in Multus + # # cluster: cluster-conf --> NetworkAttachmentDefinition object name in Multus + # # Provide internet protocol version. IPv6, IPv4 or empty string are valid options. 
Empty string would mean IPv4 + # ipFamily: "IPv6" + # # Ceph daemons to listen on both IPv4 and Ipv6 networks + # dualStack: false + + # enable the crash collector for ceph daemon crash collection + crashCollector: + disable: false + # Uncomment daysToRetain to prune ceph crash entries older than the + # specified number of days. + # daysToRetain: 30 + + # enable log collector, daemons will log on files and rotate + # logCollector: + # enabled: true + # periodicity: 24h # SUFFIX may be 'h' for hours or 'd' for days. + + # automate [data cleanup process](https://github.com/rook/rook/blob/master/Documentation/ceph-teardown.md#delete-the-data-on-hosts) in cluster destruction. + cleanupPolicy: + # Since cluster cleanup is destructive to data, confirmation is required. + # To destroy all Rook data on hosts during uninstall, confirmation must be set to "yes-really-destroy-data". + # This value should only be set when the cluster is about to be deleted. After the confirmation is set, + # Rook will immediately stop configuring the cluster and only wait for the delete command. + # If the empty string is set, Rook will not destroy any data on hosts during uninstall. + confirmation: "" + # sanitizeDisks represents settings for sanitizing OSD disks on cluster deletion + sanitizeDisks: + # method indicates if the entire disk should be sanitized or simply ceph's metadata + # in both case, re-install is possible + # possible choices are 'complete' or 'quick' (default) + method: quick + # dataSource indicate where to get random bytes from to write on the disk + # possible choices are 'zero' (default) or 'random' + # using random sources will consume entropy from the system and will take much more time then the zero source + dataSource: zero + # iteration overwrite N times instead of the default (1) + # takes an integer value + iteration: 1 + # allowUninstallWithVolumes defines how the uninstall should be performed + # If set to true, cephCluster deletion does not wait for the PVs to be deleted. + allowUninstallWithVolumes: false + + # To control where various services will be scheduled by kubernetes, use the placement configuration sections below. + # The example under 'all' would have all services scheduled on kubernetes nodes labeled with 'role=storage-node' and + # tolerate taints with a key of 'storage-node'. + # placement: + # all: + # nodeAffinity: + # requiredDuringSchedulingIgnoredDuringExecution: + # nodeSelectorTerms: + # - matchExpressions: + # - key: role + # operator: In + # values: + # - storage-node + # podAffinity: + # podAntiAffinity: + # topologySpreadConstraints: + # tolerations: + # - key: storage-node + # operator: Exists + # # The above placement information can also be specified for mon, osd, and mgr components + # mon: + # # Monitor deployments may contain an anti-affinity rule for avoiding monitor + # # collocation on the same node. This is a required rule when host network is used + # # or when AllowMultiplePerNode is false. Otherwise this anti-affinity rule is a + # # preferred rule with weight: 50. + # osd: + # mgr: + # cleanup: + + # annotations: + # all: + # mon: + # osd: + # cleanup: + # prepareosd: + # # If no mgr annotations are set, prometheus scrape annotations will be set by default. + # mgr: + + # labels: + # all: + # mon: + # osd: + # cleanup: + # mgr: + # prepareosd: + # # monitoring is a list of key-value pairs. It is injected into all the monitoring resources created by operator. 
+ # # These labels can be passed as LabelSelector to Prometheus + # monitoring: + + # resources: + # # The requests and limits set here, allow the mgr pod to use half of one CPU core and 1 gigabyte of memory + # mgr: + # limits: + # cpu: "500m" + # memory: "1024Mi" + # requests: + # cpu: "500m" + # memory: "1024Mi" + # # The above example requests/limits can also be added to the other components + # mon: + # osd: + # prepareosd: + # mgr-sidecar: + # crashcollector: + # logcollector: + # cleanup: + + # The option to automatically remove OSDs that are out and are safe to destroy. + removeOSDsIfOutAndSafeToRemove: false + + # priority classes to apply to ceph resources + # priorityClassNames: + # all: rook-ceph-default-priority-class + # mon: rook-ceph-mon-priority-class + # osd: rook-ceph-osd-priority-class + # mgr: rook-ceph-mgr-priority-class + + storage: # cluster level storage configuration and selection + useAllNodes: true + useAllDevices: true + # deviceFilter: + # config: + # crushRoot: "custom-root" # specify a non-default root label for the CRUSH map + # metadataDevice: "md0" # specify a non-rotational storage so ceph-volume will use it as block db device of bluestore. + # databaseSizeMB: "1024" # uncomment if the disks are smaller than 100 GB + # journalSizeMB: "1024" # uncomment if the disks are 20 GB or smaller + # osdsPerDevice: "1" # this value can be overridden at the node or device level + # encryptedDevice: "true" # the default value for this option is "false" + # # Individual nodes and their config can be specified as well, but 'useAllNodes' above must be set to false. Then, only the named + # # nodes below will be used as storage resources. Each node's 'name' field should match their 'kubernetes.io/hostname' label. + # nodes: + # - name: "172.17.4.201" + # devices: # specific devices to use for storage can be specified for each node + # - name: "sdb" + # - name: "nvme01" # multiple osds can be created on high performance devices + # config: + # osdsPerDevice: "5" + # - name: "/dev/disk/by-id/ata-ST4000DM004-XXXX" # devices can be specified using full udev paths + # config: # configuration can be specified at the node level which overrides the cluster level config + # - name: "172.17.4.301" + # deviceFilter: "^sd." + + # The section for configuring management of daemon disruptions during upgrade or fencing. + disruptionManagement: + # If true, the operator will create and manage PodDisruptionBudgets for OSD, Mon, RGW, and MDS daemons. OSD PDBs are managed dynamically + # via the strategy outlined in the [design](https://github.com/rook/rook/blob/master/design/ceph/ceph-managed-disruptionbudgets.md). The operator will + # block eviction of OSDs by default and unblock them safely when drains are detected. + managePodBudgets: true + # A duration in minutes that determines how long an entire failureDomain like `region/zone/host` will be held in `noout` (in addition to the + # default DOWN/OUT interval) when it is draining. This is only relevant when `managePodBudgets` is `true`. The default value is `30` minutes. + osdMaintenanceTimeout: 30 + # A duration in minutes that the operator will wait for the placement groups to become healthy (active+clean) after a drain was completed and OSDs came back up. + # Operator will continue with the next drain if the timeout exceeds. It only works if `managePodBudgets` is `true`. + # No values or 0 means that the operator will wait until the placement groups are healthy before unblocking the next drain. 
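+    # A hedged example (hypothetical value, not the default below): stop waiting
+    # after one hour and move on to the next drain.
+    # pgHealthCheckTimeout: 60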
+ pgHealthCheckTimeout: 0 + # If true, the operator will create and manage MachineDisruptionBudgets to ensure OSDs are only fenced when the cluster is healthy. + # Only available on OpenShift. + manageMachineDisruptionBudgets: false + # Namespace in which to watch for the MachineDisruptionBudgets. + machineDisruptionBudgetNamespace: openshift-machine-api + + # Configure the healthcheck and liveness probes for ceph pods. + # Valid values for daemons are 'mon', 'osd', 'status' + healthCheck: + daemonHealth: + mon: + disabled: false + interval: 45s + osd: + disabled: false + interval: 60s + status: + disabled: false + interval: 60s + # Change pod liveness probe, it works for all mon, mgr, and osd pods. + livenessProbe: + mon: + disabled: false + mgr: + disabled: false + osd: + disabled: false + +ingress: + dashboard: {} + # annotations: + # kubernetes.io/ingress.class: nginx + # external-dns.alpha.kubernetes.io/hostname: example.com + # nginx.ingress.kubernetes.io/rewrite-target: /ceph-dashboard/$2 + # host: + # name: example.com + # path: "/ceph-dashboard(/|$)(.*)" + # tls: + +cephBlockPools: + - name: ceph-blockpool + # see https://github.com/rook/rook/blob/master/Documentation/ceph-pool-crd.md#spec for available configuration + spec: + failureDomain: host + replicated: + size: 3 + storageClass: + enabled: true + name: ceph-block + isDefault: true + reclaimPolicy: Delete + allowVolumeExpansion: true + # see https://github.com/rook/rook/blob/master/Documentation/ceph-block.md#provision-storage for available configuration + parameters: + # (optional) mapOptions is a comma-separated list of map options. + # For krbd options refer + # https://docs.ceph.com/docs/master/man/8/rbd/#kernel-rbd-krbd-options + # For nbd options refer + # https://docs.ceph.com/docs/master/man/8/rbd-nbd/#options + # mapOptions: lock_on_read,queue_depth=1024 + + # (optional) unmapOptions is a comma-separated list of unmap options. + # For krbd options refer + # https://docs.ceph.com/docs/master/man/8/rbd/#kernel-rbd-krbd-options + # For nbd options refer + # https://docs.ceph.com/docs/master/man/8/rbd-nbd/#options + # unmapOptions: force + + # RBD image format. Defaults to "2". + imageFormat: "2" + # RBD image features. Available for imageFormat: "2". CSI RBD currently supports only `layering` feature. + imageFeatures: layering + # The secrets contain Ceph admin credentials. + csi.storage.k8s.io/provisioner-secret-name: rook-csi-rbd-provisioner + csi.storage.k8s.io/provisioner-secret-namespace: rook-ceph + csi.storage.k8s.io/controller-expand-secret-name: rook-csi-rbd-provisioner + csi.storage.k8s.io/controller-expand-secret-namespace: rook-ceph + csi.storage.k8s.io/node-stage-secret-name: rook-csi-rbd-node + csi.storage.k8s.io/node-stage-secret-namespace: rook-ceph + # Specify the filesystem type of the volume. If not specified, csi-provisioner + # will set default as `ext4`. Note that `xfs` is not recommended due to potential deadlock + # in hyperconverged settings where the volume is mounted on the same node as the osds. 
+ csi.storage.k8s.io/fstype: ext4 + +cephFileSystems: + - name: ceph-filesystem + # see https://github.com/rook/rook/blob/master/Documentation/ceph-filesystem-crd.md#filesystem-settings for available configuration + spec: + metadataPool: + replicated: + size: 3 + dataPools: + - failureDomain: host + replicated: + size: 3 + metadataServer: + activeCount: 1 + activeStandby: true + storageClass: + enabled: true + name: ceph-filesystem + reclaimPolicy: Delete + # see https://github.com/rook/rook/blob/master/Documentation/ceph-filesystem.md#provision-storage for available configuration + parameters: + # The secrets contain Ceph admin credentials. + csi.storage.k8s.io/provisioner-secret-name: rook-csi-cephfs-provisioner + csi.storage.k8s.io/provisioner-secret-namespace: rook-ceph + csi.storage.k8s.io/controller-expand-secret-name: rook-csi-cephfs-provisioner + csi.storage.k8s.io/controller-expand-secret-namespace: rook-ceph + csi.storage.k8s.io/node-stage-secret-name: rook-csi-cephfs-node + csi.storage.k8s.io/node-stage-secret-namespace: rook-ceph + # Specify the filesystem type of the volume. If not specified, csi-provisioner + # will set default as `ext4`. Note that `xfs` is not recommended due to potential deadlock + # in hyperconverged settings where the volume is mounted on the same node as the osds. + csi.storage.k8s.io/fstype: ext4 + +cephObjectStores: + - name: ceph-objectstore + # see https://github.com/rook/rook/blob/master/Documentation/ceph-object-store-crd.md#object-store-settings for available configuration + spec: + metadataPool: + failureDomain: host + replicated: + size: 3 + dataPool: + failureDomain: host + erasureCoded: + dataChunks: 2 + codingChunks: 1 + preservePoolsOnDelete: true + gateway: + port: 80 + # securePort: 443 + # sslCertificateRef: + instances: 1 + healthCheck: + bucket: + interval: 60s + storageClass: + enabled: true + name: ceph-bucket + reclaimPolicy: Delete + # see https://github.com/rook/rook/blob/master/Documentation/ceph-object-bucket-claim.md#storageclass for available configuration + parameters: + # note: objectStoreNamespace and objectStoreName are configured by the chart + region: us-east-1
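+
+# The values above appear to target the rook-ceph-cluster chart; the README's new
+# Helm section only installs the operator chart. A sketch of how this file would
+# presumably be applied, assuming the rook-release repo added in the README:
+#
+#   helm install --create-namespace --namespace rook-ceph rook-ceph-cluster \
+#     rook-release/rook-ceph-cluster -f rook/values.yaml
+#
+# Once the cluster is healthy, the storage classes defined above (ceph-block,
+# ceph-filesystem, ceph-bucket) can be consumed as usual. For example, a PVC
+# against the default block class (names here are illustrative):
+#
+#   apiVersion: v1
+#   kind: PersistentVolumeClaim
+#   metadata:
+#     name: example-pvc
+#   spec:
+#     accessModes: ["ReadWriteOnce"]
+#     resources:
+#       requests:
+#         storage: 10Gi
+#     storageClassName: ceph-block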