diff options
109 files changed, 2954 insertions, 507 deletions
diff --git a/.redhat-ci.inventory b/.papr.inventory index 23bc9923c..23bc9923c 100644 --- a/.redhat-ci.inventory +++ b/.papr.inventory diff --git a/.redhat-ci.sh b/.papr.sh index fce8c1d52..decca625f 100755 --- a/.redhat-ci.sh +++ b/.papr.sh @@ -1,10 +1,12 @@ #!/bin/bash set -xeuo pipefail +echo "Targeting OpenShift Origin $OPENSHIFT_IMAGE_TAG" + pip install -r requirements.txt # ping the nodes to check they're responding and register their ostree versions -ansible -vvv -i .redhat-ci.inventory nodes -a 'rpm-ostree status' +ansible -vvv -i .papr.inventory nodes -a 'rpm-ostree status' upload_journals() { mkdir journals @@ -16,7 +18,9 @@ upload_journals() { trap upload_journals ERR # run the actual installer -ansible-playbook -vvv -i .redhat-ci.inventory playbooks/byo/config.yml +# FIXME: override openshift_image_tag defined in the inventory until +# https://github.com/openshift/openshift-ansible/issues/4478 is fixed. +ansible-playbook -vvv -i .papr.inventory playbooks/byo/config.yml -e "openshift_image_tag=$OPENSHIFT_IMAGE_TAG" # run a small subset of origin conformance tests to sanity # check the cluster NB: we run it on the master since we may diff --git a/.papr.yml b/.papr.yml new file mode 100644 index 000000000..16d6e78b1 --- /dev/null +++ b/.papr.yml @@ -0,0 +1,42 @@ +--- + +# This YAML file is used by PAPR. It details the test +# environment to provision and the test procedure. 
For more +# information on PAPR, see: +# +# https://github.com/projectatomic/papr +# +# The PAPR YAML specification detailing allowed fields can +# be found at: +# +# https://github.com/projectatomic/papr/blob/master/sample.papr.yml + +cluster: + hosts: + - name: ocp-master + distro: fedora/25/atomic + - name: ocp-node1 + distro: fedora/25/atomic + - name: ocp-node2 + distro: fedora/25/atomic + container: + image: fedora:25 + +packages: + - gcc + - python-pip + - python-devel + - libffi-devel + - openssl-devel + - redhat-rpm-config + +context: 'fedora/25/atomic' + +env: + OPENSHIFT_IMAGE_TAG: v3.6.0-alpha.1 + +tests: + - ./.papr.sh + +artifacts: + - journals/ diff --git a/.redhat-ci.yml b/.redhat-ci.yml deleted file mode 100644 index 6dac7b256..000000000 --- a/.redhat-ci.yml +++ /dev/null @@ -1,30 +0,0 @@ ---- - -cluster: - hosts: - - name: ocp-master - distro: fedora/25/atomic - - name: ocp-node1 - distro: fedora/25/atomic - - name: ocp-node2 - distro: fedora/25/atomic - container: - image: fedora:25 - -packages: - - gcc - - python-pip - - python-devel - - openssl-devel - - redhat-rpm-config - -context: 'fedora/25/atomic | origin/v3.6.0-alpha.1' - -env: - OPENSHIFT_IMAGE_TAG: v3.6.0-alpha.1 - -tests: - - ./.redhat-ci.sh - -artifacts: - - journals/ diff --git a/.tito/packages/openshift-ansible b/.tito/packages/openshift-ansible index de50a51a0..baf77acc6 100644 --- a/.tito/packages/openshift-ansible +++ b/.tito/packages/openshift-ansible @@ -1 +1 @@ -3.6.108-1 ./ +3.6.114-1 ./ diff --git a/docs/pull_requests.md b/docs/pull_requests.md index fcc3e275c..45ae01a9d 100644 --- a/docs/pull_requests.md +++ b/docs/pull_requests.md @@ -10,8 +10,8 @@ Whenever a [Pull Request is opened](../CONTRIBUTING.md#submitting-contributions), some automated test jobs must be successfully run before the PR can be merged. -Some of these jobs are automatically triggered, e.g., Travis and Coveralls. 
-Other jobs need to be manually triggered by a member of the +Some of these jobs are automatically triggered, e.g., Travis, PAPR, and +Coveralls. Other jobs need to be manually triggered by a member of the [Team OpenShift Ansible Contributors](https://github.com/orgs/openshift/teams/team-openshift-ansible-contributors). ## Triggering tests @@ -48,9 +48,9 @@ simplifying the workflow towards a single infrastructure in the future. There are a set of tests that run on Fedora infrastructure. They are started automatically with every pull request. -They are implemented using the [`redhat-ci` framework](https://github.com/jlebon/redhat-ci). +They are implemented using the [`PAPR` framework](https://github.com/projectatomic/papr). -To re-run tests, write a comment containing `bot, retest this please`. +To re-run tests, write a comment containing only `bot, retest this please`. ## Triggering merge diff --git a/docs/repo_structure.md b/docs/repo_structure.md index 693837fba..f598f22c3 100644 --- a/docs/repo_structure.md +++ b/docs/repo_structure.md @@ -52,3 +52,16 @@ These are plugins used in playbooks and roles: . └── test Contains tests. ``` + +### CI + +These files are used by [PAPR](https://github.com/projectatomic/papr). +It is very similar in workflow to Travis, with the test +environment and test scripts defined in a YAML file. + +``` +. +├── .papr.yml +├── .papr.sh +└── .papr.inventory +``` diff --git a/images/installer/system-container/README.md b/images/installer/system-container/README.md index dc95307e5..fbcd47c4a 100644 --- a/images/installer/system-container/README.md +++ b/images/installer/system-container/README.md @@ -11,3 +11,21 @@ These files are needed to run the installer using an [Atomic System container](h * service.template - Template file for the systemd service. * tmpfiles.template - Template file for systemd-tmpfiles. + +## Options + +These options may be set via the ``atomic`` ``--set`` flag. 
For defaults see ``root/exports/manifest.json`` + +* OPTS - Additional options to pass to ansible when running the installer + +* VAR_LIB_OPENSHIFT_INSTALLER - Full path of the installer code to mount into the container + +* VAR_LOG_OPENSHIFT_LOG - Full path of the log file to mount into the container + +* PLAYBOOK_FILE - Full path of the playbook inside the container + +* HOME_ROOT - Full path on host to mount as the root home directory inside the container (for .ssh/, etc..) + +* ANSIBLE_CONFIG - Full path for the ansible configuration file to use inside the container + +* INVENTORY_FILE - Full path for the inventory to use from the host diff --git a/images/installer/system-container/root/exports/config.json.template b/images/installer/system-container/root/exports/config.json.template index 397ac941a..739c0080f 100644 --- a/images/installer/system-container/root/exports/config.json.template +++ b/images/installer/system-container/root/exports/config.json.template @@ -21,7 +21,8 @@ "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", "TERM=xterm", "OPTS=$OPTS", - "PLAYBOOK_FILE=$PLAYBOOK_FILE" + "PLAYBOOK_FILE=$PLAYBOOK_FILE", + "ANSIBLE_CONFIG=$ANSIBLE_CONFIG" ], "cwd": "/opt/app-root/src/", "rlimits": [ diff --git a/images/installer/system-container/root/exports/manifest.json b/images/installer/system-container/root/exports/manifest.json index f735494d4..321a84ee8 100644 --- a/images/installer/system-container/root/exports/manifest.json +++ b/images/installer/system-container/root/exports/manifest.json @@ -6,6 +6,7 @@ "VAR_LOG_OPENSHIFT_LOG": "/var/log/ansible.log", "PLAYBOOK_FILE": "/usr/share/ansible/openshift-ansible/playbooks/byo/config.yml", "HOME_ROOT": "/root", + "ANSIBLE_CONFIG": "/usr/share/ansible/openshift-ansible/ansible.cfg", "INVENTORY_FILE": "/dev/null" } } diff --git a/inventory/byo/hosts.byo.native-glusterfs.example b/inventory/byo/hosts.byo.native-glusterfs.example index 2dbb57d40..dc847a5b2 100644 --- 
a/inventory/byo/hosts.byo.native-glusterfs.example +++ b/inventory/byo/hosts.byo.native-glusterfs.example @@ -24,7 +24,7 @@ glusterfs [OSEv3:vars] ansible_ssh_user=root -deployment_type=origin +openshift_deployment_type=origin # Specify that we want to use GlusterFS storage for a hosted registry openshift_hosted_registry_storage_kind=glusterfs diff --git a/inventory/byo/hosts.origin.example b/inventory/byo/hosts.origin.example index 86b4de4b7..962a01a91 100644 --- a/inventory/byo/hosts.origin.example +++ b/inventory/byo/hosts.origin.example @@ -191,6 +191,13 @@ openshift_master_identity_providers=[{'name': 'htpasswd_auth', 'login': 'true', # or #openshift_master_request_header_ca_file=<path to local ca file to use> +# CloudForms Management Engine (ManageIQ) App Install +# +# Enables installation of MIQ server. Recommended for dedicated +# clusters only. See roles/openshift_cfme/README.md for instructions +# and requirements. +#openshift_cfme_install_app=False + # Cloud Provider Configuration # # Note: You may make use of environment variables rather than store @@ -786,6 +793,12 @@ openshift_master_identity_providers=[{'name': 'htpasswd_auth', 'login': 'true', # Enable API service auditing, available as of 1.3 #openshift_master_audit_config={"enabled": true} +# +# In case you want more advanced setup for the auditlog you can +# use this line. 
+# The directory in "auditFilePath" will be created if it does not +# exist. +#openshift_master_audit_config={"enabled": true, "auditFilePath": "/var/log/openpaas-oscp-audit/openpaas-oscp-audit.log", "maximumFileRetentionDays": 14, "maximumFileSizeMegabytes": 500, "maximumRetainedFiles": 5} # Enable origin repos that point at Centos PAAS SIG, defaults to true, only used # by deployment_type=origin diff --git a/inventory/byo/hosts.ose.example b/inventory/byo/hosts.ose.example index cbaf22810..63f1f00d2 100644 --- a/inventory/byo/hosts.ose.example +++ b/inventory/byo/hosts.ose.example @@ -190,6 +190,13 @@ openshift_master_identity_providers=[{'name': 'htpasswd_auth', 'login': 'true', # or #openshift_master_request_header_ca_file=<path to local ca file to use> +# CloudForms Management Engine (ManageIQ) App Install +# +# Enables installation of MIQ server. Recommended for dedicated +# clusters only. See roles/openshift_cfme/README.md for instructions +# and requirements. +#openshift_cfme_install_app=False + # Cloud Provider Configuration # # Note: You may make use of environment variables rather than store @@ -786,6 +793,12 @@ openshift_master_identity_providers=[{'name': 'htpasswd_auth', 'login': 'true', # Enable API service auditing, available as of 3.2 #openshift_master_audit_config={"enabled": true} +# +# In case you want more advanced setup for the auditlog you can +# use this line. +# The directory in "auditFilePath" will be created if it does not +# exist. +#openshift_master_audit_config={"enabled": true, "auditFilePath": "/var/log/openpaas-oscp-audit/openpaas-oscp-audit.log", "maximumFileRetentionDays": 14, "maximumFileSizeMegabytes": 500, "maximumRetainedFiles": 5} # Validity of the auto-generated OpenShift certificates in days. # See also openshift_hosted_registry_cert_expire_days above. 
diff --git a/openshift-ansible.spec b/openshift-ansible.spec index 4685b4c99..17e816edd 100644 --- a/openshift-ansible.spec +++ b/openshift-ansible.spec @@ -9,7 +9,7 @@ %global __requires_exclude ^/usr/bin/ansible-playbook$ Name: openshift-ansible -Version: 3.6.108 +Version: 3.6.114 Release: 1%{?dist} Summary: Openshift and Atomic Enterprise Ansible License: ASL 2.0 @@ -280,6 +280,71 @@ Atomic OpenShift Utilities includes %changelog +* Fri Jun 16 2017 Jenkins CD Merge Bot <smunilla@redhat.com> 3.6.114-1 +- + +* Fri Jun 16 2017 Jenkins CD Merge Bot <smunilla@redhat.com> 3.6.113-1 +- Make rollout status check best-effort, add poll (skuznets@redhat.com) +- Verify the rollout status of the hosted router and registry + (skuznets@redhat.com) +- fix es routes for new logging roles (rmeggins@redhat.com) + +* Thu Jun 15 2017 Jenkins CD Merge Bot <smunilla@redhat.com> 3.6.112-1 +- Add the the other featured audit-config paramters as example (al- + git001@none.at) + +* Thu Jun 15 2017 Jenkins CD Merge Bot <smunilla@redhat.com> 3.6.111-1 +- doc: Info for system container installer options (smilner@redhat.com) +- Add ANSIBLE_CONFIG to system container installer (smilner@redhat.com) +- Add missing file. Remove debugging prompt. (tbielawa@redhat.com) +- Update readme one last time (tbielawa@redhat.com) +- Reconfigure masters in serial to avoid HA meltdowns (tbielawa@redhat.com) +- First POC of a CFME turnkey solution in openshift-anisble + (tbielawa@redhat.com) +- Reverted most of this pr 4356 except: adding + openshift_logging_fluentd_buffer_queue_limit: 1024 + openshift_logging_fluentd_buffer_size_limit: 1m + openshift_logging_mux_buffer_queue_limit: 1024 + openshift_logging_mux_buffer_size_limit: 1m and setting the matched + environment variables. (nhosoi@redhat.com) +- Adding the defaults for openshift_logging_fluentd_{cpu,memory}_limit to + roles/openshift_logging_fluentd/defaults/main.yml. 
(nhosoi@redhat.com) +- Adding environment variables FLUENTD_CPU_LIMIT, FLUENTD_MEMORY_LIMIT, + MUX_CPU_LIMIT, MUX_MEMORY_LIMIT. (nhosoi@redhat.com) +- Introducing fluentd/mux buffer_queue_limit, buffer_size_limit, cpu_limit, and + memory_limit. (nhosoi@redhat.com) + +* Thu Jun 15 2017 Jenkins CD Merge Bot <smunilla@redhat.com> 3.6.110-1 +- papr: add documentation to YAML and simplify context (jlebon@redhat.com) +- docs: better documentation for PAPR (jlebon@redhat.com) +- papr: install libffi-devel (jlebon@redhat.com) +- pre-install checks: add more during byo install (lmeyer@redhat.com) +- move etcd backup to etcd_common role (jchaloup@redhat.com) +- Support installing HOSA via ansible (mwringe@redhat.com) +- GlusterFS: Remove requirement for heketi-cli (jarrpa@redhat.com) +- GlusterFS: Fix bugs in wipe (jarrpa@redhat.com) +- GlusterFS: Skip heketi-cli install on Atomic (jarrpa@redhat.com) +- GlusterFS: Create a StorageClass if specified (jarrpa@redhat.com) +- GlusterFS: Use proper secrets (jarrpa@redhat.com) +- GlusterFS: Allow cleaner separation of multiple clusters (jarrpa@redhat.com) +- GlusterFS: Minor corrections and cleanups (jarrpa@redhat.com) +- GlusterFS: Improve documentation (jarrpa@redhat.com) +- GlusterFS: Allow configuration of kube namespace for heketi + (jarrpa@redhat.com) +- GlusterFS: Adjust when clauses for registry config (jarrpa@redhat.com) +- GlusterFS: Allow failure reporting when deleting deploy-heketi + (jarrpa@redhat.com) +- GlusterFS: Tweak pod probe parameters (jarrpa@redhat.com) +- GlusterFS: Allow for configuration of node selector (jarrpa@redhat.com) +- GlusterFS: Label on Openshift node name (jarrpa@redhat.com) +- GlusterFS: Make sure timeout is an int (jarrpa@redhat.com) +- GlusterFS: Use groups variables (jarrpa@redhat.com) +- papr: rename redhat-ci related files to papr (jlebon@redhat.com) +- singletonize some role tasks that repeat a lot (lmeyer@redhat.com) + +* Wed Jun 14 2017 Jenkins CD Merge Bot <smunilla@redhat.com> 
3.6.109-1 +- + * Wed Jun 14 2017 Jenkins CD Merge Bot <smunilla@redhat.com> 3.6.108-1 - Upgraded Calico to 2.2.1 Release (vincent.schwarzer@yahoo.de) diff --git a/playbooks/adhoc/uninstall.yml b/playbooks/adhoc/uninstall.yml index 97d835eae..27c3a9edd 100644 --- a/playbooks/adhoc/uninstall.yml +++ b/playbooks/adhoc/uninstall.yml @@ -103,7 +103,7 @@ - atomic-openshift-sdn-ovs - cockpit-bridge - cockpit-docker - - cockpit-shell + - cockpit-system - cockpit-ws - kubernetes-client - openshift @@ -346,7 +346,7 @@ - atomic-openshift-master - cockpit-bridge - cockpit-docker - - cockpit-shell + - cockpit-system - cockpit-ws - corosync - kubernetes-client diff --git a/playbooks/byo/openshift-cfme/config.yml b/playbooks/byo/openshift-cfme/config.yml new file mode 100644 index 000000000..0e8e7a94d --- /dev/null +++ b/playbooks/byo/openshift-cfme/config.yml @@ -0,0 +1,8 @@ +--- +- include: ../openshift-cluster/initialize_groups.yml + tags: + - always + +- include: ../../common/openshift-cluster/evaluate_groups.yml + +- include: ../../common/openshift-cfme/config.yml diff --git a/playbooks/byo/openshift-cfme/uninstall.yml b/playbooks/byo/openshift-cfme/uninstall.yml new file mode 100644 index 000000000..c8ed16859 --- /dev/null +++ b/playbooks/byo/openshift-cfme/uninstall.yml @@ -0,0 +1,6 @@ +--- +# - include: ../openshift-cluster/initialize_groups.yml +# tags: +# - always + +- include: ../../common/openshift-cfme/uninstall.yml diff --git a/playbooks/byo/openshift-cluster/config.yml b/playbooks/byo/openshift-cluster/config.yml index fd4a9eb26..2372a5322 100644 --- a/playbooks/byo/openshift-cluster/config.yml +++ b/playbooks/byo/openshift-cluster/config.yml @@ -15,6 +15,11 @@ checks: - disk_availability - memory_availability + - package_availability + - package_update + - package_version + - docker_image_availability + - docker_storage - include: ../../common/openshift-cluster/std_include.yml tags: diff --git a/playbooks/common/openshift-cfme/config.yml 
b/playbooks/common/openshift-cfme/config.yml new file mode 100644 index 000000000..533a35d9e --- /dev/null +++ b/playbooks/common/openshift-cfme/config.yml @@ -0,0 +1,44 @@ +--- +# TODO: Make this work. The 'name' variable below is undefined +# presently because it's part of the cfme role. This play can't run +# until that's re-worked. +# +# - name: Pre-Pull manageiq-pods docker images +# hosts: nodes +# tasks: +# - name: Ensure the latest manageiq-pods docker image is pulling +# docker_image: +# name: "{{ openshift_cfme_container_image }}" +# # Fire-and-forget method, never timeout +# async: 99999999999 +# # F-a-f, never check on this. True 'background' task. +# poll: 0 + +- name: Configure Masters for CFME Bulk Image Imports + hosts: oo_masters_to_config + serial: 1 + tasks: + - name: Run master cfme tuning playbook + include_role: + name: openshift_cfme + tasks_from: tune_masters + +- name: Setup CFME + hosts: oo_first_master + vars: + r_openshift_cfme_miq_template_content: "{{ lookup('file', 'roles/openshift_cfme/files/miq-template.yaml') | from_yaml}}" + pre_tasks: + - name: Create a temporary place to evaluate the PV templates + command: mktemp -d /tmp/openshift-ansible-XXXXXXX + register: r_openshift_cfme_mktemp + changed_when: false + - name: Ensure the server template was read from disk + debug: + msg="{{ r_openshift_cfme_miq_template_content | from_yaml }}" + + tasks: + - name: Run the CFME Setup Role + include_role: + name: openshift_cfme + vars: + template_dir: "{{ hostvars[groups.masters.0].r_openshift_cfme_mktemp.stdout }}" diff --git a/playbooks/common/openshift-cfme/filter_plugins b/playbooks/common/openshift-cfme/filter_plugins new file mode 120000 index 000000000..99a95e4ca --- /dev/null +++ b/playbooks/common/openshift-cfme/filter_plugins @@ -0,0 +1 @@ +../../../filter_plugins
\ No newline at end of file diff --git a/playbooks/common/openshift-cfme/library b/playbooks/common/openshift-cfme/library new file mode 120000 index 000000000..ba40d2f56 --- /dev/null +++ b/playbooks/common/openshift-cfme/library @@ -0,0 +1 @@ +../../../library
\ No newline at end of file diff --git a/playbooks/common/openshift-cfme/roles b/playbooks/common/openshift-cfme/roles new file mode 120000 index 000000000..20c4c58cf --- /dev/null +++ b/playbooks/common/openshift-cfme/roles @@ -0,0 +1 @@ +../../../roles
\ No newline at end of file diff --git a/playbooks/common/openshift-cfme/uninstall.yml b/playbooks/common/openshift-cfme/uninstall.yml new file mode 100644 index 000000000..78b8e7668 --- /dev/null +++ b/playbooks/common/openshift-cfme/uninstall.yml @@ -0,0 +1,8 @@ +--- +- name: Uninstall CFME + hosts: masters + tasks: + - name: Run the CFME Uninstall Role Tasks + include_role: + name: openshift_cfme + tasks_from: uninstall diff --git a/playbooks/common/openshift-cluster/upgrades/etcd/backup.yml b/playbooks/common/openshift-cluster/upgrades/etcd/backup.yml index b7fd2c0c5..616ba04f8 100644 --- a/playbooks/common/openshift-cluster/upgrades/etcd/backup.yml +++ b/playbooks/common/openshift-cluster/upgrades/etcd/backup.yml @@ -3,12 +3,12 @@ hosts: oo_etcd_hosts_to_backup roles: - role: openshift_facts - - role: etcd_upgrade - r_etcd_upgrade_action: backup - r_etcd_backup_tag: etcd_backup_tag + - role: etcd_common + r_etcd_common_action: backup + r_etcd_common_backup_tag: etcd_backup_tag r_etcd_common_etcd_runtime: "{{ openshift.common.etcd_runtime }}" - r_etcd_upgrade_embedded_etcd: "{{ groups.oo_etcd_to_config | default([]) | length == 0 }}" - r_etcd_backup_sufix_name: "{{ lookup('pipe', 'date +%Y%m%d%H%M%S') }}" + r_etcd_common_embedded_etcd: "{{ groups.oo_etcd_to_config | default([]) | length == 0 }}" + r_etcd_common_backup_sufix_name: "{{ lookup('pipe', 'date +%Y%m%d%H%M%S') }}" - name: Gate on etcd backup hosts: localhost @@ -18,7 +18,7 @@ - set_fact: etcd_backup_completed: "{{ hostvars | oo_select_keys(groups.oo_etcd_hosts_to_backup) - | oo_collect('inventory_hostname', {'r_etcd_upgrade_backup_complete': true}) }}" + | oo_collect('inventory_hostname', {'r_etcd_common_backup_complete': true}) }}" - set_fact: etcd_backup_failed: "{{ groups.oo_etcd_hosts_to_backup | difference(etcd_backup_completed) }}" - fail: diff --git a/playbooks/common/openshift-cluster/upgrades/etcd/main.yml b/playbooks/common/openshift-cluster/upgrades/etcd/main.yml index 3e01883ae..64abc54e7 
100644 --- a/playbooks/common/openshift-cluster/upgrades/etcd/main.yml +++ b/playbooks/common/openshift-cluster/upgrades/etcd/main.yml @@ -16,7 +16,8 @@ tasks: - include_role: name: etcd_common - tasks_from: etcdctl.yml + vars: + r_etcd_common_action: drop_etcdctl - name: Perform etcd upgrade include: ./upgrade.yml diff --git a/roles/cockpit/tasks/main.yml b/roles/cockpit/tasks/main.yml index bddad778f..57f49ea11 100644 --- a/roles/cockpit/tasks/main.yml +++ b/roles/cockpit/tasks/main.yml @@ -3,7 +3,7 @@ package: name={{ item }} state=present with_items: - cockpit-ws - - cockpit-shell + - cockpit-system - cockpit-bridge - cockpit-docker - "{{ cockpit_plugins }}" diff --git a/roles/etcd/tasks/main.yml b/roles/etcd/tasks/main.yml index fa2f44609..586aebb11 100644 --- a/roles/etcd/tasks/main.yml +++ b/roles/etcd/tasks/main.yml @@ -122,7 +122,8 @@ - include_role: name: etcd_common - tasks_from: etcdctl.yml + vars: + r_etcd_common_action: drop_etcdctl when: openshift_etcd_etcdctl_profile | default(true) | bool - name: Set fact etcd_service_status_changed diff --git a/roles/etcd_common/defaults/main.yml b/roles/etcd_common/defaults/main.yml index 14e712fcf..8cc7a9c20 100644 --- a/roles/etcd_common/defaults/main.yml +++ b/roles/etcd_common/defaults/main.yml @@ -1,8 +1,18 @@ --- +# Default action when calling this role +r_etcd_common_action: noop +r_etcd_common_backup_tag: '' +r_etcd_common_backup_sufix_name: '' + # runc, docker, host r_etcd_common_etcd_runtime: "docker" r_etcd_common_embedded_etcd: false +# etcd run on a host => use etcdctl command directly +# etcd run as a docker container => use docker exec +# etcd run as a runc container => use runc exec +r_etcd_common_etcdctl_command: "{{ 'etcdctl' if r_etcd_common_etcd_runtime == 'host' or r_etcd_common_embedded_etcd | bool else 'docker exec etcd_container etcdctl' if r_etcd_common_etcd_runtime == 'docker' else 'runc exec etcd etcdctl' }}" + # etcd server vars etcd_conf_dir: '/etc/etcd' 
r_etcd_common_system_container_host_dir: /var/lib/etcd/etcd.etcd diff --git a/roles/etcd_upgrade/tasks/backup.yml b/roles/etcd_common/tasks/backup.yml index 1ea6fc59f..4a4832275 100644 --- a/roles/etcd_upgrade/tasks/backup.yml +++ b/roles/etcd_common/tasks/backup.yml @@ -1,15 +1,11 @@ --- -# INPUT r_etcd_backup_sufix_name -# INPUT r_etcd_backup_tag -# OUTPUT r_etcd_upgrade_backup_complete - set_fact: - # ORIGIN etcd_data_dir etcd_common.defaults - l_etcd_backup_dir: "{{ etcd_data_dir }}/openshift-backup-{{ r_etcd_backup_tag | default('') }}{{ r_etcd_backup_sufix_name }}" + l_etcd_backup_dir: "{{ etcd_data_dir }}/openshift-backup-{{ r_etcd_common_backup_tag }}{{ r_etcd_common_backup_sufix_name }}" # TODO: replace shell module with command and update later checks - name: Check available disk space for etcd backup shell: df --output=avail -k {{ etcd_data_dir }} | tail -n 1 - register: avail_disk + register: l_avail_disk # AUDIT:changed_when: `false` because we are only inspecting # state, not manipulating anything changed_when: false @@ -17,8 +13,8 @@ # TODO: replace shell module with command and update later checks - name: Check current etcd disk usage shell: du --exclude='*openshift-backup*' -k {{ etcd_data_dir }} | tail -n 1 | cut -f1 - register: etcd_disk_usage - when: r_etcd_upgrade_embedded_etcd | bool + register: l_etcd_disk_usage + when: r_etcd_common_embedded_etcd | bool # AUDIT:changed_when: `false` because we are only inspecting # state, not manipulating anything changed_when: false @@ -26,9 +22,9 @@ - name: Abort if insufficient disk space for etcd backup fail: msg: > - {{ etcd_disk_usage.stdout }} Kb disk space required for etcd backup, - {{ avail_disk.stdout }} Kb available. - when: (r_etcd_upgrade_embedded_etcd | bool) and (etcd_disk_usage.stdout|int > avail_disk.stdout|int) + {{ l_etcd_disk_usage.stdout }} Kb disk space required for etcd backup, + {{ l_avail_disk.stdout }} Kb available. 
+ when: (r_etcd_common_embedded_etcd | bool) and (l_etcd_disk_usage.stdout|int > l_avail_disk.stdout|int) # For non containerized and non embedded we should have the correct version of # etcd installed already. So don't do anything. @@ -37,17 +33,22 @@ # # For embedded non containerized we need to ensure we have the latest version # etcd on the host. +- name: Detecting Atomic Host Operating System + stat: + path: /run/ostree-booted + register: l_ostree_booted + - name: Install latest etcd for embedded package: name: etcd state: latest when: - - r_etcd_upgrade_embedded_etcd | bool + - r_etcd_common_embedded_etcd | bool - not l_ostree_booted.stat.exists | bool - name: Generate etcd backup command: > - {{ etcdctl_command }} backup --data-dir={{ etcd_data_dir }} + {{ r_etcd_common_etcdctl_command }} backup --data-dir={{ etcd_data_dir }} --backup-dir={{ l_etcd_backup_dir }} # According to the docs change you can simply copy snap/db @@ -55,16 +56,16 @@ - name: Check for v3 data store stat: path: "{{ etcd_data_dir }}/member/snap/db" - register: v3_db + register: l_v3_db - name: Copy etcd v3 data store command: > cp -a {{ etcd_data_dir }}/member/snap/db {{ l_etcd_backup_dir }}/member/snap/ - when: v3_db.stat.exists + when: l_v3_db.stat.exists - set_fact: - r_etcd_upgrade_backup_complete: True + r_etcd_common_backup_complete: True - name: Display location of etcd backup debug: diff --git a/roles/etcd_common/tasks/etcdctl.yml b/roles/etcd_common/tasks/drop_etcdctl.yml index 6cb456677..6cb456677 100644 --- a/roles/etcd_common/tasks/etcdctl.yml +++ b/roles/etcd_common/tasks/drop_etcdctl.yml diff --git a/roles/etcd_common/tasks/main.yml b/roles/etcd_common/tasks/main.yml new file mode 100644 index 000000000..6ed87e6c7 --- /dev/null +++ b/roles/etcd_common/tasks/main.yml @@ -0,0 +1,9 @@ +--- +- name: Fail if invalid r_etcd_common_action provided + fail: + msg: "etcd_common role can only be called with 'noop' or 'backup' or 'drop_etcdctl'" + when: r_etcd_common_action not in 
['noop', 'backup', 'drop_etcdctl'] + +- name: Include main action task file + include: "{{ r_etcd_common_action }}.yml" + when: r_etcd_common_action != "noop" diff --git a/roles/etcd_upgrade/defaults/main.yml b/roles/etcd_upgrade/defaults/main.yml index b61bf526c..61bbba225 100644 --- a/roles/etcd_upgrade/defaults/main.yml +++ b/roles/etcd_upgrade/defaults/main.yml @@ -1,9 +1,3 @@ --- r_etcd_upgrade_action: upgrade r_etcd_upgrade_mechanism: rpm -r_etcd_upgrade_embedded_etcd: false -r_etcd_common_embedded_etcd: "{{ r_etcd_upgrade_embedded_etcd }}" -# etcd run on a host => use etcdctl command directly -# etcd run as a docker container => use docker exec -# etcd run as a runc container => use runc exec -etcdctl_command: "{{ 'etcdctl' if r_etcd_common_etcd_runtime == 'host' or r_etcd_upgrade_embedded_etcd | bool else 'docker exec etcd_container etcdctl' if r_etcd_common_etcd_runtime == 'docker' else 'runc exec etcd etcdctl' }}" diff --git a/roles/etcd_upgrade/tasks/main.yml b/roles/etcd_upgrade/tasks/main.yml index 5178c14e3..129c69d6b 100644 --- a/roles/etcd_upgrade/tasks/main.yml +++ b/roles/etcd_upgrade/tasks/main.yml @@ -2,9 +2,9 @@ # INPUT r_etcd_upgrade_action - name: Fail if invalid etcd_upgrade_action provided fail: - msg: "etcd_upgrade role can only be called with 'upgrade' or 'backup'" + msg: "etcd_upgrade role can only be called with 'upgrade'" when: - - r_etcd_upgrade_action not in ['upgrade', 'backup'] + - r_etcd_upgrade_action not in ['upgrade'] - name: Detecting Atomic Host Operating System stat: diff --git a/roles/lib_openshift/library/oc_obj.py b/roles/lib_openshift/library/oc_obj.py index 56af303cc..9b0c0e0e4 100644 --- a/roles/lib_openshift/library/oc_obj.py +++ b/roles/lib_openshift/library/oc_obj.py @@ -90,9 +90,9 @@ options: required: false default: str aliases: [] - all_namespace: + all_namespaces: description: - - The namespace where the object lives. + - Search in all namespaces for the object. 
required: false default: false aliases: [] diff --git a/roles/lib_openshift/src/doc/obj b/roles/lib_openshift/src/doc/obj index 4ff912b2d..c6504ed01 100644 --- a/roles/lib_openshift/src/doc/obj +++ b/roles/lib_openshift/src/doc/obj @@ -39,9 +39,9 @@ options: required: false default: str aliases: [] - all_namespace: + all_namespaces: description: - - The namespace where the object lives. + - Search in all namespaces for the object. required: false default: false aliases: [] diff --git a/roles/openshift_cfme/README.md b/roles/openshift_cfme/README.md new file mode 100644 index 000000000..8283afed6 --- /dev/null +++ b/roles/openshift_cfme/README.md @@ -0,0 +1,404 @@ +# OpenShift-Ansible - CFME Role + +# PROOF OF CONCEPT - Alpha Version + +This role is based on the work in the upstream +[manageiq/manageiq-pods](https://github.com/ManageIQ/manageiq-pods) +project. For additional literature on configuration specific to +ManageIQ (optional post-installation tasks), visit the project's +[upstream documentation page](http://manageiq.org/docs/get-started/basic-configuration). + +Please submit a +[new issue](https://github.com/openshift/openshift-ansible/issues/new) +if you run into bugs with this role or wish to request enhancements. + +# Important Notes + +This is an early *proof of concept* role to install the Cloud Forms +Management Engine (ManageIQ) on OpenShift Container Platform (OCP). + +* This role is still in **ALPHA STATUS** +* Many options are hard-coded still (ex: NFS setup) +* Not many configurable options yet +* **Should** be run on a dedicated cluster +* **Will not run** on undersized infra +* The terms *CFME* and *MIQ* / *ManageIQ* are interchangeable + +## Requirements + +**NOTE:** These requirements are copied from the upstream +[manageiq/manageiq-pods](https://github.com/ManageIQ/manageiq-pods) +project. 
+ +### Prerequisites: + +* + [OpenShift Origin 1.5](https://docs.openshift.com/container-platform/3.5/welcome/index.html) + or + [higher](https://docs.openshift.com/container-platform/latest/welcome/index.html) + provisioned +* NFS or other compatible volume provider +* A cluster-admin user (created by role if required) + +### Cluster Sizing + +In order to avoid random deployment failures due to resource +starvation, we recommend a minimum cluster size for a **test** +environment. + +| Type | Size | CPUs | Memory | +|----------------|---------|----------|----------| +| Masters | `1+` | `8` | `12GB` | +| Nodes | `2+` | `4` | `8GB` | +| PV Storage | `25GB` | `N/A` | `N/A` | + + +![Basic CFME Deployment](img/CFMEBasicDeployment.png) + +**CFME has hard-requirements for memory. CFME will NOT install if your + infrastructure does not meet or exceed the requirements given + above. Do not run this playbook if you do not have the required + memory, you will just waste your time.** + + +### Other sizing considerations + +* Recommendations assume MIQ will be the **only application running** + on this cluster. +* Alternatively, you can provision an infrastructure node to run + registry/metrics/router/logging pods. +* Each MIQ application pod will consume at least `3GB` of RAM on initial + deployment (blank deployment without providers). +* RAM consumption will ramp up higher depending on appliance use, once + providers are added expect higher resource consumption. 
+ + +### Assumptions + +1) You meet/exceed the [cluster sizing](#cluster-sizing) requirements +1) Your NFS server is on your master host +1) Your PV backing NFS storage volume is mounted on `/exports/` + +Required directories that NFS will export to back the PVs: + +* `/exports/miq-pv0[123]` + +If the required directories are not present at install-time, they will +be created using the recommended permissions per the +[upstream documentation](https://github.com/ManageIQ/manageiq-pods#make-persistent-volumes-to-host-the-miq-database-and-application-data): + +* UID/GID: `root`/`root` +* Mode: `0775` + +**IMPORTANT:** If you are using a separate volume (`/dev/vdX`) for NFS + storage, **ensure** it is mounted on `/exports/` **before** running + this role. + + + +## Role Variables + +Core variables in this role: + +| Name | Default value | Description | +|-------------------------------|---------------|---------------| +| `openshift_cfme_install_app` | `False` | `True`: Install everything and create a new CFME app, `False`: Just install all of the templates and scaffolding | + + +Variables you may override have defaults defined in +[defaults/main.yml](defaults/main.yml). + + +# Important Notes + +This is a **tech preview** status role presently. Use it with the same +caution you would give any other pre-release software. + +**Most importantly** follow this one rule: don't re-run the entrypoint +playbook multiple times in a row without cleaning up after previous +runs if some of the CFME steps have run. This is a known +flake. Cleanup instructions are provided at the bottom of this README. + + +# Usage + +This section describes the basic usage of this role. All parameters +will use their [default values](defaults/main.yml). + +## Pre-flight Checks + +**IMPORTANT:** As documented above in [the prerequisites](#prerequisites), + you **must already** have your OCP cluster up and running. 
+ +**Optional:** The ManageIQ pod is fairly large (about 1.7 GB) so to +save some spin-up time post-deployment, you can begin pre-pulling the +docker image to each of your nodes now: + +``` +root@node0x # docker pull docker.io/manageiq/manageiq-pods:app-latest-fine +``` + +## Getting Started + +1) The *entry point playbook* to install CFME is located in +[the BYO playbooks](../../playbooks/byo/openshift-cfme/config.yml) +directory + +2) Update your existing `hosts` inventory file and ensure the +parameter `openshift_cfme_install_app` is set to `True` under the +`[OSEv3:vars]` block. + +3) Using your existing `hosts` inventory file, run `ansible-playbook` +with the entry point playbook: + +``` +$ ansible-playbook -v -i <INVENTORY_FILE> playbooks/byo/openshift-cfme/config.yml +``` + +## Next Steps + +Once complete, the playbook will let you know: + + +``` +TASK [openshift_cfme : Status update] ********************************************************* +ok: [ho.st.na.me] => { + "msg": "CFME has been deployed. Note that there will be a delay before it is fully initialized.\n" +} +``` + +This will take several minutes (*possibly 10 or more*, depending on +your network connection). However, you can get some insight into the +deployment process during initialization. + +### oc describe pod manageiq-0 + +*Some useful information about the output you will see if you run the +`oc describe pod manageiq-0` command* + +**Readiness probes** - These will take a while to become +`Healthy`. The initial health probes won't even happen for at least 8 +minutes depending on how long it takes you to pull down the large +images. ManageIQ is a large application so it may take a considerable +amount of time for it to deploy and be marked as `Healthy`. 
+ +If you go to the node you know the application is running on (check +for `Successfully assigned manageiq-0 to <HOST|IP>` in the `describe` +output) you can run a `docker pull` command to monitor the progress of +the image pull: + +``` +[root@cfme-node ~]# docker pull docker.io/manageiq/manageiq-pods:app-latest-fine +Trying to pull repository docker.io/manageiq/manageiq-pods ... +sha256:6c055ca9d3c65cd694d6c0e28986b5239ba56bbdf0488cccdaa283d545258f8a: Pulling from docker.io/manageiq/manageiq-pods +Digest: sha256:6c055ca9d3c65cd694d6c0e28986b5239ba56bbdf0488cccdaa283d545258f8a +Status: Image is up to date for docker.io/manageiq/manageiq-pods:app-latest-fine +``` + +The example above demonstrates the case where the image has been +successfully pulled already. + +If the image isn't completely pulled already then you will see +multiple progress bars detailing each image layer download status. + + +### rsh + +*Useful inspection/progress monitoring techniques with the `oc rsh` +command.* + + +On your master node, switch to the `cfme` project (or whatever you +named it if you overrode the `openshift_cfme_project` variable) and +check on the pod states: + +``` +[root@cfme-master01 ~]# oc project cfme +Now using project "cfme" on server "https://10.10.0.100:8443". + +[root@cfme-master01 ~]# oc get pod +NAME READY STATUS RESTARTS AGE +manageiq-0 0/1 Running 0 14m +memcached-1-3lk7g 1/1 Running 0 14m +postgresql-1-12slb 1/1 Running 0 14m +``` + +Note how the `manageiq-0` pod says `0/1` under the **READY** +column. After some time (depending on your network connection) you'll +be able to `rsh` into the pod to find out more of what's happening in +real time. 
First, the easy-mode command, run this once `rsh` is +available and then watch until it says `Started Initialize Appliance +Database`: + +``` +[root@cfme-master01 ~]# oc rsh manageiq-0 journalctl -f -u appliance-initialize.service +``` + +For the full explanation of what this means, and more interactive +inspection techniques, keep reading on. + +To obtain a shell on our `manageiq` pod we use this command: + +``` +[root@cfme-master01 ~]# oc rsh manageiq-0 bash -l +``` + +The `rsh` command opens a shell in your pod for you. In this case it's +the pod called `manageiq-0`. `systemd` is managing the services in +this pod so we can use the `list-units` command to see what is running +currently: `# systemctl list-units | grep appliance`. + +If you see the `appliance-initialize` service running, this indicates +that basic setup is still in progress. We can monitor the process with +the `journalctl` command like so: + + +``` +[root@manageiq-0 vmdb]# journalctl -f -u appliance-initialize.service +Jun 14 14:55:52 manageiq-0 appliance-initialize.sh[58]: == Checking deployment status == +Jun 14 14:55:52 manageiq-0 appliance-initialize.sh[58]: No pre-existing EVM configuration found on region PV +Jun 14 14:55:52 manageiq-0 appliance-initialize.sh[58]: == Checking for existing data on server PV == +Jun 14 14:55:52 manageiq-0 appliance-initialize.sh[58]: == Starting New Deployment == +Jun 14 14:55:52 manageiq-0 appliance-initialize.sh[58]: == Applying memcached config == +Jun 14 14:55:53 manageiq-0 appliance-initialize.sh[58]: == Initializing Appliance == +Jun 14 14:55:57 manageiq-0 appliance-initialize.sh[58]: create encryption key +Jun 14 14:55:57 manageiq-0 appliance-initialize.sh[58]: configuring external database +Jun 14 14:55:57 manageiq-0 appliance-initialize.sh[58]: Checking for connections to the database... 
+Jun 14 14:56:09 manageiq-0 appliance-initialize.sh[58]: Create region starting +Jun 14 14:58:15 manageiq-0 appliance-initialize.sh[58]: Create region complete +Jun 14 14:58:15 manageiq-0 appliance-initialize.sh[58]: == Initializing PV data == +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: == Initializing PV data backup == +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: sending incremental file list +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: created directory /persistent/server-deploy/backup/backup_2017_06_14_145816 +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: region-data/ +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: region-data/var/ +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: region-data/var/www/ +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: region-data/var/www/miq/ +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: region-data/var/www/miq/vmdb/ +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: region-data/var/www/miq/vmdb/REGION +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: region-data/var/www/miq/vmdb/certs/ +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: region-data/var/www/miq/vmdb/certs/v2_key +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: region-data/var/www/miq/vmdb/config/ +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: region-data/var/www/miq/vmdb/config/database.yml +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: server-data/ +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: server-data/var/ +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: server-data/var/www/ +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: server-data/var/www/miq/ +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: server-data/var/www/miq/vmdb/ +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: server-data/var/www/miq/vmdb/GUID +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: sent 1330 bytes received 136 
bytes 2932.00 bytes/sec +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: total size is 770 speedup is 0.53 +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: == Restoring PV data symlinks == +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: /var/www/miq/vmdb/REGION symlink is already in place, skipping +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: /var/www/miq/vmdb/config/database.yml symlink is already in place, skipping +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: /var/www/miq/vmdb/certs/v2_key symlink is already in place, skipping +Jun 14 14:58:16 manageiq-0 appliance-initialize.sh[58]: /var/www/miq/vmdb/log symlink is already in place, skipping +Jun 14 14:58:28 manageiq-0 systemctl[304]: Removed symlink /etc/systemd/system/multi-user.target.wants/appliance-initialize.service. +Jun 14 14:58:29 manageiq-0 systemd[1]: Started Initialize Appliance Database. +``` + +Most of what we see here (above) is the initial database seeding +process. This process isn't very quick, so be patient. + +At the bottom of the log there is a special line from the `systemctl` +service, `Removed symlink +/etc/systemd/system/multi-user.target.wants/appliance-initialize.service`. The +`appliance-initialize` service is no longer marked as enabled. This +indicates that the base application initialization is complete now. + +We're not done yet though, there are other ancillary services which +run in this pod to support the application. *Still in the rsh shell*, +Use the `ps` command to monitor for the `httpd` processes +starting. You will see output similar to the following when that stage +has completed: + +``` +[root@manageiq-0 vmdb]# ps aux | grep http +root 1941 0.0 0.1 249820 7640 ? Ss 15:02 0:00 /usr/sbin/httpd -DFOREGROUND +apache 1942 0.0 0.0 250752 6012 ? S 15:02 0:00 /usr/sbin/httpd -DFOREGROUND +apache 1943 0.0 0.0 250472 5952 ? S 15:02 0:00 /usr/sbin/httpd -DFOREGROUND +apache 1944 0.0 0.0 250472 5916 ? 
S 15:02 0:00 /usr/sbin/httpd -DFOREGROUND +apache 1945 0.0 0.0 250360 5764 ? S 15:02 0:00 /usr/sbin/httpd -DFOREGROUND +``` + +Furthermore, you can find other related processes by just looking for +ones with `MIQ` in their name: + +``` +[root@manageiq-0 vmdb]# ps aux | grep -i miq +root 333 27.7 4.2 555884 315916 ? Sl 14:58 3:59 MIQ Server +root 1976 0.6 4.0 507224 303740 ? SNl 15:02 0:03 MIQ: MiqGenericWorker id: 1, queue: generic +root 1984 0.6 4.0 507224 304312 ? SNl 15:02 0:03 MIQ: MiqGenericWorker id: 2, queue: generic +root 1992 0.9 4.0 508252 304888 ? SNl 15:02 0:05 MIQ: MiqPriorityWorker id: 3, queue: generic +root 2000 0.7 4.0 510308 304696 ? SNl 15:02 0:04 MIQ: MiqPriorityWorker id: 4, queue: generic +root 2008 1.2 4.0 514000 303612 ? SNl 15:02 0:07 MIQ: MiqScheduleWorker id: 5 +root 2026 0.2 4.0 517504 303644 ? SNl 15:02 0:01 MIQ: MiqEventHandler id: 6, queue: ems +root 2036 0.2 4.0 518532 303768 ? SNl 15:02 0:01 MIQ: MiqReportingWorker id: 7, queue: reporting +root 2044 0.2 4.0 519560 303812 ? SNl 15:02 0:01 MIQ: MiqReportingWorker id: 8, queue: reporting +root 2059 0.2 4.0 528372 303956 ? SNl 15:02 0:01 puma 3.3.0 (tcp://127.0.0.1:5000) [MIQ: Web Server Worker] +root 2067 0.9 4.0 529664 305716 ? SNl 15:02 0:05 puma 3.3.0 (tcp://127.0.0.1:3000) [MIQ: Web Server Worker] +root 2075 0.2 4.0 529408 304056 ? SNl 15:02 0:01 puma 3.3.0 (tcp://127.0.0.1:4000) [MIQ: Web Server Worker] +root 2329 0.0 0.0 10640 972 ? S+ 15:13 0:00 grep --color=auto -i miq +``` + +Finally, *still in the rsh shell*, to test if the application is +running correctly, we can request the application homepage. If the +page is available the page title will be `ManageIQ: Login`: + +``` +[root@manageiq-0 vmdb]# curl -s -k https://localhost | grep -A2 '<title>' +<title> +ManageIQ: Login +</title> +``` + +**Note:** The `-s` flag makes `curl` operations silent and the `-k` +flag tells it to ignore errors about untrusted certificates. 
+ + + +# Additional Upstream Resources + +Below are some useful resources from the upstream project +documentation. You may find these of value. + +* [Verify Setup Was Successful](https://github.com/ManageIQ/manageiq-pods#verifying-the-setup-was-successful) +* [POD Access And Routes](https://github.com/ManageIQ/manageiq-pods#pod-access-and-routes) +* [Troubleshooting](https://github.com/ManageIQ/manageiq-pods#troubleshooting) + + +# Manual Cleanup + +At this time uninstallation/cleanup is still a manual process. You +will have to follow a few steps to fully remove CFME from your +cluster. + +Delete the project: + +* `oc delete project cfme` + +Delete the PVs: + +* `oc delete pv miq-pv01` +* `oc delete pv miq-pv02` +* `oc delete pv miq-pv03` + +Clean out the old PV data: + +* `cd /exports/` +* `find miq* -type f -delete` +* `find miq* -type d -delete` + +Remove the NFS exports: + +* `rm /etc/exports.d/openshift_cfme.exports` +* `exportfs -ar` + +Delete the user: + +* `oc delete user cfme` + +**NOTE:** The `oc delete project cfme` command will return quickly +however it will continue to operate in the background. Continue +running `oc get project` after you've completed the other steps to +monitor the pods and final project termination progress. 
diff --git a/roles/openshift_cfme/defaults/main.yml b/roles/openshift_cfme/defaults/main.yml new file mode 100644 index 000000000..493e1ef68 --- /dev/null +++ b/roles/openshift_cfme/defaults/main.yml @@ -0,0 +1,38 @@ +--- +# Namespace for the CFME project +openshift_cfme_project: cfme +# Namespace/project description +openshift_cfme_project_description: ManageIQ - CloudForms Management Engine +# Basic user assigned the `admin` role for the project +openshift_cfme_user: cfme +# Project system account for enabling privileged pods +openshift_cfme_service_account: "system:serviceaccount:{{ openshift_cfme_project }}:default" +# All the required exports +openshift_cfme_pv_exports: + - miq-pv01 + - miq-pv02 + - miq-pv03 +# PV template files and their created object names +openshift_cfme_pv_data: + - pv_name: miq-pv01 + pv_template: miq-pv-db.yaml + pv_label: CFME DB PV + - pv_name: miq-pv02 + pv_template: miq-pv-region.yaml + pv_label: CFME Region PV + - pv_name: miq-pv03 + pv_template: miq-pv-server.yaml + pv_label: CFME Server PV + +# Tuning parameter to use more than 5 images at once from an ImageStream +openshift_cfme_maxImagesBulkImportedPerRepository: 100 +# Hostname/IP of the NFS server. Defaults to the first host in the 'nfs' inventory group +openshift_cfme_nfs_server: "{{ groups.nfs.0 }}" +# TODO: Refactor '_install_app' variable. This is just for testing but +# maybe in the future it should control the entire yes/no for CFME. +# +# Whether or not the manageiq app should be initialized ('oc new-app +# --template=manageiq'). If False everything UP TO 'new-app' is run. 
+openshift_cfme_install_app: False +# Docker image to pull +openshift_cfme_container_image: "docker.io/manageiq/manageiq-pods:app-latest-fine" diff --git a/roles/openshift_cfme/files/miq-template.yaml b/roles/openshift_cfme/files/miq-template.yaml new file mode 100644 index 000000000..8f0d2af38 --- /dev/null +++ b/roles/openshift_cfme/files/miq-template.yaml @@ -0,0 +1,566 @@ +--- +path: /tmp/miq-template-out +data: + apiVersion: v1 + kind: Template + labels: + template: manageiq + metadata: + name: manageiq + annotations: + description: "ManageIQ appliance with persistent storage" + tags: "instant-app,manageiq,miq" + iconClass: "icon-rails" + objects: + - apiVersion: v1 + kind: Secret + metadata: + name: "${NAME}-secrets" + stringData: + pg-password: "${DATABASE_PASSWORD}" + - apiVersion: v1 + kind: Service + metadata: + annotations: + description: "Exposes and load balances ManageIQ pods" + service.alpha.openshift.io/dependencies: '[{"name":"${DATABASE_SERVICE_NAME}","namespace":"","kind":"Service"},{"name":"${MEMCACHED_SERVICE_NAME}","namespace":"","kind":"Service"}]' + name: ${NAME} + spec: + clusterIP: None + ports: + - name: http + port: 80 + protocol: TCP + targetPort: 80 + - name: https + port: 443 + protocol: TCP + targetPort: 443 + selector: + name: ${NAME} + - apiVersion: v1 + kind: Route + metadata: + name: ${NAME} + spec: + host: ${APPLICATION_DOMAIN} + port: + targetPort: https + tls: + termination: passthrough + to: + kind: Service + name: ${NAME} + - apiVersion: v1 + kind: ImageStream + metadata: + name: miq-app + annotations: + description: "Keeps track of the ManageIQ image changes" + spec: + dockerImageRepository: "${APPLICATION_IMG_NAME}" + - apiVersion: v1 + kind: ImageStream + metadata: + name: miq-postgresql + annotations: + description: "Keeps track of the PostgreSQL image changes" + spec: + dockerImageRepository: "${POSTGRESQL_IMG_NAME}" + - apiVersion: v1 + kind: ImageStream + metadata: + name: miq-memcached + annotations: + description: 
"Keeps track of the Memcached image changes" + spec: + dockerImageRepository: "${MEMCACHED_IMG_NAME}" + - apiVersion: v1 + kind: PersistentVolumeClaim + metadata: + name: "${NAME}-${DATABASE_SERVICE_NAME}" + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: ${DATABASE_VOLUME_CAPACITY} + - apiVersion: v1 + kind: PersistentVolumeClaim + metadata: + name: "${NAME}-region" + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: ${APPLICATION_REGION_VOLUME_CAPACITY} + - apiVersion: apps/v1beta1 + kind: "StatefulSet" + metadata: + name: ${NAME} + annotations: + description: "Defines how to deploy the ManageIQ appliance" + spec: + serviceName: "${NAME}" + replicas: "${APPLICATION_REPLICA_COUNT}" + template: + metadata: + labels: + name: ${NAME} + name: ${NAME} + spec: + containers: + - name: manageiq + image: "${APPLICATION_IMG_NAME}:${APPLICATION_IMG_TAG}" + livenessProbe: + tcpSocket: + port: 443 + initialDelaySeconds: 480 + timeoutSeconds: 3 + readinessProbe: + httpGet: + path: / + port: 443 + scheme: HTTPS + initialDelaySeconds: 200 + timeoutSeconds: 3 + ports: + - containerPort: 80 + protocol: TCP + - containerPort: 443 + protocol: TCP + securityContext: + privileged: true + volumeMounts: + - + name: "${NAME}-server" + mountPath: "/persistent" + - + name: "${NAME}-region" + mountPath: "/persistent-region" + env: + - + name: "APPLICATION_INIT_DELAY" + value: "${APPLICATION_INIT_DELAY}" + - + name: "DATABASE_SERVICE_NAME" + value: "${DATABASE_SERVICE_NAME}" + - + name: "DATABASE_REGION" + value: "${DATABASE_REGION}" + - + name: "MEMCACHED_SERVICE_NAME" + value: "${MEMCACHED_SERVICE_NAME}" + - + name: "POSTGRESQL_USER" + value: "${DATABASE_USER}" + - + name: "POSTGRESQL_PASSWORD" + valueFrom: + secretKeyRef: + name: "${NAME}-secrets" + key: "pg-password" + - + name: "POSTGRESQL_DATABASE" + value: "${DATABASE_NAME}" + - + name: "POSTGRESQL_MAX_CONNECTIONS" + value: "${POSTGRESQL_MAX_CONNECTIONS}" + - + name: 
"POSTGRESQL_SHARED_BUFFERS" + value: "${POSTGRESQL_SHARED_BUFFERS}" + resources: + requests: + memory: "${APPLICATION_MEM_REQ}" + cpu: "${APPLICATION_CPU_REQ}" + limits: + memory: "${APPLICATION_MEM_LIMIT}" + lifecycle: + preStop: + exec: + command: + - /opt/manageiq/container-scripts/sync-pv-data + volumes: + - + name: "${NAME}-region" + persistentVolumeClaim: + claimName: ${NAME}-region + volumeClaimTemplates: + - metadata: + name: "${NAME}-server" + annotations: + # Uncomment this if using dynamic volume provisioning. + # https://docs.openshift.org/latest/install_config/persistent_storage/dynamically_provisioning_pvs.html + # volume.alpha.kubernetes.io/storage-class: anything + spec: + accessModes: [ ReadWriteOnce ] + resources: + requests: + storage: "${APPLICATION_VOLUME_CAPACITY}" + - apiVersion: v1 + kind: "Service" + metadata: + name: "${MEMCACHED_SERVICE_NAME}" + annotations: + description: "Exposes the memcached server" + spec: + ports: + - + name: "memcached" + port: 11211 + targetPort: 11211 + selector: + name: "${MEMCACHED_SERVICE_NAME}" + - apiVersion: v1 + kind: "DeploymentConfig" + metadata: + name: "${MEMCACHED_SERVICE_NAME}" + annotations: + description: "Defines how to deploy memcached" + spec: + strategy: + type: "Recreate" + triggers: + - + type: "ImageChange" + imageChangeParams: + automatic: true + containerNames: + - "memcached" + from: + kind: "ImageStreamTag" + name: "miq-memcached:${MEMCACHED_IMG_TAG}" + - + type: "ConfigChange" + replicas: 1 + selector: + name: "${MEMCACHED_SERVICE_NAME}" + template: + metadata: + name: "${MEMCACHED_SERVICE_NAME}" + labels: + name: "${MEMCACHED_SERVICE_NAME}" + spec: + volumes: [] + containers: + - + name: "memcached" + image: "${MEMCACHED_IMG_NAME}:${MEMCACHED_IMG_TAG}" + ports: + - + containerPort: 11211 + readinessProbe: + timeoutSeconds: 1 + initialDelaySeconds: 5 + tcpSocket: + port: 11211 + livenessProbe: + timeoutSeconds: 1 + initialDelaySeconds: 30 + tcpSocket: + port: 11211 + volumeMounts: [] + 
env: + - + name: "MEMCACHED_MAX_MEMORY" + value: "${MEMCACHED_MAX_MEMORY}" + - + name: "MEMCACHED_MAX_CONNECTIONS" + value: "${MEMCACHED_MAX_CONNECTIONS}" + - + name: "MEMCACHED_SLAB_PAGE_SIZE" + value: "${MEMCACHED_SLAB_PAGE_SIZE}" + resources: + requests: + memory: "${MEMCACHED_MEM_REQ}" + cpu: "${MEMCACHED_CPU_REQ}" + limits: + memory: "${MEMCACHED_MEM_LIMIT}" + - apiVersion: v1 + kind: "Service" + metadata: + name: "${DATABASE_SERVICE_NAME}" + annotations: + description: "Exposes the database server" + spec: + ports: + - + name: "postgresql" + port: 5432 + targetPort: 5432 + selector: + name: "${DATABASE_SERVICE_NAME}" + - apiVersion: v1 + kind: "DeploymentConfig" + metadata: + name: "${DATABASE_SERVICE_NAME}" + annotations: + description: "Defines how to deploy the database" + spec: + strategy: + type: "Recreate" + triggers: + - + type: "ImageChange" + imageChangeParams: + automatic: true + containerNames: + - "postgresql" + from: + kind: "ImageStreamTag" + name: "miq-postgresql:${POSTGRESQL_IMG_TAG}" + - + type: "ConfigChange" + replicas: 1 + selector: + name: "${DATABASE_SERVICE_NAME}" + template: + metadata: + name: "${DATABASE_SERVICE_NAME}" + labels: + name: "${DATABASE_SERVICE_NAME}" + spec: + volumes: + - + name: "miq-pgdb-volume" + persistentVolumeClaim: + claimName: "${NAME}-${DATABASE_SERVICE_NAME}" + containers: + - + name: "postgresql" + image: "${POSTGRESQL_IMG_NAME}:${POSTGRESQL_IMG_TAG}" + ports: + - + containerPort: 5432 + readinessProbe: + timeoutSeconds: 1 + initialDelaySeconds: 15 + exec: + command: + - "/bin/sh" + - "-i" + - "-c" + - "psql -h 127.0.0.1 -U ${POSTGRESQL_USER} -q -d ${POSTGRESQL_DATABASE} -c 'SELECT 1'" + livenessProbe: + timeoutSeconds: 1 + initialDelaySeconds: 60 + tcpSocket: + port: 5432 + volumeMounts: + - + name: "miq-pgdb-volume" + mountPath: "/var/lib/pgsql/data" + env: + - + name: "POSTGRESQL_USER" + value: "${DATABASE_USER}" + - + name: "POSTGRESQL_PASSWORD" + valueFrom: + secretKeyRef: + name: "${NAME}-secrets" + 
key: "pg-password" + - + name: "POSTGRESQL_DATABASE" + value: "${DATABASE_NAME}" + - + name: "POSTGRESQL_MAX_CONNECTIONS" + value: "${POSTGRESQL_MAX_CONNECTIONS}" + - + name: "POSTGRESQL_SHARED_BUFFERS" + value: "${POSTGRESQL_SHARED_BUFFERS}" + resources: + requests: + memory: "${POSTGRESQL_MEM_REQ}" + cpu: "${POSTGRESQL_CPU_REQ}" + limits: + memory: "${POSTGRESQL_MEM_LIMIT}" + + parameters: + - + name: "NAME" + displayName: Name + required: true + description: "The name assigned to all of the frontend objects defined in this template." + value: manageiq + - + name: "DATABASE_SERVICE_NAME" + displayName: "PostgreSQL Service Name" + required: true + description: "The name of the OpenShift Service exposed for the PostgreSQL container." + value: "postgresql" + - + name: "DATABASE_USER" + displayName: "PostgreSQL User" + required: true + description: "PostgreSQL user that will access the database." + value: "root" + - + name: "DATABASE_PASSWORD" + displayName: "PostgreSQL Password" + required: true + description: "Password for the PostgreSQL user." + from: "[a-zA-Z0-9]{8}" + generate: expression + - + name: "DATABASE_NAME" + required: true + displayName: "PostgreSQL Database Name" + description: "Name of the PostgreSQL database accessed." + value: "vmdb_production" + - + name: "DATABASE_REGION" + required: true + displayName: "Application Database Region" + description: "Database region that will be used for application." + value: "0" + - + name: "MEMCACHED_SERVICE_NAME" + required: true + displayName: "Memcached Service Name" + description: "The name of the OpenShift Service exposed for the Memcached container." + value: "memcached" + - + name: "MEMCACHED_MAX_MEMORY" + displayName: "Memcached Max Memory" + description: "Memcached maximum memory for memcached object storage in MB." + value: "64" + - + name: "MEMCACHED_MAX_CONNECTIONS" + displayName: "Memcached Max Connections" + description: "Memcached maximum number of connections allowed." 
+ value: "1024" + - + name: "MEMCACHED_SLAB_PAGE_SIZE" + displayName: "Memcached Slab Page Size" + description: "Memcached size of each slab page." + value: "1m" + - + name: "POSTGRESQL_MAX_CONNECTIONS" + displayName: "PostgreSQL Max Connections" + description: "PostgreSQL maximum number of database connections allowed." + value: "100" + - + name: "POSTGRESQL_SHARED_BUFFERS" + displayName: "PostgreSQL Shared Buffer Amount" + description: "Amount of memory dedicated for PostgreSQL shared memory buffers." + value: "256MB" + - + name: "APPLICATION_CPU_REQ" + displayName: "Application Min CPU Requested" + required: true + description: "Minimum amount of CPU time the Application container will need (expressed in millicores)." + value: "1000m" + - + name: "POSTGRESQL_CPU_REQ" + displayName: "PostgreSQL Min CPU Requested" + required: true + description: "Minimum amount of CPU time the PostgreSQL container will need (expressed in millicores)." + value: "500m" + - + name: "MEMCACHED_CPU_REQ" + displayName: "Memcached Min CPU Requested" + required: true + description: "Minimum amount of CPU time the Memcached container will need (expressed in millicores)." + value: "200m" + - + name: "APPLICATION_MEM_REQ" + displayName: "Application Min RAM Requested" + required: true + description: "Minimum amount of memory the Application container will need." + value: "6144Mi" + - + name: "POSTGRESQL_MEM_REQ" + displayName: "PostgreSQL Min RAM Requested" + required: true + description: "Minimum amount of memory the PostgreSQL container will need." + value: "1024Mi" + - + name: "MEMCACHED_MEM_REQ" + displayName: "Memcached Min RAM Requested" + required: true + description: "Minimum amount of memory the Memcached container will need." + value: "64Mi" + - + name: "APPLICATION_MEM_LIMIT" + displayName: "Application Max RAM Limit" + required: true + description: "Maximum amount of memory the Application container can consume." 
+ value: "16384Mi" + - + name: "POSTGRESQL_MEM_LIMIT" + displayName: "PostgreSQL Max RAM Limit" + required: true + description: "Maximum amount of memory the PostgreSQL container can consume." + value: "8192Mi" + - + name: "MEMCACHED_MEM_LIMIT" + displayName: "Memcached Max RAM Limit" + required: true + description: "Maximum amount of memory the Memcached container can consume." + value: "256Mi" + - + name: "POSTGRESQL_IMG_NAME" + displayName: "PostgreSQL Image Name" + description: "This is the PostgreSQL image name requested to deploy." + value: "docker.io/manageiq/manageiq-pods" + - + name: "POSTGRESQL_IMG_TAG" + displayName: "PostgreSQL Image Tag" + description: "This is the PostgreSQL image tag/version requested to deploy." + value: "postgresql-latest-fine" + - + name: "MEMCACHED_IMG_NAME" + displayName: "Memcached Image Name" + description: "This is the Memcached image name requested to deploy." + value: "docker.io/manageiq/manageiq-pods" + - + name: "MEMCACHED_IMG_TAG" + displayName: "Memcached Image Tag" + description: "This is the Memcached image tag/version requested to deploy." + value: "memcached-latest-fine" + - + name: "APPLICATION_IMG_NAME" + displayName: "Application Image Name" + description: "This is the Application image name requested to deploy." + value: "docker.io/manageiq/manageiq-pods" + - + name: "APPLICATION_IMG_TAG" + displayName: "Application Image Tag" + description: "This is the Application image tag/version requested to deploy." + value: "app-latest-fine" + - + name: "APPLICATION_DOMAIN" + displayName: "Application Hostname" + description: "The exposed hostname that will route to the application service, if left blank a value will be defaulted." + value: "" + - + name: "APPLICATION_REPLICA_COUNT" + displayName: "Application Replica Count" + description: "This is the number of Application replicas requested to deploy." 
+ value: "1" + - + name: "APPLICATION_INIT_DELAY" + displayName: "Application Init Delay" + required: true + description: "Delay in seconds before we attempt to initialize the application." + value: "15" + - + name: "APPLICATION_VOLUME_CAPACITY" + displayName: "Application Volume Capacity" + required: true + description: "Volume space available for application data." + value: "5Gi" + - + name: "APPLICATION_REGION_VOLUME_CAPACITY" + displayName: "Application Region Volume Capacity" + required: true + description: "Volume space available for region application data." + value: "5Gi" + - + name: "DATABASE_VOLUME_CAPACITY" + displayName: "Database Volume Capacity" + required: true + description: "Volume space available for database." + value: "15Gi" diff --git a/roles/openshift_cfme/files/openshift_cfme.exports b/roles/openshift_cfme/files/openshift_cfme.exports new file mode 100644 index 000000000..5457d41fc --- /dev/null +++ b/roles/openshift_cfme/files/openshift_cfme.exports @@ -0,0 +1,3 @@ +/exports/miq-pv01 *(rw,no_root_squash,no_wdelay) +/exports/miq-pv02 *(rw,no_root_squash,no_wdelay) +/exports/miq-pv03 *(rw,no_root_squash,no_wdelay) diff --git a/roles/openshift_cfme/handlers/main.yml b/roles/openshift_cfme/handlers/main.yml new file mode 100644 index 000000000..476a5e030 --- /dev/null +++ b/roles/openshift_cfme/handlers/main.yml @@ -0,0 +1,42 @@ +--- +###################################################################### +# NOTE: These are duplicated from roles/openshift_master/handlers/main.yml +# +# TODO: Use the consolidated 'openshift_handlers' role once it's ready +# See: https://github.com/openshift/openshift-ansible/pull/4041#discussion_r118770782 +###################################################################### + +- name: restart master + systemd: name={{ openshift.common.service_type }}-master state=restarted + when: (openshift.master.ha is not defined or not openshift.master.ha | bool) and (not (master_service_status_changed | default(false) | 
bool)) + notify: Verify API Server + +- name: restart master api + systemd: name={{ openshift.common.service_type }}-master-api state=restarted + when: (openshift.master.ha is defined and openshift.master.ha | bool) and (not (master_api_service_status_changed | default(false) | bool)) and openshift.master.cluster_method == 'native' + notify: Verify API Server + +- name: restart master controllers + systemd: name={{ openshift.common.service_type }}-master-controllers state=restarted + when: (openshift.master.ha is defined and openshift.master.ha | bool) and (not (master_controllers_service_status_changed | default(false) | bool)) and openshift.master.cluster_method == 'native' + +- name: Verify API Server + # Using curl here since the uri module requires python-httplib2 and + # wait_for port doesn't provide health information. + command: > + curl --silent --tlsv1.2 + {% if openshift.common.version_gte_3_2_or_1_2 | bool %} + --cacert {{ openshift.common.config_base }}/master/ca-bundle.crt + {% else %} + --cacert {{ openshift.common.config_base }}/master/ca.crt + {% endif %} + {{ openshift.master.api_url }}/healthz/ready + args: + # Disables the following warning: + # Consider using get_url or uri module rather than running curl + warn: no + register: api_available_output + until: api_available_output.stdout == 'ok' + retries: 120 + delay: 1 + changed_when: false diff --git a/roles/openshift_cfme/img/CFMEBasicDeployment.png b/roles/openshift_cfme/img/CFMEBasicDeployment.png Binary files differnew file mode 100644 index 000000000..a89c1e325 --- /dev/null +++ b/roles/openshift_cfme/img/CFMEBasicDeployment.png diff --git a/roles/openshift_cfme/meta/main.yml b/roles/openshift_cfme/meta/main.yml new file mode 100644 index 000000000..9200f2c3c --- /dev/null +++ b/roles/openshift_cfme/meta/main.yml @@ -0,0 +1,20 @@ +--- +galaxy_info: + author: Tim Bielawa + description: OpenShift CFME (Manage IQ) Deployer + company: Red Hat, Inc. 
+ license: Apache License, Version 2.0 + min_ansible_version: 2.2 + version: 1.0 + platforms: + - name: EL + versions: + - 7 + categories: + - cloud + - system +dependencies: +- role: lib_openshift +- role: lib_utils +- role: openshift_common +- role: openshift_master_facts diff --git a/roles/openshift_cfme/tasks/create_pvs.yml b/roles/openshift_cfme/tasks/create_pvs.yml new file mode 100644 index 000000000..7fa7d3997 --- /dev/null +++ b/roles/openshift_cfme/tasks/create_pvs.yml @@ -0,0 +1,36 @@ +--- +# Check for existence and then conditionally: +# - evaluate templates +# - PVs +# +# These tasks idempotently create required CFME PV objects. Do not +# call this file directly. This file is intended to be run as an +# include that has a 'with_items' attached to it. Hence the use below +# of variables like "{{ item.pv_label }}" + +- name: "Check if the {{ item.pv_label }} template has been created already" + oc_obj: + namespace: "{{ openshift_cfme_project }}" + state: list + kind: pv + name: "{{ item.pv_name }}" + register: miq_pv_check + +# Skip all of this if the PV already exists +- block: + - name: "Ensure the {{ item.pv_label }} template is evaluated" + template: + src: "{{ item.pv_template }}.j2" + dest: "{{ template_dir }}/{{ item.pv_template }}" + + - name: "Ensure {{ item.pv_label }} is created" + oc_obj: + namespace: "{{ openshift_cfme_project }}" + kind: pv + name: "{{ item.pv_name }}" + state: present + delete_after: True + files: + - "{{ template_dir }}/{{ item.pv_template }}" + when: + - not miq_pv_check.results.results.0 diff --git a/roles/openshift_cfme/tasks/main.yml b/roles/openshift_cfme/tasks/main.yml new file mode 100644 index 000000000..acbce7232 --- /dev/null +++ b/roles/openshift_cfme/tasks/main.yml @@ -0,0 +1,148 @@ +--- +###################################################################### +# Users, projects, and privileges + +- name: Ensure the CFME user exists + oc_user: + state: present + username: "{{ openshift_cfme_user }}" + +- name: 
Ensure the CFME namespace exists with CFME user as admin + oc_project: + state: present + name: "{{ openshift_cfme_project }}" + display_name: "{{ openshift_cfme_project_description }}" + admin: "{{ openshift_cfme_user }}" + +- name: Ensure the CFME namespace service account is privileged + oc_adm_policy_user: + namespace: "{{ openshift_cfme_project }}" + user: "{{ openshift_cfme_service_account }}" + resource_kind: scc + resource_name: privileged + state: present + +###################################################################### +# NFS + +- name: Ensure the /exports/ directory exists + file: + path: /exports/ + state: directory + mode: 0755 + owner: root + group: root + +- name: Ensure the miq-pv0X export directories exist + file: + path: "/exports/{{ item }}" + state: directory + mode: 0775 + owner: root + group: root + with_items: "{{ openshift_cfme_pv_exports }}" + +- name: Ensure the NFS exports for CFME PVs exist + copy: + src: openshift_cfme.exports + dest: /etc/exports.d/openshift_cfme.exports + register: nfs_exports_updated + +- name: Ensure the NFS export table is refreshed if exports were added + command: exportfs -ar + when: + - nfs_exports_updated.changed + + +###################################################################### +# Create the required CFME PVs. Check out these online docs if you +# need a refresher on includes looping with items: +# * http://docs.ansible.com/ansible/playbooks_loops.html#loops-and-includes-in-2-0 +# * http://stackoverflow.com/a/35128533 +# +# TODO: Handle the case where a PV template is updated in +# openshift-ansible and the change needs to be landed on the managed +# cluster. + +- include: create_pvs.yml + with_items: "{{ openshift_cfme_pv_data }}" + +###################################################################### +# CFME App Template +# +# Note, this is different from the create_pvs.yml tasks in that the +# application template does not require any jinja2 evaluation. 
+# +# TODO: Handle the case where the server template is updated in +# openshift-ansible and the change needs to be landed on the managed +# cluster. + +- name: Check if the CFME Server template has been created already + oc_obj: + namespace: "{{ openshift_cfme_project }}" + state: list + kind: template + name: manageiq + register: miq_server_check + +- name: Copy over CFME Server template + copy: + src: miq-template.yaml + dest: "{{ template_dir }}/miq-template.yaml" + +- name: Ensure the server template was read from disk + debug: + var=r_openshift_cfme_miq_template_content + +- name: Ensure CFME Server Template exists + oc_obj: + namespace: "{{ openshift_cfme_project }}" + kind: template + name: "manageiq" + state: present + content: "{{ r_openshift_cfme_miq_template_content }}" + +###################################################################### +# Let's do this + +- name: Ensure the CFME Server is created + oc_process: + namespace: "{{ openshift_cfme_project }}" + template_name: manageiq + create: True + register: cfme_new_app_process + run_once: True + when: + # User said to install CFME in their inventory + - openshift_cfme_install_app | bool + # # The server app doesn't exist already + # - not miq_server_check.results.results.0 + +- debug: + var: cfme_new_app_process + +###################################################################### +# Various cleanup steps + +# TODO: Not sure what to do about this right now. Might be able to +# just delete it? This currently warns about "Unable to find +# '<TEMP_DIR>' in expected paths." +- name: Ensure the temporary PV/App templates are erased + file: + path: "{{ item }}" + state: absent + with_fileglob: + - "{{ template_dir }}/*.yaml" + +- name: Ensure the temporary PV/app template directory is erased + file: + path: "{{ template_dir }}" + state: absent + +###################################################################### + +- name: Status update + debug: + msg: > + CFME has been deployed. 
Note that there will be a delay before + it is fully initialized. diff --git a/roles/openshift_cfme/tasks/tune_masters.yml b/roles/openshift_cfme/tasks/tune_masters.yml new file mode 100644 index 000000000..02b0f10bf --- /dev/null +++ b/roles/openshift_cfme/tasks/tune_masters.yml @@ -0,0 +1,12 @@ +--- +- name: Ensure bulk image import limit is tuned + yedit: + src: /etc/origin/master/master-config.yaml + key: 'imagePolicyConfig.maxImagesBulkImportedPerRepository' + value: "{{ openshift_cfme_maxImagesBulkImportedPerRepository | int() }}" + state: present + backup: True + notify: + - restart master + +- meta: flush_handlers diff --git a/roles/openshift_cfme/tasks/uninstall.yml b/roles/openshift_cfme/tasks/uninstall.yml new file mode 100644 index 000000000..cba734a0e --- /dev/null +++ b/roles/openshift_cfme/tasks/uninstall.yml @@ -0,0 +1,43 @@ +--- +- include_role: + name: lib_openshift + +- name: Uninstall CFME - ManageIQ + debug: + msg: Uninstalling Cloudforms Management Engine - ManageIQ + +- name: Ensure the CFME project is removed + oc_project: + state: absent + name: "{{ openshift_cfme_project }}" + +- name: Ensure the CFME template is removed + oc_obj: + namespace: "{{ openshift_cfme_project }}" + state: absent + kind: template + name: manageiq + +- name: Ensure the CFME PVs are removed + oc_obj: + state: absent + all_namespaces: True + kind: pv + name: "{{ item }}" + with_items: "{{ openshift_cfme_pv_exports }}" + +- name: Ensure the CFME user is removed + oc_user: + state: absent + username: "{{ openshift_cfme_user }}" + +- name: Ensure the CFME NFS Exports are removed + file: + path: /etc/exports.d/openshift_cfme.exports + state: absent + register: nfs_exports_removed + +- name: Ensure the NFS export table is refreshed if exports were removed + command: exportfs -ar + when: + - nfs_exports_removed.changed diff --git a/roles/openshift_cfme/templates/miq-pv-db.yaml.j2 b/roles/openshift_cfme/templates/miq-pv-db.yaml.j2 new file mode 100644 index 
000000000..b8c3bb277 --- /dev/null +++ b/roles/openshift_cfme/templates/miq-pv-db.yaml.j2 @@ -0,0 +1,13 @@ +apiVersion: v1 +kind: PersistentVolume +metadata: + name: miq-pv01 +spec: + capacity: + storage: 15Gi + accessModes: + - ReadWriteOnce + nfs: + path: /exports/miq-pv01 + server: {{ openshift_cfme_nfs_server }} + persistentVolumeReclaimPolicy: Retain diff --git a/roles/openshift_cfme/templates/miq-pv-region.yaml.j2 b/roles/openshift_cfme/templates/miq-pv-region.yaml.j2 new file mode 100644 index 000000000..7218773f0 --- /dev/null +++ b/roles/openshift_cfme/templates/miq-pv-region.yaml.j2 @@ -0,0 +1,13 @@ +apiVersion: v1 +kind: PersistentVolume +metadata: + name: miq-pv02 +spec: + capacity: + storage: 5Gi + accessModes: + - ReadWriteOnce + nfs: + path: /exports/miq-pv02 + server: {{ openshift_cfme_nfs_server }} + persistentVolumeReclaimPolicy: Retain diff --git a/roles/openshift_cfme/templates/miq-pv-server.yaml.j2 b/roles/openshift_cfme/templates/miq-pv-server.yaml.j2 new file mode 100644 index 000000000..7b40b6c69 --- /dev/null +++ b/roles/openshift_cfme/templates/miq-pv-server.yaml.j2 @@ -0,0 +1,13 @@ +apiVersion: v1 +kind: PersistentVolume +metadata: + name: miq-pv03 +spec: + capacity: + storage: 5Gi + accessModes: + - ReadWriteOnce + nfs: + path: /exports/miq-pv03 + server: {{ openshift_cfme_nfs_server }} + persistentVolumeReclaimPolicy: Retain diff --git a/roles/openshift_examples/files/examples/v1.4/cfme-templates/cfme-template.yaml b/roles/openshift_examples/files/examples/v1.4/cfme-templates/cfme-template.yaml index 4f25a9c8f..982bd9530 100644 --- a/roles/openshift_examples/files/examples/v1.4/cfme-templates/cfme-template.yaml +++ b/roles/openshift_examples/files/examples/v1.4/cfme-templates/cfme-template.yaml @@ -48,7 +48,7 @@ objects: annotations: description: "Keeps track of changes in the CloudForms app image" spec: - dockerImageRepository: registry.access.redhat.com/cloudforms/cfme-openshift-app + dockerImageRepository: 
registry.access.redhat.com/cloudforms42/cfme-openshift-app - apiVersion: v1 kind: PersistentVolumeClaim metadata: @@ -188,7 +188,7 @@ objects: annotations: description: "Keeps track of changes in the CloudForms memcached image" spec: - dockerImageRepository: registry.access.redhat.com/cloudforms/cfme-openshift-memcached + dockerImageRepository: registry.access.redhat.com/cloudforms42/cfme-openshift-memcached - apiVersion: v1 kind: "DeploymentConfig" metadata: @@ -272,7 +272,7 @@ objects: annotations: description: "Keeps track of changes in the CloudForms postgresql image" spec: - dockerImageRepository: registry.access.redhat.com/cloudforms/cfme-openshift-postgresql + dockerImageRepository: registry.access.redhat.com/cloudforms42/cfme-openshift-postgresql - apiVersion: v1 kind: "DeploymentConfig" metadata: diff --git a/roles/openshift_examples/files/examples/v1.5/cfme-templates/cfme-pv-app-example.yaml b/roles/openshift_examples/files/examples/v1.5/cfme-templates/cfme-pv-app-example.yaml deleted file mode 100644 index 14bdd1dca..000000000 --- a/roles/openshift_examples/files/examples/v1.5/cfme-templates/cfme-pv-app-example.yaml +++ /dev/null @@ -1,13 +0,0 @@ -apiVersion: v1 -kind: PersistentVolume -metadata: - name: cloudforms -spec: - capacity: - storage: 2Gi - accessModes: - - ReadWriteOnce - nfs: - path: /opt/nfs/volumes-app - server: 10.19.0.216 - persistentVolumeReclaimPolicy: Recycle diff --git a/roles/openshift_examples/files/examples/v1.5/cfme-templates/cfme-pv-db-example.yaml b/roles/openshift_examples/files/examples/v1.5/cfme-templates/cfme-pv-db-example.yaml new file mode 100644 index 000000000..250a99b8d --- /dev/null +++ b/roles/openshift_examples/files/examples/v1.5/cfme-templates/cfme-pv-db-example.yaml @@ -0,0 +1,13 @@ +apiVersion: v1 +kind: PersistentVolume +metadata: + name: cfme-pv01 +spec: + capacity: + storage: 15Gi + accessModes: + - ReadWriteOnce + nfs: + path: /exports/cfme-pv01 + server: <your-nfs-host-here> + persistentVolumeReclaimPolicy: 
Retain diff --git a/roles/openshift_examples/files/examples/v1.5/cfme-templates/cfme-pv-example.yaml b/roles/openshift_examples/files/examples/v1.5/cfme-templates/cfme-pv-example.yaml deleted file mode 100644 index 709d8d976..000000000 --- a/roles/openshift_examples/files/examples/v1.5/cfme-templates/cfme-pv-example.yaml +++ /dev/null @@ -1,13 +0,0 @@ -apiVersion: v1 -kind: PersistentVolume -metadata: - name: nfs-pv01 -spec: - capacity: - storage: 2Gi - accessModes: - - ReadWriteOnce - nfs: - path: /opt/nfs/volumes - server: 10.19.0.216 - persistentVolumeReclaimPolicy: Recycle diff --git a/roles/openshift_examples/files/examples/v1.5/cfme-templates/cfme-pv-region-example.yaml b/roles/openshift_examples/files/examples/v1.5/cfme-templates/cfme-pv-region-example.yaml new file mode 100644 index 000000000..cba9bbe35 --- /dev/null +++ b/roles/openshift_examples/files/examples/v1.5/cfme-templates/cfme-pv-region-example.yaml @@ -0,0 +1,13 @@ +apiVersion: v1 +kind: PersistentVolume +metadata: + name: cfme-pv02 +spec: + capacity: + storage: 5Gi + accessModes: + - ReadWriteOnce + nfs: + path: /exports/cfme-pv02 + server: <your-nfs-host-here> + persistentVolumeReclaimPolicy: Retain diff --git a/roles/openshift_examples/files/examples/v1.5/cfme-templates/cfme-pv-server-example.yaml b/roles/openshift_examples/files/examples/v1.5/cfme-templates/cfme-pv-server-example.yaml new file mode 100644 index 000000000..c08c21265 --- /dev/null +++ b/roles/openshift_examples/files/examples/v1.5/cfme-templates/cfme-pv-server-example.yaml @@ -0,0 +1,13 @@ +apiVersion: v1 +kind: PersistentVolume +metadata: + name: cfme-pv03 +spec: + capacity: + storage: 5Gi + accessModes: + - ReadWriteOnce + nfs: + path: /exports/cfme-pv03 + server: <your-nfs-host-here> + persistentVolumeReclaimPolicy: Retain diff --git a/roles/openshift_examples/files/examples/v1.5/cfme-templates/cfme-template.yaml b/roles/openshift_examples/files/examples/v1.5/cfme-templates/cfme-template.yaml index 4f25a9c8f..3bc6c5813 
100644 --- a/roles/openshift_examples/files/examples/v1.5/cfme-templates/cfme-template.yaml +++ b/roles/openshift_examples/files/examples/v1.5/cfme-templates/cfme-template.yaml @@ -17,6 +17,7 @@ objects: service.alpha.openshift.io/dependencies: '[{"name":"${DATABASE_SERVICE_NAME}","namespace":"","kind":"Service"},{"name":"${MEMCACHED_SERVICE_NAME}","namespace":"","kind":"Service"}]' name: ${NAME} spec: + clusterIP: None ports: - name: http port: 80 @@ -48,11 +49,27 @@ objects: annotations: description: "Keeps track of changes in the CloudForms app image" spec: - dockerImageRepository: registry.access.redhat.com/cloudforms/cfme-openshift-app + dockerImageRepository: "${APPLICATION_IMG_NAME}" +- apiVersion: v1 + kind: ImageStream + metadata: + name: cfme-openshift-postgresql + annotations: + description: "Keeps track of changes in the CloudForms postgresql image" + spec: + dockerImageRepository: "${POSTGRESQL_IMG_NAME}" +- apiVersion: v1 + kind: ImageStream + metadata: + name: cfme-openshift-memcached + annotations: + description: "Keeps track of changes in the CloudForms memcached image" + spec: + dockerImageRepository: "${MEMCACHED_IMG_NAME}" - apiVersion: v1 kind: PersistentVolumeClaim metadata: - name: ${DATABASE_SERVICE_NAME} + name: "${NAME}-${DATABASE_SERVICE_NAME}" spec: accessModes: - ReadWriteOnce @@ -62,45 +79,41 @@ objects: - apiVersion: v1 kind: PersistentVolumeClaim metadata: - name: ${NAME} + name: "${NAME}-region" spec: accessModes: - ReadWriteOnce resources: requests: - storage: ${APPLICATION_VOLUME_CAPACITY} -- apiVersion: v1 - kind: "DeploymentConfig" + storage: ${APPLICATION_REGION_VOLUME_CAPACITY} +- apiVersion: apps/v1beta1 + kind: "StatefulSet" metadata: name: ${NAME} annotations: description: "Defines how to deploy the CloudForms appliance" spec: + serviceName: "${NAME}" + replicas: 1 template: metadata: labels: name: ${NAME} name: ${NAME} spec: - volumes: - - - name: "cfme-app-volume" - persistentVolumeClaim: - claimName: ${NAME} containers: 
- - image: cloudforms/cfme-openshift-app:${APPLICATION_IMG_TAG} - imagePullPolicy: IfNotPresent - name: cloudforms + - name: cloudforms + image: "${APPLICATION_IMG_NAME}:${APPLICATION_IMG_TAG}" livenessProbe: - httpGet: - path: / - port: 80 + tcpSocket: + port: 443 initialDelaySeconds: 480 timeoutSeconds: 3 readinessProbe: httpGet: path: / - port: 80 + port: 443 + scheme: HTTPS initialDelaySeconds: 200 timeoutSeconds: 3 ports: @@ -112,8 +125,11 @@ objects: privileged: true volumeMounts: - - name: "cfme-app-volume" + name: "${NAME}-server" mountPath: "/persistent" + - + name: "${NAME}-region" + mountPath: "/persistent-region" env: - name: "APPLICATION_INIT_DELAY" @@ -144,29 +160,32 @@ objects: value: "${POSTGRESQL_SHARED_BUFFERS}" resources: requests: - memory: "${MEMORY_APPLICATION_MIN}" + memory: "${APPLICATION_MEM_REQ}" + cpu: "${APPLICATION_CPU_REQ}" + limits: + memory: "${APPLICATION_MEM_LIMIT}" lifecycle: preStop: exec: command: - /opt/rh/cfme-container-scripts/sync-pv-data - replicas: 1 - selector: - name: ${NAME} - triggers: - - type: "ConfigChange" - - type: "ImageChange" - imageChangeParams: - automatic: true - containerNames: - - "cloudforms" - from: - kind: "ImageStreamTag" - name: "cfme-openshift-app:${APPLICATION_IMG_TAG}" - strategy: - type: "Recreate" - recreateParams: - timeoutSeconds: 1200 + volumes: + - + name: "${NAME}-region" + persistentVolumeClaim: + claimName: ${NAME}-region + volumeClaimTemplates: + - metadata: + name: "${NAME}-server" + annotations: + # Uncomment this if using dynamic volume provisioning. 
+ # https://docs.openshift.org/latest/install_config/persistent_storage/dynamically_provisioning_pvs.html + # volume.alpha.kubernetes.io/storage-class: anything + spec: + accessModes: [ ReadWriteOnce ] + resources: + requests: + storage: "${APPLICATION_VOLUME_CAPACITY}" - apiVersion: v1 kind: "Service" metadata: @@ -182,14 +201,6 @@ objects: selector: name: "${MEMCACHED_SERVICE_NAME}" - apiVersion: v1 - kind: ImageStream - metadata: - name: cfme-openshift-memcached - annotations: - description: "Keeps track of changes in the CloudForms memcached image" - spec: - dockerImageRepository: registry.access.redhat.com/cloudforms/cfme-openshift-memcached -- apiVersion: v1 kind: "DeploymentConfig" metadata: name: "${MEMCACHED_SERVICE_NAME}" @@ -223,7 +234,7 @@ objects: containers: - name: "memcached" - image: "cloudforms/cfme-openshift-memcached:${MEMCACHED_IMG_TAG}" + image: "${MEMCACHED_IMG_NAME}:${MEMCACHED_IMG_TAG}" ports: - containerPort: 11211 @@ -249,8 +260,11 @@ objects: name: "MEMCACHED_SLAB_PAGE_SIZE" value: "${MEMCACHED_SLAB_PAGE_SIZE}" resources: + requests: + memory: "${MEMCACHED_MEM_REQ}" + cpu: "${MEMCACHED_CPU_REQ}" limits: - memory: "${MEMORY_MEMCACHED_LIMIT}" + memory: "${MEMCACHED_MEM_LIMIT}" - apiVersion: v1 kind: "Service" metadata: @@ -266,14 +280,6 @@ objects: selector: name: "${DATABASE_SERVICE_NAME}" - apiVersion: v1 - kind: ImageStream - metadata: - name: cfme-openshift-postgresql - annotations: - description: "Keeps track of changes in the CloudForms postgresql image" - spec: - dockerImageRepository: registry.access.redhat.com/cloudforms/cfme-openshift-postgresql -- apiVersion: v1 kind: "DeploymentConfig" metadata: name: "${DATABASE_SERVICE_NAME}" @@ -307,11 +313,11 @@ objects: - name: "cfme-pgdb-volume" persistentVolumeClaim: - claimName: ${DATABASE_SERVICE_NAME} + claimName: "${NAME}-${DATABASE_SERVICE_NAME}" containers: - name: "postgresql" - image: "cloudforms/cfme-openshift-postgresql:${POSTGRESQL_IMG_TAG}" + image: 
"${POSTGRESQL_IMG_NAME}:${POSTGRESQL_IMG_TAG}" ports: - containerPort: 5432 @@ -350,8 +356,11 @@ objects: name: "POSTGRESQL_SHARED_BUFFERS" value: "${POSTGRESQL_SHARED_BUFFERS}" resources: + requests: + memory: "${POSTGRESQL_MEM_REQ}" + cpu: "${POSTGRESQL_CPU_REQ}" limits: - memory: "${MEMORY_POSTGRESQL_LIMIT}" + memory: "${POSTGRESQL_MEM_LIMIT}" parameters: - @@ -420,36 +429,87 @@ parameters: name: "POSTGRESQL_SHARED_BUFFERS" displayName: "PostgreSQL Shared Buffer Amount" description: "Amount of memory dedicated for PostgreSQL shared memory buffers." - value: "64MB" + value: "256MB" - - name: "MEMORY_APPLICATION_MIN" - displayName: "Application Memory Minimum" + name: "APPLICATION_CPU_REQ" + displayName: "Application Min CPU Requested" + required: true + description: "Minimum amount of CPU time the Application container will need (expressed in millicores)." + value: "1000m" + - + name: "POSTGRESQL_CPU_REQ" + displayName: "PostgreSQL Min CPU Requested" + required: true + description: "Minimum amount of CPU time the PostgreSQL container will need (expressed in millicores)." + value: "500m" + - + name: "MEMCACHED_CPU_REQ" + displayName: "Memcached Min CPU Requested" + required: true + description: "Minimum amount of CPU time the Memcached container will need (expressed in millicores)." + value: "200m" + - + name: "APPLICATION_MEM_REQ" + displayName: "Application Min RAM Requested" required: true description: "Minimum amount of memory the Application container will need." - value: "4096Mi" + value: "6144Mi" + - + name: "POSTGRESQL_MEM_REQ" + displayName: "PostgreSQL Min RAM Requested" + required: true + description: "Minimum amount of memory the PostgreSQL container will need." + value: "1024Mi" - - name: "MEMORY_POSTGRESQL_LIMIT" - displayName: "PostgreSQL Memory Limit" + name: "MEMCACHED_MEM_REQ" + displayName: "Memcached Min RAM Requested" required: true - description: "Maximum amount of memory the PostgreSQL container can use." 
- value: "2048Mi" + description: "Minimum amount of memory the Memcached container will need." + value: "64Mi" - - name: "MEMORY_MEMCACHED_LIMIT" - displayName: "Memcached Memory Limit" + name: "APPLICATION_MEM_LIMIT" + displayName: "Application Max RAM Limit" required: true - description: "Maximum amount of memory the Memcached container can use." + description: "Maximum amount of memory the Application container can consume." + value: "16384Mi" + - + name: "POSTGRESQL_MEM_LIMIT" + displayName: "PostgreSQL Max RAM Limit" + required: true + description: "Maximum amount of memory the PostgreSQL container can consume." + value: "8192Mi" + - + name: "MEMCACHED_MEM_LIMIT" + displayName: "Memcached Max RAM Limit" + required: true + description: "Maximum amount of memory the Memcached container can consume." value: "256Mi" - + name: "POSTGRESQL_IMG_NAME" + displayName: "PostgreSQL Image Name" + description: "This is the PostgreSQL image name requested to deploy." + value: "registry.access.redhat.com/cloudforms45/cfme-openshift-postgresql" + - name: "POSTGRESQL_IMG_TAG" displayName: "PostgreSQL Image Tag" description: "This is the PostgreSQL image tag/version requested to deploy." value: "latest" - + name: "MEMCACHED_IMG_NAME" + displayName: "Memcached Image Name" + description: "This is the Memcached image name requested to deploy." + value: "registry.access.redhat.com/cloudforms45/cfme-openshift-memcached" + - name: "MEMCACHED_IMG_TAG" displayName: "Memcached Image Tag" description: "This is the Memcached image tag/version requested to deploy." value: "latest" - + name: "APPLICATION_IMG_NAME" + displayName: "Application Image Name" + description: "This is the Application image name requested to deploy." + value: "registry.access.redhat.com/cloudforms45/cfme-openshift-app" + - name: "APPLICATION_IMG_TAG" displayName: "Application Image Tag" description: "This is the Application image tag/version requested to deploy." 
@@ -464,16 +524,22 @@ parameters: displayName: "Application Init Delay" required: true description: "Delay in seconds before we attempt to initialize the application." - value: "30" + value: "15" - name: "APPLICATION_VOLUME_CAPACITY" displayName: "Application Volume Capacity" required: true description: "Volume space available for application data." - value: "1Gi" + value: "5Gi" + - + name: "APPLICATION_REGION_VOLUME_CAPACITY" + displayName: "Application Region Volume Capacity" + required: true + description: "Volume space available for region application data." + value: "5Gi" - name: "DATABASE_VOLUME_CAPACITY" displayName: "Database Volume Capacity" required: true description: "Volume space available for database." - value: "1Gi" + value: "15Gi" diff --git a/roles/openshift_excluder/tasks/install.yml b/roles/openshift_excluder/tasks/install.yml index d09358bee..3a866cedf 100644 --- a/roles/openshift_excluder/tasks/install.yml +++ b/roles/openshift_excluder/tasks/install.yml @@ -1,14 +1,24 @@ --- -- name: Install docker excluder - package: - name: "{{ r_openshift_excluder_service_type }}-docker-excluder{{ openshift_pkg_version | default('') | oo_image_tag_to_rpm_version(include_dash=True) + '*' }}" - state: "{{ r_openshift_excluder_docker_package_state }}" - when: - - r_openshift_excluder_enable_docker_excluder | bool - -- name: Install openshift excluder - package: - name: "{{ r_openshift_excluder_service_type }}-excluder{{ openshift_pkg_version | default('') | oo_image_tag_to_rpm_version(include_dash=True) + '*' }}" - state: "{{ r_openshift_excluder_package_state }}" - when: - - r_openshift_excluder_enable_openshift_excluder | bool + +- when: + - not openshift.common.is_atomic | bool + - r_openshift_excluder_install_ran is not defined + + block: + + - name: Install docker excluder + package: + name: "{{ r_openshift_excluder_service_type }}-docker-excluder{{ openshift_pkg_version | default('') | oo_image_tag_to_rpm_version(include_dash=True) + '*' }}" + state: "{{ 
r_openshift_excluder_docker_package_state }}" + when: + - r_openshift_excluder_enable_docker_excluder | bool + + - name: Install openshift excluder + package: + name: "{{ r_openshift_excluder_service_type }}-excluder{{ openshift_pkg_version | default('') | oo_image_tag_to_rpm_version(include_dash=True) + '*' }}" + state: "{{ r_openshift_excluder_package_state }}" + when: + - r_openshift_excluder_enable_openshift_excluder | bool + + - set_fact: + r_openshift_excluder_install_ran: True diff --git a/roles/openshift_facts/tasks/main.yml b/roles/openshift_facts/tasks/main.yml index 1b9bda67e..50ed3e964 100644 --- a/roles/openshift_facts/tasks/main.yml +++ b/roles/openshift_facts/tasks/main.yml @@ -24,12 +24,18 @@ msg: | openshift-ansible requires Python 3 for {{ ansible_distribution }}; For information on enabling Python 3 with Ansible, see https://docs.ansible.com/ansible/python_3_support.html - when: ansible_distribution == 'Fedora' and ansible_python['version']['major'] != 3 + when: + - ansible_distribution == 'Fedora' + - ansible_python['version']['major'] != 3 + - r_openshift_facts_ran is not defined - name: Validate python version fail: msg: "openshift-ansible requires Python 2 for {{ ansible_distribution }}" - when: ansible_distribution != 'Fedora' and ansible_python['version']['major'] != 2 + when: + - ansible_distribution != 'Fedora' + - ansible_python['version']['major'] != 2 + - r_openshift_facts_ran is not defined # Fail as early as possible if Atomic and old version of Docker - block: @@ -48,7 +54,9 @@ that: - l_atomic_docker_version.stdout | replace('"', '') | version_compare('1.12','>=') - when: l_is_atomic | bool + when: + - l_is_atomic | bool + - r_openshift_facts_ran is not defined - name: Load variables include_vars: "{{ item }}" @@ -59,7 +67,9 @@ - name: Ensure various deps are installed package: name={{ item }} state=present with_items: "{{ required_packages }}" - when: not l_is_atomic | bool + when: + - not l_is_atomic | bool + - 
r_openshift_facts_ran is not defined - name: Ensure various deps for running system containers are installed package: name={{ item }} state=present @@ -67,6 +77,7 @@ when: - not l_is_atomic | bool - l_any_system_container | bool + - r_openshift_facts_ran is not defined - name: Gather Cluster facts and set is_containerized if needed openshift_facts: @@ -99,3 +110,7 @@ - name: Set repoquery command set_fact: repoquery_cmd: "{{ 'dnf repoquery --latest-limit 1 -d 0' if ansible_pkg_mgr == 'dnf' else 'repoquery --plugins' }}" + +- name: Register that this already ran + set_fact: + r_openshift_facts_ran: True diff --git a/roles/openshift_health_checker/openshift_checks/docker_image_availability.py b/roles/openshift_health_checker/openshift_checks/docker_image_availability.py index 27e6fe383..60aacf715 100644 --- a/roles/openshift_health_checker/openshift_checks/docker_image_availability.py +++ b/roles/openshift_health_checker/openshift_checks/docker_image_availability.py @@ -1,8 +1,24 @@ -# pylint: disable=missing-docstring +"""Check that required Docker images are available.""" + from openshift_checks import OpenShiftCheck, get_var from openshift_checks.mixins import DockerHostMixin +NODE_IMAGE_SUFFIXES = ["haproxy-router", "docker-registry", "deployer", "pod"] +DEPLOYMENT_IMAGE_INFO = { + "origin": { + "namespace": "openshift", + "name": "origin", + "registry_console_image": "cockpit/kubernetes", + }, + "openshift-enterprise": { + "namespace": "openshift3", + "name": "ose", + "registry_console_image": "registry.access.redhat.com/openshift3/registry-console", + }, +} + + class DockerImageAvailability(DockerHostMixin, OpenShiftCheck): """Check that required Docker images are available. 
@@ -13,25 +29,13 @@ class DockerImageAvailability(DockerHostMixin, OpenShiftCheck): name = "docker_image_availability" tags = ["preflight"] - dependencies = ["skopeo", "python-docker-py"] - deployment_image_info = { - "origin": { - "namespace": "openshift", - "name": "origin", - }, - "openshift-enterprise": { - "namespace": "openshift3", - "name": "ose", - }, - } - @classmethod def is_active(cls, task_vars): """Skip hosts with unsupported deployment types.""" deployment_type = get_var(task_vars, "openshift_deployment_type") - has_valid_deployment_type = deployment_type in cls.deployment_image_info + has_valid_deployment_type = deployment_type in DEPLOYMENT_IMAGE_INFO return super(DockerImageAvailability, cls).is_active(task_vars) and has_valid_deployment_type @@ -70,51 +74,55 @@ class DockerImageAvailability(DockerHostMixin, OpenShiftCheck): return {"changed": changed} - def required_images(self, task_vars): - deployment_type = get_var(task_vars, "openshift_deployment_type") - image_info = self.deployment_image_info[deployment_type] - - openshift_release = get_var(task_vars, "openshift_release", default="latest") - openshift_image_tag = get_var(task_vars, "openshift_image_tag") - is_containerized = get_var(task_vars, "openshift", "common", "is_containerized") - - images = set(self.required_docker_images( - image_info["namespace"], - image_info["name"], - ["registry-console"] if "enterprise" in deployment_type else [], # include enterprise-only image names - openshift_release, - is_containerized, - )) - - # append images with qualified image tags to our list of required images. - # these are images with a (v0.0.0.0) tag, rather than a standard release - # format tag (v0.0). We want to check this set in both containerized and - # non-containerized installations. 
- images.update( - self.required_qualified_docker_images( - image_info["namespace"], - image_info["name"], - openshift_image_tag, - ), - ) - - return images - @staticmethod - def required_docker_images(namespace, name, additional_image_names, version, is_containerized): - if is_containerized: - return ["{}/{}:{}".format(namespace, name, version)] if name else [] - - # include additional non-containerized images specific to the current deployment type - return ["{}/{}:{}".format(namespace, img_name, version) for img_name in additional_image_names] - - @staticmethod - def required_qualified_docker_images(namespace, name, version): - # pylint: disable=invalid-name - return [ - "{}/{}-{}:{}".format(namespace, name, suffix, version) - for suffix in ["haproxy-router", "docker-registry", "deployer", "pod"] - ] + def required_images(task_vars): + """ + Determine which images we expect to need for this host. + Returns: a set of required images like 'openshift/origin:v3.6' + + The thorny issue of determining the image names from the variables is under consideration + via https://github.com/openshift/openshift-ansible/issues/4415 + + For now we operate as follows: + * For containerized components (master, node, ...) we look at the deployment type and + use openshift/origin or openshift3/ose as the base for those component images. The + version is openshift_image_tag as determined by the openshift_version role. + * For OpenShift-managed infrastructure (router, registry...) we use oreg_url if + it is defined; otherwise we again use the base that depends on the deployment type. + Registry is not included in constructed images. It may be in oreg_url or etcd image. 
+ """ + required = set() + deployment_type = get_var(task_vars, "openshift_deployment_type") + host_groups = get_var(task_vars, "group_names") + image_tag = get_var(task_vars, "openshift_image_tag") + image_info = DEPLOYMENT_IMAGE_INFO[deployment_type] + if not image_info: + return required + + # template for images that run on top of OpenShift + image_url = "{}/{}-{}:{}".format(image_info["namespace"], image_info["name"], "${component}", "${version}") + image_url = get_var(task_vars, "oreg_url", default="") or image_url + if 'nodes' in host_groups: + for suffix in NODE_IMAGE_SUFFIXES: + required.add(image_url.replace("${component}", suffix).replace("${version}", image_tag)) + # The registry-console is for some reason not prefixed with ose- like the other components. + # Nor is it versioned the same, so just look for latest. + # Also a completely different name is used for Origin. + required.add(image_info["registry_console_image"]) + + # images for containerized components + if get_var(task_vars, "openshift", "common", "is_containerized"): + components = set() + if 'nodes' in host_groups: + components.update(["node", "openvswitch"]) + if 'masters' in host_groups: # name is "origin" or "ose" + components.add(image_info["name"]) + for component in components: + required.add("{}/{}:{}".format(image_info["namespace"], component, image_tag)) + if 'etcd' in host_groups: # special case, note it is the same for origin/enterprise + required.add("registry.access.redhat.com/rhel7/etcd") # and no image tag + + return required def local_images(self, images, task_vars): """Filter a list of images and return those available locally.""" @@ -124,7 +132,8 @@ class DockerImageAvailability(DockerHostMixin, OpenShiftCheck): ] def is_image_local(self, image, task_vars): - result = self.module_executor("docker_image_facts", {"name": image}, task_vars) + """Check if image is already in local docker index.""" + result = self.execute_module("docker_image_facts", {"name": image}, 
task_vars=task_vars) if result.get("failed", False): return False @@ -132,6 +141,7 @@ class DockerImageAvailability(DockerHostMixin, OpenShiftCheck): @staticmethod def known_docker_registries(task_vars): + """Build a list of docker registries available according to inventory vars.""" docker_facts = get_var(task_vars, "openshift", "docker") regs = set(docker_facts["additional_registries"]) @@ -147,17 +157,21 @@ class DockerImageAvailability(DockerHostMixin, OpenShiftCheck): """Inspect existing images using Skopeo and return all images successfully inspected.""" return [ image for image in images - if any(self.is_available_skopeo_image(image, registry, task_vars) for registry in registries) + if self.is_available_skopeo_image(image, registries, task_vars) ] - def is_available_skopeo_image(self, image, registry, task_vars): - """Uses Skopeo to determine if required image exists in a given registry.""" + def is_available_skopeo_image(self, image, registries, task_vars): + """Use Skopeo to determine if required image exists in known registry(s).""" + + # if image does already includes a registry, just use that + if image.count("/") > 1: + registry, image = image.split("/", 1) + registries = [registry] - cmd_str = "skopeo inspect docker://{registry}/{image}".format( - registry=registry, - image=image, - ) + for registry in registries: + args = {"_raw_params": "skopeo inspect docker://{}/{}".format(registry, image)} + result = self.execute_module("command", args, task_vars=task_vars) + if result.get("rc", 0) == 0 and not result.get("failed"): + return True - args = {"_raw_params": cmd_str} - result = self.module_executor("command", args, task_vars) - return not result.get("failed", False) and result.get("rc", 0) == 0 + return False diff --git a/roles/openshift_health_checker/openshift_checks/docker_storage.py b/roles/openshift_health_checker/openshift_checks/docker_storage.py index 7f1751b36..2bd615457 100644 --- 
a/roles/openshift_health_checker/openshift_checks/docker_storage.py +++ b/roles/openshift_health_checker/openshift_checks/docker_storage.py @@ -34,7 +34,7 @@ class DockerStorage(DockerHostMixin, OpenShiftCheck): } # attempt to get the docker info hash from the API - info = self.execute_module("docker_info", {}, task_vars) + info = self.execute_module("docker_info", {}, task_vars=task_vars) if info.get("failed"): return {"failed": True, "changed": changed, "msg": "Failed to query Docker API. Is docker running on this host?"} @@ -146,7 +146,7 @@ class DockerStorage(DockerHostMixin, OpenShiftCheck): vgs_cmd = "/sbin/vgs --noheadings -o vg_free --select vg_name=" + vg_name # should return free space like " 12.00g" if the VG exists; empty if it does not - ret = self.execute_module("command", {"_raw_params": vgs_cmd}, task_vars) + ret = self.execute_module("command", {"_raw_params": vgs_cmd}, task_vars=task_vars) if ret.get("failed") or ret.get("rc", 0) != 0: raise OpenShiftCheckException( "Is LVM installed? Failed to run /sbin/vgs " diff --git a/roles/openshift_health_checker/openshift_checks/mixins.py b/roles/openshift_health_checker/openshift_checks/mixins.py index 7f3d78cc4..2cb2e21aa 100644 --- a/roles/openshift_health_checker/openshift_checks/mixins.py +++ b/roles/openshift_health_checker/openshift_checks/mixins.py @@ -40,8 +40,11 @@ class DockerHostMixin(object): # NOTE: we would use the "package" module but it's actually an action plugin # and it's not clear how to invoke one of those. 
This is about the same anyway: - pkg_manager = get_var(task_vars, "ansible_pkg_mgr", default="yum") - result = self.module_executor(pkg_manager, {"name": self.dependencies, "state": "present"}, task_vars) + result = self.execute_module( + get_var(task_vars, "ansible_pkg_mgr", default="yum"), + {"name": self.dependencies, "state": "present"}, + task_vars=task_vars, + ) msg = result.get("msg", "") if result.get("failed"): if "No package matching" in msg: diff --git a/roles/openshift_health_checker/openshift_checks/ovs_version.py b/roles/openshift_health_checker/openshift_checks/ovs_version.py index 1e45ae3af..2dd045f1f 100644 --- a/roles/openshift_health_checker/openshift_checks/ovs_version.py +++ b/roles/openshift_health_checker/openshift_checks/ovs_version.py @@ -43,7 +43,7 @@ class OvsVersion(NotContainerizedMixin, OpenShiftCheck): }, ], } - return self.execute_module("rpm_version", args, task_vars) + return self.execute_module("rpm_version", args, task_vars=task_vars) def get_required_ovs_version(self, task_vars): """Return the correct Open vSwitch version for the current OpenShift version""" diff --git a/roles/openshift_health_checker/openshift_checks/package_availability.py b/roles/openshift_health_checker/openshift_checks/package_availability.py index a7eb720fd..0dd2b1286 100644 --- a/roles/openshift_health_checker/openshift_checks/package_availability.py +++ b/roles/openshift_health_checker/openshift_checks/package_availability.py @@ -25,7 +25,7 @@ class PackageAvailability(NotContainerizedMixin, OpenShiftCheck): packages.update(self.node_packages(rpm_prefix)) args = {"packages": sorted(set(packages))} - return self.execute_module("check_yum_update", args, tmp, task_vars) + return self.execute_module("check_yum_update", args, tmp=tmp, task_vars=task_vars) @staticmethod def master_packages(rpm_prefix): @@ -36,8 +36,7 @@ class PackageAvailability(NotContainerizedMixin, OpenShiftCheck): "bash-completion", "cockpit-bridge", "cockpit-docker", - 
"cockpit-kubernetes", - "cockpit-shell", + "cockpit-system", "cockpit-ws", "etcd", "httpd-tools", diff --git a/roles/openshift_health_checker/openshift_checks/package_update.py b/roles/openshift_health_checker/openshift_checks/package_update.py index fd0c0a755..f432380c6 100644 --- a/roles/openshift_health_checker/openshift_checks/package_update.py +++ b/roles/openshift_health_checker/openshift_checks/package_update.py @@ -11,4 +11,4 @@ class PackageUpdate(NotContainerizedMixin, OpenShiftCheck): def run(self, tmp, task_vars): args = {"packages": []} - return self.execute_module("check_yum_update", args, tmp, task_vars) + return self.execute_module("check_yum_update", args, tmp=tmp, task_vars=task_vars) diff --git a/roles/openshift_health_checker/openshift_checks/package_version.py b/roles/openshift_health_checker/openshift_checks/package_version.py index 2e737818b..6a76bb93d 100644 --- a/roles/openshift_health_checker/openshift_checks/package_version.py +++ b/roles/openshift_health_checker/openshift_checks/package_version.py @@ -71,7 +71,7 @@ class PackageVersion(NotContainerizedMixin, OpenShiftCheck): ], } - return self.execute_module("aos_version", args, tmp, task_vars) + return self.execute_module("aos_version", args, tmp=tmp, task_vars=task_vars) def get_required_ovs_version(self, task_vars): """Return the correct Open vSwitch version for the current OpenShift version. 
diff --git a/roles/openshift_health_checker/test/docker_image_availability_test.py b/roles/openshift_health_checker/test/docker_image_availability_test.py index 197c65f51..0a7c0f8d3 100644 --- a/roles/openshift_health_checker/test/docker_image_availability_test.py +++ b/roles/openshift_health_checker/test/docker_image_availability_test.py @@ -31,15 +31,15 @@ def test_is_active(deployment_type, is_containerized, group_names, expect_active (False, True), ]) def test_all_images_available_locally(is_containerized, is_atomic): - def execute_module(module_name, args, task_vars): + def execute_module(module_name, module_args, task_vars): if module_name == "yum": return {"changed": True} assert module_name == "docker_image_facts" - assert 'name' in args - assert args['name'] + assert 'name' in module_args + assert module_args['name'] return { - 'images': [args['name']], + 'images': [module_args['name']], } result = DockerImageAvailability(execute_module=execute_module).run(tmp=None, task_vars=dict( @@ -52,8 +52,8 @@ def test_all_images_available_locally(is_containerized, is_atomic): docker=dict(additional_registries=["docker.io"]), ), openshift_deployment_type='origin', - openshift_release='v3.4', openshift_image_tag='3.4', + group_names=['nodes', 'masters'], )) assert not result.get('failed', False) @@ -64,7 +64,7 @@ def test_all_images_available_locally(is_containerized, is_atomic): True, ]) def test_all_images_available_remotely(available_locally): - def execute_module(module_name, args, task_vars): + def execute_module(module_name, module_args, task_vars): if module_name == 'docker_image_facts': return {'images': [], 'failed': available_locally} return {'changed': False} @@ -79,8 +79,8 @@ def test_all_images_available_remotely(available_locally): docker=dict(additional_registries=["docker.io", "registry.access.redhat.com"]), ), openshift_deployment_type='origin', - openshift_release='3.4', openshift_image_tag='v3.4', + group_names=['nodes', 'masters'], )) assert not 
result.get('failed', False) @@ -108,8 +108,8 @@ def test_all_images_unavailable(): docker=dict(additional_registries=["docker.io"]), ), openshift_deployment_type="openshift-enterprise", - openshift_release=None, - openshift_image_tag='latest' + openshift_image_tag='latest', + group_names=['nodes', 'masters'], )) assert actual['failed'] @@ -147,8 +147,8 @@ def test_skopeo_update_failure(message, extra_words): docker=dict(additional_registries=["unknown.io"]), ), openshift_deployment_type="openshift-enterprise", - openshift_release='', openshift_image_tag='', + group_names=['nodes', 'masters'], )) assert actual["failed"] @@ -177,8 +177,85 @@ def test_registry_availability(deployment_type, registries): docker=dict(additional_registries=registries), ), openshift_deployment_type=deployment_type, - openshift_release='', openshift_image_tag='', + group_names=['nodes', 'masters'], )) assert not actual.get("failed", False) + + +@pytest.mark.parametrize("deployment_type, is_containerized, groups, oreg_url, expected", [ + ( # standard set of stuff required on nodes + "origin", False, ['nodes'], None, + set([ + 'openshift/origin-pod:vtest', + 'openshift/origin-deployer:vtest', + 'openshift/origin-docker-registry:vtest', + 'openshift/origin-haproxy-router:vtest', + 'cockpit/kubernetes', # origin version of registry-console + ]) + ), + ( # set a different URL for images + "origin", False, ['nodes'], 'foo.io/openshift/origin-${component}:${version}', + set([ + 'foo.io/openshift/origin-pod:vtest', + 'foo.io/openshift/origin-deployer:vtest', + 'foo.io/openshift/origin-docker-registry:vtest', + 'foo.io/openshift/origin-haproxy-router:vtest', + 'cockpit/kubernetes', # AFAICS this is not built from the URL + ]) + ), + ( + "origin", True, ['nodes', 'masters', 'etcd'], None, + set([ + # images running on top of openshift + 'openshift/origin-pod:vtest', + 'openshift/origin-deployer:vtest', + 'openshift/origin-docker-registry:vtest', + 'openshift/origin-haproxy-router:vtest', + 
'cockpit/kubernetes', + # containerized component images + 'openshift/origin:vtest', + 'openshift/node:vtest', + 'openshift/openvswitch:vtest', + 'registry.access.redhat.com/rhel7/etcd', + ]) + ), + ( # enterprise images + "openshift-enterprise", True, ['nodes'], 'foo.io/openshift3/ose-${component}:f13ac45', + set([ + 'foo.io/openshift3/ose-pod:f13ac45', + 'foo.io/openshift3/ose-deployer:f13ac45', + 'foo.io/openshift3/ose-docker-registry:f13ac45', + 'foo.io/openshift3/ose-haproxy-router:f13ac45', + # registry-console is not constructed/versioned the same as the others. + 'registry.access.redhat.com/openshift3/registry-console', + # containerized images aren't built from oreg_url + 'openshift3/node:vtest', + 'openshift3/openvswitch:vtest', + ]) + ), + ( + "openshift-enterprise", True, ['etcd', 'lb'], 'foo.io/openshift3/ose-${component}:f13ac45', + set([ + 'registry.access.redhat.com/rhel7/etcd', + # lb does not yet come in a containerized version + ]) + ), + +]) +def test_required_images(deployment_type, is_containerized, groups, oreg_url, expected): + task_vars = dict( + openshift=dict( + common=dict( + is_containerized=is_containerized, + is_atomic=False, + ), + ), + openshift_deployment_type=deployment_type, + group_names=groups, + oreg_url=oreg_url, + openshift_image_tag='vtest', + ) + + assert expected == DockerImageAvailability("DUMMY").required_images(task_vars) diff --git a/roles/openshift_health_checker/test/docker_storage_test.py b/roles/openshift_health_checker/test/docker_storage_test.py index 292a323db..876614b1d 100644 --- a/roles/openshift_health_checker/test/docker_storage_test.py +++ b/roles/openshift_health_checker/test/docker_storage_test.py @@ -77,7 +77,7 @@ non_atomic_task_vars = {"openshift": {"common": {"is_atomic": False}}} ), ]) def test_check_storage_driver(docker_info, failed, expect_msg): - def execute_module(module_name, args, tmp=None, task_vars=None): + def execute_module(module_name, module_args, tmp=None, task_vars=None): if 
module_name == "yum": return {} if module_name != "docker_info": @@ -187,7 +187,7 @@ def test_dm_usage(task_vars, driver_status, vg_free, success, expect_msg): ) ]) def test_vg_free(pool, command_returns, raises, returns): - def execute_module(module_name, args, tmp=None, task_vars=None): + def execute_module(module_name, module_args, tmp=None, task_vars=None): if module_name != "command": raise ValueError("not expecting module " + module_name) return command_returns diff --git a/roles/openshift_hosted/tasks/registry/registry.yml b/roles/openshift_hosted/tasks/registry/registry.yml index 751489958..d895e9a68 100644 --- a/roles/openshift_hosted/tasks/registry/registry.yml +++ b/roles/openshift_hosted/tasks/registry/registry.yml @@ -124,6 +124,35 @@ edits: "{{ openshift_hosted_registry_edits }}" force: "{{ True|bool in openshift_hosted_registry_force }}" +- name: Ensure OpenShift registry correctly rolls out (best-effort today) + command: | + oc rollout status deploymentconfig {{ openshift_hosted_registry_name }} \ + --namespace {{ openshift_hosted_registry_namespace }} \ + --config {{ openshift.common.config_base }}/master/admin.kubeconfig + async: 600 + poll: 15 + failed_when: false + +- name: Determine the latest version of the OpenShift registry deployment + command: | + oc get deploymentconfig {{ openshift_hosted_registry_name }} \ + --namespace {{ openshift_hosted_registry_namespace }} \ + --config {{ openshift.common.config_base }}/master/admin.kubeconfig \ + -o jsonpath='{ .status.latestVersion }' + register: openshift_hosted_registry_latest_version + +- name: Sanity-check that the OpenShift registry rolled out correctly + command: | + oc get replicationcontroller {{ openshift_hosted_registry_name }}-{{ openshift_hosted_registry_latest_version.stdout }} \ + --namespace {{ openshift_hosted_registry_namespace }} \ + --config {{ openshift.common.config_base }}/master/admin.kubeconfig \ + -o jsonpath='{ .metadata.annotations.openshift\.io/deployment\.phase }' + 
register: openshift_hosted_registry_rc_phase + until: "'Running' not in openshift_hosted_registry_rc_phase.stdout" + delay: 15 + retries: 40 + failed_when: "'Failed' in openshift_hosted_registry_rc_phase.stdout" + - include: storage/glusterfs.yml when: - openshift.hosted.registry.storage.kind | default(none) == 'glusterfs' or openshift.hosted.registry.storage.glusterfs.swap diff --git a/roles/openshift_hosted/tasks/router/router.yml b/roles/openshift_hosted/tasks/router/router.yml index 192afc87a..160ae2f5e 100644 --- a/roles/openshift_hosted/tasks/router/router.yml +++ b/roles/openshift_hosted/tasks/router/router.yml @@ -55,7 +55,7 @@ state: present with_items: "{{ openshift_hosted_routers }}" -- name: Grant the router serivce account(s) access to the appropriate scc +- name: Grant the router service account(s) access to the appropriate scc oc_adm_policy_user: user: "system:serviceaccount:{{ item.namespace }}:{{ item.serviceaccount }}" namespace: "{{ item.namespace }}" @@ -89,18 +89,37 @@ ports: "{{ item.ports }}" stats_port: "{{ item.stats_port }}" with_items: "{{ openshift_hosted_routers }}" - register: routerout -# This should probably move to module -- name: wait for deploy - pause: - seconds: 30 - when: routerout.changed +- name: Ensure OpenShift router correctly rolls out (best-effort today) + command: | + oc rollout status deploymentconfig {{ item.name }} \ + --namespace {{ item.namespace | default('default') }} \ + --config {{ openshift.common.config_base }}/master/admin.kubeconfig + async: 600 + poll: 15 + with_items: "{{ openshift_hosted_routers }}" + failed_when: false -- name: Ensure router replica count matches desired - oc_scale: - kind: dc - name: "{{ item.name | default('router') }}" - namespace: "{{ item.namespace | default('default') }}" - replicas: "{{ item.replicas }}" +- name: Determine the latest version of the OpenShift router deployment + command: | + oc get deploymentconfig {{ item.name }} \ + --namespace {{ item.namespace }} \ + --config 
{{ openshift.common.config_base }}/master/admin.kubeconfig \ + -o jsonpath='{ .status.latestVersion }' + register: openshift_hosted_routers_latest_version with_items: "{{ openshift_hosted_routers }}" + +- name: Poll for OpenShift router deployment success + command: | + oc get replicationcontroller {{ item.0.name }}-{{ item.1.stdout }} \ + --namespace {{ item.0.namespace }} \ + --config {{ openshift.common.config_base }}/master/admin.kubeconfig \ + -o jsonpath='{ .metadata.annotations.openshift\.io/deployment\.phase }' + register: openshift_hosted_router_rc_phase + until: "'Running' not in openshift_hosted_router_rc_phase.stdout" + delay: 15 + retries: 40 + failed_when: "'Failed' in openshift_hosted_router_rc_phase.stdout" + with_together: + - "{{ openshift_hosted_routers }}" + - "{{ openshift_hosted_routers_latest_version.results }}" diff --git a/roles/openshift_logging/README.md b/roles/openshift_logging/README.md index 0c60ef6fd..dd0f22d4b 100644 --- a/roles/openshift_logging/README.md +++ b/roles/openshift_logging/README.md @@ -55,6 +55,9 @@ When both `openshift_logging_install_logging` and `openshift_logging_upgrade_log - `openshift_logging_fluentd_use_journal`: NOTE: Fluentd will attempt to detect whether or not Docker is using the journald log driver when using the default of empty. - `openshift_logging_fluentd_journal_read_from_head`: If empty, Fluentd will use its internal default, which is false. - `openshift_logging_fluentd_hosts`: List of nodes that should be labeled for Fluentd to be deployed to. Defaults to ['--all']. +- `openshift_logging_fluentd_buffer_queue_limit`: Buffer queue limit for Fluentd. Defaults to 1024. +- `openshift_logging_fluentd_buffer_size_limit`: Buffer chunk limit for Fluentd. Defaults to 1m. + - `openshift_logging_es_host`: The name of the ES service Fluentd should send logs to. Defaults to 'logging-es'. - `openshift_logging_es_port`: The port for the ES service Fluentd should sent its logs to. Defaults to '9200'. 
@@ -155,3 +158,5 @@ Elasticsearch OPS too, if using an OPS cluster: - `openshift_logging_mux_namespaces`: Default `[]` - additional namespaces to create for _external_ mux clients to associate with their logs - users will need to set this +- `openshift_logging_mux_buffer_queue_limit`: Default `[1024]` - Buffer queue limit for Mux. +- `openshift_logging_mux_buffer_size_limit`: Default `[1m]` - Buffer chunk limit for Mux. diff --git a/roles/openshift_logging/defaults/main.yml b/roles/openshift_logging/defaults/main.yml index 3c343c9dc..66d880d23 100644 --- a/roles/openshift_logging/defaults/main.yml +++ b/roles/openshift_logging/defaults/main.yml @@ -76,6 +76,8 @@ openshift_logging_fluentd_use_journal: "{{ openshift_hosted_logging_use_journal openshift_logging_fluentd_journal_source: "{{ openshift_hosted_logging_journal_source | default('') }}" openshift_logging_fluentd_journal_read_from_head: "{{ openshift_hosted_logging_journal_read_from_head | default('') }}" openshift_logging_fluentd_hosts: ['--all'] +openshift_logging_fluentd_buffer_queue_limit: 1024 +openshift_logging_fluentd_buffer_size_limit: 1m openshift_logging_es_host: logging-es openshift_logging_es_port: 9200 diff --git a/roles/openshift_logging/tasks/install_logging.yaml b/roles/openshift_logging/tasks/install_logging.yaml index 7c1062b77..66dc0e096 100644 --- a/roles/openshift_logging/tasks/install_logging.yaml +++ b/roles/openshift_logging/tasks/install_logging.yaml @@ -119,6 +119,12 @@ openshift_logging_elasticsearch_pvc_size: "{{ openshift_logging_es_pvc_size }}" openshift_logging_elasticsearch_pvc_dynamic: "{{ openshift_logging_es_pvc_dynamic }}" openshift_logging_elasticsearch_pvc_pv_selector: "{{ openshift_logging_es_pv_selector }}" + openshift_logging_es_key: "{{ openshift_logging_es_ops_key }}" + openshift_logging_es_cert: "{{ openshift_logging_es_ops_cert }}" + openshift_logging_es_ca_ext: "{{ openshift_logging_es_ops_ca_ext }}" + openshift_logging_es_hostname: "{{ 
openshift_logging_es_ops_hostname }}" + openshift_logging_es_edge_term_policy: "{{ openshift_logging_es_ops_edge_term_policy | default('') }}" + openshift_logging_es_allow_external: "{{ openshift_logging_es_ops_allow_external }}" with_together: - "{{ openshift_logging_facts.elasticsearch_ops.deploymentconfigs }}" @@ -141,6 +147,12 @@ openshift_logging_elasticsearch_pvc_size: "{{ openshift_logging_es_pvc_size }}" openshift_logging_elasticsearch_pvc_dynamic: "{{ openshift_logging_es_pvc_dynamic }}" openshift_logging_elasticsearch_pvc_pv_selector: "{{ openshift_logging_es_pv_selector }}" + openshift_logging_es_key: "{{ openshift_logging_es_ops_key }}" + openshift_logging_es_cert: "{{ openshift_logging_es_ops_cert }}" + openshift_logging_es_ca_ext: "{{ openshift_logging_es_ops_ca_ext }}" + openshift_logging_es_hostname: "{{ openshift_logging_es_ops_hostname }}" + openshift_logging_es_edge_term_policy: "{{ openshift_logging_es_ops_edge_term_policy | default('') }}" + openshift_logging_es_allow_external: "{{ openshift_logging_es_ops_allow_external }}" with_sequence: count={{ openshift_logging_es_ops_cluster_size | int - openshift_logging_facts.elasticsearch_ops.deploymentconfigs.keys() | count }} when: diff --git a/roles/openshift_logging_elasticsearch/tasks/main.yaml b/roles/openshift_logging_elasticsearch/tasks/main.yaml index f1d15b76d..684dbe0a0 100644 --- a/roles/openshift_logging_elasticsearch/tasks/main.yaml +++ b/roles/openshift_logging_elasticsearch/tasks/main.yaml @@ -269,6 +269,75 @@ - "{{ tempdir }}/templates/logging-es-dc.yml" delete_after: true +- name: Retrieving the cert to use when generating secrets for the {{ es_component }} component + slurp: + src: "{{ generated_certs_dir }}/{{ item.file }}" + register: key_pairs + with_items: + - { name: "ca_file", file: "ca.crt" } + - { name: "es_key", file: "system.logging.es.key" } + - { name: "es_cert", file: "system.logging.es.crt" } + when: openshift_logging_es_allow_external | bool + +- set_fact: + es_key: 
"{{ lookup('file', openshift_logging_es_key) | b64encode }}" + when: + - openshift_logging_es_key | trim | length > 0 + - openshift_logging_es_allow_external | bool + changed_when: false + +- set_fact: + es_cert: "{{ lookup('file', openshift_logging_es_cert) | b64encode }}" + when: + - openshift_logging_es_cert | trim | length > 0 + - openshift_logging_es_allow_external | bool + changed_when: false + +- set_fact: + es_ca: "{{ lookup('file', openshift_logging_es_ca_ext) | b64encode }}" + when: + - openshift_logging_es_ca_ext | trim | length > 0 + - openshift_logging_es_allow_external | bool + changed_when: false + +- set_fact: + es_ca: "{{ key_pairs | entry_from_named_pair('ca_file') }}" + when: + - es_ca is not defined + - openshift_logging_es_allow_external | bool + changed_when: false + +- name: Generating Elasticsearch {{ es_component }} route template + template: + src: route_reencrypt.j2 + dest: "{{mktemp.stdout}}/templates/logging-{{ es_component }}-route.yaml" + vars: + obj_name: "logging-{{ es_component }}" + route_host: "{{ openshift_logging_es_hostname }}" + service_name: "logging-{{ es_component }}" + tls_key: "{{ es_key | default('') | b64decode }}" + tls_cert: "{{ es_cert | default('') | b64decode }}" + tls_ca_cert: "{{ es_ca | b64decode }}" + tls_dest_ca_cert: "{{ key_pairs | entry_from_named_pair('ca_file') | b64decode }}" + edge_term_policy: "{{ openshift_logging_es_edge_term_policy | default('') }}" + labels: + component: support + logging-infra: support + provider: openshift + changed_when: no + when: openshift_logging_es_allow_external | bool + +# This currently has an issue if the host name changes +- name: Setting Elasticsearch {{ es_component }} route + oc_obj: + state: present + name: "logging-{{ es_component }}" + namespace: "{{ openshift_logging_elasticsearch_namespace }}" + kind: route + files: + - "{{ tempdir }}/templates/logging-{{ es_component }}-route.yaml" + when: openshift_logging_es_allow_external | bool + ## Placeholder for 
migration when necessary ## - name: Delete temp directory diff --git a/roles/openshift_logging_elasticsearch/templates/elasticsearch-logging.yml.j2 b/roles/openshift_logging_elasticsearch/templates/elasticsearch-logging.yml.j2 index 377abe21f..38948ba2f 100644 --- a/roles/openshift_logging_elasticsearch/templates/elasticsearch-logging.yml.j2 +++ b/roles/openshift_logging_elasticsearch/templates/elasticsearch-logging.yml.j2 @@ -35,6 +35,12 @@ appender: layout: type: consolePattern conversionPattern: "[%d{ISO8601}][%-5p][%-25c] %m%n" + # need this filter until https://github.com/openshift/origin/issues/14515 is fixed + filter: + 1: + type: org.apache.log4j.varia.StringMatchFilter + StringToMatch: "SSL Problem illegal change cipher spec msg, conn state = 6, handshake state = 1" + AcceptOnMatch: false file: type: dailyRollingFile @@ -43,6 +49,12 @@ appender: layout: type: pattern conversionPattern: "[%d{ISO8601}][%-5p][%-25c] %m%n" + # need this filter until https://github.com/openshift/origin/issues/14515 is fixed + filter: + 1: + type: org.apache.log4j.varia.StringMatchFilter + StringToMatch: "SSL Problem illegal change cipher spec msg, conn state = 6, handshake state = 1" + AcceptOnMatch: false # Use the following log4j-extras RollingFileAppender to enable gzip compression of log files. 
# For more information see https://logging.apache.org/log4j/extras/apidocs/org/apache/log4j/rolling/RollingFileAppender.html diff --git a/roles/openshift_logging_elasticsearch/templates/elasticsearch.yml.j2 b/roles/openshift_logging_elasticsearch/templates/elasticsearch.yml.j2 index 58c325c8a..409e564c2 100644 --- a/roles/openshift_logging_elasticsearch/templates/elasticsearch.yml.j2 +++ b/roles/openshift_logging_elasticsearch/templates/elasticsearch.yml.j2 @@ -16,6 +16,7 @@ index: node: master: ${IS_MASTER} data: ${HAS_DATA} + max_local_storage_nodes: 1 network: host: 0.0.0.0 diff --git a/roles/openshift_logging_elasticsearch/templates/route_reencrypt.j2 b/roles/openshift_logging_elasticsearch/templates/route_reencrypt.j2 new file mode 100644 index 000000000..cf8a9e65f --- /dev/null +++ b/roles/openshift_logging_elasticsearch/templates/route_reencrypt.j2 @@ -0,0 +1,36 @@ +apiVersion: "v1" +kind: "Route" +metadata: + name: "{{obj_name}}" +{% if labels is defined%} + labels: +{% for key, value in labels.iteritems() %} + {{key}}: {{value}} +{% endfor %} +{% endif %} +spec: + host: {{ route_host }} + tls: +{% if tls_key is defined and tls_key | length > 0 %} + key: | +{{ tls_key|indent(6, true) }} +{% if tls_cert is defined and tls_cert | length > 0 %} + certificate: | +{{ tls_cert|indent(6, true) }} +{% endif %} +{% endif %} + caCertificate: | +{% for line in tls_ca_cert.split('\n') %} + {{ line }} +{% endfor %} + destinationCACertificate: | +{% for line in tls_dest_ca_cert.split('\n') %} + {{ line }} +{% endfor %} + termination: reencrypt +{% if edge_term_policy is defined and edge_term_policy | length > 0 %} + insecureEdgeTerminationPolicy: {{ edge_term_policy }} +{% endif %} + to: + kind: Service + name: {{ service_name }} diff --git a/roles/openshift_logging_fluentd/templates/fluentd.j2 b/roles/openshift_logging_fluentd/templates/fluentd.j2 index e185938e3..a5695ee26 100644 --- a/roles/openshift_logging_fluentd/templates/fluentd.j2 +++ 
b/roles/openshift_logging_fluentd/templates/fluentd.j2 @@ -93,6 +93,14 @@ spec: value: "{{ openshift_logging_fluentd_journal_source | default('') }}" - name: "JOURNAL_READ_FROM_HEAD" value: "{{ openshift_logging_fluentd_journal_read_from_head | lower }}" + - name: "BUFFER_QUEUE_LIMIT" + value: "{{ openshift_logging_fluentd_buffer_queue_limit }}" + - name: "BUFFER_SIZE_LIMIT" + value: "{{ openshift_logging_fluentd_buffer_size_limit }}" + - name: "FLUENTD_CPU_LIMIT" + value: "{{ openshift_logging_fluentd_cpu_limit }}" + - name: "FLUENTD_MEMORY_LIMIT" + value: "{{ openshift_logging_fluentd_memory_limit }}" volumes: - name: runlogjournal hostPath: diff --git a/roles/openshift_logging_mux/defaults/main.yml b/roles/openshift_logging_mux/defaults/main.yml index 10fa4372c..77e47d38c 100644 --- a/roles/openshift_logging_mux/defaults/main.yml +++ b/roles/openshift_logging_mux/defaults/main.yml @@ -10,7 +10,9 @@ openshift_logging_mux_namespace: logging ### Common settings openshift_logging_mux_nodeselector: "{{ openshift_hosted_logging_mux_nodeselector_label | default('') | map_from_pairs }}" openshift_logging_mux_cpu_limit: 500m -openshift_logging_mux_memory_limit: 1Gi +openshift_logging_mux_memory_limit: 2Gi +openshift_logging_mux_buffer_queue_limit: 1024 +openshift_logging_mux_buffer_size_limit: 1m openshift_logging_mux_replicas: 1 diff --git a/roles/openshift_logging_mux/templates/mux.j2 b/roles/openshift_logging_mux/templates/mux.j2 index 502cd3347..243698c6a 100644 --- a/roles/openshift_logging_mux/templates/mux.j2 +++ b/roles/openshift_logging_mux/templates/mux.j2 @@ -103,6 +103,14 @@ spec: value: "true" - name: MUX_ALLOW_EXTERNAL value: "{{ openshift_logging_mux_allow_external | default('false') }}" + - name: "BUFFER_QUEUE_LIMIT" + value: "{{ openshift_logging_mux_buffer_queue_limit }}" + - name: "BUFFER_SIZE_LIMIT" + value: "{{ openshift_logging_mux_buffer_size_limit }}" + - name: "MUX_CPU_LIMIT" + value: "{{ openshift_logging_mux_cpu_limit }}" + - name: 
"MUX_MEMORY_LIMIT" + value: "{{ openshift_logging_mux_memory_limit }}" volumes: - name: config configMap: diff --git a/roles/openshift_metrics/README.md b/roles/openshift_metrics/README.md index 84503217b..1f10de4a2 100644 --- a/roles/openshift_metrics/README.md +++ b/roles/openshift_metrics/README.md @@ -68,6 +68,9 @@ For default values, see [`defaults/main.yaml`](defaults/main.yaml). - `openshift_metrics_resolution`: How often metrics should be gathered. +- `openshift_metrics_install_hawkular_agent`: Install the Hawkular OpenShift Agent (HOSA). HOSA can be used + to collect custom metrics from your pods. This component is currently in tech-preview and is not installed by default. + ## Additional variables to control resource limits Each metrics component (hawkular, cassandra, heapster) can specify a cpu and memory limits and requests by setting the corresponding role variable: diff --git a/roles/openshift_metrics/defaults/main.yaml b/roles/openshift_metrics/defaults/main.yaml index 467db34c8..ba50566e9 100644 --- a/roles/openshift_metrics/defaults/main.yaml +++ b/roles/openshift_metrics/defaults/main.yaml @@ -31,6 +31,14 @@ openshift_metrics_heapster_requests_memory: 0.9375G openshift_metrics_heapster_requests_cpu: null openshift_metrics_heapster_nodeselector: "" +openshift_metrics_install_hawkular_agent: False +openshift_metrics_hawkular_agent_limits_memory: null +openshift_metrics_hawkular_agent_limits_cpu: null +openshift_metrics_hawkular_agent_requests_memory: null +openshift_metrics_hawkular_agent_requests_cpu: null +openshift_metrics_hawkular_agent_nodeselector: "" +openshift_metrics_hawkular_agent_namespace: "default" + openshift_metrics_hawkular_hostname: "hawkular-metrics.{{openshift_master_default_subdomain}}" openshift_metrics_duration: 7 diff --git a/roles/openshift_metrics/tasks/generate_hawkular_certificates.yaml b/roles/openshift_metrics/tasks/generate_hawkular_certificates.yaml index fb4fe2f03..7b81b3c10 100644 --- 
a/roles/openshift_metrics/tasks/generate_hawkular_certificates.yaml +++ b/roles/openshift_metrics/tasks/generate_hawkular_certificates.yaml @@ -73,6 +73,8 @@ {{ hawkular_secrets['hawkular-metrics.key'] }} tls.truststore.crt: > {{ hawkular_secrets['hawkular-cassandra.crt'] }} + ca.crt: > + {{ hawkular_secrets['ca.crt'] }} when: name not in metrics_secrets.stdout_lines changed_when: no diff --git a/roles/openshift_metrics/tasks/install_hosa.yaml b/roles/openshift_metrics/tasks/install_hosa.yaml new file mode 100644 index 000000000..cc533a68b --- /dev/null +++ b/roles/openshift_metrics/tasks/install_hosa.yaml @@ -0,0 +1,44 @@ +--- +- name: Generate Hawkular Agent (HOSA) Cluster Role + template: + src: hawkular_openshift_agent_role.j2 + dest: "{{mktemp.stdout}}/templates/metrics-hawkular-openshift-agent-role.yaml" + changed_when: no + +- name: Generate Hawkular Agent (HOSA) Service Account + template: + src: hawkular_openshift_agent_sa.j2 + dest: "{{mktemp.stdout}}/templates/metrics-hawkular-openshift-agent-sa.yaml" + changed_when: no + +- name: Generate Hawkular Agent (HOSA) Daemon Set + template: + src: hawkular_openshift_agent_ds.j2 + dest: "{{mktemp.stdout}}/templates/metrics-hawkular-openshift-agent-ds.yaml" + vars: + node_selector: "{{openshift_metrics_hawkular_agent_nodeselector | default('') }}" + changed_when: no + +- name: Generate the Hawkular Agent (HOSA) Configmap + template: + src: hawkular_openshift_agent_cm.j2 + dest: "{{mktemp.stdout}}/templates/metrics-hawkular-openshift-agent-cm.yaml" + changed_when: no + +- name: Generate role binding for the hawkular-openshift-agent service account + template: + src: rolebinding.j2 + dest: "{{ mktemp.stdout }}/templates/metrics-hawkular-agent-rolebinding.yaml" + vars: + cluster: True + obj_name: hawkular-openshift-agent-rb + labels: + metrics-infra: hawkular-agent + roleRef: + kind: ClusterRole + name: hawkular-openshift-agent + subjects: + - kind: ServiceAccount + name: hawkular-openshift-agent + namespace: 
"{{openshift_metrics_hawkular_agent_namespace}}" + changed_when: no diff --git a/roles/openshift_metrics/tasks/install_metrics.yaml b/roles/openshift_metrics/tasks/install_metrics.yaml index 74eb56713..fdf4ae57f 100644 --- a/roles/openshift_metrics/tasks/install_metrics.yaml +++ b/roles/openshift_metrics/tasks/install_metrics.yaml @@ -16,11 +16,19 @@ include: install_heapster.yaml when: openshift_metrics_heapster_standalone | bool -- find: paths={{ mktemp.stdout }}/templates patterns=*.yaml +- name: Install Hawkular OpenShift Agent (HOSA) + include: install_hosa.yaml + when: openshift_metrics_install_hawkular_agent | default(false) | bool + +- find: + paths: "{{ mktemp.stdout }}/templates" + patterns: "^(?!metrics-hawkular-openshift-agent).*.yaml" + use_regex: true register: object_def_files changed_when: no -- slurp: src={{item.path}} +- slurp: + src: "{{item.path}}" register: object_defs with_items: "{{object_def_files.files}}" changed_when: no @@ -34,6 +42,31 @@ file_content: "{{ item.content | b64decode | from_yaml }}" with_items: "{{ object_defs.results }}" +- find: + paths: "{{ mktemp.stdout }}/templates" + patterns: "^metrics-hawkular-openshift-agent.*.yaml" + use_regex: true + register: hawkular_agent_object_def_files + when: openshift_metrics_install_hawkular_agent | bool + changed_when: no + +- slurp: + src: "{{item.path}}" + register: hawkular_agent_object_defs + with_items: "{{ hawkular_agent_object_def_files.files }}" + when: openshift_metrics_install_hawkular_agent | bool + changed_when: no + +- name: Create Hawkular Agent objects + include: oc_apply.yaml + vars: + kubeconfig: "{{ mktemp.stdout }}/admin.kubeconfig" + namespace: "{{ openshift_metrics_hawkular_agent_namespace }}" + file_name: "{{ item.source }}" + file_content: "{{ item.content | b64decode | from_yaml }}" + with_items: "{{ hawkular_agent_object_defs.results }}" + when: openshift_metrics_install_hawkular_agent | bool + - include: update_master_config.yaml - command: > diff --git 
a/roles/openshift_metrics/tasks/main.yaml b/roles/openshift_metrics/tasks/main.yaml index 5d8506a73..0b5f23c24 100644 --- a/roles/openshift_metrics/tasks/main.yaml +++ b/roles/openshift_metrics/tasks/main.yaml @@ -44,6 +44,9 @@ - include: "{{ (openshift_metrics_install_metrics | bool) | ternary('install_metrics.yaml','uninstall_metrics.yaml') }}" +- include: uninstall_hosa.yaml + when: not openshift_metrics_install_hawkular_agent | bool + - name: Delete temp directory local_action: file path=local_tmp.stdout state=absent tags: metrics_cleanup diff --git a/roles/openshift_metrics/tasks/oc_apply.yaml b/roles/openshift_metrics/tasks/oc_apply.yaml index dd67703b4..1e1af40e8 100644 --- a/roles/openshift_metrics/tasks/oc_apply.yaml +++ b/roles/openshift_metrics/tasks/oc_apply.yaml @@ -14,7 +14,7 @@ command: > {{ openshift.common.client_binary }} --config={{ kubeconfig }} apply -f {{ file_name }} - -n {{ openshift_metrics_project }} + -n {{namespace}} register: generation_apply failed_when: "'error' in generation_apply.stderr" changed_when: no diff --git a/roles/openshift_metrics/tasks/uninstall_hosa.yaml b/roles/openshift_metrics/tasks/uninstall_hosa.yaml new file mode 100644 index 000000000..42ed02460 --- /dev/null +++ b/roles/openshift_metrics/tasks/uninstall_hosa.yaml @@ -0,0 +1,15 @@ +--- +- name: remove Hawkular Agent (HOSA) components + command: > + {{ openshift.common.client_binary }} -n {{ openshift_metrics_hawkular_agent_namespace }} --config={{ mktemp.stdout }}/admin.kubeconfig + delete --ignore-not-found --selector=metrics-infra=agent + all,sa,secrets,templates,routes,pvc,rolebindings,clusterrolebindings + register: delete_metrics + changed_when: delete_metrics.stdout != 'No resources found' + +- name: remove rolebindings + command: > + {{ openshift.common.client_binary }} -n {{ openshift_metrics_hawkular_agent_namespace }} --config={{ mktemp.stdout }}/admin.kubeconfig + delete --ignore-not-found + clusterrolebinding/hawkular-openshift-agent-rb + changed_when: 
delete_metrics.stdout != 'No resources found' diff --git a/roles/openshift_metrics/templates/hawkular_openshift_agent_cm.j2 b/roles/openshift_metrics/templates/hawkular_openshift_agent_cm.j2 new file mode 100644 index 000000000..bf472c066 --- /dev/null +++ b/roles/openshift_metrics/templates/hawkular_openshift_agent_cm.j2 @@ -0,0 +1,54 @@ +id: hawkular-openshift-agent +kind: ConfigMap +apiVersion: v1 +name: Hawkular OpenShift Agent Configuration +metadata: + name: hawkular-openshift-agent-configuration + labels: + metrics-infra: agent + namespace: {{openshift_metrics_hawkular_agent_namespace}} +data: + config.yaml: | + kubernetes: + tenant: ${POD:namespace_name} + hawkular_server: + url: https://hawkular-metrics.openshift-infra.svc.cluster.local + credentials: + username: secret:openshift-infra/hawkular-metrics-account/hawkular-metrics.username + password: secret:openshift-infra/hawkular-metrics-account/hawkular-metrics.password + ca_cert_file: secret:openshift-infra/hawkular-metrics-certs/ca.crt + emitter: + status_enabled: false + collector: + minimum_collection_interval: 10s + default_collection_interval: 30s + metric_id_prefix: pod/${POD:uid}/custom/ + tags: + metric_name: ${METRIC:name} + description: ${METRIC:description} + units: ${METRIC:units} + namespace_id: ${POD:namespace_uid} + namespace_name: ${POD:namespace_name} + node_name: ${POD:node_name} + pod_id: ${POD:uid} + pod_ip: ${POD:ip} + pod_name: ${POD:name} + pod_namespace: ${POD:namespace_name} + hostname: ${POD:hostname} + host_ip: ${POD:host_ip} + labels: ${POD:labels} + type: pod + collector: hawkular_openshift_agent + custom_metric: true + hawkular-openshift-agent: | + endpoints: + - type: prometheus + protocol: "http" + port: 8080 + path: /metrics + collection_interval: 30s + metrics: + - name: hawkular_openshift_agent_metric_data_points_collected_total + - name: hawkular_openshift_agent_monitored_endpoints + - name: hawkular_openshift_agent_monitored_pods + - name: 
hawkular_openshift_agent_monitored_metrics diff --git a/roles/openshift_metrics/templates/hawkular_openshift_agent_ds.j2 b/roles/openshift_metrics/templates/hawkular_openshift_agent_ds.j2 new file mode 100644 index 000000000..d65eaf9ae --- /dev/null +++ b/roles/openshift_metrics/templates/hawkular_openshift_agent_ds.j2 @@ -0,0 +1,91 @@ +apiVersion: extensions/v1beta1 +kind: DaemonSet +metadata: + name: hawkular-openshift-agent + labels: + name: hawkular-openshift-agent + metrics-infra: agent + namespace: {{openshift_metrics_hawkular_agent_namespace}} +spec: + selector: + matchLabels: + name: hawkular-openshift-agent + template: + metadata: + labels: + name: hawkular-openshift-agent + metrics-infra: agent + spec: + serviceAccount: hawkular-openshift-agent +{% if node_selector is iterable and node_selector | length > 0 %} + nodeSelector: +{% for key, value in node_selector.iteritems() %} + {{key}}: "{{value}}" +{% endfor %} +{% endif %} + containers: + - image: {{openshift_metrics_image_prefix}}metrics-hawkular-openshift-agent:{{openshift_metrics_image_version}} + imagePullPolicy: Always + name: hawkular-openshift-agent +{% if ((openshift_metrics_hawkular_agent_limits_cpu is defined and openshift_metrics_hawkular_agent_limits_cpu is not none) + or (openshift_metrics_hawkular_agent_limits_memory is defined and openshift_metrics_hawkular_agent_limits_memory is not none) + or (openshift_metrics_hawkular_agent_requests_cpu is defined and openshift_metrics_hawkular_agent_requests_cpu is not none) + or (openshift_metrics_hawkular_agent_requests_memory is defined and openshift_metrics_hawkular_agent_requests_memory is not none)) +%} + resources: +{% if (openshift_metrics_hawkular_agent_limits_cpu is not none + or openshift_metrics_hawkular_agent_limits_memory is not none) +%} + limits: +{% if openshift_metrics_hawkular_agent_limits_cpu is not none %} + cpu: "{{openshift_metrics_hawkular_agent_limits_cpu}}" +{% endif %} +{% if openshift_metrics_hawkular_agent_limits_memory 
is not none %} + memory: "{{openshift_metrics_hawkular_agent_limits_memory}}" +{% endif %} +{% endif %} +{% if (openshift_metrics_hawkular_agent_requests_cpu is not none + or openshift_metrics_hawkular_agent_requests_memory is not none) +%} + requests: +{% if openshift_metrics_hawkular_agent_requests_cpu is not none %} + cpu: "{{openshift_metrics_hawkular_agent_requests_cpu}}" +{% endif %} +{% if openshift_metrics_hawkular_agent_requests_memory is not none %} + memory: "{{openshift_metrics_hawkular_agent_requests_memory}}" +{% endif %} +{% endif %} +{% endif %} + + livenessProbe: + httpGet: + scheme: HTTP + path: /health + port: 8080 + initialDelaySeconds: 30 + periodSeconds: 30 + command: + - "hawkular-openshift-agent" + - "-config" + - "/hawkular-openshift-agent-configuration/config.yaml" + - "-v" + - "3" + env: + - name: K8S_POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: K8S_POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + volumeMounts: + - name: hawkular-openshift-agent-configuration + mountPath: "/hawkular-openshift-agent-configuration" + volumes: + - name: hawkular-openshift-agent-configuration + configMap: + name: hawkular-openshift-agent-configuration + - name: hawkular-openshift-agent + configMap: + name: hawkular-openshift-agent-configuration diff --git a/roles/openshift_metrics/templates/hawkular_openshift_agent_role.j2 b/roles/openshift_metrics/templates/hawkular_openshift_agent_role.j2 new file mode 100644 index 000000000..24b8cd801 --- /dev/null +++ b/roles/openshift_metrics/templates/hawkular_openshift_agent_role.j2 @@ -0,0 +1,25 @@ +apiVersion: v1 +kind: ClusterRole +metadata: + name: hawkular-openshift-agent + labels: + metrics-infra: agent +rules: +- apiGroups: + - "" + resources: + - configmaps + - namespaces + - nodes + - pods + - projects + verbs: + - get + - list + - watch +- apiGroups: + - "" + resources: + - secrets + verbs: + - get diff --git 
a/roles/openshift_metrics/templates/hawkular_openshift_agent_sa.j2 b/roles/openshift_metrics/templates/hawkular_openshift_agent_sa.j2 new file mode 100644 index 000000000..ec604d73c --- /dev/null +++ b/roles/openshift_metrics/templates/hawkular_openshift_agent_sa.j2 @@ -0,0 +1,7 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: hawkular-openshift-agent + labels: + metrics-infra: agent + namespace: {{openshift_metrics_hawkular_agent_namespace}} diff --git a/roles/openshift_repos/tasks/main.yaml b/roles/openshift_repos/tasks/main.yaml index 023b1a9b7..8f8550e2d 100644 --- a/roles/openshift_repos/tasks/main.yaml +++ b/roles/openshift_repos/tasks/main.yaml @@ -4,7 +4,8 @@ path: /run/ostree-booted register: ostree_booted -- block: +- when: not ostree_booted.stat.exists + block: - name: Ensure libselinux-python is installed package: name=libselinux-python state=present @@ -24,41 +25,40 @@ - openshift_additional_repos | length == 0 notify: refresh cache - # Note: OpenShift repositories under CentOS may be shipped through the - # "centos-release-openshift-origin" package which configures the repository. - # This task matches the file names provided by the package so that they are - # not installed twice in different files and remains idempotent. 
- - name: Configure origin gpg keys if needed - copy: - src: "{{ item.src }}" - dest: "{{ item.dest }}" - with_items: - - src: origin/gpg_keys/openshift-ansible-CentOS-SIG-PaaS - dest: /etc/pki/rpm-gpg/RPM-GPG-KEY-CentOS-SIG-PaaS - - src: origin/repos/openshift-ansible-centos-paas-sig.repo - dest: /etc/yum.repos.d/CentOS-OpenShift-Origin.repo - notify: refresh cache - when: - - ansible_os_family == "RedHat" - - ansible_distribution != "Fedora" - - openshift_deployment_type == 'origin' - - openshift_enable_origin_repo | default(true) | bool - # Singleton block - - when: r_osr_first_run | default(true) + - when: r_openshift_repos_has_run is not defined block: + + # Note: OpenShift repositories under CentOS may be shipped through the + # "centos-release-openshift-origin" package which configures the repository. + # This task matches the file names provided by the package so that they are + # not installed twice in different files and remains idempotent. + - name: Configure origin gpg keys if needed + copy: + src: "{{ item.src }}" + dest: "{{ item.dest }}" + with_items: + - src: origin/gpg_keys/openshift-ansible-CentOS-SIG-PaaS + dest: /etc/pki/rpm-gpg/RPM-GPG-KEY-CentOS-SIG-PaaS + - src: origin/repos/openshift-ansible-centos-paas-sig.repo + dest: /etc/yum.repos.d/CentOS-OpenShift-Origin.repo + notify: refresh cache + when: + - ansible_os_family == "RedHat" + - ansible_distribution != "Fedora" + - openshift_deployment_type == 'origin' + - openshift_enable_origin_repo | default(true) | bool + - name: Ensure clean repo cache in the event repos have been changed manually debug: msg: "First run of openshift_repos" changed_when: true notify: refresh cache - - name: Set fact r_osr_first_run false + - name: Record that openshift_repos already ran set_fact: - r_osr_first_run: false + r_openshift_repos_has_run: True # Force running ALL handlers now, because we expect repo cache to be cleared # if changes have been made. 
- meta: flush_handlers - - when: not ostree_booted.stat.exists diff --git a/roles/openshift_storage_glusterfs/README.md b/roles/openshift_storage_glusterfs/README.md index 7b310dbf8..62fc35299 100644 --- a/roles/openshift_storage_glusterfs/README.md +++ b/roles/openshift_storage_glusterfs/README.md @@ -1,7 +1,31 @@ OpenShift GlusterFS Cluster =========================== -OpenShift GlusterFS Cluster Installation +OpenShift GlusterFS Cluster Configuration + +This role handles the configuration of GlusterFS clusters. It can handle +three primary configuration scenarios: + +* Configuring a new, natively-hosted GlusterFS cluster. In this scenario, + GlusterFS pods are deployed on nodes in the OpenShift cluster which are + configured to provide storage. +* Configuring a new, external GlusterFS cluster. In this scenario, the + cluster nodes have the GlusterFS software pre-installed but have not + been configured yet. The installer will take care of configuring the + cluster(s) for use by OpenShift applications. +* Using existing GlusterFS clusters. In this scenario, one or more + GlusterFS clusters are assumed to be already set up. These clusters can + be either natively-hosted or external, but must be managed by a + [heketi service](https://github.com/heketi/heketi). + +As part of the configuration, a particular GlusterFS cluster may be +specified to provide backend storage for a natively-hosted Docker +registry. + +Unless configured otherwise, a StorageClass will be automatically +created for each non-registry GlusterFS cluster. This will allow +applications which can mount PersistentVolumes to request +dynamically-provisioned GlusterFS volumes. Requirements ------------ @@ -21,26 +45,50 @@ hosted Docker registry: * `[glusterfs_registry]` +Host Variables +-------------- + +For configuring new clusters, the following role variables are available. 
+ +Each host in either of the above groups must have the following variable +defined: + +| Name | Default value | Description | +|-------------------|---------------|-----------------------------------------| +| glusterfs_devices | None | A list of block devices that will be completely managed as part of a GlusterFS cluster. There must be at least one device listed. Each device must be bare, e.g. no partitions or LVM PVs. **Example:** '[ "/dev/sdb" ]' + +In addition, each host may specify the following variables to further control +their configuration as GlusterFS nodes: + +| Name | Default value | Description | +|--------------------|---------------------------|-----------------------------------------| +| glusterfs_cluster | 1 | The ID of the cluster this node should belong to. This is useful when a single heketi service is expected to manage multiple distinct clusters. **NOTE:** For natively-hosted clusters, all pods will be in the same OpenShift namespace +| glusterfs_hostname | openshift.common.hostname | A hostname (or IP address) that will be used for internal GlusterFS communication +| glusterfs_ip | openshift.common.ip | An IP address that will be used by pods to communicate with the GlusterFS node +| glusterfs_zone | 1 | A zone number for the node. Zones are used within the cluster for determining how to distribute the bricks of GlusterFS volumes. 
heketi will try to spread each volume's bricks as evenly as possible across all zones + Role Variables -------------- This role has the following variables that control the integration of a GlusterFS cluster into a new or existing OpenShift cluster: -| Name | Default value | | +| Name | Default value | Description | |--------------------------------------------------|-------------------------|-----------------------------------------| | openshift_storage_glusterfs_timeout | 300 | Seconds to wait for pods to become ready | openshift_storage_glusterfs_namespace | 'default' | Namespace in which to create GlusterFS resources | openshift_storage_glusterfs_is_native | True | GlusterFS should be containerized -| openshift_storage_glusterfs_nodeselector | 'storagenode=glusterfs' | Selector to determine which nodes will host GlusterFS pods in native mode +| openshift_storage_glusterfs_name | 'storage' | A name to identify the GlusterFS cluster, which will be used in resource names +| openshift_storage_glusterfs_nodeselector | 'glusterfs=storage-host'| Selector to determine which nodes will host GlusterFS pods in native mode. **NOTE:** The label value is taken from the cluster name +| openshift_storage_glusterfs_storageclass | True | Automatically create a StorageClass for each GlusterFS cluster | openshift_storage_glusterfs_image | 'gluster/gluster-centos'| Container image to use for GlusterFS pods, enterprise default is 'rhgs3/rhgs-server-rhel7' | openshift_storage_glusterfs_version | 'latest' | Container image version to use for GlusterFS pods | openshift_storage_glusterfs_wipe | False | Destroy any existing GlusterFS resources and wipe storage devices. 
**WARNING: THIS WILL DESTROY ANY DATA ON THOSE DEVICES.** | openshift_storage_glusterfs_heketi_is_native | True | heketi should be containerized | openshift_storage_glusterfs_heketi_image | 'heketi/heketi' | Container image to use for heketi pods, enterprise default is 'rhgs3/rhgs-volmanager-rhel7' | openshift_storage_glusterfs_heketi_version | 'latest' | Container image version to use for heketi pods -| openshift_storage_glusterfs_heketi_admin_key | '' | String to use as secret key for performing heketi commands as admin -| openshift_storage_glusterfs_heketi_user_key | '' | String to use as secret key for performing heketi commands as user that can only view or modify volumes +| openshift_storage_glusterfs_heketi_admin_key | auto-generated | String to use as secret key for performing heketi commands as admin +| openshift_storage_glusterfs_heketi_user_key | auto-generated | String to use as secret key for performing heketi commands as user that can only view or modify volumes | openshift_storage_glusterfs_heketi_topology_load | True | Load the GlusterFS topology information into heketi | openshift_storage_glusterfs_heketi_url | Undefined | URL for the heketi REST API, dynamically determined in native mode | openshift_storage_glusterfs_heketi_wipe | False | Destroy any existing heketi resources, defaults to the value of `openshift_storage_glusterfs_wipe` @@ -52,17 +100,24 @@ registry. These variables start with the prefix values in their corresponding non-registry variables. 
The following variables are an exception: -| Name | Default value | | -|---------------------------------------------------|-----------------------|-----------------------------------------| -| openshift_storage_glusterfs_registry_namespace | registry namespace | Default is to use the hosted registry's namespace, otherwise 'default' -| openshift_storage_glusterfs_registry_nodeselector | 'storagenode=registry'| This allows for the logical separation of the registry GlusterFS cluster from any regular-use GlusterFS clusters +| Name | Default value | Description | +|-------------------------------------------------------|-----------------------|-----------------------------------------| +| openshift_storage_glusterfs_registry_namespace | registry namespace | Default is to use the hosted registry's namespace, otherwise 'default' +| openshift_storage_glusterfs_registry_name | 'registry' | This allows for the logical separation of the registry GlusterFS cluster from other GlusterFS clusters +| openshift_storage_glusterfs_registry_storageclass | False | It is recommended to not create a StorageClass for GlusterFS clusters serving registry storage, so as to avoid performance penalties +| openshift_storage_glusterfs_registry_heketi_admin_key | auto-generated | Separate from the above +| openshift_storage_glusterfs_registry_heketi_user_key | auto-generated | Separate from the above Additionally, this role's behavior responds to the following registry-specific -variable: - -| Name | Default value | Description | -|----------------------------------------------|---------------|------------------------------------------------------------------------------| -| openshift_hosted_registry_glusterfs_swap | False | Whether to swap an existing registry's storage volume for a GlusterFS volume | +variables: + +| Name | Default value | Description | +|-----------------------------------------------|------------------------------|-----------------------------------------| +| 
openshift_hosted_registry_glusterfs_endpoints | glusterfs-registry-endpoints | The name for the Endpoints resource that will point the registry to the GlusterFS nodes +| openshift_hosted_registry_glusterfs_path | glusterfs-registry-volume | The name for the GlusterFS volume that will provide registry storage +| openshift_hosted_registry_glusterfs_readonly | False | Whether the GlusterFS volume should be read-only +| openshift_hosted_registry_glusterfs_swap | False | Whether to swap an existing registry's storage volume for a GlusterFS volume +| openshift_hosted_registry_glusterfs_swapcopy | True | If swapping, copy the contents of the pre-existing registry storage to the new GlusterFS volume Dependencies ------------ diff --git a/roles/openshift_storage_glusterfs/defaults/main.yml b/roles/openshift_storage_glusterfs/defaults/main.yml index ebe9ca30b..468877e57 100644 --- a/roles/openshift_storage_glusterfs/defaults/main.yml +++ b/roles/openshift_storage_glusterfs/defaults/main.yml @@ -2,7 +2,9 @@ openshift_storage_glusterfs_timeout: 300 openshift_storage_glusterfs_namespace: 'default' openshift_storage_glusterfs_is_native: True -openshift_storage_glusterfs_nodeselector: 'storagenode=glusterfs' +openshift_storage_glusterfs_name: 'storage' +openshift_storage_glusterfs_nodeselector: "glusterfs={{ openshift_storage_glusterfs_name }}-host" +openshift_storage_glusterfs_storageclass: True openshift_storage_glusterfs_image: "{{ 'rhgs3/rhgs-server-rhel7' | quote if deployment_type == 'openshift-enterprise' else 'gluster/gluster-centos' | quote }}" openshift_storage_glusterfs_version: 'latest' openshift_storage_glusterfs_wipe: False @@ -11,8 +13,8 @@ openshift_storage_glusterfs_heketi_is_missing: True openshift_storage_glusterfs_heketi_deploy_is_missing: True openshift_storage_glusterfs_heketi_image: "{{ 'rhgs3/rhgs-volmanager-rhel7' | quote if deployment_type == 'openshift-enterprise' else 'heketi/heketi' | quote }}" openshift_storage_glusterfs_heketi_version: 'latest' 
-openshift_storage_glusterfs_heketi_admin_key: '' -openshift_storage_glusterfs_heketi_user_key: '' +openshift_storage_glusterfs_heketi_admin_key: "{{ 32 | oo_generate_secret }}" +openshift_storage_glusterfs_heketi_user_key: "{{ 32 | oo_generate_secret }}" openshift_storage_glusterfs_heketi_topology_load: True openshift_storage_glusterfs_heketi_wipe: "{{ openshift_storage_glusterfs_wipe }}" openshift_storage_glusterfs_heketi_url: "{{ omit }}" @@ -20,7 +22,9 @@ openshift_storage_glusterfs_heketi_url: "{{ omit }}" openshift_storage_glusterfs_registry_timeout: "{{ openshift_storage_glusterfs_timeout }}" openshift_storage_glusterfs_registry_namespace: "{{ openshift.hosted.registry.namespace | default('default') }}" openshift_storage_glusterfs_registry_is_native: "{{ openshift_storage_glusterfs_is_native }}" -openshift_storage_glusterfs_registry_nodeselector: 'storagenode=registry' +openshift_storage_glusterfs_registry_name: 'registry' +openshift_storage_glusterfs_registry_nodeselector: "glusterfs={{ openshift_storage_glusterfs_registry_name }}-host" +openshift_storage_glusterfs_registry_storageclass: False openshift_storage_glusterfs_registry_image: "{{ openshift_storage_glusterfs_image }}" openshift_storage_glusterfs_registry_version: "{{ openshift_storage_glusterfs_version }}" openshift_storage_glusterfs_registry_wipe: "{{ openshift_storage_glusterfs_wipe }}" @@ -29,8 +33,8 @@ openshift_storage_glusterfs_registry_heketi_is_missing: "{{ openshift_storage_gl openshift_storage_glusterfs_registry_heketi_deploy_is_missing: "{{ openshift_storage_glusterfs_heketi_deploy_is_missing }}" openshift_storage_glusterfs_registry_heketi_image: "{{ openshift_storage_glusterfs_heketi_image }}" openshift_storage_glusterfs_registry_heketi_version: "{{ openshift_storage_glusterfs_heketi_version }}" -openshift_storage_glusterfs_registry_heketi_admin_key: "{{ openshift_storage_glusterfs_heketi_admin_key }}" -openshift_storage_glusterfs_registry_heketi_user_key: "{{ 
openshift_storage_glusterfs_heketi_user_key }}" +openshift_storage_glusterfs_registry_heketi_admin_key: "{{ 32 | oo_generate_secret }}" +openshift_storage_glusterfs_registry_heketi_user_key: "{{ 32 | oo_generate_secret }}" openshift_storage_glusterfs_registry_heketi_topology_load: "{{ openshift_storage_glusterfs_heketi_topology_load }}" openshift_storage_glusterfs_registry_heketi_wipe: "{{ openshift_storage_glusterfs_heketi_wipe }}" openshift_storage_glusterfs_registry_heketi_url: "{{ openshift_storage_glusterfs_heketi_url | default(omit) }}" diff --git a/roles/openshift_storage_glusterfs/files/v3.6/deploy-heketi-template.yml b/roles/openshift_storage_glusterfs/files/v3.6/deploy-heketi-template.yml index c9945be13..81b4fa5dc 100644 --- a/roles/openshift_storage_glusterfs/files/v3.6/deploy-heketi-template.yml +++ b/roles/openshift_storage_glusterfs/files/v3.6/deploy-heketi-template.yml @@ -9,49 +9,47 @@ metadata: annotations: description: Bootstrap Heketi installation tags: glusterfs,heketi,installation -labels: - template: deploy-heketi objects: - kind: Service apiVersion: v1 metadata: - name: deploy-heketi + name: deploy-heketi-${CLUSTER_NAME} labels: - glusterfs: deploy-heketi-service + glusterfs: deploy-heketi-${CLUSTER_NAME}-service deploy-heketi: support annotations: description: Exposes Heketi service spec: ports: - - name: deploy-heketi + - name: deploy-heketi-${CLUSTER_NAME} port: 8080 targetPort: 8080 selector: - name: deploy-heketi + glusterfs: deploy-heketi-${CLUSTER_NAME}-pod - kind: Route apiVersion: v1 metadata: - name: deploy-heketi + name: deploy-heketi-${CLUSTER_NAME} labels: - glusterfs: deploy-heketi-route + glusterfs: deploy-heketi-${CLUSTER_NAME}-route deploy-heketi: support spec: to: kind: Service - name: deploy-heketi + name: deploy-heketi-${CLUSTER_NAME} - kind: DeploymentConfig apiVersion: v1 metadata: - name: deploy-heketi + name: deploy-heketi-${CLUSTER_NAME} labels: - glusterfs: deploy-heketi-dc + glusterfs: 
deploy-heketi-${CLUSTER_NAME}-dc deploy-heketi: support annotations: description: Defines how to deploy Heketi spec: replicas: 1 selector: - name: deploy-heketi + glusterfs: deploy-heketi-${CLUSTER_NAME}-pod triggers: - type: ConfigChange strategy: @@ -60,13 +58,12 @@ objects: metadata: name: deploy-heketi labels: - name: deploy-heketi - glusterfs: deploy-heketi-pod + glusterfs: deploy-heketi-${CLUSTER_NAME}-pod deploy-heketi: support spec: - serviceAccountName: heketi-service-account + serviceAccountName: heketi-${CLUSTER_NAME}-service-account containers: - - name: deploy-heketi + - name: heketi image: ${IMAGE_NAME}:${IMAGE_VERSION} env: - name: HEKETI_USER_KEY @@ -81,11 +78,15 @@ objects: value: '14' - name: HEKETI_KUBE_GLUSTER_DAEMONSET value: '1' + - name: HEKETI_KUBE_NAMESPACE + value: ${HEKETI_KUBE_NAMESPACE} ports: - containerPort: 8080 volumeMounts: - name: db mountPath: /var/lib/heketi + - name: topology + mountPath: ${TOPOLOGY_PATH} readinessProbe: timeoutSeconds: 3 initialDelaySeconds: 3 @@ -100,6 +101,9 @@ objects: port: 8080 volumes: - name: db + - name: topology + secret: + secretName: heketi-${CLUSTER_NAME}-topology-secret parameters: - name: HEKETI_USER_KEY displayName: Heketi User Secret @@ -107,9 +111,19 @@ parameters: - name: HEKETI_ADMIN_KEY displayName: Heketi Administrator Secret description: Set secret for administration of the Heketi service as user _admin_ +- name: HEKETI_KUBE_NAMESPACE + displayName: Namespace + description: Set the namespace where the GlusterFS pods reside + value: default - name: IMAGE_NAME - displayName: GlusterFS container name + displayName: heketi container name required: True - name: IMAGE_VERSION - displayName: GlusterFS container versiona + displayName: heketi container versiona + required: True +- name: CLUSTER_NAME + displayName: GlusterFS cluster name + value: glusterfs +- name: TOPOLOGY_PATH + displayName: heketi topology file location required: True diff --git 
a/roles/openshift_storage_glusterfs/files/v3.6/glusterfs-template.yml b/roles/openshift_storage_glusterfs/files/v3.6/glusterfs-template.yml index c66705752..dc3d2250a 100644 --- a/roles/openshift_storage_glusterfs/files/v3.6/glusterfs-template.yml +++ b/roles/openshift_storage_glusterfs/files/v3.6/glusterfs-template.yml @@ -12,24 +12,24 @@ objects: - kind: DaemonSet apiVersion: extensions/v1beta1 metadata: - name: glusterfs + name: glusterfs-${CLUSTER_NAME} labels: - glusterfs: daemonset + glusterfs: ${CLUSTER_NAME}-daemonset annotations: description: GlusterFS DaemonSet tags: glusterfs spec: selector: matchLabels: - glusterfs-node: pod + glusterfs: ${CLUSTER_NAME}-pod template: metadata: - name: glusterfs + name: glusterfs-${CLUSTER_NAME} labels: + glusterfs: ${CLUSTER_NAME}-pod glusterfs-node: pod spec: - nodeSelector: - storagenode: glusterfs + nodeSelector: "${{NODE_LABELS}}" hostNetwork: true containers: - name: glusterfs @@ -63,26 +63,26 @@ objects: privileged: true readinessProbe: timeoutSeconds: 3 - initialDelaySeconds: 100 + initialDelaySeconds: 40 exec: command: - "/bin/bash" - "-c" - systemctl status glusterd.service - periodSeconds: 10 + periodSeconds: 25 successThreshold: 1 - failureThreshold: 3 + failureThreshold: 15 livenessProbe: timeoutSeconds: 3 - initialDelaySeconds: 100 + initialDelaySeconds: 40 exec: command: - "/bin/bash" - "-c" - systemctl status glusterd.service - periodSeconds: 10 + periodSeconds: 25 successThreshold: 1 - failureThreshold: 3 + failureThreshold: 15 resources: {} terminationMessagePath: "/dev/termination-log" volumes: @@ -120,9 +120,16 @@ objects: dnsPolicy: ClusterFirst securityContext: {} parameters: +- name: NODE_LABELS + displayName: Daemonset Node Labels + description: Labels which define the daemonset node selector. 
Must contain at least one label of the format \'glusterfs=<CLUSTER_NAME>-host\' + value: '{ "glusterfs": "storage-host" }' - name: IMAGE_NAME displayName: GlusterFS container name required: True - name: IMAGE_VERSION displayName: GlusterFS container versiona required: True +- name: CLUSTER_NAME + displayName: GlusterFS cluster name + value: storage diff --git a/roles/openshift_storage_glusterfs/files/v3.6/heketi-template.yml b/roles/openshift_storage_glusterfs/files/v3.6/heketi-template.yml index df045c170..1d8f1abdf 100644 --- a/roles/openshift_storage_glusterfs/files/v3.6/heketi-template.yml +++ b/roles/openshift_storage_glusterfs/files/v3.6/heketi-template.yml @@ -8,15 +8,13 @@ metadata: annotations: description: Heketi service deployment template tags: glusterfs,heketi -labels: - template: heketi objects: - kind: Service apiVersion: v1 metadata: - name: heketi + name: heketi-${CLUSTER_NAME} labels: - glusterfs: heketi-service + glusterfs: heketi-${CLUSTER_NAME}-service annotations: description: Exposes Heketi service spec: @@ -25,40 +23,40 @@ objects: port: 8080 targetPort: 8080 selector: - glusterfs: heketi-pod + glusterfs: heketi-${CLUSTER_NAME}-pod - kind: Route apiVersion: v1 metadata: - name: heketi + name: heketi-${CLUSTER_NAME} labels: - glusterfs: heketi-route + glusterfs: heketi-${CLUSTER_NAME}-route spec: to: kind: Service - name: heketi + name: heketi-${CLUSTER_NAME} - kind: DeploymentConfig apiVersion: v1 metadata: - name: heketi + name: heketi-${CLUSTER_NAME} labels: - glusterfs: heketi-dc + glusterfs: heketi-${CLUSTER_NAME}-dc annotations: description: Defines how to deploy Heketi spec: replicas: 1 selector: - glusterfs: heketi-pod + glusterfs: heketi-${CLUSTER_NAME}-pod triggers: - type: ConfigChange strategy: type: Recreate template: metadata: - name: heketi + name: heketi-${CLUSTER_NAME} labels: - glusterfs: heketi-pod + glusterfs: heketi-${CLUSTER_NAME}-pod spec: - serviceAccountName: heketi-service-account + serviceAccountName: 
heketi-${CLUSTER_NAME}-service-account containers: - name: heketi image: ${IMAGE_NAME}:${IMAGE_VERSION} @@ -76,6 +74,8 @@ objects: value: '14' - name: HEKETI_KUBE_GLUSTER_DAEMONSET value: '1' + - name: HEKETI_KUBE_NAMESPACE + value: ${HEKETI_KUBE_NAMESPACE} ports: - containerPort: 8080 volumeMounts: @@ -96,7 +96,7 @@ objects: volumes: - name: db glusterfs: - endpoints: heketi-storage-endpoints + endpoints: heketi-db-${CLUSTER_NAME}-endpoints path: heketidbstorage parameters: - name: HEKETI_USER_KEY @@ -105,9 +105,16 @@ parameters: - name: HEKETI_ADMIN_KEY displayName: Heketi Administrator Secret description: Set secret for administration of the Heketi service as user _admin_ +- name: HEKETI_KUBE_NAMESPACE + displayName: Namespace + description: Set the namespace where the GlusterFS pods reside + value: default - name: IMAGE_NAME - displayName: GlusterFS container name + displayName: heketi container name required: True - name: IMAGE_VERSION - displayName: GlusterFS container versiona + displayName: heketi container versiona required: True +- name: CLUSTER_NAME + displayName: GlusterFS cluster name + value: glusterfs diff --git a/roles/openshift_storage_glusterfs/tasks/glusterfs_common.yml b/roles/openshift_storage_glusterfs/tasks/glusterfs_common.yml index fa5fa2cb0..829c1f51b 100644 --- a/roles/openshift_storage_glusterfs/tasks/glusterfs_common.yml +++ b/roles/openshift_storage_glusterfs/tasks/glusterfs_common.yml @@ -5,12 +5,6 @@ name: "{{ glusterfs_namespace }}" when: glusterfs_is_native or glusterfs_heketi_is_native -- include: glusterfs_deploy.yml - when: glusterfs_is_native - -- name: Make sure heketi-client is installed - package: name=heketi-client state=present - - name: Delete pre-existing heketi resources oc_obj: namespace: "{{ glusterfs_namespace }}" @@ -21,12 +15,18 @@ with_items: - kind: "template,route,service,dc,jobs,secret" selector: "deploy-heketi" - - kind: "template,route,service,dc" - name: "heketi" - - kind: "svc,ep" + - kind: "svc" name: 
"heketi-storage-endpoints" + - kind: "secret" + name: "heketi-{{ glusterfs_name }}-topology-secret" + - kind: "template,route,service,dc" + name: "heketi-{{ glusterfs_name }}" + - kind: "svc" + name: "heketi-db-{{ glusterfs_name }}-endpoints" - kind: "sa" - name: "heketi-service-account" + name: "heketi-{{ glusterfs_name }}-service-account" + - kind: "secret" + name: "heketi-{{ glusterfs_name }}-user-secret" failed_when: False when: glusterfs_heketi_wipe @@ -35,11 +35,11 @@ namespace: "{{ glusterfs_namespace }}" kind: pod state: list - selector: "glusterfs=deploy-heketi-pod" + selector: "glusterfs=deploy-heketi-{{ glusterfs_name }}-pod" register: heketi_pod until: "heketi_pod.results.results[0]['items'] | count == 0" delay: 10 - retries: "{{ (glusterfs_timeout / 10) | int }}" + retries: "{{ (glusterfs_timeout | int / 10) | int }}" when: glusterfs_heketi_wipe - name: Wait for heketi pods to terminate @@ -47,23 +47,26 @@ namespace: "{{ glusterfs_namespace }}" kind: pod state: list - selector: "glusterfs=heketi-pod" + selector: "glusterfs=heketi-{{ glusterfs_name }}-pod" register: heketi_pod until: "heketi_pod.results.results[0]['items'] | count == 0" delay: 10 - retries: "{{ (glusterfs_timeout / 10) | int }}" + retries: "{{ (glusterfs_timeout | int / 10) | int }}" when: glusterfs_heketi_wipe +- include: glusterfs_deploy.yml + when: glusterfs_is_native + - name: Create heketi service account oc_serviceaccount: namespace: "{{ glusterfs_namespace }}" - name: heketi-service-account + name: "heketi-{{ glusterfs_name }}-service-account" state: present when: glusterfs_heketi_is_native - name: Add heketi service account to privileged SCC oc_adm_policy_user: - user: "system:serviceaccount:{{ glusterfs_namespace }}:heketi-service-account" + user: "system:serviceaccount:{{ glusterfs_namespace }}:heketi-{{ glusterfs_name }}-service-account" resource_kind: scc resource_name: privileged state: present @@ -71,7 +74,7 @@ - name: Allow heketi service account to view/edit pods 
oc_adm_policy_user: - user: "system:serviceaccount:{{ glusterfs_namespace }}:heketi-service-account" + user: "system:serviceaccount:{{ glusterfs_namespace }}:heketi-{{ glusterfs_name }}-service-account" resource_kind: role resource_name: edit state: present @@ -82,7 +85,7 @@ namespace: "{{ glusterfs_namespace }}" state: list kind: pod - selector: "glusterfs=deploy-heketi-pod,deploy-heketi=support" + selector: "glusterfs=deploy-heketi-{{ glusterfs_name }}-pod" register: heketi_pod when: glusterfs_heketi_is_native @@ -100,7 +103,7 @@ namespace: "{{ glusterfs_namespace }}" state: list kind: pod - selector: "glusterfs=heketi-pod" + selector: "glusterfs=heketi-{{ glusterfs_name }}-pod" register: heketi_pod when: glusterfs_heketi_is_native @@ -113,48 +116,35 @@ # heketi is not missing when there are one or more pods with matching labels whose 'Ready' status is True - "heketi_pod.results.results[0]['items'] | oo_collect(attribute='status.conditions') | oo_collect(attribute='status', filters={'type': 'Ready'}) | map('bool') | select | list | count > 0" +- name: Generate topology file + template: + src: "{{ openshift.common.examples_content_version }}/topology.json.j2" + dest: "{{ mktemp.stdout }}/topology.json" + when: + - glusterfs_heketi_topology_load + - include: heketi_deploy_part1.yml when: - glusterfs_heketi_is_native - glusterfs_heketi_deploy_is_missing - glusterfs_heketi_is_missing -- name: Determine heketi URL - oc_obj: - namespace: "{{ glusterfs_namespace }}" - state: list - kind: ep - selector: "glusterfs in (deploy-heketi-service, heketi-service)" - register: heketi_url - until: - - "heketi_url.results.results[0]['items'][0].subsets[0].addresses[0].ip != ''" - - "heketi_url.results.results[0]['items'][0].subsets[0].ports[0].port != ''" - delay: 10 - retries: "{{ (glusterfs_timeout / 10) | int }}" - when: - - glusterfs_heketi_is_native - - glusterfs_heketi_url is undefined - - name: Set heketi URL set_fact: - glusterfs_heketi_url: "{{ 
heketi_url.results.results[0]['items'][0].subsets[0].addresses[0].ip }}:{{ heketi_url.results.results[0]['items'][0].subsets[0].ports[0].port }}" + glusterfs_heketi_url: "localhost:8080" when: - glusterfs_heketi_is_native - - glusterfs_heketi_url is undefined + +- name: Set heketi-cli command + set_fact: + glusterfs_heketi_client: "{% if glusterfs_heketi_is_native %}oc rsh {{ heketi_pod.results.results[0]['items'][0]['metadata']['name'] }} {% endif %}heketi-cli -s http://{{ glusterfs_heketi_url }} --user admin --secret '{{ glusterfs_heketi_admin_key }}'" - name: Verify heketi service - command: "heketi-cli -s http://{{ glusterfs_heketi_url }} --user admin --secret '{{ glusterfs_heketi_admin_key }}' cluster list" + command: "{{ glusterfs_heketi_client }} cluster list" changed_when: False -- name: Generate topology file - template: - src: "{{ openshift.common.examples_content_version }}/topology.json.j2" - dest: "{{ mktemp.stdout }}/topology.json" - when: - - glusterfs_heketi_topology_load - - name: Load heketi topology - command: "heketi-cli -s http://{{ glusterfs_heketi_url }} --user admin --secret '{{ glusterfs_heketi_admin_key }}' topology load --json={{ mktemp.stdout }}/topology.json 2>&1" + command: "{{ glusterfs_heketi_client }} topology load --json={{ mktemp.stdout }}/topology.json 2>&1" register: topology_load failed_when: "topology_load.rc != 0 or 'Unable' in topology_load.stdout" when: @@ -164,3 +154,29 @@ when: - glusterfs_heketi_is_native - glusterfs_heketi_is_missing + +- name: Create heketi user secret + oc_secret: + namespace: "{{ glusterfs_namespace }}" + state: present + name: "heketi-{{ glusterfs_name }}-user-secret" + type: "kubernetes.io/glusterfs" + force: True + contents: + - path: key + data: "{{ glusterfs_heketi_user_key }}" + +- name: Generate GlusterFS StorageClass file + template: + src: "{{ openshift.common.examples_content_version }}/glusterfs-storageclass.yml.j2" + dest: "{{ mktemp.stdout }}/glusterfs-storageclass.yml" + +- name: Create 
GlusterFS StorageClass + oc_obj: + state: present + kind: storageclass + name: "glusterfs-{{ glusterfs_name }}" + files: + - "{{ mktemp.stdout }}/glusterfs-storageclass.yml" + when: + - glusterfs_storageclass diff --git a/roles/openshift_storage_glusterfs/tasks/glusterfs_config.yml b/roles/openshift_storage_glusterfs/tasks/glusterfs_config.yml index 451990240..aa303d126 100644 --- a/roles/openshift_storage_glusterfs/tasks/glusterfs_config.yml +++ b/roles/openshift_storage_glusterfs/tasks/glusterfs_config.yml @@ -3,7 +3,9 @@ glusterfs_timeout: "{{ openshift_storage_glusterfs_timeout }}" glusterfs_namespace: "{{ openshift_storage_glusterfs_namespace }}" glusterfs_is_native: "{{ openshift_storage_glusterfs_is_native }}" - glusterfs_nodeselector: "{{ openshift_storage_glusterfs_nodeselector | map_from_pairs }}" + glusterfs_name: "{{ openshift_storage_glusterfs_name }}" + glusterfs_nodeselector: "{{ openshift_storage_glusterfs_nodeselector | default(['storagenode', openshift_storage_glusterfs_name] | join('=')) | map_from_pairs }}" + glusterfs_storageclass: "{{ openshift_storage_glusterfs_storageclass }}" glusterfs_image: "{{ openshift_storage_glusterfs_image }}" glusterfs_version: "{{ openshift_storage_glusterfs_version }}" glusterfs_wipe: "{{ openshift_storage_glusterfs_wipe }}" @@ -17,6 +19,6 @@ glusterfs_heketi_topology_load: "{{ openshift_storage_glusterfs_heketi_topology_load }}" glusterfs_heketi_wipe: "{{ openshift_storage_glusterfs_heketi_wipe }}" glusterfs_heketi_url: "{{ openshift_storage_glusterfs_heketi_url }}" - glusterfs_nodes: "{{ g_glusterfs_hosts }}" + glusterfs_nodes: "{{ groups.glusterfs }}" - include: glusterfs_common.yml diff --git a/roles/openshift_storage_glusterfs/tasks/glusterfs_deploy.yml b/roles/openshift_storage_glusterfs/tasks/glusterfs_deploy.yml index 579112349..ea4dcc510 100644 --- a/roles/openshift_storage_glusterfs/tasks/glusterfs_deploy.yml +++ b/roles/openshift_storage_glusterfs/tasks/glusterfs_deploy.yml @@ -1,23 +1,24 @@ --- - 
assert: - that: "glusterfs_nodeselector.keys() | count == 1" - msg: Only one GlusterFS nodeselector key pair should be provided - -- assert: that: "glusterfs_nodes | count >= 3" msg: There must be at least three GlusterFS nodes specified - name: Delete pre-existing GlusterFS resources oc_obj: namespace: "{{ glusterfs_namespace }}" - kind: "template,daemonset" - name: glusterfs + kind: "{{ item.kind }}" + name: "{{ item.name }}" state: absent + with_items: + - kind: template + name: glusterfs + - kind: daemonset + name: "glusterfs-{{ glusterfs_name }}" when: glusterfs_wipe - name: Unlabel any existing GlusterFS nodes oc_label: - name: "{{ item }}" + name: "{{ hostvars[item].openshift.common.hostname }}" kind: node state: absent labels: "{{ glusterfs_nodeselector | oo_dict_to_list_of_dict }}" @@ -40,11 +41,16 @@ failed_when: False when: glusterfs_wipe - # Runs "vgremove -fy <vg>; pvremove -fy <pv>" for every device found to be a physical volume. + # Runs "lvremove -ff <vg>; vgremove -fy <vg>; pvremove -fy <pv>" for every device found to be a physical volume. 
- name: Clear GlusterFS storage device contents - shell: "{% for line in item.stdout_lines %}{% set fields = line.split() %}{% if fields | count > 1 %}vgremove -fy {{ fields[1] }}; {% endif %}pvremove -fy {{ fields[0] }}; {% endfor %}" + shell: "{% for line in item.stdout_lines %}{% set fields = line.split() %}{% if fields | count > 1 %}lvremove -ff {{ fields[1] }}; vgremove -fy {{ fields[1] }}; {% endif %}pvremove -fy {{ fields[0] }}; {% endfor %}" delegate_to: "{{ item.item }}" with_items: "{{ devices_info.results }}" + register: clear_devices + until: + - "'contains a filesystem in use' not in clear_devices.stderr" + delay: 1 + retries: 30 when: - glusterfs_wipe - item.stdout_lines | count > 0 @@ -61,13 +67,11 @@ - name: Label GlusterFS nodes oc_label: - name: "{{ glusterfs_host }}" + name: "{{ hostvars[item].openshift.common.hostname }}" kind: node state: add labels: "{{ glusterfs_nodeselector | oo_dict_to_list_of_dict }}" with_items: "{{ glusterfs_nodes | default([]) }}" - loop_control: - loop_var: glusterfs_host - name: Copy GlusterFS DaemonSet template copy: @@ -78,7 +82,7 @@ oc_obj: namespace: "{{ glusterfs_namespace }}" kind: template - name: glusterfs + name: "glusterfs" state: present files: - "{{ mktemp.stdout }}/glusterfs-template.yml" @@ -91,17 +95,19 @@ params: IMAGE_NAME: "{{ glusterfs_image }}" IMAGE_VERSION: "{{ glusterfs_version }}" + NODE_LABELS: "{{ glusterfs_nodeselector }}" + CLUSTER_NAME: "{{ glusterfs_name }}" - name: Wait for GlusterFS pods oc_obj: namespace: "{{ glusterfs_namespace }}" kind: pod state: list - selector: "glusterfs-node=pod" + selector: "glusterfs={{ glusterfs_name }}-pod" register: glusterfs_pods until: - "glusterfs_pods.results.results[0]['items'] | count > 0" # There must be as many pods with 'Ready' staus True as there are nodes expecting those pods - "glusterfs_pods.results.results[0]['items'] | oo_collect(attribute='status.conditions') | oo_collect(attribute='status', filters={'type': 'Ready'}) | map('bool') | select 
| list | count == glusterfs_nodes | count" delay: 10 - retries: "{{ (glusterfs_timeout / 10) | int }}" + retries: "{{ (glusterfs_timeout | int / 10) | int }}" diff --git a/roles/openshift_storage_glusterfs/tasks/glusterfs_registry.yml b/roles/openshift_storage_glusterfs/tasks/glusterfs_registry.yml index 392f4b65b..4c6891eeb 100644 --- a/roles/openshift_storage_glusterfs/tasks/glusterfs_registry.yml +++ b/roles/openshift_storage_glusterfs/tasks/glusterfs_registry.yml @@ -3,7 +3,9 @@ glusterfs_timeout: "{{ openshift_storage_glusterfs_registry_timeout }}" glusterfs_namespace: "{{ openshift_storage_glusterfs_registry_namespace }}" glusterfs_is_native: "{{ openshift_storage_glusterfs_registry_is_native }}" - glusterfs_nodeselector: "{{ openshift_storage_glusterfs_registry_nodeselector | map_from_pairs }}" + glusterfs_name: "{{ openshift_storage_glusterfs_registry_name }}" + glusterfs_nodeselector: "{{ openshift_storage_glusterfs_registry_nodeselector | default(['storagenode', openshift_storage_glusterfs_registry_name] | join('=')) | map_from_pairs }}" + glusterfs_storageclass: "{{ openshift_storage_glusterfs_registry_storageclass }}" glusterfs_image: "{{ openshift_storage_glusterfs_registry_image }}" glusterfs_version: "{{ openshift_storage_glusterfs_registry_version }}" glusterfs_wipe: "{{ openshift_storage_glusterfs_registry_wipe }}" @@ -17,21 +19,22 @@ glusterfs_heketi_topology_load: "{{ openshift_storage_glusterfs_registry_heketi_topology_load }}" glusterfs_heketi_wipe: "{{ openshift_storage_glusterfs_registry_heketi_wipe }}" glusterfs_heketi_url: "{{ openshift_storage_glusterfs_registry_heketi_url }}" - glusterfs_nodes: "{{ g_glusterfs_registry_hosts }}" + glusterfs_nodes: "{{ groups.glusterfs_registry }}" - include: glusterfs_common.yml - when: g_glusterfs_registry_hosts != g_glusterfs_hosts + when: + - groups.glusterfs_registry | default([]) | count > 0 + - "'glusterfs' not in groups or groups.glusterfs_registry != groups.glusterfs" - name: Delete pre-existing 
GlusterFS registry resources oc_obj: namespace: "{{ glusterfs_namespace }}" kind: "{{ item.kind }}" - name: "{{ item.name | default(omit) }}" - selector: "{{ item.selector | default(omit) }}" + name: "{{ item.name }}" state: absent with_items: - - kind: "svc,ep" - name: "glusterfs-registry-endpoints" + - kind: "svc" + name: "glusterfs-{{ glusterfs_name }}-endpoints" failed_when: False - name: Generate GlusterFS registry endpoints @@ -40,8 +43,8 @@ dest: "{{ mktemp.stdout }}/glusterfs-registry-endpoints.yml" - name: Copy GlusterFS registry service - copy: - src: "{{ openshift.common.examples_content_version }}/glusterfs-registry-service.yml" + template: + src: "{{ openshift.common.examples_content_version }}/glusterfs-registry-service.yml.j2" dest: "{{ mktemp.stdout }}/glusterfs-registry-service.yml" - name: Create GlusterFS registry endpoints @@ -49,7 +52,7 @@ namespace: "{{ glusterfs_namespace }}" state: present kind: endpoints - name: glusterfs-registry-endpoints + name: "glusterfs-{{ glusterfs_name }}-endpoints" files: - "{{ mktemp.stdout }}/glusterfs-registry-endpoints.yml" @@ -58,14 +61,14 @@ namespace: "{{ glusterfs_namespace }}" state: present kind: service - name: glusterfs-registry-endpoints + name: "glusterfs-{{ glusterfs_name }}-endpoints" files: - "{{ mktemp.stdout }}/glusterfs-registry-service.yml" - name: Check if GlusterFS registry volume exists - command: "heketi-cli -s http://{{ glusterfs_heketi_url }} --user admin --secret '{{ glusterfs_heketi_admin_key }}' volume list" + command: "{{ glusterfs_heketi_client }} volume list" register: registry_volume - name: Create GlusterFS registry volume - command: "heketi-cli -s http://{{ glusterfs_heketi_url }} --user admin --secret '{{ glusterfs_heketi_admin_key }}' volume create --size={{ openshift.hosted.registry.storage.volume.size | replace('Gi','') }} --name={{ openshift.hosted.registry.storage.glusterfs.path }}" + command: "{{ glusterfs_heketi_client }} volume create --size={{ 
openshift.hosted.registry.storage.volume.size | replace('Gi','') }} --name={{ openshift.hosted.registry.storage.glusterfs.path }}" when: "openshift.hosted.registry.storage.glusterfs.path not in registry_volume.stdout" diff --git a/roles/openshift_storage_glusterfs/tasks/heketi_deploy_part1.yml b/roles/openshift_storage_glusterfs/tasks/heketi_deploy_part1.yml index c14fcfb15..318d34b5d 100644 --- a/roles/openshift_storage_glusterfs/tasks/heketi_deploy_part1.yml +++ b/roles/openshift_storage_glusterfs/tasks/heketi_deploy_part1.yml @@ -6,11 +6,21 @@ with_items: - "deploy-heketi-template.yml" -- name: Create deploy-heketi resources +- name: Create heketi topology secret + oc_secret: + namespace: "{{ glusterfs_namespace }}" + state: present + name: "heketi-{{ glusterfs_name }}-topology-secret" + force: True + files: + - name: topology.json + path: "{{ mktemp.stdout }}/topology.json" + +- name: Create deploy-heketi template oc_obj: namespace: "{{ glusterfs_namespace }}" kind: template - name: deploy-heketi + name: "deploy-heketi" state: present files: - "{{ mktemp.stdout }}/deploy-heketi-template.yml" @@ -25,17 +35,20 @@ IMAGE_VERSION: "{{ glusterfs_heketi_version }}" HEKETI_USER_KEY: "{{ glusterfs_heketi_user_key }}" HEKETI_ADMIN_KEY: "{{ glusterfs_heketi_admin_key }}" + HEKETI_KUBE_NAMESPACE: "{{ glusterfs_namespace }}" + CLUSTER_NAME: "{{ glusterfs_name }}" + TOPOLOGY_PATH: "{{ mktemp.stdout }}" - name: Wait for deploy-heketi pod oc_obj: namespace: "{{ glusterfs_namespace }}" kind: pod state: list - selector: "glusterfs=deploy-heketi-pod,deploy-heketi=support" + selector: "glusterfs=deploy-heketi-{{ glusterfs_name }}-pod" register: heketi_pod until: - "heketi_pod.results.results[0]['items'] | count > 0" # Pod's 'Ready' status must be True - "heketi_pod.results.results[0]['items'] | oo_collect(attribute='status.conditions') | oo_collect(attribute='status', filters={'type': 'Ready'}) | map('bool') | select | list | count == 1" delay: 10 - retries: "{{ (glusterfs_timeout 
/ 10) | int }}" + retries: "{{ (glusterfs_timeout | int / 10) | int }}" diff --git a/roles/openshift_storage_glusterfs/tasks/heketi_deploy_part2.yml b/roles/openshift_storage_glusterfs/tasks/heketi_deploy_part2.yml index 64410a9ab..3a9619d9d 100644 --- a/roles/openshift_storage_glusterfs/tasks/heketi_deploy_part2.yml +++ b/roles/openshift_storage_glusterfs/tasks/heketi_deploy_part2.yml @@ -1,8 +1,10 @@ --- - name: Create heketi DB volume - command: "heketi-cli -s http://{{ glusterfs_heketi_url }} --user admin --secret '{{ glusterfs_heketi_admin_key }}' setup-openshift-heketi-storage --listfile {{ mktemp.stdout }}/heketi-storage.json" + command: "{{ glusterfs_heketi_client }} setup-openshift-heketi-storage --listfile /tmp/heketi-storage.json" register: setup_storage - failed_when: False + +- name: Copy heketi-storage list + shell: "{{ openshift.common.client_binary }} rsh {{ heketi_pod.results.results[0]['items'][0]['metadata']['name'] }} cat /tmp/heketi-storage.json > {{ mktemp.stdout }}/heketi-storage.json" # This is used in the subsequent task - name: Copy the admin client config @@ -28,7 +30,7 @@ # Pod's 'Complete' status must be True - "heketi_job.results.results | oo_collect(attribute='status.conditions') | oo_collect(attribute='status', filters={'type': 'Complete'}) | map('bool') | select | list | count == 1" delay: 10 - retries: "{{ (glusterfs_timeout / 10) | int }}" + retries: "{{ (glusterfs_timeout | int / 10) | int }}" failed_when: - "'results' in heketi_job.results" - "heketi_job.results.results | count > 0" @@ -46,14 +48,45 @@ with_items: - kind: "template,route,service,jobs,dc,secret" selector: "deploy-heketi" - failed_when: False + - kind: "svc" + name: "heketi-storage-endpoints" + - kind: "secret" + name: "heketi-{{ glusterfs_name }}-topology-secret" + +- name: Generate heketi endpoints + template: + src: "{{ openshift.common.examples_content_version }}/heketi-endpoints.yml.j2" + dest: "{{ mktemp.stdout }}/heketi-endpoints.yml" + +- name: Generate 
heketi service + template: + src: "{{ openshift.common.examples_content_version }}/heketi-service.yml.j2" + dest: "{{ mktemp.stdout }}/heketi-service.yml" + +- name: Create heketi endpoints + oc_obj: + namespace: "{{ glusterfs_namespace }}" + state: present + kind: endpoints + name: "heketi-db-{{ glusterfs_name }}-endpoints" + files: + - "{{ mktemp.stdout }}/heketi-endpoints.yml" + +- name: Create heketi service + oc_obj: + namespace: "{{ glusterfs_namespace }}" + state: present + kind: service + name: "heketi-db-{{ glusterfs_name }}-endpoints" + files: + - "{{ mktemp.stdout }}/heketi-service.yml" - name: Copy heketi template copy: src: "{{ openshift.common.examples_content_version }}/heketi-template.yml" dest: "{{ mktemp.stdout }}/heketi-template.yml" -- name: Create heketi resources +- name: Create heketi template oc_obj: namespace: "{{ glusterfs_namespace }}" kind: template @@ -72,38 +105,27 @@ IMAGE_VERSION: "{{ glusterfs_heketi_version }}" HEKETI_USER_KEY: "{{ glusterfs_heketi_user_key }}" HEKETI_ADMIN_KEY: "{{ glusterfs_heketi_admin_key }}" + HEKETI_KUBE_NAMESPACE: "{{ glusterfs_namespace }}" + CLUSTER_NAME: "{{ glusterfs_name }}" - name: Wait for heketi pod oc_obj: namespace: "{{ glusterfs_namespace }}" kind: pod state: list - selector: "glusterfs=heketi-pod" + selector: "glusterfs=heketi-{{ glusterfs_name }}-pod" register: heketi_pod until: - "heketi_pod.results.results[0]['items'] | count > 0" # Pod's 'Ready' status must be True - "heketi_pod.results.results[0]['items'] | oo_collect(attribute='status.conditions') | oo_collect(attribute='status', filters={'type': 'Ready'}) | map('bool') | select | list | count == 1" delay: 10 - retries: "{{ (glusterfs_timeout / 10) | int }}" - -- name: Determine heketi URL - oc_obj: - namespace: "{{ glusterfs_namespace }}" - state: list - kind: ep - selector: "glusterfs=heketi-service" - register: heketi_url - until: - - "heketi_url.results.results[0]['items'][0].subsets[0].addresses[0].ip != ''" - - 
"heketi_url.results.results[0]['items'][0].subsets[0].ports[0].port != ''" - delay: 10 - retries: "{{ (glusterfs_timeout / 10) | int }}" + retries: "{{ (glusterfs_timeout | int / 10) | int }}" -- name: Set heketi URL +- name: Set heketi-cli command set_fact: - glusterfs_heketi_url: "{{ heketi_url.results.results[0]['items'][0].subsets[0].addresses[0].ip }}:{{ heketi_url.results.results[0]['items'][0].subsets[0].ports[0].port }}" + glusterfs_heketi_client: "{% if glusterfs_heketi_is_native %}oc rsh {{ heketi_pod.results.results[0]['items'][0]['metadata']['name'] }} {% endif %}heketi-cli -s http://localhost:8080 --user admin --secret '{{ glusterfs_heketi_admin_key }}'" - name: Verify heketi service - command: "heketi-cli -s http://{{ glusterfs_heketi_url }} --user admin --secret '{{ glusterfs_heketi_admin_key }}' cluster list" + command: "{{ glusterfs_heketi_client }} cluster list" changed_when: False diff --git a/roles/openshift_storage_glusterfs/tasks/main.yml b/roles/openshift_storage_glusterfs/tasks/main.yml index ebd8db453..c9bfdd1cd 100644 --- a/roles/openshift_storage_glusterfs/tasks/main.yml +++ b/roles/openshift_storage_glusterfs/tasks/main.yml @@ -7,12 +7,11 @@ - include: glusterfs_config.yml when: - - g_glusterfs_hosts | default([]) | count > 0 + - groups.glusterfs | default([]) | count > 0 - include: glusterfs_registry.yml when: - - g_glusterfs_registry_hosts | default([]) | count > 0 - - "openshift.hosted.registry.storage.kind == 'glusterfs' or openshift.hosted.registry.glusterfs.swap" + - "groups.glusterfs_registry | default([]) | count > 0 or openshift.hosted.registry.storage.kind == 'glusterfs' or openshift.hosted.registry.glusterfs.swap" - name: Delete temp directory file: diff --git a/roles/openshift_storage_glusterfs/templates/v3.6/glusterfs-registry-endpoints.yml.j2 b/roles/openshift_storage_glusterfs/templates/v3.6/glusterfs-registry-endpoints.yml.j2 index 605627ab5..11c9195bb 100644 --- 
a/roles/openshift_storage_glusterfs/templates/v3.6/glusterfs-registry-endpoints.yml.j2 +++ b/roles/openshift_storage_glusterfs/templates/v3.6/glusterfs-registry-endpoints.yml.j2 @@ -1,7 +1,8 @@ +--- apiVersion: v1 kind: Endpoints metadata: - name: glusterfs-registry-endpoints + name: glusterfs-{{ glusterfs_name }}-endpoints subsets: - addresses: {% for node in glusterfs_nodes %} diff --git a/roles/openshift_storage_glusterfs/files/v3.6/glusterfs-registry-service.yml b/roles/openshift_storage_glusterfs/templates/v3.6/glusterfs-registry-service.yml.j2 index 3f8d8f507..3f869d2b7 100644 --- a/roles/openshift_storage_glusterfs/files/v3.6/glusterfs-registry-service.yml +++ b/roles/openshift_storage_glusterfs/templates/v3.6/glusterfs-registry-service.yml.j2 @@ -2,7 +2,7 @@ apiVersion: v1 kind: Service metadata: - name: glusterfs-registry-endpoints + name: glusterfs-{{ glusterfs_name }}-endpoints spec: ports: - port: 1 diff --git a/roles/openshift_storage_glusterfs/templates/v3.6/glusterfs-storageclass.yml.j2 b/roles/openshift_storage_glusterfs/templates/v3.6/glusterfs-storageclass.yml.j2 new file mode 100644 index 000000000..9b8fae310 --- /dev/null +++ b/roles/openshift_storage_glusterfs/templates/v3.6/glusterfs-storageclass.yml.j2 @@ -0,0 +1,10 @@ +--- +apiVersion: storage.k8s.io/v1 +kind: StorageClass +metadata: + name: glusterfs-{{ glusterfs_name }} +provisioner: kubernetes.io/glusterfs +parameters: + resturl: "http://{{ glusterfs_heketi_url }}:8081" + secretNamespace: "{{ glusterfs_namespace }}" + secretName: "heketi-{{ glusterfs_name }}-user-secret" diff --git a/roles/openshift_storage_glusterfs/templates/v3.6/heketi-endpoints.yml.j2 b/roles/openshift_storage_glusterfs/templates/v3.6/heketi-endpoints.yml.j2 new file mode 100644 index 000000000..99cbdf748 --- /dev/null +++ b/roles/openshift_storage_glusterfs/templates/v3.6/heketi-endpoints.yml.j2 @@ -0,0 +1,12 @@ +--- +apiVersion: v1 +kind: Endpoints +metadata: + name: heketi-db-{{ glusterfs_name }}-endpoints 
+subsets: +- addresses: +{% for node in glusterfs_nodes %} + - ip: {{ hostvars[node].glusterfs_ip | default(hostvars[node].openshift.common.ip) }} +{% endfor %} + ports: + - port: 1 diff --git a/roles/openshift_storage_glusterfs/templates/v3.6/heketi-service.yml.j2 b/roles/openshift_storage_glusterfs/templates/v3.6/heketi-service.yml.j2 new file mode 100644 index 000000000..dcb896441 --- /dev/null +++ b/roles/openshift_storage_glusterfs/templates/v3.6/heketi-service.yml.j2 @@ -0,0 +1,10 @@ +--- +apiVersion: v1 +kind: Service +metadata: + name: heketi-db-{{ glusterfs_name }}-endpoints +spec: + ports: + - port: 1 +status: + loadBalancer: {} |