5 files changed, 109 insertions, 71 deletions
diff --git a/roles/openshift_logging_elasticsearch/tasks/main.yaml b/roles/openshift_logging_elasticsearch/tasks/main.yaml
index ff5ad1045..b731d93a0 100644
--- a/roles/openshift_logging_elasticsearch/tasks/main.yaml
+++ b/roles/openshift_logging_elasticsearch/tasks/main.yaml
@@ -137,6 +137,16 @@
     - "prometheus_out.stderr | length > 0"
     - "'already exists' not in prometheus_out.stderr"
 
+- set_fact:  # random word from lib_utils_oo_random_word (arg 16, presumably the length -- confirm), kept base64-encoded
+    _logging_metrics_proxy_passwd: "{{ 16 | lib_utils_oo_random_word | b64encode }}"
+
+- template:  # render passwd.j2 (a "user": passwd mapping) into the temp dir as passwd.yml
+    src: passwd.j2
+    dest: "{{mktemp.stdout}}/passwd.yml"
+  vars:
+    logging_user_name: "{{ openshift_logging_elasticsearch_prometheus_sa }}"  # becomes the top-level key in passwd.yml
+    logging_user_passwd: "{{ _logging_metrics_proxy_passwd }}"  # written in its base64-encoded form
+
 # View role and binding
 - name: Generate logging-elasticsearch-view-role
   template:
@@ -255,6 +265,8 @@
         path: "{{ generated_certs_dir }}/ca.crt"
       - name: admin.jks
         path: "{{ generated_certs_dir }}/system.admin.jks"
+      - name: passwd.yml  # the file rendered from passwd.j2 earlier in this task file
+        path: "{{mktemp.stdout}}/passwd.yml"
 
 # services
 - name: Set logging-{{ es_component }}-cluster service
@@ -391,6 +403,7 @@
     es_container_security_context: "{{ _es_containers.elasticsearch.securityContext if _es_containers is defined and 'elasticsearch' in _es_containers and 'securityContext' in _es_containers.elasticsearch else None }}"
     deploy_type: "{{ openshift_logging_elasticsearch_deployment_type }}"
     es_replicas: 1
+    basic_auth_passwd: "{{ _logging_metrics_proxy_passwd | b64decode }}"  # decoded value; es.j2 passes it as -basic-auth-password
 
 - name: Set ES dc
   oc_obj:
diff --git a/roles/openshift_logging_elasticsearch/tasks/restart_cluster.yml b/roles/openshift_logging_elasticsearch/tasks/restart_cluster.yml
index 6bce13d1d..879459cf6 100644
--- a/roles/openshift_logging_elasticsearch/tasks/restart_cluster.yml
+++ b/roles/openshift_logging_elasticsearch/tasks/restart_cluster.yml
@@ -1,91 +1,113 @@
 ---
-# Disable external communication for {{ _cluster_component }}
-- name: Disable external communication for logging-{{ _cluster_component }}
-  oc_service:
-    state: present
-    name: "logging-{{ _cluster_component }}"
-    namespace: "{{ openshift_logging_elasticsearch_namespace }}"
-    selector:
-      component: "{{ _cluster_component }}"
-      provider: openshift
-      connection: blocked
-    labels:
-      logging-infra: 'support'
-    ports:
-      - port: 9200
-        targetPort: "restapi"
-  when:
-    - full_restart_cluster | bool
-
 ## get all pods for the cluster
 - command: >
     oc get pod -l component={{ _cluster_component }},provider=openshift -n {{ openshift_logging_elasticsearch_namespace }} -o jsonpath={.items[?(@.status.phase==\"Running\")].metadata.name}
   register: _cluster_pods
 
-- name: "Disable shard balancing for logging-{{ _cluster_component }} cluster"
-  command: >
-    oc exec {{ _cluster_pods.stdout.split(' ')[0] }} -c elasticsearch -n {{ openshift_logging_elasticsearch_namespace }} -- {{ __es_local_curl }} -XPUT 'https://localhost:9200/_cluster/settings' -d '{ "transient": { "cluster.routing.allocation.enable" : "none" } }'
-  register: _disable_output
-  changed_when: "'\"acknowledged\":true' in _disable_output.stdout"
+### Check the cluster state before making changes -- if it's red then we don't want to continue
+- name: "Checking current health for logging-{{ _cluster_component }} cluster"
+  shell: >
+    oc exec "{{ _cluster_pods.stdout.split(' ')[0] }}" -c elasticsearch -n "{{ openshift_logging_elasticsearch_namespace }}" -- es_cluster_health
+  register: _pod_status  # stdout is parsed with from_json by the following blocks, which read its 'status' key
   when: _cluster_pods.stdout_lines | count > 0
 
-# Flush ES
-- name: "Flushing for logging-{{ _cluster_component }} cluster"
-  command: >
-    oc exec {{ _cluster_pods.stdout.split(' ')[0] }} -c elasticsearch -n {{ openshift_logging_elasticsearch_namespace }} -- {{ __es_local_curl }} -XPUT 'https://localhost:9200/_flush/synced'
-  register: _flush_output
-  changed_when: "'\"acknowledged\":true' in _flush_output.stdout"
-  when:
+- when:  # both must hold: a health result exists and it reports status red
+  - _pod_status.stdout is defined  # guards the from_json below; the health check only runs when running pods were found
+  - (_pod_status.stdout | from_json)['status'] in ['red']
+  block:
+  - name: Set Logging message to manually restart  # recorded once via set_stats under installer_phase_logging
+    run_once: true
+    set_stats:
+      data:
+        installer_phase_logging:
+          message: "Cluster logging-{{ _cluster_component }} was in a red state and will not be automatically restarted. Please see documentation regarding doing a {{ 'full' if full_restart_cluster | bool else 'rolling'}} cluster restart."
+
+  - debug: msg="Cluster logging-{{ _cluster_component }} was in a red state and will not be automatically restarted. Please see documentation regarding doing a {{ 'full' if full_restart_cluster | bool else 'rolling'}} cluster restart."
+
+- when: _pod_status.stdout is undefined or (_pod_status.stdout | from_json)['status'] in ['green', 'yellow']  # proceed when there is no health result or the cluster is green/yellow
+  block:
+  # Disable external communication for {{ _cluster_component }}: the service selector additionally requires connection=blocked
+  - name: Disable external communication for logging-{{ _cluster_component }}
+    oc_service:
+      state: present
+      name: "logging-{{ _cluster_component }}"
+      namespace: "{{ openshift_logging_elasticsearch_namespace }}"
+      selector:
+        component: "{{ _cluster_component }}"
+        provider: openshift
+        connection: blocked  # the re-enable task at the end of this file sets the selector without this key
+      labels:
+        logging-infra: 'support'
+      ports:
+      - port: 9200
+        targetPort: "restapi"
+    when:
+    - full_restart_cluster | bool  # only applied for a full cluster restart
+
+
+  - name: "Disable shard balancing for logging-{{ _cluster_component }} cluster"  # sets transient cluster.routing.allocation.enable to "none"
+    command: >
+      oc exec {{ _cluster_pods.stdout.split(' ')[0] }} -c elasticsearch -n {{ openshift_logging_elasticsearch_namespace }} -- {{ __es_local_curl }} -XPUT 'https://localhost:9200/_cluster/settings' -d '{ "transient": { "cluster.routing.allocation.enable" : "none" } }'
+    register: _disable_output
+    changed_when: "'\"acknowledged\":true' in _disable_output.stdout"  # changed only when the response contains "acknowledged":true
+    when: _cluster_pods.stdout_lines | count > 0  # needs at least one running pod to exec into
+
+  # Flush ES via the _flush/synced endpoint, executed in the first running pod
+  - name: "Flushing for logging-{{ _cluster_component }} cluster"
+    command: >
+      oc exec {{ _cluster_pods.stdout.split(' ')[0] }} -c elasticsearch -n {{ openshift_logging_elasticsearch_namespace }} -- {{ __es_local_curl }} -XPUT 'https://localhost:9200/_flush/synced'
+    register: _flush_output
+    changed_when: "'\"acknowledged\":true' in _flush_output.stdout"  # changed only when the response contains "acknowledged":true
+    when:
     - _cluster_pods.stdout_lines | count > 0
     - full_restart_cluster | bool
 
-- command: >
-    oc get dc -l component={{ _cluster_component }},provider=openshift -n {{ openshift_logging_elasticsearch_namespace }} -o jsonpath={.items[*].metadata.name}
-  register: _cluster_dcs
+  - command: >  # list the names of all DCs labelled with this component
+      oc get dc -l component={{ _cluster_component }},provider=openshift -n {{ openshift_logging_elasticsearch_namespace }} -o jsonpath={.items[*].metadata.name}
+    register: _cluster_dcs  # the restart tasks below read the names from .stdout
 
-## restart all dcs for full restart
-- name: "Restart ES node {{ _es_node }}"
-  include_tasks: restart_es_node.yml
-  with_items: "{{ _cluster_dcs }}"
-  loop_control:
-    loop_var: _es_node
-  when:
+  ## restart all dcs for full restart
+  - name: "Restart ES node {{ _es_node }}"
+    include_tasks: restart_es_node.yml
+    with_items: "{{ _cluster_dcs.stdout.split() }}"  # loop over the DC names, not the registered result object; empty output yields no items
+    loop_control:
+      loop_var: _es_node  # restart_es_node.yml refers to the current DC by this name
+    when:
     - full_restart_cluster | bool
 
-## restart the node if it's dc is in the list of nodes to restart?
-- name: "Restart ES node {{ _es_node }}"
-  include_tasks: restart_es_node.yml
-  with_items: "{{ _restart_logging_nodes }}"
-  loop_control:
-    loop_var: _es_node
-  when:
+  ## rolling restart: restart each node in _restart_logging_nodes whose dc appears in this cluster's dc list
+  - name: "Restart ES node {{ _es_node }}"
+    include_tasks: restart_es_node.yml
+    with_items: "{{ _restart_logging_nodes }}"
+    loop_control:
+      loop_var: _es_node  # restart_es_node.yml refers to the current DC by this name
+    when:
     - not full_restart_cluster | bool
     - _es_node in _cluster_dcs.stdout
 
-## we may need a new first pod to run against -- fetch them all again
-- command: >
-    oc get pod -l component={{ _cluster_component }},provider=openshift -n {{ openshift_logging_elasticsearch_namespace }} -o jsonpath={.items[?(@.status.phase==\"Running\")].metadata.name}
-  register: _cluster_pods
+  ## we may need a new first pod to run against -- fetch them all again
+  - command: >  # same query as at the top of this file: names of pods in phase Running
+      oc get pod -l component={{ _cluster_component }},provider=openshift -n {{ openshift_logging_elasticsearch_namespace }} -o jsonpath={.items[?(@.status.phase==\"Running\")].metadata.name}
+    register: _cluster_pods  # overwrites the earlier result so the next task execs into a current pod
 
-- name: "Enable shard balancing for logging-{{ _cluster_component }} cluster"
-  command: >
-    oc exec {{ _cluster_pods.stdout.split(' ')[0] }} -c elasticsearch -n {{ openshift_logging_elasticsearch_namespace }} -- {{ __es_local_curl }} -XPUT 'https://localhost:9200/_cluster/settings' -d '{ "transient": { "cluster.routing.allocation.enable" : "all" } }'
-  register: _enable_output
-  changed_when: "'\"acknowledged\":true' in _enable_output.stdout"
+  - name: "Enable shard balancing for logging-{{ _cluster_component }} cluster"  # sets transient cluster.routing.allocation.enable back to "all"
+    command: >
+      oc exec {{ _cluster_pods.stdout.split(' ')[0] }} -c elasticsearch -n {{ openshift_logging_elasticsearch_namespace }} -- {{ __es_local_curl }} -XPUT 'https://localhost:9200/_cluster/settings' -d '{ "transient": { "cluster.routing.allocation.enable" : "all" } }'
+    register: _enable_output  # NOTE(review): unlike the disable step, there is no guard for an empty pod list -- confirm this is intended
+    changed_when: "'\"acknowledged\":true' in _enable_output.stdout"  # changed only when the response contains "acknowledged":true
 
-# Reenable external communication for {{ _cluster_component }}
-- name: Reenable external communication for logging-{{ _cluster_component }}
-  oc_service:
-    state: present
-    name: "logging-{{ _cluster_component }}"
-    namespace: "{{ openshift_logging_elasticsearch_namespace }}"
-    selector:
-      component: "{{ _cluster_component }}"
-      provider: openshift
-    labels:
-      logging-infra: 'support'
-    ports:
+  # Reenable external communication for {{ _cluster_component }}: the selector no longer requires connection=blocked
+  - name: Reenable external communication for logging-{{ _cluster_component }}
+    oc_service:
+      state: present
+      name: "logging-{{ _cluster_component }}"
+      namespace: "{{ openshift_logging_elasticsearch_namespace }}"
+      selector:  # same selector as the disable task, minus the connection key
+        component: "{{ _cluster_component }}"
+        provider: openshift
+      labels:
+        logging-infra: 'support'
+      ports:
       - port: 9200
         targetPort: "restapi"
-  when:
+    when:
     - full_restart_cluster | bool
diff --git a/roles/openshift_logging_elasticsearch/tasks/restart_es_node.yml b/roles/openshift_logging_elasticsearch/tasks/restart_es_node.yml
index 6d0df40c8..fe15e40fd 100644
--- a/roles/openshift_logging_elasticsearch/tasks/restart_es_node.yml
+++ b/roles/openshift_logging_elasticsearch/tasks/restart_es_node.yml
@@ -26,12 +26,12 @@
 
 - name: "Waiting for ES to be ready for {{ _es_node }}"
   shell: >
-    oc exec "{{ _pod }}" -c elasticsearch -n "{{ openshift_logging_elasticsearch_namespace }}" -- {{ __es_local_curl }} https://localhost:9200/_cat/health | cut -d' ' -f4
+    oc exec "{{ _pod }}" -c elasticsearch -n "{{ openshift_logging_elasticsearch_namespace }}" -- es_cluster_health
   with_items: "{{ _pods.stdout.split(' ') }}"
   loop_control:
     loop_var: _pod
   register: _pod_status
-  until: _pod_status.stdout in ['green', 'yellow']
+  until: (_pod_status.stdout | default('{}', true) | from_json).get('status') in ['green', 'yellow']  # empty output or a missing status counts as "not ready" and is retried, instead of raising in from_json
   retries: 60
   delay: 5
   changed_when: false
diff --git a/roles/openshift_logging_elasticsearch/templates/es.j2 b/roles/openshift_logging_elasticsearch/templates/es.j2
index 4b189f255..b1d6a4489 100644
--- a/roles/openshift_logging_elasticsearch/templates/es.j2
+++ b/roles/openshift_logging_elasticsearch/templates/es.j2
@@ -51,6 +51,7 @@ spec:
            - -client-id={{openshift_logging_elasticsearch_prometheus_sa}}
            - -client-secret-file=/var/run/secrets/kubernetes.io/serviceaccount/token
            - -cookie-secret={{ 16 | lib_utils_oo_random_word | b64encode }}
+           - -basic-auth-password={{ basic_auth_passwd }}
            - -upstream=https://localhost:9200
            - '-openshift-sar={"namespace": "{{ openshift_logging_elasticsearch_namespace}}", "verb": "view", "resource": "prometheus", "group": "metrics.openshift.io"}'
            - '-openshift-delegate-urls={"/": {"resource": "prometheus", "verb": "view", "group": "metrics.openshift.io", "namespace": "{{ openshift_logging_elasticsearch_namespace}}"}}'
diff --git a/roles/openshift_logging_elasticsearch/templates/passwd.j2 b/roles/openshift_logging_elasticsearch/templates/passwd.j2
new file mode 100644
index 000000000..a22151eef
--- /dev/null
+++ b/roles/openshift_logging_elasticsearch/templates/passwd.j2
@@ -0,0 +1,2 @@
+"{{logging_user_name}}":
+  passwd: "{{logging_user_passwd}}"