From 463def14c007a757fa0623ac67c169dbbcd226ca Mon Sep 17 00:00:00 2001 From: Pablo Fontanilla Date: Mon, 29 Jun 2026 12:48:58 +0200 Subject: [PATCH] Remove manual Redfish fencing roles and update defaults for 4.22 GA MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The cluster-etcd-operator (CEO) has auto-configured STONITH fencing from BareMetalHost (BMH) resources since 4.19, making the manual bare-metal redfish role and post-install stonith configuration dead code. Strip these and bump stale version defaults. Removed: - roles/redfish/ and redfish.yml (bare-metal manual stonith) - kcli-redfish.yml playbook and stonith task files (kcli manual stonith) - setup.yml post_tasks block (is_ocp_4_19 version gate + redfish prompt) Kept: - roles/kcli/kcli-redfish/ with only ksushy startup (BMC simulator still needed for virtual deployments) Updated: - Default OCP version 4.20 → 4.22 - Removed DevPreviewNoUpgrade/TechPreview feature gates from examples - Updated release image references to 4.22 - Cleaned up docs (CLAUDE.md, READMEs) Co-Authored-By: Claude Opus 4.6 --- CLAUDE.md | 5 +- deploy/aws-hypervisor/scripts/configure.sh | 2 +- deploy/openshift-clusters/README-kcli.md | 42 +---- deploy/openshift-clusters/README.md | 12 +- deploy/openshift-clusters/kcli-install.yml | 11 +- deploy/openshift-clusters/kcli-redfish.yml | 101 ----------- deploy/openshift-clusters/redfish.yml | 92 ---------- .../files/config_arbiter_example.sh | 4 +- .../files/config_fencing_example.sh | 9 +- .../roles/kcli/kcli-redfish/README.md | 117 ++----------- .../kcli-redfish/tasks/configure_stonith.yml | 76 --------- .../roles/kcli/kcli-redfish/tasks/main.yml | 6 +- .../kcli-redfish/tasks/post-installation.yml | 45 ----- .../kcli-redfish/tasks/verify_stonith.yml | 52 ------ .../roles/redfish/README.md | 130 --------------- .../roles/redfish/defaults/main.yml | 9 - .../roles/redfish/tasks/main.yml | 27 --- .../roles/redfish/tasks/process_bmh.yml | 134 --------------- deploy/openshift-clusters/setup.yml | 
157 ------------------ deploy/openshift-clusters/vars/init-host.yml | 2 +- 20 files changed, 35 insertions(+), 998 deletions(-) delete mode 100644 deploy/openshift-clusters/kcli-redfish.yml delete mode 100644 deploy/openshift-clusters/redfish.yml delete mode 100644 deploy/openshift-clusters/roles/kcli/kcli-redfish/tasks/configure_stonith.yml delete mode 100644 deploy/openshift-clusters/roles/kcli/kcli-redfish/tasks/post-installation.yml delete mode 100644 deploy/openshift-clusters/roles/kcli/kcli-redfish/tasks/verify_stonith.yml delete mode 100644 deploy/openshift-clusters/roles/redfish/README.md delete mode 100644 deploy/openshift-clusters/roles/redfish/defaults/main.yml delete mode 100644 deploy/openshift-clusters/roles/redfish/tasks/main.yml delete mode 100644 deploy/openshift-clusters/roles/redfish/tasks/process_bmh.yml diff --git a/CLAUDE.md b/CLAUDE.md index 93645e63..8a782026 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -52,9 +52,6 @@ ansible-playbook setup.yml -i inventory.ini ansible-playbook setup.yml -e "topology=arbiter" -e "interactive_mode=false" -i inventory.ini ansible-playbook setup.yml -e "topology=fencing" -e "interactive_mode=false" -i inventory.ini -# Redfish stonith configuration (for fencing topology) -ansible-playbook redfish.yml -i inventory.ini - # Cleanup ansible-playbook clean.yml -i inventory.ini ``` @@ -110,7 +107,7 @@ make shellcheck - `assisted/acm-install`: Install ACM/MCE + assisted service + enable TNF on hub - `assisted/assisted-spoke`: Deploy spoke TNF cluster via assisted installer + BMH - `proxy-setup`: Squid proxy for cluster external access - - `redfish`: Automated stonith configuration for fencing topology + - `kcli/kcli-redfish`: ksushy BMC simulator startup for kcli fencing deployments - `config`: SSH key and git configuration - `git-user`: Git user configuration for development diff --git a/deploy/aws-hypervisor/scripts/configure.sh b/deploy/aws-hypervisor/scripts/configure.sh index 9c21602f..0a463c09 100644 --- 
a/deploy/aws-hypervisor/scripts/configure.sh +++ b/deploy/aws-hypervisor/scripts/configure.sh @@ -6,7 +6,7 @@ sudo hostnamectl set-hostname "aws-${STACK_NAME}" function get_ocp_version() { local latest_ga_ocp_version - local default_version="${DEFAULT_OCP_VERSION:-4.20}" + local default_version="${DEFAULT_OCP_VERSION:-4.22}" if latest_ga_ocp_version="$(curl -sL https://sippy.dptools.openshift.org/api/releases | jq -re '.ga_dates | to_entries | max_by(.value) | .key')"; then echo "${latest_ga_ocp_version:-$default_version}" diff --git a/deploy/openshift-clusters/README-kcli.md b/deploy/openshift-clusters/README-kcli.md index 1208441b..8bf299c8 100644 --- a/deploy/openshift-clusters/README-kcli.md +++ b/deploy/openshift-clusters/README-kcli.md @@ -292,54 +292,26 @@ ansible-playbook kcli-install.yml -i inventory.ini \ jq '.auths | has("registry.ci.openshift.org")' < roles/kcli/kcli-install/files/pull-secret.json ``` -## 7. Fencing Configuration (Post-Deployment) +## 7. Fencing Configuration -After a successful 4.19 kcli deployment with fencing topology, STONITH fencing needs to be configured to enable automatic node recovery. *If you are using the kcli-install playbook, this will be done for you automatically via kcli-redfish.yml**. If you're doing it some other way, you can use the kcli-redfish,yml playbook manually. +For kcli deployments with fencing topology, the `kcli/kcli-redfish` role starts the ksushy BMC simulator before cluster installation. The cluster-etcd-operator (CEO) then auto-configures STONITH fencing during installation using the simulated BMC endpoints. -The existing `redfish.yml` playbook **will not work** with kcli deployments because it expects BMH resources that don't exist in virtualized environments. +### ksushy BMC Simulator -### kcli Fencing Configuration - -The specialized `kcli-redfish.yml` playbook is designed for kcli deployments. 
**All configuration is automatically detected** - no manual variables required: - -```bash -# Configure fencing for kcli-deployed cluster (fully automatic) -ansible-playbook kcli-redfish.yml -i inventory.ini -``` - -The kcli-redfish playbook automatically: -1. **Detects cluster name** from running kcli clusters or kcli-install defaults -2. **Uses hypervisor IP** from ansible inventory host -3. **Pulls BMC credentials** from kcli-install role defaults -4. **Discovers cluster nodes** from the OpenShift API -5. **Calculates BMC endpoints** using the ksushy simulator configuration -6. **Configures PCS stonith resources** on each node -7. **Enables stonith globally** in the cluster - -### Default Configuration - -The playbook uses reasonable defaults that work for typical kcli deployments: +The ksushy service provides Redfish BMC simulation for virtual machines: | Variable | Default Value | Description | |----------|---------------|-------------| -| `test_cluster_name` | `tnt-cluster` | From kcli-install defaults | | `ksushy_ip` | `192.168.122.1` | Standard libvirt network gateway | | `bmc_user` | `admin` | From kcli-install defaults | | `bmc_password` | `admin123` | From kcli-install defaults | | `ksushy_port` | `9000` | From kcli-install defaults | -These defaults work for standard kcli deployments where VMs use the default libvirt network (`192.168.122.x/24`). - -### Why Not Use redfish.yml? - -**Do not use the `redfish.yml` playbook** with kcli deployments. It will fail because: +The ksushy service is managed automatically by `kcli-install.yml`. To verify it is running: ```bash -# This will fail for kcli deployments -ansible-playbook redfish.yml # Expects BMH resources that don't exist - -# Use this instead for kcli deployments -ansible-playbook kcli-redfish.yml # Uses defaults optimized for kcli +systemctl --user status ksushy.service +curl -sk https://192.168.122.1:9000/redfish/v1/Systems/local ``` ## 8. 
Troubleshooting diff --git a/deploy/openshift-clusters/README.md b/deploy/openshift-clusters/README.md index fa221498..5835c911 100644 --- a/deploy/openshift-clusters/README.md +++ b/deploy/openshift-clusters/README.md @@ -183,17 +183,7 @@ For more information on STONITH, go to the [official RHEL HA documentation](http For clusters using the fencing topology on OpenShift 4.19.x, automatic Redfish stonith configuration is available. This feature configures Pacemaker stonith resources using Redfish fencing for BareMetalHost resources. -Redfish configuration can be applied in two ways: - -**Integrated Usage:** -- When running the main deployment playbook in interactive mode with fencing topology, you will be prompted to configure Redfish stonith automatically -- Redfish configuration runs as part of the main deployment workflow - -**Standalone Usage:** -- Redfish configuration can be run independently using: `ansible-playbook redfish.yml` -- This allows for running it separately from the main deployment or re-running it if needed - -For detailed configuration options, verification commands, and requirements, refer to the [Redfish role documentation](roles/redfish/README.md). +Fencing topology clusters use automatic fencing configuration via the cluster-etcd-operator (CEO). The CEO discovers BareMetalHost resources and configures STONITH automatically during installation. No manual Redfish configuration is required. 
### Optional: Attaching Extra Disks diff --git a/deploy/openshift-clusters/kcli-install.yml b/deploy/openshift-clusters/kcli-install.yml index 672101e2..7f57644d 100644 --- a/deploy/openshift-clusters/kcli-install.yml +++ b/deploy/openshift-clusters/kcli-install.yml @@ -75,7 +75,7 @@ tasks_from: prerequisites.yml roles: - # Start ksushy BEFORE cluster installation (required for 4.20+) + # Start ksushy BMC simulator before cluster installation - role: kcli/kcli-redfish when: topology == "fencing" - kcli/kcli-install @@ -95,15 +95,6 @@ - name: Update inventory with cluster VMs include_tasks: roles/common/tasks/update-cluster-inventory.yml - # Configure stonith fencing after cluster installation - - name: Configure Redfish BMC simulation for fencing topology - shell: ansible-playbook kcli-redfish.yml -i {{ inventory_file | default('inventory.ini') }} - args: - chdir: "{{ playbook_dir }}" - delegate_to: localhost - run_once: true - when: topology == "fencing" - - name: "Final verification message" ansible.builtin.debug: msg: |- diff --git a/deploy/openshift-clusters/kcli-redfish.yml b/deploy/openshift-clusters/kcli-redfish.yml deleted file mode 100644 index d77e1c45..00000000 --- a/deploy/openshift-clusters/kcli-redfish.yml +++ /dev/null @@ -1,101 +0,0 @@ ---- -- name: Configure PCS Stonith for kcli-deployed clusters using simulated Redfish BMC - hosts: metal_machine - gather_facts: false - - # Ensure the kubernetes.core collection is installed: - # ansible-galaxy collection install kubernetes.core - collections: - - kubernetes.core - - vars: - # Set environment variables from proxy.env file - proxy_env_vars: {} - - tasks: - - name: Source proxy environment and run kcli-redfish role - block: - - name: Check if proxy.env file exists - stat: - path: "./proxy.env" - register: proxy_env_file - delegate_to: localhost - - - name: Handle proxy.env file presence - block: - - name: Set environment variables from proxy.env - shell: | - source ./proxy.env - echo 
"KUBECONFIG=$KUBECONFIG" - echo "K8S_AUTH_PROXY=$K8S_AUTH_PROXY" - echo "HTTP_PROXY=$HTTP_PROXY" - echo "HTTPS_PROXY=$HTTPS_PROXY" - echo "NO_PROXY=$NO_PROXY" - register: env_vars_output - changed_when: false - delegate_to: localhost - - - name: Extract environment variables - set_fact: - proxy_env_vars: - KUBECONFIG: "{{ env_vars_output.stdout_lines | select('match', '^KUBECONFIG=.*') | first | regex_replace('^KUBECONFIG=', '') }}" - K8S_AUTH_PROXY: "{{ env_vars_output.stdout_lines | select('match', '^K8S_AUTH_PROXY=.*') | first | regex_replace('^K8S_AUTH_PROXY=', '') }}" - HTTP_PROXY: "{{ env_vars_output.stdout_lines | select('match', '^HTTP_PROXY=.*') | first | regex_replace('^HTTP_PROXY=', '') }}" - HTTPS_PROXY: "{{ env_vars_output.stdout_lines | select('match', '^HTTPS_PROXY=.*') | first | regex_replace('^HTTPS_PROXY=', '') }}" - NO_PROXY: "{{ env_vars_output.stdout_lines | select('match', '^NO_PROXY=.*') | first | regex_replace('^NO_PROXY=', '') }}" - - - name: Include kcli-redfish post-installation tasks with proxy environment variables - include_role: - name: kcli/kcli-redfish - tasks_from: post-installation.yml - vars: - proxy_kubeconfig: "{{ proxy_env_vars.KUBECONFIG }}" - proxy_k8s_auth_proxy: "{{ proxy_env_vars.K8S_AUTH_PROXY }}" - proxy_http_proxy: "{{ proxy_env_vars.HTTP_PROXY }}" - proxy_https_proxy: "{{ proxy_env_vars.HTTPS_PROXY }}" - proxy_no_proxy: "{{ proxy_env_vars.NO_PROXY }}" - - - name: Proxy environment configuration used - debug: - msg: | - - Using proxy.env file for cluster access configuration. - Proxy settings have been applied to the kcli-redfish role. - - when: proxy_env_file.stat.exists - - - name: Handle direct cluster access (no proxy.env) - block: - - name: No proxy environment detected - debug: - msg: | - - proxy.env file not found. Assuming direct cluster access. - The kcli-redfish role will run without proxy configuration. 
- - Note: Ensure your current environment has: - - Valid KUBECONFIG environment variable or ~/.kube/config - - Direct network access to the OpenShift cluster API - - - name: Include kcli-redfish post-installation tasks for direct cluster access - include_role: - name: kcli/kcli-redfish - tasks_from: post-installation.yml - - when: not proxy_env_file.stat.exists - - rescue: - - name: Handle kcli-redfish configuration errors - debug: - msg: |- - An error occurred while running kcli-redfish configuration. - Error details: {{ ansible_failed_result.msg | default('Unknown error') }} - - Common issues: - 1. Ensure cluster is deployed with kcli and fencing topology - 2. Verify ksushy BMC simulator is running on the hypervisor - 3. Check that kcli clusters are running: kcli list cluster - 4. Verify you're running this on the correct inventory host (hypervisor) - - You can override auto-detection with explicit variables: - ansible-playbook kcli-redfish.yml -e "test_cluster_name=your-cluster" diff --git a/deploy/openshift-clusters/redfish.yml b/deploy/openshift-clusters/redfish.yml deleted file mode 100644 index 7212df6c..00000000 --- a/deploy/openshift-clusters/redfish.yml +++ /dev/null @@ -1,92 +0,0 @@ ---- -- name: Configure PCS Stonith for Bare Metal Hosts using Redfish - hosts: localhost # This playbook runs on the Ansible control node - connection: local - gather_facts: false # No need to gather facts for localhost - - # Ensure the kubernetes.core collection is installed: - # ansible-galaxy collection install kubernetes.core - collections: - - kubernetes.core - - vars: - # Set environment variables from proxy.env file - proxy_env_vars: {} - - tasks: - - name: Source proxy environment and run redfish role - block: - - name: Check if proxy.env file exists - stat: - path: "./proxy.env" - register: proxy_env_file - - - name: Handle proxy.env file presence - block: - - name: Set environment variables from proxy.env - shell: | - source ./proxy.env - echo 
"KUBECONFIG=$KUBECONFIG" - echo "K8S_AUTH_PROXY=$K8S_AUTH_PROXY" - echo "HTTP_PROXY=$HTTP_PROXY" - echo "HTTPS_PROXY=$HTTPS_PROXY" - echo "NO_PROXY=$NO_PROXY" - register: env_vars_output - changed_when: false - - - name: Extract environment variables - set_fact: - proxy_env_vars: - KUBECONFIG: "{{ env_vars_output.stdout_lines | select('match', '^KUBECONFIG=.*') | first | regex_replace('^KUBECONFIG=', '') }}" - K8S_AUTH_PROXY: "{{ env_vars_output.stdout_lines | select('match', '^K8S_AUTH_PROXY=.*') | first | regex_replace('^K8S_AUTH_PROXY=', '') }}" - HTTP_PROXY: "{{ env_vars_output.stdout_lines | select('match', '^HTTP_PROXY=.*') | first | regex_replace('^HTTP_PROXY=', '') }}" - HTTPS_PROXY: "{{ env_vars_output.stdout_lines | select('match', '^HTTPS_PROXY=.*') | first | regex_replace('^HTTPS_PROXY=', '') }}" - NO_PROXY: "{{ env_vars_output.stdout_lines | select('match', '^NO_PROXY=.*') | first | regex_replace('^NO_PROXY=', '') }}" - - - name: Include redfish role with proxy environment variables - include_role: - name: redfish - vars: - proxy_kubeconfig: "{{ proxy_env_vars.KUBECONFIG }}" - proxy_k8s_auth_proxy: "{{ proxy_env_vars.K8S_AUTH_PROXY }}" - proxy_http_proxy: "{{ proxy_env_vars.HTTP_PROXY }}" - proxy_https_proxy: "{{ proxy_env_vars.HTTPS_PROXY }}" - proxy_no_proxy: "{{ proxy_env_vars.NO_PROXY }}" - - - name: Proxy environment configuration used - debug: - msg: | - - Using proxy.env file for cluster access configuration. - Proxy settings have been applied to the redfish role. - - when: proxy_env_file.stat.exists - - - name: Handle direct cluster access (no proxy.env) - block: - - name: No proxy environment detected - debug: - msg: | - - proxy.env file not found. Assuming direct cluster access. - The redfish role will run without proxy configuration. 
- - Note: Ensure your current environment has: - - Valid KUBECONFIG environment variable or ~/.kube/config - - Direct network access to the OpenShift cluster API - - - name: Include redfish role for direct cluster access - include_role: - name: redfish - - when: not proxy_env_file.stat.exists - - rescue: - - name: Handle redfish configuration errors - debug: - msg: |- - An error occurred while running redfish configuration. - Error details: {{ ansible_failed_result.msg | default('Unknown error') }} - - You can try running manually: - source proxy.env && ansible-playbook -i localhost, roles/redfish/tasks/main.yml diff --git a/deploy/openshift-clusters/roles/dev-scripts/install-dev/files/config_arbiter_example.sh b/deploy/openshift-clusters/roles/dev-scripts/install-dev/files/config_arbiter_example.sh index 7b2bd3d3..5e661324 100644 --- a/deploy/openshift-clusters/roles/dev-scripts/install-dev/files/config_arbiter_example.sh +++ b/deploy/openshift-clusters/roles/dev-scripts/install-dev/files/config_arbiter_example.sh @@ -24,8 +24,6 @@ export AGENT_E2E_TEST_SCENARIO="TNA_IPV4" ## END Agent Specific Install Config Variables #### -# TechPreview FeatureSet not needed for 4.20 and above OCP -# export FEATURE_SET="TechPreviewNoUpgrade" export OPENSHIFT_CI="true" # If you want to avoid using the CI_TOKEN, uncomment this variable, but it has side effects. @@ -35,7 +33,7 @@ export OPENSHIFT_CI="true" # You can find the latest public images in https://quay.io/repository/openshift-release-dev/ocp-release?tab=tags # and select your preferred version. 
Public sources can be found at https://mirror.openshift.com/pub/openshift-v4/ -export OPENSHIFT_RELEASE_IMAGE=quay.io/openshift-release-dev/ocp-release:4.21.0-x86_64 +export OPENSHIFT_RELEASE_IMAGE=quay.io/openshift-release-dev/ocp-release:4.22.0-x86_64 # Unless you need to override the installer image, this is not needed # export OPENSHIFT_INSTALL_RELEASE_IMAGE_OVERRIDE="" diff --git a/deploy/openshift-clusters/roles/dev-scripts/install-dev/files/config_fencing_example.sh b/deploy/openshift-clusters/roles/dev-scripts/install-dev/files/config_fencing_example.sh index 2291e37c..54719343 100644 --- a/deploy/openshift-clusters/roles/dev-scripts/install-dev/files/config_fencing_example.sh +++ b/deploy/openshift-clusters/roles/dev-scripts/install-dev/files/config_fencing_example.sh @@ -5,11 +5,8 @@ export NUM_WORKERS=0 export MASTER_MEMORY=32768 export MASTER_DISK=100 export NUM_MASTERS=2 -export FEATURE_SET="DevPreviewNoUpgrade" - -# redfish or ipmi, but if not set and using OPENSHIF_CI=true, -# mixed drivers will be used and automatic fencing configuration in 4.19 won't work -export BMC_DRIVER=redfish +# Ensure consistent BMC driver across all hosts for automatic fencing configuration +export BMC_DRIVER=redfish # If you want to avoid using the CI_TOKEN, uncomment this variable, but it has side effects. # You can read more on this here: https://github.com/openshift-metal3/dev-scripts/blob/3f070cfd36977381a186cadfb44887856d652bed/config_example.sh#L21 @@ -22,7 +19,7 @@ export CI_TOKEN="sha256~" # You can find the latest public images in https://quay.io/repository/openshift-release-dev/ocp-release?tab=tags # and select your preferred version. 
Public sources can be found at https://mirror.openshift.com/pub/openshift-v4/ -export OPENSHIFT_RELEASE_IMAGE=quay.io/openshift-release-dev/ocp-release:4.21.0-multi +export OPENSHIFT_RELEASE_IMAGE=quay.io/openshift-release-dev/ocp-release:4.22.0-multi # Unless you need to override the installer image, this is not needed # export OPENSHIFT_INSTALL_RELEASE_IMAGE_OVERRIDE="" diff --git a/deploy/openshift-clusters/roles/kcli/kcli-redfish/README.md b/deploy/openshift-clusters/roles/kcli/kcli-redfish/README.md index a65ae665..13e2d50b 100644 --- a/deploy/openshift-clusters/roles/kcli/kcli-redfish/README.md +++ b/deploy/openshift-clusters/roles/kcli/kcli-redfish/README.md @@ -1,128 +1,45 @@ # kcli-redfish Role -This role configures PCS (Pacemaker/Corosync) Stonith resources for kcli-deployed OpenShift clusters using simulated Redfish BMC endpoints. +Starts the ksushy BMC simulator for kcli-deployed OpenShift clusters with fencing topology. ## Description -The kcli-redfish role automates the configuration of STONITH (Shoot-The-Other-Node-In-The-Head) resources for kcli-deployed OpenShift clusters. Unlike bare metal deployments that use BareMetalHost resources, kcli deployments use virtual machines with simulated BMC functionality via [ksushy](https://kcli.readthedocs.io/en/latest/index.html#ksushy). For more information, see [the deploy folder README](../../../README.md#redfish-stonith-configuration) +The kcli-redfish role manages the ksushy BMC simulator, which provides Redfish endpoints for virtual machines deployed via kcli. The cluster-etcd-operator (CEO) uses these endpoints to auto-configure STONITH fencing during installation. This role: -1. Identifies cluster nodes from kcli deployment configuration -2. Creates ksushy systemd service on the hypervisor for SSL-enabled BMC simulation +1. Installs required Python dependencies +2. Creates the ksushy systemd service on the hypervisor 3. Configures firewall rules to allow BMC access from VMs -4. 
Configures PCS stonith resources on each node using `fence_redfish` -5. Enables stonith in the cluster +4. Validates BMC endpoint accessibility ## Requirements -- OpenShift cluster deployed with kcli fencing topology - kcli command available on the hypervisor host -- `kubernetes.core` Ansible collection -- Python 3 with `kubernetes`, `PyYAML`, and `jsonpatch` libraries (automatically installed) -- `oc` CLI tool available in PATH -- Valid kubeconfig file with cluster-admin permissions -- SSH access to the kcli deployment host (hypervisor) - Sudo privileges on the hypervisor for firewall configuration and Python package installation -## Automatic Setup - -The role automatically handles: -- Installing required Python dependencies (`kubernetes`, `PyYAML`, `jsonpatch`) -- Creating ksushy systemd service using `kcli create sushy-service` -- Configuring firewall rules (port 9000/tcp in libvirt zone) -- SSL certificate management (self-signed certificates via kcli) -- BMC endpoint discovery for all cluster VMs - -## Dependencies - -- kubernetes.core collection: `ansible-galaxy collection install kubernetes.core` -- Python dependencies are automatically installed by the role (kubernetes, PyYAML, jsonpatch) - ## Role Variables -### Automatic Configuration - -The role automatically detects all required configuration from: - -| Variable | Auto-Detection Source | Override Available | -|----------|----------------------|-------------------| -| `test_cluster_name` | `kcli list cluster` or kcli-install defaults | Yes | -| `ksushy_ip` | Ansible inventory host IP | Yes | -| `ksushy_port` | kcli-install role defaults (9000) | Yes | -| `bmc_user` | kcli-install role defaults ("admin") | Yes | -| `bmc_password` | kcli-install role defaults ("admin123") | Yes | - -### Manual Override Variables (Optional) - -Override only if auto-detection fails: - -- `test_cluster_name`: Override detected cluster name -- `ksushy_ip`: Override detected hypervisor IP -- `ksushy_port`: Override BMC 
simulator port -- `bmc_user`: Override BMC username -- `bmc_password`: Override BMC password -- `ssl_insecure_param`: SSL verification parameter (default: "ssl_insecure=1") +| Variable | Default | Description | +|----------|---------|-------------| +| `test_cluster_name` | `tnt-cluster` | kcli cluster name | +| `ksushy_ip` | `192.168.122.1` | Hypervisor IP (libvirt default gateway) | +| `ksushy_port` | `9000` | BMC simulator port | +| `bmc_user` | `admin` | BMC username | +| `bmc_password` | `admin123` | BMC password | ## Usage -### Running the Role - -This role should be run after a successful kcli deployment with fencing topology. **No configuration required** - everything is auto-detected: - -```bash -# Ensure you're authenticated to your OpenShift cluster -oc whoami - -# Run the kcli-redfish configuration (fully automatic) -ansible-playbook kcli-redfish.yml -i inventory.ini -``` - -### Integration with kcli-install - -The role can be integrated into the kcli-install workflow by adding it as a post-deployment task. **No variables required** due to auto-detection: - -```yaml -# In kcli-install.yml or custom playbook -- name: Configure kcli fencing - include_role: - name: kcli.kcli-redfish - # All configuration is automatically detected from kcli-install defaults - # and current deployment environment -``` - -## How It Works - -1. **Node Discovery**: Identifies cluster nodes by querying the OpenShift API -2. **BMC Endpoint Calculation**: Constructs BMC endpoints using the ksushy simulator -3. **Stonith Configuration**: Configures fence_redfish resources for each node -4. **Stonith Enablement**: Enables stonith globally in the cluster - -The role understands that in kcli deployments: -- Virtual machines are named `{cluster-name}-ctlplane-{index}` -- BMC simulation uses ksushy with predictable endpoints -- No BareMetalHost resources exist in the cluster +This role runs automatically as part of `kcli-install.yml` before cluster installation. 
It does not need to be invoked manually. ## Troubleshooting -### Common Issues - -**No cluster nodes found:** -```bash -# Verify cluster access -oc get nodes -``` - **ksushy not accessible:** ```bash -# Check ksushy systemd service is running on the hypervisor systemctl --user status ksushy.service - -# Test ksushy endpoint (uses HTTPS with self-signed cert) -curl -k https://{ksushy_ip}:{ksushy_port}/redfish/v1/ +curl -sk https://192.168.122.1:9000/redfish/v1/ ``` -**Stonith configuration fails:** +**Firewall blocking BMC access:** ```bash -# Check existing stonith resources -oc debug node/{node-name} -- chroot /host pcs stonith status -``` \ No newline at end of file +firewall-cmd --list-ports --zone=libvirt +``` diff --git a/deploy/openshift-clusters/roles/kcli/kcli-redfish/tasks/configure_stonith.yml b/deploy/openshift-clusters/roles/kcli/kcli-redfish/tasks/configure_stonith.yml deleted file mode 100644 index 0ee928c9..00000000 --- a/deploy/openshift-clusters/roles/kcli/kcli-redfish/tasks/configure_stonith.yml +++ /dev/null @@ -1,76 +0,0 @@ ---- -# Configure stonith for a single node in kcli deployment -# Variables available: current_node_name - -- name: Calculate VM index for node {{ current_node_name }} - set_fact: - # Extract the numeric index from node name pattern - # kcli creates nodes like: clustername-ctlplane-0, clustername-ctlplane-1 - # Node names are like: clustername-ctlplane-0.domain.com - vm_index: "{{ current_node_name | regex_replace(test_cluster_name + '-ctlplane-([0-9]+)\\..*', '\\1') }}" - -- name: Set BMC endpoint details for node {{ current_node_name }} - set_fact: - bmc_ip: "{{ ksushy_ip }}" - bmc_port: "{{ ksushy_port }}" - # ksushy creates predictable system IDs based on VM names (without domain) - # Uses local/ prefix for ksushy systemd service - bmc_system_id: "local/{{ test_cluster_name }}-ctlplane-{{ vm_index }}" - stonith_resource_name: "{{ current_node_name }}_redfish" - -- name: Display BMC configuration for node {{ 
current_node_name }} - debug: - msg: | - Node: {{ current_node_name }} - BMC IP: {{ bmc_ip }}:{{ bmc_port }} - System ID: {{ bmc_system_id }} - Stonith Resource: {{ stonith_resource_name }} - -- name: Check if stonith resource already exists for node {{ current_node_name }} - ansible.builtin.shell: | - oc debug node/{{ current_node_name }} -- chroot /host bash -c "pcs stonith status {{ stonith_resource_name }}" - register: pcs_stonith_status_result - failed_when: false - no_log: true - delegate_to: localhost - environment: - KUBECONFIG: "{{ proxy_kubeconfig | default(omit) }}" - HTTP_PROXY: "{{ proxy_http_proxy | default(omit) }}" - HTTPS_PROXY: "{{ proxy_https_proxy | default(omit) }}" - NO_PROXY: "{{ proxy_no_proxy | default(omit) }}" - -- name: Create stonith resource for node {{ current_node_name }} - ansible.builtin.shell: | - oc debug node/{{ current_node_name }} -- chroot /host bash -c "pcs stonith create {{ stonith_resource_name }} fence_redfish \ - username={{ bmc_user }} password={{ bmc_password }} \ - ip={{ bmc_ip }} ipport={{ bmc_port }} \ - systems_uri=/redfish/v1/Systems/{{ bmc_system_id }} \ - pcmk_host_list={{ current_node_name }} ssl_insecure=1" - no_log: true - register: pcs_stonith_create_result - when: pcs_stonith_status_result.rc != 0 - delegate_to: localhost - environment: - KUBECONFIG: "{{ proxy_kubeconfig | default(omit) }}" - HTTP_PROXY: "{{ proxy_http_proxy | default(omit) }}" - HTTPS_PROXY: "{{ proxy_https_proxy | default(omit) }}" - NO_PROXY: "{{ proxy_no_proxy | default(omit) }}" - -- name: Report stonith resource creation status for node {{ current_node_name }} - debug: - msg: | - {% if pcs_stonith_status_result.rc == 0 %} - Stonith resource {{ stonith_resource_name }} already exists for node {{ current_node_name }} - {% elif pcs_stonith_create_result is defined and pcs_stonith_create_result.rc == 0 %} - Successfully created stonith resource {{ stonith_resource_name }} for node {{ current_node_name }} - {% else %} - Failed to create 
stonith resource {{ stonith_resource_name }} for node {{ current_node_name }} - {% endif %} - -- name: Fail if stonith resource creation failed - fail: - msg: "Failed to create stonith resource for node {{ current_node_name }}: {{ pcs_stonith_create_result.stderr | default('Unknown error') }}" - when: - - pcs_stonith_status_result.rc != 0 - - pcs_stonith_create_result is defined - - pcs_stonith_create_result.rc != 0 diff --git a/deploy/openshift-clusters/roles/kcli/kcli-redfish/tasks/main.yml b/deploy/openshift-clusters/roles/kcli/kcli-redfish/tasks/main.yml index afb8a13a..4868015a 100644 --- a/deploy/openshift-clusters/roles/kcli/kcli-redfish/tasks/main.yml +++ b/deploy/openshift-clusters/roles/kcli/kcli-redfish/tasks/main.yml @@ -1,6 +1,6 @@ --- # kcli-redfish role main tasks -# Configures stonith fencing for kcli-deployed clusters +# Starts ksushy BMC simulator for kcli fencing deployments - name: Install prerequisites include_tasks: prerequisites.yml @@ -17,14 +17,12 @@ - name: Display configuration debug: msg: | - kcli fencing configuration: + ksushy BMC simulator configuration: Cluster Name: {{ test_cluster_name }} Hypervisor IP: {{ ksushy_ip }} BMC User: {{ bmc_user }} BMC Port: {{ ksushy_port }} - Override with: ansible-playbook kcli-redfish.yml -e "test_cluster_name=my-cluster" -e "ksushy_ip=X.X.X.X" - - name: Start ksushy BMC simulator include_tasks: start_ksushy.yml diff --git a/deploy/openshift-clusters/roles/kcli/kcli-redfish/tasks/post-installation.yml b/deploy/openshift-clusters/roles/kcli/kcli-redfish/tasks/post-installation.yml deleted file mode 100644 index d2f859ef..00000000 --- a/deploy/openshift-clusters/roles/kcli/kcli-redfish/tasks/post-installation.yml +++ /dev/null @@ -1,45 +0,0 @@ ---- -# Post-installation stonith configuration tasks -# These run AFTER cluster installation is complete - -- name: Get cluster nodes - kubernetes.core.k8s_info: - api_version: v1 - kind: Node - label_selectors: - - "node-role.kubernetes.io/control-plane=" - 
register: cluster_nodes - delegate_to: localhost - environment: - KUBECONFIG: "{{ proxy_kubeconfig | default(omit) }}" - K8S_AUTH_PROXY: "{{ proxy_k8s_auth_proxy | default(omit) }}" - HTTP_PROXY: "{{ proxy_http_proxy | default(omit) }}" - HTTPS_PROXY: "{{ proxy_https_proxy | default(omit) }}" - NO_PROXY: "{{ proxy_no_proxy | default(omit) }}" - -- name: Validate cluster nodes found - assert: - that: - - cluster_nodes.resources is defined - - cluster_nodes.resources | length > 0 - fail_msg: "No control plane nodes found in the cluster" - delegate_to: localhost - -- name: Extract node names - set_fact: - node_names: "{{ cluster_nodes.resources | map(attribute='metadata.name') | list }}" - delegate_to: localhost - -- name: Display discovered nodes - debug: - msg: "Found cluster nodes: {{ node_names }}" - delegate_to: localhost - -- name: Configure stonith for each node - include_tasks: configure_stonith.yml - loop: "{{ node_names }}" - loop_control: - loop_var: current_node_name - -- name: Verify stonith configuration - include_tasks: verify_stonith.yml diff --git a/deploy/openshift-clusters/roles/kcli/kcli-redfish/tasks/verify_stonith.yml b/deploy/openshift-clusters/roles/kcli/kcli-redfish/tasks/verify_stonith.yml deleted file mode 100644 index 55f341c8..00000000 --- a/deploy/openshift-clusters/roles/kcli/kcli-redfish/tasks/verify_stonith.yml +++ /dev/null @@ -1,52 +0,0 @@ ---- -# Verify and enable stonith globally - -- name: Enable stonith globally (run on first node) - ansible.builtin.shell: | - oc debug node/{{ node_names[0] }} -- chroot /host bash -c "pcs property set stonith-enabled=true" - register: pcs_stonith_enable_result - delegate_to: localhost - environment: - KUBECONFIG: "{{ proxy_kubeconfig | default(omit) }}" - HTTP_PROXY: "{{ proxy_http_proxy | default(omit) }}" - HTTPS_PROXY: "{{ proxy_https_proxy | default(omit) }}" - NO_PROXY: "{{ proxy_no_proxy | default(omit) }}" - -- name: Verify stonith is enabled - ansible.builtin.shell: | - oc debug node/{{ 
node_names[0] }} -- chroot /host bash -c "pcs property show stonith-enabled" - register: pcs_stonith_verify_result - delegate_to: localhost - environment: - KUBECONFIG: "{{ proxy_kubeconfig | default(omit) }}" - HTTP_PROXY: "{{ proxy_http_proxy | default(omit) }}" - HTTPS_PROXY: "{{ proxy_https_proxy | default(omit) }}" - NO_PROXY: "{{ proxy_no_proxy | default(omit) }}" - -- name: Display stonith status - debug: - msg: | - Stonith enable result: {{ pcs_stonith_enable_result.stdout | default('No output') }} - Stonith verification: {{ pcs_stonith_verify_result.stdout | default('No output') }} - -- name: Fail if stonith is not enabled - fail: - msg: "STONITH is not enabled. Expected 'stonith-enabled: true' but got: {{ pcs_stonith_verify_result.stdout | default('No output') }}" - when: "'stonith-enabled: true' not in pcs_stonith_verify_result.stdout" - -- name: List all stonith resources - ansible.builtin.shell: | - oc debug node/{{ node_names[0] }} -- chroot /host bash -c "pcs stonith status" - register: pcs_stonith_list_result - delegate_to: localhost - environment: - KUBECONFIG: "{{ proxy_kubeconfig | default(omit) }}" - HTTP_PROXY: "{{ proxy_http_proxy | default(omit) }}" - HTTPS_PROXY: "{{ proxy_https_proxy | default(omit) }}" - NO_PROXY: "{{ proxy_no_proxy | default(omit) }}" - -- name: Display final stonith configuration - debug: - msg: |- - Final stonith configuration: - {{ pcs_stonith_list_result.stdout | default('No stonith resources found') }} diff --git a/deploy/openshift-clusters/roles/redfish/README.md b/deploy/openshift-clusters/roles/redfish/README.md deleted file mode 100644 index 31275ed0..00000000 --- a/deploy/openshift-clusters/roles/redfish/README.md +++ /dev/null @@ -1,130 +0,0 @@ -# Redfish Role - -This role configures PCS (Pacemaker/Corosync) Stonith resources for Bare Metal Hosts using Redfish fencing. 
- -## Description - -The redfish role automates the configuration of STONITH (Shoot-The-Other-Node-In-The-Head) resources for OpenShift bare metal nodes using Redfish BMC interfaces. This role runs on localhost and uses your local kubeconfig to access the OpenShift cluster. It: - -1. Discovers all BareMetalHost (BMH) resources in the specified namespace -2. Extracts BMC credentials and connection details from each BMH -3. Uses `oc debug` commands to access cluster nodes (no SSH required) -4. Configures PCS stonith resources on each node using `fence_redfish` -5. Enables stonith in the cluster - -## Requirements - -- OpenShift cluster with bare metal nodes -- BMH resources configured with Redfish BMC details -- `kubernetes.core` Ansible collection -- `oc` CLI tool available in PATH -- Valid kubeconfig file with cluster-admin or equivalent permissions -- Appropriate permissions to run `oc debug` commands on cluster nodes - -**Note**: This role runs on localhost (the machine where you execute the playbook) and uses your local kubeconfig to access the OpenShift cluster. It does not require SSH access to the cluster nodes. - -## Dependencies - -- kubernetes.core collection: `ansible-galaxy collection install kubernetes.core` - -## Role Variables - -### Default Variables (defaults/main.yml) - -- `bmh_namespace`: Namespace where BareMetalHost resources are deployed (default: "openshift-machine-api") -- `ssl_insecure_param`: SSL certificate verification parameter (default: "") - -## Usage - -### Running the Role - -This role is designed to run on localhost (your local machine) and uses your local kubeconfig to access the OpenShift cluster. Ensure you have a valid kubeconfig and are authenticated to the cluster before running. - -```bash -# Ensure you're authenticated to your OpenShift cluster -oc whoami -``` -If you deployed the cluster using the procedure in [openshift-clusters](../../README.md), you should have sourced the `proxy.env` file to have a valid connection. 
- -Use the top-level `redfish.yml` playbook: - - -```bash -# Run the playbook (executes on localhost, accesses cluster via kubeconfig) -ansible-playbook redfish.yml -``` - -### Custom Variables - -To override default variables, create a vars file or use `-e` option: - -```bash -ansible-playbook redfish.yml -e bmh_namespace=custom-namespace -``` - -## Verification - -After the Redfish configuration is complete, you can verify the stonith setup using these commands: - -```bash -oc debug node/master-0 -- chroot /host sudo pcs property config -``` - -Sample output: -``` -Starting pod/master-0-debug-wzsz9 ... -To use host binaries, run `chroot /host` -Cluster Properties: cib-bootstrap-options - cluster-infrastructure=corosync - cluster-name=TNF - dc-version=2.1.9-1.el9-49aab9983 - have-watchdog=false - stonith-enabled=true - -Removing debug pod ... -``` - -```bash -oc debug node/master-0 -- chroot /host sudo pcs status -``` - -Sample output: -``` -Starting pod/master-0-debug-hlh52 ... -To use host binaries, run `chroot /host` -Cluster name: TNF -Cluster Summary: - * Stack: corosync (Pacemaker is running) - * Current DC: master-0 (version 2.1.9-1.el9-49aab9983) - partition with quorum - * Last updated: Mon Jul 7 23:05:29 2025 on master-0 - * Last change: Mon Jul 7 23:05:24 2025 by root via root on master-1 - * 2 nodes configured - * 6 resource instances configured - -Node List: - * Online: [ master-0 master-1 ] - -Full List of Resources: - * Clone Set: kubelet-clone [kubelet]: - * Started: [ master-0 master-1 ] - * Clone Set: etcd-clone [etcd]: - * Started: [ master-0 master-1 ] - * master-0_redfish (stonith:fence_redfish): Started master-0 - * master-1_redfish (stonith:fence_redfish): Started master-1 - -Daemon Status: - corosync: active/enabled - pacemaker: active/enabled - pcsd: active/enabled - -Removing debug pod ... 
-``` - -## Notes - -- **Localhost Execution**: This role runs entirely on localhost (your local machine) and uses your local kubeconfig to access the OpenShift cluster -- **No SSH Required**: Uses OCP debug commands instead of SSH, making it work out of the box on any OpenShift cluster without requiring SSH keys or network access to individual nodes -- **Kubeconfig Dependency**: Requires a valid kubeconfig file with appropriate permissions to access BMH resources and run debug commands on cluster nodes -- **Automatic Processing**: The role processes all BMH resources found in the namespace automatically -- **SSL Configuration**: SSL certificate verification can be disabled by setting appropriate BMH configuration -- **Security**: All sensitive operations are performed with appropriate security considerations \ No newline at end of file diff --git a/deploy/openshift-clusters/roles/redfish/defaults/main.yml b/deploy/openshift-clusters/roles/redfish/defaults/main.yml deleted file mode 100644 index cde146f2..00000000 --- a/deploy/openshift-clusters/roles/redfish/defaults/main.yml +++ /dev/null @@ -1,9 +0,0 @@ ---- -# Default variables for the redfish role -# These variables can be overridden when calling the role - -# BMH namespace where Bare Metal Hosts are deployed -bmh_namespace: "openshift-machine-api" - -# SSL certificate verification setting -ssl_insecure_param: "" diff --git a/deploy/openshift-clusters/roles/redfish/tasks/main.yml b/deploy/openshift-clusters/roles/redfish/tasks/main.yml deleted file mode 100644 index c64bafa8..00000000 --- a/deploy/openshift-clusters/roles/redfish/tasks/main.yml +++ /dev/null @@ -1,27 +0,0 @@ ---- -- name: Get all Bare Metal Host (BMH) CRs from {{ bmh_namespace }} namespace - kubernetes.core.k8s_info: - api_version: metal3.io/v1alpha1 - kind: BareMetalHost - namespace: "{{ bmh_namespace }}" - register: bmh_crs_output - environment: - KUBECONFIG: "{{ proxy_kubeconfig | default(omit) }}" - K8S_AUTH_PROXY: "{{ proxy_k8s_auth_proxy 
| default(omit) }}" - HTTP_PROXY: "{{ proxy_http_proxy | default(omit) }}" - HTTPS_PROXY: "{{ proxy_https_proxy | default(omit) }}" - NO_PROXY: "{{ proxy_no_proxy | default(omit) }}" - -- name: Set fact for BMH names, sorted alphabetically - # Extract only the names from the BMH resources - set_fact: - bmh_names: "{{ bmh_crs_output.resources | map(attribute='metadata.name') | sort }}" - when: bmh_crs_output.resources is defined and bmh_crs_output.resources | length > 0 - -- name: Process each Bare Metal Host found - # Include process_bmh.yml tasks to process each BMH for better organization - include_tasks: process_bmh.yml - loop: "{{ bmh_names }}" - loop_control: - loop_var: current_bmh_name # This will be used by process_bmh.yml - when: bmh_names is defined and bmh_names | length > 0 diff --git a/deploy/openshift-clusters/roles/redfish/tasks/process_bmh.yml b/deploy/openshift-clusters/roles/redfish/tasks/process_bmh.yml deleted file mode 100644 index ff7456e7..00000000 --- a/deploy/openshift-clusters/roles/redfish/tasks/process_bmh.yml +++ /dev/null @@ -1,134 +0,0 @@ ---- -- name: Get detailed information for BMH {{ current_bmh_name }} - # Get full details for the current BMH being processed in the loop - kubernetes.core.k8s_info: - api_version: metal3.io/v1alpha1 - kind: BareMetalHost - namespace: "{{ bmh_namespace }}" - name: "{{ current_bmh_name }}" - register: bmh_detail - environment: - KUBECONFIG: "{{ proxy_kubeconfig | default(omit) }}" - K8S_AUTH_PROXY: "{{ proxy_k8s_auth_proxy | default(omit) }}" - HTTP_PROXY: "{{ proxy_http_proxy | default(omit) }}" - HTTPS_PROXY: "{{ proxy_https_proxy | default(omit) }}" - NO_PROXY: "{{ proxy_no_proxy | default(omit) }}" - -- name: Extract node name from BMH status - set_fact: - node_name: "{{ bmh_detail.resources[0].status.hardware.hostname }}" - when: - - bmh_detail.resources is defined - - bmh_detail.resources | length > 0 - - bmh_detail.resources[0].status.hardware.hostname is defined - -- name: Extract BMC address 
components (IP, Port, Hash) for {{ current_bmh_name }} - set_fact: - credentials_secret_name: "{{ bmh_detail.resources[0].spec.bmc.credentialsName }}" - bmc_address_full: "{{ bmh_detail.resources[0].spec.bmc.address }}" - bmc_ip: "{{ bmh_detail.resources[0].spec.bmc.address | regex_search('\\b\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\b') }}" - bmc_port: "{{ bmh_detail.resources[0].spec.bmc.address | regex_search('(?<=\\:)(\\d+)') }}" - bmc_hash: "{{ bmh_detail.resources[0].spec.bmc.address | split('/') | last }}" - when: bmh_detail.resources is defined and bmh_detail.resources | length > 0 - -- name: Get BMC credentials secret details for {{ credentials_secret_name }} - kubernetes.core.k8s_info: - api_version: v1 - kind: Secret - namespace: "{{ bmh_namespace }}" - name: "{{ credentials_secret_name }}" - register: credentials_secret_detail - when: credentials_secret_name is defined - environment: - KUBECONFIG: "{{ proxy_kubeconfig | default(omit) }}" - K8S_AUTH_PROXY: "{{ proxy_k8s_auth_proxy | default(omit) }}" - HTTP_PROXY: "{{ proxy_http_proxy | default(omit) }}" - HTTPS_PROXY: "{{ proxy_https_proxy | default(omit) }}" - NO_PROXY: "{{ proxy_no_proxy | default(omit) }}" - -- name: Decode BMC username and password from base64 - set_fact: - bmc_username: "{{ credentials_secret_detail.resources[0].data.username | b64decode }}" - bmc_password: "{{ credentials_secret_detail.resources[0].data.password | b64decode }}" - no_log: true # IMPORTANT: Prevent sensitive data from being logged in plain text - when: credentials_secret_detail.resources is defined and credentials_secret_detail.resources | length > 0 - -- name: Determine if SSL certificate verification should be disabled - set_fact: - ssl_insecure_param: "ssl_insecure=1" - when: bmh_detail.resources[0].spec.bmc.disableCertificateVerification | default(false) | bool - -- name: Ensure the redfish stonith resource does not exist already on {{ node_name }} - # Using OCP debug command to access the node without 
requiring SSH - ansible.builtin.shell: | - oc debug node/{{ node_name }} -- chroot /host bash -c "pcs stonith status {{ node_name }}_redfish" - register: pcs_stonith_status_result - ignore_errors: true # Crucial for debugging to see stdout/stderr even if the command itself fails - changed_when: false # This task doesn't change system state - when: - - node_name is defined # Ensure node name is defined before attempting to run debug command - environment: - KUBECONFIG: "{{ proxy_kubeconfig | default(omit) }}" - HTTP_PROXY: "{{ proxy_http_proxy | default(omit) }}" - HTTPS_PROXY: "{{ proxy_https_proxy | default(omit) }}" - NO_PROXY: "{{ proxy_no_proxy | default(omit) }}" - -# Uncomment the following to debug pcs stonith status output -- name: Debug Output of SSH from localhost command - debug: - var: pcs_stonith_status_result - when: pcs_stonith_status_result is defined - -- name: Create PCS stonith resource on node {{ node_name }} if it doesn't exist - # Using OCP debug command to access the node without requiring SSH - ansible.builtin.shell: | - oc debug node/{{ node_name }} -- chroot /host bash -c "pcs stonith create {{ node_name }}_redfish fence_redfish \ - username={{ bmc_username }} password={{ bmc_password }} \ - ip={{ bmc_ip }} ipport={{ bmc_port }} \ - systems_uri=/redfish/v1/Systems/{{ bmc_hash }} \ - pcmk_host_list={{ node_name }} {{ ssl_insecure_param | default('') }}" - no_log: true - register: pcs_stonith_create_result - when: - - bmc_username is defined - - bmc_password is defined - - bmc_ip is defined - - bmc_port is defined - - bmc_hash is defined - - node_name is defined - # This task runs ONLY if the pcs stonith status was not able to find the resource - - pcs_stonith_status_result.rc == 1 - environment: - KUBECONFIG: "{{ proxy_kubeconfig | default(omit) }}" - HTTP_PROXY: "{{ proxy_http_proxy | default(omit) }}" - HTTPS_PROXY: "{{ proxy_https_proxy | default(omit) }}" - NO_PROXY: "{{ proxy_no_proxy | default(omit) }}" - -# Uncomment the following to 
debug pcs stonith create output -- name: Debug pcs stonith create - debug: - var: pcs_stonith_create_result - when: pcs_stonith_create_result is defined - -- name: Enable PCS stonith on node {{ node_name }} - # Using OCP debug command to access the node without requiring SSH - ansible.builtin.shell: | - oc debug node/{{ node_name }} -- chroot /host bash -c "pcs property set stonith-enabled=true" - no_log: true - register: pcs_stonith_enable_result - when: - - node_name is defined - - pcs_stonith_status_result is defined - - (pcs_stonith_status_result.rc == 0 or (pcs_stonith_create_result is defined and pcs_stonith_create_result.rc == 0)) - environment: - KUBECONFIG: "{{ proxy_kubeconfig | default(omit) }}" - HTTP_PROXY: "{{ proxy_http_proxy | default(omit) }}" - HTTPS_PROXY: "{{ proxy_https_proxy | default(omit) }}" - NO_PROXY: "{{ proxy_no_proxy | default(omit) }}" - -# Uncomment the following to debug pcs stonith create output -- name: Debug pcs stonith create - debug: - var: pcs_stonith_create_result - when: pcs_stonith_create_result is defined - diff --git a/deploy/openshift-clusters/setup.yml b/deploy/openshift-clusters/setup.yml index 8042b758..01ae6fe7 100644 --- a/deploy/openshift-clusters/setup.yml +++ b/deploy/openshift-clusters/setup.yml @@ -85,160 +85,3 @@ - name: Update inventory with cluster VMs include_tasks: roles/common/tasks/update-cluster-inventory.yml - post_tasks: - - name: Check if redfish configuration should be offered - block: - - name: Get OpenShift cluster version - ansible.builtin.shell: | - source ./proxy.env && oc version - register: oc_version_result - delegate_to: localhost - run_once: true - failed_when: false - changed_when: false - - - name: Parse OpenShift version - set_fact: - ocp_version: "{{ oc_version_result.stdout | regex_search('Server Version: (.+)', '\\1') | first }}" - when: - - oc_version_result.rc == 0 - - oc_version_result.stdout is defined - - oc_version_result.stdout | length > 0 - delegate_to: localhost - run_once: 
true - - - name: Check if OpenShift version is 4.19.x - set_fact: - is_ocp_4_19: "{{ ocp_version is defined and ocp_version is match('^4\\.19\\..*') }}" - delegate_to: localhost - run_once: true - - - name: Prompt user for redfish configuration - ansible.builtin.pause: - prompt: | - Your cluster meets the requirements for automatic redfish stonith configuration: - Topology: {{ topology }} - OpenShift Version: {{ ocp_version | default('Could not determine') }} - - The redfish role will: - - Discover all BareMetalHost resources - - Configure PCS stonith resources using redfish fencing - - Enable stonith in the cluster - - Run on localhost using your kubeconfig (no SSH required) - - Would you like to run the redfish configuration now? (y/n) - register: redfish_prompt_result - delegate_to: localhost - run_once: true - when: - - interactive_mode is not defined or interactive_mode | bool - - topology == "fencing" - - is_ocp_4_19 | default(false) - - - name: Set redfish execution flag - set_fact: - should_run_redfish: true - when: - - redfish_prompt_result is defined - - redfish_prompt_result.user_input | default('') | lower in ['y', 'yes'] - - - name: Redfish configuration will be executed - ansible.builtin.debug: - msg: | - The redfish configuration will now be executed using the redfish playbook. - when: should_run_redfish | default(false) - - - name: Redfish configuration skipped - ansible.builtin.debug: - msg: | - You can run the redfish configuration later by executing: ansible-playbook redfish.yml - delegate_to: localhost - run_once: true - when: - - redfish_prompt_result is defined - - redfish_prompt_result.user_input | default('') | lower not in ['y', 'yes'] - - is_ocp_4_19 | default(false) - - rescue: - - name: Handle redfish configuration errors - ansible.builtin.debug: - msg: | - An error occurred while checking or running redfish configuration. 
You can run it manually later with: ansible-playbook redfish.yml - delegate_to: localhost - run_once: true - - when: - - topology == "fencing" - - interactive_mode is not defined or interactive_mode | bool - - - name: Check if proxy.env file exists - ansible.builtin.stat: - path: "./proxy.env" - register: proxy_env_stat - delegate_to: localhost - run_once: true - when: should_run_redfish | default(false) - - - name: Confirm to proceed without proxy.env - ansible.builtin.pause: - prompt: | - The proxy.env file is missing. The redfish configuration will likely fail if there is no direct cluster access. - Do you want to proceed anyway? (y/n) - register: proceed_without_proxy - delegate_to: localhost - run_once: true - when: - - should_run_redfish | default(false) - - not proxy_env_stat.stat.exists - - interactive_mode is not defined or interactive_mode | bool - - - name: Abort redfish configuration due to missing proxy.env - ansible.builtin.fail: - msg: "Redfish configuration aborted due to missing proxy.env file" - when: - - should_run_redfish | default(false) - - not proxy_env_stat.stat.exists - - (proceed_without_proxy.user_input | default('') | lower not in ['y', 'yes']) - - - name: Source proxy.env and extract environment variables - ansible.builtin.shell: | - source ./proxy.env && env | grep -E '^(KUBECONFIG|HTTP_PROXY|HTTPS_PROXY|NO_PROXY)=' - register: proxy_env_vars - delegate_to: localhost - run_once: true - when: - - should_run_redfish | default(false) - - proxy_env_stat.stat.exists - failed_when: false - - - name: Parse environment variables from proxy.env - ansible.builtin.set_fact: - redfish_env: "{{ redfish_env | default({}) | combine({item.split('=')[0]: item.split('=')[1:]|join('=')}) }}" - loop: "{{ proxy_env_vars.stdout_lines | default([]) }}" - delegate_to: localhost - run_once: true - when: - - should_run_redfish | default(false) - - proxy_env_stat.stat.exists - - proxy_env_vars.stdout_lines is defined - - - name: Execute redfish configuration - 
include_role: - name: redfish - vars: - proxy_kubeconfig: "{{ redfish_env.KUBECONFIG | default('') }}" - proxy_k8s_auth_proxy: "{{ redfish_env.HTTP_PROXY | default('') }}" - proxy_http_proxy: "{{ redfish_env.HTTP_PROXY | default('') }}" - proxy_https_proxy: "{{ redfish_env.HTTPS_PROXY | default('') }}" - proxy_no_proxy: "{{ redfish_env.NO_PROXY | default('') }}" - ansible_connection: local - ansible_host: localhost - ansible_python_interpreter: "{{ ansible_playbook_python }}" - when: should_run_redfish | default(false) - run_once: true - - - name: Redfish configuration completed - ansible.builtin.debug: - msg: | - The redfish role has finished configuring stonith resources. Automatic fencing configured using redfish BMC interfaces. - when: should_run_redfish | default(false) diff --git a/deploy/openshift-clusters/vars/init-host.yml b/deploy/openshift-clusters/vars/init-host.yml index 8f883452..d7a0e354 100644 --- a/deploy/openshift-clusters/vars/init-host.yml +++ b/deploy/openshift-clusters/vars/init-host.yml @@ -19,4 +19,4 @@ hostname_prefix: "{{ ansible_user | default('tnf') }}-dev" interactive_mode: false # OpenShift version configuration -default_ocp_version: "4.20" +default_ocp_version: "4.22"