Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
e1edbf3
Add baremetal node adoption automation for TNF
fonta-rh Jun 29, 2026
251d127
Extract wizard into standalone script with input validation
fonta-rh Jun 29, 2026
3140888
Accept hostnames in BMC address wizard input
fonta-rh Jun 29, 2026
1a1c5e3
Rename bmc_ip to bmc_address, make boot_mac optional
fonta-rh Jun 29, 2026
10747b2
Use Redfish BootOptions for boot MAC discovery
fonta-rh Jun 29, 2026
84ebb54
Address CodeRabbit review findings on PR #88
fonta-rh Jun 29, 2026
388f2fe
Address Doug's poka-yoke review findings on PR #88
fonta-rh Jun 29, 2026
4f112de
Fix boot MAC discovery failing on HPE iLO (trailing slash in system_id)
fonta-rh Jun 29, 2026
9b6cefb
Add OS-level network config to baremetal adoption workflow
fonta-rh Jun 30, 2026
b895d7d
Add [provisioning_host] prompts to baremetal wizard
fonta-rh Jun 30, 2026
101f533
Add BMC port to Redfish addresses for CEO fencing compatibility
fonta-rh Jul 2, 2026
8d9c936
Move generated adoption artifacts to role files/ directory
fonta-rh Jul 2, 2026
298336d
Add baremetal deploy via dev-scripts ABI
fonta-rh Jun 30, 2026
1053afe
Add fork/branch support, run requirements, remove laptop workarounds
fonta-rh Jul 1, 2026
368571b
Switch baremetal deploy to DHCP networking mode
fonta-rh Jul 1, 2026
dc00a3b
Remove PROVISIONING_HOST_EXTERNAL_IP resolution from deploy-baremetal
fonta-rh Jul 1, 2026
9f45936
Convert deploy-baremetal.sh to Ansible playbook
fonta-rh Jul 2, 2026
0f098dc
Fix baremetal deploy playbook: SSH keys, stale state, host deps
fonta-rh Jul 2, 2026
62206c7
Add proxy-based cluster access for baremetal deployments
fonta-rh Jul 3, 2026
01da195
Improve post-deploy access instructions for baremetal
fonta-rh Jul 3, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions deploy/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,18 @@ fencing-assisted:
keep-instance:
@../helpers/keep-instance.sh '$(DAYS)'

# Adopt baremetal nodes: validate BMC access and generate dev-scripts artifacts.
baremetal-adopt:
@./openshift-clusters/scripts/baremetal-adopt.sh

# Same script as baremetal-adopt, run with --verify-only (no artifacts written).
baremetal-verify:
@./openshift-clusters/scripts/baremetal-adopt.sh --verify-only

# Deploy the TNF cluster on the adopted nodes.
# NOTE(review): presumably deploy-baremetal.sh wraps
# 'ansible-playbook deploy-baremetal.yml' — confirm the script still exists.
baremetal-fencing-agent:
@./openshift-clusters/scripts/deploy-baremetal.sh

# Interactive wizard that creates the baremetal node inventory.
baremetal-wizard:
@./openshift-clusters/scripts/baremetal-wizard.sh

patch-nodes:
@./openshift-clusters/scripts/patch-nodes.sh
get-tnf-logs:
Expand Down Expand Up @@ -138,6 +150,12 @@ help:
@echo " clean-spoke - Clean spoke cluster resources (VMs, network, auth) from assisted installer"
@echo " patch-nodes - Build resource-agents RPM and patch cluster nodes (default version: 4.11)"
@echo ""
@echo "Baremetal Adoption:"
@echo " baremetal-adopt - Adopt baremetal nodes: validate BMC + generate dev-scripts artifacts"
@echo " baremetal-fencing-agent - Deploy TNF cluster on adopted baremetal nodes via provisioning host"
@echo " baremetal-verify - Verify BMC credentials for adopted baremetal nodes (no artifacts)"
@echo " baremetal-wizard - Interactive wizard to create baremetal node inventory"
@echo ""
@echo "Cluster Utilities:"
@echo " get-tnf-logs - Collect pacemaker and etcd logs from cluster nodes"

3 changes: 3 additions & 0 deletions deploy/openshift-clusters/.gitignore
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
inventory.ini
inventory_baremetal.ini

proxy.env
kubeconfig
kubeadmin-password
clusters/
*.pyc
*.pyo

Expand Down
168 changes: 168 additions & 0 deletions deploy/openshift-clusters/deploy-baremetal.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
---
# Deploy a TNF fencing cluster on adopted baremetal nodes via dev-scripts ABI.
#
# Targets the [provisioning_host] group from inventory_baremetal.ini.
# Expects adoption artifacts from 'make baremetal-adopt' in
# roles/dev-scripts/install-dev/files/.
#
# Usage:
# ansible-playbook deploy-baremetal.yml -i inventory_baremetal.ini
# ansible-playbook deploy-baremetal.yml -i inventory_baremetal.ini -e dev_scripts_branch=my-branch
#
# Single play: everything below runs against the [provisioning_host] group.
- name: Deploy TNF fencing cluster on adopted baremetal nodes
  hosts: provisioning_host
  # Canonical lowercase booleans; 'yes'/'no' are YAML 1.1 truthy spellings
  # that yamllint's 'truthy' rule and ansible-lint both flag.
  gather_facts: false
  force_handlers: true

  vars:
    method: agent
    topology: fencing
    # Cluster name; reused below for the ocp/<name> state directory, the
    # mirror-credentials filename and the controller-side auth directory.
    test_cluster_name: ostest

pre_tasks:
# Runs on the controller (delegate_to: localhost), not on the provisioning
# host: stats each adoption artifact under the install-dev role's files/
# directory and registers one result per filename.
- name: Check adoption artifacts exist on controller
ansible.builtin.stat:
path: "{{ playbook_dir }}/roles/dev-scripts/install-dev/files/{{ item }}"
delegate_to: localhost
become: false
register: artifact_check
loop:
- config_baremetal_fencing.sh
- ironic_nodes.json

# Iterates over the registered stat results and fails the play if any
# artifact is absent; item.item is the filename from the loop above.
- name: Fail if adoption artifacts are missing
ansible.builtin.fail:
msg: >-
Adoption artifact not found: {{ item.item }}.
Run 'make baremetal-adopt' first.
when: not item.stat.exists
loop: "{{ artifact_check.results }}"
loop_control:
label: "{{ item.item }}"

tasks:
# --- Validation, config deploy, pull-secret (reused from install-dev role) ---
# Runs only the 'config' task file of the install-dev role, pointing the
# 'agent' method at the generated config_baremetal_fencing.sh.
# 'method: agent' repeats the play-level var of the same name.
# NOTE(review): install_host_deps=false presumably skips the role's host
# package installation — confirm against the role's tasks.
- name: Validate and deploy config + pull-secret
ansible.builtin.include_role:
name: dev-scripts/install-dev
tasks_from: config
vars:
method: agent
config_file:
agent: config_baremetal_fencing.sh
install_host_deps: false

# --- Git checkout ---
# Clones/updates dev-scripts on the provisioning host. The three variables
# are not defined in this playbook; per the playbook usage header they can be
# overridden with -e (e.g. dev_scripts_branch).
# NOTE(review): defaults presumably come from the install-dev role — confirm.
- name: Checkout dev-scripts
ansible.builtin.git:
dest: "{{ dev_scripts_path }}"
repo: "{{ dev_scripts_src_repo }}"
version: "{{ dev_scripts_branch }}"

# --- Baremetal-specific setup ---
- name: Copy ironic_nodes.json to dev-scripts
ansible.builtin.copy:
src: "{{ playbook_dir }}/roles/dev-scripts/install-dev/files/ironic_nodes.json"
dest: "{{ dev_scripts_path }}/ironic_nodes.json"
mode: "0600"

# Creates the directory that the WORKING_DIR export in the deployed config
# points at. Uses the file module instead of shelling out to 'mkdir -p':
# it is idempotent on its own and reports 'changed' truthfully, so the
# changed_when override is no longer needed. '~' is expanded on the target.
- name: Create working directory
  ansible.builtin.file:
    path: "~/dev-scripts-workdir"
    state: directory

# Replaces an existing 'export WORKING_DIR=' line in the per-user config, or
# appends one if none matches. ${HOME} is written literally and expanded when
# the config is sourced.
# NOTE(review): 'whoami' is not registered anywhere in this playbook —
# presumably the install-dev 'config' tasks register it; confirm.
- name: Append WORKING_DIR to deployed config
ansible.builtin.lineinfile:
path: "{{ dev_scripts_path }}/config_{{ whoami.stdout }}.sh"
regexp: '^export WORKING_DIR='
line: 'export WORKING_DIR="${HOME}/dev-scripts-workdir"'

# --- Ensure SSH key exists for node access ---
# Generates an ed25519 keypair with an empty passphrase only when the private
# key is absent. 'creates' makes the command module skip the run (and report
# 'ok') if the file already exists, replacing the separate stat + when pair.
- name: Generate SSH keypair if missing
  ansible.builtin.command:
    cmd: ssh-keygen -t ed25519 -f ~/.ssh/id_ed25519 -N ''
    creates: "~/.ssh/id_ed25519"

# slurp returns the public key base64-encoded in ssh_pub_key.content.
- name: Read SSH public key
ansible.builtin.slurp:
src: ~/.ssh/id_ed25519.pub
register: ssh_pub_key

# Decodes and trims the key, then replaces or appends the SSH_PUB_KEY export
# in the per-user config.
# NOTE(review): 'whoami' is not registered anywhere in this playbook —
# presumably the install-dev 'config' tasks register it; confirm.
- name: Append SSH_PUB_KEY to deployed config
ansible.builtin.lineinfile:
path: "{{ dev_scripts_path }}/config_{{ whoami.stdout }}.sh"
regexp: '^export SSH_PUB_KEY='
line: 'export SSH_PUB_KEY="{{ ssh_pub_key.content | b64decode | trim }}"'

# Writes an empty-auths JSON file only when no file exists at that path, so an
# existing credentials file is left untouched. changed_when: false means the
# task never reports a change, even on the run that creates the file.
- name: Create empty mirror registry credentials
ansible.builtin.shell: |
f="${HOME}/private-mirror-{{ test_cluster_name }}.json"
[ -f "$f" ] || echo '{"auths":{}}' > "$f"
changed_when: false

# --- Clean prior deployment state ---
- name: Remove prior cluster state (required by dev-scripts verifyClean)
ansible.builtin.file:
path: "{{ dev_scripts_path }}/ocp/{{ test_cluster_name }}"
state: absent

# --- Deploy ---
- name: Run dev-scripts ABI pipeline
block:
# Runs the dev-scripts make targets one at a time, in the order listed; a
# failing target fails the task and hands control to the rescue section.
# The module is referenced by its fully qualified collection name, like every
# other task in this play, which also makes the dependency on the
# community.general collection explicit.
# NOTE(review): confirm community.general is installed/declared for this repo.
- name: Run dev-scripts make target
  community.general.make:
    chdir: "{{ dev_scripts_path }}"
    target: "{{ item }}"
  loop:
    - requirements
    - agent_requirements
    - agent_build_installer
    - agent_prepare_release
    - agent_configure
    - agent_create_cluster
Comment on lines +113 to +125

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🎯 Functional Correctness | 🟠 Major | ⚡ Quick win

🧩 Analysis chain

🌐 Web query:

Is community.general collection required for the ansible make module, or is it part of ansible.builtin?

💡 Result:

The make module is part of the community.general collection, not ansible.builtin [1][2]. In modern Ansible (following the move to the collections-based architecture), you should reference it by its fully qualified collection name (FQCN): community.general.make [2]. While the module existed as a built-in module in older versions of Ansible (prior to the introduction of collections and the separation of modules into the community and core distributions) [3][4], it was migrated to the community.general collection as part of the project's modularization [2][5]. If you are using a modern version of Ansible, you must ensure the community.general collection is installed in your environment to use this module [2].

Citations:


🏁 Script executed:

#!/bin/bash
set -euo pipefail

# Inspect the target playbook and nearby context
git ls-files deploy/openshift-clusters/deploy-baremetal.yml
wc -l deploy/openshift-clusters/deploy-baremetal.yml
sed -n '1,180p' deploy/openshift-clusters/deploy-baremetal.yml

# Look for any collections declarations that would resolve community.general.make
rg -n '^\s*collections:|community\.general|make:' deploy/openshift-clusters -g '*.yml' -g '*.yaml'

Repository: openshift-eng/two-node-toolbox

Length of output: 6055


🏁 Script executed:

#!/bin/bash
set -euo pipefail

# Check the playbook header and any relevant Ansible config/collection requirements
sed -n '1,130p' deploy/openshift-clusters/deploy-baremetal.yml

printf '\n--- collections requirements ---\n'
git ls-files 'deploy/openshift-clusters/**' 'collections/**' 'ansible.cfg' '.ansible*' | sed -n '1,200p'
printf '\n--- matches ---\n'
rg -n '^\s*collections:|community\.general|make:' deploy/openshift-clusters collections . -g '*.yml' -g '*.yaml' -g 'ansible.cfg' -g '.ansible*'

Repository: openshift-eng/two-node-toolbox

Length of output: 15874


🏁 Script executed:

#!/bin/bash
set -euo pipefail

# Find any collection installation/requirements references that would make community.general available
git ls-files | rg '(^|/)(collections/requirements\.yml|ansible\.cfg|requirements\.yml|requirements\.yaml|galaxy\.yml|galaxy\.yaml)$'
for f in $(git ls-files | rg '(^|/)(collections/requirements\.yml|ansible\.cfg|requirements\.yml|requirements\.yaml|galaxy\.yml|galaxy\.yaml)$'); do
  echo "--- $f ---"
  sed -n '1,220p' "$f"
done

Repository: openshift-eng/two-node-toolbox

Length of output: 917


🌐 Web query:

Ansible short module name resolution for collection modules: can community.general.make be used as make when the collection is installed but no collections: is declared?

💡 Result:

No, you cannot use the short module name make to refer to the community.general.make module if the collections keyword is not declared in your playbook or role [1][2][3]. Ansible requires either the Fully Qualified Collection Name (FQCN), such as community.general.make, or the explicit use of the collections keyword to resolve short module names that are not part of the ansible.builtin collection [2][3]. The collections keyword creates an ordered search path that allows Ansible to resolve unqualified module names [2][3]. Without this declaration, or without explicitly using the FQCN, Ansible will attempt to find a module named make within the default search path (which includes ansible.builtin) and will fail if it cannot find it there [2][3]. The official documentation consistently recommends using the FQCN (community.general.make) to ensure clarity, avoid conflicts, and eliminate the need for the collections keyword [2][4][3].

Citations:


Use community.general.make here. make is a collection module, and this play doesn’t declare collections:, so the short name can fail to resolve; switch to the FQCN or add community.general to the play scope.

🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@deploy/openshift-clusters/deploy-baremetal.yml` around lines 82 - 94, The ABI
pipeline task uses the short name make, which may not resolve because this play
does not declare a collections scope. Update the task in the “Run dev-scripts
ABI pipeline” block to use the fully qualified community.general.make module, or
alternatively add community.general to the play’s collections so the
“dev-scripts: {{ item }}” task resolves reliably.

loop_control:
label: "{{ item }}"
# Runs only when a task in the block above fails: prints manual recovery
# steps, then fails explicitly — a completed rescue would otherwise mark the
# error as handled and let the play continue to the post-deploy tasks.
rescue:
- name: Display recovery instructions
ansible.builtin.debug:
msg: |
DEPLOYMENT FAILED. To recover:
1. Power off baremetal nodes via BMC
2. Clean dev-scripts state:
make -C {{ dev_scripts_path }} clean
3. Fix the issue and re-run:
make baremetal-fencing-agent
- name: Fail after displaying recovery steps
ansible.builtin.fail:
msg: "dev-scripts ABI pipeline failed"

# --- Post-deploy: proxy + credentials ---
# Hands the proxy-setup role the two auth files under the dev-scripts
# checkout's ocp/<cluster>/auth directory on the provisioning host.
# NOTE(review): how the role consumes these paths is not visible here.
- name: Setup proxy and fetch credentials
ansible.builtin.include_role:
name: proxy-setup
vars:
kubeconfig_path: "{{ dev_scripts_path }}/ocp/{{ test_cluster_name }}/auth/kubeconfig"
kubeadmin_password_path: "{{ dev_scripts_path }}/ocp/{{ test_cluster_name }}/auth/kubeadmin-password"

# Copies the provisioning host's private key to the controller so the nodes
# can be reached with 'ssh -i'. The destination is spelled out with
# playbook_dir so the follow-up task addresses exactly the same file.
- name: Fetch SSH private key to controller
  ansible.builtin.fetch:
    src: ~/.ssh/id_ed25519
    dest: "{{ playbook_dir }}/clusters/{{ test_cluster_name }}/auth/id_ed25519"
    flat: true

# fetch has no 'mode' parameter, so the fetched key's permissions follow the
# controller's umask. Set 0600 explicitly on the controller; ssh refuses
# private keys that are readable by group or others.
- name: Restrict permissions on fetched SSH private key
  ansible.builtin.file:
    path: "{{ playbook_dir }}/clusters/{{ test_cluster_name }}/auth/id_ed25519"
    mode: "0600"
  delegate_to: localhost
  become: false

# Final summary for the operator. Both file references are absolute
# (playbook_dir-based) so the commands can be pasted from any directory;
# previously the ssh key path only worked from the playbook directory.
- name: Display access information
  ansible.builtin.debug:
    msg: |-
      Baremetal TNF cluster deployed successfully!

      Next steps:
      1. Source the proxy environment from anywhere:
         source {{ playbook_dir }}/proxy.env
         (or from openshift-clusters directory: source proxy.env)
      2. Verify cluster access: oc get nodes
      3. SSH to nodes:
         ssh -i {{ playbook_dir }}/clusters/{{ test_cluster_name }}/auth/id_ed25519 core@<node-ip>
74 changes: 74 additions & 0 deletions deploy/openshift-clusters/inventory_baremetal.ini.sample
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
# Baremetal node inventory for TNF adoption
#
# NOTE: This is separate from inventory.ini, which targets the hypervisor host.
# This file describes the physical baremetal nodes to be adopted as OpenShift nodes.
# inventory.ini → hypervisor (where dev-scripts runs)
# inventory_baremetal.ini → baremetal nodes (BMC endpoints for adoption) and the
# [provisioning_host] targeted by deploy-baremetal.yml
#
# Copy this file to inventory_baremetal.ini and fill in your node details.
# Then run: make baremetal-adopt
#
# Each node requires:
# bmc_address - BMC/iDRAC/iLO management address (IP or hostname)
# bmc_user - BMC login username
# bmc_pass - BMC login password
# bmc_port - (optional) BMC Redfish port (default: 443)
# boot_mac - (optional) MAC address of the NIC used for PXE boot
# If omitted, the adopt script attempts Redfish discovery.
# node_ip - (optional) Static IP address for this node on the machine network
# Required for baremetal ABI deployments with static IPs.
#
# The hostname (first field) becomes the node name in ironic_nodes.json.
# For TNF, you need exactly 2 nodes (master-0 and master-1).

[baremetal_nodes]
master-0 bmc_address=192.168.1.100 bmc_user=admin bmc_pass=changeme boot_mac=52:54:00:00:00:01 node_ip=192.168.1.10
master-1 bmc_address=192.168.1.101 bmc_user=admin bmc_pass=changeme boot_mac=52:54:00:00:00:02 node_ip=192.168.1.11
Comment on lines +25 to +26

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🔒 Security & Privacy | 🟡 Minor | ⚡ Quick win

Replace the sample bmc_pass values with non-credential placeholders.

bmc_pass=changeme is still a literal password in a config sample, so it is easy to copy into a real inventory unchanged. Please switch these to obviously non-usable placeholders such as <set-me> or leave the value blank/commented. As per coding guidelines, “Flag hardcoded secrets including API keys, tokens, passwords, private keys, and credentials.”

🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@deploy/openshift-clusters/inventory_baremetal.ini.sample` around lines 24 -
25, The sample inventory entries still use a literal bmc_pass value, which
should be replaced with a clearly non-usable placeholder. Update the bmc_pass
fields in the master-0 and master-1 example entries to an obvious placeholder or
commented/blank value, keeping the sample format intact and avoiding any
real-looking credential in inventory_baremetal.ini.sample.

Source: Coding guidelines


[baremetal_nodes:vars]
# BMC driver — only redfish is supported for TNF fencing
bmc_driver=redfish

# BMC Redfish port (per-node bmc_port overrides this)
bmc_port=443

# Skip TLS verification for BMC endpoints (common with self-signed certs)
bmc_verify_ca=False

# Node CPU architecture
cpu_arch=x86_64

[baremetal_network]
# Cluster-wide network config for baremetal ABI deployments (all optional).
# machine_network - Machine network CIDR (e.g. 192.168.1.0/24)
# gateway - Default gateway IP
# api_vip - API virtual IP
# ingress_vip - Ingress virtual IP
# dns_servers - Comma-separated DNS server IPs reachable from the nodes.
# Used as the node DNS resolver during install (resolves quay.io, etc.).
# If unset, falls back to gateway — which may not run DNS.
#machine_network=192.168.1.0/24
#gateway=192.168.1.1
#api_vip=192.168.1.5
#ingress_vip=192.168.1.6
#dns_servers=10.11.5.160,10.2.32.85

[provisioning_host]
# Provisioning host for baremetal ABI deployment. Must be on the same L2 network
# as the baremetal nodes (serves agent ISO via HTTP, runs dnsmasq, acts as gateway).
#
# Standard Ansible inventory format — one host entry with connection variables.
# For local deployment, use: localhost ansible_connection=local
#
#10.1.155.50 ansible_user=root ansible_ssh_private_key_file=~/.ssh/lab_key

[provisioning_host:vars]
# Override dev-scripts checkout on the provisioning host (optional).
# Defaults come from roles/dev-scripts/install-dev/defaults/main.yml:
# dev_scripts_path=openshift-metal3/dev-scripts
# dev_scripts_src_repo=https://github.com/openshift-metal3/dev-scripts
# dev_scripts_branch=master
#
#dev_scripts_path=~/openshift-metal3/dev-scripts
#dev_scripts_src_repo=https://github.com/myuser/dev-scripts
#dev_scripts_branch=my-feature-branch
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,6 @@ ci_token
clusterbot-ci_token
config_arbiter.sh
config_fencing.sh
config_sno.sh
config_sno.sh
config_baremetal_fencing.sh
ironic_nodes.json
Original file line number Diff line number Diff line change
Expand Up @@ -36,3 +36,6 @@ export OPENSHIFT_INSTALL_EXPERIMENTAL_DISABLE_IMAGE_POLICY=true
# export VBMC_IMAGE=quay.io/rh-edge-enablement/vbmc:2026-06
# export SUSHY_TOOLS_IMAGE=quay.io/rh-edge-enablement/sushy-tools:2026-06
# fi

# Baremetal network config (node IPs, VIPs, bridge overrides) is auto-generated
# by 'make baremetal-adopt' into config_baremetal_fencing.sh — do not add here.
Original file line number Diff line number Diff line change
Expand Up @@ -125,3 +125,4 @@
- containernetworking-plugins
state: present
become: true
when: install_host_deps | default(true)
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
# Determine the directory where this proxy.env file is located
PROXY_ENV_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)"

export EC2_PUBLIC_IP={{ hostvars[inventory_hostname]['inventory_hostname'].split('@')[1] }}
export EC2_PUBLIC_IP={{ hostvars[inventory_hostname]['ansible_host'] | default(inventory_hostname.split('@')[1] if '@' in inventory_hostname else inventory_hostname) }}
export PROXYPORT={{ proxy_port }}
export HTTP_PROXY=http://${EC2_PUBLIC_IP}:${PROXYPORT}/
export HTTPS_PROXY=http://${EC2_PUBLIC_IP}:${PROXYPORT}/
Expand Down
Loading