Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions .github/workflows/stackhpc-all-in-one.yml
Original file line number Diff line number Diff line change
Expand Up @@ -306,6 +306,26 @@ jobs:
env:
KAYOBE_AUTOMATION_SSH_PRIVATE_KEY: ${{ steps.ssh_key.outputs.ssh_key }}

- name: Pull container images
  # NOTE: GitHub Actions runs "run" steps with "bash -e", so a failing
  # command aborts the step before any subsequent "$?" check. The docker
  # command must therefore be used directly as the "if" condition for the
  # retry loop to actually retry on failure.
  run: |
    # Try up to 3 times to pull container images.
    for attempt in $(seq 3); do
      if docker run -t --rm \
        -v $(pwd):/stack/kayobe-automation-env/src/kayobe-config \
        -e KAYOBE_ENVIRONMENT -e KAYOBE_VAULT_PASSWORD -e KAYOBE_AUTOMATION_SSH_PRIVATE_KEY \
        ${{ steps.kayobe_image.outputs.kayobe_image }} \
        /stack/kayobe-automation-env/src/kayobe-config/.automation/pipeline/overcloud-container-image-pull.sh; then
        echo "Pulled container images on attempt $attempt"
        exit 0
      fi
      echo "Failed to pull container images on attempt $attempt"
    done
    echo "Failed to pull container images after $attempt attempts"
    exit 1
  env:
    KAYOBE_AUTOMATION_SSH_PRIVATE_KEY: ${{ steps.ssh_key.outputs.ssh_key }}

- name: Service deploy
run: |
docker run -t --rm \
Expand Down
56 changes: 27 additions & 29 deletions .github/workflows/stackhpc-container-image-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,12 @@ on:
type: boolean
required: false
default: true
push-dirty:
sbom:
description: Generate SBOM?
type: boolean
required: false
default: true
push-critical:
description: Push scanned images that have critical vulnerabilities?
type: boolean
required: false
Expand Down Expand Up @@ -82,14 +87,14 @@ jobs:
id: openstack_release
run: |
BRANCH=$(awk -F'=' '/defaultbranch/ {print $2}' .gitreview)
echo "openstack_release=${BRANCH}" | sed -E "s,(stable|unmaintained)/,," >> $GITHUB_OUTPUT
echo "openstack_release=${BRANCH}" | sed -E "s,(stable|unmaintained)/,," | tee -a "$GITHUB_OUTPUT"

# Generate a tag to apply to all built container images.
# Without this, each kayobe * container image build command would use a different tag.
- name: Generate container datetime tag
id: datetime_tag
run: |
echo "datetime_tag=$(date +%Y%m%dT%H%M%S)" >> $GITHUB_OUTPUT
echo "datetime_tag=$(date +%Y%m%dT%H%M%S)" | tee -a "$GITHUB_OUTPUT"

# Dynamically define job matrix.
# We need a separate matrix entry for each distribution, when the relevant input is true.
Expand All @@ -113,7 +118,7 @@ jobs:
# remove trailing comma
output="${output%,}"
output+="]}"
echo "matrix=$output" >> $GITHUB_OUTPUT
echo "matrix=$output" | tee -a "$GITHUB_OUTPUT"

- name: Display container datetime tag
run: |
Expand Down Expand Up @@ -187,7 +192,7 @@ jobs:

- name: Get Kolla tag
id: write-kolla-tag
run: echo "kolla-tag=${{ needs.generate-tag.outputs.openstack_release }}-${{ matrix.distro.name }}-${{ matrix.distro.release }}-${{ needs.generate-tag.outputs.datetime_tag }}" >> $GITHUB_OUTPUT
run: echo "kolla-tag=${{ needs.generate-tag.outputs.openstack_release }}-${{ matrix.distro.name }}-${{ matrix.distro.release }}-${{ needs.generate-tag.outputs.datetime_tag }}" | tee -a "$GITHUB_OUTPUT"

- name: Configure localhost as a seed
run: |
Expand All @@ -198,17 +203,6 @@ jobs:
localhost ansible_connection=local ansible_python_interpreter=/usr/bin/python3
EOF

# See etc/kayobe/ansible/roles/pulp_auth_proxy/README.md for details.
# NOTE: We override pulp_auth_proxy_conf_path to a path shared by the
# runner and dind containers.
- name: Deploy an authenticating package repository mirror proxy
run: |
source venvs/kayobe/bin/activate &&
source src/kayobe-config/kayobe-env --environment ci-builder &&
kayobe playbook run $KAYOBE_CONFIG_PATH/ansible/pulp-auth-proxy.yml -e pulp_auth_proxy_conf_path=/home/runner/_work/pulp_proxy
env:
KAYOBE_VAULT_PASSWORD: ${{ secrets.KAYOBE_VAULT_PASSWORD_CI_BUILDER }}

- name: Create build logs output directory
run: mkdir image-build-logs

Expand All @@ -230,6 +224,9 @@ jobs:
args="$args -e stackhpc_repo_mirror_auth_proxy_enabled=true"
args="$args -e kolla_build_log_path=$GITHUB_WORKSPACE/image-build-logs/kolla-build-overcloud.log"
args="$args -e base_path=$GITHUB_WORKSPACE/opt/kayobe"
# NOTE: We override pulp_auth_proxy_conf_path to a path shared by the
# runner and dind containers.
args="$args -e pulp_auth_proxy_conf_path=/home/runner/_work/pulp_proxy"
source venvs/kayobe/bin/activate &&
source src/kayobe-config/kayobe-env --environment ci-builder &&
kayobe overcloud container image build $args
Expand Down Expand Up @@ -261,20 +258,21 @@ jobs:
if: inputs.seed && matrix.distro.arch == 'amd64'

- name: Get built container images
run: docker image ls --filter "reference=ark.stackhpc.com/stackhpc-dev/*:${{ steps.write-kolla-tag.outputs.kolla-tag }}*" > ${{ matrix.distro.name }}-${{ matrix.distro.release }}-container-images
run: docker image ls --filter "reference=ark.stackhpc.com/stackhpc-dev/*:${{ steps.write-kolla-tag.outputs.kolla-tag }}*" | tee "${{ matrix.distro.name }}-${{ matrix.distro.release }}-container-images"

- name: Fail if no images have been built
run: if [ $(wc -l < ${{ matrix.distro.name }}-${{ matrix.distro.release }}-container-images) -le 1 ]; then exit 1; fi

- name: Scan built container images
run: src/kayobe-config/tools/scan-images.sh ${{ matrix.distro.name }}-${{ matrix.distro.release }} ${{ steps.write-kolla-tag.outputs.kolla-tag }}
run: src/kayobe-config/tools/scan-images.sh ${{ matrix.distro.name }}-${{ matrix.distro.release }} ${{ steps.write-kolla-tag.outputs.kolla-tag }} ${{ inputs.sbom && '--sbom' }}

- name: Move image scan logs to output artifact
run: mv image-scan-output image-build-logs/image-scan-output
if: ${{ !cancelled() }}

- name: Fail if no images have passed scanning
run: if [ $(wc -l < image-build-logs/image-scan-output/critical-images.txt) -gt 0 ]; then exit 1; fi
if: ${{ !inputs.push-dirty }}
- name: Fail if any images have critical vulnerabilities
run: if [ -e image-build-logs/image-scan-output/critical-images.txt ] && [ $(wc -l < image-build-logs/image-scan-output/critical-images.txt) -gt 0 ]; then exit 1; fi
if: ${{ !inputs.push-critical }}

- name: Copy clean images to push-attempt-images list
run: cp image-build-logs/image-scan-output/clean-images.txt image-build-logs/push-attempt-images.txt
Expand All @@ -284,13 +282,13 @@ jobs:
# This should be reverted when it's decided to filter high level CVEs as well.
- name: Append dirty images to push list
run: |
cat image-build-logs/image-scan-output/dirty-images.txt >> image-build-logs/push-attempt-images.txt
cat image-build-logs/image-scan-output/high-images.txt >> image-build-logs/push-attempt-images.txt
if: ${{ inputs.push }}

- name: Append images with critical vulnerabilities to push list
run: |
cat image-build-logs/image-scan-output/critical-images.txt >> image-build-logs/push-attempt-images.txt
if: ${{ inputs.push && inputs.push-dirty }}
if: ${{ inputs.push && inputs.push-critical }}

- name: Push images
run: |
Expand Down Expand Up @@ -332,19 +330,19 @@ jobs:
if: ${{ steps.build_overcloud_images.outcome == 'failure' || steps.build_seed_images.outcome == 'failure' }}

- name: Fail when images failed to push
run: if [ $(wc -l < image-build-logs/push-failed-images.txt) -gt 0 ]; then cat image-build-logs/push-failed-images.txt && exit 1; fi
if: ${{ !cancelled() }}
run: if [ -e image-build-logs/push-failed-images.txt ] && [ $(wc -l < image-build-logs/push-failed-images.txt) -gt 0 ]; then cat image-build-logs/push-failed-images.txt && exit 1; fi
if: ${{ inputs.push && !cancelled() }}

# NOTE(seunghun1ee): Currently we want to mark the job fail only when critical CVEs are detected.
# This can be used again instead of "Fail when critical vulnerabilities are found" when it's
# decided to fail the job on detecting high CVEs as well.
# - name: Fail when images failed scanning
# run: if [ $(wc -l < image-build-logs/image-scan-output/dirty-images.txt) -gt 0 ]; then cat image-build-logs/image-scan-output/dirty-images.txt && exit 1; fi
# if: ${{ !inputs.push-dirty && !cancelled() }}
# run: if [ $(wc -l < image-build-logs/image-scan-output/high-images.txt) -gt 0 ]; then cat image-build-logs/image-scan-output/high-images.txt && exit 1; fi
# if: ${{ !inputs.push-critical && !cancelled() }}

- name: Fail when critical vulnerabilities are found
run: if [ $(wc -l < image-build-logs/image-scan-output/critical-images.txt) -gt 0 ]; then cat image-build-logs/image-scan-output/critical-images.txt && exit 1; fi
if: ${{ !inputs.push-dirty && !cancelled() }}
run: if [ -e image-build-logs/image-scan-output/critical-images.txt ] && [ $(wc -l < image-build-logs/image-scan-output/critical-images.txt) -gt 0 ]; then cat image-build-logs/image-scan-output/critical-images.txt && exit 1; fi
if: ${{ !inputs.push-critical && !cancelled() }}

- name: Remove locally built images for this run
if: always() && runner.arch == 'ARM64'
Expand Down
40 changes: 32 additions & 8 deletions etc/kayobe/ansible/install-doca.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,26 +3,50 @@
become: true
hosts: mlnx
gather_facts: true
vars:
# we don't build kernel modules for each version, eg 5.14.0-611.13.1 has been built,
# but not 5.14.0-611.20.1
doca_kernel_version: "{{ stackhpc_doca_kernel_version_matrix[stackhpc_pulp_repo_rocky_9_minor_version] }}"
tasks:
- name: Get running kernel
ansible.builtin.command:
cmd: "uname -r"
register: kernel

- name: Install kernel repo
ansible.builtin.dnf:
name: doca-kernel-repo
state: latest
update_cache: true

# not the same as doca_kernel_version: some dots changed to underscore or dash
- name: Discover kernel repo filename
ansible.builtin.shell: |
set -o pipefail
rpm -ql doca-kernel-repo | grep /etc/yum.repos.d/
register: kernel_repo_filename
changed_when: false

- name: Ensure correct priority for DOCA modules
ansible.builtin.lineinfile:
line: "priority=-2"
insertafter: EOF
path: "/etc/yum.repos.d/doca-kernel-{{ kernel.stdout }}.repo"
path: "{{ kernel_repo_filename.stdout }}"

# This is required by mlnx-ofa_kernel, and comes from the doca repository.
# It is already present when doca-ofed is installed, but will be upgraded.
- name: Ensure mlnx-tools is installed
ansible.builtin.dnf:
name: mlnx-tools
state: latest
update_cache: true
become: true

- name: Ensure appropriate doca-kernel is installed
ansible.builtin.dnf:
name: "doca-kernel-{{ doca_kernel_version }}"
state: latest
disablerepo: doca
become: true

- name: Install DOCA OFED
- name: Ensure DOCA OFED userspace is installed
ansible.builtin.dnf:
name: doca-ofed
name:
- doca-ofed-userspace
state: latest
update_cache: true
104 changes: 104 additions & 0 deletions etc/kayobe/ansible/rocky-97-ofed-upgrade.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
---
# This playbook performs the system upgrade from RockyLinux 9.6 to 9.7 on hosts
# using doca ofed kernel modules and utilities.
# It prevents the install of the dkms toolchain and dkms modules that would be
# used instead of our precompiled modules if a direct `dnf upgrade` was run.
#
# It must be run after new package snapshots have been merged and
# `pulp-repo-sync.yml` and `pulp-repo-publish.yml` have been run.
# Also `kayobe overcloud host configure -t dnf` must have been run for the new
# `doca.repo` to be present (the doca version is in the url) on the mlnx hosts.

- name: Prepare upgrade from Rocky Linux 9.6 to 9.7
hosts: mlnx
# Upgrade one host at a time by default; override batch size via the
# ANSIBLE_SERIAL environment variable.
serial: "{{ lookup('env', 'ANSIBLE_SERIAL') | default(1, true) }}"
gather_facts: true
tags: pre
vars:
# we don't build kernel modules for each version, e.g. 5.14.0-611.13.1 has been built,
# but not 5.14.0-611.20.1.
doca_kernel_version: "{{ stackhpc_doca_kernel_version_matrix[stackhpc_pulp_repo_rocky_9_minor_version] }}"
tasks:
# Guard: refuse to run against hosts that are not exactly Rocky Linux 9.6,
# since the rest of the play assumes a 9.6 -> 9.7 transition.
- name: Assert that hosts are running Rocky Linux 9.6
ansible.builtin.assert:
that:
- ansible_facts.distribution == 'Rocky'
- ansible_facts.distribution_version == '9.6'
- os_distribution == 'rocky'
fail_msg: >-
This playbook is only designed for Rocky Linux 9.6 hosts. Ensure
that you are limiting it to only run on Rocky Linux 9.6 hosts and
os_distribution is set to rocky.

- name: Ensure doca kernel repo is up to date
ansible.builtin.dnf:
name: doca-kernel-repo
state: latest
update_cache: true
become: true

# This is required by mlnx-ofa_kernel, and comes from the doca repository.
# It is already present when doca-ofed is installed, but will be upgraded.
- name: Ensure mlnx-tools is installed
ansible.builtin.dnf:
name: mlnx-tools
state: latest
update_cache: true
become: true

# Install the precompiled doca-kernel build matching this minor release,
# with the repo with id "doca" disabled for this transaction.
# NOTE(review): assumes doca_kernel_version maps to an available
# doca-kernel-* package name — verify against the kernel repo contents.
- name: Ensure appropriate doca-kernel is installed
ansible.builtin.dnf:
name: "doca-kernel-{{ doca_kernel_version }}"
state: latest
disablerepo: doca
become: true

# doca-ofed 3.2 starts to depend on the dkms modules. It was not the case
# in doca-ofed 2.9.3.
# autoremove is disabled so the userspace dependencies we still need are
# not removed along with the doca-ofed meta package.
- name: Ensure doca-ofed is not present (upgrading it brings dkms)
ansible.builtin.dnf:
name: doca-ofed
state: absent
autoremove: false
become: true

- name: Ensure latest doca-ofed-userspace instead of doca-ofed
ansible.builtin.dnf:
name: doca-ofed-userspace
state: latest
become: true

# With doca-ofed removed and the precompiled kernel pinned above, a full
# upgrade no longer drags in the dkms toolchain.
- name: Upgrade all
ansible.builtin.dnf:
name: "*"
state: latest
become: true

- name: Fix potential grub config preventing new kernel from being used
ansible.builtin.import_playbook: reset-bls-entries.yml
vars:
reset_bls_hosts: mlnx

- name: Reboot to apply updates
ansible.builtin.import_playbook: reboot.yml
vars:
reboot_hosts: mlnx

- name: Confirm the host is upgraded to Rocky Linux 9.7
hosts: mlnx
tags: post
tasks:
# Re-gather distribution facts after the reboot so the assertion below
# sees the post-upgrade OS version rather than cached facts.
- name: Update distribution facts
ansible.builtin.setup:
filter: "{{ kayobe_ansible_setup_filter }}"
gather_subset: "{{ kayobe_ansible_setup_gather_subset }}"

# Can fail (e.g. in multinode) when there are bad entries in grub config
# Fixed by `kayobe playbook run ansible/maintenance/reset-bls-entries.yml`
# and manual reboot
- name: Assert that hosts are now using Rocky Linux 9.7
ansible.builtin.assert:
that:
- ansible_facts.distribution == 'Rocky'
- ansible_facts.distribution_version == '9.7'
- os_distribution == 'rocky'
2 changes: 1 addition & 1 deletion etc/kayobe/ansible/stackhpc-cloud-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@
# Inclusive min
sct_docker_version_min: "24.0.0"
# Exclusive max
sct_docker_version_max: "28.1.0"
sct_docker_version_max: "30.0.0"
sct_selinux_state: "{{ selinux_state }}"
failed_when: host_results.rc not in [0, 1]
register: host_results
Expand Down
9 changes: 9 additions & 0 deletions etc/kayobe/docker.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,15 @@ docker_registry_insecure: "{{ 'https' not in stackhpc_repo_mirror_url }}"
# Enable live-restore on docker daemon
docker_daemon_live_restore: true

# Avoid docker 29 for the moment in Caracal
docker_packages_version: "{{ '-28.*' if os_distribution == 'rocky' else ('=5:28*' if os_release == 'noble' else '=5:27*') }}"
# variable from https://github.com/stackhpc/ansible-role-docker/blob/master/defaults/main.yml
docker_packages:
- "docker-ce{{ docker_packages_version }}"
- "docker-ce-cli{{ docker_packages_version }}"
- "docker-ce-rootless-extras{{ docker_packages_version }}"
- "containerd.io"

###############################################################################
# Dummy variable to allow Ansible to accept this file.
workaround_ansible_issue_8743: yes
1 change: 1 addition & 0 deletions etc/kayobe/environments/ci-builder/stackhpc-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ kolla_build_neutron_ovs: true
# Host and port of a package repository mirror.
# Build against the development Pulp service repositories.
# Use Ark's package repositories to install packages.
stackhpc_repo_mirror_auth_proxy_enabled: true
stackhpc_repo_mirror_url: "{{ stackhpc_repo_mirror_auth_proxy_url if stackhpc_repo_mirror_auth_proxy_enabled | bool else stackhpc_release_pulp_url }}"
stackhpc_repo_mirror_username: "skc-ci-builder-reader"
stackhpc_repo_mirror_password: !vault |
Expand Down
Loading
Loading