From f195b064b0cd1f72c0bc457a6d939bb0a6dde272 Mon Sep 17 00:00:00 2001
From: Eduardo Olivares
Date: Wed, 17 Jun 2026 15:57:41 +0200
Subject: [PATCH 1/2] [bgp] Remove provider network gateway IP from router
 loopback
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The gateway IP (e.g. `192.168.133.1`) was configured on the router
loopback interface so that VMs could ping the external subnet gateway.
This worked with `ovn-bgp-agent`, but with native OVN BGP — which
replaces `ovn-bgp-agent` in RHOSO — pinging the gateway IP fails: OVN's
`arp_proxy` responds to the ARP request and the ICMP reaches the router,
but an anti-loop flow in `lr_in_ip_input` drops the reply because the
source IP matches the router port address.

Since BGP routing does not depend on this loopback entry, remove it.

Related-Issue: #OSPRH-30905
Co-Authored-By: Claude Opus 4.6 (1M context)
Signed-off-by: Eduardo Olivares
---
 playbooks/bgp/prepare-bgp-spines-leaves.yaml | 13 -------------
 1 file changed, 13 deletions(-)

diff --git a/playbooks/bgp/prepare-bgp-spines-leaves.yaml b/playbooks/bgp/prepare-bgp-spines-leaves.yaml
index 53bea8643..e3d1a265d 100644
--- a/playbooks/bgp/prepare-bgp-spines-leaves.yaml
+++ b/playbooks/bgp/prepare-bgp-spines-leaves.yaml
@@ -272,19 +272,6 @@
         state: present
       when: _ip_version == 6
 
-    - name: Add provider network gateway IP to router loopback
-      become: true
-      community.general.nmcli:
-        autoconnect: true
-        conn_name: lo
-        ip4:
-          - 127.0.0.1/8
-          - 192.168.133.1/32
-        method4: manual
-        ip6: "::1/128"
-        method6: manual
-        state: present
-
     - name: Configure FRR
       vars:
         _router_id: "{{ '' if _ip_version == 4 else '1.1.1.1' }}"
From 704c66b4e985d31d33dcbd4336bf0fb3999aaf7c Mon Sep 17 00:00:00 2001
From: Eduardo Olivares
Date: Wed, 17 Jun 2026 15:58:04 +0200
Subject: [PATCH 2/2] [bgp] Add neutron pod restart workaround for OSPRH-30900

After a fresh deployment, the BGP reconciler's `full_sync()` can be
skipped if the OVSDB lock is not yet held at startup, and it is never
retried. This leaves `arp_proxy` unset on interconnect LSPs. Restarting
the neutron pods triggers a new `full_sync()` that completes the setup.

This workaround should be removed once the bug is fixed in neutron.

Related-Issue: #OSPRH-30900
Co-Authored-By: Claude Opus 4.6 (1M context)
Signed-off-by: Eduardo Olivares
---
 playbooks/bgp/prepare-bgp-computes.yaml | 48 +++++++++++++++++++++++++
 1 file changed, 48 insertions(+)

diff --git a/playbooks/bgp/prepare-bgp-computes.yaml b/playbooks/bgp/prepare-bgp-computes.yaml
index 24d836269..8bf8f2c62 100644
--- a/playbooks/bgp/prepare-bgp-computes.yaml
+++ b/playbooks/bgp/prepare-bgp-computes.yaml
@@ -59,3 +59,51 @@
           iptables -t filter -I LIBVIRT_FWI -s 100.64.10.0/24 -i ocpbm -j ACCEPT &&
           iptables -t filter -I LIBVIRT_FWI -d 100.64.10.0/24 -o ocpbm -j ACCEPT
       changed_when: false
+
+# Workaround for OSPRH-30900 - should be removed when the bug is fixed
+- name: Restart neutron pods due to OSPRH-30900
+  hosts: controller-0
+  tasks:
+    - name: Get current neutron pod names
+      ansible.builtin.command:
+        cmd: >-
+          oc get pod -n openstack -l service=neutron
+          -o jsonpath='{.items[*].metadata.name}'
+      register: _neutron_pod_names
+      changed_when: false
+
+    - name: Delete all neutron pods
+      ansible.builtin.command:
+        cmd: >-
+          oc delete pod -n openstack -l service=neutron
+      changed_when: true
+
+    - name: Wait for old neutron pods to terminate
+      ansible.builtin.command:
+        cmd: >-
+          oc wait pod -n openstack {{ item }}
+          --for=delete --timeout=120s
+      changed_when: false
+      failed_when: false
+      loop: "{{ _neutron_pod_names.stdout.split() }}"
+
+    - name: Wait for new neutron pods to be ready
+      ansible.builtin.command:
+        cmd: >-
+          oc wait pod -n openstack
+          -l service=neutron
+          --for=condition=Ready
+          --timeout=300s
+      changed_when: false
+      retries: 4
+      delay: 10
+      register: _neutron_pods_ready
+      until: _neutron_pods_ready.rc == 0
+
+    - name: Wait for OpenStackControlPlane to reconcile
+      ansible.builtin.command:
+        cmd: >-
+          oc wait --for=condition=Ready
+          openstackcontrolplane/controlplane
+          -n openstack --timeout=1200s
+      changed_when: false