From 2f41bc506d38fa3f18f407278a9eec77ce250754 Mon Sep 17 00:00:00 2001 From: Numan Siddique Date: Feb 22 2021 07:38:29 +0000 Subject: Backport "ofctrl: Fix the assert seen when flood removing flows. Resolves: #1928012 Signed-off-by: Numan Siddique --- diff --git a/0001-ofctrl-Fix-the-assert-seen-when-flood-removing-flows.patch b/0001-ofctrl-Fix-the-assert-seen-when-flood-removing-flows.patch new file mode 100644 index 0000000..e011a9f --- /dev/null +++ b/0001-ofctrl-Fix-the-assert-seen-when-flood-removing-flows.patch @@ -0,0 +1,178 @@ +From 6cce5c6380d33d8ae0795c10737d637f29680e32 Mon Sep 17 00:00:00 2001 +From: Numan Siddique +Date: Tue, 16 Feb 2021 02:01:21 +0530 +Subject: [PATCH] ofctrl: Fix the assert seen when flood removing flows. + +In one of the scaled deployments, ovn-controller is asserting with the +below stack trace + +*** + (gdb) bt + 0 raise () from /lib64/libc.so.6 + 1 abort () from /lib64/libc.so.6 + 2 ovs_abort_valist ("%s: assertion %s failed in %s()") at lib/util.c:419 + 3 vlog_abort_valist ("%s: assertion %s failed in %s()") at lib/vlog.c:1249 + 4 vlog_abort ("%s: assertion %s failed in %s()") at lib/vlog.c:1263 + 5 ovs_assert_failure (where="controller/ofctrl.c:1198", + function="flood_remove_flows_for_sb_uuid", + condition="ovs_list_is_empty(&f->list_node)") at lib/util.c:86 + 6 flood_remove_flows_for_sb_uuid (sb_uuid=...538, + flood_remove_nodes=...ed0) at controller/ofctrl.c:1205 + 7 flood_remove_flows_for_sb_uuid (sb_uuid=...898, + flood_remove_nodes=...ed0) at controller/ofctrl.c:1230 + 8 flood_remove_flows_for_sb_uuid (sb_uuid=...bf0, + flood_remove_nodes=...ed0) at controller/ofctrl.c:1230 + 9 ofctrl_flood_remove_flows (flood_remove_nodes=...ed0) at controller/ofctrl.c:1250 + 10 lflow_handle_changed_ref (ref_type=REF_TYPE_PORTGROUP, + ref_name= "5564_pg_64...bac") at controller/lflow.c:612 + 11 _flow_output_resource_ref_handler (ref_type=REF_TYPE_PORTGROUP) + at controller/ovn-controller.c:2181 + 12 engine_compute () at lib/inc-proc-eng.c:306 + 13 engine_run_node (recompute_allowed=true) at lib/inc-proc-eng.c:352 + 14 engine_run (recompute_allowed=true) at lib/inc-proc-eng.c:377 + 15 main () at controller/ovn-controller.c:2794 +*** + +This assertion is seen when a port group gets updated and it is referenced by many +logical flows (with conj actions). The function ofctrl_flood_remove_flows(), calls +flood_remove_flows_for_sb_uuid() for each sb uuid in the hmap - flood_remove_nodes +using HMAP_FOR_EACH (flood_remove_nodes). flood_remove_flows_for_sb_uuid() also takes +the hmap 'flood_remove_nodes' as an argument and it inserts few items into it when +it has to call itself recursively. When an item is inserted, its possible that the +hmap may get expanded. And if this happens, the HMAP_FOR_EACH () skips few entries +causing some of the desired flows not getting cleared. + +Later when ofctrl_add_or_append_flow() is called, there would be multiple +'struct sb_flow_ref' references for the same desired flow. And this causes the +above assertion later when the same port group gets updated. + +This patch fixes this issue by cloning the hmap 'flood_remove_nodes' and using it to +iterate the flood remove nodes. Also a test case is added to cover this scenario. + +Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=1928012 +Fixes: 580aea72e26f ("ovn-controller: Fix conjunction handling with incremental processing.") +Suggested-by: Ilya Maximetes +Acked-by: Ilya Maximetes +Signed-off-by: Numan Siddique + +(cherry-picked from upstream master commit 858d1dd716db1a1e664a7c1737fd34f04fcbda5e) + +Change-Id: I892e33adfe11978a4a61a6c0ae6a41c16fad4879 +Signed-off-by: Numan Siddique +--- + controller/ofctrl.c | 15 +++++++++- + tests/ovn.at | 72 +++++++++++++++++++++++++++++++++++++++++++++ + 2 files changed, 86 insertions(+), 1 deletion(-) + +diff --git a/controller/ofctrl.c b/controller/ofctrl.c +index 9d62e1260..1c9694a63 100644 +--- a/controller/ofctrl.c ++++ b/controller/ofctrl.c +@@ -1247,10 +1247,23 @@ ofctrl_flood_remove_flows(struct ovn_desired_flow_table *flow_table, + struct hmap *flood_remove_nodes) + { + struct ofctrl_flood_remove_node *ofrn; ++ int i, n = 0; ++ ++ /* flood_remove_flows_for_sb_uuid() will modify the 'flood_remove_nodes' ++ * hash map by inserting new items, so we can't use it for iteration. ++ * Copying the sb_uuids into an array. */ ++ struct uuid *sb_uuids; ++ sb_uuids = xmalloc(hmap_count(flood_remove_nodes) * sizeof *sb_uuids); ++ struct hmap flood_remove_uuids = HMAP_INITIALIZER(&flood_remove_uuids); + HMAP_FOR_EACH (ofrn, hmap_node, flood_remove_nodes) { +- flood_remove_flows_for_sb_uuid(flow_table, &ofrn->sb_uuid, ++ sb_uuids[n++] = ofrn->sb_uuid; ++ } ++ ++ for (i = 0; i < n; i++) { ++ flood_remove_flows_for_sb_uuid(flow_table, &sb_uuids[i], + flood_remove_nodes); + } ++ free(sb_uuids); + + /* remove any related group and meter info */ + HMAP_FOR_EACH (ofrn, hmap_node, flood_remove_nodes) { +diff --git a/tests/ovn.at b/tests/ovn.at +index 6c9bda075..1f077cf88 100644 +--- a/tests/ovn.at ++++ b/tests/ovn.at +@@ -23868,3 +23868,75 @@ wait_column "true" nb:Logical_Switch_Port up name=lsp1 + + OVN_CLEANUP([hv1]) + AT_CLEANUP ++ ++# Test case to check that ovn-controller doesn't assert when ++# handling port group updates. ++AT_SETUP([ovn -- No ovn-controller assert for port group updates]) ++ovn_start ++ ++net_add n1 ++sim_add hv1 ++as hv1 ++ovs-vsctl add-br br-phys ++ovn_attach n1 br-phys 192.168.0.10 ++ ++as hv1 ++ovs-vsctl \ ++ -- add-port br-int vif1 \ ++ -- set Interface vif1 external_ids:iface-id=sw0-port1 \ ++ ofport-request=1 ++ ++check ovn-nbctl ls-add sw0 ++check ovn-nbctl lsp-add sw0 sw0-port1 ++check ovn-nbctl lsp-set-addresses sw0-port1 "10:14:00:00:00:01 192.168.0.2" ++ ++check ovn-nbctl lsp-add sw0 sw0-port2 ++check ovn-nbctl lsp-add sw0 sw0-port3 ++check ovn-nbctl lsp-add sw0 sw0-port4 ++check ovn-nbctl lsp-add sw0 sw0-port5 ++check ovn-nbctl lsp-add sw0 sw0-port6 ++check ovn-nbctl lsp-add sw0 sw0-port7 ++ ++ovn-nbctl create address_set name=as1 ++ovn-nbctl set address_set . addresses="10.0.0.10,10.0.0.11,10.0.0.12" ++ ++ovn-nbctl pg-add pg1 sw0-port1 sw0-port2 sw0-port3 ++ovn-nbctl acl-add pg1 to-lport 1002 "outport == @pg1 && ip4.dst == \$as1 && icmp4" drop ++ovn-nbctl acl-add pg1 to-lport 1002 "outport == @pg1 && ip4.dst == \$as1 && tcp && tcp.dst >=10000 && tcp.dst <= 20000" drop ++ovn-nbctl acl-add pg1 to-lport 1002 "outport == @pg1 && ip4.dst == \$as1 && udp && udp.dst >=10000 && udp.dst <= 20000" drop ++ ++ovn-nbctl pg-add pg2 sw0-port2 sw0-port3 sw0-port4 sw0-port5 ++ovn-nbctl acl-add pg2 to-lport 1002 "outport == @pg2 && ip4.dst == \$as1 && icmp4" allow-related ++ovn-nbctl acl-add pg2 to-lport 1002 "outport == @pg2 && ip4.dst == \$as1 && tcp && tcp.dst >=30000 && tcp.dst <= 40000" drop ++ovn-nbctl acl-add pg2 to-lport 1002 "outport == @pg2 && ip4.dst == \$as1 && udp && udp.dst >=30000 && udp.dst <= 40000" drop ++ ++ovn-nbctl pg-add pg3 sw0-port1 sw0-port5 ++ovn-nbctl acl-add pg3 to-lport 1002 "outport == @pg3 && ip4.dst == \$as1 && icmp4" allow-related ++ovn-nbctl acl-add pg3 to-lport 1002 "outport == @pg3 && ip4.dst == \$as1 && tcp && tcp.dst >=20000 && tcp.dst <= 30000" allow-related ++ovn-nbctl acl-add pg3 to-lport 1002 "outport == @pg3 && ip4.dst == \$as1 && udp && udp.dst >=20000 && udp.dst <= 30000" allow-related ++ ++AS_BOX([Delete and add the port groups multiple times]) ++ ++for i in $(seq 1 10) ++do ++ check ovn-nbctl --wait=hv clear port_Group pg1 ports ++ check ovn-nbctl --wait=hv clear port_Group pg2 ports ++ check ovn-nbctl --wait=hv clear port_Group pg3 ports ++ check ovn-nbctl --wait=hv pg-set-ports pg1 sw0-port1 ++ check ovn-nbctl --wait=hv pg-set-ports pg1 sw0-port1 sw0-port4 ++ check ovn-nbctl --wait=hv pg-set-ports pg1 sw0-port1 sw0-port4 sw0-port5 ++ ++ check ovn-nbctl --wait=hv pg-set-ports pg2 sw0-port2 ++ check ovn-nbctl --wait=hv pg-set-ports pg2 sw0-port2 sw0-port6 ++ check ovn-nbctl --wait=hv pg-set-ports pg2 sw0-port2 sw0-port6 sw0-port7 ++ ++ check ovn-nbctl --wait=hv pg-set-ports pg3 sw0-port1 ++ check ovn-nbctl --wait=hv pg-set-ports pg3 sw0-port1 sw0-port3 ++ check ovn-nbctl --wait=hv pg-set-ports pg3 sw0-port1 sw0-port3 sw0-port6 ++ ++ # Make sure that ovn-controller has not asserted. ++ AT_CHECK([kill -0 $(cat hv1/ovn-controller.pid)]) ++done ++ ++OVN_CLEANUP([hv1]) ++AT_CLEANUP +-- +2.29.2 + diff --git a/ovn.spec b/ovn.spec index 91cc1b7..7b542db 100644 --- a/ovn.spec +++ b/ovn.spec @@ -43,7 +43,7 @@ Name: ovn Summary: Open Virtual Network support URL: http://www.openvswitch.org/ Version: 20.12.0 -Release: 19%{?commit0:.%{date}git%{shortcommit0}}%{?dist} +Release: 20%{?commit0:.%{date}git%{shortcommit0}}%{?dist} Obsoletes: openvswitch-ovn-common < %{?epoch_ovs:%{epoch_ovs}:}2.11.0-8 Provides: openvswitch-ovn-common = %{?epoch:%{epoch}:}%{version}-%{release} @@ -165,6 +165,9 @@ Patch190: 0001-ovn-nbctl-do-not-allow-duplicated-ECMP-routes.patch # Bug 1903210 Patch200: 0001-controller-Fix-toggling-ct-zone-ids.patch +# Bug 1928012 +Patch210: 0001-ofctrl-Fix-the-assert-seen-when-flood-removing-flows.patch + # OpenvSwitch backports (400-) if required. # Address crpto policy for fedora %if 0%{?fedora} @@ -531,6 +534,9 @@ fi %{_unitdir}/ovn-controller-vtep.service %changelog +* Mon Feb 22 2021 Numan Siddique - 20.12.0-20 +- Backport "ofctrl: Fix the assert seen when flood removing flows." (#1928012) + * Mon Feb 22 2021 Numan Siddique - 20.12.0-19 - Backport "controller: Fix toggling ct zone ids." (#1903210)