From 0f1f5c8dfd43dbdfffbf4057d927ee5ba555c7af Mon Sep 17 00:00:00 2001 From: Andreas Dilger Date: Tue, 30 Jul 2024 13:11:42 -0600 Subject: [PATCH] LU-17331 tests: fix conf-sanity/30b fake NID generation It appears that "NID + 20" used by conf-sanity test_30b would occasionally match the client NID and cause the test to fail. Ensure the "fake" NID generated for OSS failover is not already used by the cluster, and make the NID generation also handle IPv6 NIDs. Update code style in this subtest to match current standards. Test-Parameters: trivial testlist=conf-sanity env=ONLY=30 Test-Parameters: trivial testlist=conf-sanity env=ONLY=30 Test-Parameters: trivial testlist=conf-sanity env=ONLY=30 Test-Parameters: trivial testlist=conf-sanity env=ONLY=30 Test-Parameters: trivial testlist=conf-sanity env=ONLY=30 Test-Parameters: trivial testlist=conf-sanity env=ONLY=30 Fixes: b91d5d4263 ("b=15253 fix conf-sanity 30b for non-tcp networks") Signed-off-by: Andreas Dilger Change-Id: I119c8123193d2d0947ebbdbac5e5c7fb50d34ca5 Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/55890 Reviewed-by: Chris Horn Reviewed-by: James Simmons Reviewed-by: Oleg Drokin Tested-by: James Simmons Tested-by: jenkins Tested-by: Maloo --- lustre/tests/conf-sanity.sh | 89 +++++++++++++++++++++++++-------------------- 1 file changed, 50 insertions(+), 39 deletions(-) diff --git a/lustre/tests/conf-sanity.sh b/lustre/tests/conf-sanity.sh index b3733e6..4b78b56 100755 --- a/lustre/tests/conf-sanity.sh +++ b/lustre/tests/conf-sanity.sh @@ -1310,42 +1310,51 @@ test_30b() { local orignids=$($LCTL get_param -n \ osc.$FSNAME-OST0000-osc-[^M]*.import | grep failover_nids) - local orignidcount=$(echo "$orignids" | wc -w) - # Make a fake nid. Use the OST nid, and add 20 to the least significant - # numerical part of it. Hopefully that's not already a failover address - # for the server. 
- local OSTNID=$(do_facet ost1 "$LCTL get_param nis" | tail -1 | \ - awk '{print $1}') - local ORIGVAL=$(echo $OSTNID | egrep -oi "[0-9]*@") - local NEWVAL=$((($(echo $ORIGVAL | egrep -oi "[0-9]*") + 20) % 256)) - local NEW=$(echo $OSTNID | sed "s/$ORIGVAL/$NEWVAL@/") - echo "Using fake nid $NEW" - - local TEST="$LCTL get_param -n osc.$FSNAME-OST0000-osc-[^M]*.import | - grep failover_nids | sed -n 's/.*\($NEW\).*/\1/p'" + # Make a fake NID. Use the OST NID, and increase the least significant + # field. Hopefully that's not already a failover address for an existing + # node, but if so try again until it is an unused NID for this cluster. + local ostnid=$(do_facet ost1 "$LCTL list_nids | tail -1") + local origval=$(echo $ostnid | egrep -oi "[0-9a-f]*@") + # this will match on first loop, but keeps fake NID logic in one place + local newnid=$ostnid + + echo "Checking peer and local NIDs:" + while { $LNETCTL peer show | awk '/nid/{print $NF}' | sort -u + $LCTL list_nids; } | grep -w $newnid; do + local newval=$(((0x${origval%@} + $RANDOM) % 256)) + newnid=${ostnid/$origval/$newval@} + done + + echo "Changing $ostnid to fake NID $newnid" + + local test="$LCTL get_param -n osc.$FSNAME-OST0000-osc-[^M]*.import | + grep failover_nids | sed -n 's/.*\($newnid\).*/\1/p'" if [[ $PERM_CMD == *"set_param -P"* ]]; then - PARAM="osc.$FSNAME-OST0000-osc-[^M]*.import" - echo "Setting $PARAM from $TEST to $NEW" - do_facet mgs "$PERM_CMD $PARAM='connection=$NEW'" || - error "$PERM_CMD $PARAM failed" + param="osc.$FSNAME-OST0000-osc-[^M]*.import" + echo "Setting $param from $ostnid to $newnid" + do_facet mgs "$PERM_CMD $param='connection=$newnid'" || + error "$PERM_CMD $param failed" else - PARAM="$FSNAME-OST0000.failover.node" - echo "Setting $PARAM from $TEST to $NEW" - do_facet mgs "$PERM_CMD $PARAM='$NEW'" || - error "$PARAM $PARAM failed" - fi - wait_update_facet client "$TEST" "$NEW" || - error "check $PARAM failed!" 
- - local NIDS=$($LCTL get_param -n osc.$FSNAME-OST0000-osc-[^M]*.import | - grep failover_nids) - local NIDCOUNT=$(echo "$NIDS" | wc -w) - echo "should have $((orignidcount + 1)) entries \ - in failover nids string, have $NIDCOUNT" - [ $NIDCOUNT -eq $((orignidcount + 1)) ] || - error "Failover nid not added" + param="$FSNAME-OST0000.failover.node" + echo "Setting $param from $ostnid to $newnid" + do_facet mgs "$PERM_CMD $param='$newnid'" || + error "$PERM_CMD $param failed" + fi + wait_update_facet client "$test" "$newnid" || { + $LCTL get_param osc.$FSNAME-OST0000-osc-[^M]*.import + error "check $param for '$newnid' failed!" + } + + local nids=$($LCTL get_param -n osc.$FSNAME-OST0000-osc-[^M]*.import | + grep failover_nids) + local nidcount=$(echo "$nids" | wc -w) + echo "should have $((orignidcount + 1)) failover NIDs, have $nidcount" + (( $nidcount == $((orignidcount + 1)) )) || { + echo $nids + error "Failover NID '$newnid' not added" + } if [[ $PERM_CMD == *"set_param -P"* ]]; then do_facet mgs "$PERM_CMD -d osc.$FSNAME-OST0000-osc-*.import" @@ -1353,15 +1362,17 @@ test_30b() { do_facet mgs "$PERM_CMD -d $FSNAME-OST0000.failover.node" || error "$PERM_CMD delete failed" fi - umount_client $MOUNT + umount_client $MOUNT || error "umount_client $MOUNT failed" mount_client $MOUNT || error "mount_client $MOUNT failed" - NIDS=$($LCTL get_param -n osc.$FSNAME-OST0000-osc-[^M]*.import | - grep failover_nids) - NIDCOUNT=$(echo "$NIDS" | wc -w) - echo "only $orignidcount final entries should remain \ - in failover nids string, have $NIDCOUNT" - [ $NIDCOUNT -eq $orignidcount ] || error "Failover nids not removed" + nids=$($LCTL get_param -n osc.$FSNAME-OST0000-osc-[^M]*.import | + grep failover_nids) + nidcount=$(echo "$nids" | wc -w) + echo "only $orignidcount failover NIDs should be left, have $nidcount" + (( $nidcount == $orignidcount )) || { + echo "$nids" + error "Failover NID '$newnid' not removed" + } cleanup || error "cleanup failed with rc $?" } -- 1.8.3.1