Call complete() on the ping_data::completion if we get
LNET_EVENT_SEND with non-zero status. Otherwise the thread which
issued the ping is stuck waiting for the full ping timeout.
A pd_unlinked member is added to struct ping_data to indicate whether
the associated MD has been unlinked. This is checked by lnet_ping() to
determine whether it needs to explicitly call LNetMDUnlink().
Lastly, in cases where we do not receive a reply, we now return the
value of pd.rc, if it is non-zero, rather than -EIO. This can provide
more information about the underlying ping failure.
HPE-bug-id: LUS-11317
Test-Parameters: trivial testlist=sanity-lnet
Signed-off-by: Chris Horn <chris.horn@hpe.com>
Change-Id: I1bc573cf7397e319993fa8aabb31c5f3b59768e7
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/49020
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Serguei Smirnov <ssmirnov@whamcloud.com>
Reviewed-by: Frank Sehr <fsehr@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
struct ping_data {
int rc;
int replied;
struct ping_data {
int rc;
int replied;
struct lnet_handle_md mdh;
struct completion completion;
};
struct lnet_handle_md mdh;
struct completion completion;
};
pd->replied = 1;
pd->rc = event->mlength;
}
pd->replied = 1;
pd->rc = event->mlength;
}
+ pd->pd_unlinked = 1;
+
+ if (event->unlinked ||
+ (event->type == LNET_EVENT_SEND && event->status))
complete(&pd->completion);
}
complete(&pd->completion);
}
/* NB must wait for the UNLINK event below... */
}
/* NB must wait for the UNLINK event below... */
}
- if (wait_for_completion_timeout(&pd.completion, timeout) == 0) {
- /* Ensure completion in finite time... */
+ /* Ensure completion in finite time... */
+ wait_for_completion_timeout(&pd.completion, timeout);
+ if (!pd.pd_unlinked) {
LNetMDUnlink(pd.mdh);
wait_for_completion(&pd.completion);
}
if (!pd.replied) {
LNetMDUnlink(pd.mdh);
wait_for_completion(&pd.completion);
}
if (!pd.replied) {
goto fail_ping_buffer_decref;
}
goto fail_ping_buffer_decref;
}
}
run_test 251 "Define multiple kfi networks on single interface"
}
run_test 251 "Define multiple kfi networks on single interface"
+# test_252: ping a peer whose modules have been unloaded and check that
+# the ping fails in fewer seconds than the 15-second timeout passed to
+# "lnetctl ping", rather than waiting out the full timeout.
+test_252() {
+ setup_health_test false || return $?
+
+ local rc=0
+
+ # Unload the modules on the remote node. The status is captured in rc
+ # instead of failing immediately so that cleanup can run first.
+ do_rpc_nodes $RNODE unload_modules_local || rc=$?
+
+ if [[ $rc -ne 0 ]]; then
+ cleanup_health_test || return $?
+
+ error "Failed to unload modules on $RNODE rc=$rc"
+ else
+ # NOTE(review): presumably read by cleanup_health_test to decide
+ # whether the remote modules need to be reloaded -- confirm.
+ RLOADED=false
+ fi
+
+ # Wall-clock timestamps in whole seconds bracket the ping.
+ local ts1=$(date +%s)
+
+ # The ping is expected to fail; a successful ping is a test error.
+ do_lnetctl ping --timeout 15 ${RNIDS[0]} &&
+ error "Expected ping ${RNIDS[0]} to fail"
+
+ local ts2=$(date +%s)
+
+ local delta=$(echo "$ts2 - $ts1" | bc)
+
+ # The elapsed time must be strictly below the 15s ping timeout.
+ [[ $delta -lt 15 ]] ||
+ error "Ping took longer than expected to fail: $delta"
+
+ cleanup_health_test
+}
+run_test 252 "Ping to down peer should unlink quickly"
+
test_300() {
# LU-13274
local header
test_300() {
# LU-13274
local header