If a push has not completed when the discovery thread is stopped, then
ln_dc_handler is still in use as an MD handler (set by
lnet_peer_send_push). That leads to an assertion failure in
lnet_assert_handler_unused.
To fix that, call lnet_assert_handler_unused on ln_dc_handler only
after the monitor thread has been stopped. Thus, the patch for LU-17496
is no longer needed.
Fixes: 36b14a23a6 ("LU-17207 lnet: race b/w monitor thr stop and discovery push")
Test-Parameters: testlist=sanity-lnet env=ONLY="212 220",ONLY_REPEAT=100
Signed-off-by: Cyril Bordage <cbordage@whamcloud.com>
Change-Id: I426c37b12a3d29327a7295f528a5b875a9ac88a0
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/54884
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Shaun Tancheff <shaun.tancheff@hpe.com>
Reviewed-by: Frank Sehr <fsehr@whamcloud.com>
Reviewed-by: Serguei Smirnov <ssmirnov@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
int __LNetMDUnlink(struct lnet_handle_md md_in, bool discard);
#define LNetMDUnlink(handle) __LNetMDUnlink(handle, false)
-bool lnet_assert_handler_unused(lnet_handler_t handler, bool assert);
+void lnet_assert_handler_unused(lnet_handler_t handler);
/** @} lnet_md */
/** \defgroup lnet_data Data movement operations
the_lnet.ln_mt_zombie_rstqs = NULL;
}
- lnet_assert_handler_unused(the_lnet.ln_mt_handler, true);
+ lnet_assert_handler_unused(the_lnet.ln_mt_handler);
the_lnet.ln_mt_handler = NULL;
lnet_portals_destroy();
lnet_ping_md_unlink(the_lnet.ln_ping_target,
&the_lnet.ln_ping_target_md);
- lnet_assert_handler_unused(the_lnet.ln_ping_target_handler, true);
+ lnet_assert_handler_unused(the_lnet.ln_ping_target_handler);
lnet_ping_target_destroy();
}
the_lnet.ln_push_target_nbytes = 0;
LNetClearLazyPortal(LNET_RESERVED_PORTAL);
- lnet_assert_handler_unused(the_lnet.ln_push_target_handler, true);
+ lnet_assert_handler_unused(the_lnet.ln_push_target_handler);
the_lnet.ln_push_target_handler = NULL;
}
if (the_lnet.ln_refcount != 1) {
the_lnet.ln_refcount--;
} else {
+ lnet_handler_t dc_handler = the_lnet.ln_dc_handler;
LASSERT(!the_lnet.ln_niinit_self);
lnet_net_lock(LNET_LOCK_EX);
lnet_router_debugfs_fini();
lnet_peer_discovery_stop();
lnet_monitor_thr_stop();
+ lnet_assert_handler_unused(dc_handler);
lnet_push_target_fini();
lnet_ping_target_fini();
list_add(&md->md_list, &container->rec_active);
}
-bool lnet_assert_handler_unused(lnet_handler_t handler, bool assert)
+void lnet_assert_handler_unused(lnet_handler_t handler)
{
struct lnet_res_container *container;
int cpt;
- bool handler_in_use = false;
if (!handler)
- return handler_in_use;
+ return;
cfs_percpt_for_each(container, cpt, the_lnet.ln_md_containers) {
struct lnet_libmd *md;
lnet_res_lock(cpt);
list_for_each_entry(md, &container->rec_active, md_list) {
- if (assert) {
- LASSERT(md->md_handler != handler);
- } else if (md->md_handler == handler) {
- handler_in_use = true;
- break;
- }
+ LASSERT(md->md_handler != handler);
}
lnet_res_unlock(cpt);
}
- return handler_in_use;
}
EXPORT_SYMBOL(lnet_assert_handler_unused);
static int lnet_peer_discovery(void *arg)
{
struct lnet_peer *lp;
- int retry = 3;
int rc;
wait_for_completion(&the_lnet.ln_started);
lnet_net_unlock(LNET_LOCK_EX);
}
-cleanup:
CDEBUG(D_NET, "stopping\n");
/*
* Clean up before telling lnet_peer_discovery_stop() that
}
lnet_net_unlock(LNET_LOCK_EX);
- if (lnet_assert_handler_unused(the_lnet.ln_dc_handler, --retry <= 0))
- goto cleanup;
-
the_lnet.ln_dc_handler = NULL;
the_lnet.ln_dc_state = LNET_DC_STATE_SHUTDOWN;
rc = LNetClearLazyPortal(SRPC_FRAMEWORK_REQUEST_PORTAL);
rc = LNetClearLazyPortal(SRPC_REQUEST_PORTAL);
LASSERT(rc == 0);
- lnet_assert_handler_unused(srpc_data.rpc_lnet_handler, true);
+ lnet_assert_handler_unused(srpc_data.rpc_lnet_handler);
fallthrough;
case SRPC_STATE_NI_INIT:
percpu_ref_kill(&ptlrpc_pending);
wait_for_completion(&ptlrpc_done);
- lnet_assert_handler_unused(ptlrpc_handler, true);
+ lnet_assert_handler_unused(ptlrpc_handler);
LNetNIFini();
}