From: James Simmons Date: Fri, 13 Jan 2017 22:54:34 +0000 (-0500) Subject: LU-9019 lnet: simplify lnet_eq_wait_locked X-Git-Tag: 2.9.52~27 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=19b33c5ed4a5c8296b0a2ba576de874e83faa8fd LU-9019 lnet: simplify lnet_eq_wait_locked We can simplify the code by taking advantage of the behavior of schedule_timeout_interruptible(). Instead of testing if tms is less than zero we can pass in a signed long that schedule_timeout_interruptible is expecting and for the case of no timeout we can pass in MAX_SCHEDULE_TIMEOUT. Change-Id: I15126aab500e975ec34ee270d3a1f9b83cf50902 Signed-off-by: James Simmons Reviewed-on: https://review.whamcloud.com/23147 Reviewed-by: Doug Oucharek Tested-by: Jenkins Reviewed-by: Dmitry Eremin Tested-by: Maloo Reviewed-by: Oleg Drokin --- diff --git a/lnet/include/lnet/api.h b/lnet/include/lnet/api.h index 9a137f0..6098439 100644 --- a/lnet/include/lnet/api.h +++ b/lnet/include/lnet/api.h @@ -174,7 +174,7 @@ int LNetEQWait(lnet_handle_eq_t eventq_in, int LNetEQPoll(lnet_handle_eq_t *eventqs_in, int neq_in, - int timeout_ms, + signed long timeout, lnet_event_t *event_out, int *which_eq_out); /** @} lnet_eq */ diff --git a/lnet/include/lnet/types.h b/lnet/include/lnet/types.h index 372bb6a..a5cec58 100644 --- a/lnet/include/lnet/types.h +++ b/lnet/include/lnet/types.h @@ -73,8 +73,6 @@ typedef __u32 lnet_pid_t; #define LNET_PID_USERFLAG 0x80000000 /* set in userspace peers */ #define LNET_PID_LUSTRE 12345 -#define LNET_TIME_FOREVER (-1) - /* how an LNET NID encodes net:address */ /** extract the address part of an lnet_nid_t */ diff --git a/lnet/lnet/api-ni.c b/lnet/lnet/api-ni.c index f75a507..16465b5 100644 --- a/lnet/lnet/api-ni.c +++ b/lnet/lnet/api-ni.c @@ -55,7 +55,7 @@ static int rnet_htable_size = LNET_REMOTE_NETS_HASH_DEFAULT; module_param(rnet_htable_size, int, 0444); MODULE_PARM_DESC(rnet_htable_size, "size of remote network hash table"); -static int 
lnet_ping(lnet_process_id_t id, int timeout_ms, +static int lnet_ping(lnet_process_id_t id, signed long timeout, lnet_process_id_t __user *ids, int n_ids); static char * @@ -2118,17 +2118,29 @@ LNetCtl(unsigned int cmd, void *arg) case IOC_LIBCFS_LNET_FAULT: return lnet_fault_ctl(data->ioc_flags, data); - case IOC_LIBCFS_PING: + case IOC_LIBCFS_PING: { + signed long timeout; + id.nid = data->ioc_nid; id.pid = data->ioc_u32[0]; - rc = lnet_ping(id, data->ioc_u32[1], /* timeout */ - data->ioc_pbuf1, - data->ioc_plen1/sizeof(lnet_process_id_t)); + + /* Don't block longer than 2 minutes */ + if (data->ioc_u32[1] > 120 * MSEC_PER_SEC) + return -EINVAL; + + /* If timestamp is negative then disable timeout */ + if ((s32)data->ioc_u32[1] < 0) + timeout = MAX_SCHEDULE_TIMEOUT; + else + timeout = msecs_to_jiffies(data->ioc_u32[1]); + + rc = lnet_ping(id, timeout, data->ioc_pbuf1, + data->ioc_plen1 / sizeof(lnet_process_id_t)); if (rc < 0) return rc; data->ioc_count = rc; return 0; - + } default: ni = lnet_net2ni(data->ioc_net); if (ni == NULL) @@ -2203,9 +2215,8 @@ LNetSnprintHandle(char *str, int len, lnet_handle_any_t h) } EXPORT_SYMBOL(LNetSnprintHandle); -static int -lnet_ping(lnet_process_id_t id, int timeout_ms, lnet_process_id_t __user *ids, - int n_ids) +static int lnet_ping(lnet_process_id_t id, signed long timeout, + lnet_process_id_t __user *ids, int n_ids) { lnet_handle_eq_t eqh; lnet_handle_md_t mdh; @@ -2214,7 +2225,7 @@ lnet_ping(lnet_process_id_t id, int timeout_ms, lnet_process_id_t __user *ids, int which; int unlinked = 0; int replied = 0; - const int a_long_time = 60000; /* mS */ + const signed long a_long_time = msecs_to_jiffies(60 * MSEC_PER_SEC); int infosz; struct lnet_ping_info *info; lnet_process_id_t tmpid; @@ -2226,10 +2237,8 @@ lnet_ping(lnet_process_id_t id, int timeout_ms, lnet_process_id_t __user *ids, infosz = offsetof(struct lnet_ping_info, pi_ni[n_ids]); - if (n_ids <= 0 || - id.nid == LNET_NID_ANY || - timeout_ms > 500000 || /* arbitrary 
limit! */ - n_ids > 20) /* arbitrary limit! */ + /* n_ids limit is arbitrary */ + if (n_ids <= 0 || n_ids > 20 || id.nid == LNET_NID_ANY) return -EINVAL; if (id.pid == LNET_PID_ANY) @@ -2273,7 +2282,7 @@ lnet_ping(lnet_process_id_t id, int timeout_ms, lnet_process_id_t __user *ids, /* NB must wait for the UNLINK event below... */ unlinked = 1; - timeout_ms = a_long_time; + timeout = a_long_time; } do { @@ -2281,7 +2290,7 @@ lnet_ping(lnet_process_id_t id, int timeout_ms, lnet_process_id_t __user *ids, if (unlinked) blocked = cfs_block_allsigs(); - rc2 = LNetEQPoll(&eqh, 1, timeout_ms, &event, &which); + rc2 = LNetEQPoll(&eqh, 1, timeout, &event, &which); if (unlinked) cfs_restore_sigs(blocked); @@ -2305,7 +2314,7 @@ lnet_ping(lnet_process_id_t id, int timeout_ms, lnet_process_id_t __user *ids, LNetMDUnlink(mdh); /* No assertion (racing with network) */ unlinked = 1; - timeout_ms = a_long_time; + timeout = a_long_time; } else if (rc2 == 0) { /* timed out waiting for unlink */ CWARN("ping %s: late network completion\n", diff --git a/lnet/lnet/lib-eq.c b/lnet/lnet/lib-eq.c index 665b84b..4ff2ba6 100644 --- a/lnet/lnet/lib-eq.c +++ b/lnet/lnet/lib-eq.c @@ -313,44 +313,30 @@ LNetEQWait (lnet_handle_eq_t eventq, lnet_event_t *event) { int which; - return LNetEQPoll(&eventq, 1, LNET_TIME_FOREVER, - event, &which); + return LNetEQPoll(&eventq, 1, MAX_SCHEDULE_TIMEOUT, + event, &which); } EXPORT_SYMBOL(LNetEQWait); static int -lnet_eq_wait_locked(int *timeout_ms) +lnet_eq_wait_locked(signed long *timeout) __must_hold(&the_lnet.ln_eq_wait_lock) { - int tms = *timeout_ms; - int wait; - wait_queue_t wl; - cfs_time_t now; + signed long tms = *timeout; + wait_queue_t wl; + int wait; if (tms == 0) return -ENXIO; /* don't want to wait and no new event */ init_waitqueue_entry(&wl, current); - set_current_state(TASK_INTERRUPTIBLE); add_wait_queue(&the_lnet.ln_eq_waitq, &wl); lnet_eq_wait_unlock(); - if (tms < 0) { - schedule(); - } else { - struct timeval tv; - - now = 
cfs_time_current(); - schedule_timeout(cfs_time_seconds(tms) / 1000); - cfs_duration_usec(cfs_time_sub(cfs_time_current(), now), &tv); - tms -= (int)(tv.tv_sec * 1000 + tv.tv_usec / 1000); - if (tms < 0) /* no more wait but may have new event */ - tms = 0; - } - + tms = schedule_timeout_interruptible(tms); wait = tms != 0; /* might need to call here again */ - *timeout_ms = tms; + *timeout = tms; lnet_eq_wait_lock(); remove_wait_queue(&the_lnet.ln_eq_waitq, &wl); @@ -370,8 +356,8 @@ __must_hold(&the_lnet.ln_eq_wait_lock) * fixed period, or block indefinitely. * * \param eventqs,neq An array of EQ handles, and size of the array. - * \param timeout_ms Time in milliseconds to wait for an event to occur on - * one of the EQs. The constant LNET_TIME_FOREVER can be used to indicate an + * \param timeout Time in jiffies to wait for an event to occur on + * one of the EQs. The constant MAX_SCHEDULE_TIMEOUT can be used to indicate an * infinite timeout. * \param event,which On successful return (1 or -EOVERFLOW), \a event will * hold the next event in the EQs, and \a which will contain the index of the @@ -385,7 +371,7 @@ __must_hold(&the_lnet.ln_eq_wait_lock) * \retval -ENOENT If there's an invalid handle in \a eventqs. 
*/ int -LNetEQPoll(lnet_handle_eq_t *eventqs, int neq, int timeout_ms, +LNetEQPoll(lnet_handle_eq_t *eventqs, int neq, signed long timeout, lnet_event_t *event, int *which) { int wait = 1; @@ -427,7 +413,7 @@ LNetEQPoll(lnet_handle_eq_t *eventqs, int neq, int timeout_ms, * 0 : don't want to wait anymore, but might have new event * so need to call dequeue again */ - wait = lnet_eq_wait_locked(&timeout_ms); + wait = lnet_eq_wait_locked(&timeout); if (wait < 0) /* no new event */ break; } diff --git a/lnet/utils/portals.c b/lnet/utils/portals.c index 26efcc6..fd86a14 100644 --- a/lnet/utils/portals.c +++ b/lnet/utils/portals.c @@ -954,9 +954,14 @@ int jt_ptl_ping(int argc, char **argv) } } - if (argc > 2) - timeout = 1000 * atol(argv[2]); - else + if (argc > 2) { + timeout = 1000 * atol(argv[2]); + if (timeout > 120 * 1000) { + fprintf(stderr, "Timeout %s is to large\n", + argv[2]); + return -1; + } + } else timeout = 1000; /* default 1 second timeout */ LIBCFS_IOC_INIT (data); diff --git a/lustre/doc/lctl.8 b/lustre/doc/lctl.8 index c290087..0ade6ff 100644 --- a/lustre/doc/lctl.8 +++ b/lustre/doc/lctl.8 @@ -59,9 +59,11 @@ mount -t lustre -o nosvc Note the replace_nids command skips any invalidated records in the configuration log. The previous log is backed up with the suffix '.bak'. .TP -.BI ping " " +.BI ping " timeout" Check LNET connectivity via an LNET ping. This will use the fabric -appropriate to the specified NID. +appropriate to the specified NID. By default lctl will attempt to +reach the remote node for up to 120 seconds and then time out. To disable +the timeout just specify a negative timeout value. .TP .BI interface_list Print the network interface information for a given