On some customers’ systems, the kernel was compiled with HZ defined as
100 instead of 1000. This improves performance for HPC applications.
However, to use these systems with Lustre, customers have to rebuild
Lustre for that kernel, because Lustre directly uses the compile-time
constant HZ.
Since kernel 2.6.21, some non-HZ-dependent timing APIs have become
non-inline functions, which can be used in the Lustre code to replace
direct HZ access.
These kernel APIs include:
jiffies_to_msecs()
jiffies_to_usecs()
jiffies_to_timespec()
msecs_to_jiffies()
usecs_to_jiffies()
timespec_to_jiffies()
Here are some examples of the replacements:
HZ -> msecs_to_jiffies(MSEC_PER_SEC)
n * HZ -> msecs_to_jiffies(n * MSEC_PER_SEC)
HZ / n -> msecs_to_jiffies(MSEC_PER_SEC / n)
n / HZ -> jiffies_to_msecs(n) / MSEC_PER_SEC
n / HZ * 1000 -> jiffies_to_msecs(n)
This patch replaces the direct HZ access in lnet module.
Signed-off-by: Jian Yu <jian.yu@intel.com>
Change-Id: I0be6c82636df08b0a0a763ea31dafa817c077fe1
Reviewed-on: http://review.whamcloud.com/11303
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Nathaniel Clark <nathaniel.l.clark@intel.com>
Reviewed-by: James Simmons <uja.ornl@gmail.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
atomic_inc(&kgnilnd_data.kgn_nquiesce); \
CDEBUG(D_NET, "Waiting for thread pause to be over...\n"); \
while (kgnilnd_data.kgn_quiesce_trigger) { \
atomic_inc(&kgnilnd_data.kgn_nquiesce); \
CDEBUG(D_NET, "Waiting for thread pause to be over...\n"); \
while (kgnilnd_data.kgn_quiesce_trigger) { \
- set_current_state(TASK_INTERRUPTIBLE); \
- schedule_timeout(HZ); \
+ msleep_interruptible(MSEC_PER_SEC); \
} \
/* Mom, my homework is done */ \
CDEBUG(D_NET, "Waking up from thread pause\n"); \
} \
/* Mom, my homework is done */ \
CDEBUG(D_NET, "Waking up from thread pause\n"); \
mx_get_endpoint_addr(kmxlnd_data.kmx_endpt, &kmxlnd_data.kmx_epa);
mx_decompose_endpoint_addr(kmxlnd_data.kmx_epa, &nic_id, &ep_id);
mxret = mx_connect(kmxlnd_data.kmx_endpt, nic_id, ep_id,
mx_get_endpoint_addr(kmxlnd_data.kmx_endpt, &kmxlnd_data.kmx_epa);
mx_decompose_endpoint_addr(kmxlnd_data.kmx_epa, &nic_id, &ep_id);
mxret = mx_connect(kmxlnd_data.kmx_endpt, nic_id, ep_id,
- MXLND_MSG_MAGIC, MXLND_CONNECT_TIMEOUT/HZ*1000,
+ MXLND_MSG_MAGIC,
+ jiffies_to_msecs(MXLND_CONNECT_TIMEOUT),
&kmxlnd_data.kmx_epa);
if (mxret != MX_SUCCESS) {
CNETERR("unable to connect to myself (%s)\n", mx_strerror(mxret));
&kmxlnd_data.kmx_epa);
if (mxret != MX_SUCCESS) {
CNETERR("unable to connect to myself (%s)\n", mx_strerror(mxret));
goto failed_with_endpoint;
}
mxret = mx_set_request_timeout(kmxlnd_data.kmx_endpt, NULL,
goto failed_with_endpoint;
}
mxret = mx_set_request_timeout(kmxlnd_data.kmx_endpt, NULL,
- MXLND_COMM_TIMEOUT/HZ*1000);
+ jiffies_to_msecs(MXLND_COMM_TIMEOUT));
if (mxret != MX_SUCCESS) {
CERROR("mx_set_request_timeout() failed with %s\n",
mx_strerror(mxret));
if (mxret != MX_SUCCESS) {
CERROR("mx_set_request_timeout() failed with %s\n",
mx_strerror(mxret));
mx_wakeup(kmxlnd_data.kmx_endpt);
up(&kmxlnd_data.kmx_tx_queue_sem);
up(&kmxlnd_data.kmx_conn_sem);
mx_wakeup(kmxlnd_data.kmx_endpt);
up(&kmxlnd_data.kmx_tx_queue_sem);
up(&kmxlnd_data.kmx_conn_sem);
+ mxlnd_sleep(msecs_to_jiffies(2 * MSEC_PER_SEC));
#define MXLND_NDAEMONS 3 /* connd, timeoutd, tx_queued */
#define MXLND_MX_BOARD 0 /* Use the first MX NIC if more than 1 avail */
#define MXLND_MX_EP_ID 0 /* MX endpoint ID */
#define MXLND_NDAEMONS 3 /* connd, timeoutd, tx_queued */
#define MXLND_MX_BOARD 0 /* Use the first MX NIC if more than 1 avail */
#define MXLND_MX_EP_ID 0 /* MX endpoint ID */
-#define MXLND_COMM_TIMEOUT (20 * HZ) /* timeout for send/recv (jiffies) */
-#define MXLND_WAIT_TIMEOUT HZ /* timeout for wait (jiffies) */
-#define MXLND_CONNECT_TIMEOUT (5 * HZ) /* timeout for connections (jiffies) */
+/* timeout for send/recv (jiffies) */
+#define MXLND_COMM_TIMEOUT msecs_to_jiffies(20 * MSEC_PER_SEC)
+/* timeout for wait (jiffies) */
+#define MXLND_WAIT_TIMEOUT msecs_to_jiffies(MSEC_PER_SEC)
+/* timeout for connections (jiffies) */
+#define MXLND_CONNECT_TIMEOUT msecs_to_jiffies(5 * MSEC_PER_SEC)
#define MXLND_POLLING 1000 /* poll iterations before blocking */
#define MXLND_LOOKUP_COUNT 5 /* how many times to try to resolve MAC */
#define MXLND_MAX_PEERS 1024 /* number of nodes talking to me */
#define MXLND_POLLING 1000 /* poll iterations before blocking */
#define MXLND_LOOKUP_COUNT 5 /* how many times to try to resolve MAC */
#define MXLND_MAX_PEERS 1024 /* number of nodes talking to me */
break;
} else if (ret == -EHOSTUNREACH && try < tries) {
/* add a little backoff */
break;
} else if (ret == -EHOSTUNREACH && try < tries) {
/* add a little backoff */
- CDEBUG(D_NET, "sleeping for %d jiffies\n",
- HZ/4);
- mxlnd_sleep(HZ/4);
+ CDEBUG(D_NET, "sleeping for %lu jiffies\n",
+ msecs_to_jiffies(MSEC_PER_SEC / 4));
+ mxlnd_sleep(msecs_to_jiffies(MSEC_PER_SEC / 4));
}
}
} while (try++ < tries);
}
}
} while (try++ < tries);
mxlnd_conn_decref(conn);
}
mx_set_request_timeout(kmxlnd_data.kmx_endpt, request,
mxlnd_conn_decref(conn);
}
mx_set_request_timeout(kmxlnd_data.kmx_endpt, request,
- MXLND_CONNECT_TIMEOUT/HZ*1000);
+ jiffies_to_msecs(MXLND_CONNECT_TIMEOUT));
#if MXLND_STATS
if (cfs_time_after(jiffies, last)) {
#if MXLND_STATS
if (cfs_time_after(jiffies, last)) {
+ last = jiffies + msecs_to_jiffies(MSEC_PER_SEC);
CDEBUG(D_NET, "status= %s credits= %d outstanding= %d ntx_msgs= %d "
"ntx_posted= %d ntx_data= %d data_posted= %d\n",
mxlnd_connstatus_to_str(conn->mxk_status), conn->mxk_credits,
CDEBUG(D_NET, "status= %s credits= %d outstanding= %d ntx_msgs= %d "
"ntx_posted= %d ntx_data= %d data_posted= %d\n",
mxlnd_connstatus_to_str(conn->mxk_status), conn->mxk_credits,
long id = (long) arg;
unsigned long now = 0;
unsigned long next = 0;
long id = (long) arg;
unsigned long now = 0;
unsigned long next = 0;
- unsigned long delay = HZ;
+ unsigned long delay = msecs_to_jiffies(MSEC_PER_SEC);
kmx_peer_t *peer = NULL;
kmx_peer_t *temp = NULL;
kmx_conn_t *conn = NULL;
kmx_peer_t *peer = NULL;
kmx_peer_t *temp = NULL;
kmx_conn_t *conn = NULL;
conn->mxk_status == MXLND_CONN_FAIL) &&
cfs_time_after(now,
conn->mxk_last_tx +
conn->mxk_status == MXLND_CONN_FAIL) &&
cfs_time_after(now,
conn->mxk_last_tx +
+ msecs_to_jiffies(MSEC_PER_SEC))) {
write_unlock(g_lock);
mxlnd_check_sends(peer);
write_lock(g_lock);
write_unlock(g_lock);
mxlnd_check_sends(peer);
write_lock(g_lock);
{
return (*kiblnd_tunables.kib_keepalive > 0) &&
cfs_time_after(jiffies, conn->ibc_last_send +
{
return (*kiblnd_tunables.kib_keepalive > 0) &&
cfs_time_after(jiffies, conn->ibc_last_send +
- *kiblnd_tunables.kib_keepalive*HZ);
+ msecs_to_jiffies(*kiblnd_tunables.kib_keepalive *
+ MSEC_PER_SEC));
LASSERT(conn->ibc_state >= IBLND_CONN_ESTABLISHED);
tx->tx_queued = 1;
LASSERT(conn->ibc_state >= IBLND_CONN_ESTABLISHED);
tx->tx_queued = 1;
- tx->tx_deadline = jiffies + (*kiblnd_tunables.kib_timeout * HZ);
+ tx->tx_deadline = jiffies +
+ msecs_to_jiffies(*kiblnd_tunables.kib_timeout *
+ MSEC_PER_SEC);
if (tx->tx_conn == NULL) {
kiblnd_conn_addref(conn);
if (tx->tx_conn == NULL) {
kiblnd_conn_addref(conn);
kiblnd_data.kib_peer_hash_size;
}
kiblnd_data.kib_peer_hash_size;
}
+ deadline += msecs_to_jiffies(p * MSEC_PER_SEC);
spin_lock_irqsave(&kiblnd_data.kib_connd_lock, flags);
}
spin_lock_irqsave(&kiblnd_data.kib_connd_lock, flags);
}
MIN(peer->rap_reconnect_interval,
*kranal_tunables.kra_max_reconnect_interval);
MIN(peer->rap_reconnect_interval,
*kranal_tunables.kra_max_reconnect_interval);
- peer->rap_reconnect_time = jiffies + peer->rap_reconnect_interval * HZ;
+ peer->rap_reconnect_time = jiffies +
+ msecs_to_jiffies(peer->rap_reconnect_interval * MSEC_PER_SEC);
/* Grab all blocked packets while we have the global lock */
cfs_list_add(&zombies, &peer->rap_tx_queue);
/* Grab all blocked packets while we have the global lock */
cfs_list_add(&zombies, &peer->rap_tx_queue);
conn->rac_state == RANAL_CONN_CLOSING);
if (!conn->rac_close_sent &&
conn->rac_state == RANAL_CONN_CLOSING);
if (!conn->rac_close_sent &&
- cfs_time_aftereq(now, conn->rac_last_tx + conn->rac_keepalive *
- HZ)) {
+ cfs_time_aftereq(now, conn->rac_last_tx +
+ msecs_to_jiffies(conn->rac_keepalive *
+ MSEC_PER_SEC))) {
/* not sent in a while; schedule conn so scheduler sends a keepalive */
CDEBUG(D_NET, "Scheduling keepalive %p->%s\n",
conn, libcfs_nid2str(conn->rac_peer->rap_nid));
kranal_schedule_conn(conn);
}
/* not sent in a while; schedule conn so scheduler sends a keepalive */
CDEBUG(D_NET, "Scheduling keepalive %p->%s\n",
conn, libcfs_nid2str(conn->rac_peer->rap_nid));
kranal_schedule_conn(conn);
}
- timeout = conn->rac_timeout * HZ;
+ timeout = msecs_to_jiffies(conn->rac_timeout * MSEC_PER_SEC);
if (!conn->rac_close_recvd &&
cfs_time_aftereq(now, conn->rac_last_rx + timeout)) {
if (!conn->rac_close_recvd &&
cfs_time_aftereq(now, conn->rac_last_rx + timeout)) {
(conn->rac_state == RANAL_CONN_ESTABLISHED) ?
"Nothing" : "CLOSE not",
libcfs_nid2str(conn->rac_peer->rap_nid),
(conn->rac_state == RANAL_CONN_ESTABLISHED) ?
"Nothing" : "CLOSE not",
libcfs_nid2str(conn->rac_peer->rap_nid),
- (now - conn->rac_last_rx)/HZ);
+ jiffies_to_msecs(now - conn->rac_last_rx)/MSEC_PER_SEC);
spin_unlock_irqrestore(&conn->rac_lock, flags);
CERROR("tx on fmaq for %s blocked %lu seconds\n",
libcfs_nid2str(conn->rac_peer->rap_nid),
spin_unlock_irqrestore(&conn->rac_lock, flags);
CERROR("tx on fmaq for %s blocked %lu seconds\n",
libcfs_nid2str(conn->rac_peer->rap_nid),
- (now - tx->tx_qtime)/HZ);
+ jiffies_to_msecs(now-tx->tx_qtime)/MSEC_PER_SEC);
spin_unlock_irqrestore(&conn->rac_lock, flags);
CERROR("tx on rdmaq for %s blocked %lu seconds\n",
libcfs_nid2str(conn->rac_peer->rap_nid),
spin_unlock_irqrestore(&conn->rac_lock, flags);
CERROR("tx on rdmaq for %s blocked %lu seconds\n",
libcfs_nid2str(conn->rac_peer->rap_nid),
- (now - tx->tx_qtime)/HZ);
+ jiffies_to_msecs(now-tx->tx_qtime)/MSEC_PER_SEC);
spin_unlock_irqrestore(&conn->rac_lock, flags);
CERROR("tx on replyq for %s blocked %lu seconds\n",
libcfs_nid2str(conn->rac_peer->rap_nid),
spin_unlock_irqrestore(&conn->rac_lock, flags);
CERROR("tx on replyq for %s blocked %lu seconds\n",
libcfs_nid2str(conn->rac_peer->rap_nid),
- (now - tx->tx_qtime)/HZ);
+ jiffies_to_msecs(now-tx->tx_qtime)/MSEC_PER_SEC);
conn_index = (conn_index + 1) % conn_entries;
}
conn_index = (conn_index + 1) % conn_entries;
}
- next_check_time += p * HZ;
+ next_check_time += msecs_to_jiffies(p * MSEC_PER_SEC);
spin_lock_irqsave(&kranal_data.kra_reaper_lock, flags);
spin_lock_irqsave(&kranal_data.kra_reaper_lock, flags);
case RAP_NOT_DONE:
if (cfs_time_aftereq(jiffies,
case RAP_NOT_DONE:
if (cfs_time_aftereq(jiffies,
- conn->rac_last_tx + conn->rac_keepalive *
- HZ))
+ conn->rac_last_tx +
+ msecs_to_jiffies(conn->rac_keepalive *
+ MSEC_PER_SEC)))
CWARN("EAGAIN sending %02x (idle %lu secs)\n",
msg->ram_type,
CWARN("EAGAIN sending %02x (idle %lu secs)\n",
msg->ram_type,
- (jiffies - conn->rac_last_tx)/HZ);
+ jiffies_to_msecs(jiffies - conn->rac_last_tx) /
+ MSEC_PER_SEC);
if (cfs_time_aftereq(jiffies,
conn->rac_last_tx +
if (cfs_time_aftereq(jiffies,
conn->rac_last_tx +
- conn->rac_keepalive * HZ)) {
+ msecs_to_jiffies(conn->rac_keepalive *
+ MSEC_PER_SEC))) {
CDEBUG(D_NET, "sending NOOP (rdma in progress)\n");
kranal_init_msg(&conn->rac_msg, RANAL_MSG_NOOP);
kranal_sendmsg(conn, &conn->rac_msg, NULL, 0);
CDEBUG(D_NET, "sending NOOP (rdma in progress)\n");
kranal_init_msg(&conn->rac_msg, RANAL_MSG_NOOP);
kranal_sendmsg(conn, &conn->rac_msg, NULL, 0);
spin_unlock_irqrestore(&conn->rac_lock, flags);
if (cfs_time_aftereq(jiffies,
spin_unlock_irqrestore(&conn->rac_lock, flags);
if (cfs_time_aftereq(jiffies,
- conn->rac_last_tx + conn->rac_keepalive *
- HZ)) {
+ conn->rac_last_tx +
+ msecs_to_jiffies(conn->rac_keepalive *
+ MSEC_PER_SEC))) {
CDEBUG(D_NET, "sending NOOP -> %s (%p idle %lu(%ld))\n",
libcfs_nid2str(conn->rac_peer->rap_nid), conn,
CDEBUG(D_NET, "sending NOOP -> %s (%p idle %lu(%ld))\n",
libcfs_nid2str(conn->rac_peer->rap_nid), conn,
- (jiffies - conn->rac_last_tx)/HZ,
+ jiffies_to_msecs(jiffies - conn->rac_last_tx) /
+ MSEC_PER_SEC,
conn->rac_keepalive);
kranal_init_msg(&conn->rac_msg, RANAL_MSG_NOOP);
kranal_sendmsg(conn, &conn->rac_msg, NULL, 0);
conn->rac_keepalive);
kranal_init_msg(&conn->rac_msg, RANAL_MSG_NOOP);
kranal_sendmsg(conn, &conn->rac_msg, NULL, 0);
LASSERT (rrc == RAP_NOT_DONE);
if (!cfs_time_aftereq(jiffies, conn->rac_last_tx +
LASSERT (rrc == RAP_NOT_DONE);
if (!cfs_time_aftereq(jiffies, conn->rac_last_tx +
- conn->rac_timeout * HZ))
+ msecs_to_jiffies(conn->rac_timeout*MSEC_PER_SEC)))
return -EAGAIN;
/* Too late */
return -EAGAIN;
/* Too late */
/* retry with exponential backoff until HZ */
if (conn->rac_keepalive == 0)
conn->rac_keepalive = 1;
/* retry with exponential backoff until HZ */
if (conn->rac_keepalive == 0)
conn->rac_keepalive = 1;
- else if (conn->rac_keepalive <= HZ)
+ else if (conn->rac_keepalive <=
+ msecs_to_jiffies(MSEC_PER_SEC))
conn->rac_keepalive *= 2;
else
conn->rac_keepalive *= 2;
else
- conn->rac_keepalive += HZ;
+ conn->rac_keepalive +=
+ msecs_to_jiffies(MSEC_PER_SEC);
deadline = conn->rac_last_tx + conn->rac_keepalive;
spin_lock_irqsave(&dev->rad_lock, flags);
deadline = conn->rac_last_tx + conn->rac_keepalive;
spin_lock_irqsave(&dev->rad_lock, flags);