On some customers' systems, the kernel is compiled with HZ defined to
100 instead of 1000, which improves performance for HPC applications.
However, to use these systems with Lustre, customers currently have to
rebuild Lustre against that kernel because Lustre uses the compile-time
constant HZ directly.
Since kernel 2.6.21, several HZ-independent timing APIs have been
available as non-inline functions; these can be used in the Lustre code
to replace direct HZ access.
These kernel APIs include:
jiffies_to_msecs()
jiffies_to_usecs()
jiffies_to_timespec()
msecs_to_jiffies()
usecs_to_jiffies()
timespec_to_jiffies()
Here are some examples of the replacements:
HZ -> msecs_to_jiffies(MSEC_PER_SEC)
n * HZ -> msecs_to_jiffies(n * MSEC_PER_SEC)
HZ / n -> msecs_to_jiffies(MSEC_PER_SEC / n)
n / HZ -> jiffies_to_msecs(n) / MSEC_PER_SEC
n / HZ * 1000 -> jiffies_to_msecs(n)
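For illustration only, here is a minimal, hypothetical sketch (not code
from this patch) of what such a conversion looks like; the helpers and
MSEC_PER_SEC come from <linux/jiffies.h> and <linux/time.h>:

  /* Hypothetical example, for illustration only; not part of this patch. */
  #include <linux/jiffies.h>  /* jiffies, msecs_to_jiffies(), jiffies_to_msecs() */
  #include <linux/time.h>     /* MSEC_PER_SEC */

  /* Old style: the build-time HZ value is compiled into the module. */
  static unsigned long deadline_old(void)
  {
          return jiffies + 5 * HZ;
  }

  /* New style: uses the exported helper, so the module no longer bakes
   * in the HZ value it was built with. */
  static unsigned long deadline_new(void)
  {
          return jiffies + msecs_to_jiffies(5 * MSEC_PER_SEC);
  }

  /* Elapsed whole seconds since 'start', without dividing by HZ. */
  static unsigned long elapsed_sec(unsigned long start)
  {
          return jiffies_to_msecs(jiffies - start) / MSEC_PER_SEC;
  }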
This patch replaces direct HZ access in the Lustre modules.
Signed-off-by: Jian Yu <jian.yu@intel.com>
Change-Id: Ib0ed9b5faf6ed311ff5423873d1c125b02ec4ab5
Reviewed-on: http://review.whamcloud.com/12052
Reviewed-by: Bob Glossman <bob.glossman@intel.com>
Reviewed-by: Dmitry Eremin <dmitry.eremin@intel.com>
Tested-by: Jenkins
Reviewed-by: James Simmons <uja.ornl@gmail.com>
Reviewed-by: Nathaniel Clark <nathaniel.l.clark@intel.com>
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
23 files changed:
- if ((jiffies - cur > 5 * HZ) &&
- (jiffies - lock->time > 5 * HZ)) {
+ if ((jiffies - cur > msecs_to_jiffies(5 * MSEC_PER_SEC)) &&
+ (jiffies - lock->time >
+ msecs_to_jiffies(5 * MSEC_PER_SEC))) {
struct task_struct *task = lock->task;
if (task == NULL)
current->comm, current->pid,
lock, task->comm, task->pid,
lock->func, lock->line,
- (jiffies - lock->time) / HZ);
+ jiffies_to_msecs(jiffies - lock->time) /
+ MSEC_PER_SEC);
LCONSOLE_WARN("====== for process holding the "
"lock =====\n");
libcfs_debug_dumpstack(task);
LCONSOLE_WARN("====== for current process =====\n");
libcfs_debug_dumpstack(NULL);
LCONSOLE_WARN("====== end =======\n");
+ cfs_pause(msecs_to_jiffies(1000 * MSEC_PER_SEC));
+ !IS_ERR(ksym = resolve_symbol(sechdrs, versindex, name,
+ mod, ownername)) ||
+ PTR_ERR(ksym) != -EBUSY,
++ msecs_to_jiffies(30 * MSEC_PER_SEC)) <= 0) {
+ printk(KERN_WARNING "%s: gave up waiting for init of module %s.\n",
+ mod->name, ownername);
+ }
* export is being evicted */
ptlrpc_update_export_timer(req->rq_export, 0);
target_request_copy_put(req);
- delta = (jiffies - delta) / HZ;
+ delta = jiffies_to_msecs(jiffies - delta) / MSEC_PER_SEC;
CDEBUG(D_INFO,"4: recovery completed in %lus - %d/%d reqs/locks\n",
delta, obd->obd_replayed_requests, obd->obd_replayed_locks);
if (delta > OBD_RECOVERY_TIME_SOFT) {
ldlm_ns_name(ns), atomic_read(&ns->ns_bref));
force_wait:
if (force)
- lwi = LWI_TIMEOUT(obd_timeout * HZ / 4, NULL, NULL);
+ lwi = LWI_TIMEOUT(msecs_to_jiffies(obd_timeout *
+ MSEC_PER_SEC) / 4, NULL, NULL);
rc = l_wait_event(ns->ns_waitq,
atomic_read(&ns->ns_bref) == 0, &lwi);
#include <md_object.h>
#include <lustre_linkea.h>
-#define HALF_SEC (HZ >> 1)
+#define HALF_SEC msecs_to_jiffies(MSEC_PER_SEC >> 1)
#define LFSCK_CHECKPOINT_INTERVAL 60
enum lfsck_flags {
__u64 checked = lo->ll_objs_checked_phase1 +
com->lc_new_checked;
__u64 speed = checked;
- __u64 new_checked = com->lc_new_checked * HZ;
+ __u64 new_checked = msecs_to_jiffies(com->lc_new_checked *
+ MSEC_PER_SEC);
__u32 rtime = lo->ll_run_time_phase1 +
cfs_duration_sec(duration + HALF_SEC);
com->lc_new_checked;
__u64 speed1 = lo->ll_objs_checked_phase1;
__u64 speed2 = checked;
- __u64 new_checked = com->lc_new_checked * HZ;
+ __u64 new_checked = msecs_to_jiffies(com->lc_new_checked *
+ MSEC_PER_SEC);
__u32 rtime = lo->ll_run_time_phase2 +
cfs_duration_sec(duration + HALF_SEC);
bool dirty = false;
if (limit != LFSCK_SPEED_NO_LIMIT) {
- if (limit > HZ) {
- lfsck->li_sleep_rate = limit / HZ;
+ if (limit > msecs_to_jiffies(MSEC_PER_SEC)) {
+ lfsck->li_sleep_rate = jiffies_to_msecs(limit) /
+ MSEC_PER_SEC;
lfsck->li_sleep_jif = 1;
} else {
lfsck->li_sleep_rate = 1;
- lfsck->li_sleep_jif = HZ / limit;
+ lfsck->li_sleep_jif = msecs_to_jiffies(MSEC_PER_SEC /
+ limit);
}
} else {
lfsck->li_sleep_jif = 0;
}
lfsck->li_time_last_checkpoint;
__u64 checked = ns->ln_items_checked + com->lc_new_checked;
__u64 speed = checked;
- __u64 new_checked = com->lc_new_checked * HZ;
+ __u64 new_checked = msecs_to_jiffies(com->lc_new_checked *
+ MSEC_PER_SEC);
__u32 rtime = ns->ln_run_time_phase1 +
cfs_duration_sec(duration + HALF_SEC);
com->lc_new_checked;
__u64 speed1 = ns->ln_items_checked;
__u64 speed2 = checked;
- __u64 new_checked = com->lc_new_checked * HZ;
+ __u64 new_checked = msecs_to_jiffies(com->lc_new_checked *
+ MSEC_PER_SEC);
__u32 rtime = ns->ln_run_time_phase2 +
cfs_duration_sec(duration + HALF_SEC);
/* wait running statahead threads to quit */
while (atomic_read(&sbi->ll_sa_running) > 0)
schedule_timeout_and_set_state(TASK_UNINTERRUPTIBLE,
+ msecs_to_jiffies(MSEC_PER_SEC >> 3));
* safely because statahead RPC will access sai data */
while (sai->sai_sent != sai->sai_replied) {
/* in case we're not woken up, timeout wait */
- lwi = LWI_TIMEOUT(HZ >> 3, NULL, NULL);
+ lwi = LWI_TIMEOUT(msecs_to_jiffies(MSEC_PER_SEC >> 3),
+ NULL, NULL);
l_wait_event(thread->t_ctl_waitq,
sai->sai_sent == sai->sai_replied, &lwi);
}
static inline void set_capa_key_expiry(struct mdt_device *mdt)
{
- mdt->mdt_ck_expiry = jiffies + mdt->mdt_ck_timeout * HZ;
+ mdt->mdt_ck_expiry = jiffies + msecs_to_jiffies(mdt->mdt_ck_timeout *
+ MSEC_PER_SEC);
}
static void make_capa_key(struct lustre_capa_key *key, u32 mdsnum, int keyid)
if (rc) {
DEBUG_CAPA_KEY(D_ERROR, rkey, "update failed for");
/* next retry is in 300 sec */
- mdt->mdt_ck_expiry = jiffies + 300 * HZ;
+ mdt->mdt_ck_expiry = jiffies +
+ msecs_to_jiffies(300 *
+ MSEC_PER_SEC);
}
cfs_timer_arm(&mdt->mdt_ck_timer, mdt->mdt_ck_expiry);
if (OBD_FAIL_CHECK(OBD_FAIL_TGT_DELAY_CONDITIONAL) &&
cfs_fail_val ==
tsi2mdt_info(tsi)->mti_mdt->mdt_seq_site.ss_node_id)
- schedule_timeout_and_set_state(TASK_UNINTERRUPTIBLE, HZ * 3);
+ schedule_timeout_and_set_state(TASK_UNINTERRUPTIBLE,
+ msecs_to_jiffies(3 * MSEC_PER_SEC));
rc = tgt_connect(tsi);
if (rc != 0)
/* Always wait a few seconds to allow the server who
caused the lock revocation to finish its setup, plus some
random so everyone doesn't try to reconnect at once. */
- to = MGC_TIMEOUT_MIN_SECONDS * HZ;
- to += rand * HZ / 100; /* rand is centi-seconds */
+ to = msecs_to_jiffies(MGC_TIMEOUT_MIN_SECONDS * MSEC_PER_SEC);
+ /* rand is centi-seconds */
+ to += msecs_to_jiffies(rand * MSEC_PER_SEC / 100);
lwi = LWI_TIMEOUT(to, NULL, NULL);
l_wait_event(rq_waitq, rq_state & (RQ_STOP | RQ_PRECLEANUP),
&lwi);
struct obd_device *obd = m->private;
struct ofd_device *ofd = ofd_dev(obd->obd_lu_dev);
- return seq_printf(m, "%ld\n", ofd->ofd_fmd_max_age / HZ);
+ return seq_printf(m, "%ld\n", jiffies_to_msecs(ofd->ofd_fmd_max_age) /
+ MSEC_PER_SEC);
if (val > 65536 || val < 1)
return -EINVAL;
- ofd->ofd_fmd_max_age = val * HZ;
+ ofd->ofd_fmd_max_age = msecs_to_jiffies(val * MSEC_PER_SEC);
return count;
}
LPROC_SEQ_FOPS(ofd_fmd_max_age);
};
#define OFD_FMD_MAX_NUM_DEFAULT 128
-#define OFD_FMD_MAX_AGE_DEFAULT ((obd_timeout + 10) * HZ)
+#define OFD_FMD_MAX_AGE_DEFAULT msecs_to_jiffies((obd_timeout+10)*MSEC_PER_SEC)
#define OFD_SOFT_SYNC_LIMIT_DEFAULT 16
oti->oti_declare_ops_cred[OSD_OT_REF_DEL]);
if (last_credits != oh->ot_credits &&
- time_after(jiffies, last_printed + 60 * HZ)) {
+ time_after(jiffies, last_printed +
+ msecs_to_jiffies(60 * MSEC_PER_SEC))) {
libcfs_debug_dumpstack(NULL);
last_credits = oh->ot_credits;
last_printed = jiffies;
display_brw_stats(seq, "I/O time (1/1000s)", "ios",
&brw_stats->hist[BRW_R_IO_TIME],
display_brw_stats(seq, "I/O time (1/1000s)", "ios",
&brw_stats->hist[BRW_R_IO_TIME],
- &brw_stats->hist[BRW_W_IO_TIME], 1000 / HZ);
+ &brw_stats->hist[BRW_W_IO_TIME],
+ jiffies_to_msecs(1000) / MSEC_PER_SEC);
display_brw_stats(seq, "disk I/O size", "ios",
&brw_stats->hist[BRW_R_DISK_IOSIZE],
display_brw_stats(seq, "disk I/O size", "ios",
&brw_stats->hist[BRW_R_DISK_IOSIZE],
#include "osd_oi.h"
#include "osd_scrub.h"
#include "osd_oi.h"
#include "osd_scrub.h"
-#define HALF_SEC (HZ >> 1)
+#define HALF_SEC msecs_to_jiffies(MSEC_PER_SEC >> 1)
#define OSD_OTABLE_MAX_HASH 0x00000000ffffffffULL
if (thread_is_running(&scrub->os_thread)) {
cfs_duration_t duration = cfs_time_current() -
scrub->os_time_last_checkpoint;
- __u64 new_checked = scrub->os_new_checked * HZ;
+ __u64 new_checked = msecs_to_jiffies(scrub->os_new_checked *
+ MSEC_PER_SEC);
__u32 rtime = sf->sf_run_time +
cfs_duration_sec(duration + HALF_SEC);
LASSERT(timer);
CDEBUG(D_SEC, "ctx %p: start timer %lds\n", ctx, timeout);
- timeout = timeout * HZ + cfs_time_current();
+ timeout = msecs_to_jiffies(timeout * MSEC_PER_SEC) +
+ cfs_time_current();
init_timer(timer);
timer->expires = timeout;
set_current_state(TASK_INTERRUPTIBLE);
read_unlock(&rsi_cache.hash_lock);
- if (valid == 0)
- schedule_timeout(GSS_SVC_UPCALL_TIMEOUT *
- HZ);
-
+ if (valid == 0) {
+ unsigned long jiffies;
+ jiffies = msecs_to_jiffies(MSEC_PER_SEC *
+ GSS_SVC_UPCALL_TIMEOUT);
+ schedule_timeout(jiffies);
+ }
cache_get(&rsip->h);
goto cache_check;
}
if (atomic_read(&rsi_cache.readers) > 0)
break;
set_current_state(TASK_UNINTERRUPTIBLE);
- LASSERT(HZ >= 4);
- schedule_timeout(HZ / 4);
+ LASSERT(msecs_to_jiffies(MSEC_PER_SEC) >= 4);
+ schedule_timeout(msecs_to_jiffies(MSEC_PER_SEC / 4));
}
if (atomic_read(&rsi_cache.readers) == 0)
newctx, newctx->cc_flags);
schedule_timeout_and_set_state(TASK_INTERRUPTIBLE,
+ msecs_to_jiffies(MSEC_PER_SEC));
} else {
/*
* it's possible newctx == oldctx if we're switching
req->rq_restart = 0;
spin_unlock(&req->rq_lock);
- lwi = LWI_TIMEOUT_INTR(timeout * HZ, ctx_refresh_timeout,
+ lwi = LWI_TIMEOUT_INTR(msecs_to_jiffies(timeout * MSEC_PER_SEC),
+ ctx_refresh_timeout,
ctx_refresh_interrupt, req);
rc = l_wait_event(req->rq_reply_waitq, ctx_check_refresh(ctx), &lwi);
"cache missing: %lu\n"
"low free mark: %lu\n"
"max waitqueue depth: %u\n"
"cache missing: %lu\n"
"low free mark: %lu\n"
"max waitqueue depth: %u\n"
- "max wait time: "CFS_TIME_T"/%u\n"
+ "max wait time: "CFS_TIME_T"/%lu\n"
,
totalram_pages,
PAGES_PER_POOL,
page_pools.epp_st_missings,
page_pools.epp_st_lowfree,
page_pools.epp_st_max_wqlen,
- page_pools.epp_st_max_wait, HZ
+ page_pools.epp_st_max_wait,
+ msecs_to_jiffies(MSEC_PER_SEC)
);
spin_unlock(&page_pools.epp_lock);
- if (page_pools.epp_st_access > 0) {
- CDEBUG(D_SEC,
- "max pages %lu, grows %u, grow fails %u, shrinks %u, "
- "access %lu, missing %lu, max qlen %u, max wait "
- CFS_TIME_T"/%d\n",
- page_pools.epp_st_max_pages, page_pools.epp_st_grows,
- page_pools.epp_st_grow_fails,
+ if (page_pools.epp_st_access > 0) {
+ CDEBUG(D_SEC,
+ "max pages %lu, grows %u, grow fails %u, shrinks %u, "
+ "access %lu, missing %lu, max qlen %u, max wait "
+ CFS_TIME_T"/%lu\n",
+ page_pools.epp_st_max_pages, page_pools.epp_st_grows,
+ page_pools.epp_st_grow_fails,
page_pools.epp_st_shrinks, page_pools.epp_st_access,
page_pools.epp_st_missings, page_pools.epp_st_max_wqlen,
- page_pools.epp_st_max_wait, HZ);
+ page_pools.epp_st_max_wait,
+ msecs_to_jiffies(MSEC_PER_SEC));
/* check ctx list again before sleep */
sec_process_ctx_list();
- lwi = LWI_TIMEOUT(SEC_GC_INTERVAL * HZ, NULL, NULL);
+ lwi = LWI_TIMEOUT(msecs_to_jiffies(SEC_GC_INTERVAL *
+ MSEC_PER_SEC),
+ NULL, NULL);
l_wait_event(thread->t_ctl_waitq,
thread_is_stopping(thread) ||
thread_is_signal(thread),