ktime_t js_time_latest; /* time of most recent stat*/
struct lprocfs_stats *js_stats; /* per-job statistics */
struct obd_job_stats *js_jobstats; /* for accessing ojs_lock */
+ struct rcu_head js_rcu; /* RCU head for job_reclaim_rcu*/
};
static unsigned
return hlist_entry(hnode, struct job_stat, js_hash);
}
+static bool job_getref_try(struct job_stat *job)
+{
+ return atomic_inc_not_zero(&job->js_refcount);
+}
+
static void job_stat_get(struct cfs_hash *hs, struct hlist_node *hnode)
{
struct job_stat *job;
atomic_inc(&job->js_refcount);
}
+static void job_reclaim_rcu(struct rcu_head *head)
+{
+ struct job_stat *job = container_of(head, typeof(*job), js_rcu);
+
+ lprocfs_stats_free(&job->js_stats);
+ OBD_FREE_PTR(job);
+}
+
static void job_free(struct job_stat *job)
{
LASSERT(atomic_read(&job->js_refcount) == 0);
LASSERT(job->js_jobstats != NULL);
- write_lock(&job->js_jobstats->ojs_lock);
- list_del_init(&job->js_list);
- write_unlock(&job->js_jobstats->ojs_lock);
+ spin_lock(&job->js_jobstats->ojs_lock);
+ list_del_rcu(&job->js_list);
+ spin_unlock(&job->js_jobstats->ojs_lock);
- lprocfs_stats_free(&job->js_stats);
- OBD_FREE_PTR(job);
+ call_rcu(&job->js_rcu, job_reclaim_rcu);
}
static void job_putref(struct job_stat *job)
return;
}
- write_lock(&stats->ojs_lock);
+ spin_lock(&stats->ojs_lock);
if (!clear && stats->ojs_cleaning) {
- write_unlock(&stats->ojs_lock);
+ spin_unlock(&stats->ojs_lock);
return;
}
stats->ojs_cleaning = true;
- write_unlock(&stats->ojs_lock);
+ spin_unlock(&stats->ojs_lock);
/* Can't hold ojs_lock over hash iteration, since it is grabbed by
* job_cleanup_iter_callback()
cfs_hash_for_each_safe(stats->ojs_hash, job_cleanup_iter_callback,
&oldest);
- write_lock(&stats->ojs_lock);
+ spin_lock(&stats->ojs_lock);
stats->ojs_cleaning = false;
stats->ojs_cleanup_last = ktime_get_real();
- write_unlock(&stats->ojs_lock);
+ spin_unlock(&stats->ojs_lock);
}
static struct job_stat *job_alloc(char *jobid, struct obd_job_stats *jobs)
* "job2" was initialized in job_alloc() already. LU-2163 */
} else {
LASSERT(list_empty(&job->js_list));
- write_lock(&stats->ojs_lock);
- list_add_tail(&job->js_list, &stats->ojs_list);
- write_unlock(&stats->ojs_lock);
+ spin_lock(&stats->ojs_lock);
+ list_add_tail_rcu(&job->js_list, &stats->ojs_list);
+ spin_unlock(&stats->ojs_lock);
}
found:
}
EXPORT_SYMBOL(lprocfs_job_stats_fini);
+
+struct lprocfs_jobstats_data {
+ struct obd_job_stats *pjd_stats;
+ loff_t pjd_last_pos;
+ struct job_stat *pjd_last_job;
+};
+
static void *lprocfs_jobstats_seq_start(struct seq_file *p, loff_t *pos)
{
- struct obd_job_stats *stats = p->private;
+ struct lprocfs_jobstats_data *data = p->private;
+ struct obd_job_stats *stats = data->pjd_stats;
loff_t off = *pos;
struct job_stat *job;
- read_lock(&stats->ojs_lock);
+ rcu_read_lock();
if (off == 0)
return SEQ_START_TOKEN;
+
+ /* if pos matches the offset of last saved job, start from saved job */
+ if (data->pjd_last_job && data->pjd_last_pos == off)
+ return data->pjd_last_job;
+
off--;
- list_for_each_entry(job, &stats->ojs_list, js_list) {
+ list_for_each_entry_rcu(job, &stats->ojs_list, js_list) {
if (!off--)
return job;
}
static void lprocfs_jobstats_seq_stop(struct seq_file *p, void *v)
{
- struct obd_job_stats *stats = p->private;
+ struct lprocfs_jobstats_data *data = p->private;
+ struct job_stat *job = NULL;
+
+ /* try to get a ref on current job (not deleted) */
+ if (v && v != SEQ_START_TOKEN && job_getref_try(v))
+ job = v;
- read_unlock(&stats->ojs_lock);
+ rcu_read_unlock();
+
+ /* drop the ref on the old saved job */
+ if (data->pjd_last_job) {
+ job_putref(data->pjd_last_job);
+ data->pjd_last_job = NULL;
+ }
+
+ /* save the current job for the next read */
+ if (job)
+ data->pjd_last_job = job;
}
static void *lprocfs_jobstats_seq_next(struct seq_file *p, void *v, loff_t *pos)
{
- struct obd_job_stats *stats = p->private;
+ struct lprocfs_jobstats_data *data = p->private;
+ struct obd_job_stats *stats = data->pjd_stats;
struct job_stat *job;
- struct list_head *next;
+ struct list_head *cur;
++*pos;
+ data->pjd_last_pos = *pos;
if (v == SEQ_START_TOKEN) {
- next = stats->ojs_list.next;
+ cur = &stats->ojs_list;
} else {
job = (struct job_stat *)v;
- next = job->js_list.next;
+ cur = &job->js_list;
}
- return next == &stats->ojs_list ? NULL :
- list_entry(next, struct job_stat, js_list);
+ job = list_entry_rcu(cur->next, struct job_stat, js_list);
+ if (&job->js_list == &stats->ojs_list)
+ return NULL;
+
+ return job;
}
/*
static int lprocfs_jobstats_seq_open(struct inode *inode, struct file *file)
{
+ struct lprocfs_jobstats_data *data = NULL;
struct seq_file *seq;
int rc;
rc = seq_open(file, &lprocfs_jobstats_seq_sops);
if (rc)
return rc;
+
+ OBD_ALLOC_PTR(data);
+ if (!data)
+ return -ENOMEM;
+
+ data->pjd_stats = pde_data(inode);
+ data->pjd_last_job = NULL;
+ data->pjd_last_pos = 0;
seq = file->private_data;
- seq->private = pde_data(inode);
+ seq->private = data;
return 0;
}
size_t len, loff_t *off)
{
struct seq_file *seq = file->private_data;
- struct obd_job_stats *stats = seq->private;
+ struct lprocfs_jobstats_data *data = seq->private;
+ struct obd_job_stats *stats = data->pjd_stats;
char jobid[4 * LUSTRE_JOBID_SIZE]; /* all escaped chars, plus ""\n\0 */
char *p1, *p2, *last;
unsigned int c;
static int lprocfs_jobstats_seq_release(struct inode *inode, struct file *file)
{
struct seq_file *seq = file->private_data;
- struct obd_job_stats *stats = seq->private;
+ struct lprocfs_jobstats_data *data = seq->private;
- lprocfs_job_cleanup(stats, false);
+ /* drop the ref of last saved job */
+ if (data->pjd_last_job) {
+ job_putref(data->pjd_last_job);
+ data->pjd_last_pos = 0;
+ data->pjd_last_job = NULL;
+ }
+
+ lprocfs_job_cleanup(data->pjd_stats, false);
+ OBD_FREE_PTR(data);
return lprocfs_seq_release(inode, file);
}
RETURN(-ENOMEM);
INIT_LIST_HEAD(&stats->ojs_list);
- rwlock_init(&stats->ojs_lock);
+ spin_lock_init(&stats->ojs_lock);
stats->ojs_cntr_num = cntr_num;
stats->ojs_cntr_init_fn = init_fn;
/* Store 1/2 the actual interval, since we use that the most, and
local old_jobenv=$($LCTL get_param -n jobid_var)
[ $old_jobenv != $JOBENV ] && jobstats_set $JOBENV
+ stack_trap "jobstats_set $old_jobenv" EXIT
- if [[ $PERM_CMD == *"set_param -P"* ]]; then
- stack_trap "do_facet mgs $PERM_CMD jobid_var=$old_jobenv" EXIT
- else
- stack_trap "do_facet mgs $PERM_CMD \
- $FSNAME.sys.jobid_var=$old_jobenv" EXIT
- fi
changelog_register
+ local old_jobid_name=$($LCTL get_param jobid_name)
+ stack_trap "$LCTL set_param $old_jobid_name" EXIT
+
local old_interval=$(do_facet $SINGLEMDS lctl get_param -n \
mdt.*.job_cleanup_interval | head -n 1)
local new_interval=5
test_205e() {
local ops_comma
local file=$DIR/$tdir/$tfile
+ local -a cli_params
(( $MDS1_VERSION >= $(version_code 2.15.53) )) ||
skip "need lustre >= 2.15.53 for lljobstat"
skip "need lustre >= 2.15.53 for lljobstat"
verify_yaml_available || skip_env "YAML verification not installed"
+ cli_params=( $($LCTL get_param jobid_name jobid_var) )
+ $LCTL set_param jobid_var=nodelocal jobid_name=205e.%e.%u
+ stack_trap "$LCTL set_param ${cli_params[*]}" EXIT
+
mkdir_on_mdt0 $DIR/$tdir || error "failed to create dir"
$LFS setstripe -E EOF -i 0 -c 1 $file ||
# verify that job dd.0 does exist and has some ops on ost1
# typically this line is like:
- # - dd.0: {ops: 20, ...}
+ # - 205e.dd.0: {ops: 20, ...}
ops_comma=$(do_facet ost1 "lljobstat -n 1 -i 0 -c 1000" |
- awk '$2=="dd.0:" {print $4}')
+ awk '$2=="205e.dd.0:" {print $4}')
(( ${ops_comma%,} >= 10 )) ||
- error "cannot find job dd.0 with ops >= 10"
+ error "cannot find job 205e.dd.0 with ops >= 10"
}
run_test 205e "verify the output of lljobstat"
}
run_test 205f "verify qos_ost_weights YAML format "
+__test_205_jobstats_dump() {
+ local -a pids
+ local nbr_instance=$1
+
+ while true; do
+ if (( ${#pids[@]} >= nbr_instance )); then
+ wait ${pids[@]}
+ pids=()
+ fi
+
+ do_facet mds1 "$LCTL get_param mdt.*.job_stats > /dev/null" &
+ pids+=( $! )
+ done
+}
+
+__test_205_cleanup() {
+ kill $@
+ # Clear all job entries
+ do_facet mds1 "$LCTL set_param mdt.*.job_stats=clear"
+}
+
+test_205g() {
+ local -a mds1_params
+ local -a cli_params
+ local pids
+ local interval=5
+
+ mds1_params=( $(do_facet mds1 $LCTL get_param mdt.*.job_cleanup_interval) )
+ do_facet mds1 $LCTL set_param mdt.*.job_cleanup_interval=$interval
+ stack_trap "do_facet mds1 $LCTL set_param ${mds1_params[*]}" EXIT
+
+ cli_params=( $($LCTL get_param jobid_name jobid_var) )
+ $LCTL set_param jobid_var=TEST205G_ID jobid_name=%j.%p
+ stack_trap "$LCTL set_param ${cli_params[*]}" EXIT
+
+ # start jobs loop
+ export TEST205G_ID=205g
+ stack_trap "unset TEST205G_ID" EXIT
+ while true; do
+ printf $DIR/$tfile.{0001..1000} | xargs -P10 -n1 touch
+ done & pids="$! "
+
+ __test_205_jobstats_dump 4 & pids+="$! "
+ stack_trap "__test_205_cleanup $pids" EXIT INT
+
+ [[ $SLOW == "no" ]] && sleep 90 || sleep 240
+}
+run_test 205g "stress test for job_stats procfile"
+
# LU-1480, LU-1773 and LU-1657
test_206() {
mkdir -p $DIR/$tdir