From: Niu Yawei Date: Fri, 10 Jul 2015 02:14:33 +0000 (-0400) Subject: LU-5946 lprocfs: free expired jobstats after /proc read X-Git-Tag: 2.7.57~35 X-Git-Url: https://git.whamcloud.com/?a=commitdiff_plain;h=dd99ddf9f22921535c3f083e91bfc06a566e382b;p=fs%2Flustre-release.git LU-5946 lprocfs: free expired jobstats after /proc read Currently, jobstats for expired jobids are only freed when a new jobid arrives. If no new jobids have arrived in a long time, the expired jobstats won't be freed even though the /proc/.../job_stats file is read repeatedly. This shouldn't be a problem for steady job submission patterns, but if there is a big gap in new jobid arrival then expired jobids shouldn't be left around such a long time. Free expired stats in lprocfs_jobstats_seq_release() after job_stats is read, and move normal expiry to the arrival of new jobids instead of on every stat to reduce overhead. If jobs arrive frequently then expired jobs will also be freed frequently, and if jobs arrive slowly then there won't be many expired jobs accumulating. Rename job_iter_callback() to job_cleanup_iter_callback() for name consistency with its caller and to make it clearer what it is for. Instead of just passing a "bool force" param to lprocfs_job_cleanup(), pass the number of seconds before which stats should be expired. That allows lprocfs_job_cleanup() to be used at all callers instead of having to have custom iterators in several places. Avoid having multiple threads do cleanup of the jobstats concurrently. This is not itself dangerous, since hash locking avoids races, but it is inefficient to have multiple threads contending on the hash locks. Add comment blocks for modified functions. Remove some LASSERT() calls and replace with checks and error returns, preferably at initialization time rather than during later usage. Fix miscellaneous coding style issues. Add a test for jobstats expiry after /proc read, and add a DNE test. 
Signed-off-by: Andreas Dilger Signed-off-by: Niu Yawei Change-Id: Icfd65ed7cd7beb26a9b566667e2686afe23ebbe5 Reviewed-on: http://review.whamcloud.com/12821 Tested-by: Jenkins Tested-by: Maloo Reviewed-by: John L. Hammond --- diff --git a/lustre/include/lprocfs_status.h b/lustre/include/lprocfs_status.h index f12dd93..93da0d0 100644 --- a/lustre/include/lprocfs_status.h +++ b/lustre/include/lprocfs_status.h @@ -386,13 +386,14 @@ static inline void s2dhms(struct dhms *ts, time_t secs) typedef void (*cntr_init_callback)(struct lprocfs_stats *stats); struct obd_job_stats { - struct cfs_hash *ojs_hash; - struct list_head ojs_list; - rwlock_t ojs_lock; /* protect the obj_list */ - cntr_init_callback ojs_cntr_init_fn; - int ojs_cntr_num; - int ojs_cleanup_interval; - time_t ojs_last_cleanup; + struct cfs_hash *ojs_hash; /* hash of jobids */ + struct list_head ojs_list; /* list of job_stat structs */ + rwlock_t ojs_lock; /* protect ojs_list/js_list */ + unsigned int ojs_cleanup_interval;/* seconds before expiry */ + time_t ojs_last_cleanup; /* previous cleanup time */ + cntr_init_callback ojs_cntr_init_fn;/* lprocfs_stats initializer */ + unsigned short ojs_cntr_num; /* number of stats in struct */ + bool ojs_cleaning; /* currently expiring stats */ }; #ifdef CONFIG_PROC_FS diff --git a/lustre/obdclass/lprocfs_jobstats.c b/lustre/obdclass/lprocfs_jobstats.c index 6bdf476..6dbf3da 100644 --- a/lustre/obdclass/lprocfs_jobstats.c +++ b/lustre/obdclass/lprocfs_jobstats.c @@ -64,13 +64,13 @@ */ struct job_stat { - struct hlist_node js_hash; - struct list_head js_list; - atomic_t js_refcount; - char js_jobid[LUSTRE_JOBID_SIZE]; - time_t js_timestamp; /* seconds */ - struct lprocfs_stats *js_stats; - struct obd_job_stats *js_jobstats; + struct hlist_node js_hash; /* hash struct for this jobid */ + struct list_head js_list; /* on ojs_list, with ojs_lock */ + atomic_t js_refcount; /* num users of this struct */ + char js_jobid[LUSTRE_JOBID_SIZE]; /* job name */ + time_t 
js_timestamp; /* seconds of most recent stat*/ + struct lprocfs_stats *js_stats; /* per-job statistics */ + struct obd_job_stats *js_jobstats; /* for accessing ojs_lock */ }; static unsigned @@ -109,7 +109,7 @@ static void job_stat_get(struct cfs_hash *hs, struct hlist_node *hnode) static void job_free(struct job_stat *job) { LASSERT(atomic_read(&job->js_refcount) == 0); - LASSERT(job->js_jobstats); + LASSERT(job->js_jobstats != NULL); write_lock(&job->js_jobstats->ojs_lock); list_del_init(&job->js_list); @@ -148,43 +148,100 @@ static struct cfs_hash_ops job_stats_hash_ops = { .hs_exit = job_stat_exit, }; -static int job_iter_callback(struct cfs_hash *hs, struct cfs_hash_bd *bd, - struct hlist_node *hnode, void *data) +/** + * Jobstats expiry iterator to clean up old jobids + * + * Called for each job_stat structure on this device, it should delete stats + * older than the specified \a oldest_time in seconds. If \a oldest_time is + * in the future then this will delete all statistics (e.g. during shutdown). + * + * \param[in] hs hash of all jobids on this device + * \param[in] bd hash bucket containing this jobid + * \param[in] hnode hash structure for this jobid + * \param[in] data pointer to stats expiry time in seconds + */ +static int job_cleanup_iter_callback(struct cfs_hash *hs, + struct cfs_hash_bd *bd, + struct hlist_node *hnode, void *data) { - time_t oldest = *((time_t *)data); + time_t oldest_time = *((time_t *)data); struct job_stat *job; job = hlist_entry(hnode, struct job_stat, js_hash); - if (!oldest || job->js_timestamp < oldest) + if (job->js_timestamp < oldest_time) cfs_hash_bd_del_locked(hs, bd, hnode); return 0; } -static void lprocfs_job_cleanup(struct obd_job_stats *stats, bool force) +/** + * Clean up jobstats that were updated more than \a before seconds ago. + * + * Since this function may be called frequently, do not scan all of the + * jobstats on each call, only twice per cleanup interval. 
That means stats + * may be around on average cleanup_interval / 4 longer than necessary, + * but that is not considered harmful. + * + * If \a before is negative then this will force clean up all jobstats due + * to the expiry time being in the future (e.g. at shutdown). + * + * If there is already another thread doing jobstats cleanup, don't try to + * do this again in the current thread unless this is a force cleanup. + * + * \param[in] stats stucture tracking all job stats for this device + * \param[in] before expire jobstats updated more than this many seconds ago + */ +static void lprocfs_job_cleanup(struct obd_job_stats *stats, int before) { - time_t oldest, now; + time_t now = cfs_time_current_sec(); + time_t oldest; - if (stats->ojs_cleanup_interval == 0) - return; + if (likely(before >= 0)) { + unsigned int cleanup_interval = stats->ojs_cleanup_interval; + + if (cleanup_interval == 0 || before == 0) + return; - now = cfs_time_current_sec(); - if (!force && now < stats->ojs_last_cleanup + - stats->ojs_cleanup_interval) + if (now < stats->ojs_last_cleanup + cleanup_interval / 2) + return; + + if (stats->ojs_cleaning) + return; + } + + write_lock(&stats->ojs_lock); + if (before >= 0 && stats->ojs_cleaning) { + write_unlock(&stats->ojs_lock); return; + } - oldest = now - stats->ojs_cleanup_interval; - cfs_hash_for_each_safe(stats->ojs_hash, job_iter_callback, + stats->ojs_cleaning = true; + write_unlock(&stats->ojs_lock); + + /* Can't hold ojs_lock over hash iteration, since it is grabbed by + * job_cleanup_iter_callback() + * ->cfs_hash_bd_del_locked() + * ->job_putref() + * ->job_free() + * + * Holding ojs_lock isn't necessary for safety of the hash iteration, + * since locking of the hash is handled internally, but there isn't + * any benefit to having multiple threads doing cleanup at one time. 
+ */ + oldest = now - before; + cfs_hash_for_each_safe(stats->ojs_hash, job_cleanup_iter_callback, &oldest); + + write_lock(&stats->ojs_lock); + stats->ojs_cleaning = false; stats->ojs_last_cleanup = cfs_time_current_sec(); + write_unlock(&stats->ojs_lock); } static struct job_stat *job_alloc(char *jobid, struct obd_job_stats *jobs) { struct job_stat *job; - LASSERT(jobs->ojs_cntr_num && jobs->ojs_cntr_init_fn); - OBD_ALLOC_PTR(job); if (job == NULL) return NULL; @@ -214,11 +271,13 @@ int lprocfs_job_stats_log(struct obd_device *obd, char *jobid, struct job_stat *job, *job2; ENTRY; - LASSERT(stats && stats->ojs_hash); + LASSERT(stats != NULL); + LASSERT(stats->ojs_hash != NULL); - lprocfs_job_cleanup(stats, false); + if (event >= stats->ojs_cntr_num) + RETURN(-EINVAL); - if (!jobid || !strlen(jobid)) + if (jobid == NULL || strlen(jobid) == 0) RETURN(-EINVAL); if (strlen(jobid) >= LUSTRE_JOBID_SIZE) { @@ -231,6 +290,8 @@ int lprocfs_job_stats_log(struct obd_device *obd, char *jobid, if (job) goto found; + lprocfs_job_cleanup(stats, stats->ojs_cleanup_interval); + job = job_alloc(jobid, stats); if (job == NULL) RETURN(-ENOMEM); @@ -254,11 +315,11 @@ int lprocfs_job_stats_log(struct obd_device *obd, char *jobid, found: LASSERT(stats == job->js_jobstats); - LASSERT(stats->ojs_cntr_num > event); job->js_timestamp = cfs_time_current_sec(); lprocfs_counter_add(job->js_stats, event, amount); job_putref(job); + RETURN(0); } EXPORT_SYMBOL(lprocfs_job_stats_log); @@ -266,11 +327,11 @@ EXPORT_SYMBOL(lprocfs_job_stats_log); void lprocfs_job_stats_fini(struct obd_device *obd) { struct obd_job_stats *stats = &obd->u.obt.obt_jobstats; - time_t oldest = 0; if (stats->ojs_hash == NULL) return; - cfs_hash_for_each_safe(stats->ojs_hash, job_iter_callback, &oldest); + + lprocfs_job_cleanup(stats, -99); cfs_hash_putref(stats->ojs_hash); stats->ojs_hash = NULL; LASSERT(list_empty(&stats->ojs_list)); @@ -323,17 +384,17 @@ static void *lprocfs_jobstats_seq_next(struct seq_file *p, void *v, 
loff_t *pos) * Example of output on MDT: * * job_stats: - * - job_id: test_id.222.25844 + * - job_id: dd.4854 * snapshot_time: 1322494486 - * open: { samples: 3, unit: reqs } - * close: { samples: 3, unit: reqs } + * open: { samples: 1, unit: reqs } + * close: { samples: 1, unit: reqs } * mknod: { samples: 0, unit: reqs } * link: { samples: 0, unit: reqs } * unlink: { samples: 0, unit: reqs } * mkdir: { samples: 0, unit: reqs } * rmdir: { samples: 0, unit: reqs } - * rename: { samples: 1, unit: reqs } - * getattr: { samples: 7, unit: reqs } + * rename: { samples: 0, unit: reqs } + * getattr: { samples: 1, unit: reqs } * setattr: { samples: 0, unit: reqs } * getxattr: { samples: 0, unit: reqs } * setxattr: { samples: 0, unit: reqs } @@ -343,13 +404,13 @@ static void *lprocfs_jobstats_seq_next(struct seq_file *p, void *v, loff_t *pos) * Example of output on OST: * * job_stats: - * - job_id 4854 + * - job_id dd.4854 * snapshot_time: 1322494602 - * read: { samples: 0, unit: bytes, min: 0, max: 0, sum: 0 } - * write: { samples: 1, unit: bytes, min: 10, max: 10, sum: 10 } - * setattr: { samples: 0, unit: reqs } - * punch: { samples: 0, unit: reqs } - * sync: { samples: 0, unit: reqs } + * read: { samples: 0, unit: bytes, min: 0, max: 0, sum: 0 } + * write: { samples: 1, unit: bytes, min: 4096, max: 4096, sum: 4096 } + * setattr: { samples: 0, unit: reqs } + * punch: { samples: 0, unit: reqs } + * sync: { samples: 0, unit: reqs } */ static const char spaces[] = " "; @@ -436,12 +497,14 @@ static ssize_t lprocfs_jobstats_seq_write(struct file *file, struct seq_file *seq = file->private_data; struct obd_job_stats *stats = seq->private; char jobid[LUSTRE_JOBID_SIZE]; - int all = 0; struct job_stat *job; if (len == 0 || len >= LUSTRE_JOBID_SIZE) return -EINVAL; + if (stats->ojs_hash == NULL) + return -ENODEV; + if (copy_from_user(jobid, buf, len)) return -EFAULT; jobid[len] = 0; @@ -450,18 +513,13 @@ static ssize_t lprocfs_jobstats_seq_write(struct file *file, if (jobid[len - 
1] == '\n') jobid[len - 1] = 0; - if (strcmp(jobid, "clear") == 0) - all = 1; + if (strcmp(jobid, "clear") == 0) { + lprocfs_job_cleanup(stats, -99); - LASSERT(stats->ojs_hash); - if (all) { - time_t oldest = 0; - cfs_hash_for_each_safe(stats->ojs_hash, job_iter_callback, - &oldest); return len; } - if (!strlen(jobid)) + if (strlen(jobid) == 0) return -EINVAL; job = cfs_hash_lookup(stats->ojs_hash, jobid); @@ -474,13 +532,35 @@ static ssize_t lprocfs_jobstats_seq_write(struct file *file, return len; } +/** + * Clean up the seq file state when the /proc file is closed. + * + * This also expires old job stats from the cache after they have been + * printed in case the system is idle and not generating new jobstats. + * + * \param[in] inode struct inode for seq file being closed + * \param[in] file struct file for seq file being closed + * + * \retval 0 on success + * \retval negative errno on failure + */ +static int lprocfs_jobstats_seq_release(struct inode *inode, struct file *file) +{ + struct seq_file *seq = file->private_data; + struct obd_job_stats *stats = seq->private; + + lprocfs_job_cleanup(stats, stats->ojs_cleanup_interval); + + return lprocfs_seq_release(inode, file); +} + static const struct file_operations lprocfs_jobstats_seq_fops = { .owner = THIS_MODULE, .open = lprocfs_jobstats_seq_open, .read = seq_read, .write = lprocfs_jobstats_seq_write, .llseek = seq_lseek, - .release = lprocfs_seq_release, + .release = lprocfs_jobstats_seq_release, }; int lprocfs_job_stats_init(struct obd_device *obd, int cntr_num, @@ -493,9 +573,17 @@ int lprocfs_job_stats_init(struct obd_device *obd, int cntr_num, LASSERT(obd->obd_proc_entry != NULL); LASSERT(obd->obd_type->typ_name); - if (strcmp(obd->obd_type->typ_name, LUSTRE_MDT_NAME) && - strcmp(obd->obd_type->typ_name, LUSTRE_OST_NAME)) { - CERROR("Invalid obd device type.\n"); + if (cntr_num <= 0) + RETURN(-EINVAL); + + if (init_fn == NULL) + RETURN(-EINVAL); + + /* Currently needs to be a target due to the use of 
obt_jobstats. */ + if (strcmp(obd->obd_type->typ_name, LUSTRE_MDT_NAME) != 0 && + strcmp(obd->obd_type->typ_name, LUSTRE_OST_NAME) != 0) { + CERROR("%s: invalid device type %s for job stats: rc = %d\n", + obd->obd_name, obd->obd_type->typ_name, -EINVAL); RETURN(-EINVAL); } stats = &obd->u.obt.obt_jobstats; @@ -534,7 +622,9 @@ int lprocfs_job_interval_seq_show(struct seq_file *m, void *data) struct obd_device *obd = m->private; struct obd_job_stats *stats; - LASSERT(obd != NULL); + if (obd == NULL) + return -ENODEV; + stats = &obd->u.obt.obt_jobstats; return seq_printf(m, "%d\n", stats->ojs_cleanup_interval); } @@ -544,11 +634,14 @@ ssize_t lprocfs_job_interval_seq_write(struct file *file, const char *buffer, size_t count, loff_t *off) { - struct obd_device *obd = ((struct seq_file *)file->private_data)->private; + struct obd_device *obd; struct obd_job_stats *stats; int val, rc; - LASSERT(obd != NULL); + obd = ((struct seq_file *)file->private_data)->private; + if (obd == NULL) + return -ENODEV; + stats = &obd->u.obt.obt_jobstats; rc = lprocfs_write_helper(buffer, count, &val); @@ -556,7 +649,7 @@ lprocfs_job_interval_seq_write(struct file *file, const char *buffer, return rc; stats->ojs_cleanup_interval = val; - lprocfs_job_cleanup(stats, true); + lprocfs_job_cleanup(stats, stats->ojs_cleanup_interval); return count; } EXPORT_SYMBOL(lprocfs_job_interval_seq_write); diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index 0a9e496..bbf65c3 100644 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -11440,36 +11440,41 @@ else fi verify_jobstats() { - local cmd=$1 - local target=$2 - - # clear old jobstats - do_facet $SINGLEMDS lctl set_param mdt.*.job_stats="clear" - do_facet ost1 lctl set_param obdfilter.*.job_stats="clear" - - # use a new JobID for this test, or we might see an old one - [ "$JOBENV" = "FAKE_JOBID" ] && FAKE_JOBID=test_id.$testnum.$RANDOM + local cmd=($1) + shift + local facets="$@" + +# we don't really need to clear the stats for 
this test to work, since each +# command has a unique jobid, but it makes debugging easier if needed. +# for facet in $facets; do +# local dev=$(convert_facet2label $facet) +# # clear old jobstats +# do_facet $facet lctl set_param *.$dev.job_stats="clear" +# done + + # use a new JobID for each test, or we might see an old one + [ "$JOBENV" = "FAKE_JOBID" ] && + FAKE_JOBID=id.$testnum.$(basename ${cmd[0]}).$RANDOM JOBVAL=${!JOBENV} - log "Test: $cmd" + log "Test: ${cmd[*]}" log "Using JobID environment variable $JOBENV=$JOBVAL" if [ $JOBENV = "FAKE_JOBID" ]; then - FAKE_JOBID=$JOBVAL $cmd + FAKE_JOBID=$JOBVAL ${cmd[*]} else - $cmd + ${cmd[*]} fi - if [ "$target" = "mdt" -o "$target" = "both" ]; then - FACET="$SINGLEMDS" # will need to get MDS number for DNE - do_facet $FACET lctl get_param mdt.*.job_stats | - grep $JOBVAL || error "No job stats found on MDT $FACET" - fi - if [ "$target" = "ost" -o "$target" = "both" ]; then - FACET=ost1 - do_facet $FACET lctl get_param obdfilter.*.job_stats | - grep $JOBVAL || error "No job stats found on OST $FACET" - fi + # all files are created on OST0000 + for facet in $facets; do + local stats="*.$(convert_facet2label $facet).job_stats" + if [ $(do_facet $facet lctl get_param $stats | + grep -c $JOBVAL) -ne 1 ]; then + do_facet $facet lctl get_param $stats + error "No jobstats for $JOBVAL found on $facet::$stats" + fi + done } jobstats_set() { @@ -11479,6 +11484,13 @@ jobstats_set() { wait_update $HOSTNAME "$LCTL get_param -n jobid_var" $NEW_JOBENV } +cleanup_205() { + do_facet $SINGLEMDS \ + $LCTL set_param mdt.*.job_cleanup_interval=$OLD_INTERVAL + [ $OLD_JOBENV != $JOBENV ] && jobstats_set $OLD_JOBENV + do_facet $SINGLEMDS lctl --device $MDT0 changelog_deregister $CL_USER +} + test_205() { # Job stats [ $PARALLEL == "yes" ] && skip "skip parallel run" && return remote_mgs_nodsh && skip "remote MGS with nodsh" && return @@ -11486,47 +11498,66 @@ test_205() { # Job stats skip "Server doesn't support jobstats" && return 0 [[ 
$JOBID_VAR = disable ]] && skip "jobstats is disabled" && return - local cmd OLD_JOBENV=$($LCTL get_param -n jobid_var) if [ $OLD_JOBENV != $JOBENV ]; then jobstats_set $JOBENV - trap jobstats_set EXIT + trap cleanup_205 EXIT fi - local user=$(do_facet $SINGLEMDS $LCTL --device $MDT0 \ - changelog_register -n) - echo "Registered as changelog user $user" + CL_USER=$(do_facet $SINGLEMDS lctl --device $MDT0 changelog_register -n) + echo "Registered as changelog user $CL_USER" + + OLD_INTERVAL=$(do_facet $SINGLEMDS \ + lctl get_param -n mdt.*.job_cleanup_interval) + local interval_new=5 + do_facet $SINGLEMDS \ + $LCTL set_param mdt.*.job_cleanup_interval=$interval_new + local start=$SECONDS + local cmd # mkdir - cmd="mkdir $DIR/$tfile" - verify_jobstats "$cmd" "mdt" + cmd="mkdir $DIR/$tdir" + verify_jobstats "$cmd" "$SINGLEMDS" # rmdir - cmd="rm -fr $DIR/$tfile" - verify_jobstats "$cmd" "mdt" + cmd="rmdir $DIR/$tdir" + verify_jobstats "$cmd" "$SINGLEMDS" + # mkdir on secondary MDT + if [ $MDSCOUNT -gt 1 ]; then + cmd="lfs mkdir -i 1 $DIR/$tdir.remote" + verify_jobstats "$cmd" "mds2" + fi # mknod cmd="mknod $DIR/$tfile c 1 3" - verify_jobstats "$cmd" "mdt" + verify_jobstats "$cmd" "$SINGLEMDS" # unlink cmd="rm -f $DIR/$tfile" - verify_jobstats "$cmd" "mdt" + verify_jobstats "$cmd" "$SINGLEMDS" + # create all files on OST0000 so verify_jobstats can find OST stats # open & close cmd="$SETSTRIPE -i 0 -c 1 $DIR/$tfile" - verify_jobstats "$cmd" "mdt" + verify_jobstats "$cmd" "$SINGLEMDS" # setattr cmd="touch $DIR/$tfile" - verify_jobstats "$cmd" "both" + verify_jobstats "$cmd" "$SINGLEMDS ost1" # write cmd="dd if=/dev/zero of=$DIR/$tfile bs=1M count=1 oflag=sync" - verify_jobstats "$cmd" "ost" + verify_jobstats "$cmd" "ost1" # read cmd="dd if=$DIR/$tfile of=/dev/null bs=1M count=1 iflag=direct" - verify_jobstats "$cmd" "ost" + verify_jobstats "$cmd" "ost1" # truncate cmd="$TRUNCATE $DIR/$tfile 0" - verify_jobstats "$cmd" "both" + verify_jobstats "$cmd" "$SINGLEMDS ost1" # 
rename - cmd="mv -f $DIR/$tfile $DIR/jobstats_test_rename" - verify_jobstats "$cmd" "mdt" + cmd="mv -f $DIR/$tfile $DIR/$tdir.rename" + verify_jobstats "$cmd" "$SINGLEMDS" + # jobstats expiry - sleep until old stats should be expired + local left=$((interval_new - (SECONDS - start))) + [ $left -ge 0 ] && echo "sleep $left for expiry" && sleep $((left + 1)) + cmd="mkdir $DIR/$tdir.expire" + verify_jobstats "$cmd" "$SINGLEMDS" + [ $(do_facet $SINGLEMDS lctl get_param *.*.job_stats | + grep -c "job_id.*mkdir") -gt 1 ] && error "old jobstats not expired" # Ensure that jobid are present in changelog (if supported by MDS) if [ $(lustre_version_code $SINGLEMDS) -ge $(version_code 2.6.52) ] @@ -11546,10 +11577,7 @@ test_205() { # Job stats error "Unexpected jobids when jobid_var=$JOBENV" fi - # cleanup - rm -f $DIR/jobstats_test_rename - - [ $OLD_JOBENV != $JOBENV ] && jobstats_set $OLD_JOBENV + cleanup_205 } run_test 205 "Verify job stats" diff --git a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh index 27d0606..9a41a39 100755 --- a/lustre/tests/test-framework.sh +++ b/lustre/tests/test-framework.sh @@ -2981,6 +2981,13 @@ do_nodes() { return ${PIPESTATUS[0]} } +## +# Execute commands on a single service's host +# +# The \a facet (service) may be on a local or remote node, which is +# determined at the time the command is run. +# +# usage: do_facet $facet command [arg ...] do_facet() { local facet=$1 shift