]) # LIBCFS_CRYPTO_HASH_HELPERS
#
-# Kernerl version 4.5-rc3 commit 2fe829aca9d7bed5fd6b49c6a1452e5e486b6cc3dd
+# Kernel version 4.5-rc1 commit 3502cad73c4bbf8f6365d539e814159275252c59
+# introduced rhashtable_replace_fast
+#
+AC_DEFUN([LIBCFS_RHASHTABLE_REPLACE], [
+LB_CHECK_COMPILE([if 'rhashtable_replace_fast' exists],
+rhashtable_replace_fast, [
+	#include <linux/rhashtable.h>
+],[
+	/* compile-only probe: the NULL arguments are never executed */
+	const struct rhashtable_params params = { 0 };
+
+	rhashtable_replace_fast(NULL, NULL, NULL, params);
+],[
+	AC_DEFINE(HAVE_RHASHTABLE_REPLACE, 1,
+		[rhashtable_replace_fast() is available])
+])
+]) # LIBCFS_RHASHTABLE_REPLACE
+
+#
+# Kernel version 4.5-rc3 commit 2fe829aca9d7bed5fd6b49c6a1452e5e486b6cc3dd
# made kset_find_obj() exportable to modules
#
AC_DEFUN([LIBCFS_EXPORT_KSET_FIND_OBJ], [
]) # LIBCFS_RHASHTABLE_INSERT_FAST
#
+# Kernel version 4.7-rc1 commit 8f6fd83c6c5ec66a4a70c728535ddcdfef4f3697
+# added 3rd arg to rhashtable_walk_init
+#
+AC_DEFUN([LIBCFS_RHASHTABLE_WALK_INIT_3ARG], [
+LB_CHECK_COMPILE([if 'rhashtable_walk_init' has 3 args],
+rhashtable_walk_init, [
+	#include <linux/gfp.h>
+	#include <linux/rhashtable.h>
+],[
+	rhashtable_walk_init(NULL, NULL, GFP_KERNEL);
+],[
+	AC_DEFINE(HAVE_3ARG_RHASHTABLE_WALK_INIT, 1,
+		[rhashtable_walk_init() has 3 args])
+])
+]) # LIBCFS_RHASHTABLE_WALK_INIT_3ARG
+
+#
# Kernel version 4.8-rc6 commit ca26893f05e86497a86732768ec53cd38c0819ca
# introduced rhashtable_lookup
#
]) # LIBCFS_STACKTRACE_OPS
#
+# Kernel version 4.9-rc1 commit 246779dd090bd1b74d2652b3a6ca7759f593b27a
+# introduced rhashtable_walk_enter
+#
+AC_DEFUN([LIBCFS_RHASHTABLE_WALK_ENTER], [
+LB_CHECK_COMPILE([if 'rhashtable_walk_enter' exists],
+rhashtable_walk_enter, [
+	#include <linux/rhashtable.h>
+],[
+	rhashtable_walk_enter(NULL, NULL);
+],[
+	AC_DEFINE(HAVE_RHASHTABLE_WALK_ENTER, 1,
+		[rhashtable_walk_enter() is available])
+])
+]) # LIBCFS_RHASHTABLE_WALK_ENTER
+
+#
# Kernel version 4.9 commit 768ae309a96103ed02eb1e111e838c87854d8b51
# mm: replace get_user_pages() write/force parameters with gup_flags
#
]) # LIBCFS_SCHED_HEADERS
#
+# Kernel version 4.11-rc1 commit da20420f83ea0fbcf3d03afda08d971ea1d8a356
+# introduced rht_bucket_var
+#
+AC_DEFUN([LIBCFS_RHT_BUCKET_VAR], [
+LB_CHECK_COMPILE([if 'rht_bucket_var' exists],
+rht_bucket_var, [
+	#include <linux/rhashtable.h>
+],[
+
+	/* compile-only probe: the NULL table is never dereferenced */
+	rht_bucket_var(NULL, 0);
+],[
+	AC_DEFINE(HAVE_RHT_BUCKET_VAR, 1,
+		[rht_bucket_var() is available])
+])
+]) # LIBCFS_RHT_BUCKET_VAR
+
+#
# Kernel version 4.11 commit f9fe1c12d126f9887441fa5bb165046f30ddd4b5
# introduced rhashtable_lookup_get_insert_fast
#
# 4.5
LIBCFS_CRYPTO_HASH_HELPERS
LIBCFS_EXPORT_KSET_FIND_OBJ
+LIBCFS_RHASHTABLE_REPLACE
# 4.6
LIBCFS_BROKEN_HASH_64
LIBCFS_STACKTRACE_OPS_ADDRESS_RETURN_INT
LIBCFS_STRINGHASH
# 4.7
LIBCFS_RHASHTABLE_INSERT_FAST
+LIBCFS_RHASHTABLE_WALK_INIT_3ARG
# 4.8
LIBCFS_RHASHTABLE_LOOKUP
LIBCFS_RHLTABLE
LIBCFS_STACKTRACE_OPS
# 4.9
LIBCFS_GET_USER_PAGES_GUP_FLAGS
+LIBCFS_RHASHTABLE_WALK_ENTER
# 4.10
LIBCFS_HOTPLUG_STATE_MACHINE
# 4.11
LIBCFS_RHASHTABLE_LOOKUP_GET_INSERT_FAST
LIBCFS_SCHED_HEADERS
+LIBCFS_RHT_BUCKET_VAR
# 4.12
LIBCFS_HAVE_PROCESSOR_HEADER
LIBCFS_HAVE_WAIT_BIT_HEADER
#endif /* HAVE_BROKEN_HASH_64 */
+#ifndef HAVE_RHASHTABLE_WALK_ENTER
+/*
+ * Compat shim for kernels before 4.9 (commit 246779dd090b): provide
+ * rhashtable_walk_enter() on top of rhashtable_walk_init(), which
+ * gained a gfp_t third argument in 4.7 (HAVE_3ARG_RHASHTABLE_WALK_INIT).
+ */
+static int rhashtable_walk_enter(struct rhashtable *ht,
+				 struct rhashtable_iter *iter)
+{
+#ifdef HAVE_3ARG_RHASHTABLE_WALK_INIT
+	return rhashtable_walk_init(ht, iter, GFP_KERNEL);
+#else
+	return rhashtable_walk_init(ht, iter);
+#endif
+}
+#endif
+
#ifndef HAVE_RHLTABLE
struct rhlist_head {
struct rhash_head rhead;
static inline void rhltable_walk_enter(struct rhltable *hlt,
struct rhashtable_iter *iter)
{
- rhashtable_walk_init(&hlt->ht, iter);
+ rhashtable_walk_enter(&hlt->ht, iter);
}
#endif /* !HAVE_RHLTABLE */
}
#endif /* !HAVE_RHASHTABLE_LOOKUP */
+#ifndef HAVE_RHT_BUCKET_VAR
+/*
+ * Compat for kernels before 4.11 (commit da20420f83ea): return the
+ * address of the bucket head for @hash directly from the bucket array.
+ */
+static inline struct rhash_head __rcu **rht_bucket_var(
+	struct bucket_table *tbl, unsigned int hash)
+{
+	return &tbl->buckets[hash];
+}
+#endif
+
+#ifndef HAVE_RHASHTABLE_REPLACE
+/*
+ * Backport of rhashtable_replace_fast() from kernel 4.5-rc1 commit
+ * 3502cad73c4b for older kernels.  Keep this copy in sync with the
+ * upstream lib/rhashtable.c implementation; avoid local changes.
+ */
+/* Internal function, please use rhashtable_replace_fast() instead */
+static inline int __rhashtable_replace_fast(
+	struct rhashtable *ht, struct bucket_table *tbl,
+	struct rhash_head *obj_old, struct rhash_head *obj_new,
+	const struct rhashtable_params params)
+{
+	struct rhash_head __rcu **pprev;
+	struct rhash_head *he;
+	spinlock_t *lock;
+	unsigned int hash;
+	int err = -ENOENT;
+
+	/* Minimally, the old and new objects must have same hash
+	 * (which should mean identifiers are the same).
+	 */
+	hash = rht_head_hashfn(ht, tbl, obj_old, params);
+	if (hash != rht_head_hashfn(ht, tbl, obj_new, params))
+		return -EINVAL;
+
+	lock = rht_bucket_lock(tbl, hash);
+
+	spin_lock_bh(lock);
+
+	/* scan the bucket chain for obj_old and splice obj_new in its place */
+	pprev = rht_bucket_var(tbl, hash);
+	rht_for_each_continue(he, *pprev, tbl, hash) {
+		if (he != obj_old) {
+			pprev = &he->next;
+			continue;
+		}
+
+		rcu_assign_pointer(obj_new->next, obj_old->next);
+		rcu_assign_pointer(*pprev, obj_new);
+		err = 0;
+		break;
+	}
+
+	spin_unlock_bh(lock);
+
+	return err;
+}
+
+/**
+ * rhashtable_replace_fast - replace an object in hash table
+ * @ht:		hash table
+ * @obj_old:	pointer to hash head inside object being replaced
+ * @obj_new:	pointer to hash head inside object which is new
+ * @params:	hash table parameters
+ *
+ * Replacing an object doesn't affect the number of elements in the hash table
+ * or bucket, so we don't need to worry about shrinking or expanding the
+ * table here.
+ *
+ * Returns zero on success, -ENOENT if the entry could not be found,
+ * -EINVAL if hash is not the same for the old and new objects.
+ */
+static inline int rhashtable_replace_fast(
+	struct rhashtable *ht, struct rhash_head *obj_old,
+	struct rhash_head *obj_new,
+	const struct rhashtable_params params)
+{
+	struct bucket_table *tbl;
+	int err;
+
+	rcu_read_lock();
+
+	tbl = rht_dereference_rcu(ht->tbl, ht);
+
+	/* Because we have already taken (and released) the bucket
+	 * lock in old_tbl, if we find that future_tbl is not yet
+	 * visible then that guarantees the entry to still be in
+	 * the old tbl if it exists.
+	 */
+	while ((err = __rhashtable_replace_fast(ht, tbl, obj_old,
+						obj_new, params)) &&
+	       (tbl = rht_dereference_rcu(tbl->future_tbl, ht)))
+		;
+
+	rcu_read_unlock();
+
+	return err;
+}
+#endif /* HAVE_RHASHTABLE_REPLACE */
+
#endif /* __LIBCFS_LINUX_HASH_H__ */
#define JOBSTATS_DISABLE "disable"
#define JOBSTATS_PROCNAME_UID "procname_uid"
#define JOBSTATS_NODELOCAL "nodelocal"
+#define JOBSTATS_SESSION "session"
typedef void (*cntr_init_callback)(struct lprocfs_stats *stats);
void lustre_jobid_clear(const char *jobid);
void jobid_cache_fini(void);
int jobid_cache_init(void);
+char *jobid_current(void);
+int jobid_set_current(char *jobid);
struct lu_device_type;
};
/*
+ * Jobid can be set for a session (see setsid(2)) by writing to
+ * a sysfs file from any process in that session.
+ * The jobids are stored in a hash table indexed by the relevant
+ * struct pid. We periodically look for entries where the pid has
+ * no PIDTYPE_SID tasks any more, and prune them. This happens within
+ * 5 seconds of a jobid being added, and every 5 minutes when jobids exist,
+ * but none are added.
+ */
+#define JOBID_EXPEDITED_CLEAN (5)
+#define JOBID_BACKGROUND_CLEAN (5 * 60)
+
+struct session_jobid {
+	struct pid *sj_session;		/* hash key: session leader pid (counted ref) */
+	struct rhash_head sj_linkage;	/* linkage in session_jobids table */
+	struct rcu_head sj_rcu;		/* deferred free via kfree_rcu() */
+	char sj_jobid[1];		/* NUL-terminated jobid; extra bytes allocated past end */
+};
+
+/* table is keyed by the struct pid pointer value itself */
+static const struct rhashtable_params jobid_params = {
+	.key_len = sizeof(struct pid *),
+	.key_offset = offsetof(struct session_jobid, sj_session),
+	.head_offset = offsetof(struct session_jobid, sj_linkage),
+};
+
+/* maps a session's struct pid to its jobid string */
+static struct rhashtable session_jobids;
+
+/*
+ * jobid_current must be called with rcu_read_lock held.
+ * if it returns non-NULL, the string can only be used
+ * until rcu_read_unlock is called.
+ */
+char *jobid_current(void)
+{
+	struct pid *sid = task_session(current);
+	struct session_jobid *sj;
+
+	/* the key is the struct pid pointer value, so look up by &sid */
+	sj = rhashtable_lookup_fast(&session_jobids, &sid, jobid_params);
+	if (sj)
+		return sj->sj_jobid;
+	return NULL;
+}
+
+static void jobid_prune_expedite(void);
+/*
+ * jobid_set_current will try to add a new entry
+ * to the table. If one exists with the same key, the
+ * jobid will be replaced
+ */
+int jobid_set_current(char *jobid)
+{
+	struct pid *sid;
+	struct session_jobid *sj, *origsj;
+	int ret;
+	int len = strlen(jobid);
+
+	/* sj_jobid[1] already holds the NUL byte; add room for len chars */
+	sj = kmalloc(sizeof(*sj) + len, GFP_KERNEL);
+	if (!sj)
+		return -ENOMEM;
+	rcu_read_lock();
+	sid = task_session(current);
+	sj->sj_session = get_pid(sid);
+	strncpy(sj->sj_jobid, jobid, len+1);
+	origsj = rhashtable_lookup_get_insert_fast(&session_jobids,
+						   &sj->sj_linkage,
+						   jobid_params);
+	if (origsj == NULL) {
+		/* successful insert */
+		rcu_read_unlock();
+		jobid_prune_expedite();
+		return 0;
+	}
+
+	if (IS_ERR(origsj)) {
+		/* insert failed: drop our pid reference and the new entry */
+		put_pid(sj->sj_session);
+		kfree(sj);
+		rcu_read_unlock();
+		return PTR_ERR(origsj);
+	}
+	/* an entry for this session already exists: swap it for the new one */
+	ret = rhashtable_replace_fast(&session_jobids,
+				      &origsj->sj_linkage,
+				      &sj->sj_linkage,
+				      jobid_params);
+	if (ret) {
+		put_pid(sj->sj_session);
+		kfree(sj);
+		rcu_read_unlock();
+		return ret;
+	}
+	put_pid(origsj->sj_session);
+	rcu_read_unlock();
+	/* old entry may still be visible to RCU readers; free after grace period */
+	kfree_rcu(origsj, sj_rcu);
+	jobid_prune_expedite();
+
+	return 0;
+}
+
+/* Per-entry destructor passed to rhashtable_free_and_destroy(). */
+static void jobid_free(void *vsj, void *arg)
+{
+	struct session_jobid *sj = vsj;
+
+	put_pid(sj->sj_session);
+	kfree(sj);
+}
+
+static void jobid_prune(struct work_struct *work);
+static DECLARE_DELAYED_WORK(jobid_prune_work, jobid_prune);
+static int jobid_prune_expedited;
+/*
+ * Walk the session jobid table and drop entries whose session has no
+ * remaining PIDTYPE_SID tasks; reschedule ourselves while any entries
+ * remain.
+ */
+static void jobid_prune(struct work_struct *work)
+{
+	int remaining = 0;
+	struct rhashtable_iter iter;
+	struct session_jobid *sj;
+
+	jobid_prune_expedited = 0;
+	rhashtable_walk_enter(&session_jobids, &iter);
+	rhashtable_walk_start(&iter);
+	while ((sj = rhashtable_walk_next(&iter)) != NULL) {
+		if (IS_ERR(sj)) {
+			/* -EAGAIN means the table is being resized;
+			 * the iterator is still valid, so keep walking.
+			 * Do not dereference the ERR_PTR.
+			 */
+			if (PTR_ERR(sj) == -EAGAIN)
+				continue;
+			break;
+		}
+		if (!hlist_empty(&sj->sj_session->tasks[PIDTYPE_SID])) {
+			remaining++;
+			continue;
+		}
+		if (rhashtable_remove_fast(&session_jobids,
+					   &sj->sj_linkage,
+					   jobid_params) == 0) {
+			put_pid(sj->sj_session);
+			kfree_rcu(sj, sj_rcu);
+		}
+	}
+	rhashtable_walk_stop(&iter);
+	rhashtable_walk_exit(&iter);
+	if (remaining)
+		schedule_delayed_work(&jobid_prune_work,
+				      cfs_time_seconds(JOBID_BACKGROUND_CLEAN));
+}
+
+/*
+ * Pull the prune work forward so a newly added jobid is checked within
+ * JOBID_EXPEDITED_CLEAN seconds; only expedite once until the work runs.
+ */
+static void jobid_prune_expedite(void)
+{
+	if (!jobid_prune_expedited) {
+		jobid_prune_expedited = 1;
+		mod_delayed_work(system_wq, &jobid_prune_work,
+				 cfs_time_seconds(JOBID_EXPEDITED_CLEAN));
+	}
+}
+
+/*
* Get jobid of current process by reading the environment variable
* stored in between the "env_start" & "env_end" of task struct.
*
/*
* jobid_get_from_cache()
*
- * Returns contents of jobid_var from process environment for current PID.
- * This will be cached for some time to avoid overhead scanning environment.
+ * Returns contents of jobid_var from process environment for current PID,
+ * or from the per-session jobid table.
+ * Values fetched from the process environment will be cached for some time to avoid
+ * the overhead of scanning the environment.
*
* Return: -ENOMEM if allocating a new pidmap fails
* -ENOENT if no entry could be found
int rc = 0;
ENTRY;
+ if (strcmp(obd_jobid_var, JOBSTATS_SESSION) == 0) {
+ char *jid;
+
+ rcu_read_lock();
+ jid = jobid_current();
+ if (jid) {
+ strlcpy(jobid, jid, joblen);
+ joblen = strlen(jobid);
+ } else {
+ rc = -ENOENT;
+ }
+ rcu_read_unlock();
+ GOTO(out, rc);
+ }
+
LASSERT(jobid_hash != NULL);
/* scan hash periodically to remove old PID entries from cache */
HASH_JOBID_MAX_BITS, HASH_JOBID_BKT_BITS,
0, CFS_HASH_MIN_THETA, CFS_HASH_MAX_THETA,
&jobid_hash_ops, CFS_HASH_DEFAULT);
- if (!jobid_hash)
+ if (!jobid_hash) {
rc = -ENOMEM;
+ } else {
+ rc = rhashtable_init(&session_jobids, &jobid_params);
+ if (rc) {
+ cfs_hash_putref(jobid_hash);
+ jobid_hash = NULL;
+ }
+ }
RETURN(rc);
}
jobid_hash = NULL;
spin_unlock(&jobid_hash_lock);
+ cancel_delayed_work_sync(&jobid_prune_work);
+
if (tmp_hash != NULL) {
cfs_hash_cond_del(tmp_hash, jobid_should_free_item, NULL);
cfs_hash_putref(tmp_hash);
+
+ rhashtable_free_and_destroy(&session_jobids, jobid_free, NULL);
}
+
EXIT;
}
EXPORT_SYMBOL(jobid_cache_fini);
*
* Fill in @jobid string based on the value of obd_jobid_var:
* JOBSTATS_DISABLE: none
- * JOBSTATS_NODELOCAL: content of obd_jobid_node (jobid_interpret_string())
+ * JOBSTATS_NODELOCAL: content of obd_jobid_name (jobid_interpret_string())
* JOBSTATS_PROCNAME_UID: process name/UID
+ * JOBSTATS_SESSION: per-session value set by
+ * /sys/fs/lustre/jobid_this_session
* anything else: look up obd_jobid_var in the processes environment
*
* Return -ve error number, 0 on success.
rc = jobid_interpret_string(obd_jobid_name, jobid, joblen);
} else if (strcmp(obd_jobid_var, JOBSTATS_PROCNAME_UID) == 0) {
rc = jobid_interpret_string("%e.%u", jobid, joblen);
+	} else if (strcmp(obd_jobid_var, JOBSTATS_SESSION) == 0) {
+		char *jid;
+
+		rcu_read_lock();
+		jid = jobid_current();
+		if (jid)
+			/* jobid is a char * parameter, so sizeof(jobid) is
+			 * only the pointer size; bound the copy by the
+			 * caller-supplied buffer length instead.
+			 */
+			strlcpy(jobid, jid, joblen);
+		rcu_read_unlock();
} else if (jobid_name_is_valid(current_comm())) {
/*
* obd_jobid_var holds the jobid environment variable name.
return count;
}
+/* sysfs read of jobid_this_session: print current session's jobid or -ENOENT */
+static ssize_t jobid_this_session_show(struct kobject *kobj,
+				       struct attribute *attr,
+				       char *buf)
+{
+	char *jid;
+	int ret = -ENOENT;
+
+	/* jobid_current()'s result is only valid under rcu_read_lock */
+	rcu_read_lock();
+	jid = jobid_current();
+	if (jid)
+		ret = snprintf(buf, PAGE_SIZE, "%s\n", jid);
+	rcu_read_unlock();
+	return ret;
+}
+
+/* sysfs write of jobid_this_session: set the jobid for the caller's session */
+static ssize_t jobid_this_session_store(struct kobject *kobj,
+					struct attribute *attr,
+					const char *buffer,
+					size_t count)
+{
+	char *jobid;
+	int len;
+	int ret;
+
+	if (!count || count > LUSTRE_JOBID_SIZE)
+		return -EINVAL;
+
+	jobid = kstrndup(buffer, count, GFP_KERNEL);
+	if (!jobid)
+		return -ENOMEM;
+	/* only the first token is used; terminate at newline or space */
+	len = strcspn(jobid, "\n ");
+	jobid[len] = '\0';
+	ret = jobid_set_current(jobid);
+	kfree(jobid);
+
+	return ret ?: count;
+}
+
/* Root for /sys/kernel/debug/lustre */
struct dentry *debugfs_lustre_root;
EXPORT_SYMBOL_GPL(debugfs_lustre_root);
LUSTRE_RO_ATTR(health_check);
LUSTRE_RW_ATTR(jobid_var);
LUSTRE_RW_ATTR(jobid_name);
+LUSTRE_RW_ATTR(jobid_this_session);
static struct attribute *lustre_attrs[] = {
&lustre_attr_version.attr,
&lustre_attr_health_check.attr,
&lustre_attr_jobid_name.attr,
&lustre_attr_jobid_var.attr,
+ &lustre_attr_jobid_this_session.attr,
&lustre_sattr_timeout.u.attr,
&lustre_attr_max_dirty_mb.attr,
&lustre_sattr_debug_peer_on_timeout.u.attr,