-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
+// SPDX-License-Identifier: GPL-2.0
+
/*
* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
* Copyright (c) 2011, 2014, Intel Corporation.
*
* Copyright 2017 Cray Inc, all rights reserved.
- * Author: Ben Evans.
+ */
+
+/*
+ * This file is part of Lustre, http://www.lustre.org/
*
* Store PID->JobID mappings
+ *
+ * Author: Ben Evans.
*/
#define DEBUG_SUBSYSTEM S_RPC
#include <linux/user_namespace.h>
-#ifdef HAVE_UIDGID_HEADER
#include <linux/uidgid.h>
-#endif
#include <linux/utsname.h>
#include <libcfs/libcfs.h>
+#include <cfs_hash.h>
#include <obd_support.h>
#include <obd_class.h>
#include <lustre_net.h>
static struct cfs_hash *jobid_hash;
static struct cfs_hash_ops jobid_hash_ops;
-spinlock_t jobid_hash_lock;
+static spinlock_t jobid_hash_lock;
#define RESCAN_INTERVAL 30
#define DELETE_INTERVAL 300
spinlock_t jp_lock; /* protects jp_jobid */
char jp_jobid[LUSTRE_JOBID_SIZE];
unsigned int jp_joblen;
- atomic_t jp_refcount;
+ struct kref jp_refcount;
pid_t jp_pid;
};
/*
+ * Jobid can be set for a session (see setsid(2)) by writing to
+ * a sysfs file from any process in that session.
+ * The jobids are stored in a hash table indexed by the relevant
+ * struct pid. We periodically look for entries where the pid has
+ * no PIDTYPE_SID tasks any more, and prune them. This happens within
+ * 5 seconds of a jobid being added, and every 5 minutes when jobids exist,
+ * but none are added.
+ */
+#define JOBID_EXPEDITED_CLEAN (5)
+#define JOBID_BACKGROUND_CLEAN (5 * 60)
+
+/*
+ * One per-session jobid entry kept in the session_jobids rhashtable.
+ * Entries are allocated with room for the jobid string plus its
+ * terminating NUL directly after the fixed fields.
+ */
+struct session_jobid {
+ struct pid *sj_session; /* hash key; holds a get_pid() reference */
+ struct rhash_head sj_linkage; /* linkage into session_jobids */
+ struct rcu_head sj_rcu; /* handed to OBD_FREE_RCU() on removal */
+ char sj_jobid[]; /* NUL-terminated jobid string */
+};
+
+/*
+ * rhashtable layout for session_jobid: the key is the value of the
+ * sj_session pointer itself (key_len is the size of a pointer).
+ */
+static const struct rhashtable_params jobid_params = {
+ .key_len = sizeof(struct pid *),
+ .key_offset = offsetof(struct session_jobid, sj_session),
+ .head_offset = offsetof(struct session_jobid, sj_linkage),
+};
+
+static struct rhashtable session_jobids;
+
+/*
+ * jobid_current must be called with rcu_read_lock held.
+ * if it returns non-NULL, the string can only be used
+ * until rcu_read_unlock is called.
+ */
+char *jobid_current(void)
+{
+	struct pid *session = task_session(current);
+	struct session_jobid *entry;
+
+	/* the table is keyed by the session's struct pid pointer */
+	entry = rhashtable_lookup_fast(&session_jobids, &session, jobid_params);
+
+	return entry ? entry->sj_jobid : NULL;
+}
+
+static void jobid_prune_expedite(void);
+/*
+ * jobid_set_current will try to add a new entry
+ * to the table. If one exists with the same key, the
+ * jobid will be replaced
+ */
+int jobid_set_current(char *jobid)
+{
+	struct session_jobid *sj, *origsj;
+	size_t len = strlen(jobid);
+	/* fixed fields + jobid string + terminating NUL */
+	size_t size = sizeof(*sj) + len + 1;
+	int ret;
+
+	OBD_ALLOC(sj, size);
+	if (!sj)
+		return -ENOMEM;
+
+	rcu_read_lock();
+	sj->sj_session = get_pid(task_session(current));
+	/* length is already known, so copy the string and its NUL directly */
+	memcpy(sj->sj_jobid, jobid, len + 1);
+	origsj = rhashtable_lookup_get_insert_fast(&session_jobids,
+						   &sj->sj_linkage,
+						   jobid_params);
+	if (!origsj) {
+		/* successful insert */
+		rcu_read_unlock();
+		jobid_prune_expedite();
+		return 0;
+	}
+
+	if (IS_ERR(origsj)) {
+		ret = PTR_ERR(origsj);
+		goto out_free;
+	}
+
+	/* an entry for this session already exists: swap in the new one */
+	ret = rhashtable_replace_fast(&session_jobids,
+				      &origsj->sj_linkage,
+				      &sj->sj_linkage,
+				      jobid_params);
+	if (ret)
+		goto out_free;
+
+	put_pid(origsj->sj_session);
+	rcu_read_unlock();
+	/* the old entry is freed via its sj_rcu head rather than immediately */
+	OBD_FREE_RCU(origsj, sizeof(*origsj) + strlen(origsj->sj_jobid) + 1,
+		     sj_rcu);
+	jobid_prune_expedite();
+
+	return 0;
+
+out_free:
+	/* @sj never made it into the table, so release it right away */
+	put_pid(sj->sj_session);
+	rcu_read_unlock();
+	OBD_FREE(sj, size);
+	return ret;
+}
+
+/*
+ * Destructor passed to rhashtable_free_and_destroy(): drop the session
+ * pid reference held by the entry and free the entry itself.
+ * @arg is unused.
+ */
+static void jobid_free(void *vsj, void *arg)
+{
+	struct session_jobid *sj = vsj;
+
+	put_pid(sj->sj_session);
+	OBD_FREE(sj, sizeof(*sj) + strlen(sj->sj_jobid) + 1);
+}
+
+static void jobid_prune(struct work_struct *work);
+static DECLARE_DELAYED_WORK(jobid_prune_work, jobid_prune);
+static int jobid_prune_expedited;
+static void jobid_prune(struct work_struct *work)
+{
+	struct rhashtable_iter iter;
+	struct session_jobid *entry;
+	int remaining = 0;
+
+	/* allow jobid_prune_expedite() to queue another expedited run */
+	jobid_prune_expedited = 0;
+
+	rhashtable_walk_enter(&session_jobids, &iter);
+	rhashtable_walk_start(&iter);
+	for (;;) {
+		entry = rhashtable_walk_next(&iter);
+		if (entry == NULL)
+			break;
+
+		if (IS_ERR(entry)) {
+			/* -EAGAIN just means "keep walking" */
+			if (PTR_ERR(entry) != -EAGAIN)
+				break;
+			continue;
+		}
+
+		/* session still has tasks: keep the entry */
+		if (!hlist_empty(&entry->sj_session->tasks[PIDTYPE_SID])) {
+			remaining++;
+			continue;
+		}
+
+		/* only whoever actually unlinks the entry may free it */
+		if (rhashtable_remove_fast(&session_jobids,
+					   &entry->sj_linkage,
+					   jobid_params) != 0)
+			continue;
+
+		put_pid(entry->sj_session);
+		OBD_FREE_RCU(entry,
+			     sizeof(*entry) + strlen(entry->sj_jobid) + 1,
+			     sj_rcu);
+	}
+	rhashtable_walk_stop(&iter);
+	rhashtable_walk_exit(&iter);
+
+	/* keep scanning in the background while entries remain */
+	if (remaining != 0)
+		schedule_delayed_work(&jobid_prune_work,
+				      cfs_time_seconds(JOBID_BACKGROUND_CLEAN));
+}
+
+static void jobid_prune_expedite(void)
+{
+	/* only the caller that flips the flag from 0 to 1 submits the work */
+	if (cmpxchg(&jobid_prune_expedited, 0, 1) != 0)
+		return;
+
+	mod_delayed_work(system_wq, &jobid_prune_work,
+			 cfs_time_seconds(JOBID_EXPEDITED_CLEAN));
+}
+
+/*
+ * Copy up to @len bytes between @buf and the address space @mm, starting
+ * at user address @addr, one page at a time.
+ * If @write is non-zero the data goes from @buf into the address space,
+ * otherwise from the address space into @buf.
+ * @tsk is only passed on to the oldest get_user_pages() variant.
+ *
+ * Return: number of bytes actually transferred (possibly fewer than @len),
+ * or -EDEADLK if the mmap read lock could not be taken without blocking.
+ */
+static int cfs_access_process_vm(struct task_struct *tsk,
+ struct mm_struct *mm,
+ unsigned long addr,
+ void *buf, int len, int write)
+{
+ /* Copied from the kernel, for kernels which do not
+ * export access_process_vm()
+ */
+ struct vm_area_struct *vma = NULL;
+ struct page *page;
+ void *old_buf = buf;
+
+ /* Avoid deadlocks on mmap_sem if called from sys_mmap_pgoff(),
+ * which is already holding mmap_sem for writes. If some other
+ * thread gets the write lock in the meantime, this thread will
+ * block, but at least it won't deadlock on itself. LU-1735
+ */
+ if (!mmap_read_trylock(mm))
+ return -EDEADLK;
+
+ /* ignore errors, just check how much was successfully transferred */
+ while (len) {
+ int bytes, rc, offset;
+ void *maddr;
+
+ /* pin exactly one page; the get_user_pages() signature depends on the kernel */
+#if defined(HAVE_GET_USER_PAGES_WITHOUT_VMA)
+ rc = get_user_pages(addr, 1, write ? FOLL_WRITE : 0, &page);
+ if (rc > 0)
+ vma = vma_lookup(mm, addr);
+#elif defined(HAVE_GET_USER_PAGES_GUP_FLAGS)
+ rc = get_user_pages(addr, 1, write ? FOLL_WRITE : 0, &page,
+ &vma);
+#elif defined(HAVE_GET_USER_PAGES_6ARG)
+ rc = get_user_pages(addr, 1, write, 1, &page, &vma);
+#else
+ rc = get_user_pages(tsk, mm, addr, 1, write, 1, &page, &vma);
+#endif
+ if (rc <= 0 || !vma)
+ break;
+
+ /* clamp this copy to the end of the current page */
+ bytes = len;
+ offset = addr & (PAGE_SIZE-1);
+ if (bytes > PAGE_SIZE-offset)
+ bytes = PAGE_SIZE-offset;
+
+ maddr = kmap(page);
+ if (write) {
+ copy_to_user_page(vma, page, addr,
+ maddr + offset, buf, bytes);
+ set_page_dirty_lock(page);
+ } else {
+ copy_from_user_page(vma, page, addr,
+ buf, maddr + offset, bytes);
+ }
+ kunmap(page);
+ put_page(page);
+ len -= bytes;
+ buf += bytes;
+ addr += bytes;
+ }
+ mmap_read_unlock(mm);
+
+ /* how far @buf advanced is the number of bytes transferred */
+ return buf - old_buf;
+}
+
+/*
+ * Read the environment variable of current process specified by @key.
+ *
+ * The environment area (mm->env_start .. mm->env_end) is read one page
+ * at a time and scanned for a "key=value" entry matching @key.
+ *
+ * @key:     name of the variable to look up
+ * @value:   output buffer, always NUL-terminated when it has any room
+ * @val_len: in: size of @value in bytes;
+ *           out: length of the value (excluding NUL) on success
+ *
+ * Return: 0 on success,
+ *	   -ENOMEM if the scan buffer cannot be allocated,
+ *	   -EINVAL if the current task has no mm,
+ *	   -EOVERFLOW if the value does not fit (truncated copy is stored),
+ *	   -ENOENT if @key is not found,
+ *	   or a negative error from cfs_access_process_vm().
+ */
+static int cfs_get_environ(const char *key, char *value, int *val_len)
+{
+	struct mm_struct *mm;
+	char *buffer;
+	int buf_len = PAGE_SIZE;
+	int key_len = strlen(key);
+	unsigned long addr;
+	int rc;
+	bool skip = false;
+
+	ENTRY;
+	buffer = kmalloc(buf_len, GFP_USER);
+	if (!buffer)
+		RETURN(-ENOMEM);
+
+	mm = get_task_mm(current);
+	if (!mm) {
+		kfree(buffer);
+		RETURN(-EINVAL);
+	}
+
+	addr = mm->env_start;
+	while (addr < mm->env_end) {
+		int this_len, retval, scan_len;
+		char *env_start, *env_end;
+
+		memset(buffer, 0, buf_len);
+
+		this_len = min_t(int, mm->env_end - addr, buf_len);
+		retval = cfs_access_process_vm(current, mm, addr, buffer,
+					       this_len, 0);
+		if (retval < 0)
+			GOTO(out, rc = retval);
+		else if (retval != this_len)
+			break;
+
+		addr += retval;
+
+		/* Parse the buffer to find out the specified key/value pair.
+		 * The "key=value" entries are separated by '\0'.
+		 */
+		env_start = buffer;
+		scan_len = this_len;
+		while (scan_len) {
+			char *entry;
+			int entry_len;
+
+			env_end = memscan(env_start, '\0', scan_len);
+			LASSERT(env_end >= env_start &&
+				env_end <= env_start + scan_len);
+
+			/* The last entry of this buffer cross the buffer
+			 * boundary, reread it in next cycle.
+			 */
+			if (unlikely(env_end - env_start == scan_len)) {
+				/* Just skip the entry larger than page size,
+				 * it can't be jobID env variable.
+				 */
+				if (unlikely(scan_len == this_len))
+					skip = true;
+				else
+					addr -= scan_len;
+				break;
+			} else if (unlikely(skip)) {
+				/* tail of an oversized entry: discard it */
+				skip = false;
+				goto next_entry;
+			}
+			entry = env_start;
+			entry_len = env_end - env_start;
+			CDEBUG(D_INFO, "key: %s, entry: %s\n", key, entry);
+
+			/* Key length + length of '=' */
+			if (entry_len > key_len + 1 &&
+			    entry[key_len] == '=' &&
+			    !memcmp(entry, key, key_len)) {
+				entry += key_len + 1;
+				entry_len -= key_len + 1;
+
+				/* The 'value' buffer passed in is too small.
+				 * Copy what fits, but return -EOVERFLOW.
+				 */
+				if (entry_len >= *val_len) {
+					/* never index value[-1] on an
+					 * empty output buffer
+					 */
+					if (*val_len > 0) {
+						memcpy(value, entry,
+						       *val_len - 1);
+						value[*val_len - 1] = '\0';
+					}
+					GOTO(out, rc = -EOVERFLOW);
+				}
+
+				/* entry_len < *val_len here, so there is
+				 * room for the terminating NUL
+				 */
+				memcpy(value, entry, entry_len);
+				value[entry_len] = '\0';
+				*val_len = entry_len;
+				GOTO(out, rc = 0);
+			}
+next_entry:
+			scan_len -= (env_end - env_start + 1);
+			env_start = env_end + 1;
+		}
+	}
+	GOTO(out, rc = -ENOENT);
+
+out:
+	mmput(mm);
+	kfree(buffer);
+	RETURN(rc);
+}
+
+/*
* Get jobid of current process by reading the environment variable
* stored in between the "env_start" & "env_end" of task struct.
*
* then an upcall could be issued here to get the jobid by utilizing
* the userspace tools/API. Then, the jobid must be cached.
*/
-int jobid_get_from_environ(char *jobid_var, char *jobid, int *jobid_len)
+static int jobid_get_from_environ(char *jobid_var, char *jobid, int *jobid_len)
{
- static bool printed;
int rc;
rc = cfs_get_environ(jobid_var, jobid, jobid_len);
if (!rc)
goto out;
- if (unlikely(rc == -EOVERFLOW && !printed)) {
+ if (rc == -EOVERFLOW) {
/* For the PBS_JOBID and LOADL_STEP_ID keys (which are
* variable length strings instead of just numbers), it
* might make sense to keep the unique parts for JobID,
* larger temp buffer for cfs_get_environ(), then
* truncating the string at some separator to fit into
* the specified jobid_len. Fix later if needed. */
- LCONSOLE_ERROR_MSG(0x16b,
- "jobid: '%s' value too large (%d)\n",
- obd_jobid_var, *jobid_len);
- printed = true;
+ static ktime_t printed;
+
+ if (unlikely(ktime_to_ns(printed) == 0 ||
+ ktime_after(ktime_get(),
+ ktime_add_ns(printed,
+ 3600ULL * 24 * NSEC_PER_SEC)))) {
+ LCONSOLE_WARN("jobid: '%s' value too large (%d)\n",
+ obd_jobid_var, *jobid_len);
+ printed = ktime_get();
+ }
+
rc = 0;
- }
- if (rc) {
- CDEBUG((rc == -ENOENT || rc == -EINVAL ||
- rc == -EDEADLK) ? D_INFO : D_ERROR,
- "jobid: get '%s' failed: rc = %d\n",
- obd_jobid_var, rc);
+ } else {
+ CDEBUG_LIMIT((rc == -ENOENT || rc == -EINVAL ||
+ rc == -EDEADLK) ? D_INFO : D_ERROR,
+ "jobid: get '%s' failed: rc = %d\n",
+ obd_jobid_var, rc);
}
out:
if (obj == NULL)
return 0;
+ if (jobid == NULL) {
+ WARN_ON_ONCE(kref_read(&pidmap->jp_refcount) != 1);
+ return 1;
+ }
+
spin_lock(&pidmap->jp_lock);
- if (jobid == NULL)
- rc = 1;
- else if (jobid[0] == '\0')
+ /* prevent newly inserted items from being deleted */
+ if (jobid[0] == '\0' && kref_read(&pidmap->jp_refcount) == 1)
rc = 1;
else if (ktime_get_real_seconds() - pidmap->jp_time > DELETE_INTERVAL)
rc = 1;
return rc;
}
+static void jobid_pidmap_gc(struct work_struct *work);
+static DECLARE_DELAYED_WORK(jobid_pidmap_gc_work, jobid_pidmap_gc);
+static int jobid_pidmap_gc_started;
+
+/*
+ * Delayed-work handler: drop stale PID->jobid entries from the hash and
+ * re-arm itself every DELETE_INTERVAL seconds while entries remain.
+ */
+static void jobid_pidmap_gc(struct work_struct *work)
+{
+	struct cfs_hash *hash;
+
+	/*
+	 * Take a reference and use only this pinned pointer below:
+	 * jobid_cache_fini() clears the global jobid_hash before it
+	 * cancels this work, so the global may become NULL mid-run.
+	 */
+	hash = cfs_hash_getref(jobid_hash);
+	if (!hash)
+		return;
+
+	CDEBUG(D_INFO, "jobid: running the PID map GC (count: %d)\n",
+	       atomic_read(&hash->hs_count));
+
+	cfs_hash_cond_del(hash, jobid_should_free_item,
+			  "intentionally_bad_jobid");
+
+	/* table is empty: let jobid_pidmap_gc_start() restart the GC later */
+	if (atomic_read(&hash->hs_count) == 0)
+		jobid_pidmap_gc_started = 0;
+	else
+		schedule_delayed_work(&jobid_pidmap_gc_work,
+				      cfs_time_seconds(DELETE_INTERVAL));
+
+	cfs_hash_putref(hash);
+}
+
+/* scan hash periodically to remove old PID entries from cache */
+static inline void jobid_pidmap_gc_start(void)
+{
+	/* whoever flips the flag from 0 to 1 submits the work, nobody else */
+	if (cmpxchg(&jobid_pidmap_gc_started, 0, 1) != 0)
+		return;
+
+	schedule_delayed_work(&jobid_pidmap_gc_work,
+			      cfs_time_seconds(DELETE_INTERVAL));
+}
+
+
/*
* jobid_name_is_valid
*
static bool jobid_name_is_valid(char *jobid)
{
const char *const lustre_reserved[] = { "ll_ping", "ptlrpc",
- "ldlm", "ll_sa", NULL };
+ "ldlm", "ll_sa", "kworker",
+ "kswapd", "writeback", "irq",
+ "ksoftirq", "ll_ucp", NULL };
int i;
if (jobid[0] == '\0')
/*
* jobid_get_from_cache()
*
- * Returns contents of jobid_var from process environment for current PID.
- * This will be cached for some time to avoid overhead scanning environment.
+ * Returns contents of jobid_var from process environment for current PID,
+ * or from the per-session jobid table.
+ * Values fetched from the process environment are cached for some time to
+ * avoid the overhead of scanning the environment.
*
* Return: -ENOMEM if allocating a new pidmap fails
* -ENOENT if no entry could be found
*/
static int jobid_get_from_cache(char *jobid, size_t joblen)
{
- static time64_t last_expire;
- bool expire_cache = false;
- pid_t pid = current_pid();
+ pid_t pid = current->pid;
struct jobid_pid_map *pidmap = NULL;
time64_t now = ktime_get_real_seconds();
int rc = 0;
ENTRY;
- LASSERT(jobid_hash != NULL);
+ if (strcmp(obd_jobid_var, JOBSTATS_SESSION) == 0) {
+ char *jid;
- /* scan hash periodically to remove old PID entries from cache */
- spin_lock(&jobid_hash_lock);
- if (unlikely(last_expire + DELETE_INTERVAL <= now)) {
- expire_cache = true;
- last_expire = now;
+ rcu_read_lock();
+ jid = jobid_current();
+ if (jid) {
+ strscpy(jobid, jid, joblen);
+ joblen = strlen(jobid);
+ } else {
+ rc = -ENOENT;
+ }
+ rcu_read_unlock();
+ GOTO(out, rc);
}
- spin_unlock(&jobid_hash_lock);
- if (expire_cache)
- cfs_hash_cond_del(jobid_hash, jobid_should_free_item,
- "intentionally_bad_jobid");
+ LASSERT(jobid_hash != NULL);
/* first try to find PID in the hash and use that value */
pidmap = cfs_hash_lookup(jobid_hash, &pid);
pidmap->jp_jobid[0] = '\0';
spin_lock_init(&pidmap->jp_lock);
INIT_HLIST_NODE(&pidmap->jp_hash);
+ /*
+ * @pidmap might be reclaimed just after it is added to the
+ * hash list; init @jp_refcount to 1 to make sure its memory
+ * cannot be freed while it is being accessed.
+ */
+ kref_init(&pidmap->jp_refcount);
/*
* Add the newly created map to the hash, on key collision we
OBD_FREE_PTR(pidmap);
pidmap = pidmap2;
} else {
- cfs_hash_get(jobid_hash, &pidmap->jp_hash);
+ jobid_pidmap_gc_start();
}
}
spin_lock(&pidmap->jp_lock);
if (!rc) {
pidmap->jp_joblen = env_len;
- strlcpy(pidmap->jp_jobid, env_jobid,
+ strscpy(pidmap->jp_jobid, env_jobid,
sizeof(pidmap->jp_jobid));
rc = 0;
} else if (rc == -ENOENT) {
* If a cached missing entry was found, return -ENOENT.
*/
if (pidmap->jp_joblen) {
- strlcpy(jobid, pidmap->jp_jobid, joblen);
+ strscpy(jobid, pidmap->jp_jobid, joblen);
joblen = pidmap->jp_joblen;
rc = 0;
} else if (!rc) {
}
/*
+ * jobid_print_current_comm()
+ *
+ * Print current comm name into the provided jobid buffer, and trim names of
+ * kernel threads like "kworker/0:0" to "kworker" or "ll_sa_12345" to "ll_sa"
+ *
+ * Return: number of chars printed to jobid
+ */
+static int jobid_print_current_comm(char *jobid, ssize_t joblen)
+{
+	const char *const prefixes[] = {"kworker", "kswapd", "ll_sa", "ll_agl",
+					"ldlm_bl", "ll_ucp", NULL};
+	const char *const *prefix;
+	/* by default the comm name is printed untouched */
+	const char *name = current->comm;
+
+	if (current->flags & PF_KTHREAD) {
+		/* kernel thread: collapse to the first matching base name */
+		for (prefix = prefixes; *prefix != NULL; prefix++) {
+			if (strncmp(current->comm, *prefix,
+				    strlen(*prefix)) == 0) {
+				name = *prefix;
+				break;
+			}
+		}
+	}
+
+	return snprintf(jobid, joblen, "%s", name);
+}
+
+/*
* jobid_interpret_string()
*
* Interpret the jobfmt string to expand specified fields, like coredumps do:
* %e = executable
* %g = gid
* %h = hostname
+ * %H = short hostname
* %j = jobid from environment
+ * ? = for use between %j and %H. Uses jobid if set, otherwise use Hostname
* %p = pid
* %u = uid
*
+ * Truncation can also be requested by writing .n between % and the field,
+ * for example %.3h to print only the first 3 characters.
+ *
* Unknown escape strings are dropped. Other characters are copied through,
* excluding whitespace (to avoid making jobid parsing difficult).
*
char c;
while ((c = *jobfmt++) && joblen > 1) {
- char f;
+ long width = joblen;
+ char *p;
int l;
if (isspace(c)) /* Don't allow embedded spaces */
*jobid = c;
joblen--;
jobid++;
+ *jobid = '\0';
continue;
}
- switch ((f = *jobfmt++)) {
+ if (*jobfmt == '.') {
+ long w = 0;
+ int size = 0;
+
+ jobfmt++;
+ if (sscanf(jobfmt, "%ld%n", &w, &size) == 1)
+ jobfmt += size;
+ if (w > 0)
+ width = min(w+1, joblen);
+ }
+
+ switch (*jobfmt++) {
case 'e': /* executable name */
- l = snprintf(jobid, joblen, "%s", current_comm());
+ l = jobid_print_current_comm(jobid, width);
break;
case 'g': /* group ID */
- l = snprintf(jobid, joblen, "%u",
+ l = snprintf(jobid, width, "%u",
from_kgid(&init_user_ns, current_fsgid()));
break;
case 'h': /* hostname */
- l = snprintf(jobid, joblen, "%s",
+ l = snprintf(jobid, width, "%s",
init_utsname()->nodename);
break;
+ case 'H': /* short hostname. Cut at first dot */
+ l = snprintf(jobid, width, "%s",
+ init_utsname()->nodename);
+ p = strnchr(jobid, width, '.');
+ if (p) {
+ *p = '\0';
+ l = p - jobid;
+ }
+ break;
case 'j': /* jobid stored in process environment */
- l = jobid_get_from_cache(jobid, joblen);
+ l = jobid_get_from_cache(jobid, width);
if (l < 0)
l = 0;
+ if (*jobfmt == '?') {
+ if (l == 0)
+ jobfmt++;
+ else
+ jobfmt += 3;
+ }
break;
case 'p': /* process ID */
- l = snprintf(jobid, joblen, "%u", current_pid());
+ l = snprintf(jobid, width, "%u", current->pid);
break;
case 'u': /* user ID */
- l = snprintf(jobid, joblen, "%u",
+ l = snprintf(jobid, width, "%u",
from_kuid(&init_user_ns, current_fsuid()));
break;
case '\0': /* '%' at end of format string */
l = 0;
break;
}
+ if (l >= width)
+ l = width-1;
+
jobid += l;
joblen -= l;
}
HASH_JOBID_MAX_BITS, HASH_JOBID_BKT_BITS,
0, CFS_HASH_MIN_THETA, CFS_HASH_MAX_THETA,
&jobid_hash_ops, CFS_HASH_DEFAULT);
- if (!jobid_hash)
+ if (!jobid_hash) {
rc = -ENOMEM;
+ } else {
+ rc = rhashtable_init(&session_jobids, &jobid_params);
+ if (rc) {
+ cfs_hash_putref(jobid_hash);
+ jobid_hash = NULL;
+ }
+ }
RETURN(rc);
}
jobid_hash = NULL;
spin_unlock(&jobid_hash_lock);
+ cancel_delayed_work_sync(&jobid_prune_work);
+ cancel_delayed_work_sync(&jobid_pidmap_gc_work);
+
if (tmp_hash != NULL) {
cfs_hash_cond_del(tmp_hash, jobid_should_free_item, NULL);
cfs_hash_putref(tmp_hash);
+
+ rhashtable_free_and_destroy(&session_jobids, jobid_free, NULL);
}
+
EXIT;
}
EXPORT_SYMBOL(jobid_cache_fini);
/*
* Hash operations for pid<->jobid
*/
-static unsigned jobid_hashfn(struct cfs_hash *hs, const void *key,
- unsigned mask)
+static unsigned int
+jobid_hashfn(struct cfs_hash *hs, const void *key, const unsigned int bits)
{
- return cfs_hash_djb2_hash(key, sizeof(pid_t), mask);
+ return cfs_hash_djb2_hash(key, sizeof(pid_t), bits);
}
static void *jobid_key(struct hlist_node *hnode)
pidmap = hlist_entry(hnode, struct jobid_pid_map, jp_hash);
- atomic_inc(&pidmap->jp_refcount);
+ kref_get(&pidmap->jp_refcount);
+}
+
+/* kref release callback: log and free the pidmap that embeds @kref */
+static void jobid_put_locked_free(struct kref *kref)
+{
+	struct jobid_pid_map *map;
+
+	map = container_of(kref, struct jobid_pid_map, jp_refcount);
+
+	CDEBUG(D_INFO, "Freeing: %d->%s\n", map->jp_pid, map->jp_jobid);
+	OBD_FREE_PTR(map);
+}
static void jobid_put_locked(struct cfs_hash *hs, struct hlist_node *hnode)
return;
pidmap = hlist_entry(hnode, struct jobid_pid_map, jp_hash);
- LASSERT(atomic_read(&pidmap->jp_refcount) > 0);
- if (atomic_dec_and_test(&pidmap->jp_refcount)) {
- CDEBUG(D_INFO, "Freeing: %d->%s\n",
- pidmap->jp_pid, pidmap->jp_jobid);
-
- OBD_FREE_PTR(pidmap);
- }
+ LASSERT(kref_read(&pidmap->jp_refcount) > 0);
+ kref_put(&pidmap->jp_refcount, jobid_put_locked_free);
}
static struct cfs_hash_ops jobid_hash_ops = {
*
* Fill in @jobid string based on the value of obd_jobid_var:
* JOBSTATS_DISABLE: none
- * JOBSTATS_NODELOCAL: content of obd_jobid_node (jobid_interpret_string())
+ * JOBSTATS_NODELOCAL: content of obd_jobid_name (jobid_interpret_string())
* JOBSTATS_PROCNAME_UID: process name/UID
+ * JOBSTATS_SESSION per-session value set by
+ * /sys/fs/lustre/jobid_this_session
* anything else: look up obd_jobid_var in the processes environment
*
* Return -ve error number, 0 on success.
*/
int lustre_get_jobid(char *jobid, size_t joblen)
{
+ int len = min_t(int, joblen, LUSTRE_JOBID_SIZE);
int rc = 0;
ENTRY;
if (strcmp(obd_jobid_var, JOBSTATS_DISABLE) == 0) {
/* Jobstats isn't enabled */
memset(jobid, 0, joblen);
- } else if (strcmp(obd_jobid_var, JOBSTATS_NODELOCAL) == 0) {
+ RETURN(0);
+ }
+
+ if (strcmp(obd_jobid_var, JOBSTATS_NODELOCAL) == 0) {
/* Whole node dedicated to single job */
- rc = jobid_interpret_string(obd_jobid_name, jobid, joblen);
+ rc = jobid_interpret_string(obd_jobid_name, jobid, len);
} else if (strcmp(obd_jobid_var, JOBSTATS_PROCNAME_UID) == 0) {
- rc = jobid_interpret_string("%e.%u", jobid, joblen);
- } else if (jobid_name_is_valid(current_comm())) {
+ rc = jobid_interpret_string("%e.%u", jobid, len);
+ } else if (strcmp(obd_jobid_var, JOBSTATS_SESSION) == 0 ||
+ jobid_name_is_valid(current->comm)) {
/*
- * obd_jobid_var holds the jobid environment variable name.
- * Skip initial check if obd_jobid_name already uses "%j",
- * otherwise try just "%j" first, then fall back to whatever
- * is in obd_jobid_name if obd_jobid_var is not found.
+ * per-process jobid wanted, either from environment or from
+ * per-session setting.
+ * If obd_jobid_name contains "%j" or if getting the per-process
+ * jobid directly fails, fall back to using obd_jobid_name.
*/
rc = -EAGAIN;
if (!strnstr(obd_jobid_name, "%j", joblen))
- rc = jobid_get_from_cache(jobid, joblen);
+ rc = jobid_get_from_cache(jobid, len);
- /* fall back to jobid_node if jobid_var not in environment */
+ /* fall back to jobid_name if jobid_var not available */
if (rc < 0) {
int rc2 = jobid_interpret_string(obd_jobid_name,
- jobid, joblen);
+ jobid, len);
if (!rc2)
rc = 0;
}
if (jobid_hash == NULL)
return;
- strlcpy(jobid, find_jobid, sizeof(jobid));
+ strscpy(jobid, find_jobid, sizeof(jobid));
/* trim \n off the end of the incoming jobid */
end = strchr(jobid, '\n');
if (end && *end == '\n')