X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Fobdclass%2Fjobid.c;h=dc40eb998b3531a8ae1f747346b8b6a64815fd7c;hb=HEAD;hp=633388bebc92eedd73393a2ab6fd97e2db315995;hpb=8ad9453ee66ec1beaff9c8711cf732a861176a6f;p=fs%2Flustre-release.git diff --git a/lustre/obdclass/jobid.c b/lustre/obdclass/jobid.c index 633388b..78d8ea7 100644 --- a/lustre/obdclass/jobid.c +++ b/lustre/obdclass/jobid.c @@ -33,9 +33,7 @@ #define DEBUG_SUBSYSTEM S_RPC #include -#ifdef HAVE_UIDGID_HEADER #include -#endif #include #include @@ -62,11 +60,330 @@ struct jobid_pid_map { spinlock_t jp_lock; /* protects jp_jobid */ char jp_jobid[LUSTRE_JOBID_SIZE]; unsigned int jp_joblen; - atomic_t jp_refcount; + struct kref jp_refcount; pid_t jp_pid; }; /* + * Jobid can be set for a session (see setsid(2)) by writing to + * a sysfs file from any process in that session. + * The jobids are stored in a hash table indexed by the relevant + * struct pid. We periodically look for entries where the pid has + * no PIDTYPE_SID tasks any more, and prune them. This happens within + * 5 seconds of a jobid being added, and every 5 minutes when jobids exist, + * but none are added. + */ +#define JOBID_EXPEDITED_CLEAN (5) +#define JOBID_BACKGROUND_CLEAN (5 * 60) + +struct session_jobid { + struct pid *sj_session; + struct rhash_head sj_linkage; + struct rcu_head sj_rcu; + char sj_jobid[1]; +}; + +static const struct rhashtable_params jobid_params = { + .key_len = sizeof(struct pid *), + .key_offset = offsetof(struct session_jobid, sj_session), + .head_offset = offsetof(struct session_jobid, sj_linkage), +}; + +static struct rhashtable session_jobids; + +/* + * jobid_current must be called with rcu_read_lock held. + * if it returns non-NULL, the string can only be used + * until rcu_read_unlock is called. + */ +char *jobid_current(void) +{ + struct pid *sid = task_session(current); + struct session_jobid *sj; + + sj = rhashtable_lookup_fast(&session_jobids, &sid, jobid_params); + if (sj) + return sj->sj_jobid; + return NULL; +} + +static void jobid_prune_expedite(void); +/* + * jobid_set_current will try to add a new entry + * to the table. 
If one exists with the same key, the + * jobid will be replaced + */ +int jobid_set_current(char *jobid) +{ + struct pid *sid; + struct session_jobid *sj, *origsj; + int ret; + int len = strlen(jobid); + + sj = kmalloc(sizeof(*sj) + len, GFP_KERNEL); + if (!sj) + return -ENOMEM; + rcu_read_lock(); + sid = task_session(current); + sj->sj_session = get_pid(sid); + strncpy(sj->sj_jobid, jobid, len+1); + origsj = rhashtable_lookup_get_insert_fast(&session_jobids, + &sj->sj_linkage, + jobid_params); + if (origsj == NULL) { + /* successful insert */ + rcu_read_unlock(); + jobid_prune_expedite(); + return 0; + } + + if (IS_ERR(origsj)) { + put_pid(sj->sj_session); + kfree(sj); + rcu_read_unlock(); + return PTR_ERR(origsj); + } + ret = rhashtable_replace_fast(&session_jobids, + &origsj->sj_linkage, + &sj->sj_linkage, + jobid_params); + if (ret) { + put_pid(sj->sj_session); + kfree(sj); + rcu_read_unlock(); + return ret; + } + put_pid(origsj->sj_session); + rcu_read_unlock(); + kfree_rcu(origsj, sj_rcu); + jobid_prune_expedite(); + + return 0; +} + +static void jobid_free(void *vsj, void *arg) +{ + struct session_jobid *sj = vsj; + + put_pid(sj->sj_session); + kfree(sj); +} + +static void jobid_prune(struct work_struct *work); +static DECLARE_DELAYED_WORK(jobid_prune_work, jobid_prune); +static int jobid_prune_expedited; +static void jobid_prune(struct work_struct *work) +{ + int remaining = 0; + struct rhashtable_iter iter; + struct session_jobid *sj; + + jobid_prune_expedited = 0; + rhashtable_walk_enter(&session_jobids, &iter); + rhashtable_walk_start(&iter); + while ((sj = rhashtable_walk_next(&iter)) != NULL) { + if (IS_ERR(sj)) { + if (PTR_ERR(sj) == -EAGAIN) + continue; + break; + } + if (!hlist_empty(&sj->sj_session->tasks[PIDTYPE_SID])) { + remaining++; + continue; + } + if (rhashtable_remove_fast(&session_jobids, + &sj->sj_linkage, + jobid_params) == 0) { + put_pid(sj->sj_session); + kfree_rcu(sj, sj_rcu); + } + } + rhashtable_walk_stop(&iter); + rhashtable_walk_exit(&iter); + if (remaining) + schedule_delayed_work(&jobid_prune_work, + cfs_time_seconds(JOBID_BACKGROUND_CLEAN)); +} + +static void jobid_prune_expedite(void) +{ + /* submit the work only once */ + if (!cmpxchg(&jobid_prune_expedited, 0, 1)) + mod_delayed_work(system_wq, &jobid_prune_work, + cfs_time_seconds(JOBID_EXPEDITED_CLEAN)); +} + +static int cfs_access_process_vm(struct task_struct *tsk, + struct mm_struct *mm, + unsigned long addr, + void *buf, int len, int write) +{ + /* Just copied from kernel for the kernels which doesn't + * have access_process_vm() exported + */ + struct vm_area_struct *vma = NULL; + struct page *page; + void *old_buf = buf; + + /* Avoid deadlocks on mmap_sem if called from sys_mmap_pgoff(), + * which is already holding mmap_sem for writes. If some other + * thread gets the write lock in the meantime, this thread will + * block, but at least it won't deadlock on itself. LU-1735 + */ + if (!mmap_read_trylock(mm)) + return -EDEADLK; + + /* ignore errors, just check how much was successfully transferred */ + while (len) { + int bytes, rc, offset; + void *maddr; + +#if defined(HAVE_GET_USER_PAGES_WITHOUT_VMA) + rc = get_user_pages(addr, 1, write ? FOLL_WRITE : 0, &page); + if (rc > 0) + vma = vma_lookup(mm, addr); +#elif defined(HAVE_GET_USER_PAGES_GUP_FLAGS) + rc = get_user_pages(addr, 1, write ? 
FOLL_WRITE : 0, &page, + &vma); +#elif defined(HAVE_GET_USER_PAGES_6ARG) + rc = get_user_pages(addr, 1, write, 1, &page, &vma); +#else + rc = get_user_pages(tsk, mm, addr, 1, write, 1, &page, &vma); +#endif + if (rc <= 0 || !vma) + break; + + bytes = len; + offset = addr & (PAGE_SIZE-1); + if (bytes > PAGE_SIZE-offset) + bytes = PAGE_SIZE-offset; + + maddr = kmap(page); + if (write) { + copy_to_user_page(vma, page, addr, + maddr + offset, buf, bytes); + set_page_dirty_lock(page); + } else { + copy_from_user_page(vma, page, addr, + buf, maddr + offset, bytes); + } + kunmap(page); + put_page(page); + len -= bytes; + buf += bytes; + addr += bytes; + } + mmap_read_unlock(mm); + + return buf - old_buf; +} + +/* Read the environment variable of current process specified by @key. */ +static int cfs_get_environ(const char *key, char *value, int *val_len) +{ + struct mm_struct *mm; + char *buffer; + int buf_len = PAGE_SIZE; + int key_len = strlen(key); + unsigned long addr; + int rc; + bool skip = false; + + ENTRY; + buffer = kmalloc(buf_len, GFP_USER); + if (!buffer) + RETURN(-ENOMEM); + + mm = get_task_mm(current); + if (!mm) { + kfree(buffer); + RETURN(-EINVAL); + } + + addr = mm->env_start; + while (addr < mm->env_end) { + int this_len, retval, scan_len; + char *env_start, *env_end; + + memset(buffer, 0, buf_len); + + this_len = min_t(int, mm->env_end - addr, buf_len); + retval = cfs_access_process_vm(current, mm, addr, buffer, + this_len, 0); + if (retval < 0) + GOTO(out, rc = retval); + else if (retval != this_len) + break; + + addr += retval; + + /* Parse the buffer to find out the specified key/value pair. + * The "key=value" entries are separated by '\0'. + */ + env_start = buffer; + scan_len = this_len; + while (scan_len) { + char *entry; + int entry_len; + + env_end = memscan(env_start, '\0', scan_len); + LASSERT(env_end >= env_start && + env_end <= env_start + scan_len); + + /* The last entry of this buffer cross the buffer + * boundary, reread it in next cycle. + */ + if (unlikely(env_end - env_start == scan_len)) { + /* Just skip the entry larger than page size, + * it can't be jobID env variable. + */ + if (unlikely(scan_len == this_len)) + skip = true; + else + addr -= scan_len; + break; + } else if (unlikely(skip)) { + skip = false; + goto skip; + } + entry = env_start; + entry_len = env_end - env_start; + CDEBUG(D_INFO, "key: %s, entry: %s\n", key, entry); + + /* Key length + length of '=' */ + if (entry_len > key_len + 1 && + entry[key_len] == '=' && + !memcmp(entry, key, key_len)) { + entry += key_len + 1; + entry_len -= key_len + 1; + + /* The 'value' buffer passed in is too small. + * Copy what fits, but return -EOVERFLOW. + */ + if (entry_len >= *val_len) { + memcpy(value, entry, *val_len); + value[*val_len - 1] = 0; + GOTO(out, rc = -EOVERFLOW); + } + + memcpy(value, entry, entry_len); + *val_len = entry_len; + GOTO(out, rc = 0); + } +skip: + scan_len -= (env_end - env_start + 1); + env_start = env_end + 1; + } + } + GOTO(out, rc = -ENOENT); + +out: + mmput(mm); + kfree((void *)buffer); + return rc; +} + +/* * Get jobid of current process by reading the environment variable * stored in between the "env_start" & "env_end" of task struct. * @@ -74,16 +391,15 @@ struct jobid_pid_map { * then an upcall could be issued here to get the jobid by utilizing * the userspace tools/API. Then, the jobid must be cached. 
*/ -int jobid_get_from_environ(char *jobid_var, char *jobid, int *jobid_len) +static int jobid_get_from_environ(char *jobid_var, char *jobid, int *jobid_len) { - static bool printed; int rc; rc = cfs_get_environ(jobid_var, jobid, jobid_len); if (!rc) goto out; - if (unlikely(rc == -EOVERFLOW && !printed)) { + if (rc == -EOVERFLOW) { /* For the PBS_JOBID and LOADL_STEP_ID keys (which are * variable length strings instead of just numbers), it * might make sense to keep the unique parts for JobID, @@ -91,16 +407,23 @@ int jobid_get_from_environ(char *jobid_var, char *jobid, int *jobid_len) * larger temp buffer for cfs_get_environ(), then * truncating the string at some separator to fit into * the specified jobid_len. Fix later if needed. */ - LCONSOLE_WARN("jobid: '%s' value too large (%d)\n", - obd_jobid_var, *jobid_len); - printed = true; + static ktime_t printed; + + if (unlikely(ktime_to_ns(printed) == 0 || + ktime_after(ktime_get(), + ktime_add_ns(printed, + 3600ULL * 24 * NSEC_PER_SEC)))) { + LCONSOLE_WARN("jobid: '%s' value too large (%d)\n", + obd_jobid_var, *jobid_len); + printed = ktime_get(); + } + rc = 0; - } - if (rc) { - CDEBUG((rc == -ENOENT || rc == -EINVAL || - rc == -EDEADLK) ? D_INFO : D_ERROR, - "jobid: get '%s' failed: rc = %d\n", - obd_jobid_var, rc); + } else { + CDEBUG_LIMIT((rc == -ENOENT || rc == -EINVAL || + rc == -EDEADLK) ? D_INFO : D_ERROR, + "jobid: get '%s' failed: rc = %d\n", + obd_jobid_var, rc); } out: @@ -126,10 +449,14 @@ static int jobid_should_free_item(void *obj, void *data) if (obj == NULL) return 0; + if (jobid == NULL) { + WARN_ON_ONCE(kref_read(&pidmap->jp_refcount) != 1); + return 1; + } + spin_lock(&pidmap->jp_lock); - if (jobid == NULL) - rc = 1; - else if (jobid[0] == '\0') + /* prevent newly inserted items from deleting */ + if (jobid[0] == '\0' && kref_read(&pidmap->jp_refcount) == 1) rc = 1; else if (ktime_get_real_seconds() - pidmap->jp_time > DELETE_INTERVAL) rc = 1; @@ -140,6 +467,43 @@ static int jobid_should_free_item(void *obj, void *data) return rc; } +static void jobid_pidmap_gc(struct work_struct *work); +static DECLARE_DELAYED_WORK(jobid_pidmap_gc_work, jobid_pidmap_gc); +static int jobid_pidmap_gc_started; + +static void jobid_pidmap_gc(struct work_struct *work) +{ + struct cfs_hash *hash; + + hash = cfs_hash_getref(jobid_hash); + if (!hash) + return; + + CDEBUG(D_INFO, "jobid: running the PID map GC (count: %d)\n", + atomic_read(&jobid_hash->hs_count)); + + cfs_hash_cond_del(jobid_hash, jobid_should_free_item, + "intentionally_bad_jobid"); + + if (atomic_read(&jobid_hash->hs_count) == 0) + jobid_pidmap_gc_started = 0; + else + schedule_delayed_work(&jobid_pidmap_gc_work, + cfs_time_seconds(DELETE_INTERVAL)); + + cfs_hash_putref(hash); +} + +/* scan hash periodically to remove old PID entries from cache */ +static inline void jobid_pidmap_gc_start(void) +{ + /* submit the work only once */ + if (!cmpxchg(&jobid_pidmap_gc_started, 0, 1)) + schedule_delayed_work(&jobid_pidmap_gc_work, + cfs_time_seconds(DELETE_INTERVAL)); +} + + /* * jobid_name_is_valid * @@ -151,7 +515,9 @@ static int jobid_should_free_item(void *obj, void *data) static bool jobid_name_is_valid(char *jobid) { const char *const lustre_reserved[] = { "ll_ping", "ptlrpc", - "ldlm", "ll_sa", NULL }; + "ldlm", "ll_sa", "kworker", + "kswapd", "writeback", "irq", + "ksoftirq", NULL }; int i; if (jobid[0] == '\0') @@ -168,8 +534,10 @@ static bool jobid_name_is_valid(char *jobid) /* * jobid_get_from_cache() * - * Returns contents of jobid_var from process environment for 
current PID. - * This will be cached for some time to avoid overhead scanning environment. + * Returns contents of jobid_var from process environment for current PID, + * or from the per-session jobid table. + * Values fetch from process environment will be cached for some time to avoid + * the overhead of scanning the environment. * * Return: -ENOMEM if allocating a new pidmap fails * -ENOENT if no entry could be found @@ -177,27 +545,28 @@ static bool jobid_name_is_valid(char *jobid) */ static int jobid_get_from_cache(char *jobid, size_t joblen) { - static time64_t last_expire; - bool expire_cache = false; - pid_t pid = current_pid(); + pid_t pid = current->pid; struct jobid_pid_map *pidmap = NULL; time64_t now = ktime_get_real_seconds(); int rc = 0; ENTRY; - LASSERT(jobid_hash != NULL); + if (strcmp(obd_jobid_var, JOBSTATS_SESSION) == 0) { + char *jid; - /* scan hash periodically to remove old PID entries from cache */ - spin_lock(&jobid_hash_lock); - if (unlikely(last_expire + DELETE_INTERVAL <= now)) { - expire_cache = true; - last_expire = now; + rcu_read_lock(); + jid = jobid_current(); + if (jid) { + strscpy(jobid, jid, joblen); + joblen = strlen(jobid); + } else { + rc = -ENOENT; + } + rcu_read_unlock(); + GOTO(out, rc); } - spin_unlock(&jobid_hash_lock); - if (expire_cache) - cfs_hash_cond_del(jobid_hash, jobid_should_free_item, - "intentionally_bad_jobid"); + LASSERT(jobid_hash != NULL); /* first try to find PID in the hash and use that value */ pidmap = cfs_hash_lookup(jobid_hash, &pid); @@ -213,6 +582,12 @@ static int jobid_get_from_cache(char *jobid, size_t joblen) pidmap->jp_jobid[0] = '\0'; spin_lock_init(&pidmap->jp_lock); INIT_HLIST_NODE(&pidmap->jp_hash); + /* + * @pidmap might be reclaimed just after it is added into + * hash list, init @jp_refcount as 1 to make sure memory + * could be not freed during access. + */ + kref_init(&pidmap->jp_refcount); /* * Add the newly created map to the hash, on key collision we @@ -227,7 +602,7 @@ static int jobid_get_from_cache(char *jobid, size_t joblen) OBD_FREE_PTR(pidmap); pidmap = pidmap2; } else { - cfs_hash_get(jobid_hash, &pidmap->jp_hash); + jobid_pidmap_gc_start(); } } @@ -251,7 +626,7 @@ static int jobid_get_from_cache(char *jobid, size_t joblen) spin_lock(&pidmap->jp_lock); if (!rc) { pidmap->jp_joblen = env_len; - strlcpy(pidmap->jp_jobid, env_jobid, + strscpy(pidmap->jp_jobid, env_jobid, sizeof(pidmap->jp_jobid)); rc = 0; } else if (rc == -ENOENT) { @@ -268,7 +643,7 @@ static int jobid_get_from_cache(char *jobid, size_t joblen) * If a cached missing entry was found, return -ENOENT. 
*/ if (pidmap->jp_joblen) { - strlcpy(jobid, pidmap->jp_jobid, joblen); + strscpy(jobid, pidmap->jp_jobid, joblen); joblen = pidmap->jp_joblen; rc = 0; } else if (!rc) { @@ -284,12 +659,38 @@ out: } /* + * jobid_print_current_comm() + * + * Print current comm name into the provided jobid buffer, and trim names of + * kernel threads like "kworker/0:0" to "kworker" or "ll_sa_12345" to "ll_sa" + * + * Return: number of chars printed to jobid + */ +static int jobid_print_current_comm(char *jobid, ssize_t joblen) +{ + const char *const names[] = {"kworker", "kswapd", "ll_sa", "ll_agl", + "ldlm_bl", NULL}; + int i; + + if (current->flags & PF_KTHREAD) { + for (i = 0; names[i] != NULL; i++) { + if (strncmp(current->comm, names[i], + strlen(names[i])) == 0) + return snprintf(jobid, joblen, "%s", names[i]); + } + } + + return snprintf(jobid, joblen, "%s", current->comm); +} + +/* * jobid_interpret_string() * * Interpret the jobfmt string to expand specified fields, like coredumps do: * %e = executable * %g = gid * %h = hostname + * %H = short hostname * %j = jobid from environment * %p = pid * %u = uid @@ -306,7 +707,7 @@ static int jobid_interpret_string(const char *jobfmt, char *jobid, char c; while ((c = *jobfmt++) && joblen > 1) { - char f; + char f, *p; int l; if (isspace(c)) /* Don't allow embedded spaces */ @@ -316,12 +717,13 @@ static int jobid_interpret_string(const char *jobfmt, char *jobid, *jobid = c; joblen--; jobid++; + *jobid = '\0'; continue; } switch ((f = *jobfmt++)) { case 'e': /* executable name */ - l = snprintf(jobid, joblen, "%s", current_comm()); + l = jobid_print_current_comm(jobid, joblen); break; case 'g': /* group ID */ l = snprintf(jobid, joblen, "%u", @@ -331,13 +733,22 @@ static int jobid_interpret_string(const char *jobfmt, char *jobid, l = snprintf(jobid, joblen, "%s", init_utsname()->nodename); break; + case 'H': /* short hostname. 
Cut at first dot */ + l = snprintf(jobid, joblen, "%s", + init_utsname()->nodename); + p = strnchr(jobid, joblen, '.'); + if (p) { + *p = '\0'; + l = p - jobid; + } + break; case 'j': /* jobid stored in process environment */ l = jobid_get_from_cache(jobid, joblen); if (l < 0) l = 0; break; case 'p': /* process ID */ - l = snprintf(jobid, joblen, "%u", current_pid()); + l = snprintf(jobid, joblen, "%u", current->pid); break; case 'u': /* user ID */ l = snprintf(jobid, joblen, "%u", @@ -383,8 +794,15 @@ int jobid_cache_init(void) HASH_JOBID_MAX_BITS, HASH_JOBID_BKT_BITS, 0, CFS_HASH_MIN_THETA, CFS_HASH_MAX_THETA, &jobid_hash_ops, CFS_HASH_DEFAULT); - if (!jobid_hash) + if (!jobid_hash) { rc = -ENOMEM; + } else { + rc = rhashtable_init(&session_jobids, &jobid_params); + if (rc) { + cfs_hash_putref(jobid_hash); + jobid_hash = NULL; + } + } RETURN(rc); } @@ -400,11 +818,17 @@ void jobid_cache_fini(void) jobid_hash = NULL; spin_unlock(&jobid_hash_lock); + cancel_delayed_work_sync(&jobid_prune_work); + cancel_delayed_work_sync(&jobid_pidmap_gc_work); + if (tmp_hash != NULL) { cfs_hash_cond_del(tmp_hash, jobid_should_free_item, NULL); cfs_hash_putref(tmp_hash); + + rhashtable_free_and_destroy(&session_jobids, jobid_free, NULL); } + EXIT; } EXPORT_SYMBOL(jobid_cache_fini); @@ -412,10 +836,10 @@ EXPORT_SYMBOL(jobid_cache_fini); /* * Hash operations for pid<->jobid */ -static unsigned jobid_hashfn(struct cfs_hash *hs, const void *key, - unsigned mask) +static unsigned int +jobid_hashfn(struct cfs_hash *hs, const void *key, const unsigned int bits) { - return cfs_hash_djb2_hash(key, sizeof(pid_t), mask); + return cfs_hash_djb2_hash(key, sizeof(pid_t), bits); } static void *jobid_key(struct hlist_node *hnode) @@ -449,7 +873,16 @@ static void jobid_get(struct cfs_hash *hs, struct hlist_node *hnode) pidmap = hlist_entry(hnode, struct jobid_pid_map, jp_hash); - atomic_inc(&pidmap->jp_refcount); + kref_get(&pidmap->jp_refcount); +} + +static void jobid_put_locked_free(struct kref *kref) +{ + struct jobid_pid_map *pidmap = container_of(kref, struct jobid_pid_map, + jp_refcount); + + CDEBUG(D_INFO, "Freeing: %d->%s\n", pidmap->jp_pid, pidmap->jp_jobid); + OBD_FREE_PTR(pidmap); } static void jobid_put_locked(struct cfs_hash *hs, struct hlist_node *hnode) @@ -460,13 +893,8 @@ static void jobid_put_locked(struct cfs_hash *hs, struct hlist_node *hnode) return; pidmap = hlist_entry(hnode, struct jobid_pid_map, jp_hash); - LASSERT(atomic_read(&pidmap->jp_refcount) > 0); - if (atomic_dec_and_test(&pidmap->jp_refcount)) { - CDEBUG(D_INFO, "Freeing: %d->%s\n", - pidmap->jp_pid, pidmap->jp_jobid); - - OBD_FREE_PTR(pidmap); - } + LASSERT(kref_read(&pidmap->jp_refcount) > 0); + kref_put(&pidmap->jp_refcount, jobid_put_locked_free); } static struct cfs_hash_ops jobid_hash_ops = { @@ -484,14 +912,17 @@ static struct cfs_hash_ops jobid_hash_ops = { * * Fill in @jobid string based on the value of obd_jobid_var: * JOBSTATS_DISABLE: none - * JOBSTATS_NODELOCAL: content of obd_jobid_node (jobid_interpret_string()) + * JOBSTATS_NODELOCAL: content of obd_jobid_name (jobid_interpret_string()) * JOBSTATS_PROCNAME_UID: process name/UID + * JOBSTATS_SESSION per-session value set by + * /sys/fs/lustre/jobid_this_session * anything else: look up obd_jobid_var in the processes environment * * Return -ve error number, 0 on success. 
*/ int lustre_get_jobid(char *jobid, size_t joblen) { + int len = min_t(int, joblen, LUSTRE_JOBID_SIZE); int rc = 0; ENTRY; @@ -504,26 +935,30 @@ int lustre_get_jobid(char *jobid, size_t joblen) if (strcmp(obd_jobid_var, JOBSTATS_DISABLE) == 0) { /* Jobstats isn't enabled */ memset(jobid, 0, joblen); - } else if (strcmp(obd_jobid_var, JOBSTATS_NODELOCAL) == 0) { + RETURN(0); + } + + if (strcmp(obd_jobid_var, JOBSTATS_NODELOCAL) == 0) { /* Whole node dedicated to single job */ - rc = jobid_interpret_string(obd_jobid_name, jobid, joblen); + rc = jobid_interpret_string(obd_jobid_name, jobid, len); } else if (strcmp(obd_jobid_var, JOBSTATS_PROCNAME_UID) == 0) { - rc = jobid_interpret_string("%e.%u", jobid, joblen); - } else if (jobid_name_is_valid(current_comm())) { + rc = jobid_interpret_string("%e.%u", jobid, len); + } else if (strcmp(obd_jobid_var, JOBSTATS_SESSION) == 0 || + jobid_name_is_valid(current->comm)) { /* - * obd_jobid_var holds the jobid environment variable name. - * Skip initial check if obd_jobid_name already uses "%j", - * otherwise try just "%j" first, then fall back to whatever - * is in obd_jobid_name if obd_jobid_var is not found. + * per-process jobid wanted, either from environment or from + * per-session setting. + * If obd_jobid_name contains "%j" or if getting the per-process + * jobid directly fails, fall back to using obd_jobid_name. */ rc = -EAGAIN; if (!strnstr(obd_jobid_name, "%j", joblen)) - rc = jobid_get_from_cache(jobid, joblen); + rc = jobid_get_from_cache(jobid, len); - /* fall back to jobid_node if jobid_var not in environment */ + /* fall back to jobid_name if jobid_var not available */ if (rc < 0) { int rc2 = jobid_interpret_string(obd_jobid_name, - jobid, joblen); + jobid, len); if (!rc2) rc = 0; } @@ -547,7 +982,7 @@ void lustre_jobid_clear(const char *find_jobid) if (jobid_hash == NULL) return; - strlcpy(jobid, find_jobid, sizeof(jobid)); + strscpy(jobid, find_jobid, sizeof(jobid)); /* trim \n off the end of the incoming jobid */ end = strchr(jobid, '\n'); if (end && *end == '\n')
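
As a usage illustration for the JOBSTATS_SESSION mode introduced by this patch: the per-session jobid is meant to be set from userspace by any process in the session, through the jobid_this_session attribute named in the lustre_get_jobid() comment above. The sketch below is not part of the patch; the sysfs path comes from that comment, while the helper name, the error handling, and the assumption that obd_jobid_var has already been set to "session" are illustrative only.

/*
 * Hypothetical userspace helper (editor's sketch, not in the patch):
 * tag the calling session by writing a jobid string to the
 * jobid_this_session attribute.  On the kernel side jobid_set_current()
 * then stores the string in the session_jobids rhashtable, keyed by the
 * session's struct pid, and jobid_current() returns it for every
 * process in that session.
 */
#include <stdio.h>
#include <errno.h>

static int set_session_jobid(const char *jobid)
{
	FILE *f = fopen("/sys/fs/lustre/jobid_this_session", "w");

	if (!f)
		return -errno;
	if (fputs(jobid, f) < 0) {
		fclose(f);
		return -EIO;
	}
	return fclose(f) ? -errno : 0;
}

A batch-scheduler prolog could call such a helper once per allocation; RPCs from every process in that session are then tagged with the jobid until the session exits and jobid_prune() drops the stale entry (within JOBID_EXPEDITED_CLEAN or JOBID_BACKGROUND_CLEAN seconds, as described in the comment above the hash table).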