X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Fobdclass%2Fjobid.c;h=1093f7829a96238809d6b9ca01b1d753c2e187ce;hp=8fc4956d8b49abc341e154850a072b42a8017fd4;hb=a06c5e12c536bc588ead483a701a6fe2a777d73b;hpb=08479b74ec3599ee91e14f3f646389bb0aca4575 diff --git a/lustre/obdclass/jobid.c b/lustre/obdclass/jobid.c index 8fc4956..1093f78 100644 --- a/lustre/obdclass/jobid.c +++ b/lustre/obdclass/jobid.c @@ -33,10 +33,10 @@ #define DEBUG_SUBSYSTEM S_RPC #include -#ifdef HAVE_UIDGID_HEADER #include -#endif +#include +#include #include #include #include @@ -49,32 +49,345 @@ spinlock_t jobid_hash_lock; #define DELETE_INTERVAL 300 char obd_jobid_var[JOBSTATS_JOBID_VAR_MAX_LEN + 1] = JOBSTATS_DISABLE; -char obd_jobid_node[LUSTRE_JOBID_SIZE + 1]; +char obd_jobid_name[LUSTRE_JOBID_SIZE] = "%e.%u"; /** - * Structure to store a single jobID/PID mapping + * Structure to store a single PID->JobID mapping */ -struct jobid_to_pid_map { +struct jobid_pid_map { struct hlist_node jp_hash; time64_t jp_time; - atomic_t jp_refcount; spinlock_t jp_lock; /* protects jp_jobid */ - char jp_jobid[LUSTRE_JOBID_SIZE + 1]; + char jp_jobid[LUSTRE_JOBID_SIZE]; + unsigned int jp_joblen; + atomic_t jp_refcount; pid_t jp_pid; }; -/* Get jobid of current process by reading the environment variable +/* + * Jobid can be set for a session (see setsid(2)) by writing to + * a sysfs file from any process in that session. + * The jobids are stored in a hash table indexed by the relevant + * struct pid. We periodically look for entries where the pid has + * no PIDTYPE_SID tasks any more, and prune them. This happens within + * 5 seconds of a jobid being added, and every 5 minutes when jobids exist, + * but none are added. + */ +#define JOBID_EXPEDITED_CLEAN (5) +#define JOBID_BACKGROUND_CLEAN (5 * 60) + +struct session_jobid { + struct pid *sj_session; + struct rhash_head sj_linkage; + struct rcu_head sj_rcu; + char sj_jobid[1]; +}; + +static const struct rhashtable_params jobid_params = { + .key_len = sizeof(struct pid *), + .key_offset = offsetof(struct session_jobid, sj_session), + .head_offset = offsetof(struct session_jobid, sj_linkage), +}; + +static struct rhashtable session_jobids; + +/* + * jobid_current must be called with rcu_read_lock held. + * if it returns non-NULL, the string can only be used + * until rcu_read_unlock is called. + */ +char *jobid_current(void) +{ + struct pid *sid = task_session(current); + struct session_jobid *sj; + + sj = rhashtable_lookup_fast(&session_jobids, &sid, jobid_params); + if (sj) + return sj->sj_jobid; + return NULL; +} + +static void jobid_prune_expedite(void); +/* + * jobid_set_current will try to add a new entry + * to the table. 
If one exists with the same key, the + * jobid will be replaced + */ +int jobid_set_current(char *jobid) +{ + struct pid *sid; + struct session_jobid *sj, *origsj; + int ret; + int len = strlen(jobid); + + sj = kmalloc(sizeof(*sj) + len, GFP_KERNEL); + if (!sj) + return -ENOMEM; + rcu_read_lock(); + sid = task_session(current); + sj->sj_session = get_pid(sid); + strncpy(sj->sj_jobid, jobid, len+1); + origsj = rhashtable_lookup_get_insert_fast(&session_jobids, + &sj->sj_linkage, + jobid_params); + if (origsj == NULL) { + /* successful insert */ + rcu_read_unlock(); + jobid_prune_expedite(); + return 0; + } + + if (IS_ERR(origsj)) { + put_pid(sj->sj_session); + kfree(sj); + rcu_read_unlock(); + return PTR_ERR(origsj); + } + ret = rhashtable_replace_fast(&session_jobids, + &origsj->sj_linkage, + &sj->sj_linkage, + jobid_params); + if (ret) { + put_pid(sj->sj_session); + kfree(sj); + rcu_read_unlock(); + return ret; + } + put_pid(origsj->sj_session); + rcu_read_unlock(); + kfree_rcu(origsj, sj_rcu); + jobid_prune_expedite(); + + return 0; +} + +static void jobid_free(void *vsj, void *arg) +{ + struct session_jobid *sj = vsj; + + put_pid(sj->sj_session); + kfree(sj); +} + +static void jobid_prune(struct work_struct *work); +static DECLARE_DELAYED_WORK(jobid_prune_work, jobid_prune); +static int jobid_prune_expedited; +static void jobid_prune(struct work_struct *work) +{ + int remaining = 0; + struct rhashtable_iter iter; + struct session_jobid *sj; + + jobid_prune_expedited = 0; + rhashtable_walk_enter(&session_jobids, &iter); + rhashtable_walk_start(&iter); + while ((sj = rhashtable_walk_next(&iter)) != NULL) { + if (!hlist_empty(&sj->sj_session->tasks[PIDTYPE_SID])) { + remaining++; + continue; + } + if (rhashtable_remove_fast(&session_jobids, + &sj->sj_linkage, + jobid_params) == 0) { + put_pid(sj->sj_session); + kfree_rcu(sj, sj_rcu); + } + } + rhashtable_walk_stop(&iter); + rhashtable_walk_exit(&iter); + if (remaining) + schedule_delayed_work(&jobid_prune_work, + cfs_time_seconds(JOBID_BACKGROUND_CLEAN)); +} + +static void jobid_prune_expedite(void) +{ + if (!jobid_prune_expedited) { + jobid_prune_expedited = 1; + mod_delayed_work(system_wq, &jobid_prune_work, + cfs_time_seconds(JOBID_EXPEDITED_CLEAN)); + } +} + +static int cfs_access_process_vm(struct task_struct *tsk, + struct mm_struct *mm, + unsigned long addr, + void *buf, int len, int write) +{ + /* Just copied from kernel for the kernels which doesn't + * have access_process_vm() exported + */ + struct vm_area_struct *vma; + struct page *page; + void *old_buf = buf; + + /* Avoid deadlocks on mmap_sem if called from sys_mmap_pgoff(), + * which is already holding mmap_sem for writes. If some other + * thread gets the write lock in the meantime, this thread will + * block, but at least it won't deadlock on itself. LU-1735 + */ + if (!mmap_read_trylock(mm)) + return -EDEADLK; + + /* ignore errors, just check how much was successfully transferred */ + while (len) { + int bytes, rc, offset; + void *maddr; + +#if defined(HAVE_GET_USER_PAGES_GUP_FLAGS) + rc = get_user_pages(addr, 1, write ? 
FOLL_WRITE : 0, &page, + &vma); +#elif defined(HAVE_GET_USER_PAGES_6ARG) + rc = get_user_pages(addr, 1, write, 1, &page, &vma); +#else + rc = get_user_pages(tsk, mm, addr, 1, write, 1, &page, &vma); +#endif + if (rc <= 0) + break; + + bytes = len; + offset = addr & (PAGE_SIZE-1); + if (bytes > PAGE_SIZE-offset) + bytes = PAGE_SIZE-offset; + + maddr = kmap(page); + if (write) { + copy_to_user_page(vma, page, addr, + maddr + offset, buf, bytes); + set_page_dirty_lock(page); + } else { + copy_from_user_page(vma, page, addr, + buf, maddr + offset, bytes); + } + kunmap(page); + put_page(page); + len -= bytes; + buf += bytes; + addr += bytes; + } + mmap_read_unlock(mm); + + return buf - old_buf; +} + +/* Read the environment variable of current process specified by @key. */ +static int cfs_get_environ(const char *key, char *value, int *val_len) +{ + struct mm_struct *mm; + char *buffer; + int buf_len = PAGE_SIZE; + int key_len = strlen(key); + unsigned long addr; + int rc; + bool skip = false; + + ENTRY; + buffer = kmalloc(buf_len, GFP_USER); + if (!buffer) + RETURN(-ENOMEM); + + mm = get_task_mm(current); + if (!mm) { + kfree(buffer); + RETURN(-EINVAL); + } + + addr = mm->env_start; + while (addr < mm->env_end) { + int this_len, retval, scan_len; + char *env_start, *env_end; + + memset(buffer, 0, buf_len); + + this_len = min_t(int, mm->env_end - addr, buf_len); + retval = cfs_access_process_vm(current, mm, addr, buffer, + this_len, 0); + if (retval < 0) + GOTO(out, rc = retval); + else if (retval != this_len) + break; + + addr += retval; + + /* Parse the buffer to find out the specified key/value pair. + * The "key=value" entries are separated by '\0'. + */ + env_start = buffer; + scan_len = this_len; + while (scan_len) { + char *entry; + int entry_len; + + env_end = memscan(env_start, '\0', scan_len); + LASSERT(env_end >= env_start && + env_end <= env_start + scan_len); + + /* The last entry of this buffer cross the buffer + * boundary, reread it in next cycle. + */ + if (unlikely(env_end - env_start == scan_len)) { + /* Just skip the entry larger than page size, + * it can't be jobID env variable. + */ + if (unlikely(scan_len == this_len)) + skip = true; + else + addr -= scan_len; + break; + } else if (unlikely(skip)) { + skip = false; + goto skip; + } + entry = env_start; + entry_len = env_end - env_start; + CDEBUG(D_INFO, "key: %s, entry: %s\n", key, entry); + + /* Key length + length of '=' */ + if (entry_len > key_len + 1 && + entry[key_len] == '=' && + !memcmp(entry, key, key_len)) { + entry += key_len + 1; + entry_len -= key_len + 1; + + /* The 'value' buffer passed in is too small. + * Copy what fits, but return -EOVERFLOW. + */ + if (entry_len >= *val_len) { + memcpy(value, entry, *val_len); + value[*val_len - 1] = 0; + GOTO(out, rc = -EOVERFLOW); + } + + memcpy(value, entry, entry_len); + *val_len = entry_len; + GOTO(out, rc = 0); + } +skip: + scan_len -= (env_end - env_start + 1); + env_start = env_end + 1; + } + } + GOTO(out, rc = -ENOENT); + +out: + mmput(mm); + kfree((void *)buffer); + return rc; +} + +/* + * Get jobid of current process by reading the environment variable * stored in between the "env_start" & "env_end" of task struct. * * If some job scheduler doesn't store jobid in the "env_start/end", * then an upcall could be issued here to get the jobid by utilizing * the userspace tools/API. Then, the jobid must be cached. 
*/ -int get_jobid_from_environ(char *jobid_var, char *jobid, int jobid_len) +int jobid_get_from_environ(char *jobid_var, char *jobid, int *jobid_len) { int rc; - rc = cfs_get_environ(jobid_var, jobid, &jobid_len); + rc = cfs_get_environ(jobid_var, jobid, jobid_len); if (!rc) goto out; @@ -86,18 +399,23 @@ int get_jobid_from_environ(char *jobid_var, char *jobid, int jobid_len) * larger temp buffer for cfs_get_environ(), then * truncating the string at some separator to fit into * the specified jobid_len. Fix later if needed. */ - static bool printed; - if (unlikely(!printed)) { - LCONSOLE_ERROR_MSG(0x16b, "%s value too large " - "for JobID buffer (%d)\n", - obd_jobid_var, jobid_len); - printed = true; + static ktime_t printed; + + if (unlikely(ktime_to_ns(printed) == 0 || + ktime_after(ktime_get(), + ktime_add_ns(printed, + 3600ULL * 24 * NSEC_PER_SEC)))) { + LCONSOLE_WARN("jobid: '%s' value too large (%d)\n", + obd_jobid_var, *jobid_len); + printed = ktime_get(); } + + rc = 0; } else { - CDEBUG((rc == -ENOENT || rc == -EINVAL || - rc == -EDEADLK) ? D_INFO : D_ERROR, - "Get jobid for (%s) failed: rc = %d\n", - obd_jobid_var, rc); + CDEBUG_LIMIT((rc == -ENOENT || rc == -EINVAL || + rc == -EDEADLK) ? D_INFO : D_ERROR, + "jobid: get '%s' failed: rc = %d\n", + obd_jobid_var, rc); } out: @@ -117,16 +435,20 @@ out: static int jobid_should_free_item(void *obj, void *data) { char *jobid = data; - struct jobid_to_pid_map *pidmap = obj; + struct jobid_pid_map *pidmap = obj; int rc = 0; if (obj == NULL) return 0; + if (jobid == NULL) { + WARN_ON_ONCE(atomic_read(&pidmap->jp_refcount) != 1); + return 1; + } + spin_lock(&pidmap->jp_lock); - if (jobid == NULL) - rc = 1; - else if (jobid[0] == '\0') + /* prevent newly inserted items from deleting */ + if (jobid[0] == '\0' && atomic_read(&pidmap->jp_refcount) == 1) rc = 1; else if (ktime_get_real_seconds() - pidmap->jp_time > DELETE_INTERVAL) rc = 1; @@ -138,19 +460,22 @@ static int jobid_should_free_item(void *obj, void *data) } /* - * check_job_name + * jobid_name_is_valid * * Checks if the jobid is a Lustre process * * Returns true if jobid is valid * Returns false if jobid looks like it's a Lustre process */ -static bool check_job_name(char *jobid) +static bool jobid_name_is_valid(char *jobid) { - const char *const lustre_reserved[] = {"ll_ping", "ptlrpc", - "ldlm", "ll_sa", NULL}; + const char *const lustre_reserved[] = { "ll_ping", "ptlrpc", + "ldlm", "ll_sa", NULL }; int i; + if (jobid[0] == '\0') + return false; + for (i = 0; lustre_reserved[i] != NULL; i++) { if (strncmp(jobid, lustre_reserved[i], strlen(lustre_reserved[i])) == 0) @@ -160,27 +485,61 @@ static bool check_job_name(char *jobid) } /* - * get_jobid + * jobid_get_from_cache() * - * Returns the jobid for the current pid. - * - * If no jobid is found in the table, the jobid is calculated based on - * the value of jobid_var, using procname_uid as the default. + * Returns contents of jobid_var from process environment for current PID, + * or from the per-session jobid table. + * Values fetch from process environment will be cached for some time to avoid + * the overhead of scanning the environment. 
* * Return: -ENOMEM if allocating a new pidmap fails - * 0 for success + * -ENOENT if no entry could be found + * +ve string length for success (something was returned in jobid) */ -int get_jobid(char *jobid) +static int jobid_get_from_cache(char *jobid, size_t joblen) { - pid_t pid = current_pid(); - struct jobid_to_pid_map *pidmap = NULL; - struct jobid_to_pid_map *pidmap2; - char tmp_jobid[LUSTRE_JOBID_SIZE + 1]; + static time64_t last_expire; + bool expire_cache = false; + pid_t pid = current->pid; + struct jobid_pid_map *pidmap = NULL; + time64_t now = ktime_get_real_seconds(); int rc = 0; ENTRY; + if (strcmp(obd_jobid_var, JOBSTATS_SESSION) == 0) { + char *jid; + + rcu_read_lock(); + jid = jobid_current(); + if (jid) { + strlcpy(jobid, jid, joblen); + joblen = strlen(jobid); + } else { + rc = -ENOENT; + } + rcu_read_unlock(); + GOTO(out, rc); + } + + LASSERT(jobid_hash != NULL); + + /* scan hash periodically to remove old PID entries from cache */ + spin_lock(&jobid_hash_lock); + if (unlikely(last_expire + DELETE_INTERVAL <= now)) { + expire_cache = true; + last_expire = now; + } + spin_unlock(&jobid_hash_lock); + + if (expire_cache) + cfs_hash_cond_del(jobid_hash, jobid_should_free_item, + "intentionally_bad_jobid"); + + /* first try to find PID in the hash and use that value */ pidmap = cfs_hash_lookup(jobid_hash, &pid); if (pidmap == NULL) { + struct jobid_pid_map *pidmap2; + OBD_ALLOC_PTR(pidmap); if (pidmap == NULL) GOTO(out, rc = -ENOMEM); @@ -190,74 +549,169 @@ int get_jobid(char *jobid) pidmap->jp_jobid[0] = '\0'; spin_lock_init(&pidmap->jp_lock); INIT_HLIST_NODE(&pidmap->jp_hash); + /* + * @pidmap might be reclaimed just after it is added into + * hash list, init @jp_refcount as 1 to make sure memory + * could be not freed during access. + */ + atomic_set(&pidmap->jp_refcount, 1); /* * Add the newly created map to the hash, on key collision we * lost a racing addition and must destroy our newly allocated - * map. The object which exists in the hash will be - * returned. + * map. The object which exists in the hash will be returned. */ pidmap2 = cfs_hash_findadd_unique(jobid_hash, &pid, &pidmap->jp_hash); if (unlikely(pidmap != pidmap2)) { - CDEBUG(D_INFO, "Duplicate jobid found\n"); + CDEBUG(D_INFO, "jobid: duplicate found for PID=%u\n", + pid); OBD_FREE_PTR(pidmap); pidmap = pidmap2; - } else { - cfs_hash_get(jobid_hash, &pidmap->jp_hash); } } + /* + * If pidmap is old (this is always true for new entries) refresh it. + * If obd_jobid_var is not found, cache empty entry and try again + * later, to avoid repeat lookups for PID if obd_jobid_var missing. 
+ */ spin_lock(&pidmap->jp_lock); - if ((ktime_get_real_seconds() - pidmap->jp_time >= RESCAN_INTERVAL) || - pidmap->jp_jobid[0] == '\0') { - /* mark the pidmap as being up to date, if we fail to find - * a good jobid, revert to the old time and try again later - * prevent a race with deletion */ + if (pidmap->jp_time + RESCAN_INTERVAL <= now) { + char env_jobid[LUSTRE_JOBID_SIZE] = ""; + int env_len = sizeof(env_jobid); - time64_t tmp_time = pidmap->jp_time; - pidmap->jp_time = ktime_get_real_seconds(); + pidmap->jp_time = now; spin_unlock(&pidmap->jp_lock); - if (strcmp(obd_jobid_var, JOBSTATS_PROCNAME_UID) == 0) { - rc = 1; - } else { - memset(tmp_jobid, '\0', LUSTRE_JOBID_SIZE + 1); - rc = get_jobid_from_environ(obd_jobid_var, - tmp_jobid, - LUSTRE_JOBID_SIZE + 1); - } + rc = jobid_get_from_environ(obd_jobid_var, env_jobid, &env_len); - /* Use process name + fsuid as jobid default, or when - * specified by "jobname_uid" */ - if (rc) { - snprintf(tmp_jobid, LUSTRE_JOBID_SIZE, "%s.%u", - current_comm(), - from_kuid(&init_user_ns, current_fsuid())); + CDEBUG(D_INFO, "jobid: PID mapping established: %d->%s\n", + pidmap->jp_pid, env_jobid); + spin_lock(&pidmap->jp_lock); + if (!rc) { + pidmap->jp_joblen = env_len; + strlcpy(pidmap->jp_jobid, env_jobid, + sizeof(pidmap->jp_jobid)); rc = 0; + } else if (rc == -ENOENT) { + /* It might have been deleted, clear out old entry */ + pidmap->jp_joblen = 0; + pidmap->jp_jobid[0] = '\0'; } - - CDEBUG(D_INFO, "Jobid to pid mapping established: %d->%s\n", - pidmap->jp_pid, tmp_jobid); - - spin_lock(&pidmap->jp_lock); - if (check_job_name(tmp_jobid)) - strncpy(pidmap->jp_jobid, tmp_jobid, - LUSTRE_JOBID_SIZE); - else - pidmap->jp_time = tmp_time; } - if (strlen(pidmap->jp_jobid) != 0) - strncpy(jobid, pidmap->jp_jobid, LUSTRE_JOBID_SIZE); - + /* + * Regardless of how pidmap was found, if it contains a valid entry + * use that for now. If there was a technical error (e.g. -ENOMEM) + * use the old cached value until it can be looked up again properly. + * If a cached missing entry was found, return -ENOENT. + */ + if (pidmap->jp_joblen) { + strlcpy(jobid, pidmap->jp_jobid, joblen); + joblen = pidmap->jp_joblen; + rc = 0; + } else if (!rc) { + rc = -ENOENT; + } spin_unlock(&pidmap->jp_lock); cfs_hash_put(jobid_hash, &pidmap->jp_hash); EXIT; out: - return rc; + return rc < 0 ? rc : joblen; +} + +/* + * jobid_interpret_string() + * + * Interpret the jobfmt string to expand specified fields, like coredumps do: + * %e = executable + * %g = gid + * %h = hostname + * %H = short hostname + * %j = jobid from environment + * %p = pid + * %u = uid + * + * Unknown escape strings are dropped. Other characters are copied through, + * excluding whitespace (to avoid making jobid parsing difficult). 
+ * + * Return: -EOVERFLOW if the expanded string does not fit within @joblen + * 0 for success + */ +static int jobid_interpret_string(const char *jobfmt, char *jobid, + ssize_t joblen) +{ + char c; + + while ((c = *jobfmt++) && joblen > 1) { + char f, *p; + int l; + + if (isspace(c)) /* Don't allow embedded spaces */ + continue; + + if (c != '%') { + *jobid = c; + joblen--; + jobid++; + *jobid = '\0'; + continue; + } + + switch ((f = *jobfmt++)) { + case 'e': /* executable name */ + l = snprintf(jobid, joblen, "%s", current->comm); + break; + case 'g': /* group ID */ + l = snprintf(jobid, joblen, "%u", + from_kgid(&init_user_ns, current_fsgid())); + break; + case 'h': /* hostname */ + l = snprintf(jobid, joblen, "%s", + init_utsname()->nodename); + break; + case 'H': /* short hostname. Cut at first dot */ + l = snprintf(jobid, joblen, "%s", + init_utsname()->nodename); + p = strnchr(jobid, joblen, '.'); + if (p) { + *p = '\0'; + l = p - jobid; + } + break; + case 'j': /* jobid stored in process environment */ + l = jobid_get_from_cache(jobid, joblen); + if (l < 0) + l = 0; + break; + case 'p': /* process ID */ + l = snprintf(jobid, joblen, "%u", current->pid); + break; + case 'u': /* user ID */ + l = snprintf(jobid, joblen, "%u", + from_kuid(&init_user_ns, current_fsuid())); + break; + case '\0': /* '%' at end of format string */ + l = 0; + goto out; + default: /* drop unknown %x format strings */ + l = 0; + break; + } + jobid += l; + joblen -= l; + } + /* + * This points at the end of the buffer, so long as jobid is always + * incremented the same amount as joblen is decremented. + */ +out: + jobid[joblen - 1] = '\0'; + + return joblen < 0 ? -EOVERFLOW : 0; } /* @@ -270,33 +724,26 @@ out: int jobid_cache_init(void) { int rc = 0; - struct cfs_hash *tmp_jobid_hash; ENTRY; - spin_lock_init(&jobid_hash_lock); - - tmp_jobid_hash = cfs_hash_create("JOBID_HASH", - HASH_JOBID_CUR_BITS, - HASH_JOBID_MAX_BITS, - HASH_JOBID_BKT_BITS, 0, - CFS_HASH_MIN_THETA, - CFS_HASH_MAX_THETA, - &jobid_hash_ops, - CFS_HASH_DEFAULT); + if (jobid_hash) + return 0; - spin_lock(&jobid_hash_lock); - if (jobid_hash == NULL) { - jobid_hash = tmp_jobid_hash; - spin_unlock(&jobid_hash_lock); + spin_lock_init(&jobid_hash_lock); + jobid_hash = cfs_hash_create("JOBID_HASH", HASH_JOBID_CUR_BITS, + HASH_JOBID_MAX_BITS, HASH_JOBID_BKT_BITS, + 0, CFS_HASH_MIN_THETA, CFS_HASH_MAX_THETA, + &jobid_hash_ops, CFS_HASH_DEFAULT); + if (!jobid_hash) { + rc = -ENOMEM; } else { - spin_unlock(&jobid_hash_lock); - if (tmp_jobid_hash != NULL) - cfs_hash_putref(tmp_jobid_hash); + rc = rhashtable_init(&session_jobids, &jobid_params); + if (rc) { + cfs_hash_putref(jobid_hash); + jobid_hash = NULL; + } } - if (!jobid_hash) - rc = -ENOMEM; - RETURN(rc); } EXPORT_SYMBOL(jobid_cache_init); @@ -311,11 +758,16 @@ void jobid_cache_fini(void) jobid_hash = NULL; spin_unlock(&jobid_hash_lock); + cancel_delayed_work_sync(&jobid_prune_work); + if (tmp_hash != NULL) { cfs_hash_cond_del(tmp_hash, jobid_should_free_item, NULL); cfs_hash_putref(tmp_hash); + + rhashtable_free_and_destroy(&session_jobids, jobid_free, NULL); } + EXIT; } EXPORT_SYMBOL(jobid_cache_fini); @@ -331,9 +783,9 @@ static unsigned jobid_hashfn(struct cfs_hash *hs, const void *key, static void *jobid_key(struct hlist_node *hnode) { - struct jobid_to_pid_map *pidmap; + struct jobid_pid_map *pidmap; - pidmap = hlist_entry(hnode, struct jobid_to_pid_map, jp_hash); + pidmap = hlist_entry(hnode, struct jobid_pid_map, jp_hash); return &pidmap->jp_pid; } @@ -351,26 +803,26 @@ static int 
jobid_keycmp(const void *key, struct hlist_node *hnode) static void *jobid_object(struct hlist_node *hnode) { - return hlist_entry(hnode, struct jobid_to_pid_map, jp_hash); + return hlist_entry(hnode, struct jobid_pid_map, jp_hash); } static void jobid_get(struct cfs_hash *hs, struct hlist_node *hnode) { - struct jobid_to_pid_map *pidmap; + struct jobid_pid_map *pidmap; - pidmap = hlist_entry(hnode, struct jobid_to_pid_map, jp_hash); + pidmap = hlist_entry(hnode, struct jobid_pid_map, jp_hash); atomic_inc(&pidmap->jp_refcount); } static void jobid_put_locked(struct cfs_hash *hs, struct hlist_node *hnode) { - struct jobid_to_pid_map *pidmap; + struct jobid_pid_map *pidmap; if (hnode == NULL) return; - pidmap = hlist_entry(hnode, struct jobid_to_pid_map, jp_hash); + pidmap = hlist_entry(hnode, struct jobid_pid_map, jp_hash); LASSERT(atomic_read(&pidmap->jp_refcount) > 0); if (atomic_dec_and_test(&pidmap->jp_refcount)) { CDEBUG(D_INFO, "Freeing: %d->%s\n", @@ -390,46 +842,58 @@ static struct cfs_hash_ops jobid_hash_ops = { .hs_put_locked = jobid_put_locked, }; -/* - * Return the jobid: +/** + * Generate the job identifier string for this process for tracking purposes. * - * Based on the value of obd_jobid_var - * JOBSTATS_DISABLE: none - * JOBSTATS_NODELOCAL: Contents of obd_jobid_name - * JOBSTATS_PROCNAME_UID: Process name/UID - * anything else: Look up the value in the processes environment - * default: JOBSTATS_PROCNAME_UID + * Fill in @jobid string based on the value of obd_jobid_var: + * JOBSTATS_DISABLE: none + * JOBSTATS_NODELOCAL: content of obd_jobid_name (jobid_interpret_string()) + * JOBSTATS_PROCNAME_UID: process name/UID + * JOBSTATS_SESSION per-session value set by + * /sys/fs/lustre/jobid_this_session + * anything else: look up obd_jobid_var in the processes environment + * + * Return -ve error number, 0 on success. */ - -int lustre_get_jobid(char *jobid) +int lustre_get_jobid(char *jobid, size_t joblen) { int rc = 0; - int clear = 0; - static time64_t last_delete; ENTRY; - LASSERT(jobid_hash != NULL); - - spin_lock(&jobid_hash_lock); - if (last_delete + DELETE_INTERVAL <= ktime_get_real_seconds()) { - clear = 1; - last_delete = ktime_get_real_seconds(); + if (unlikely(joblen < 2)) { + if (joblen == 1) + jobid[0] = '\0'; + RETURN(-EINVAL); } - spin_unlock(&jobid_hash_lock); - - if (clear) - cfs_hash_cond_del(jobid_hash, jobid_should_free_item, - "intentionally_bad_jobid"); - if (strcmp(obd_jobid_var, JOBSTATS_DISABLE) == 0) + if (strcmp(obd_jobid_var, JOBSTATS_DISABLE) == 0) { /* Jobstats isn't enabled */ - memset(jobid, 0, LUSTRE_JOBID_SIZE); - else if (strcmp(obd_jobid_var, JOBSTATS_NODELOCAL) == 0) + memset(jobid, 0, joblen); + } else if (strcmp(obd_jobid_var, JOBSTATS_NODELOCAL) == 0) { /* Whole node dedicated to single job */ - memcpy(jobid, obd_jobid_node, LUSTRE_JOBID_SIZE); - else - /* Get jobid from hash table */ - rc = get_jobid(jobid); + rc = jobid_interpret_string(obd_jobid_name, jobid, joblen); + } else if (strcmp(obd_jobid_var, JOBSTATS_PROCNAME_UID) == 0) { + rc = jobid_interpret_string("%e.%u", jobid, joblen); + } else if (strcmp(obd_jobid_var, JOBSTATS_SESSION) == 0 || + jobid_name_is_valid(current->comm)) { + /* + * per-process jobid wanted, either from environment or from + * per-session setting. + * If obd_jobid_name contains "%j" or if getting the per-process + * jobid directly fails, fall back to using obd_jobid_name. 
+ */ + rc = -EAGAIN; + if (!strnstr(obd_jobid_name, "%j", joblen)) + rc = jobid_get_from_cache(jobid, joblen); + + /* fall back to jobid_name if jobid_var not available */ + if (rc < 0) { + int rc2 = jobid_interpret_string(obd_jobid_name, + jobid, joblen); + if (!rc2) + rc = 0; + } + } RETURN(rc); } @@ -438,20 +902,22 @@ EXPORT_SYMBOL(lustre_get_jobid); /* * lustre_jobid_clear * - * uses value pushed in via jobid_name + * Search cache for JobID given by @find_jobid. * If any entries in the hash table match the value, they are removed */ -void lustre_jobid_clear(const char *data) +void lustre_jobid_clear(const char *find_jobid) { - char jobid[LUSTRE_JOBID_SIZE + 1]; + char jobid[LUSTRE_JOBID_SIZE]; + char *end; if (jobid_hash == NULL) return; - strncpy(jobid, data, LUSTRE_JOBID_SIZE); + strlcpy(jobid, find_jobid, sizeof(jobid)); /* trim \n off the end of the incoming jobid */ - if (jobid[strlen(jobid) - 1] == '\n') - jobid[strlen(jobid) - 1] = '\0'; + end = strchr(jobid, '\n'); + if (end && *end == '\n') + *end = '\0'; CDEBUG(D_INFO, "Clearing Jobid: %s\n", jobid); cfs_hash_cond_del(jobid_hash, jobid_should_free_item, jobid);
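
Usage note (not part of the patch): the hunks above add a per-session jobid table (session_jobids, an rhashtable keyed by the session's struct pid) next to the existing per-PID environment cache, and a "%"-format interpreter for obd_jobid_name (%e, %g, %h, %H, %j, %p, %u). The sketch below is only an illustration of how a job launcher might tag its session so that jobid_current()/jobid_get_from_cache() can resolve it; the sysfs path is taken from the comment in lustre_get_jobid() ("/sys/fs/lustre/jobid_this_session"), while the program name, the example jobid string, and the exact write semantics of that file are assumptions to be checked against the matching sysfs/lproc change.

/*
 * Illustrative userspace sketch, assuming the per-session jobid is set
 * by writing to /sys/fs/lustre/jobid_this_session (path quoted from the
 * patch comment) while obd_jobid_var selects the per-session mode
 * (JOBSTATS_SESSION).
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(int argc, char *argv[])
{
	const char *jobid = argc > 1 ? argv[1] : "example_job.1234";
	const char *path = "/sys/fs/lustre/jobid_this_session";
	ssize_t rc;
	int fd;

	/* Start a new session so the jobid applies only to this job tree;
	 * if the caller is already a session leader this fails harmlessly. */
	if (setsid() < 0)
		perror("setsid");

	fd = open(path, O_WRONLY);
	if (fd < 0) {
		perror(path);
		return 1;
	}

	/* Any process in the session may set the jobid (see the comment
	 * above jobid_set_current() in the patch). */
	rc = write(fd, jobid, strlen(jobid));
	if (rc < 0)
		perror("write");
	close(fd);

	return rc < 0;
}

After such a write, descendants of this session that perform Lustre I/O would be reported under the given jobid; processes outside the session still fall back to the environment scan or the obd_jobid_name format (default "%e.%u") as implemented in lustre_get_jobid() above.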