4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.gnu.org/licenses/gpl-2.0.html
23 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Use is subject to license terms.
26 * Copyright (c) 2011, 2014, Intel Corporation.
28 * Copyright 2017 Cray Inc, all rights reserved.
31 * Store PID->JobID mappings
34 #define DEBUG_SUBSYSTEM S_RPC
35 #include <linux/user_namespace.h>
36 #ifdef HAVE_UIDGID_HEADER
37 #include <linux/uidgid.h>
39 #include <linux/utsname.h>
41 #include <libcfs/libcfs.h>
42 #include <obd_support.h>
43 #include <obd_class.h>
44 #include <lustre_net.h>
/* PID->JobID cache table; created in jobid_cache_init(), looked up by PID */
46 static struct cfs_hash *jobid_hash;
/* forward declaration; the callback table itself is defined later in this file */
47 static struct cfs_hash_ops jobid_hash_ops;
/*
 * Initialized in jobid_cache_init(). Taken in jobid_get_from_cache() around
 * the periodic-expiry check and in jobid_cache_fini() while jobid_hash is
 * read into a local.
 */
48 spinlock_t jobid_hash_lock;
/*
 * Both intervals are compared against ktime_get_real_seconds() values, so
 * they are expressed in seconds.
 * RESCAN_INTERVAL: age after which a cached entry is refreshed from the
 *                  process environment (see jobid_get_from_cache()).
 * DELETE_INTERVAL: age after which an entry is matched by
 *                  jobid_should_free_item(); also the period of the
 *                  whole-table expiry check in jobid_get_from_cache().
 */
50 #define RESCAN_INTERVAL 30
51 #define DELETE_INTERVAL 300
/*
 * Mode selector compared against the JOBSTATS_* names in lustre_get_jobid();
 * any other value is used as an environment variable name. Defaults to
 * JOBSTATS_DISABLE.
 */
53 char obd_jobid_var[JOBSTATS_JOBID_VAR_MAX_LEN + 1] = JOBSTATS_DISABLE;
/* format string expanded by jobid_interpret_string(); defaults to "%e.%u" */
54 char obd_jobid_name[LUSTRE_JOBID_SIZE] = "%e.%u";
57 * Structure to store a single PID->JobID mapping
/*
 * One cache entry, linked into jobid_hash through jp_hash.
 * NOTE(review): code elsewhere in this file also accesses jp_time,
 * jp_refcount and jp_pid on this structure -- confirm those members are
 * declared here.
 */
59 struct jobid_pid_map {
/* hash-chain linkage; hlist_entry() on this member recovers the entry */
60 struct hlist_node jp_hash;
62 spinlock_t jp_lock; /* protects jp_jobid */
/* cached JobID string for the PID */
63 char jp_jobid[LUSTRE_JOBID_SIZE];
/* length recorded for jp_jobid; reset to 0 when the lookup returns -ENOENT */
64 unsigned int jp_joblen;
70 * Get jobid of current process by reading the environment variable
71 * stored in between the "env_start" & "env_end" of task struct.
73 * If some job scheduler doesn't store jobid in the "env_start/end",
74 * then an upcall could be issued here to get the jobid by utilizing
75 * the userspace tools/API. Then, the jobid must be cached.
/*
 * Read the environment variable named by @jobid_var through
 * cfs_get_environ() and log any failure.
 *
 * @jobid_var: name of the environment variable to look up
 * @jobid:     buffer handed to cfs_get_environ() for the value
 * @jobid_len: pointer to the length handed to cfs_get_environ(); its value
 *             is also printed in the overflow warning (presumably in/out --
 *             confirm against cfs_get_environ())
 *
 * -EOVERFLOW is reported on the console, guarded by the 'printed' flag.
 * Other failures are logged at D_INFO for -ENOENT, -EINVAL and -EDEADLK,
 * and at D_ERROR for anything else.
 */
77 int jobid_get_from_environ(char *jobid_var, char *jobid, int *jobid_len)
82 rc = cfs_get_environ(jobid_var, jobid, jobid_len);
86 if (unlikely(rc == -EOVERFLOW && !printed)) {
87 /* For the PBS_JOBID and LOADL_STEP_ID keys (which are
88 * variable length strings instead of just numbers), it
89 * might make sense to keep the unique parts for JobID,
90 * instead of just returning an error. That means a
91 * larger temp buffer for cfs_get_environ(), then
92 * truncating the string at some separator to fit into
93 * the specified jobid_len. Fix later if needed. */
/* NOTE(review): this prints the global obd_jobid_var, not @jobid_var */
94 LCONSOLE_WARN("jobid: '%s' value too large (%d)\n",
95 obd_jobid_var, *jobid_len);
/* expected "not available" style errors are kept out of the error log */
100 CDEBUG((rc == -ENOENT || rc == -EINVAL ||
101 rc == -EDEADLK) ? D_INFO : D_ERROR,
102 "jobid: get '%s' failed: rc = %d\n",
111 * jobid_should_free_item
113 * Each item is checked to see if it should be released
114 * Removed from hash table by caller
115 * Actually freed in jobid_put_locked
117 * Returns 1 if item is to be freed, 0 if it is to be kept
/*
 * Predicate handed to cfs_hash_cond_del() to choose entries for removal.
 *
 * @obj:  the struct jobid_pid_map being examined
 * @data: match argument supplied by the cfs_hash_cond_del() caller; in this
 *        file that is NULL (jobid_cache_fini()), the string
 *        "intentionally_bad_jobid" (periodic expiry) or the JobID given to
 *        lustre_jobid_clear()
 *
 * With a non-NULL argument the entry is tested, under jp_lock, for three
 * conditions: an empty match string while only one reference is held, an
 * age above DELETE_INTERVAL, or an exact JobID match.
 */
120 static int jobid_should_free_item(void *obj, void *data)
123 struct jobid_pid_map *pidmap = obj;
/* sanity check: warn once if the entry does not hold exactly one reference */
130 WARN_ON_ONCE(atomic_read(&pidmap->jp_refcount) != 1);
134 spin_lock(&pidmap->jp_lock);
135 /* prevent newly inserted items from deleting */
136 if (jobid[0] == '\0' && atomic_read(&pidmap->jp_refcount) == 1)
/* entry was last refreshed more than DELETE_INTERVAL seconds ago */
138 else if (ktime_get_real_seconds() - pidmap->jp_time > DELETE_INTERVAL)
/* entry caches exactly the JobID being compared */
140 else if (strcmp(pidmap->jp_jobid, jobid) == 0)
142 spin_unlock(&pidmap->jp_lock);
148 * jobid_name_is_valid
150 * Checks if the jobid is a Lustre process
152 * Returns true if jobid is valid
153 * Returns false if jobid looks like it's a Lustre process
/*
 * Compare @jobid with the list of name prefixes reserved for Lustre's own
 * threads; an empty @jobid is special-cased before the list is scanned.
 *
 * @jobid: NUL-terminated name to test (lustre_get_jobid() passes
 *         current_comm())
 */
155 static bool jobid_name_is_valid(char *jobid)
/* NULL-terminated table of reserved prefixes */
157 const char *const lustre_reserved[] = { "ll_ping", "ptlrpc",
158 "ldlm", "ll_sa", NULL };
161 if (jobid[0] == '\0')
164 for (i = 0; lustre_reserved[i] != NULL; i++) {
/* prefix match: only strlen(reserved name) bytes are compared */
165 if (strncmp(jobid, lustre_reserved[i],
166 strlen(lustre_reserved[i])) == 0)
173 * jobid_get_from_cache()
175 * Returns contents of jobid_var from process environment for current PID.
176 * This will be cached for some time to avoid overhead scanning environment.
178 * Return: -ENOMEM if allocating a new pidmap fails
179 * -ENOENT if no entry could be found
180 * +ve string length for success (something was returned in jobid)
/*
 * Return the JobID cached for the current PID, creating the cache entry on
 * first use and re-reading obd_jobid_var from the process environment when
 * the entry is at least RESCAN_INTERVAL seconds old.
 *
 * @jobid:  output buffer for the JobID string
 * @joblen: size of @jobid in bytes
 *
 * Returns a negative errno when rc is negative, otherwise the length stored
 * in the cache entry.
 */
182 static int jobid_get_from_cache(char *jobid, size_t joblen)
/* function-static, so shared by every caller: time of the last expiry scan */
184 static time64_t last_expire;
185 bool expire_cache = false;
186 pid_t pid = current_pid();
187 struct jobid_pid_map *pidmap = NULL;
188 time64_t now = ktime_get_real_seconds();
192 LASSERT(jobid_hash != NULL);
194 /* scan hash periodically to remove old PID entries from cache */
195 spin_lock(&jobid_hash_lock);
196 if (unlikely(last_expire + DELETE_INTERVAL <= now)) {
200 spin_unlock(&jobid_hash_lock);
/*
 * The match string here is presumably chosen so that it never equals a real
 * JobID, leaving removal to the other checks in jobid_should_free_item().
 */
203 cfs_hash_cond_del(jobid_hash, jobid_should_free_item,
204 "intentionally_bad_jobid");
206 /* first try to find PID in the hash and use that value */
207 pidmap = cfs_hash_lookup(jobid_hash, &pid);
208 if (pidmap == NULL) {
209 struct jobid_pid_map *pidmap2;
/* no entry yet for this PID: allocate and initialize an empty one */
211 OBD_ALLOC_PTR(pidmap);
213 GOTO(out, rc = -ENOMEM);
215 pidmap->jp_pid = pid;
217 pidmap->jp_jobid[0] = '\0';
218 spin_lock_init(&pidmap->jp_lock);
219 INIT_HLIST_NODE(&pidmap->jp_hash);
221 * @pidmap might be reclaimed just after it is added into
222 * hash list, init @jp_refcount as 1 to make sure memory
223 * could be not freed during access.
225 atomic_set(&pidmap->jp_refcount, 1);
228 * Add the newly created map to the hash, on key collision we
229 * lost a racing addition and must destroy our newly allocated
230 * map. The object which exists in the hash will be returned.
232 pidmap2 = cfs_hash_findadd_unique(jobid_hash, &pid,
234 if (unlikely(pidmap != pidmap2)) {
235 CDEBUG(D_INFO, "jobid: duplicate found for PID=%u\n",
237 OBD_FREE_PTR(pidmap);
243 * If pidmap is old (this is always true for new entries) refresh it.
244 * If obd_jobid_var is not found, cache empty entry and try again
245 * later, to avoid repeat lookups for PID if obd_jobid_var missing.
247 spin_lock(&pidmap->jp_lock);
248 if (pidmap->jp_time + RESCAN_INTERVAL <= now) {
/* the value is read into a stack buffer first, not straight into the entry */
249 char env_jobid[LUSTRE_JOBID_SIZE] = "";
250 int env_len = sizeof(env_jobid);
/* timestamp is updated before the lock is dropped for the lookup below */
252 pidmap->jp_time = now;
/* jp_lock is released while the environment is read, then re-taken */
254 spin_unlock(&pidmap->jp_lock);
255 rc = jobid_get_from_environ(obd_jobid_var, env_jobid, &env_len);
257 CDEBUG(D_INFO, "jobid: PID mapping established: %d->%s\n",
258 pidmap->jp_pid, env_jobid);
259 spin_lock(&pidmap->jp_lock);
261 pidmap->jp_joblen = env_len;
/* bounded copy into the entry */
262 strlcpy(pidmap->jp_jobid, env_jobid,
263 sizeof(pidmap->jp_jobid));
265 } else if (rc == -ENOENT) {
266 /* It might have been deleted, clear out old entry */
267 pidmap->jp_joblen = 0;
268 pidmap->jp_jobid[0] = '\0';
273 * Regardless of how pidmap was found, if it contains a valid entry
274 * use that for now. If there was a technical error (e.g. -ENOMEM)
275 * use the old cached value until it can be looked up again properly.
276 * If a cached missing entry was found, return -ENOENT.
278 if (pidmap->jp_joblen) {
/* copy is bounded by the caller's buffer size; joblen then becomes the cached length */
279 strlcpy(jobid, pidmap->jp_jobid, joblen);
280 joblen = pidmap->jp_joblen;
285 spin_unlock(&pidmap->jp_lock);
/* drop this caller's reference on the entry */
287 cfs_hash_put(jobid_hash, &pidmap->jp_hash);
291 return rc < 0 ? rc : joblen;
295 * jobid_interpret_string()
297 * Interpret the jobfmt string to expand specified fields, like coredumps do:
301 * %j = jobid from environment
305 * Unknown escape strings are dropped. Other characters are copied through,
306 * excluding whitespace (to avoid making jobid parsing difficult).
308 * Return: -EOVERFLOW if the expanded string does not fit within @joblen
/*
 * Expand the '%' escapes in @jobfmt into @jobid.
 *
 * Escapes handled by the switch below: %e (current_comm()), %g (fsgid),
 * %h (node name), %j (cached JobID from the environment), %p (PID) and
 * %u (fsuid). A '%' at the end of the format and unknown escapes have
 * their own cases.
 *
 * @jobfmt: format string to expand
 * @jobid:  output buffer, advanced as text is produced
 *
 * Returns -EOVERFLOW when joblen has gone negative, 0 otherwise.
 */
311 static int jobid_interpret_string(const char *jobfmt, char *jobid,
/* stop at the end of the format or once only the terminator would fit */
316 while ((c = *jobfmt++) && joblen > 1) {
320 if (isspace(c)) /* Don't allow embedded spaces */
/* the character after '%' selects the field; it is consumed here */
330 switch ((f = *jobfmt++)) {
331 case 'e': /* executable name */
332 l = snprintf(jobid, joblen, "%s", current_comm());
334 case 'g': /* group ID */
335 l = snprintf(jobid, joblen, "%u",
336 from_kgid(&init_user_ns, current_fsgid()));
338 case 'h': /* hostname */
339 l = snprintf(jobid, joblen, "%s",
340 init_utsname()->nodename);
342 case 'j': /* jobid stored in process environment */
343 l = jobid_get_from_cache(jobid, joblen);
347 case 'p': /* process ID */
348 l = snprintf(jobid, joblen, "%u", current_pid());
350 case 'u': /* user ID */
351 l = snprintf(jobid, joblen, "%u",
352 from_kuid(&init_user_ns, current_fsuid()));
354 case '\0': /* '%' at end of format string */
357 default: /* drop unknown %x format strings */
365 * This points at the end of the buffer, so long as jobid is always
366 * incremented the same amount as joblen is decremented.
369 jobid[joblen - 1] = '\0';
371 return joblen < 0 ? -EOVERFLOW : 0;
375 * Hash initialization, copied from server-side job stats bucket sizes
/*
 * Sizing arguments passed to cfs_hash_create() in jobid_cache_init(), in
 * the order CUR, MAX, BKT. Presumably log2 sizes (bucket bits, current and
 * maximum table bits) -- confirm against cfs_hash_create().
 */
377 #define HASH_JOBID_BKT_BITS 5
378 #define HASH_JOBID_CUR_BITS 7
379 #define HASH_JOBID_MAX_BITS 12
/*
 * Set up the PID->JobID cache: initialize jobid_hash_lock and create the
 * "JOBID_HASH" table with jobid_hash_ops as its callbacks. Exported for
 * use outside this file.
 */
381 int jobid_cache_init(void)
389 spin_lock_init(&jobid_hash_lock);
/* the HASH_JOBID_* macros supply the sizing arguments */
390 jobid_hash = cfs_hash_create("JOBID_HASH", HASH_JOBID_CUR_BITS,
391 HASH_JOBID_MAX_BITS, HASH_JOBID_BKT_BITS,
392 0, CFS_HASH_MIN_THETA, CFS_HASH_MAX_THETA,
393 &jobid_hash_ops, CFS_HASH_DEFAULT);
399 EXPORT_SYMBOL(jobid_cache_init);
/*
 * Tear down the PID->JobID cache: read jobid_hash into a local under
 * jobid_hash_lock, then, if a table exists, remove entries and drop the
 * table reference. Exported for use outside this file.
 */
401 void jobid_cache_fini(void)
403 struct cfs_hash *tmp_hash;
/* only the pointer read is done under the lock; the teardown runs after it */
406 spin_lock(&jobid_hash_lock);
407 tmp_hash = jobid_hash;
409 spin_unlock(&jobid_hash_lock);
411 if (tmp_hash != NULL) {
/* NULL is passed as the match argument of jobid_should_free_item() */
412 cfs_hash_cond_del(tmp_hash, jobid_should_free_item, NULL);
413 cfs_hash_putref(tmp_hash);
418 EXPORT_SYMBOL(jobid_cache_fini);
421 * Hash operations for pid<->jobid
/*
 * Hash callback: hashes the sizeof(pid_t) bytes at @key with
 * cfs_hash_djb2_hash(). @hs is not used by the visible code.
 */
423 static unsigned jobid_hashfn(struct cfs_hash *hs, const void *key,
426 return cfs_hash_djb2_hash(key, sizeof(pid_t), mask);
/*
 * Key callback: returns a pointer to the jp_pid member of the entry that
 * contains @hnode.
 */
429 static void *jobid_key(struct hlist_node *hnode)
431 struct jobid_pid_map *pidmap;
/* step from the embedded list node back to the enclosing entry */
433 pidmap = hlist_entry(hnode, struct jobid_pid_map, jp_hash);
434 return &pidmap->jp_pid;
/*
 * Key-compare callback: returns non-zero when the pid_t at @key equals the
 * PID stored in the entry that contains @hnode, 0 otherwise.
 */
437 static int jobid_keycmp(const void *key, struct hlist_node *hnode)
439 const pid_t *pid_key1;
440 const pid_t *pid_key2;
442 LASSERT(key != NULL);
/* pid_key1 is the lookup key, pid_key2 the key stored in the entry */
443 pid_key1 = (pid_t *)key;
444 pid_key2 = (pid_t *)jobid_key(hnode);
446 return *pid_key1 == *pid_key2;
/* Object callback: returns the struct jobid_pid_map that contains @hnode. */
449 static void *jobid_object(struct hlist_node *hnode)
451 return hlist_entry(hnode, struct jobid_pid_map, jp_hash);
/*
 * Get callback: takes one reference on the entry that contains @hnode by
 * incrementing jp_refcount. @hs is not used by the visible code.
 */
454 static void jobid_get(struct cfs_hash *hs, struct hlist_node *hnode)
456 struct jobid_pid_map *pidmap;
458 pidmap = hlist_entry(hnode, struct jobid_pid_map, jp_hash);
460 atomic_inc(&pidmap->jp_refcount);
/*
 * Put callback: drops one reference on the entry that contains @hnode and
 * frees the entry when the count reaches zero. Registered in
 * jobid_hash_ops for both .hs_put and .hs_put_locked.
 */
463 static void jobid_put_locked(struct cfs_hash *hs, struct hlist_node *hnode)
465 struct jobid_pid_map *pidmap;
470 pidmap = hlist_entry(hnode, struct jobid_pid_map, jp_hash);
/* a put without a matching reference would be a refcounting bug */
471 LASSERT(atomic_read(&pidmap->jp_refcount) > 0);
/* last reference gone: log the mapping and release the memory */
472 if (atomic_dec_and_test(&pidmap->jp_refcount)) {
473 CDEBUG(D_INFO, "Freeing: %d->%s\n",
474 pidmap->jp_pid, pidmap->jp_jobid);
476 OBD_FREE_PTR(pidmap);
/*
 * Callback table given to cfs_hash_create() in jobid_cache_init(); wires
 * the PID-keyed helpers defined in this file into the hash table.
 */
480 static struct cfs_hash_ops jobid_hash_ops = {
481 .hs_hash = jobid_hashfn,
482 .hs_keycmp = jobid_keycmp,
484 .hs_object = jobid_object,
/* the same function serves the plain and the locked put */
486 .hs_put = jobid_put_locked,
487 .hs_put_locked = jobid_put_locked,
491 * Generate the job identifier string for this process for tracking purposes.
493 * Fill in @jobid string based on the value of obd_jobid_var:
494 * JOBSTATS_DISABLE: none
495 * JOBSTATS_NODELOCAL: content of obd_jobid_node (jobid_interpret_string())
496 * JOBSTATS_PROCNAME_UID: process name/UID
497 * anything else: look up obd_jobid_var in the processes environment
499 * Return -ve error number, 0 on success.
/*
 * Fill @jobid according to the mode held in obd_jobid_var. Exported for
 * use outside this file.
 *
 * @jobid:  output buffer
 * @joblen: size of @jobid; values below 2 are handled by the first check
 */
501 int lustre_get_jobid(char *jobid, size_t joblen)
506 if (unlikely(joblen < 2)) {
512 if (strcmp(obd_jobid_var, JOBSTATS_DISABLE) == 0) {
513 /* Jobstats isn't enabled */
514 memset(jobid, 0, joblen);
515 } else if (strcmp(obd_jobid_var, JOBSTATS_NODELOCAL) == 0) {
516 /* Whole node dedicated to single job */
517 rc = jobid_interpret_string(obd_jobid_name, jobid, joblen);
518 } else if (strcmp(obd_jobid_var, JOBSTATS_PROCNAME_UID) == 0) {
/* fixed format: executable name and user ID, independent of obd_jobid_name */
519 rc = jobid_interpret_string("%e.%u", jobid, joblen);
/* the environment lookup is only attempted when the command name passes jobid_name_is_valid() */
520 } else if (jobid_name_is_valid(current_comm())) {
522 * obd_jobid_var holds the jobid environment variable name.
523 * Skip initial check if obd_jobid_name already uses "%j",
524 * otherwise try just "%j" first, then fall back to whatever
525 * is in obd_jobid_name if obd_jobid_var is not found.
/*
 * NOTE(review): the strnstr() search length is joblen (the output buffer
 * size), not the size of obd_jobid_name -- confirm this is intended.
 */
528 if (!strnstr(obd_jobid_name, "%j", joblen))
529 rc = jobid_get_from_cache(jobid, joblen);
531 /* fall back to jobid_node if jobid_var not in environment */
533 int rc2 = jobid_interpret_string(obd_jobid_name,
542 EXPORT_SYMBOL(lustre_get_jobid);
547 * Search cache for JobID given by @find_jobid.
548 * If any entries in the hash table match the value, they are removed
/*
 * Remove cache entries selected by jobid_should_free_item() for the JobID
 * in @find_jobid, then log how many entries remain.
 *
 * @find_jobid: JobID string to clear; a newline in it is looked for and
 *              trimmed on a local copy
 */
550 void lustre_jobid_clear(const char *find_jobid)
/* local copy so the caller's const string is never modified */
552 char jobid[LUSTRE_JOBID_SIZE];
/* checked before the table is used */
555 if (jobid_hash == NULL)
/* bounded copy: input longer than the buffer is truncated */
558 strlcpy(jobid, find_jobid, sizeof(jobid));
559 /* trim \n off the end of the incoming jobid */
560 end = strchr(jobid, '\n');
/* strchr() returns NULL or a pointer to '\n', so the second test is redundant */
561 if (end && *end == '\n')
564 CDEBUG(D_INFO, "Clearing Jobid: %s\n", jobid);
565 cfs_hash_cond_del(jobid_hash, jobid_should_free_item, jobid);
567 CDEBUG(D_INFO, "%d items remain in jobID table\n",
568 atomic_read(&jobid_hash->hs_count));