Whamcloud - gitweb
7aba58e5d6efc83040524f55977479f02f3c06ec
[fs/lustre-release.git] / lustre / obdclass / jobid.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.gnu.org/licenses/gpl-2.0.html
19  *
20  * GPL HEADER END
21  */
22 /*
23  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Use is subject to license terms.
25  *
26  * Copyright (c) 2011, 2014, Intel Corporation.
27  *
28  * Copyright 2017 Cray Inc, all rights reserved.
29  * Author: Ben Evans.
30  *
31  * Store PID->JobID mappings
32  */
33
34 #define DEBUG_SUBSYSTEM S_RPC
35 #include <linux/user_namespace.h>
36 #ifdef HAVE_UIDGID_HEADER
37 #include <linux/uidgid.h>
38 #endif
39 #include <linux/utsname.h>
40
41 #include <libcfs/libcfs.h>
42 #include <obd_support.h>
43 #include <obd_class.h>
44 #include <lustre_net.h>
45
/* Hash table holding the struct jobid_pid_map entries, keyed by PID */
static struct cfs_hash *jobid_hash;
/* Forward declaration; the callback table is defined later in this file */
static struct cfs_hash_ops jobid_hash_ops;
/*
 * Held while the cache expiry timestamp is checked/updated and while
 * jobid_hash is detached in jobid_cache_fini().
 */
spinlock_t jobid_hash_lock;

/* Seconds a cached entry is used before the environment is scanned again */
#define RESCAN_INTERVAL 30
/* Seconds between expiry scans, and the age after which entries are dropped */
#define DELETE_INTERVAL 300

/* Selects how the JobID is generated, see lustre_get_jobid() */
char obd_jobid_var[JOBSTATS_JOBID_VAR_MAX_LEN + 1] = JOBSTATS_DISABLE;
/* Format string expanded by jobid_interpret_string() */
char obd_jobid_name[LUSTRE_JOBID_SIZE] = "%e.%u";
55
/**
 * Structure to store a single PID->JobID mapping
 *
 * Entries live in jobid_hash and are reference counted through the
 * jobid_get()/jobid_put_locked() hash callbacks.
 */
struct jobid_pid_map {
        struct hlist_node       jp_hash;     /* linkage into jobid_hash */
        time64_t                jp_time;     /* real time (seconds) of the
                                              * last environment scan */
        spinlock_t              jp_lock; /* protects jp_jobid */
        char                    jp_jobid[LUSTRE_JOBID_SIZE]; /* cached JobID */
        unsigned int            jp_joblen;   /* length reported for jp_jobid,
                                              * 0 if no JobID is cached */
        atomic_t                jp_refcount; /* entry is freed when this
                                              * drops to zero */
        pid_t                   jp_pid;      /* hash key */
};
68
69 /*
70  * Get jobid of current process by reading the environment variable
71  * stored in between the "env_start" & "env_end" of task struct.
72  *
73  * If some job scheduler doesn't store jobid in the "env_start/end",
74  * then an upcall could be issued here to get the jobid by utilizing
75  * the userspace tools/API. Then, the jobid must be cached.
76  */
77 int jobid_get_from_environ(char *jobid_var, char *jobid, int *jobid_len)
78 {
79         static bool printed;
80         int rc;
81
82         rc = cfs_get_environ(jobid_var, jobid, jobid_len);
83         if (!rc)
84                 goto out;
85
86         if (unlikely(rc == -EOVERFLOW && !printed)) {
87                 /* For the PBS_JOBID and LOADL_STEP_ID keys (which are
88                  * variable length strings instead of just numbers), it
89                  * might make sense to keep the unique parts for JobID,
90                  * instead of just returning an error.  That means a
91                  * larger temp buffer for cfs_get_environ(), then
92                  * truncating the string at some separator to fit into
93                  * the specified jobid_len.  Fix later if needed. */
94                 LCONSOLE_WARN("jobid: '%s' value too large (%d)\n",
95                               obd_jobid_var, *jobid_len);
96                 printed = true;
97                 rc = 0;
98         }
99         if (rc) {
100                 CDEBUG((rc == -ENOENT || rc == -EINVAL ||
101                         rc == -EDEADLK) ? D_INFO : D_ERROR,
102                        "jobid: get '%s' failed: rc = %d\n",
103                        obd_jobid_var, rc);
104         }
105
106 out:
107         return rc;
108 }
109
110 /*
111  * jobid_should_free_item
112  *
113  * Each item is checked to see if it should be released
114  * Removed from hash table by caller
115  * Actually freed in jobid_put_locked
116  *
117  * Returns 1 if item is to be freed, 0 if it is to be kept
118  */
119
120 static int jobid_should_free_item(void *obj, void *data)
121 {
122         char *jobid = data;
123         struct jobid_pid_map *pidmap = obj;
124         int rc = 0;
125
126         if (obj == NULL)
127                 return 0;
128
129         if (jobid == NULL) {
130                 WARN_ON_ONCE(atomic_read(&pidmap->jp_refcount) != 1);
131                 return 1;
132         }
133
134         spin_lock(&pidmap->jp_lock);
135         /* prevent newly inserted items from deleting */
136         if (jobid[0] == '\0' && atomic_read(&pidmap->jp_refcount) == 1)
137                 rc = 1;
138         else if (ktime_get_real_seconds() - pidmap->jp_time > DELETE_INTERVAL)
139                 rc = 1;
140         else if (strcmp(pidmap->jp_jobid, jobid) == 0)
141                 rc = 1;
142         spin_unlock(&pidmap->jp_lock);
143
144         return rc;
145 }
146
/*
 * jobid_name_is_valid
 *
 * Decide whether a process name may be used for a JobID.  Empty names and
 * names starting with one of the prefixes used by Lustre's own threads are
 * rejected.
 *
 * Returns true if the name is usable
 * Returns false if it is empty or looks like a Lustre process
 */
static bool jobid_name_is_valid(char *jobid)
{
        static const char *const reserved_prefixes[] = {
                "ll_ping", "ptlrpc", "ldlm", "ll_sa", NULL
        };
        const char *const *prefix;

        if (*jobid == '\0')
                return false;

        for (prefix = reserved_prefixes; *prefix != NULL; prefix++) {
                size_t prefix_len = strlen(*prefix);

                if (strncmp(jobid, *prefix, prefix_len) == 0)
                        return false;
        }

        return true;
}
171
/*
 * jobid_get_from_cache()
 *
 * Returns contents of jobid_var from process environment for current PID.
 * This will be cached for some time to avoid overhead scanning environment.
 *
 * The value is copied into @jobid, a buffer of @joblen bytes, with
 * strlcpy(), so it is truncated if the buffer is too small.
 *
 * Return: -ENOMEM if allocating a new pidmap fails
 *         -ENOENT if no entry could be found
 *         another -ve error from jobid_get_from_environ() if the lookup
 *         failed and no JobID is cached for this PID
 *         +ve string length for success (something was returned in jobid);
 *         this is the cached jp_joblen, not the number of bytes copied
 */
static int jobid_get_from_cache(char *jobid, size_t joblen)
{
        static time64_t last_expire;
        bool expire_cache = false;
        pid_t pid = current_pid();
        struct jobid_pid_map *pidmap = NULL;
        time64_t now = ktime_get_real_seconds();
        int rc = 0;
        ENTRY;

        LASSERT(jobid_hash != NULL);

        /*
         * scan hash periodically to remove old PID entries from cache;
         * last_expire is updated under the lock and the scan itself is
         * run after the lock has been dropped
         */
        spin_lock(&jobid_hash_lock);
        if (unlikely(last_expire + DELETE_INTERVAL <= now)) {
                expire_cache = true;
                last_expire = now;
        }
        spin_unlock(&jobid_hash_lock);

        /*
         * The string passed here is only compared against cached JobIDs
         * in jobid_should_free_item(), where entries older than
         * DELETE_INTERVAL are dropped regardless of the string.
         */
        if (expire_cache)
                cfs_hash_cond_del(jobid_hash, jobid_should_free_item,
                                  "intentionally_bad_jobid");

        /* first try to find PID in the hash and use that value */
        pidmap = cfs_hash_lookup(jobid_hash, &pid);
        if (pidmap == NULL) {
                struct jobid_pid_map *pidmap2;

                OBD_ALLOC_PTR(pidmap);
                if (pidmap == NULL)
                        GOTO(out, rc = -ENOMEM);

                /* jp_time of 0 forces an environment scan further down */
                pidmap->jp_pid = pid;
                pidmap->jp_time = 0;
                pidmap->jp_jobid[0] = '\0';
                spin_lock_init(&pidmap->jp_lock);
                INIT_HLIST_NODE(&pidmap->jp_hash);
                /*
                 * @pidmap might be reclaimed just after it is added into
                 * hash list, init @jp_refcount as 1 to make sure memory
                 * cannot be freed while it is being accessed.
                 */
                atomic_set(&pidmap->jp_refcount, 1);

                /*
                 * Add the newly created map to the hash, on key collision we
                 * lost a racing addition and must destroy our newly allocated
                 * map.  The object which exists in the hash will be returned.
                 */
                pidmap2 = cfs_hash_findadd_unique(jobid_hash, &pid,
                                                  &pidmap->jp_hash);
                if (unlikely(pidmap != pidmap2)) {
                        CDEBUG(D_INFO, "jobid: duplicate found for PID=%u\n",
                               pid);
                        OBD_FREE_PTR(pidmap);
                        pidmap = pidmap2;
                }
        }

        /*
         * If pidmap is old (this is always true for new entries) refresh it.
         * If obd_jobid_var is not found, cache empty entry and try again
         * later, to avoid repeat lookups for PID if obd_jobid_var missing.
         */
        spin_lock(&pidmap->jp_lock);
        if (pidmap->jp_time + RESCAN_INTERVAL <= now) {
                char env_jobid[LUSTRE_JOBID_SIZE] = "";
                int env_len = sizeof(env_jobid);

                /*
                 * Stamp the entry before dropping the lock so that other
                 * callers finding it in the meantime see it as fresh.
                 */
                pidmap->jp_time = now;

                /* the environment lookup is done without jp_lock held */
                spin_unlock(&pidmap->jp_lock);
                rc = jobid_get_from_environ(obd_jobid_var, env_jobid, &env_len);

                /* NOTE: this is logged even if the lookup above failed */
                CDEBUG(D_INFO, "jobid: PID mapping established: %d->%s\n",
                       pidmap->jp_pid, env_jobid);
                spin_lock(&pidmap->jp_lock);
                if (!rc) {
                        pidmap->jp_joblen = env_len;
                        strlcpy(pidmap->jp_jobid, env_jobid,
                                sizeof(pidmap->jp_jobid));
                        rc = 0;
                } else if (rc == -ENOENT) {
                        /* It might have been deleted, clear out old entry */
                        pidmap->jp_joblen = 0;
                        pidmap->jp_jobid[0] = '\0';
                }
                /* any other error leaves the previously cached value alone */
        }

        /*
         * Regardless of how pidmap was found, if it contains a valid entry
         * use that for now.  If there was a technical error (e.g. -ENOMEM)
         * use the old cached value until it can be looked up again properly.
         * If a cached missing entry was found, return -ENOENT.
         */
        if (pidmap->jp_joblen) {
                strlcpy(jobid, pidmap->jp_jobid, joblen);
                /* from here on joblen holds the length to be returned */
                joblen = pidmap->jp_joblen;
                rc = 0;
        } else if (!rc) {
                rc = -ENOENT;
        }
        spin_unlock(&pidmap->jp_lock);

        /*
         * Done with the entry; presumably this drops the reference that
         * the lookup/findadd above returned it with - TODO confirm
         */
        cfs_hash_put(jobid_hash, &pidmap->jp_hash);

        EXIT;
out:
        return rc < 0 ? rc : joblen;
}
293
294 /*
295  * jobid_interpret_string()
296  *
297  * Interpret the jobfmt string to expand specified fields, like coredumps do:
298  *   %e = executable
299  *   %g = gid
300  *   %h = hostname
301  *   %j = jobid from environment
302  *   %p = pid
303  *   %u = uid
304  *
305  * Unknown escape strings are dropped.  Other characters are copied through,
306  * excluding whitespace (to avoid making jobid parsing difficult).
307  *
308  * Return: -EOVERFLOW if the expanded string does not fit within @joblen
309  *         0 for success
310  */
311 static int jobid_interpret_string(const char *jobfmt, char *jobid,
312                                   ssize_t joblen)
313 {
314         char c;
315
316         while ((c = *jobfmt++) && joblen > 1) {
317                 char f;
318                 int l;
319
320                 if (isspace(c)) /* Don't allow embedded spaces */
321                         continue;
322
323                 if (c != '%') {
324                         *jobid = c;
325                         joblen--;
326                         jobid++;
327                         continue;
328                 }
329
330                 switch ((f = *jobfmt++)) {
331                 case 'e': /* executable name */
332                         l = snprintf(jobid, joblen, "%s", current_comm());
333                         break;
334                 case 'g': /* group ID */
335                         l = snprintf(jobid, joblen, "%u",
336                                      from_kgid(&init_user_ns, current_fsgid()));
337                         break;
338                 case 'h': /* hostname */
339                         l = snprintf(jobid, joblen, "%s",
340                                      init_utsname()->nodename);
341                         break;
342                 case 'j': /* jobid stored in process environment */
343                         l = jobid_get_from_cache(jobid, joblen);
344                         if (l < 0)
345                                 l = 0;
346                         break;
347                 case 'p': /* process ID */
348                         l = snprintf(jobid, joblen, "%u", current_pid());
349                         break;
350                 case 'u': /* user ID */
351                         l = snprintf(jobid, joblen, "%u",
352                                      from_kuid(&init_user_ns, current_fsuid()));
353                         break;
354                 case '\0': /* '%' at end of format string */
355                         l = 0;
356                         goto out;
357                 default: /* drop unknown %x format strings */
358                         l = 0;
359                         break;
360                 }
361                 jobid += l;
362                 joblen -= l;
363         }
364         /*
365          * This points at the end of the buffer, so long as jobid is always
366          * incremented the same amount as joblen is decremented.
367          */
368 out:
369         jobid[joblen - 1] = '\0';
370
371         return joblen < 0 ? -EOVERFLOW : 0;
372 }
373
374 /*
375  * Hash initialization, copied from server-side job stats bucket sizes
376  */
377 #define HASH_JOBID_BKT_BITS 5
378 #define HASH_JOBID_CUR_BITS 7
379 #define HASH_JOBID_MAX_BITS 12
380
381 int jobid_cache_init(void)
382 {
383         int rc = 0;
384         ENTRY;
385
386         if (jobid_hash)
387                 return 0;
388
389         spin_lock_init(&jobid_hash_lock);
390         jobid_hash = cfs_hash_create("JOBID_HASH", HASH_JOBID_CUR_BITS,
391                                      HASH_JOBID_MAX_BITS, HASH_JOBID_BKT_BITS,
392                                      0, CFS_HASH_MIN_THETA, CFS_HASH_MAX_THETA,
393                                      &jobid_hash_ops, CFS_HASH_DEFAULT);
394         if (!jobid_hash)
395                 rc = -ENOMEM;
396
397         RETURN(rc);
398 }
399 EXPORT_SYMBOL(jobid_cache_init);
400
401 void jobid_cache_fini(void)
402 {
403         struct cfs_hash *tmp_hash;
404         ENTRY;
405
406         spin_lock(&jobid_hash_lock);
407         tmp_hash = jobid_hash;
408         jobid_hash = NULL;
409         spin_unlock(&jobid_hash_lock);
410
411         if (tmp_hash != NULL) {
412                 cfs_hash_cond_del(tmp_hash, jobid_should_free_item, NULL);
413                 cfs_hash_putref(tmp_hash);
414         }
415
416         EXIT;
417 }
418 EXPORT_SYMBOL(jobid_cache_fini);
419
420 /*
421  * Hash operations for pid<->jobid
422  */
423 static unsigned jobid_hashfn(struct cfs_hash *hs, const void *key,
424                              unsigned mask)
425 {
426         return cfs_hash_djb2_hash(key, sizeof(pid_t), mask);
427 }
428
429 static void *jobid_key(struct hlist_node *hnode)
430 {
431         struct jobid_pid_map *pidmap;
432
433         pidmap = hlist_entry(hnode, struct jobid_pid_map, jp_hash);
434         return &pidmap->jp_pid;
435 }
436
437 static int jobid_keycmp(const void *key, struct hlist_node *hnode)
438 {
439         const pid_t *pid_key1;
440         const pid_t *pid_key2;
441
442         LASSERT(key != NULL);
443         pid_key1 = (pid_t *)key;
444         pid_key2 = (pid_t *)jobid_key(hnode);
445
446         return *pid_key1 == *pid_key2;
447 }
448
449 static void *jobid_object(struct hlist_node *hnode)
450 {
451         return hlist_entry(hnode, struct jobid_pid_map, jp_hash);
452 }
453
454 static void jobid_get(struct cfs_hash *hs, struct hlist_node *hnode)
455 {
456         struct jobid_pid_map *pidmap;
457
458         pidmap = hlist_entry(hnode, struct jobid_pid_map, jp_hash);
459
460         atomic_inc(&pidmap->jp_refcount);
461 }
462
463 static void jobid_put_locked(struct cfs_hash *hs, struct hlist_node *hnode)
464 {
465         struct jobid_pid_map *pidmap;
466
467         if (hnode == NULL)
468                 return;
469
470         pidmap = hlist_entry(hnode, struct jobid_pid_map, jp_hash);
471         LASSERT(atomic_read(&pidmap->jp_refcount) > 0);
472         if (atomic_dec_and_test(&pidmap->jp_refcount)) {
473                 CDEBUG(D_INFO, "Freeing: %d->%s\n",
474                        pidmap->jp_pid, pidmap->jp_jobid);
475
476                 OBD_FREE_PTR(pidmap);
477         }
478 }
479
/*
 * Callback table handed to cfs_hash_create() in jobid_cache_init().
 * jobid_put_locked() is registered for both the hs_put and the
 * hs_put_locked callback.
 */
static struct cfs_hash_ops jobid_hash_ops = {
        .hs_hash        = jobid_hashfn,
        .hs_keycmp      = jobid_keycmp,
        .hs_key         = jobid_key,
        .hs_object      = jobid_object,
        .hs_get         = jobid_get,
        .hs_put         = jobid_put_locked,
        .hs_put_locked  = jobid_put_locked,
};
489
490 /**
491  * Generate the job identifier string for this process for tracking purposes.
492  *
493  * Fill in @jobid string based on the value of obd_jobid_var:
494  * JOBSTATS_DISABLE:      none
495  * JOBSTATS_NODELOCAL:    content of obd_jobid_node (jobid_interpret_string())
496  * JOBSTATS_PROCNAME_UID: process name/UID
497  * anything else:         look up obd_jobid_var in the processes environment
498  *
499  * Return -ve error number, 0 on success.
500  */
501 int lustre_get_jobid(char *jobid, size_t joblen)
502 {
503         int rc = 0;
504         ENTRY;
505
506         if (unlikely(joblen < 2)) {
507                 if (joblen == 1)
508                         jobid[0] = '\0';
509                 RETURN(-EINVAL);
510         }
511
512         if (strcmp(obd_jobid_var, JOBSTATS_DISABLE) == 0) {
513                 /* Jobstats isn't enabled */
514                 memset(jobid, 0, joblen);
515         } else if (strcmp(obd_jobid_var, JOBSTATS_NODELOCAL) == 0) {
516                 /* Whole node dedicated to single job */
517                 rc = jobid_interpret_string(obd_jobid_name, jobid, joblen);
518         } else if (strcmp(obd_jobid_var, JOBSTATS_PROCNAME_UID) == 0) {
519                 rc = jobid_interpret_string("%e.%u", jobid, joblen);
520         } else if (jobid_name_is_valid(current_comm())) {
521                 /*
522                  * obd_jobid_var holds the jobid environment variable name.
523                  * Skip initial check if obd_jobid_name already uses "%j",
524                  * otherwise try just "%j" first, then fall back to whatever
525                  * is in obd_jobid_name if obd_jobid_var is not found.
526                  */
527                 rc = -EAGAIN;
528                 if (!strnstr(obd_jobid_name, "%j", joblen))
529                         rc = jobid_get_from_cache(jobid, joblen);
530
531                 /* fall back to jobid_node if jobid_var not in environment */
532                 if (rc < 0) {
533                         int rc2 = jobid_interpret_string(obd_jobid_name,
534                                                          jobid, joblen);
535                         if (!rc2)
536                                 rc = 0;
537                 }
538         }
539
540         RETURN(rc);
541 }
542 EXPORT_SYMBOL(lustre_get_jobid);
543
544 /*
545  * lustre_jobid_clear
546  *
547  * Search cache for JobID given by @find_jobid.
548  * If any entries in the hash table match the value, they are removed
549  */
550 void lustre_jobid_clear(const char *find_jobid)
551 {
552         char jobid[LUSTRE_JOBID_SIZE];
553         char *end;
554
555         if (jobid_hash == NULL)
556                 return;
557
558         strlcpy(jobid, find_jobid, sizeof(jobid));
559         /* trim \n off the end of the incoming jobid */
560         end = strchr(jobid, '\n');
561         if (end && *end == '\n')
562                 *end = '\0';
563
564         CDEBUG(D_INFO, "Clearing Jobid: %s\n", jobid);
565         cfs_hash_cond_del(jobid_hash, jobid_should_free_item, jobid);
566
567         CDEBUG(D_INFO, "%d items remain in jobID table\n",
568                atomic_read(&jobid_hash->hs_count));
569 }