Whamcloud - gitweb
LU-8191 obdclass: add static and remove functions
[fs/lustre-release.git] / lustre / obdclass / jobid.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.gnu.org/licenses/gpl-2.0.html
19  *
20  * GPL HEADER END
21  */
22 /*
23  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Use is subject to license terms.
25  *
26  * Copyright (c) 2011, 2014, Intel Corporation.
27  *
28  * Copyright 2017 Cray Inc, all rights reserved.
29  * Author: Ben Evans.
30  *
31  * Store PID->JobID mappings
32  */
33
34 #define DEBUG_SUBSYSTEM S_RPC
35 #include <linux/user_namespace.h>
36 #include <linux/uidgid.h>
37 #include <linux/utsname.h>
38
39 #include <libcfs/libcfs.h>
40 #include <obd_support.h>
41 #include <obd_class.h>
42 #include <lustre_net.h>
43
44 static struct cfs_hash *jobid_hash;
45 static struct cfs_hash_ops jobid_hash_ops;
46 spinlock_t jobid_hash_lock;
47
48 #define RESCAN_INTERVAL 30
49 #define DELETE_INTERVAL 300
50
51 char obd_jobid_var[JOBSTATS_JOBID_VAR_MAX_LEN + 1] = JOBSTATS_DISABLE;
52 char obd_jobid_name[LUSTRE_JOBID_SIZE] = "%e.%u";
53
54 /**
55  * Structure to store a single PID->JobID mapping
56  */
57 struct jobid_pid_map {
58         struct hlist_node       jp_hash;
59         time64_t                jp_time;
60         spinlock_t              jp_lock; /* protects jp_jobid */
61         char                    jp_jobid[LUSTRE_JOBID_SIZE];
62         unsigned int            jp_joblen;
63         atomic_t                jp_refcount;
64         pid_t                   jp_pid;
65 };
66
67 /*
68  * Jobid can be set for a session (see setsid(2)) by writing to
69  * a sysfs file from any process in that session.
70  * The jobids are stored in a hash table indexed by the relevant
71  * struct pid.  We periodically look for entries where the pid has
72  * no PIDTYPE_SID tasks any more, and prune them.  This happens within
73  * 5 seconds of a jobid being added, and every 5 minutes when jobids exist,
74  * but none are added.
75  */
76 #define JOBID_EXPEDITED_CLEAN (5)
77 #define JOBID_BACKGROUND_CLEAN (5 * 60)
78
79 struct session_jobid {
80         struct pid              *sj_session;
81         struct rhash_head       sj_linkage;
82         struct rcu_head         sj_rcu;
83         char                    sj_jobid[1];
84 };
85
86 static const struct rhashtable_params jobid_params = {
87         .key_len        = sizeof(struct pid *),
88         .key_offset     = offsetof(struct session_jobid, sj_session),
89         .head_offset    = offsetof(struct session_jobid, sj_linkage),
90 };
91
92 static struct rhashtable session_jobids;
93
94 /*
95  * jobid_current must be called with rcu_read_lock held.
96  * if it returns non-NULL, the string can only be used
97  * until rcu_read_unlock is called.
98  */
99 char *jobid_current(void)
100 {
101         struct pid *sid = task_session(current);
102         struct session_jobid *sj;
103
104         sj = rhashtable_lookup_fast(&session_jobids, &sid, jobid_params);
105         if (sj)
106                 return sj->sj_jobid;
107         return NULL;
108 }
109
110 static void jobid_prune_expedite(void);
111 /*
112  * jobid_set_current will try to add a new entry
113  * to the table.  If one exists with the same key, the
114  * jobid will be replaced
115  */
116 int jobid_set_current(char *jobid)
117 {
118         struct pid *sid;
119         struct session_jobid *sj, *origsj;
120         int ret;
121         int len = strlen(jobid);
122
123         sj = kmalloc(sizeof(*sj) + len, GFP_KERNEL);
124         if (!sj)
125                 return -ENOMEM;
126         rcu_read_lock();
127         sid = task_session(current);
128         sj->sj_session = get_pid(sid);
129         strncpy(sj->sj_jobid, jobid, len+1);
130         origsj = rhashtable_lookup_get_insert_fast(&session_jobids,
131                                                    &sj->sj_linkage,
132                                                    jobid_params);
133         if (origsj == NULL) {
134                 /* successful insert */
135                 rcu_read_unlock();
136                 jobid_prune_expedite();
137                 return 0;
138         }
139
140         if (IS_ERR(origsj)) {
141                 put_pid(sj->sj_session);
142                 kfree(sj);
143                 rcu_read_unlock();
144                 return PTR_ERR(origsj);
145         }
146         ret = rhashtable_replace_fast(&session_jobids,
147                                       &origsj->sj_linkage,
148                                       &sj->sj_linkage,
149                                       jobid_params);
150         if (ret) {
151                 put_pid(sj->sj_session);
152                 kfree(sj);
153                 rcu_read_unlock();
154                 return ret;
155         }
156         put_pid(origsj->sj_session);
157         rcu_read_unlock();
158         kfree_rcu(origsj, sj_rcu);
159         jobid_prune_expedite();
160
161         return 0;
162 }
163
164 static void jobid_free(void *vsj, void *arg)
165 {
166         struct session_jobid *sj = vsj;
167
168         put_pid(sj->sj_session);
169         kfree(sj);
170 }
171
172 static void jobid_prune(struct work_struct *work);
173 static DECLARE_DELAYED_WORK(jobid_prune_work, jobid_prune);
174 static int jobid_prune_expedited;
175 static void jobid_prune(struct work_struct *work)
176 {
177         int remaining = 0;
178         struct rhashtable_iter iter;
179         struct session_jobid *sj;
180
181         jobid_prune_expedited = 0;
182         rhashtable_walk_enter(&session_jobids, &iter);
183         rhashtable_walk_start(&iter);
184         while ((sj = rhashtable_walk_next(&iter)) != NULL) {
185                 if (IS_ERR(sj)) {
186                         if (PTR_ERR(sj) == -EAGAIN)
187                                 continue;
188                         break;
189                 }
190                 if (!hlist_empty(&sj->sj_session->tasks[PIDTYPE_SID])) {
191                         remaining++;
192                         continue;
193                 }
194                 if (rhashtable_remove_fast(&session_jobids,
195                                            &sj->sj_linkage,
196                                            jobid_params) == 0) {
197                         put_pid(sj->sj_session);
198                         kfree_rcu(sj, sj_rcu);
199                 }
200         }
201         rhashtable_walk_stop(&iter);
202         rhashtable_walk_exit(&iter);
203         if (remaining)
204                 schedule_delayed_work(&jobid_prune_work,
205                                       cfs_time_seconds(JOBID_BACKGROUND_CLEAN));
206 }
207
208 static void jobid_prune_expedite(void)
209 {
210         if (!jobid_prune_expedited) {
211                 jobid_prune_expedited = 1;
212                 mod_delayed_work(system_wq, &jobid_prune_work,
213                                  cfs_time_seconds(JOBID_EXPEDITED_CLEAN));
214         }
215 }
216
217 static int cfs_access_process_vm(struct task_struct *tsk,
218                                  struct mm_struct *mm,
219                                  unsigned long addr,
220                                  void *buf, int len, int write)
221 {
222         /* Just copied from kernel for the kernels which doesn't
223          * have access_process_vm() exported
224          */
225         struct vm_area_struct *vma;
226         struct page *page;
227         void *old_buf = buf;
228
229         /* Avoid deadlocks on mmap_sem if called from sys_mmap_pgoff(),
230          * which is already holding mmap_sem for writes.  If some other
231          * thread gets the write lock in the meantime, this thread will
232          * block, but at least it won't deadlock on itself.  LU-1735
233          */
234         if (!mmap_read_trylock(mm))
235                 return -EDEADLK;
236
237         /* ignore errors, just check how much was successfully transferred */
238         while (len) {
239                 int bytes, rc, offset;
240                 void *maddr;
241
242 #if defined(HAVE_GET_USER_PAGES_GUP_FLAGS)
243                 rc = get_user_pages(addr, 1, write ? FOLL_WRITE : 0, &page,
244                                     &vma);
245 #elif defined(HAVE_GET_USER_PAGES_6ARG)
246                 rc = get_user_pages(addr, 1, write, 1, &page, &vma);
247 #else
248                 rc = get_user_pages(tsk, mm, addr, 1, write, 1, &page, &vma);
249 #endif
250                 if (rc <= 0)
251                         break;
252
253                 bytes = len;
254                 offset = addr & (PAGE_SIZE-1);
255                 if (bytes > PAGE_SIZE-offset)
256                         bytes = PAGE_SIZE-offset;
257
258                 maddr = kmap(page);
259                 if (write) {
260                         copy_to_user_page(vma, page, addr,
261                                           maddr + offset, buf, bytes);
262                         set_page_dirty_lock(page);
263                 } else {
264                         copy_from_user_page(vma, page, addr,
265                                             buf, maddr + offset, bytes);
266                 }
267                 kunmap(page);
268                 put_page(page);
269                 len -= bytes;
270                 buf += bytes;
271                 addr += bytes;
272         }
273         mmap_read_unlock(mm);
274
275         return buf - old_buf;
276 }
277
278 /* Read the environment variable of current process specified by @key. */
279 static int cfs_get_environ(const char *key, char *value, int *val_len)
280 {
281         struct mm_struct *mm;
282         char *buffer;
283         int buf_len = PAGE_SIZE;
284         int key_len = strlen(key);
285         unsigned long addr;
286         int rc;
287         bool skip = false;
288
289         ENTRY;
290         buffer = kmalloc(buf_len, GFP_USER);
291         if (!buffer)
292                 RETURN(-ENOMEM);
293
294         mm = get_task_mm(current);
295         if (!mm) {
296                 kfree(buffer);
297                 RETURN(-EINVAL);
298         }
299
300         addr = mm->env_start;
301         while (addr < mm->env_end) {
302                 int this_len, retval, scan_len;
303                 char *env_start, *env_end;
304
305                 memset(buffer, 0, buf_len);
306
307                 this_len = min_t(int, mm->env_end - addr, buf_len);
308                 retval = cfs_access_process_vm(current, mm, addr, buffer,
309                                                this_len, 0);
310                 if (retval < 0)
311                         GOTO(out, rc = retval);
312                 else if (retval != this_len)
313                         break;
314
315                 addr += retval;
316
317                 /* Parse the buffer to find out the specified key/value pair.
318                  * The "key=value" entries are separated by '\0'.
319                  */
320                 env_start = buffer;
321                 scan_len = this_len;
322                 while (scan_len) {
323                         char *entry;
324                         int entry_len;
325
326                         env_end = memscan(env_start, '\0', scan_len);
327                         LASSERT(env_end >= env_start &&
328                                 env_end <= env_start + scan_len);
329
330                         /* The last entry of this buffer cross the buffer
331                          * boundary, reread it in next cycle.
332                          */
333                         if (unlikely(env_end - env_start == scan_len)) {
334                                 /* Just skip the entry larger than page size,
335                                  * it can't be jobID env variable.
336                                  */
337                                 if (unlikely(scan_len == this_len))
338                                         skip = true;
339                                 else
340                                         addr -= scan_len;
341                                 break;
342                         } else if (unlikely(skip)) {
343                                 skip = false;
344                                 goto skip;
345                         }
346                         entry = env_start;
347                         entry_len = env_end - env_start;
348                         CDEBUG(D_INFO, "key: %s, entry: %s\n", key, entry);
349
350                         /* Key length + length of '=' */
351                         if (entry_len > key_len + 1 &&
352                             entry[key_len] == '='  &&
353                             !memcmp(entry, key, key_len)) {
354                                 entry += key_len + 1;
355                                 entry_len -= key_len + 1;
356
357                                 /* The 'value' buffer passed in is too small.
358                                  * Copy what fits, but return -EOVERFLOW.
359                                  */
360                                 if (entry_len >= *val_len) {
361                                         memcpy(value, entry, *val_len);
362                                         value[*val_len - 1] = 0;
363                                         GOTO(out, rc = -EOVERFLOW);
364                                 }
365
366                                 memcpy(value, entry, entry_len);
367                                 *val_len = entry_len;
368                                 GOTO(out, rc = 0);
369                         }
370 skip:
371                         scan_len -= (env_end - env_start + 1);
372                         env_start = env_end + 1;
373                 }
374         }
375         GOTO(out, rc = -ENOENT);
376
377 out:
378         mmput(mm);
379         kfree((void *)buffer);
380         return rc;
381 }
382
383 /*
384  * Get jobid of current process by reading the environment variable
385  * stored in between the "env_start" & "env_end" of task struct.
386  *
387  * If some job scheduler doesn't store jobid in the "env_start/end",
388  * then an upcall could be issued here to get the jobid by utilizing
389  * the userspace tools/API. Then, the jobid must be cached.
390  */
391 static int jobid_get_from_environ(char *jobid_var, char *jobid, int *jobid_len)
392 {
393         int rc;
394
395         rc = cfs_get_environ(jobid_var, jobid, jobid_len);
396         if (!rc)
397                 goto out;
398
399         if (rc == -EOVERFLOW) {
400                 /* For the PBS_JOBID and LOADL_STEP_ID keys (which are
401                  * variable length strings instead of just numbers), it
402                  * might make sense to keep the unique parts for JobID,
403                  * instead of just returning an error.  That means a
404                  * larger temp buffer for cfs_get_environ(), then
405                  * truncating the string at some separator to fit into
406                  * the specified jobid_len.  Fix later if needed. */
407                 static ktime_t printed;
408
409                 if (unlikely(ktime_to_ns(printed) == 0 ||
410                              ktime_after(ktime_get(),
411                                          ktime_add_ns(printed,
412                                              3600ULL * 24 * NSEC_PER_SEC)))) {
413                         LCONSOLE_WARN("jobid: '%s' value too large (%d)\n",
414                                       obd_jobid_var, *jobid_len);
415                         printed = ktime_get();
416                 }
417
418                 rc = 0;
419         } else {
420                 CDEBUG_LIMIT((rc == -ENOENT || rc == -EINVAL ||
421                               rc == -EDEADLK) ? D_INFO : D_ERROR,
422                              "jobid: get '%s' failed: rc = %d\n",
423                              obd_jobid_var, rc);
424         }
425
426 out:
427         return rc;
428 }
429
430 /*
431  * jobid_should_free_item
432  *
433  * Each item is checked to see if it should be released
434  * Removed from hash table by caller
435  * Actually freed in jobid_put_locked
436  *
437  * Returns 1 if item is to be freed, 0 if it is to be kept
438  */
439
440 static int jobid_should_free_item(void *obj, void *data)
441 {
442         char *jobid = data;
443         struct jobid_pid_map *pidmap = obj;
444         int rc = 0;
445
446         if (obj == NULL)
447                 return 0;
448
449         if (jobid == NULL) {
450                 WARN_ON_ONCE(atomic_read(&pidmap->jp_refcount) != 1);
451                 return 1;
452         }
453
454         spin_lock(&pidmap->jp_lock);
455         /* prevent newly inserted items from deleting */
456         if (jobid[0] == '\0' && atomic_read(&pidmap->jp_refcount) == 1)
457                 rc = 1;
458         else if (ktime_get_real_seconds() - pidmap->jp_time > DELETE_INTERVAL)
459                 rc = 1;
460         else if (strcmp(pidmap->jp_jobid, jobid) == 0)
461                 rc = 1;
462         spin_unlock(&pidmap->jp_lock);
463
464         return rc;
465 }
466
/*
 * jobid_name_is_valid
 *
 * Reject names that are empty or that start with one of the reserved
 * thread-name prefixes listed below.
 *
 * Returns true if the name may be used
 * Returns false if it is empty or begins with a reserved prefix
 */
static bool jobid_name_is_valid(char *jobid)
{
	const char *const reserved_prefixes[] = {
		"ll_ping", "ptlrpc", "ldlm", "ll_sa", "kworker",
		"kswapd", "writeback", "irq", "ksoftirq", NULL
	};
	const char *const *prefix;

	if (*jobid == '\0')
		return false;

	for (prefix = reserved_prefixes; *prefix != NULL; prefix++) {
		if (strncmp(jobid, *prefix, strlen(*prefix)) == 0)
			return false;
	}

	return true;
}
493
494 /*
495  * jobid_get_from_cache()
496  *
497  * Returns contents of jobid_var from process environment for current PID,
498  * or from the per-session jobid table.
499  * Values fetch from process environment will be cached for some time to avoid
500  * the overhead of scanning the environment.
501  *
502  * Return: -ENOMEM if allocating a new pidmap fails
503  *         -ENOENT if no entry could be found
504  *         +ve string length for success (something was returned in jobid)
505  */
506 static int jobid_get_from_cache(char *jobid, size_t joblen)
507 {
508         static time64_t last_expire;
509         bool expire_cache = false;
510         pid_t pid = current->pid;
511         struct jobid_pid_map *pidmap = NULL;
512         time64_t now = ktime_get_real_seconds();
513         int rc = 0;
514         ENTRY;
515
516         if (strcmp(obd_jobid_var, JOBSTATS_SESSION) == 0) {
517                 char *jid;
518
519                 rcu_read_lock();
520                 jid = jobid_current();
521                 if (jid) {
522                         strlcpy(jobid, jid, joblen);
523                         joblen = strlen(jobid);
524                 } else {
525                         rc = -ENOENT;
526                 }
527                 rcu_read_unlock();
528                 GOTO(out, rc);
529         }
530
531         LASSERT(jobid_hash != NULL);
532
533         /* scan hash periodically to remove old PID entries from cache */
534         spin_lock(&jobid_hash_lock);
535         if (unlikely(last_expire + DELETE_INTERVAL <= now)) {
536                 expire_cache = true;
537                 last_expire = now;
538         }
539         spin_unlock(&jobid_hash_lock);
540
541         if (expire_cache)
542                 cfs_hash_cond_del(jobid_hash, jobid_should_free_item,
543                                   "intentionally_bad_jobid");
544
545         /* first try to find PID in the hash and use that value */
546         pidmap = cfs_hash_lookup(jobid_hash, &pid);
547         if (pidmap == NULL) {
548                 struct jobid_pid_map *pidmap2;
549
550                 OBD_ALLOC_PTR(pidmap);
551                 if (pidmap == NULL)
552                         GOTO(out, rc = -ENOMEM);
553
554                 pidmap->jp_pid = pid;
555                 pidmap->jp_time = 0;
556                 pidmap->jp_jobid[0] = '\0';
557                 spin_lock_init(&pidmap->jp_lock);
558                 INIT_HLIST_NODE(&pidmap->jp_hash);
559                 /*
560                  * @pidmap might be reclaimed just after it is added into
561                  * hash list, init @jp_refcount as 1 to make sure memory
562                  * could be not freed during access.
563                  */
564                 atomic_set(&pidmap->jp_refcount, 1);
565
566                 /*
567                  * Add the newly created map to the hash, on key collision we
568                  * lost a racing addition and must destroy our newly allocated
569                  * map.  The object which exists in the hash will be returned.
570                  */
571                 pidmap2 = cfs_hash_findadd_unique(jobid_hash, &pid,
572                                                   &pidmap->jp_hash);
573                 if (unlikely(pidmap != pidmap2)) {
574                         CDEBUG(D_INFO, "jobid: duplicate found for PID=%u\n",
575                                pid);
576                         OBD_FREE_PTR(pidmap);
577                         pidmap = pidmap2;
578                 }
579         }
580
581         /*
582          * If pidmap is old (this is always true for new entries) refresh it.
583          * If obd_jobid_var is not found, cache empty entry and try again
584          * later, to avoid repeat lookups for PID if obd_jobid_var missing.
585          */
586         spin_lock(&pidmap->jp_lock);
587         if (pidmap->jp_time + RESCAN_INTERVAL <= now) {
588                 char env_jobid[LUSTRE_JOBID_SIZE] = "";
589                 int env_len = sizeof(env_jobid);
590
591                 pidmap->jp_time = now;
592
593                 spin_unlock(&pidmap->jp_lock);
594                 rc = jobid_get_from_environ(obd_jobid_var, env_jobid, &env_len);
595
596                 CDEBUG(D_INFO, "jobid: PID mapping established: %d->%s\n",
597                        pidmap->jp_pid, env_jobid);
598                 spin_lock(&pidmap->jp_lock);
599                 if (!rc) {
600                         pidmap->jp_joblen = env_len;
601                         strlcpy(pidmap->jp_jobid, env_jobid,
602                                 sizeof(pidmap->jp_jobid));
603                         rc = 0;
604                 } else if (rc == -ENOENT) {
605                         /* It might have been deleted, clear out old entry */
606                         pidmap->jp_joblen = 0;
607                         pidmap->jp_jobid[0] = '\0';
608                 }
609         }
610
611         /*
612          * Regardless of how pidmap was found, if it contains a valid entry
613          * use that for now.  If there was a technical error (e.g. -ENOMEM)
614          * use the old cached value until it can be looked up again properly.
615          * If a cached missing entry was found, return -ENOENT.
616          */
617         if (pidmap->jp_joblen) {
618                 strlcpy(jobid, pidmap->jp_jobid, joblen);
619                 joblen = pidmap->jp_joblen;
620                 rc = 0;
621         } else if (!rc) {
622                 rc = -ENOENT;
623         }
624         spin_unlock(&pidmap->jp_lock);
625
626         cfs_hash_put(jobid_hash, &pidmap->jp_hash);
627
628         EXIT;
629 out:
630         return rc < 0 ? rc : joblen;
631 }
632
633 /*
634  * jobid_interpret_string()
635  *
636  * Interpret the jobfmt string to expand specified fields, like coredumps do:
637  *   %e = executable
638  *   %g = gid
639  *   %h = hostname
640  *   %H = short hostname
641  *   %j = jobid from environment
642  *   %p = pid
643  *   %u = uid
644  *
645  * Unknown escape strings are dropped.  Other characters are copied through,
646  * excluding whitespace (to avoid making jobid parsing difficult).
647  *
648  * Return: -EOVERFLOW if the expanded string does not fit within @joblen
649  *         0 for success
650  */
651 static int jobid_interpret_string(const char *jobfmt, char *jobid,
652                                   ssize_t joblen)
653 {
654         char c;
655
656         while ((c = *jobfmt++) && joblen > 1) {
657                 char f, *p;
658                 int l;
659
660                 if (isspace(c)) /* Don't allow embedded spaces */
661                         continue;
662
663                 if (c != '%') {
664                         *jobid = c;
665                         joblen--;
666                         jobid++;
667                         *jobid = '\0';
668                         continue;
669                 }
670
671                 switch ((f = *jobfmt++)) {
672                 case 'e': /* executable name */
673                         l = snprintf(jobid, joblen, "%s", current->comm);
674                         break;
675                 case 'g': /* group ID */
676                         l = snprintf(jobid, joblen, "%u",
677                                      from_kgid(&init_user_ns, current_fsgid()));
678                         break;
679                 case 'h': /* hostname */
680                         l = snprintf(jobid, joblen, "%s",
681                                      init_utsname()->nodename);
682                         break;
683                 case 'H': /* short hostname. Cut at first dot */
684                         l = snprintf(jobid, joblen, "%s",
685                                      init_utsname()->nodename);
686                         p = strnchr(jobid, joblen, '.');
687                         if (p) {
688                                 *p = '\0';
689                                 l = p - jobid;
690                         }
691                         break;
692                 case 'j': /* jobid stored in process environment */
693                         l = jobid_get_from_cache(jobid, joblen);
694                         if (l < 0)
695                                 l = 0;
696                         break;
697                 case 'p': /* process ID */
698                         l = snprintf(jobid, joblen, "%u", current->pid);
699                         break;
700                 case 'u': /* user ID */
701                         l = snprintf(jobid, joblen, "%u",
702                                      from_kuid(&init_user_ns, current_fsuid()));
703                         break;
704                 case '\0': /* '%' at end of format string */
705                         l = 0;
706                         goto out;
707                 default: /* drop unknown %x format strings */
708                         l = 0;
709                         break;
710                 }
711                 jobid += l;
712                 joblen -= l;
713         }
714         /*
715          * This points at the end of the buffer, so long as jobid is always
716          * incremented the same amount as joblen is decremented.
717          */
718 out:
719         jobid[joblen - 1] = '\0';
720
721         return joblen < 0 ? -EOVERFLOW : 0;
722 }
723
724 /*
725  * Hash initialization, copied from server-side job stats bucket sizes
726  */
727 #define HASH_JOBID_BKT_BITS 5
728 #define HASH_JOBID_CUR_BITS 7
729 #define HASH_JOBID_MAX_BITS 12
730
731 int jobid_cache_init(void)
732 {
733         int rc = 0;
734         ENTRY;
735
736         if (jobid_hash)
737                 return 0;
738
739         spin_lock_init(&jobid_hash_lock);
740         jobid_hash = cfs_hash_create("JOBID_HASH", HASH_JOBID_CUR_BITS,
741                                      HASH_JOBID_MAX_BITS, HASH_JOBID_BKT_BITS,
742                                      0, CFS_HASH_MIN_THETA, CFS_HASH_MAX_THETA,
743                                      &jobid_hash_ops, CFS_HASH_DEFAULT);
744         if (!jobid_hash) {
745                 rc = -ENOMEM;
746         } else {
747                 rc = rhashtable_init(&session_jobids, &jobid_params);
748                 if (rc) {
749                         cfs_hash_putref(jobid_hash);
750                         jobid_hash = NULL;
751                 }
752         }
753
754         RETURN(rc);
755 }
756 EXPORT_SYMBOL(jobid_cache_init);
757
758 void jobid_cache_fini(void)
759 {
760         struct cfs_hash *tmp_hash;
761         ENTRY;
762
763         spin_lock(&jobid_hash_lock);
764         tmp_hash = jobid_hash;
765         jobid_hash = NULL;
766         spin_unlock(&jobid_hash_lock);
767
768         cancel_delayed_work_sync(&jobid_prune_work);
769
770         if (tmp_hash != NULL) {
771                 cfs_hash_cond_del(tmp_hash, jobid_should_free_item, NULL);
772                 cfs_hash_putref(tmp_hash);
773
774                 rhashtable_free_and_destroy(&session_jobids, jobid_free, NULL);
775         }
776
777
778         EXIT;
779 }
780 EXPORT_SYMBOL(jobid_cache_fini);
781
782 /*
783  * Hash operations for pid<->jobid
784  */
785 static unsigned jobid_hashfn(struct cfs_hash *hs, const void *key,
786                              unsigned mask)
787 {
788         return cfs_hash_djb2_hash(key, sizeof(pid_t), mask);
789 }
790
791 static void *jobid_key(struct hlist_node *hnode)
792 {
793         struct jobid_pid_map *pidmap;
794
795         pidmap = hlist_entry(hnode, struct jobid_pid_map, jp_hash);
796         return &pidmap->jp_pid;
797 }
798
799 static int jobid_keycmp(const void *key, struct hlist_node *hnode)
800 {
801         const pid_t *pid_key1;
802         const pid_t *pid_key2;
803
804         LASSERT(key != NULL);
805         pid_key1 = (pid_t *)key;
806         pid_key2 = (pid_t *)jobid_key(hnode);
807
808         return *pid_key1 == *pid_key2;
809 }
810
811 static void *jobid_object(struct hlist_node *hnode)
812 {
813         return hlist_entry(hnode, struct jobid_pid_map, jp_hash);
814 }
815
816 static void jobid_get(struct cfs_hash *hs, struct hlist_node *hnode)
817 {
818         struct jobid_pid_map *pidmap;
819
820         pidmap = hlist_entry(hnode, struct jobid_pid_map, jp_hash);
821
822         atomic_inc(&pidmap->jp_refcount);
823 }
824
825 static void jobid_put_locked(struct cfs_hash *hs, struct hlist_node *hnode)
826 {
827         struct jobid_pid_map *pidmap;
828
829         if (hnode == NULL)
830                 return;
831
832         pidmap = hlist_entry(hnode, struct jobid_pid_map, jp_hash);
833         LASSERT(atomic_read(&pidmap->jp_refcount) > 0);
834         if (atomic_dec_and_test(&pidmap->jp_refcount)) {
835                 CDEBUG(D_INFO, "Freeing: %d->%s\n",
836                        pidmap->jp_pid, pidmap->jp_jobid);
837
838                 OBD_FREE_PTR(pidmap);
839         }
840 }
841
842 static struct cfs_hash_ops jobid_hash_ops = {
843         .hs_hash        = jobid_hashfn,
844         .hs_keycmp      = jobid_keycmp,
845         .hs_key         = jobid_key,
846         .hs_object      = jobid_object,
847         .hs_get         = jobid_get,
848         .hs_put         = jobid_put_locked,
849         .hs_put_locked  = jobid_put_locked,
850 };
851
852 /**
853  * Generate the job identifier string for this process for tracking purposes.
854  *
855  * Fill in @jobid string based on the value of obd_jobid_var:
856  * JOBSTATS_DISABLE:      none
857  * JOBSTATS_NODELOCAL:    content of obd_jobid_name (jobid_interpret_string())
858  * JOBSTATS_PROCNAME_UID: process name/UID
859  * JOBSTATS_SESSION       per-session value set by
860  *                            /sys/fs/lustre/jobid_this_session
861  * anything else:         look up obd_jobid_var in the processes environment
862  *
863  * Return -ve error number, 0 on success.
864  */
865 int lustre_get_jobid(char *jobid, size_t joblen)
866 {
867         char id[LUSTRE_JOBID_SIZE] = "";
868         int len = min_t(int, joblen, LUSTRE_JOBID_SIZE);
869         int rc = 0;
870         ENTRY;
871
872         if (unlikely(joblen < 2)) {
873                 if (joblen == 1)
874                         jobid[0] = '\0';
875                 RETURN(-EINVAL);
876         }
877
878         if (strcmp(obd_jobid_var, JOBSTATS_DISABLE) == 0) {
879                 /* Jobstats isn't enabled */
880                 memset(jobid, 0, joblen);
881                 RETURN(0);
882         }
883
884         if (strcmp(obd_jobid_var, JOBSTATS_NODELOCAL) == 0) {
885                 /* Whole node dedicated to single job */
886                 rc = jobid_interpret_string(obd_jobid_name, id, len);
887         } else if (strcmp(obd_jobid_var, JOBSTATS_PROCNAME_UID) == 0) {
888                 rc = jobid_interpret_string("%e.%u", id, len);
889         } else if (strcmp(obd_jobid_var, JOBSTATS_SESSION) == 0 ||
890                    jobid_name_is_valid(current->comm)) {
891                 /*
892                  * per-process jobid wanted, either from environment or from
893                  * per-session setting.
894                  * If obd_jobid_name contains "%j" or if getting the per-process
895                  * jobid directly fails, fall back to using obd_jobid_name.
896                  */
897                 rc = -EAGAIN;
898                 if (!strnstr(obd_jobid_name, "%j", joblen))
899                         rc = jobid_get_from_cache(id, len);
900
901                 /* fall back to jobid_name if jobid_var not available */
902                 if (rc < 0) {
903                         int rc2 = jobid_interpret_string(obd_jobid_name,
904                                                          id, len);
905                         if (!rc2)
906                                 rc = 0;
907                 }
908         }
909
910         memcpy(jobid, id, len);
911         RETURN(rc);
912 }
913 EXPORT_SYMBOL(lustre_get_jobid);
914
915 /*
916  * lustre_jobid_clear
917  *
918  * Search cache for JobID given by @find_jobid.
919  * If any entries in the hash table match the value, they are removed
920  */
921 void lustre_jobid_clear(const char *find_jobid)
922 {
923         char jobid[LUSTRE_JOBID_SIZE];
924         char *end;
925
926         if (jobid_hash == NULL)
927                 return;
928
929         strlcpy(jobid, find_jobid, sizeof(jobid));
930         /* trim \n off the end of the incoming jobid */
931         end = strchr(jobid, '\n');
932         if (end && *end == '\n')
933                 *end = '\0';
934
935         CDEBUG(D_INFO, "Clearing Jobid: %s\n", jobid);
936         cfs_hash_cond_del(jobid_hash, jobid_should_free_item, jobid);
937
938         CDEBUG(D_INFO, "%d items remain in jobID table\n",
939                atomic_read(&jobid_hash->hs_count));
940 }