Whamcloud - gitweb
LU-15406 sec: fix in-kernel fscrypt support
[fs/lustre-release.git] / lustre / obdclass / jobid.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.gnu.org/licenses/gpl-2.0.html
19  *
20  * GPL HEADER END
21  */
22 /*
23  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Use is subject to license terms.
25  *
26  * Copyright (c) 2011, 2014, Intel Corporation.
27  *
28  * Copyright 2017 Cray Inc, all rights reserved.
29  * Author: Ben Evans.
30  *
31  * Store PID->JobID mappings
32  */
33
34 #define DEBUG_SUBSYSTEM S_RPC
35 #include <linux/user_namespace.h>
36 #include <linux/uidgid.h>
37 #include <linux/utsname.h>
38
39 #include <libcfs/libcfs.h>
40 #include <obd_support.h>
41 #include <obd_class.h>
42 #include <lustre_net.h>
43
/* Hash table caching the PID->JobID mappings read from process environments */
static struct cfs_hash *jobid_hash;
/* Callbacks used by jobid_hash; the initializer is at the end of this file */
static struct cfs_hash_ops jobid_hash_ops;
/* Taken around updates of the cache expiry timestamp and of jobid_hash */
spinlock_t jobid_hash_lock;

/* Seconds after which a cached PID entry is re-read from the environment */
#define RESCAN_INTERVAL 30
/* Seconds between cache expiry scans, and the age at which an entry expires */
#define DELETE_INTERVAL 300

/*
 * Selects where the jobid comes from: compared against the JOBSTATS_* names
 * in lustre_get_jobid(), otherwise used as an environment variable name.
 */
char obd_jobid_var[JOBSTATS_JOBID_VAR_MAX_LEN + 1] = JOBSTATS_DISABLE;
/* Format string expanded by jobid_interpret_string() */
char obd_jobid_name[LUSTRE_JOBID_SIZE] = "%e.%u";
53
/**
 * Structure to store a single PID->JobID mapping
 *
 * Instances live in jobid_hash, keyed by jp_pid, and are freed when
 * jp_refcount drops to zero (see jobid_put_locked()).
 */
struct jobid_pid_map {
        struct hlist_node       jp_hash;        /* linkage into jobid_hash */
        time64_t                jp_time;        /* real time (seconds) of the
                                                 * last environment rescan */
        spinlock_t              jp_lock; /* protects jp_jobid */
        char                    jp_jobid[LUSTRE_JOBID_SIZE]; /* cached jobid */
        unsigned int            jp_joblen;      /* length recorded for jp_jobid,
                                                 * 0 when no jobid is cached */
        atomic_t                jp_refcount;    /* reference count */
        pid_t                   jp_pid;         /* hash key */
};
66
/*
 * Jobid can be set for a session (see setsid(2)) by writing to
 * a sysfs file from any process in that session.
 * The jobids are stored in a hash table indexed by the relevant
 * struct pid.  We periodically look for entries where the pid has
 * no PIDTYPE_SID tasks any more, and prune them.  This happens within
 * 5 seconds of a jobid being added, and every 5 minutes when jobids exist,
 * but none are added.
 */
/* Delay, in seconds, of the prune pass scheduled after a jobid is set */
#define JOBID_EXPEDITED_CLEAN (5)
/* Delay, in seconds, between prune passes while entries remain */
#define JOBID_BACKGROUND_CLEAN (5 * 60)

/* One session->jobid entry of the session_jobids table */
struct session_jobid {
        struct pid              *sj_session;    /* hash key; a reference is
                                                 * held via get_pid() */
        struct rhash_head       sj_linkage;     /* linkage into session_jobids */
        struct rcu_head         sj_rcu;         /* used by kfree_rcu() */
        char                    sj_jobid[1];    /* NUL-terminated jobid; the
                                                 * allocation is extended by
                                                 * the string length */
};
85
/*
 * rhashtable layout for session_jobids: entries are keyed by the value of
 * the sj_session pointer and linked through sj_linkage.
 */
static const struct rhashtable_params jobid_params = {
        .key_len        = sizeof(struct pid *),
        .key_offset     = offsetof(struct session_jobid, sj_session),
        .head_offset    = offsetof(struct session_jobid, sj_linkage),
};

/* Table of per-session jobids, initialized in jobid_cache_init() */
static struct rhashtable session_jobids;
93
94 /*
95  * jobid_current must be called with rcu_read_lock held.
96  * if it returns non-NULL, the string can only be used
97  * until rcu_read_unlock is called.
98  */
99 char *jobid_current(void)
100 {
101         struct pid *sid = task_session(current);
102         struct session_jobid *sj;
103
104         sj = rhashtable_lookup_fast(&session_jobids, &sid, jobid_params);
105         if (sj)
106                 return sj->sj_jobid;
107         return NULL;
108 }
109
110 static void jobid_prune_expedite(void);
111 /*
112  * jobid_set_current will try to add a new entry
113  * to the table.  If one exists with the same key, the
114  * jobid will be replaced
115  */
116 int jobid_set_current(char *jobid)
117 {
118         struct pid *sid;
119         struct session_jobid *sj, *origsj;
120         int ret;
121         int len = strlen(jobid);
122
123         sj = kmalloc(sizeof(*sj) + len, GFP_KERNEL);
124         if (!sj)
125                 return -ENOMEM;
126         rcu_read_lock();
127         sid = task_session(current);
128         sj->sj_session = get_pid(sid);
129         strncpy(sj->sj_jobid, jobid, len+1);
130         origsj = rhashtable_lookup_get_insert_fast(&session_jobids,
131                                                    &sj->sj_linkage,
132                                                    jobid_params);
133         if (origsj == NULL) {
134                 /* successful insert */
135                 rcu_read_unlock();
136                 jobid_prune_expedite();
137                 return 0;
138         }
139
140         if (IS_ERR(origsj)) {
141                 put_pid(sj->sj_session);
142                 kfree(sj);
143                 rcu_read_unlock();
144                 return PTR_ERR(origsj);
145         }
146         ret = rhashtable_replace_fast(&session_jobids,
147                                       &origsj->sj_linkage,
148                                       &sj->sj_linkage,
149                                       jobid_params);
150         if (ret) {
151                 put_pid(sj->sj_session);
152                 kfree(sj);
153                 rcu_read_unlock();
154                 return ret;
155         }
156         put_pid(origsj->sj_session);
157         rcu_read_unlock();
158         kfree_rcu(origsj, sj_rcu);
159         jobid_prune_expedite();
160
161         return 0;
162 }
163
164 static void jobid_free(void *vsj, void *arg)
165 {
166         struct session_jobid *sj = vsj;
167
168         put_pid(sj->sj_session);
169         kfree(sj);
170 }
171
172 static void jobid_prune(struct work_struct *work);
173 static DECLARE_DELAYED_WORK(jobid_prune_work, jobid_prune);
174 static int jobid_prune_expedited;
175 static void jobid_prune(struct work_struct *work)
176 {
177         int remaining = 0;
178         struct rhashtable_iter iter;
179         struct session_jobid *sj;
180
181         jobid_prune_expedited = 0;
182         rhashtable_walk_enter(&session_jobids, &iter);
183         rhashtable_walk_start(&iter);
184         while ((sj = rhashtable_walk_next(&iter)) != NULL) {
185                 if (IS_ERR(sj)) {
186                         if (PTR_ERR(sj) == -EAGAIN)
187                                 continue;
188                         break;
189                 }
190                 if (!hlist_empty(&sj->sj_session->tasks[PIDTYPE_SID])) {
191                         remaining++;
192                         continue;
193                 }
194                 if (rhashtable_remove_fast(&session_jobids,
195                                            &sj->sj_linkage,
196                                            jobid_params) == 0) {
197                         put_pid(sj->sj_session);
198                         kfree_rcu(sj, sj_rcu);
199                 }
200         }
201         rhashtable_walk_stop(&iter);
202         rhashtable_walk_exit(&iter);
203         if (remaining)
204                 schedule_delayed_work(&jobid_prune_work,
205                                       cfs_time_seconds(JOBID_BACKGROUND_CLEAN));
206 }
207
208 static void jobid_prune_expedite(void)
209 {
210         if (!jobid_prune_expedited) {
211                 jobid_prune_expedited = 1;
212                 mod_delayed_work(system_wq, &jobid_prune_work,
213                                  cfs_time_seconds(JOBID_EXPEDITED_CLEAN));
214         }
215 }
216
/*
 * Copy up to @len bytes between the kernel buffer @buf and the user
 * address range starting at @addr, one page at a time.
 *
 * When @write is non-zero data is copied from @buf into the user pages,
 * otherwise from the user pages into @buf.  @tsk is only passed to the
 * oldest get_user_pages() variant; @mm is used for the mmap read lock and,
 * in that variant, also passed to get_user_pages().
 *
 * Return: number of bytes transferred (possibly less than @len if a page
 *         could not be obtained), or -EDEADLK if the mmap lock could not
 *         be taken without blocking.
 */
static int cfs_access_process_vm(struct task_struct *tsk,
                                 struct mm_struct *mm,
                                 unsigned long addr,
                                 void *buf, int len, int write)
{
        /* Just copied from kernel for the kernels which doesn't
         * have access_process_vm() exported
         */
        struct vm_area_struct *vma;
        struct page *page;
        void *old_buf = buf;

        /* Avoid deadlocks on mmap_sem if called from sys_mmap_pgoff(),
         * which is already holding mmap_sem for writes.  If some other
         * thread gets the write lock in the meantime, this thread will
         * block, but at least it won't deadlock on itself.  LU-1735
         */
        if (!mmap_read_trylock(mm))
                return -EDEADLK;

        /* ignore errors, just check how much was successfully transferred */
        while (len) {
                int bytes, rc, offset;
                void *maddr;

                /* get the single page containing @addr; the call signature
                 * depends on what configure detected for this kernel
                 */
#if defined(HAVE_GET_USER_PAGES_GUP_FLAGS)
                rc = get_user_pages(addr, 1, write ? FOLL_WRITE : 0, &page,
                                    &vma);
#elif defined(HAVE_GET_USER_PAGES_6ARG)
                rc = get_user_pages(addr, 1, write, 1, &page, &vma);
#else
                rc = get_user_pages(tsk, mm, addr, 1, write, 1, &page, &vma);
#endif
                if (rc <= 0)
                        break;

                /* clamp this copy to the end of the current page */
                bytes = len;
                offset = addr & (PAGE_SIZE-1);
                if (bytes > PAGE_SIZE-offset)
                        bytes = PAGE_SIZE-offset;

                maddr = kmap(page);
                if (write) {
                        copy_to_user_page(vma, page, addr,
                                          maddr + offset, buf, bytes);
                        set_page_dirty_lock(page);
                } else {
                        copy_from_user_page(vma, page, addr,
                                            buf, maddr + offset, bytes);
                }
                kunmap(page);
                put_page(page);
                len -= bytes;
                buf += bytes;
                addr += bytes;
        }
        mmap_read_unlock(mm);

        /* @buf was advanced by every successful copy */
        return buf - old_buf;
}
277
278 /* Read the environment variable of current process specified by @key. */
279 static int cfs_get_environ(const char *key, char *value, int *val_len)
280 {
281         struct mm_struct *mm;
282         char *buffer;
283         int buf_len = PAGE_SIZE;
284         int key_len = strlen(key);
285         unsigned long addr;
286         int rc;
287         bool skip = false;
288
289         ENTRY;
290         buffer = kmalloc(buf_len, GFP_USER);
291         if (!buffer)
292                 RETURN(-ENOMEM);
293
294         mm = get_task_mm(current);
295         if (!mm) {
296                 kfree(buffer);
297                 RETURN(-EINVAL);
298         }
299
300         addr = mm->env_start;
301         while (addr < mm->env_end) {
302                 int this_len, retval, scan_len;
303                 char *env_start, *env_end;
304
305                 memset(buffer, 0, buf_len);
306
307                 this_len = min_t(int, mm->env_end - addr, buf_len);
308                 retval = cfs_access_process_vm(current, mm, addr, buffer,
309                                                this_len, 0);
310                 if (retval < 0)
311                         GOTO(out, rc = retval);
312                 else if (retval != this_len)
313                         break;
314
315                 addr += retval;
316
317                 /* Parse the buffer to find out the specified key/value pair.
318                  * The "key=value" entries are separated by '\0'.
319                  */
320                 env_start = buffer;
321                 scan_len = this_len;
322                 while (scan_len) {
323                         char *entry;
324                         int entry_len;
325
326                         env_end = memscan(env_start, '\0', scan_len);
327                         LASSERT(env_end >= env_start &&
328                                 env_end <= env_start + scan_len);
329
330                         /* The last entry of this buffer cross the buffer
331                          * boundary, reread it in next cycle.
332                          */
333                         if (unlikely(env_end - env_start == scan_len)) {
334                                 /* Just skip the entry larger than page size,
335                                  * it can't be jobID env variable.
336                                  */
337                                 if (unlikely(scan_len == this_len))
338                                         skip = true;
339                                 else
340                                         addr -= scan_len;
341                                 break;
342                         } else if (unlikely(skip)) {
343                                 skip = false;
344                                 goto skip;
345                         }
346                         entry = env_start;
347                         entry_len = env_end - env_start;
348                         CDEBUG(D_INFO, "key: %s, entry: %s\n", key, entry);
349
350                         /* Key length + length of '=' */
351                         if (entry_len > key_len + 1 &&
352                             entry[key_len] == '='  &&
353                             !memcmp(entry, key, key_len)) {
354                                 entry += key_len + 1;
355                                 entry_len -= key_len + 1;
356
357                                 /* The 'value' buffer passed in is too small.
358                                  * Copy what fits, but return -EOVERFLOW.
359                                  */
360                                 if (entry_len >= *val_len) {
361                                         memcpy(value, entry, *val_len);
362                                         value[*val_len - 1] = 0;
363                                         GOTO(out, rc = -EOVERFLOW);
364                                 }
365
366                                 memcpy(value, entry, entry_len);
367                                 *val_len = entry_len;
368                                 GOTO(out, rc = 0);
369                         }
370 skip:
371                         scan_len -= (env_end - env_start + 1);
372                         env_start = env_end + 1;
373                 }
374         }
375         GOTO(out, rc = -ENOENT);
376
377 out:
378         mmput(mm);
379         kfree((void *)buffer);
380         return rc;
381 }
382
383 /*
384  * Get jobid of current process by reading the environment variable
385  * stored in between the "env_start" & "env_end" of task struct.
386  *
387  * If some job scheduler doesn't store jobid in the "env_start/end",
388  * then an upcall could be issued here to get the jobid by utilizing
389  * the userspace tools/API. Then, the jobid must be cached.
390  */
391 int jobid_get_from_environ(char *jobid_var, char *jobid, int *jobid_len)
392 {
393         int rc;
394
395         rc = cfs_get_environ(jobid_var, jobid, jobid_len);
396         if (!rc)
397                 goto out;
398
399         if (rc == -EOVERFLOW) {
400                 /* For the PBS_JOBID and LOADL_STEP_ID keys (which are
401                  * variable length strings instead of just numbers), it
402                  * might make sense to keep the unique parts for JobID,
403                  * instead of just returning an error.  That means a
404                  * larger temp buffer for cfs_get_environ(), then
405                  * truncating the string at some separator to fit into
406                  * the specified jobid_len.  Fix later if needed. */
407                 static ktime_t printed;
408
409                 if (unlikely(ktime_to_ns(printed) == 0 ||
410                              ktime_after(ktime_get(),
411                                          ktime_add_ns(printed,
412                                              3600ULL * 24 * NSEC_PER_SEC)))) {
413                         LCONSOLE_WARN("jobid: '%s' value too large (%d)\n",
414                                       obd_jobid_var, *jobid_len);
415                         printed = ktime_get();
416                 }
417
418                 rc = 0;
419         } else {
420                 CDEBUG_LIMIT((rc == -ENOENT || rc == -EINVAL ||
421                               rc == -EDEADLK) ? D_INFO : D_ERROR,
422                              "jobid: get '%s' failed: rc = %d\n",
423                              obd_jobid_var, rc);
424         }
425
426 out:
427         return rc;
428 }
429
430 /*
431  * jobid_should_free_item
432  *
433  * Each item is checked to see if it should be released
434  * Removed from hash table by caller
435  * Actually freed in jobid_put_locked
436  *
437  * Returns 1 if item is to be freed, 0 if it is to be kept
438  */
439
440 static int jobid_should_free_item(void *obj, void *data)
441 {
442         char *jobid = data;
443         struct jobid_pid_map *pidmap = obj;
444         int rc = 0;
445
446         if (obj == NULL)
447                 return 0;
448
449         if (jobid == NULL) {
450                 WARN_ON_ONCE(atomic_read(&pidmap->jp_refcount) != 1);
451                 return 1;
452         }
453
454         spin_lock(&pidmap->jp_lock);
455         /* prevent newly inserted items from deleting */
456         if (jobid[0] == '\0' && atomic_read(&pidmap->jp_refcount) == 1)
457                 rc = 1;
458         else if (ktime_get_real_seconds() - pidmap->jp_time > DELETE_INTERVAL)
459                 rc = 1;
460         else if (strcmp(pidmap->jp_jobid, jobid) == 0)
461                 rc = 1;
462         spin_unlock(&pidmap->jp_lock);
463
464         return rc;
465 }
466
/*
 * jobid_name_is_valid
 *
 * Decide whether @jobid may be used as a job name.  An empty string, or a
 * name starting with one of the prefixes used by Lustre's own threads, is
 * rejected.
 *
 * Returns true if jobid is valid
 * Returns false if jobid looks like it's a Lustre process
 */
static bool jobid_name_is_valid(char *jobid)
{
        static const char *const reserved_prefixes[] = {
                "ll_ping", "ptlrpc", "ldlm", "ll_sa",
        };
        size_t idx;

        if (*jobid == '\0')
                return false;

        for (idx = 0;
             idx < sizeof(reserved_prefixes) / sizeof(reserved_prefixes[0]);
             idx++) {
                const char *prefix = reserved_prefixes[idx];

                if (strncmp(jobid, prefix, strlen(prefix)) == 0)
                        return false;
        }

        return true;
}
491
/*
 * jobid_get_from_cache()
 *
 * Returns contents of jobid_var from process environment for current PID,
 * or from the per-session jobid table.
 * Values fetch from process environment will be cached for some time to avoid
 * the overhead of scanning the environment.
 *
 * @jobid:  buffer receiving the jobid string
 * @joblen: size of @jobid
 *
 * Return: -ENOMEM if allocating a new pidmap fails
 *         -ENOENT if no entry could be found
 *         +ve string length for success (something was returned in jobid)
 */
static int jobid_get_from_cache(char *jobid, size_t joblen)
{
        static time64_t last_expire;
        bool expire_cache = false;
        pid_t pid = current->pid;
        struct jobid_pid_map *pidmap = NULL;
        time64_t now = ktime_get_real_seconds();
        int rc = 0;
        ENTRY;

        /* per-session jobids come from the session table, not the PID cache */
        if (strcmp(obd_jobid_var, JOBSTATS_SESSION) == 0) {
                char *jid;

                rcu_read_lock();
                jid = jobid_current();
                if (jid) {
                        strlcpy(jobid, jid, joblen);
                        /* report the length of what was actually copied */
                        joblen = strlen(jobid);
                } else {
                        rc = -ENOENT;
                }
                rcu_read_unlock();
                GOTO(out, rc);
        }

        LASSERT(jobid_hash != NULL);

        /* scan hash periodically to remove old PID entries from cache */
        spin_lock(&jobid_hash_lock);
        if (unlikely(last_expire + DELETE_INTERVAL <= now)) {
                expire_cache = true;
                last_expire = now;
        }
        spin_unlock(&jobid_hash_lock);

        /* a non-empty placeholder is passed as the jobid to match, so the
         * empty-string and NULL cases of jobid_should_free_item() are not
         * taken here
         */
        if (expire_cache)
                cfs_hash_cond_del(jobid_hash, jobid_should_free_item,
                                  "intentionally_bad_jobid");

        /* first try to find PID in the hash and use that value */
        pidmap = cfs_hash_lookup(jobid_hash, &pid);
        if (pidmap == NULL) {
                struct jobid_pid_map *pidmap2;

                OBD_ALLOC_PTR(pidmap);
                if (pidmap == NULL)
                        GOTO(out, rc = -ENOMEM);

                pidmap->jp_pid = pid;
                /* jp_time of 0 makes the rescan check below fire */
                pidmap->jp_time = 0;
                pidmap->jp_jobid[0] = '\0';
                spin_lock_init(&pidmap->jp_lock);
                INIT_HLIST_NODE(&pidmap->jp_hash);
                /*
                 * @pidmap might be reclaimed just after it is added into
                 * hash list, init @jp_refcount as 1 to make sure memory
                 * could be not freed during access.
                 */
                atomic_set(&pidmap->jp_refcount, 1);

                /*
                 * Add the newly created map to the hash, on key collision we
                 * lost a racing addition and must destroy our newly allocated
                 * map.  The object which exists in the hash will be returned.
                 */
                pidmap2 = cfs_hash_findadd_unique(jobid_hash, &pid,
                                                  &pidmap->jp_hash);
                if (unlikely(pidmap != pidmap2)) {
                        CDEBUG(D_INFO, "jobid: duplicate found for PID=%u\n",
                               pid);
                        OBD_FREE_PTR(pidmap);
                        pidmap = pidmap2;
                }
        }

        /*
         * If pidmap is old (this is always true for new entries) refresh it.
         * If obd_jobid_var is not found, cache empty entry and try again
         * later, to avoid repeat lookups for PID if obd_jobid_var missing.
         */
        spin_lock(&pidmap->jp_lock);
        if (pidmap->jp_time + RESCAN_INTERVAL <= now) {
                char env_jobid[LUSTRE_JOBID_SIZE] = "";
                int env_len = sizeof(env_jobid);

                pidmap->jp_time = now;

                /* jp_lock is dropped around the environment scan and
                 * re-taken before the entry is updated
                 */
                spin_unlock(&pidmap->jp_lock);
                rc = jobid_get_from_environ(obd_jobid_var, env_jobid, &env_len);

                CDEBUG(D_INFO, "jobid: PID mapping established: %d->%s\n",
                       pidmap->jp_pid, env_jobid);
                spin_lock(&pidmap->jp_lock);
                if (!rc) {
                        pidmap->jp_joblen = env_len;
                        strlcpy(pidmap->jp_jobid, env_jobid,
                                sizeof(pidmap->jp_jobid));
                        rc = 0;
                } else if (rc == -ENOENT) {
                        /* It might have been deleted, clear out old entry */
                        pidmap->jp_joblen = 0;
                        pidmap->jp_jobid[0] = '\0';
                }
        }

        /*
         * Regardless of how pidmap was found, if it contains a valid entry
         * use that for now.  If there was a technical error (e.g. -ENOMEM)
         * use the old cached value until it can be looked up again properly.
         * If a cached missing entry was found, return -ENOENT.
         */
        if (pidmap->jp_joblen) {
                strlcpy(jobid, pidmap->jp_jobid, joblen);
                /* the cached length is returned even if the copy above was
                 * truncated to the caller's buffer
                 */
                joblen = pidmap->jp_joblen;
                rc = 0;
        } else if (!rc) {
                rc = -ENOENT;
        }
        spin_unlock(&pidmap->jp_lock);

        /* drop the reference obtained from the lookup/insert above */
        cfs_hash_put(jobid_hash, &pidmap->jp_hash);

        EXIT;
out:
        return rc < 0 ? rc : joblen;
}
630
631 /*
632  * jobid_interpret_string()
633  *
634  * Interpret the jobfmt string to expand specified fields, like coredumps do:
635  *   %e = executable
636  *   %g = gid
637  *   %h = hostname
638  *   %H = short hostname
639  *   %j = jobid from environment
640  *   %p = pid
641  *   %u = uid
642  *
643  * Unknown escape strings are dropped.  Other characters are copied through,
644  * excluding whitespace (to avoid making jobid parsing difficult).
645  *
646  * Return: -EOVERFLOW if the expanded string does not fit within @joblen
647  *         0 for success
648  */
649 static int jobid_interpret_string(const char *jobfmt, char *jobid,
650                                   ssize_t joblen)
651 {
652         char c;
653
654         while ((c = *jobfmt++) && joblen > 1) {
655                 char f, *p;
656                 int l;
657
658                 if (isspace(c)) /* Don't allow embedded spaces */
659                         continue;
660
661                 if (c != '%') {
662                         *jobid = c;
663                         joblen--;
664                         jobid++;
665                         *jobid = '\0';
666                         continue;
667                 }
668
669                 switch ((f = *jobfmt++)) {
670                 case 'e': /* executable name */
671                         l = snprintf(jobid, joblen, "%s", current->comm);
672                         break;
673                 case 'g': /* group ID */
674                         l = snprintf(jobid, joblen, "%u",
675                                      from_kgid(&init_user_ns, current_fsgid()));
676                         break;
677                 case 'h': /* hostname */
678                         l = snprintf(jobid, joblen, "%s",
679                                      init_utsname()->nodename);
680                         break;
681                 case 'H': /* short hostname. Cut at first dot */
682                         l = snprintf(jobid, joblen, "%s",
683                                      init_utsname()->nodename);
684                         p = strnchr(jobid, joblen, '.');
685                         if (p) {
686                                 *p = '\0';
687                                 l = p - jobid;
688                         }
689                         break;
690                 case 'j': /* jobid stored in process environment */
691                         l = jobid_get_from_cache(jobid, joblen);
692                         if (l < 0)
693                                 l = 0;
694                         break;
695                 case 'p': /* process ID */
696                         l = snprintf(jobid, joblen, "%u", current->pid);
697                         break;
698                 case 'u': /* user ID */
699                         l = snprintf(jobid, joblen, "%u",
700                                      from_kuid(&init_user_ns, current_fsuid()));
701                         break;
702                 case '\0': /* '%' at end of format string */
703                         l = 0;
704                         goto out;
705                 default: /* drop unknown %x format strings */
706                         l = 0;
707                         break;
708                 }
709                 jobid += l;
710                 joblen -= l;
711         }
712         /*
713          * This points at the end of the buffer, so long as jobid is always
714          * incremented the same amount as joblen is decremented.
715          */
716 out:
717         jobid[joblen - 1] = '\0';
718
719         return joblen < 0 ? -EOVERFLOW : 0;
720 }
721
722 /*
723  * Hash initialization, copied from server-side job stats bucket sizes
724  */
725 #define HASH_JOBID_BKT_BITS 5
726 #define HASH_JOBID_CUR_BITS 7
727 #define HASH_JOBID_MAX_BITS 12
728
729 int jobid_cache_init(void)
730 {
731         int rc = 0;
732         ENTRY;
733
734         if (jobid_hash)
735                 return 0;
736
737         spin_lock_init(&jobid_hash_lock);
738         jobid_hash = cfs_hash_create("JOBID_HASH", HASH_JOBID_CUR_BITS,
739                                      HASH_JOBID_MAX_BITS, HASH_JOBID_BKT_BITS,
740                                      0, CFS_HASH_MIN_THETA, CFS_HASH_MAX_THETA,
741                                      &jobid_hash_ops, CFS_HASH_DEFAULT);
742         if (!jobid_hash) {
743                 rc = -ENOMEM;
744         } else {
745                 rc = rhashtable_init(&session_jobids, &jobid_params);
746                 if (rc) {
747                         cfs_hash_putref(jobid_hash);
748                         jobid_hash = NULL;
749                 }
750         }
751
752         RETURN(rc);
753 }
754 EXPORT_SYMBOL(jobid_cache_init);
755
756 void jobid_cache_fini(void)
757 {
758         struct cfs_hash *tmp_hash;
759         ENTRY;
760
761         spin_lock(&jobid_hash_lock);
762         tmp_hash = jobid_hash;
763         jobid_hash = NULL;
764         spin_unlock(&jobid_hash_lock);
765
766         cancel_delayed_work_sync(&jobid_prune_work);
767
768         if (tmp_hash != NULL) {
769                 cfs_hash_cond_del(tmp_hash, jobid_should_free_item, NULL);
770                 cfs_hash_putref(tmp_hash);
771
772                 rhashtable_free_and_destroy(&session_jobids, jobid_free, NULL);
773         }
774
775
776         EXIT;
777 }
778 EXPORT_SYMBOL(jobid_cache_fini);
779
780 /*
781  * Hash operations for pid<->jobid
782  */
783 static unsigned jobid_hashfn(struct cfs_hash *hs, const void *key,
784                              unsigned mask)
785 {
786         return cfs_hash_djb2_hash(key, sizeof(pid_t), mask);
787 }
788
789 static void *jobid_key(struct hlist_node *hnode)
790 {
791         struct jobid_pid_map *pidmap;
792
793         pidmap = hlist_entry(hnode, struct jobid_pid_map, jp_hash);
794         return &pidmap->jp_pid;
795 }
796
797 static int jobid_keycmp(const void *key, struct hlist_node *hnode)
798 {
799         const pid_t *pid_key1;
800         const pid_t *pid_key2;
801
802         LASSERT(key != NULL);
803         pid_key1 = (pid_t *)key;
804         pid_key2 = (pid_t *)jobid_key(hnode);
805
806         return *pid_key1 == *pid_key2;
807 }
808
809 static void *jobid_object(struct hlist_node *hnode)
810 {
811         return hlist_entry(hnode, struct jobid_pid_map, jp_hash);
812 }
813
814 static void jobid_get(struct cfs_hash *hs, struct hlist_node *hnode)
815 {
816         struct jobid_pid_map *pidmap;
817
818         pidmap = hlist_entry(hnode, struct jobid_pid_map, jp_hash);
819
820         atomic_inc(&pidmap->jp_refcount);
821 }
822
823 static void jobid_put_locked(struct cfs_hash *hs, struct hlist_node *hnode)
824 {
825         struct jobid_pid_map *pidmap;
826
827         if (hnode == NULL)
828                 return;
829
830         pidmap = hlist_entry(hnode, struct jobid_pid_map, jp_hash);
831         LASSERT(atomic_read(&pidmap->jp_refcount) > 0);
832         if (atomic_dec_and_test(&pidmap->jp_refcount)) {
833                 CDEBUG(D_INFO, "Freeing: %d->%s\n",
834                        pidmap->jp_pid, pidmap->jp_jobid);
835
836                 OBD_FREE_PTR(pidmap);
837         }
838 }
839
/*
 * Callback table handed to cfs_hash_create() for jobid_hash.
 * The same function serves both the hs_put and hs_put_locked slots.
 */
static struct cfs_hash_ops jobid_hash_ops = {
        .hs_hash        = jobid_hashfn,
        .hs_keycmp      = jobid_keycmp,
        .hs_key         = jobid_key,
        .hs_object      = jobid_object,
        .hs_get         = jobid_get,
        .hs_put         = jobid_put_locked,
        .hs_put_locked  = jobid_put_locked,
};
849
850 /**
851  * Generate the job identifier string for this process for tracking purposes.
852  *
853  * Fill in @jobid string based on the value of obd_jobid_var:
854  * JOBSTATS_DISABLE:      none
855  * JOBSTATS_NODELOCAL:    content of obd_jobid_name (jobid_interpret_string())
856  * JOBSTATS_PROCNAME_UID: process name/UID
857  * JOBSTATS_SESSION       per-session value set by
858  *                            /sys/fs/lustre/jobid_this_session
859  * anything else:         look up obd_jobid_var in the processes environment
860  *
861  * Return -ve error number, 0 on success.
862  */
863 int lustre_get_jobid(char *jobid, size_t joblen)
864 {
865         int rc = 0;
866         ENTRY;
867
868         if (unlikely(joblen < 2)) {
869                 if (joblen == 1)
870                         jobid[0] = '\0';
871                 RETURN(-EINVAL);
872         }
873
874         if (strcmp(obd_jobid_var, JOBSTATS_DISABLE) == 0) {
875                 /* Jobstats isn't enabled */
876                 memset(jobid, 0, joblen);
877         } else if (strcmp(obd_jobid_var, JOBSTATS_NODELOCAL) == 0) {
878                 /* Whole node dedicated to single job */
879                 rc = jobid_interpret_string(obd_jobid_name, jobid, joblen);
880         } else if (strcmp(obd_jobid_var, JOBSTATS_PROCNAME_UID) == 0) {
881                 rc = jobid_interpret_string("%e.%u", jobid, joblen);
882         } else if (strcmp(obd_jobid_var, JOBSTATS_SESSION) == 0 ||
883                    jobid_name_is_valid(current->comm)) {
884                 /*
885                  * per-process jobid wanted, either from environment or from
886                  * per-session setting.
887                  * If obd_jobid_name contains "%j" or if getting the per-process
888                  * jobid directly fails, fall back to using obd_jobid_name.
889                  */
890                 rc = -EAGAIN;
891                 if (!strnstr(obd_jobid_name, "%j", joblen))
892                         rc = jobid_get_from_cache(jobid, joblen);
893
894                 /* fall back to jobid_name if jobid_var not available */
895                 if (rc < 0) {
896                         int rc2 = jobid_interpret_string(obd_jobid_name,
897                                                          jobid, joblen);
898                         if (!rc2)
899                                 rc = 0;
900                 }
901         }
902
903         RETURN(rc);
904 }
905 EXPORT_SYMBOL(lustre_get_jobid);
906
907 /*
908  * lustre_jobid_clear
909  *
910  * Search cache for JobID given by @find_jobid.
911  * If any entries in the hash table match the value, they are removed
912  */
913 void lustre_jobid_clear(const char *find_jobid)
914 {
915         char jobid[LUSTRE_JOBID_SIZE];
916         char *end;
917
918         if (jobid_hash == NULL)
919                 return;
920
921         strlcpy(jobid, find_jobid, sizeof(jobid));
922         /* trim \n off the end of the incoming jobid */
923         end = strchr(jobid, '\n');
924         if (end && *end == '\n')
925                 *end = '\0';
926
927         CDEBUG(D_INFO, "Clearing Jobid: %s\n", jobid);
928         cfs_hash_cond_del(jobid_hash, jobid_should_free_item, jobid);
929
930         CDEBUG(D_INFO, "%d items remain in jobID table\n",
931                atomic_read(&jobid_hash->hs_count));
932 }