Whamcloud - gitweb
LU-14627 lnet: Ensure ref taken when queueing for discovery
[fs/lustre-release.git] / lustre / obdclass / jobid.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.gnu.org/licenses/gpl-2.0.html
19  *
20  * GPL HEADER END
21  */
22 /*
23  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Use is subject to license terms.
25  *
26  * Copyright (c) 2011, 2014, Intel Corporation.
27  *
28  * Copyright 2017 Cray Inc, all rights reserved.
29  * Author: Ben Evans.
30  *
31  * Store PID->JobID mappings
32  */
33
34 #define DEBUG_SUBSYSTEM S_RPC
35 #include <linux/user_namespace.h>
36 #include <linux/uidgid.h>
37 #include <linux/utsname.h>
38
39 #include <libcfs/libcfs.h>
40 #include <obd_support.h>
41 #include <obd_class.h>
42 #include <lustre_net.h>
43
/* Hash table caching PID->JobID mappings (entries are struct jobid_pid_map) */
static struct cfs_hash *jobid_hash;
/* Forward declaration; the callbacks table is filled in near the end of file */
static struct cfs_hash_ops jobid_hash_ops;
/*
 * Taken around the "is it time to expire the cache" timestamp check and
 * around detaching jobid_hash at shutdown.
 */
spinlock_t jobid_hash_lock;

/* Seconds a cached PID->JobID entry is used before the environment is re-read */
#define RESCAN_INTERVAL 30
/* Seconds between cache expiry scans; also the age at which an entry is dropped */
#define DELETE_INTERVAL 300

/* Selects the jobid source: a JOBSTATS_* keyword or an environment variable name */
char obd_jobid_var[JOBSTATS_JOBID_VAR_MAX_LEN + 1] = JOBSTATS_DISABLE;
/* Format string expanded by jobid_interpret_string() */
char obd_jobid_name[LUSTRE_JOBID_SIZE] = "%e.%u";
53
/**
 * Structure to store a single PID->JobID mapping
 *
 * Entries live in jobid_hash, keyed by jp_pid, and are freed when
 * jp_refcount drops to zero.
 */
struct jobid_pid_map {
        struct hlist_node       jp_hash;        /* linkage into jobid_hash */
        time64_t                jp_time;        /* real time (seconds) of the last environment scan */
        spinlock_t              jp_lock; /* protects jp_jobid */
        char                    jp_jobid[LUSTRE_JOBID_SIZE]; /* cached jobid, "" if none found */
        unsigned int            jp_joblen;      /* length recorded for jp_jobid, 0 if none found */
        atomic_t                jp_refcount;    /* entry is freed when this reaches zero */
        pid_t                   jp_pid;         /* hash key */
};
66
/*
 * Jobid can be set for a session (see setsid(2)) by writing to
 * a sysfs file from any process in that session.
 * The jobids are stored in a hash table indexed by the relevant
 * struct pid.  We periodically look for entries whose pid no longer
 * has any PIDTYPE_SID tasks, and prune them.  This happens within
 * 5 seconds of a jobid being added, and every 5 minutes while jobids
 * exist but none are being added.
 */
#define JOBID_EXPEDITED_CLEAN (5)       /* seconds */
#define JOBID_BACKGROUND_CLEAN (5 * 60) /* seconds */
78
/*
 * One session->jobid association stored in the session_jobids rhashtable.
 *
 * sj_jobid is a trailing variable-length string: the allocation in
 * jobid_set_current() is sizeof(struct) + strlen(jobid), so the single
 * byte declared here is what holds the terminating NUL.
 */
struct session_jobid {
        struct pid              *sj_session;    /* hash key; a pid reference is held */
        struct rhash_head       sj_linkage;     /* rhashtable linkage */
        struct rcu_head         sj_rcu;         /* used by kfree_rcu() */
        char                    sj_jobid[1];    /* NUL-terminated jobid, see above */
};
85
/* rhashtable layout: entries are keyed by the sj_session pointer value */
static const struct rhashtable_params jobid_params = {
        .key_len        = sizeof(struct pid *),
        .key_offset     = offsetof(struct session_jobid, sj_session),
        .head_offset    = offsetof(struct session_jobid, sj_linkage),
};

/* Per-session jobids, initialised in jobid_cache_init() */
static struct rhashtable session_jobids;
93
94 /*
95  * jobid_current must be called with rcu_read_lock held.
96  * if it returns non-NULL, the string can only be used
97  * until rcu_read_unlock is called.
98  */
99 char *jobid_current(void)
100 {
101         struct pid *sid = task_session(current);
102         struct session_jobid *sj;
103
104         sj = rhashtable_lookup_fast(&session_jobids, &sid, jobid_params);
105         if (sj)
106                 return sj->sj_jobid;
107         return NULL;
108 }
109
110 static void jobid_prune_expedite(void);
111 /*
112  * jobid_set_current will try to add a new entry
113  * to the table.  If one exists with the same key, the
114  * jobid will be replaced
115  */
116 int jobid_set_current(char *jobid)
117 {
118         struct pid *sid;
119         struct session_jobid *sj, *origsj;
120         int ret;
121         int len = strlen(jobid);
122
123         sj = kmalloc(sizeof(*sj) + len, GFP_KERNEL);
124         if (!sj)
125                 return -ENOMEM;
126         rcu_read_lock();
127         sid = task_session(current);
128         sj->sj_session = get_pid(sid);
129         strncpy(sj->sj_jobid, jobid, len+1);
130         origsj = rhashtable_lookup_get_insert_fast(&session_jobids,
131                                                    &sj->sj_linkage,
132                                                    jobid_params);
133         if (origsj == NULL) {
134                 /* successful insert */
135                 rcu_read_unlock();
136                 jobid_prune_expedite();
137                 return 0;
138         }
139
140         if (IS_ERR(origsj)) {
141                 put_pid(sj->sj_session);
142                 kfree(sj);
143                 rcu_read_unlock();
144                 return PTR_ERR(origsj);
145         }
146         ret = rhashtable_replace_fast(&session_jobids,
147                                       &origsj->sj_linkage,
148                                       &sj->sj_linkage,
149                                       jobid_params);
150         if (ret) {
151                 put_pid(sj->sj_session);
152                 kfree(sj);
153                 rcu_read_unlock();
154                 return ret;
155         }
156         put_pid(origsj->sj_session);
157         rcu_read_unlock();
158         kfree_rcu(origsj, sj_rcu);
159         jobid_prune_expedite();
160
161         return 0;
162 }
163
164 static void jobid_free(void *vsj, void *arg)
165 {
166         struct session_jobid *sj = vsj;
167
168         put_pid(sj->sj_session);
169         kfree(sj);
170 }
171
172 static void jobid_prune(struct work_struct *work);
173 static DECLARE_DELAYED_WORK(jobid_prune_work, jobid_prune);
174 static int jobid_prune_expedited;
175 static void jobid_prune(struct work_struct *work)
176 {
177         int remaining = 0;
178         struct rhashtable_iter iter;
179         struct session_jobid *sj;
180
181         jobid_prune_expedited = 0;
182         rhashtable_walk_enter(&session_jobids, &iter);
183         rhashtable_walk_start(&iter);
184         while ((sj = rhashtable_walk_next(&iter)) != NULL) {
185                 if (!hlist_empty(&sj->sj_session->tasks[PIDTYPE_SID])) {
186                         remaining++;
187                         continue;
188                 }
189                 if (rhashtable_remove_fast(&session_jobids,
190                                            &sj->sj_linkage,
191                                            jobid_params) == 0) {
192                         put_pid(sj->sj_session);
193                         kfree_rcu(sj, sj_rcu);
194                 }
195         }
196         rhashtable_walk_stop(&iter);
197         rhashtable_walk_exit(&iter);
198         if (remaining)
199                 schedule_delayed_work(&jobid_prune_work,
200                                       cfs_time_seconds(JOBID_BACKGROUND_CLEAN));
201 }
202
203 static void jobid_prune_expedite(void)
204 {
205         if (!jobid_prune_expedited) {
206                 jobid_prune_expedited = 1;
207                 mod_delayed_work(system_wq, &jobid_prune_work,
208                                  cfs_time_seconds(JOBID_EXPEDITED_CLEAN));
209         }
210 }
211
/*
 * Copy up to @len bytes between @buf and user address @addr of @mm, one
 * page at a time.  @write selects the direction: non-zero copies from @buf
 * into the user pages, zero copies from the user pages into @buf.
 *
 * @tsk is only passed to get_user_pages() in the oldest of the three
 * kernel API variants selected below.
 *
 * Return: number of bytes actually transferred (may be short if a page
 *         could not be obtained), or -EDEADLK if the mmap lock could not
 *         be taken without blocking.
 */
static int cfs_access_process_vm(struct task_struct *tsk,
                                 struct mm_struct *mm,
                                 unsigned long addr,
                                 void *buf, int len, int write)
{
        /* Just copied from the kernel, for kernels which don't
         * have access_process_vm() exported
         */
        struct vm_area_struct *vma;
        struct page *page;
        void *old_buf = buf;

        /* Avoid deadlocks on mmap_sem if called from sys_mmap_pgoff(),
         * which is already holding mmap_sem for writes.  If some other
         * thread gets the write lock in the meantime, this thread will
         * block, but at least it won't deadlock on itself.  LU-1735
         */
        if (!mmap_read_trylock(mm))
                return -EDEADLK;

        /* ignore errors, just check how much was successfully transferred */
        while (len) {
                int bytes, rc, offset;
                void *maddr;

                /* pin a single page; the signature depends on the kernel */
#if defined(HAVE_GET_USER_PAGES_GUP_FLAGS)
                rc = get_user_pages(addr, 1, write ? FOLL_WRITE : 0, &page,
                                    &vma);
#elif defined(HAVE_GET_USER_PAGES_6ARG)
                rc = get_user_pages(addr, 1, write, 1, &page, &vma);
#else
                rc = get_user_pages(tsk, mm, addr, 1, write, 1, &page, &vma);
#endif
                if (rc <= 0)
                        break;

                /* never copy past the end of the pinned page */
                bytes = len;
                offset = addr & (PAGE_SIZE-1);
                if (bytes > PAGE_SIZE-offset)
                        bytes = PAGE_SIZE-offset;

                maddr = kmap(page);
                if (write) {
                        copy_to_user_page(vma, page, addr,
                                          maddr + offset, buf, bytes);
                        set_page_dirty_lock(page);
                } else {
                        copy_from_user_page(vma, page, addr,
                                            buf, maddr + offset, bytes);
                }
                kunmap(page);
                put_page(page);
                len -= bytes;
                buf += bytes;
                addr += bytes;
        }
        mmap_read_unlock(mm);

        /* distance @buf advanced == bytes transferred */
        return buf - old_buf;
}
272
273 /* Read the environment variable of current process specified by @key. */
274 static int cfs_get_environ(const char *key, char *value, int *val_len)
275 {
276         struct mm_struct *mm;
277         char *buffer;
278         int buf_len = PAGE_SIZE;
279         int key_len = strlen(key);
280         unsigned long addr;
281         int rc;
282         bool skip = false;
283
284         ENTRY;
285         buffer = kmalloc(buf_len, GFP_USER);
286         if (!buffer)
287                 RETURN(-ENOMEM);
288
289         mm = get_task_mm(current);
290         if (!mm) {
291                 kfree(buffer);
292                 RETURN(-EINVAL);
293         }
294
295         addr = mm->env_start;
296         while (addr < mm->env_end) {
297                 int this_len, retval, scan_len;
298                 char *env_start, *env_end;
299
300                 memset(buffer, 0, buf_len);
301
302                 this_len = min_t(int, mm->env_end - addr, buf_len);
303                 retval = cfs_access_process_vm(current, mm, addr, buffer,
304                                                this_len, 0);
305                 if (retval < 0)
306                         GOTO(out, rc = retval);
307                 else if (retval != this_len)
308                         break;
309
310                 addr += retval;
311
312                 /* Parse the buffer to find out the specified key/value pair.
313                  * The "key=value" entries are separated by '\0'.
314                  */
315                 env_start = buffer;
316                 scan_len = this_len;
317                 while (scan_len) {
318                         char *entry;
319                         int entry_len;
320
321                         env_end = memscan(env_start, '\0', scan_len);
322                         LASSERT(env_end >= env_start &&
323                                 env_end <= env_start + scan_len);
324
325                         /* The last entry of this buffer cross the buffer
326                          * boundary, reread it in next cycle.
327                          */
328                         if (unlikely(env_end - env_start == scan_len)) {
329                                 /* Just skip the entry larger than page size,
330                                  * it can't be jobID env variable.
331                                  */
332                                 if (unlikely(scan_len == this_len))
333                                         skip = true;
334                                 else
335                                         addr -= scan_len;
336                                 break;
337                         } else if (unlikely(skip)) {
338                                 skip = false;
339                                 goto skip;
340                         }
341                         entry = env_start;
342                         entry_len = env_end - env_start;
343                         CDEBUG(D_INFO, "key: %s, entry: %s\n", key, entry);
344
345                         /* Key length + length of '=' */
346                         if (entry_len > key_len + 1 &&
347                             entry[key_len] == '='  &&
348                             !memcmp(entry, key, key_len)) {
349                                 entry += key_len + 1;
350                                 entry_len -= key_len + 1;
351
352                                 /* The 'value' buffer passed in is too small.
353                                  * Copy what fits, but return -EOVERFLOW.
354                                  */
355                                 if (entry_len >= *val_len) {
356                                         memcpy(value, entry, *val_len);
357                                         value[*val_len - 1] = 0;
358                                         GOTO(out, rc = -EOVERFLOW);
359                                 }
360
361                                 memcpy(value, entry, entry_len);
362                                 *val_len = entry_len;
363                                 GOTO(out, rc = 0);
364                         }
365 skip:
366                         scan_len -= (env_end - env_start + 1);
367                         env_start = env_end + 1;
368                 }
369         }
370         GOTO(out, rc = -ENOENT);
371
372 out:
373         mmput(mm);
374         kfree((void *)buffer);
375         return rc;
376 }
377
378 /*
379  * Get jobid of current process by reading the environment variable
380  * stored in between the "env_start" & "env_end" of task struct.
381  *
382  * If some job scheduler doesn't store jobid in the "env_start/end",
383  * then an upcall could be issued here to get the jobid by utilizing
384  * the userspace tools/API. Then, the jobid must be cached.
385  */
386 int jobid_get_from_environ(char *jobid_var, char *jobid, int *jobid_len)
387 {
388         int rc;
389
390         rc = cfs_get_environ(jobid_var, jobid, jobid_len);
391         if (!rc)
392                 goto out;
393
394         if (rc == -EOVERFLOW) {
395                 /* For the PBS_JOBID and LOADL_STEP_ID keys (which are
396                  * variable length strings instead of just numbers), it
397                  * might make sense to keep the unique parts for JobID,
398                  * instead of just returning an error.  That means a
399                  * larger temp buffer for cfs_get_environ(), then
400                  * truncating the string at some separator to fit into
401                  * the specified jobid_len.  Fix later if needed. */
402                 static ktime_t printed;
403
404                 if (unlikely(ktime_to_ns(printed) == 0 ||
405                              ktime_after(ktime_get(),
406                                          ktime_add_ns(printed,
407                                              3600ULL * 24 * NSEC_PER_SEC)))) {
408                         LCONSOLE_WARN("jobid: '%s' value too large (%d)\n",
409                                       obd_jobid_var, *jobid_len);
410                         printed = ktime_get();
411                 }
412
413                 rc = 0;
414         } else {
415                 CDEBUG_LIMIT((rc == -ENOENT || rc == -EINVAL ||
416                               rc == -EDEADLK) ? D_INFO : D_ERROR,
417                              "jobid: get '%s' failed: rc = %d\n",
418                              obd_jobid_var, rc);
419         }
420
421 out:
422         return rc;
423 }
424
425 /*
426  * jobid_should_free_item
427  *
428  * Each item is checked to see if it should be released
429  * Removed from hash table by caller
430  * Actually freed in jobid_put_locked
431  *
432  * Returns 1 if item is to be freed, 0 if it is to be kept
433  */
434
435 static int jobid_should_free_item(void *obj, void *data)
436 {
437         char *jobid = data;
438         struct jobid_pid_map *pidmap = obj;
439         int rc = 0;
440
441         if (obj == NULL)
442                 return 0;
443
444         if (jobid == NULL) {
445                 WARN_ON_ONCE(atomic_read(&pidmap->jp_refcount) != 1);
446                 return 1;
447         }
448
449         spin_lock(&pidmap->jp_lock);
450         /* prevent newly inserted items from deleting */
451         if (jobid[0] == '\0' && atomic_read(&pidmap->jp_refcount) == 1)
452                 rc = 1;
453         else if (ktime_get_real_seconds() - pidmap->jp_time > DELETE_INTERVAL)
454                 rc = 1;
455         else if (strcmp(pidmap->jp_jobid, jobid) == 0)
456                 rc = 1;
457         spin_unlock(&pidmap->jp_lock);
458
459         return rc;
460 }
461
/*
 * jobid_name_is_valid
 *
 * Decide whether a name may be used as a jobid.  Empty names, and names
 * beginning with one of the prefixes reserved for Lustre's own threads,
 * are rejected.
 *
 * Returns true if jobid is valid
 * Returns false if jobid is empty or looks like it's a Lustre process
 */
static bool jobid_name_is_valid(char *jobid)
{
        const char *const reserved[] = { "ll_ping", "ptlrpc",
                                         "ldlm", "ll_sa", NULL };
        const char *const *prefix;

        if (*jobid == '\0')
                return false;

        for (prefix = reserved; *prefix != NULL; prefix++) {
                size_t prefix_len = strlen(*prefix);

                if (strncmp(jobid, *prefix, prefix_len) == 0)
                        return false;
        }

        return true;
}
486
/*
 * jobid_get_from_cache()
 *
 * Returns contents of jobid_var from process environment for current PID,
 * or from the per-session jobid table.
 * Values fetched from the process environment will be cached for some time
 * to avoid the overhead of scanning the environment.
 *
 * @jobid:  output buffer
 * @joblen: size of @jobid
 *
 * Return: -ENOMEM if allocating a new pidmap fails
 *         -ENOENT if no entry could be found
 *         +ve string length for success (something was returned in jobid)
 */
static int jobid_get_from_cache(char *jobid, size_t joblen)
{
        static time64_t last_expire;
        bool expire_cache = false;
        pid_t pid = current->pid;
        struct jobid_pid_map *pidmap = NULL;
        time64_t now = ktime_get_real_seconds();
        int rc = 0;
        ENTRY;

        /* session mode: the jobid comes from session_jobids, not the cache */
        if (strcmp(obd_jobid_var, JOBSTATS_SESSION) == 0) {
                char *jid;

                /* jid is only valid inside the RCU read-side section */
                rcu_read_lock();
                jid = jobid_current();
                if (jid) {
                        strlcpy(jobid, jid, joblen);
                        joblen = strlen(jobid);
                } else {
                        rc = -ENOENT;
                }
                rcu_read_unlock();
                GOTO(out, rc);
        }

        LASSERT(jobid_hash != NULL);

        /* scan hash periodically to remove old PID entries from cache */
        spin_lock(&jobid_hash_lock);
        if (unlikely(last_expire + DELETE_INTERVAL <= now)) {
                expire_cache = true;
                last_expire = now;
        }
        spin_unlock(&jobid_hash_lock);

        /*
         * The string passed here is non-empty, so the "empty jobid" test in
         * jobid_should_free_item() cannot fire; presumably it is chosen so
         * that it matches no real jobid, leaving only the age test.
         */
        if (expire_cache)
                cfs_hash_cond_del(jobid_hash, jobid_should_free_item,
                                  "intentionally_bad_jobid");

        /* first try to find PID in the hash and use that value */
        pidmap = cfs_hash_lookup(jobid_hash, &pid);
        if (pidmap == NULL) {
                struct jobid_pid_map *pidmap2;

                OBD_ALLOC_PTR(pidmap);
                if (pidmap == NULL)
                        GOTO(out, rc = -ENOMEM);

                /* jp_time = 0 makes the rescan test below true */
                pidmap->jp_pid = pid;
                pidmap->jp_time = 0;
                pidmap->jp_jobid[0] = '\0';
                spin_lock_init(&pidmap->jp_lock);
                INIT_HLIST_NODE(&pidmap->jp_hash);
                /*
                 * @pidmap might be reclaimed just after it is added into
                 * hash list, init @jp_refcount as 1 to make sure memory
                 * could be not freed during access.
                 */
                atomic_set(&pidmap->jp_refcount, 1);

                /*
                 * Add the newly created map to the hash, on key collision we
                 * lost a racing addition and must destroy our newly allocated
                 * map.  The object which exists in the hash will be returned.
                 */
                pidmap2 = cfs_hash_findadd_unique(jobid_hash, &pid,
                                                  &pidmap->jp_hash);
                if (unlikely(pidmap != pidmap2)) {
                        CDEBUG(D_INFO, "jobid: duplicate found for PID=%u\n",
                               pid);
                        OBD_FREE_PTR(pidmap);
                        pidmap = pidmap2;
                }
        }

        /*
         * If pidmap is old (this is always true for new entries) refresh it.
         * If obd_jobid_var is not found, cache empty entry and try again
         * later, to avoid repeat lookups for PID if obd_jobid_var missing.
         */
        spin_lock(&pidmap->jp_lock);
        if (pidmap->jp_time + RESCAN_INTERVAL <= now) {
                char env_jobid[LUSTRE_JOBID_SIZE] = "";
                int env_len = sizeof(env_jobid);

                /* stamp the entry before unlocking for the environment scan */
                pidmap->jp_time = now;

                /* the environment scan runs with jp_lock released */
                spin_unlock(&pidmap->jp_lock);
                rc = jobid_get_from_environ(obd_jobid_var, env_jobid, &env_len);

                CDEBUG(D_INFO, "jobid: PID mapping established: %d->%s\n",
                       pidmap->jp_pid, env_jobid);
                spin_lock(&pidmap->jp_lock);
                if (!rc) {
                        pidmap->jp_joblen = env_len;
                        strlcpy(pidmap->jp_jobid, env_jobid,
                                sizeof(pidmap->jp_jobid));
                        rc = 0;
                } else if (rc == -ENOENT) {
                        /* It might have been deleted, clear out old entry */
                        pidmap->jp_joblen = 0;
                        pidmap->jp_jobid[0] = '\0';
                }
        }

        /*
         * Regardless of how pidmap was found, if it contains a valid entry
         * use that for now.  If there was a technical error (e.g. -ENOMEM)
         * use the old cached value until it can be looked up again properly.
         * If a cached missing entry was found, return -ENOENT.
         */
        if (pidmap->jp_joblen) {
                strlcpy(jobid, pidmap->jp_jobid, joblen);
                /* report the cached length, not the caller's buffer size */
                joblen = pidmap->jp_joblen;
                rc = 0;
        } else if (!rc) {
                rc = -ENOENT;
        }
        spin_unlock(&pidmap->jp_lock);

        /* release the reference on pidmap held for the duration of this call */
        cfs_hash_put(jobid_hash, &pidmap->jp_hash);

        EXIT;
out:
        /* on success joblen holds the string length computed above */
        return rc < 0 ? rc : joblen;
}
625
626 /*
627  * jobid_interpret_string()
628  *
629  * Interpret the jobfmt string to expand specified fields, like coredumps do:
630  *   %e = executable
631  *   %g = gid
632  *   %h = hostname
633  *   %H = short hostname
634  *   %j = jobid from environment
635  *   %p = pid
636  *   %u = uid
637  *
638  * Unknown escape strings are dropped.  Other characters are copied through,
639  * excluding whitespace (to avoid making jobid parsing difficult).
640  *
641  * Return: -EOVERFLOW if the expanded string does not fit within @joblen
642  *         0 for success
643  */
644 static int jobid_interpret_string(const char *jobfmt, char *jobid,
645                                   ssize_t joblen)
646 {
647         char c;
648
649         while ((c = *jobfmt++) && joblen > 1) {
650                 char f, *p;
651                 int l;
652
653                 if (isspace(c)) /* Don't allow embedded spaces */
654                         continue;
655
656                 if (c != '%') {
657                         *jobid = c;
658                         joblen--;
659                         jobid++;
660                         *jobid = '\0';
661                         continue;
662                 }
663
664                 switch ((f = *jobfmt++)) {
665                 case 'e': /* executable name */
666                         l = snprintf(jobid, joblen, "%s", current->comm);
667                         break;
668                 case 'g': /* group ID */
669                         l = snprintf(jobid, joblen, "%u",
670                                      from_kgid(&init_user_ns, current_fsgid()));
671                         break;
672                 case 'h': /* hostname */
673                         l = snprintf(jobid, joblen, "%s",
674                                      init_utsname()->nodename);
675                         break;
676                 case 'H': /* short hostname. Cut at first dot */
677                         l = snprintf(jobid, joblen, "%s",
678                                      init_utsname()->nodename);
679                         p = strnchr(jobid, joblen, '.');
680                         if (p) {
681                                 *p = '\0';
682                                 l = p - jobid;
683                         }
684                         break;
685                 case 'j': /* jobid stored in process environment */
686                         l = jobid_get_from_cache(jobid, joblen);
687                         if (l < 0)
688                                 l = 0;
689                         break;
690                 case 'p': /* process ID */
691                         l = snprintf(jobid, joblen, "%u", current->pid);
692                         break;
693                 case 'u': /* user ID */
694                         l = snprintf(jobid, joblen, "%u",
695                                      from_kuid(&init_user_ns, current_fsuid()));
696                         break;
697                 case '\0': /* '%' at end of format string */
698                         l = 0;
699                         goto out;
700                 default: /* drop unknown %x format strings */
701                         l = 0;
702                         break;
703                 }
704                 jobid += l;
705                 joblen -= l;
706         }
707         /*
708          * This points at the end of the buffer, so long as jobid is always
709          * incremented the same amount as joblen is decremented.
710          */
711 out:
712         jobid[joblen - 1] = '\0';
713
714         return joblen < 0 ? -EOVERFLOW : 0;
715 }
716
717 /*
718  * Hash initialization, copied from server-side job stats bucket sizes
719  */
720 #define HASH_JOBID_BKT_BITS 5
721 #define HASH_JOBID_CUR_BITS 7
722 #define HASH_JOBID_MAX_BITS 12
723
724 int jobid_cache_init(void)
725 {
726         int rc = 0;
727         ENTRY;
728
729         if (jobid_hash)
730                 return 0;
731
732         spin_lock_init(&jobid_hash_lock);
733         jobid_hash = cfs_hash_create("JOBID_HASH", HASH_JOBID_CUR_BITS,
734                                      HASH_JOBID_MAX_BITS, HASH_JOBID_BKT_BITS,
735                                      0, CFS_HASH_MIN_THETA, CFS_HASH_MAX_THETA,
736                                      &jobid_hash_ops, CFS_HASH_DEFAULT);
737         if (!jobid_hash) {
738                 rc = -ENOMEM;
739         } else {
740                 rc = rhashtable_init(&session_jobids, &jobid_params);
741                 if (rc) {
742                         cfs_hash_putref(jobid_hash);
743                         jobid_hash = NULL;
744                 }
745         }
746
747         RETURN(rc);
748 }
749 EXPORT_SYMBOL(jobid_cache_init);
750
751 void jobid_cache_fini(void)
752 {
753         struct cfs_hash *tmp_hash;
754         ENTRY;
755
756         spin_lock(&jobid_hash_lock);
757         tmp_hash = jobid_hash;
758         jobid_hash = NULL;
759         spin_unlock(&jobid_hash_lock);
760
761         cancel_delayed_work_sync(&jobid_prune_work);
762
763         if (tmp_hash != NULL) {
764                 cfs_hash_cond_del(tmp_hash, jobid_should_free_item, NULL);
765                 cfs_hash_putref(tmp_hash);
766
767                 rhashtable_free_and_destroy(&session_jobids, jobid_free, NULL);
768         }
769
770
771         EXIT;
772 }
773 EXPORT_SYMBOL(jobid_cache_fini);
774
775 /*
776  * Hash operations for pid<->jobid
777  */
778 static unsigned jobid_hashfn(struct cfs_hash *hs, const void *key,
779                              unsigned mask)
780 {
781         return cfs_hash_djb2_hash(key, sizeof(pid_t), mask);
782 }
783
784 static void *jobid_key(struct hlist_node *hnode)
785 {
786         struct jobid_pid_map *pidmap;
787
788         pidmap = hlist_entry(hnode, struct jobid_pid_map, jp_hash);
789         return &pidmap->jp_pid;
790 }
791
792 static int jobid_keycmp(const void *key, struct hlist_node *hnode)
793 {
794         const pid_t *pid_key1;
795         const pid_t *pid_key2;
796
797         LASSERT(key != NULL);
798         pid_key1 = (pid_t *)key;
799         pid_key2 = (pid_t *)jobid_key(hnode);
800
801         return *pid_key1 == *pid_key2;
802 }
803
804 static void *jobid_object(struct hlist_node *hnode)
805 {
806         return hlist_entry(hnode, struct jobid_pid_map, jp_hash);
807 }
808
809 static void jobid_get(struct cfs_hash *hs, struct hlist_node *hnode)
810 {
811         struct jobid_pid_map *pidmap;
812
813         pidmap = hlist_entry(hnode, struct jobid_pid_map, jp_hash);
814
815         atomic_inc(&pidmap->jp_refcount);
816 }
817
818 static void jobid_put_locked(struct cfs_hash *hs, struct hlist_node *hnode)
819 {
820         struct jobid_pid_map *pidmap;
821
822         if (hnode == NULL)
823                 return;
824
825         pidmap = hlist_entry(hnode, struct jobid_pid_map, jp_hash);
826         LASSERT(atomic_read(&pidmap->jp_refcount) > 0);
827         if (atomic_dec_and_test(&pidmap->jp_refcount)) {
828                 CDEBUG(D_INFO, "Freeing: %d->%s\n",
829                        pidmap->jp_pid, pidmap->jp_jobid);
830
831                 OBD_FREE_PTR(pidmap);
832         }
833 }
834
/*
 * Callback table handed to cfs_hash_create() for the PID->JobID cache.
 * The same put routine serves both hs_put and hs_put_locked.
 */
static struct cfs_hash_ops jobid_hash_ops = {
        .hs_hash        = jobid_hashfn,
        .hs_keycmp      = jobid_keycmp,
        .hs_key         = jobid_key,
        .hs_object      = jobid_object,
        .hs_get         = jobid_get,
        .hs_put         = jobid_put_locked,
        .hs_put_locked  = jobid_put_locked,
};
844
845 /**
846  * Generate the job identifier string for this process for tracking purposes.
847  *
848  * Fill in @jobid string based on the value of obd_jobid_var:
849  * JOBSTATS_DISABLE:      none
850  * JOBSTATS_NODELOCAL:    content of obd_jobid_name (jobid_interpret_string())
851  * JOBSTATS_PROCNAME_UID: process name/UID
852  * JOBSTATS_SESSION       per-session value set by
853  *                            /sys/fs/lustre/jobid_this_session
854  * anything else:         look up obd_jobid_var in the processes environment
855  *
856  * Return -ve error number, 0 on success.
857  */
858 int lustre_get_jobid(char *jobid, size_t joblen)
859 {
860         int rc = 0;
861         ENTRY;
862
863         if (unlikely(joblen < 2)) {
864                 if (joblen == 1)
865                         jobid[0] = '\0';
866                 RETURN(-EINVAL);
867         }
868
869         if (strcmp(obd_jobid_var, JOBSTATS_DISABLE) == 0) {
870                 /* Jobstats isn't enabled */
871                 memset(jobid, 0, joblen);
872         } else if (strcmp(obd_jobid_var, JOBSTATS_NODELOCAL) == 0) {
873                 /* Whole node dedicated to single job */
874                 rc = jobid_interpret_string(obd_jobid_name, jobid, joblen);
875         } else if (strcmp(obd_jobid_var, JOBSTATS_PROCNAME_UID) == 0) {
876                 rc = jobid_interpret_string("%e.%u", jobid, joblen);
877         } else if (strcmp(obd_jobid_var, JOBSTATS_SESSION) == 0 ||
878                    jobid_name_is_valid(current->comm)) {
879                 /*
880                  * per-process jobid wanted, either from environment or from
881                  * per-session setting.
882                  * If obd_jobid_name contains "%j" or if getting the pre-process
883                  * jobid directly fails, fall back to using obd_jobid_name.
884                  */
885                 rc = -EAGAIN;
886                 if (!strnstr(obd_jobid_name, "%j", joblen))
887                         rc = jobid_get_from_cache(jobid, joblen);
888
889                 /* fall back to jobid_node if jobid_var not available */
890                 if (rc < 0) {
891                         int rc2 = jobid_interpret_string(obd_jobid_name,
892                                                          jobid, joblen);
893                         if (!rc2)
894                                 rc = 0;
895                 }
896         }
897
898         RETURN(rc);
899 }
900 EXPORT_SYMBOL(lustre_get_jobid);
901
902 /*
903  * lustre_jobid_clear
904  *
905  * Search cache for JobID given by @find_jobid.
906  * If any entries in the hash table match the value, they are removed
907  */
908 void lustre_jobid_clear(const char *find_jobid)
909 {
910         char jobid[LUSTRE_JOBID_SIZE];
911         char *end;
912
913         if (jobid_hash == NULL)
914                 return;
915
916         strlcpy(jobid, find_jobid, sizeof(jobid));
917         /* trim \n off the end of the incoming jobid */
918         end = strchr(jobid, '\n');
919         if (end && *end == '\n')
920                 *end = '\0';
921
922         CDEBUG(D_INFO, "Clearing Jobid: %s\n", jobid);
923         cfs_hash_cond_del(jobid_hash, jobid_should_free_item, jobid);
924
925         CDEBUG(D_INFO, "%d items remain in jobID table\n",
926                atomic_read(&jobid_hash->hs_count));
927 }