Whamcloud - gitweb
LU-9859 libcfs: delete libcfs/linux/libcfs.h
[fs/lustre-release.git] / lustre / obdclass / jobid.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.gnu.org/licenses/gpl-2.0.html
19  *
20  * GPL HEADER END
21  */
22 /*
23  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Use is subject to license terms.
25  *
26  * Copyright (c) 2011, 2014, Intel Corporation.
27  *
28  * Copyright 2017 Cray Inc, all rights reserved.
29  * Author: Ben Evans.
30  *
31  * Store PID->JobID mappings
32  */
33
34 #define DEBUG_SUBSYSTEM S_RPC
35 #include <linux/user_namespace.h>
36 #ifdef HAVE_UIDGID_HEADER
37 #include <linux/uidgid.h>
38 #endif
39
40 #include <libcfs/linux/linux-misc.h>
41 #include <obd_support.h>
42 #include <obd_class.h>
43 #include <lustre_net.h>
44
45 static struct cfs_hash *jobid_hash;
46 static struct cfs_hash_ops jobid_hash_ops;
47 spinlock_t jobid_hash_lock;
48
49 #define RESCAN_INTERVAL 30
50 #define DELETE_INTERVAL 300
51
52 char obd_jobid_var[JOBSTATS_JOBID_VAR_MAX_LEN + 1] = JOBSTATS_DISABLE;
53 char obd_jobid_node[LUSTRE_JOBID_SIZE + 1];
54
/**
 * Structure to store a single jobID/PID mapping
 *
 * One instance lives in jobid_hash per PID that has asked for a jobid.
 * Instances are freed by jobid_put_locked() when jp_refcount reaches zero.
 */
struct jobid_to_pid_map {
        /* linkage into jobid_hash */
        struct hlist_node       jp_hash;
        /* wall-clock seconds of the last jobid refresh; 0 for a new entry */
        time64_t                jp_time;
        /* raised by jobid_get(), lowered by jobid_put_locked() */
        atomic_t                jp_refcount;
        /* protects jp_jobid; get_jobid() also updates jp_time under it */
        spinlock_t              jp_lock;
        /* cached jobid, empty until a usable value has been found */
        char                    jp_jobid[LUSTRE_JOBID_SIZE + 1];
        /* hash key: the PID this mapping belongs to */
        pid_t                   jp_pid;
};
66
67 /* Get jobid of current process by reading the environment variable
68  * stored in between the "env_start" & "env_end" of task struct.
69  *
70  * If some job scheduler doesn't store jobid in the "env_start/end",
71  * then an upcall could be issued here to get the jobid by utilizing
72  * the userspace tools/API. Then, the jobid must be cached.
73  */
74 int get_jobid_from_environ(char *jobid_var, char *jobid, int jobid_len)
75 {
76         int rc;
77
78         rc = cfs_get_environ(jobid_var, jobid, &jobid_len);
79         if (!rc)
80                 goto out;
81
82         if (rc == -EOVERFLOW) {
83                 /* For the PBS_JOBID and LOADL_STEP_ID keys (which are
84                  * variable length strings instead of just numbers), it
85                  * might make sense to keep the unique parts for JobID,
86                  * instead of just returning an error.  That means a
87                  * larger temp buffer for cfs_get_environ(), then
88                  * truncating the string at some separator to fit into
89                  * the specified jobid_len.  Fix later if needed. */
90                 static bool printed;
91                 if (unlikely(!printed)) {
92                         LCONSOLE_ERROR_MSG(0x16b, "%s value too large "
93                                            "for JobID buffer (%d)\n",
94                                            obd_jobid_var, jobid_len);
95                         printed = true;
96                 }
97         } else {
98                 CDEBUG((rc == -ENOENT || rc == -EINVAL ||
99                         rc == -EDEADLK) ? D_INFO : D_ERROR,
100                        "Get jobid for (%s) failed: rc = %d\n",
101                        obd_jobid_var, rc);
102         }
103
104 out:
105         return rc;
106 }
107
108 /*
109  * jobid_should_free_item
110  *
111  * Each item is checked to see if it should be released
112  * Removed from hash table by caller
113  * Actually freed in jobid_put_locked
114  *
115  * Returns 1 if item is to be freed, 0 if it is to be kept
116  */
117
118 static int jobid_should_free_item(void *obj, void *data)
119 {
120         char *jobid = data;
121         struct jobid_to_pid_map *pidmap = obj;
122         int rc = 0;
123
124         if (obj == NULL)
125                 return 0;
126
127         spin_lock(&pidmap->jp_lock);
128         if (jobid == NULL)
129                 rc = 1;
130         else if (jobid[0] == '\0')
131                 rc = 1;
132         else if (ktime_get_real_seconds() - pidmap->jp_time > DELETE_INTERVAL)
133                 rc = 1;
134         else if (strcmp(pidmap->jp_jobid, jobid) == 0)
135                 rc = 1;
136         spin_unlock(&pidmap->jp_lock);
137
138         return rc;
139 }
140
/*
 * check_job_name
 *
 * Checks if the jobid is a Lustre process, i.e. whether it starts with
 * one of the reserved thread-name prefixes listed below.
 *
 * jobid must be a NUL-terminated string.
 *
 * Returns true if jobid is valid
 * Returns false if jobid looks like it's a Lustre process
 */
static bool check_job_name(const char *jobid)
{
        /* static: the table never changes, so build it once instead of
         * on the stack for every call */
        static const char *const lustre_reserved[] = {
                "ll_ping", "ptlrpc", "ldlm", "ll_sa", NULL
        };
        const char *const *prefix;

        for (prefix = lustre_reserved; *prefix != NULL; prefix++) {
                /* prefix match only: "ptlrpc_hr00_001" is reserved too */
                if (strncmp(jobid, *prefix, strlen(*prefix)) == 0)
                        return false;
        }

        return true;
}
162
163 /*
164  * get_jobid
165  *
166  * Returns the jobid for the current pid.
167  *
168  * If no jobid is found in the table, the jobid is calculated based on
169  * the value of jobid_var, using procname_uid as the default.
170  *
171  * Return: -ENOMEM if allocating a new pidmap fails
172  *         0 for success
173  */
174 int get_jobid(char *jobid)
175 {
176         pid_t pid = current_pid();
177         struct jobid_to_pid_map *pidmap = NULL;
178         struct jobid_to_pid_map *pidmap2;
179         char tmp_jobid[LUSTRE_JOBID_SIZE + 1];
180         int rc = 0;
181         ENTRY;
182
183         pidmap = cfs_hash_lookup(jobid_hash, &pid);
184         if (pidmap == NULL) {
185                 OBD_ALLOC_PTR(pidmap);
186                 if (pidmap == NULL)
187                         GOTO(out, rc = -ENOMEM);
188
189                 pidmap->jp_pid = pid;
190                 pidmap->jp_time = 0;
191                 pidmap->jp_jobid[0] = '\0';
192                 spin_lock_init(&pidmap->jp_lock);
193                 INIT_HLIST_NODE(&pidmap->jp_hash);
194
195                 /*
196                  * Add the newly created map to the hash, on key collision we
197                  * lost a racing addition and must destroy our newly allocated
198                  * map.  The object which exists in the hash will be
199                  * returned.
200                  */
201                 pidmap2 = cfs_hash_findadd_unique(jobid_hash, &pid,
202                                                   &pidmap->jp_hash);
203                 if (unlikely(pidmap != pidmap2)) {
204                         CDEBUG(D_INFO, "Duplicate jobid found\n");
205                         OBD_FREE_PTR(pidmap);
206                         pidmap = pidmap2;
207                 } else {
208                         cfs_hash_get(jobid_hash, &pidmap->jp_hash);
209                 }
210         }
211
212         spin_lock(&pidmap->jp_lock);
213         if ((ktime_get_real_seconds() - pidmap->jp_time >= RESCAN_INTERVAL) ||
214             pidmap->jp_jobid[0] == '\0') {
215                 /* mark the pidmap as being up to date, if we fail to find
216                  * a good jobid, revert to the old time and try again later
217                  * prevent a race with deletion */
218
219                 time64_t tmp_time = pidmap->jp_time;
220                 pidmap->jp_time = ktime_get_real_seconds();
221
222                 spin_unlock(&pidmap->jp_lock);
223                 if (strcmp(obd_jobid_var, JOBSTATS_PROCNAME_UID) == 0) {
224                         rc = 1;
225                 } else {
226                         memset(tmp_jobid, '\0', LUSTRE_JOBID_SIZE + 1);
227                         rc = get_jobid_from_environ(obd_jobid_var,
228                                                     tmp_jobid,
229                                                     LUSTRE_JOBID_SIZE + 1);
230                 }
231
232                 /* Use process name + fsuid as jobid default, or when
233                  * specified by "jobname_uid" */
234                 if (rc) {
235                         snprintf(tmp_jobid, LUSTRE_JOBID_SIZE, "%s.%u",
236                                  current_comm(),
237                                  from_kuid(&init_user_ns, current_fsuid()));
238                         rc = 0;
239                 }
240
241                 CDEBUG(D_INFO, "Jobid to pid mapping established: %d->%s\n",
242                        pidmap->jp_pid, tmp_jobid);
243
244                 spin_lock(&pidmap->jp_lock);
245                 if (check_job_name(tmp_jobid))
246                         strncpy(pidmap->jp_jobid, tmp_jobid,
247                                 LUSTRE_JOBID_SIZE);
248                 else
249                         pidmap->jp_time = tmp_time;
250         }
251
252         if (strlen(pidmap->jp_jobid) != 0)
253                 strncpy(jobid, pidmap->jp_jobid, LUSTRE_JOBID_SIZE);
254
255         spin_unlock(&pidmap->jp_lock);
256
257         cfs_hash_put(jobid_hash, &pidmap->jp_hash);
258
259         EXIT;
260 out:
261         return rc;
262 }
263
264 /*
265  * Hash initialization, copied from server-side job stats bucket sizes
266  */
267 #define HASH_JOBID_BKT_BITS 5
268 #define HASH_JOBID_CUR_BITS 7
269 #define HASH_JOBID_MAX_BITS 12
270
271 int jobid_cache_init(void)
272 {
273         int rc = 0;
274         struct cfs_hash *tmp_jobid_hash;
275         ENTRY;
276
277         spin_lock_init(&jobid_hash_lock);
278
279         tmp_jobid_hash = cfs_hash_create("JOBID_HASH",
280                                          HASH_JOBID_CUR_BITS,
281                                          HASH_JOBID_MAX_BITS,
282                                          HASH_JOBID_BKT_BITS, 0,
283                                          CFS_HASH_MIN_THETA,
284                                          CFS_HASH_MAX_THETA,
285                                          &jobid_hash_ops,
286                                          CFS_HASH_DEFAULT);
287
288         spin_lock(&jobid_hash_lock);
289         if (jobid_hash == NULL) {
290                 jobid_hash = tmp_jobid_hash;
291                 spin_unlock(&jobid_hash_lock);
292         } else {
293                 spin_unlock(&jobid_hash_lock);
294                 if (tmp_jobid_hash != NULL)
295                         cfs_hash_putref(tmp_jobid_hash);
296         }
297
298         if (!jobid_hash)
299                 rc = -ENOMEM;
300
301         RETURN(rc);
302 }
303 EXPORT_SYMBOL(jobid_cache_init);
304
305 void jobid_cache_fini(void)
306 {
307         struct cfs_hash *tmp_hash;
308         ENTRY;
309
310         spin_lock(&jobid_hash_lock);
311         tmp_hash = jobid_hash;
312         jobid_hash = NULL;
313         spin_unlock(&jobid_hash_lock);
314
315         if (tmp_hash != NULL) {
316                 cfs_hash_cond_del(tmp_hash, jobid_should_free_item, NULL);
317                 cfs_hash_putref(tmp_hash);
318         }
319
320         EXIT;
321 }
322 EXPORT_SYMBOL(jobid_cache_fini);
323
324 /*
325  * Hash operations for pid<->jobid
326  */
327 static unsigned jobid_hashfn(struct cfs_hash *hs, const void *key,
328                              unsigned mask)
329 {
330         return cfs_hash_djb2_hash(key, sizeof(pid_t), mask);
331 }
332
333 static void *jobid_key(struct hlist_node *hnode)
334 {
335         struct jobid_to_pid_map *pidmap;
336
337         pidmap = hlist_entry(hnode, struct jobid_to_pid_map, jp_hash);
338         return &pidmap->jp_pid;
339 }
340
341 static int jobid_keycmp(const void *key, struct hlist_node *hnode)
342 {
343         const pid_t *pid_key1;
344         const pid_t *pid_key2;
345
346         LASSERT(key != NULL);
347         pid_key1 = (pid_t *)key;
348         pid_key2 = (pid_t *)jobid_key(hnode);
349
350         return *pid_key1 == *pid_key2;
351 }
352
353 static void *jobid_object(struct hlist_node *hnode)
354 {
355         return hlist_entry(hnode, struct jobid_to_pid_map, jp_hash);
356 }
357
358 static void jobid_get(struct cfs_hash *hs, struct hlist_node *hnode)
359 {
360         struct jobid_to_pid_map *pidmap;
361
362         pidmap = hlist_entry(hnode, struct jobid_to_pid_map, jp_hash);
363
364         atomic_inc(&pidmap->jp_refcount);
365 }
366
367 static void jobid_put_locked(struct cfs_hash *hs, struct hlist_node *hnode)
368 {
369         struct jobid_to_pid_map *pidmap;
370
371         if (hnode == NULL)
372                 return;
373
374         pidmap = hlist_entry(hnode, struct jobid_to_pid_map, jp_hash);
375         LASSERT(atomic_read(&pidmap->jp_refcount) > 0);
376         if (atomic_dec_and_test(&pidmap->jp_refcount)) {
377                 CDEBUG(D_INFO, "Freeing: %d->%s\n",
378                        pidmap->jp_pid, pidmap->jp_jobid);
379
380                 OBD_FREE_PTR(pidmap);
381         }
382 }
383
384 static struct cfs_hash_ops jobid_hash_ops = {
385         .hs_hash        = jobid_hashfn,
386         .hs_keycmp      = jobid_keycmp,
387         .hs_key         = jobid_key,
388         .hs_object      = jobid_object,
389         .hs_get         = jobid_get,
390         .hs_put         = jobid_put_locked,
391         .hs_put_locked  = jobid_put_locked,
392 };
393
394 /*
395  * Return the jobid:
396  *
397  * Based on the value of obd_jobid_var
398  * JOBSTATS_DISABLE:  none
399  * JOBSTATS_NODELOCAL:  Contents of obd_jobid_name
400  * JOBSTATS_PROCNAME_UID:  Process name/UID
401  * anything else:  Look up the value in the processes environment
402  * default: JOBSTATS_PROCNAME_UID
403  */
404
405 int lustre_get_jobid(char *jobid)
406 {
407         int rc = 0;
408         int clear = 0;
409         static time64_t last_delete;
410         ENTRY;
411
412         LASSERT(jobid_hash != NULL);
413
414         spin_lock(&jobid_hash_lock);
415         if (last_delete + DELETE_INTERVAL <= ktime_get_real_seconds()) {
416                 clear = 1;
417                 last_delete = ktime_get_real_seconds();
418         }
419         spin_unlock(&jobid_hash_lock);
420
421         if (clear)
422                 cfs_hash_cond_del(jobid_hash, jobid_should_free_item,
423                                   "intentionally_bad_jobid");
424
425         if (strcmp(obd_jobid_var, JOBSTATS_DISABLE) == 0)
426                 /* Jobstats isn't enabled */
427                 memset(jobid, 0, LUSTRE_JOBID_SIZE);
428         else if (strcmp(obd_jobid_var, JOBSTATS_NODELOCAL) == 0)
429                 /* Whole node dedicated to single job */
430                 memcpy(jobid, obd_jobid_node, LUSTRE_JOBID_SIZE);
431         else
432                 /* Get jobid from hash table */
433                 rc = get_jobid(jobid);
434
435         RETURN(rc);
436 }
437 EXPORT_SYMBOL(lustre_get_jobid);
438
439 /*
440  * lustre_jobid_clear
441  *
442  * uses value pushed in via jobid_name
443  * If any entries in the hash table match the value, they are removed
444  */
445 void lustre_jobid_clear(const char *data)
446 {
447         char jobid[LUSTRE_JOBID_SIZE + 1];
448
449         if (jobid_hash == NULL)
450                 return;
451
452         strncpy(jobid, data, LUSTRE_JOBID_SIZE);
453         /* trim \n off the end of the incoming jobid */
454         if (jobid[strlen(jobid) - 1] == '\n')
455                 jobid[strlen(jobid) - 1] = '\0';
456
457         CDEBUG(D_INFO, "Clearing Jobid: %s\n", jobid);
458         cfs_hash_cond_del(jobid_hash, jobid_should_free_item, jobid);
459
460         CDEBUG(D_INFO, "%d items remain in jobID table\n",
461                atomic_read(&jobid_hash->hs_count));
462 }