Whamcloud - gitweb
LU-9221 jobstats: Create a pid-based hash for jobid values
[fs/lustre-release.git] / lustre / obdclass / jobid.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.gnu.org/licenses/gpl-2.0.html
19  *
20  * GPL HEADER END
21  */
22 /*
23  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Use is subject to license terms.
25  *
26  * Copyright (c) 2011, 2014, Intel Corporation.
27  *
28  * Copyright 2017 Cray Inc, all rights reserved.
29  * Author: Ben Evans.
30  *
31  * Store PID->JobID mappings
32  */
33
34 #define DEBUG_SUBSYSTEM S_RPC
35 #include <linux/user_namespace.h>
36 #ifdef HAVE_UIDGID_HEADER
37 #include <linux/uidgid.h>
38 #endif
39
40 #include <obd_support.h>
41 #include <obd_class.h>
42 #include <lustre_net.h>
43
44 static struct cfs_hash *jobid_hash;
45 static struct cfs_hash_ops jobid_hash_ops;
46 spinlock_t jobid_hash_lock;
47
48 #define RESCAN_INTERVAL 30
49 #define DELETE_INTERVAL 300
50
51 char obd_jobid_var[JOBSTATS_JOBID_VAR_MAX_LEN + 1] = JOBSTATS_DISABLE;
52 char obd_jobid_node[LUSTRE_JOBID_SIZE + 1];
53
54 /**
55  * Structure to store a single jobID/PID mapping
56  */
57 struct jobid_to_pid_map {
58         struct hlist_node       jp_hash;
59         time64_t                jp_time;
60         atomic_t                jp_refcount;
61         spinlock_t              jp_lock; /* protects jp_jobid */
62         char                    jp_jobid[LUSTRE_JOBID_SIZE + 1];
63         pid_t                   jp_pid;
64 };
65
66 /* Get jobid of current process by reading the environment variable
67  * stored in between the "env_start" & "env_end" of task struct.
68  *
69  * If some job scheduler doesn't store jobid in the "env_start/end",
70  * then an upcall could be issued here to get the jobid by utilizing
71  * the userspace tools/API. Then, the jobid must be cached.
72  */
73 int get_jobid_from_environ(char *jobid_var, char *jobid, int jobid_len)
74 {
75         int rc;
76
77         rc = cfs_get_environ(jobid_var, jobid, &jobid_len);
78         if (!rc)
79                 goto out;
80
81         if (rc == -EOVERFLOW) {
82                 /* For the PBS_JOBID and LOADL_STEP_ID keys (which are
83                  * variable length strings instead of just numbers), it
84                  * might make sense to keep the unique parts for JobID,
85                  * instead of just returning an error.  That means a
86                  * larger temp buffer for cfs_get_environ(), then
87                  * truncating the string at some separator to fit into
88                  * the specified jobid_len.  Fix later if needed. */
89                 static bool printed;
90                 if (unlikely(!printed)) {
91                         LCONSOLE_ERROR_MSG(0x16b, "%s value too large "
92                                            "for JobID buffer (%d)\n",
93                                            obd_jobid_var, jobid_len);
94                         printed = true;
95                 }
96         } else {
97                 CDEBUG((rc == -ENOENT || rc == -EINVAL ||
98                         rc == -EDEADLK) ? D_INFO : D_ERROR,
99                        "Get jobid for (%s) failed: rc = %d\n",
100                        obd_jobid_var, rc);
101         }
102
103 out:
104         return rc;
105 }
106
107 /*
108  * jobid_should_free_item
109  *
110  * Each item is checked to see if it should be released
111  * Removed from hash table by caller
112  * Actually freed in jobid_put_locked
113  *
114  * Returns 1 if item is to be freed, 0 if it is to be kept
115  */
116
117 static int jobid_should_free_item(void *obj, void *data)
118 {
119         char *jobid = data;
120         struct jobid_to_pid_map *pidmap = obj;
121         int rc = 0;
122
123         if (obj == NULL)
124                 return 0;
125
126         spin_lock(&pidmap->jp_lock);
127         if (jobid == NULL)
128                 rc = 1;
129         else if (jobid[0] == '\0')
130                 rc = 1;
131         else if (ktime_get_real_seconds() - pidmap->jp_time > DELETE_INTERVAL)
132                 rc = 1;
133         else if (strcmp(pidmap->jp_jobid, jobid) == 0)
134                 rc = 1;
135         spin_unlock(&pidmap->jp_lock);
136
137         return rc;
138 }
139
/*
 * check_job_name
 *
 * Checks whether @jobid looks like it belongs to a Lustre-internal
 * process.  The test is a prefix match against a fixed list of reserved
 * names, so "ldlm_bl_01" is rejected as well as "ldlm".
 *
 * @jobid must be a NUL-terminated string; it is not modified.
 *
 * Returns true if jobid is valid
 * Returns false if jobid looks like it's a Lustre process
 */
static bool check_job_name(const char *jobid)
{
        /* NULL-terminated list of reserved prefixes, built once */
        static const char *const lustre_reserved[] = {
                "ll_ping", "ptlrpc", "ldlm", "ll_sa", NULL
        };
        const char *const *prefix;

        for (prefix = lustre_reserved; *prefix != NULL; prefix++) {
                if (strncmp(jobid, *prefix, strlen(*prefix)) == 0)
                        return false;
        }
        return true;
}
161
162 /*
163  * get_jobid
164  *
165  * Returns the jobid for the current pid.
166  *
167  * If no jobid is found in the table, the jobid is calculated based on
168  * the value of jobid_var, using procname_uid as the default.
169  *
170  * Return: -ENOMEM if allocating a new pidmap fails
171  *         0 for success
172  */
173 int get_jobid(char *jobid)
174 {
175         pid_t pid = current_pid();
176         struct jobid_to_pid_map *pidmap = NULL;
177         struct jobid_to_pid_map *pidmap2;
178         char tmp_jobid[LUSTRE_JOBID_SIZE + 1];
179         int rc = 0;
180         ENTRY;
181
182         pidmap = cfs_hash_lookup(jobid_hash, &pid);
183         if (pidmap == NULL) {
184                 OBD_ALLOC_PTR(pidmap);
185                 if (pidmap == NULL)
186                         GOTO(out, rc = -ENOMEM);
187
188                 pidmap->jp_pid = pid;
189                 pidmap->jp_time = 0;
190                 pidmap->jp_jobid[0] = '\0';
191                 spin_lock_init(&pidmap->jp_lock);
192                 INIT_HLIST_NODE(&pidmap->jp_hash);
193
194                 /*
195                  * Add the newly created map to the hash, on key collision we
196                  * lost a racing addition and must destroy our newly allocated
197                  * map.  The object which exists in the hash will be
198                  * returned.
199                  */
200                 pidmap2 = cfs_hash_findadd_unique(jobid_hash, &pid,
201                                                   &pidmap->jp_hash);
202                 if (unlikely(pidmap != pidmap2)) {
203                         CDEBUG(D_INFO, "Duplicate jobid found\n");
204                         OBD_FREE_PTR(pidmap);
205                         pidmap = pidmap2;
206                 } else {
207                         cfs_hash_get(jobid_hash, &pidmap->jp_hash);
208                 }
209         }
210
211         spin_lock(&pidmap->jp_lock);
212         if ((ktime_get_real_seconds() - pidmap->jp_time >= RESCAN_INTERVAL) ||
213             pidmap->jp_jobid[0] == '\0') {
214                 /* mark the pidmap as being up to date, if we fail to find
215                  * a good jobid, revert to the old time and try again later
216                  * prevent a race with deletion */
217
218                 time64_t tmp_time = pidmap->jp_time;
219                 pidmap->jp_time = ktime_get_real_seconds();
220
221                 spin_unlock(&pidmap->jp_lock);
222                 if (strcmp(obd_jobid_var, JOBSTATS_PROCNAME_UID) == 0) {
223                         rc = 1;
224                 } else {
225                         memset(tmp_jobid, '\0', LUSTRE_JOBID_SIZE + 1);
226                         rc = get_jobid_from_environ(obd_jobid_var,
227                                                     tmp_jobid,
228                                                     LUSTRE_JOBID_SIZE + 1);
229                 }
230
231                 /* Use process name + fsuid as jobid default, or when
232                  * specified by "jobname_uid" */
233                 if (rc) {
234                         snprintf(tmp_jobid, LUSTRE_JOBID_SIZE, "%s.%u",
235                                  current_comm(),
236                                  from_kuid(&init_user_ns, current_fsuid()));
237                         rc = 0;
238                 }
239
240                 CDEBUG(D_INFO, "Jobid to pid mapping established: %d->%s\n",
241                        pidmap->jp_pid, tmp_jobid);
242
243                 spin_lock(&pidmap->jp_lock);
244                 if (check_job_name(tmp_jobid))
245                         strncpy(pidmap->jp_jobid, tmp_jobid,
246                                 LUSTRE_JOBID_SIZE);
247                 else
248                         pidmap->jp_time = tmp_time;
249         }
250
251         if (strlen(pidmap->jp_jobid) != 0)
252                 strncpy(jobid, pidmap->jp_jobid, LUSTRE_JOBID_SIZE);
253
254         spin_unlock(&pidmap->jp_lock);
255
256         cfs_hash_put(jobid_hash, &pidmap->jp_hash);
257
258         EXIT;
259 out:
260         return rc;
261 }
262
263 /*
264  * Hash initialization, copied from server-side job stats bucket sizes
265  */
266 #define HASH_JOBID_BKT_BITS 5
267 #define HASH_JOBID_CUR_BITS 7
268 #define HASH_JOBID_MAX_BITS 12
269
270 int jobid_cache_init(void)
271 {
272         int rc = 0;
273         struct cfs_hash *tmp_jobid_hash;
274         ENTRY;
275
276         spin_lock_init(&jobid_hash_lock);
277
278         tmp_jobid_hash = cfs_hash_create("JOBID_HASH",
279                                          HASH_JOBID_CUR_BITS,
280                                          HASH_JOBID_MAX_BITS,
281                                          HASH_JOBID_BKT_BITS, 0,
282                                          CFS_HASH_MIN_THETA,
283                                          CFS_HASH_MAX_THETA,
284                                          &jobid_hash_ops,
285                                          CFS_HASH_DEFAULT);
286
287         spin_lock(&jobid_hash_lock);
288         if (jobid_hash == NULL) {
289                 jobid_hash = tmp_jobid_hash;
290                 spin_unlock(&jobid_hash_lock);
291         } else {
292                 spin_unlock(&jobid_hash_lock);
293                 if (tmp_jobid_hash != NULL)
294                         cfs_hash_putref(tmp_jobid_hash);
295         }
296
297         if (!jobid_hash)
298                 rc = -ENOMEM;
299
300         RETURN(rc);
301 }
302 EXPORT_SYMBOL(jobid_cache_init);
303
304 void jobid_cache_fini(void)
305 {
306         struct cfs_hash *tmp_hash;
307         ENTRY;
308
309         spin_lock(&jobid_hash_lock);
310         tmp_hash = jobid_hash;
311         jobid_hash = NULL;
312         spin_unlock(&jobid_hash_lock);
313
314         if (tmp_hash != NULL) {
315                 cfs_hash_cond_del(tmp_hash, jobid_should_free_item, NULL);
316                 cfs_hash_putref(tmp_hash);
317         }
318
319         EXIT;
320 }
321 EXPORT_SYMBOL(jobid_cache_fini);
322
323 /*
324  * Hash operations for pid<->jobid
325  */
326 static unsigned jobid_hashfn(struct cfs_hash *hs, const void *key,
327                              unsigned mask)
328 {
329         return cfs_hash_djb2_hash(key, sizeof(pid_t), mask);
330 }
331
332 static void *jobid_key(struct hlist_node *hnode)
333 {
334         struct jobid_to_pid_map *pidmap;
335
336         pidmap = hlist_entry(hnode, struct jobid_to_pid_map, jp_hash);
337         return &pidmap->jp_pid;
338 }
339
340 static int jobid_keycmp(const void *key, struct hlist_node *hnode)
341 {
342         const pid_t *pid_key1;
343         const pid_t *pid_key2;
344
345         LASSERT(key != NULL);
346         pid_key1 = (pid_t *)key;
347         pid_key2 = (pid_t *)jobid_key(hnode);
348
349         return *pid_key1 == *pid_key2;
350 }
351
352 static void *jobid_object(struct hlist_node *hnode)
353 {
354         return hlist_entry(hnode, struct jobid_to_pid_map, jp_hash);
355 }
356
357 static void jobid_get(struct cfs_hash *hs, struct hlist_node *hnode)
358 {
359         struct jobid_to_pid_map *pidmap;
360
361         pidmap = hlist_entry(hnode, struct jobid_to_pid_map, jp_hash);
362
363         atomic_inc(&pidmap->jp_refcount);
364 }
365
366 static void jobid_put_locked(struct cfs_hash *hs, struct hlist_node *hnode)
367 {
368         struct jobid_to_pid_map *pidmap;
369
370         if (hnode == NULL)
371                 return;
372
373         pidmap = hlist_entry(hnode, struct jobid_to_pid_map, jp_hash);
374         LASSERT(atomic_read(&pidmap->jp_refcount) > 0);
375         if (atomic_dec_and_test(&pidmap->jp_refcount)) {
376                 CDEBUG(D_INFO, "Freeing: %d->%s\n",
377                        pidmap->jp_pid, pidmap->jp_jobid);
378
379                 OBD_FREE_PTR(pidmap);
380         }
381 }
382
383 static struct cfs_hash_ops jobid_hash_ops = {
384         .hs_hash        = jobid_hashfn,
385         .hs_keycmp      = jobid_keycmp,
386         .hs_key         = jobid_key,
387         .hs_object      = jobid_object,
388         .hs_get         = jobid_get,
389         .hs_put         = jobid_put_locked,
390         .hs_put_locked  = jobid_put_locked,
391 };
392
393 /*
394  * Return the jobid:
395  *
396  * Based on the value of obd_jobid_var
397  * JOBSTATS_DISABLE:  none
398  * JOBSTATS_NODELOCAL:  Contents of obd_jobid_name
399  * JOBSTATS_PROCNAME_UID:  Process name/UID
400  * anything else:  Look up the value in the processes environment
401  * default: JOBSTATS_PROCNAME_UID
402  */
403
404 int lustre_get_jobid(char *jobid)
405 {
406         int rc = 0;
407         int clear = 0;
408         static time64_t last_delete;
409         ENTRY;
410
411         LASSERT(jobid_hash != NULL);
412
413         spin_lock(&jobid_hash_lock);
414         if (last_delete + DELETE_INTERVAL <= ktime_get_real_seconds()) {
415                 clear = 1;
416                 last_delete = ktime_get_real_seconds();
417         }
418         spin_unlock(&jobid_hash_lock);
419
420         if (clear)
421                 cfs_hash_cond_del(jobid_hash, jobid_should_free_item,
422                                   "intentionally_bad_jobid");
423
424         if (strcmp(obd_jobid_var, JOBSTATS_DISABLE) == 0)
425                 /* Jobstats isn't enabled */
426                 memset(jobid, 0, LUSTRE_JOBID_SIZE);
427         else if (strcmp(obd_jobid_var, JOBSTATS_NODELOCAL) == 0)
428                 /* Whole node dedicated to single job */
429                 memcpy(jobid, obd_jobid_node, LUSTRE_JOBID_SIZE);
430         else
431                 /* Get jobid from hash table */
432                 rc = get_jobid(jobid);
433
434         RETURN(rc);
435 }
436 EXPORT_SYMBOL(lustre_get_jobid);
437
438 /*
439  * lustre_jobid_clear
440  *
441  * uses value pushed in via jobid_name
442  * If any entries in the hash table match the value, they are removed
443  */
444 void lustre_jobid_clear(const char *data)
445 {
446         char jobid[LUSTRE_JOBID_SIZE + 1];
447
448         if (jobid_hash == NULL)
449                 return;
450
451         strncpy(jobid, data, LUSTRE_JOBID_SIZE);
452         /* trim \n off the end of the incoming jobid */
453         if (jobid[strlen(jobid) - 1] == '\n')
454                 jobid[strlen(jobid) - 1] = '\0';
455
456         CDEBUG(D_INFO, "Clearing Jobid: %s\n", jobid);
457         cfs_hash_cond_del(jobid_hash, jobid_should_free_item, jobid);
458
459         CDEBUG(D_INFO, "%d items remain in jobID table\n",
460                atomic_read(&jobid_hash->hs_count));
461 }