Whamcloud - gitweb
LU-9221 jobstats: Create a pid-based hash for jobid values
[fs/lustre-release.git] / lustre / obdclass / class_obd.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.gnu.org/licenses/gpl-2.0.html
19  *
20  * GPL HEADER END
21  */
22 /*
23  * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Use is subject to license terms.
25  *
26  * Copyright (c) 2011, 2015, Intel Corporation.
27  */
28 /*
29  * This file is part of Lustre, http://www.lustre.org/
30  * Lustre is a trademark of Sun Microsystems, Inc.
31  */
32
33 #define DEBUG_SUBSYSTEM S_CLASS
34
35 #include <linux/user_namespace.h>
36 #ifdef HAVE_UIDGID_HEADER
37 # include <linux/uidgid.h>
38 #endif
39 #include <linux/atomic.h>
40 #include <linux/list.h>
41
42 #include <obd_support.h>
43 #include <obd_class.h>
44 #include <uapi/linux/lnet/lnetctl.h>
45 #include <lustre_debug.h>
46 #include <lustre_kernelcomm.h>
47 #include <lprocfs_status.h>
48 #include <cl_object.h>
49 #ifdef HAVE_SERVER_SUPPORT
50 # include <dt_object.h>
51 # include <md_object.h>
52 #endif /* HAVE_SERVER_SUPPORT */
53 #include <uapi/linux/lustre/lustre_ioctl.h>
54 #include "llog_internal.h"
55
56 #ifdef CONFIG_PROC_FS
57 static __u64 obd_max_alloc;
58 #else
59 __u64 obd_max_alloc;
60 #endif
61
62 static DEFINE_SPINLOCK(obd_updatemax_lock);
63
64 /* The following are visible and mutable through /proc/sys/lustre/. */
65 unsigned int obd_debug_peer_on_timeout;
66 EXPORT_SYMBOL(obd_debug_peer_on_timeout);
67 unsigned int obd_dump_on_timeout;
68 EXPORT_SYMBOL(obd_dump_on_timeout);
69 unsigned int obd_dump_on_eviction;
70 EXPORT_SYMBOL(obd_dump_on_eviction);
71 unsigned long obd_max_dirty_pages;
72 EXPORT_SYMBOL(obd_max_dirty_pages);
73 atomic_long_t obd_dirty_pages;
74 EXPORT_SYMBOL(obd_dirty_pages);
75 unsigned int obd_timeout = OBD_TIMEOUT_DEFAULT;   /* seconds */
76 EXPORT_SYMBOL(obd_timeout);
77 unsigned int ldlm_timeout = LDLM_TIMEOUT_DEFAULT; /* seconds */
78 EXPORT_SYMBOL(ldlm_timeout);
79 unsigned int obd_timeout_set;
80 EXPORT_SYMBOL(obd_timeout_set);
81 unsigned int ldlm_timeout_set;
82 EXPORT_SYMBOL(ldlm_timeout_set);
83 /* bulk transfer timeout, give up after 100s by default */
84 unsigned int bulk_timeout = 100; /* seconds */
85 EXPORT_SYMBOL(bulk_timeout);
86 /* Adaptive timeout defs here instead of ptlrpc module for /proc/sys/ access */
87 unsigned int at_min = 0;
88 EXPORT_SYMBOL(at_min);
89 unsigned int at_max = 600;
90 EXPORT_SYMBOL(at_max);
91 unsigned int at_history = 600;
92 EXPORT_SYMBOL(at_history);
93 int at_early_margin = 5;
94 EXPORT_SYMBOL(at_early_margin);
95 int at_extra = 30;
96 EXPORT_SYMBOL(at_extra);
97
98 atomic_long_t obd_dirty_transit_pages;
99 EXPORT_SYMBOL(obd_dirty_transit_pages);
100
101 #ifdef CONFIG_PROC_FS
102 struct lprocfs_stats *obd_memory = NULL;
103 EXPORT_SYMBOL(obd_memory);
104 #endif
105
106 static int class_resolve_dev_name(__u32 len, const char *name)
107 {
108         int rc;
109         int dev;
110
111         ENTRY;
112         if (!len || !name) {
113                 CERROR("No name passed,!\n");
114                 GOTO(out, rc = -EINVAL);
115         }
116         if (name[len - 1] != 0) {
117                 CERROR("Name not nul terminated!\n");
118                 GOTO(out, rc = -EINVAL);
119         }
120
121         CDEBUG(D_IOCTL, "device name %s\n", name);
122         dev = class_name2dev(name);
123         if (dev == -1) {
124                 CDEBUG(D_IOCTL, "No device for name %s!\n", name);
125                 GOTO(out, rc = -EINVAL);
126         }
127
128         CDEBUG(D_IOCTL, "device name %s, dev %d\n", name, dev);
129         rc = dev;
130
131 out:
132         RETURN(rc);
133 }
134
135 int class_handle_ioctl(unsigned int cmd, unsigned long arg)
136 {
137         char *buf = NULL;
138         struct obd_ioctl_data *data;
139         struct libcfs_debug_ioctl_data *debug_data;
140         struct obd_device *obd = NULL;
141         int err = 0, len = 0;
142         ENTRY;
143
144         /* only for debugging */
145         if (cmd == LIBCFS_IOC_DEBUG_MASK) {
146                 debug_data = (struct libcfs_debug_ioctl_data*)arg;
147                 libcfs_subsystem_debug = debug_data->subs;
148                 libcfs_debug = debug_data->debug;
149                 return 0;
150         }
151
152         CDEBUG(D_IOCTL, "cmd = %x\n", cmd);
153         if (obd_ioctl_getdata(&buf, &len, (void __user *)arg)) {
154                 CERROR("OBD ioctl: data error\n");
155                 RETURN(-EINVAL);
156         }
157         data = (struct obd_ioctl_data *)buf;
158
159         switch (cmd) {
160         case OBD_IOC_PROCESS_CFG: {
161                 struct lustre_cfg *lcfg;
162
163                 if (!data->ioc_plen1 || !data->ioc_pbuf1) {
164                         CERROR("No config buffer passed!\n");
165                         GOTO(out, err = -EINVAL);
166                 }
167                 OBD_ALLOC(lcfg, data->ioc_plen1);
168                 if (lcfg == NULL)
169                         GOTO(out, err = -ENOMEM);
170                 err = copy_from_user(lcfg, data->ioc_pbuf1,
171                                          data->ioc_plen1);
172                 if (!err)
173                         err = lustre_cfg_sanity_check(lcfg, data->ioc_plen1);
174                 if (!err)
175                         err = class_process_config(lcfg);
176
177                 OBD_FREE(lcfg, data->ioc_plen1);
178                 GOTO(out, err);
179         }
180
181 #if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(3, 0, 53, 0)
182         case OBD_GET_VERSION: {
183                 static bool warned;
184
185                 if (!data->ioc_inlbuf1) {
186                         CERROR("No buffer passed in ioctl\n");
187                         GOTO(out, err = -EINVAL);
188                 }
189
190                 if (strlen(LUSTRE_VERSION_STRING) + 1 > data->ioc_inllen1) {
191                         CERROR("ioctl buffer too small to hold version\n");
192                         GOTO(out, err = -EINVAL);
193                 }
194
195                 if (!warned) {
196                         warned = true;
197                         CWARN("%s: ioctl(OBD_GET_VERSION) is deprecated, "
198                               "use llapi_get_version_string() and/or relink\n",
199                               current->comm);
200                 }
201                 memcpy(data->ioc_bulk, LUSTRE_VERSION_STRING,
202                        strlen(LUSTRE_VERSION_STRING) + 1);
203
204                 if (copy_to_user((void __user *)arg, data, len))
205                         err = -EFAULT;
206                 GOTO(out, err);
207         }
208 #endif
209         case OBD_IOC_NAME2DEV: {
210                 /* Resolve a device name.  This does not change the
211                  * currently selected device.
212                  */
213                 int dev;
214
215                 dev = class_resolve_dev_name(data->ioc_inllen1,
216                                              data->ioc_inlbuf1);
217                 data->ioc_dev = dev;
218                 if (dev < 0)
219                         GOTO(out, err = -EINVAL);
220
221                 if (copy_to_user((void __user *)arg, data, sizeof(*data)))
222                         err = -EFAULT;
223                 GOTO(out, err);
224         }
225
226         case OBD_IOC_UUID2DEV: {
227                 /* Resolve a device uuid.  This does not change the
228                  * currently selected device.
229                  */
230                 int dev;
231                 struct obd_uuid uuid;
232
233                 if (!data->ioc_inllen1 || !data->ioc_inlbuf1) {
234                         CERROR("No UUID passed!\n");
235                         GOTO(out, err = -EINVAL);
236                 }
237                 if (data->ioc_inlbuf1[data->ioc_inllen1 - 1] != 0) {
238                         CERROR("UUID not NUL terminated!\n");
239                         GOTO(out, err = -EINVAL);
240                 }
241
242                 CDEBUG(D_IOCTL, "device name %s\n", data->ioc_inlbuf1);
243                 obd_str2uuid(&uuid, data->ioc_inlbuf1);
244                 dev = class_uuid2dev(&uuid);
245                 data->ioc_dev = dev;
246                 if (dev == -1) {
247                         CDEBUG(D_IOCTL, "No device for UUID %s!\n",
248                                data->ioc_inlbuf1);
249                         GOTO(out, err = -EINVAL);
250                 }
251
252                 CDEBUG(D_IOCTL, "device name %s, dev %d\n", data->ioc_inlbuf1,
253                        dev);
254                 if (copy_to_user((void __user *)arg, data, sizeof(*data)))
255                         err = -EFAULT;
256                 GOTO(out, err);
257         }
258
259         case OBD_IOC_GETDEVICE: {
260                 int     index = data->ioc_count;
261                 char    *status, *str;
262
263                 if (!data->ioc_inlbuf1) {
264                         CERROR("No buffer passed in ioctl\n");
265                         GOTO(out, err = -EINVAL);
266                 }
267                 if (data->ioc_inllen1 < 128) {
268                         CERROR("ioctl buffer too small to hold version\n");
269                         GOTO(out, err = -EINVAL);
270                 }
271
272                 obd = class_num2obd(index);
273                 if (!obd)
274                         GOTO(out, err = -ENOENT);
275
276                 if (obd->obd_stopping)
277                         status = "ST";
278                 else if (obd->obd_set_up)
279                         status = "UP";
280                 else if (obd->obd_attached)
281                         status = "AT";
282                 else
283                         status = "--";
284                 str = (char *)data->ioc_bulk;
285                 snprintf(str, len - sizeof(*data), "%3d %s %s %s %s %d",
286                          (int)index, status, obd->obd_type->typ_name,
287                          obd->obd_name, obd->obd_uuid.uuid,
288                          atomic_read(&obd->obd_refcount));
289
290                 if (copy_to_user((void __user *)arg, data, len))
291                         err = -EFAULT;
292
293                 GOTO(out, err);
294         }
295
296         }
297
298         if (data->ioc_dev == OBD_DEV_BY_DEVNAME) {
299                 if (data->ioc_inllen4 <= 0 || data->ioc_inlbuf4 == NULL)
300                         GOTO(out, err = -EINVAL);
301                 if (strnlen(data->ioc_inlbuf4, MAX_OBD_NAME) >= MAX_OBD_NAME)
302                         GOTO(out, err = -EINVAL);
303                 obd = class_name2obd(data->ioc_inlbuf4);
304         } else if (data->ioc_dev < class_devno_max()) {
305                 obd = class_num2obd(data->ioc_dev);
306         } else {
307                 CERROR("OBD ioctl: No device\n");
308                 GOTO(out, err = -EINVAL);
309         }
310
311         if (obd == NULL) {
312                 CERROR("OBD ioctl : No Device %d\n", data->ioc_dev);
313                 GOTO(out, err = -EINVAL);
314         }
315         LASSERT(obd->obd_magic == OBD_DEVICE_MAGIC);
316
317         if (!obd->obd_set_up || obd->obd_stopping) {
318                 CERROR("OBD ioctl: device not setup %d \n", data->ioc_dev);
319                 GOTO(out, err = -EINVAL);
320         }
321
322         switch(cmd) {
323         case OBD_IOC_NO_TRANSNO: {
324                 if (!obd->obd_attached) {
325                         CERROR("Device %d not attached\n", obd->obd_minor);
326                         GOTO(out, err = -ENODEV);
327                 }
328                 CDEBUG(D_HA, "%s: disabling committed-transno notification\n",
329                        obd->obd_name);
330                 obd->obd_no_transno = 1;
331                 GOTO(out, err = 0);
332         }
333
334         default: {
335                 err = obd_iocontrol(cmd, obd->obd_self_export, len, data, NULL);
336                 if (err)
337                         GOTO(out, err);
338
339                 if (copy_to_user((void __user *)arg, data, len))
340                         err = -EFAULT;
341                 GOTO(out, err);
342         }
343         }
344
345 out:
346         OBD_FREE_LARGE(buf, len);
347         RETURN(err);
348 } /* class_handle_ioctl */
349
350 #define OBD_INIT_CHECK
351 #ifdef OBD_INIT_CHECK
352 static int obd_init_checks(void)
353 {
354         __u64 u64val, div64val;
355         char buf[64];
356         int len, ret = 0;
357
358         CDEBUG(D_INFO, "OBD_OBJECT_EOF = %#llx\n", (__u64)OBD_OBJECT_EOF);
359
360         u64val = OBD_OBJECT_EOF;
361         CDEBUG(D_INFO, "u64val OBD_OBJECT_EOF = %#llx\n", u64val);
362         if (u64val != OBD_OBJECT_EOF) {
363                 CERROR("__u64 %#llx(%d) != 0xffffffffffffffff\n",
364                        u64val, (int)sizeof(u64val));
365                 ret = -EINVAL;
366         }
367         len = snprintf(buf, sizeof(buf), "%#llx", u64val);
368         if (len != 18) {
369                 CWARN("u64 hex wrong length! strlen(%s)=%d != 18\n", buf, len);
370                 ret = -EINVAL;
371         }
372
373         div64val = OBD_OBJECT_EOF;
374         CDEBUG(D_INFO, "u64val OBD_OBJECT_EOF = %#llx\n", u64val);
375         if (u64val != OBD_OBJECT_EOF) {
376                 CERROR("__u64 %#llx(%d) != 0xffffffffffffffff\n",
377                        u64val, (int)sizeof(u64val));
378                 ret = -EOVERFLOW;
379         }
380         if (u64val >> 8 != OBD_OBJECT_EOF >> 8) {
381                 CERROR("__u64 %#llx(%d) != 0xffffffffffffffff\n",
382                        u64val, (int)sizeof(u64val));
383                 return -EOVERFLOW;
384         }
385         if (do_div(div64val, 256) != (u64val & 255)) {
386                 CERROR("do_div(%#llx,256) != %llu\n", u64val, u64val & 255);
387                 return -EOVERFLOW;
388         }
389         if (u64val >> 8 != div64val) {
390                 CERROR("do_div(%#llx,256) %llu != %llu\n",
391                        u64val, div64val, u64val >> 8);
392                 return -EOVERFLOW;
393         }
394         len = snprintf(buf, sizeof(buf), "%#llx", u64val);
395         if (len != 18) {
396                 CWARN("u64 hex wrong length! strlen(%s)=%d != 18\n", buf, len);
397                 ret = -EINVAL;
398         }
399         len = snprintf(buf, sizeof(buf), "%llu", u64val);
400         if (len != 20) {
401                 CWARN("u64 wrong length! strlen(%s)=%d != 20\n", buf, len);
402                 ret = -EINVAL;
403         }
404         len = snprintf(buf, sizeof(buf), "%lld", u64val);
405         if (len != 2) {
406                 CWARN("s64 wrong length! strlen(%s)=%d != 2\n", buf, len);
407                 ret = -EINVAL;
408         }
409         if ((u64val & ~PAGE_MASK) >= PAGE_SIZE) {
410                 CWARN("mask failed: u64val %llu >= %llu\n", u64val,
411                       (__u64)PAGE_SIZE);
412                 ret = -EINVAL;
413         }
414
415         return ret;
416 }
417 #else
418 #define obd_init_checks() do {} while(0)
419 #endif
420
421 static int __init obdclass_init(void)
422 {
423         int err;
424
425         LCONSOLE_INFO("Lustre: Build Version: "LUSTRE_VERSION_STRING"\n");
426
427         libcfs_kkuc_init();
428
429         err = obd_init_checks();
430         if (err == -EOVERFLOW)
431                 return err;
432
433 #ifdef CONFIG_PROC_FS
434         obd_memory = lprocfs_alloc_stats(OBD_STATS_NUM,
435                                          LPROCFS_STATS_FLAG_NONE |
436                                          LPROCFS_STATS_FLAG_IRQ_SAFE);
437         if (obd_memory == NULL) {
438                 CERROR("kmalloc of 'obd_memory' failed\n");
439                 return -ENOMEM;
440         }
441
442         lprocfs_counter_init(obd_memory, OBD_MEMORY_STAT,
443                              LPROCFS_CNTR_AVGMINMAX,
444                              "memused", "bytes");
445 #endif
446         err = obd_zombie_impexp_init();
447         if (err)
448                 goto cleanup_obd_memory;
449
450         err = class_handle_init();
451         if (err)
452                 goto cleanup_zombie_impexp;
453
454         err = misc_register(&obd_psdev);
455         if (err) {
456                 CERROR("cannot register %d err %d\n", OBD_DEV_MINOR, err);
457                 goto cleanup_class_handle;
458         }
459
460         /* Default the dirty page cache cap to 1/2 of system memory.
461          * For clients with less memory, a larger fraction is needed
462          * for other purposes (mostly for BGL). */
463         if (totalram_pages <= 512 << (20 - PAGE_SHIFT))
464                 obd_max_dirty_pages = totalram_pages / 4;
465         else
466                 obd_max_dirty_pages = totalram_pages / 2;
467
468         err = obd_init_caches();
469         if (err)
470                 goto cleanup_deregister;
471
472         err = class_procfs_init();
473         if (err)
474                 goto cleanup_caches;
475
476         err = lu_global_init();
477         if (err)
478                 goto cleanup_class_procfs;
479
480         err = cl_global_init();
481         if (err != 0)
482                 goto cleanup_lu_global;
483
484 #ifdef HAVE_SERVER_SUPPORT
485         err = dt_global_init();
486         if (err != 0)
487                 goto cleanup_cl_global;
488
489         err = lu_ucred_global_init();
490         if (err != 0)
491                 goto cleanup_dt_global;
492 #endif /* HAVE_SERVER_SUPPORT */
493
494         err = llog_info_init();
495         if (err)
496 #ifdef HAVE_SERVER_SUPPORT
497                 goto cleanup_lu_ucred_global;
498 #else /* !HAVE_SERVER_SUPPORT */
499                 goto cleanup_cl_global;
500 #endif /* HAVE_SERVER_SUPPORT */
501
502         err = lustre_register_fs();
503
504         /* simulate a late OOM situation now to require all
505          * alloc'ed/initialized resources to be freed */
506         if (OBD_FAIL_CHECK(OBD_FAIL_OBDCLASS_MODULE_LOAD)) {
507                 /* fake error but filesystem has been registered */
508                 lustre_unregister_fs();
509                 /* force error to ensure module will be unloaded/cleaned */
510                 err = -ENOMEM;
511         }
512
513         if (err)
514                 goto cleanup_llog_info;
515
516         return 0;
517
518 cleanup_llog_info:
519         llog_info_fini();
520
521 #ifdef HAVE_SERVER_SUPPORT
522 cleanup_lu_ucred_global:
523         lu_ucred_global_fini();
524
525 cleanup_dt_global:
526         dt_global_fini();
527 #endif /* HAVE_SERVER_SUPPORT */
528
529 cleanup_cl_global:
530         cl_global_fini();
531
532 cleanup_lu_global:
533         lu_global_fini();
534
535 cleanup_class_procfs:
536         obd_sysctl_clean();
537         class_procfs_clean();
538
539 cleanup_caches:
540         obd_cleanup_caches();
541
542 cleanup_deregister:
543         misc_deregister(&obd_psdev);
544
545 cleanup_class_handle:
546         class_handle_cleanup();
547
548 cleanup_zombie_impexp:
549         obd_zombie_impexp_stop();
550
551 cleanup_obd_memory:
552 #ifdef CONFIG_PROC_FS
553         lprocfs_free_stats(&obd_memory);
554 #endif
555
556         return err;
557 }
558
559 void obd_update_maxusage(void)
560 {
561         __u64 max;
562
563         max = obd_memory_sum();
564
565         spin_lock(&obd_updatemax_lock);
566         if (max > obd_max_alloc)
567                 obd_max_alloc = max;
568         spin_unlock(&obd_updatemax_lock);
569 }
570 EXPORT_SYMBOL(obd_update_maxusage);
571
572 #ifdef CONFIG_PROC_FS
573 __u64 obd_memory_max(void)
574 {
575         __u64 ret;
576
577         obd_update_maxusage();
578         spin_lock(&obd_updatemax_lock);
579         ret = obd_max_alloc;
580         spin_unlock(&obd_updatemax_lock);
581
582         return ret;
583 }
584 #endif /* CONFIG_PROC_FS */
585
586 static void __exit obdclass_exit(void)
587 {
588 #ifdef CONFIG_PROC_FS
589         __u64 memory_leaked;
590         __u64 memory_max;
591 #endif /* CONFIG_PROC_FS */
592         ENTRY;
593
594         lustre_unregister_fs();
595
596         misc_deregister(&obd_psdev);
597         llog_info_fini();
598 #ifdef HAVE_SERVER_SUPPORT
599         lu_ucred_global_fini();
600         dt_global_fini();
601 #endif /* HAVE_SERVER_SUPPORT */
602         cl_global_fini();
603         lu_global_fini();
604
605         obd_cleanup_caches();
606         obd_sysctl_clean();
607
608         class_procfs_clean();
609
610         class_handle_cleanup();
611         class_del_uuid(NULL); /* Delete all UUIDs. */
612         obd_zombie_impexp_stop();
613
614 #ifdef CONFIG_PROC_FS
615         memory_leaked = obd_memory_sum();
616         memory_max = obd_memory_max();
617
618         lprocfs_free_stats(&obd_memory);
619         CDEBUG((memory_leaked) ? D_ERROR : D_INFO,
620                "obd_memory max: %llu, leaked: %llu\n",
621                memory_max, memory_leaked);
622 #endif /* CONFIG_PROC_FS */
623
624         EXIT;
625 }
626
627 MODULE_AUTHOR("OpenSFS, Inc. <http://www.lustre.org/>");
628 MODULE_DESCRIPTION("Lustre Class Driver");
629 MODULE_VERSION(LUSTRE_VERSION_STRING);
630 MODULE_LICENSE("GPL");
631
632 module_init(obdclass_init);
633 module_exit(obdclass_exit);