Whamcloud - gitweb
LU-8475 target: use slab allocation
[fs/lustre-release.git] / lustre / obdclass / class_obd.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.gnu.org/licenses/gpl-2.0.html
19  *
20  * GPL HEADER END
21  */
22 /*
23  * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Use is subject to license terms.
25  *
26  * Copyright (c) 2011, 2015, Intel Corporation.
27  */
28 /*
29  * This file is part of Lustre, http://www.lustre.org/
30  * Lustre is a trademark of Sun Microsystems, Inc.
31  */
32
33 #define DEBUG_SUBSYSTEM S_CLASS
34
35 #include <linux/user_namespace.h>
36 #ifdef HAVE_UIDGID_HEADER
37 # include <linux/uidgid.h>
38 #endif
39 #include <linux/atomic.h>
40 #include <linux/list.h>
41
42 #include <obd_support.h>
43 #include <obd_class.h>
44 #include <uapi/linux/lnet/lnetctl.h>
45 #include <lustre_debug.h>
46 #include <lustre_kernelcomm.h>
47 #include <lprocfs_status.h>
48 #include <cl_object.h>
49 #ifdef HAVE_SERVER_SUPPORT
50 # include <dt_object.h>
51 # include <md_object.h>
52 #endif /* HAVE_SERVER_SUPPORT */
53 #include <uapi/linux/lustre/lustre_ioctl.h>
54 #include "llog_internal.h"
55
/*
 * High-water mark of obd_memory_sum(), raised by obd_update_maxusage()
 * while holding obd_updatemax_lock.  With CONFIG_PROC_FS it is private to
 * this file and read through obd_memory_max(); otherwise it has external
 * linkage.
 */
#ifdef CONFIG_PROC_FS
static __u64 obd_max_alloc;
#else
__u64 obd_max_alloc;
#endif

/* Serializes every read and update of obd_max_alloc in this file. */
static DEFINE_SPINLOCK(obd_updatemax_lock);

/* The following are visible and mutable through /proc/sys/lustre/. */
unsigned int obd_debug_peer_on_timeout;
EXPORT_SYMBOL(obd_debug_peer_on_timeout);
unsigned int obd_dump_on_timeout;
EXPORT_SYMBOL(obd_dump_on_timeout);
unsigned int obd_dump_on_eviction;
EXPORT_SYMBOL(obd_dump_on_eviction);
/* Set in obdclass_init() to 1/4 or 1/2 of totalram_pages. */
unsigned long obd_max_dirty_pages;
EXPORT_SYMBOL(obd_max_dirty_pages);
atomic_long_t obd_dirty_pages;
EXPORT_SYMBOL(obd_dirty_pages);
unsigned int obd_timeout = OBD_TIMEOUT_DEFAULT;   /* seconds */
EXPORT_SYMBOL(obd_timeout);
unsigned int ldlm_timeout = LDLM_TIMEOUT_DEFAULT; /* seconds */
EXPORT_SYMBOL(ldlm_timeout);
unsigned int obd_timeout_set;
EXPORT_SYMBOL(obd_timeout_set);
unsigned int ldlm_timeout_set;
EXPORT_SYMBOL(ldlm_timeout_set);
/* bulk transfer timeout, give up after 100s by default */
unsigned int bulk_timeout = 100; /* seconds */
EXPORT_SYMBOL(bulk_timeout);
/* Adaptive timeout defs here instead of ptlrpc module for /proc/sys/ access */
unsigned int at_min = 0;
EXPORT_SYMBOL(at_min);
unsigned int at_max = 600;
EXPORT_SYMBOL(at_max);
unsigned int at_history = 600;
EXPORT_SYMBOL(at_history);
int at_early_margin = 5;
EXPORT_SYMBOL(at_early_margin);
int at_extra = 30;
EXPORT_SYMBOL(at_extra);

atomic_long_t obd_dirty_transit_pages;
EXPORT_SYMBOL(obd_dirty_transit_pages);

/*
 * Memory usage statistics.  Allocated with lprocfs_alloc_stats() in
 * obdclass_init() and released with lprocfs_free_stats() in obdclass_exit()
 * (or on the obdclass_init() error path).
 */
#ifdef CONFIG_PROC_FS
struct lprocfs_stats *obd_memory = NULL;
EXPORT_SYMBOL(obd_memory);
#endif
105
106 static int class_resolve_dev_name(__u32 len, const char *name)
107 {
108         int rc;
109         int dev;
110
111         ENTRY;
112         if (!len || !name) {
113                 CERROR("No name passed,!\n");
114                 GOTO(out, rc = -EINVAL);
115         }
116         if (name[len - 1] != 0) {
117                 CERROR("Name not nul terminated!\n");
118                 GOTO(out, rc = -EINVAL);
119         }
120
121         CDEBUG(D_IOCTL, "device name %s\n", name);
122         dev = class_name2dev(name);
123         if (dev == -1) {
124                 CDEBUG(D_IOCTL, "No device for name %s!\n", name);
125                 GOTO(out, rc = -EINVAL);
126         }
127
128         CDEBUG(D_IOCTL, "device name %s, dev %d\n", name, dev);
129         rc = dev;
130
131 out:
132         RETURN(rc);
133 }
134
135 int class_handle_ioctl(unsigned int cmd, unsigned long arg)
136 {
137         char *buf = NULL;
138         struct obd_ioctl_data *data;
139         struct libcfs_debug_ioctl_data *debug_data;
140         struct obd_device *obd = NULL;
141         int err = 0, len = 0;
142         ENTRY;
143
144         /* only for debugging */
145         if (cmd == LIBCFS_IOC_DEBUG_MASK) {
146                 debug_data = (struct libcfs_debug_ioctl_data*)arg;
147                 libcfs_subsystem_debug = debug_data->subs;
148                 libcfs_debug = debug_data->debug;
149                 return 0;
150         }
151
152         CDEBUG(D_IOCTL, "cmd = %x\n", cmd);
153         if (obd_ioctl_getdata(&buf, &len, (void __user *)arg)) {
154                 CERROR("OBD ioctl: data error\n");
155                 RETURN(-EINVAL);
156         }
157         data = (struct obd_ioctl_data *)buf;
158
159         switch (cmd) {
160         case OBD_IOC_PROCESS_CFG: {
161                 struct lustre_cfg *lcfg;
162
163                 if (!data->ioc_plen1 || !data->ioc_pbuf1) {
164                         CERROR("No config buffer passed!\n");
165                         GOTO(out, err = -EINVAL);
166                 }
167                 OBD_ALLOC(lcfg, data->ioc_plen1);
168                 if (lcfg == NULL)
169                         GOTO(out, err = -ENOMEM);
170                 err = copy_from_user(lcfg, data->ioc_pbuf1,
171                                          data->ioc_plen1);
172                 if (!err)
173                         err = lustre_cfg_sanity_check(lcfg, data->ioc_plen1);
174                 if (!err)
175                         err = class_process_config(lcfg);
176
177                 OBD_FREE(lcfg, data->ioc_plen1);
178                 GOTO(out, err);
179         }
180
181 #if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(3, 0, 53, 0)
182         case OBD_GET_VERSION: {
183                 static bool warned;
184
185                 if (!data->ioc_inlbuf1) {
186                         CERROR("No buffer passed in ioctl\n");
187                         GOTO(out, err = -EINVAL);
188                 }
189
190                 if (strlen(LUSTRE_VERSION_STRING) + 1 > data->ioc_inllen1) {
191                         CERROR("ioctl buffer too small to hold version\n");
192                         GOTO(out, err = -EINVAL);
193                 }
194
195                 if (!warned) {
196                         warned = true;
197                         CWARN("%s: ioctl(OBD_GET_VERSION) is deprecated, "
198                               "use llapi_get_version_string() and/or relink\n",
199                               current->comm);
200                 }
201                 memcpy(data->ioc_bulk, LUSTRE_VERSION_STRING,
202                        strlen(LUSTRE_VERSION_STRING) + 1);
203
204                 if (copy_to_user((void __user *)arg, data, len))
205                         err = -EFAULT;
206                 GOTO(out, err);
207         }
208 #endif
209         case OBD_IOC_NAME2DEV: {
210                 /* Resolve a device name.  This does not change the
211                  * currently selected device.
212                  */
213                 int dev;
214
215                 dev = class_resolve_dev_name(data->ioc_inllen1,
216                                              data->ioc_inlbuf1);
217                 data->ioc_dev = dev;
218                 if (dev < 0)
219                         GOTO(out, err = -EINVAL);
220
221                 if (copy_to_user((void __user *)arg, data, sizeof(*data)))
222                         err = -EFAULT;
223                 GOTO(out, err);
224         }
225
226         case OBD_IOC_UUID2DEV: {
227                 /* Resolve a device uuid.  This does not change the
228                  * currently selected device.
229                  */
230                 int dev;
231                 struct obd_uuid uuid;
232
233                 if (!data->ioc_inllen1 || !data->ioc_inlbuf1) {
234                         CERROR("No UUID passed!\n");
235                         GOTO(out, err = -EINVAL);
236                 }
237                 if (data->ioc_inlbuf1[data->ioc_inllen1 - 1] != 0) {
238                         CERROR("UUID not NUL terminated!\n");
239                         GOTO(out, err = -EINVAL);
240                 }
241
242                 CDEBUG(D_IOCTL, "device name %s\n", data->ioc_inlbuf1);
243                 obd_str2uuid(&uuid, data->ioc_inlbuf1);
244                 dev = class_uuid2dev(&uuid);
245                 data->ioc_dev = dev;
246                 if (dev == -1) {
247                         CDEBUG(D_IOCTL, "No device for UUID %s!\n",
248                                data->ioc_inlbuf1);
249                         GOTO(out, err = -EINVAL);
250                 }
251
252                 CDEBUG(D_IOCTL, "device name %s, dev %d\n", data->ioc_inlbuf1,
253                        dev);
254                 if (copy_to_user((void __user *)arg, data, sizeof(*data)))
255                         err = -EFAULT;
256                 GOTO(out, err);
257         }
258
259         case OBD_IOC_GETDEVICE: {
260                 int     index = data->ioc_count;
261                 char    *status, *str;
262
263                 if (!data->ioc_inlbuf1) {
264                         CERROR("No buffer passed in ioctl\n");
265                         GOTO(out, err = -EINVAL);
266                 }
267                 if (data->ioc_inllen1 < 128) {
268                         CERROR("ioctl buffer too small to hold version\n");
269                         GOTO(out, err = -EINVAL);
270                 }
271
272                 obd = class_num2obd(index);
273                 if (!obd)
274                         GOTO(out, err = -ENOENT);
275
276                 if (obd->obd_stopping)
277                         status = "ST";
278                 else if (obd->obd_set_up)
279                         status = "UP";
280                 else if (obd->obd_attached)
281                         status = "AT";
282                 else
283                         status = "--";
284                 str = (char *)data->ioc_bulk;
285                 snprintf(str, len - sizeof(*data), "%3d %s %s %s %s %d",
286                          (int)index, status, obd->obd_type->typ_name,
287                          obd->obd_name, obd->obd_uuid.uuid,
288                          atomic_read(&obd->obd_refcount));
289
290                 if (copy_to_user((void __user *)arg, data, len))
291                         err = -EFAULT;
292
293                 GOTO(out, err);
294         }
295
296         }
297
298         if (data->ioc_dev == OBD_DEV_BY_DEVNAME) {
299                 if (data->ioc_inllen4 <= 0 || data->ioc_inlbuf4 == NULL)
300                         GOTO(out, err = -EINVAL);
301                 if (strnlen(data->ioc_inlbuf4, MAX_OBD_NAME) >= MAX_OBD_NAME)
302                         GOTO(out, err = -EINVAL);
303                 obd = class_name2obd(data->ioc_inlbuf4);
304         } else if (data->ioc_dev < class_devno_max()) {
305                 obd = class_num2obd(data->ioc_dev);
306         } else {
307                 CERROR("OBD ioctl: No device\n");
308                 GOTO(out, err = -EINVAL);
309         }
310
311         if (obd == NULL) {
312                 CERROR("OBD ioctl : No Device %d\n", data->ioc_dev);
313                 GOTO(out, err = -EINVAL);
314         }
315         LASSERT(obd->obd_magic == OBD_DEVICE_MAGIC);
316
317         if (!obd->obd_set_up || obd->obd_stopping) {
318                 CERROR("OBD ioctl: device not setup %d \n", data->ioc_dev);
319                 GOTO(out, err = -EINVAL);
320         }
321
322         switch(cmd) {
323         case OBD_IOC_NO_TRANSNO: {
324                 if (!obd->obd_attached) {
325                         CERROR("Device %d not attached\n", obd->obd_minor);
326                         GOTO(out, err = -ENODEV);
327                 }
328                 CDEBUG(D_HA, "%s: disabling committed-transno notification\n",
329                        obd->obd_name);
330                 obd->obd_no_transno = 1;
331                 GOTO(out, err = 0);
332         }
333
334         default: {
335                 err = obd_iocontrol(cmd, obd->obd_self_export, len, data, NULL);
336                 if (err)
337                         GOTO(out, err);
338
339                 if (copy_to_user((void __user *)arg, data, len))
340                         err = -EFAULT;
341                 GOTO(out, err);
342         }
343         }
344
345 out:
346         OBD_FREE_LARGE(buf, len);
347         RETURN(err);
348 } /* class_handle_ioctl */
349
350 static int obd_init_checks(void)
351 {
352         __u64 u64val, div64val;
353         char buf[64];
354         int len, ret = 0;
355
356         CDEBUG(D_INFO, "OBD_OBJECT_EOF = %#llx\n", (__u64)OBD_OBJECT_EOF);
357
358         u64val = OBD_OBJECT_EOF;
359         CDEBUG(D_INFO, "u64val OBD_OBJECT_EOF = %#llx\n", u64val);
360         if (u64val != OBD_OBJECT_EOF) {
361                 CERROR("__u64 %#llx(%d) != 0xffffffffffffffff\n",
362                        u64val, (int)sizeof(u64val));
363                 ret = -EINVAL;
364         }
365         len = snprintf(buf, sizeof(buf), "%#llx", u64val);
366         if (len != 18) {
367                 CWARN("u64 hex wrong length! strlen(%s)=%d != 18\n", buf, len);
368                 ret = -EINVAL;
369         }
370
371         div64val = OBD_OBJECT_EOF;
372         CDEBUG(D_INFO, "u64val OBD_OBJECT_EOF = %#llx\n", u64val);
373         if (u64val != OBD_OBJECT_EOF) {
374                 CERROR("__u64 %#llx(%d) != 0xffffffffffffffff\n",
375                        u64val, (int)sizeof(u64val));
376                 ret = -EOVERFLOW;
377         }
378         if (u64val >> 8 != OBD_OBJECT_EOF >> 8) {
379                 CERROR("__u64 %#llx(%d) != 0xffffffffffffffff\n",
380                        u64val, (int)sizeof(u64val));
381                 return -EOVERFLOW;
382         }
383         if (do_div(div64val, 256) != (u64val & 255)) {
384                 CERROR("do_div(%#llx,256) != %llu\n", u64val, u64val & 255);
385                 return -EOVERFLOW;
386         }
387         if (u64val >> 8 != div64val) {
388                 CERROR("do_div(%#llx,256) %llu != %llu\n",
389                        u64val, div64val, u64val >> 8);
390                 return -EOVERFLOW;
391         }
392         len = snprintf(buf, sizeof(buf), "%#llx", u64val);
393         if (len != 18) {
394                 CWARN("u64 hex wrong length! strlen(%s)=%d != 18\n", buf, len);
395                 ret = -EINVAL;
396         }
397         len = snprintf(buf, sizeof(buf), "%llu", u64val);
398         if (len != 20) {
399                 CWARN("u64 wrong length! strlen(%s)=%d != 20\n", buf, len);
400                 ret = -EINVAL;
401         }
402         len = snprintf(buf, sizeof(buf), "%lld", u64val);
403         if (len != 2) {
404                 CWARN("s64 wrong length! strlen(%s)=%d != 2\n", buf, len);
405                 ret = -EINVAL;
406         }
407         if ((u64val & ~PAGE_MASK) >= PAGE_SIZE) {
408                 CWARN("mask failed: u64val %llu >= %llu\n", u64val,
409                       (__u64)PAGE_SIZE);
410                 ret = -EINVAL;
411         }
412
413         return ret;
414 }
415
/*
 * Module load entry point for obdclass.
 *
 * Runs the arithmetic sanity checks, then brings up each subsystem in
 * order.  When a step fails, control jumps to the cleanup label that undoes
 * everything initialized before it; the labels fall through one another in
 * reverse order of initialization.
 *
 * Returns 0 on success or a negative errno from the step that failed.
 */
static int __init obdclass_init(void)
{
        int err;

        LCONSOLE_INFO("Lustre: Build Version: "LUSTRE_VERSION_STRING"\n");

        libcfs_kkuc_init();

        /* only -EOVERFLOW from the sanity checks aborts the load; any other
         * result is overwritten by the steps below */
        err = obd_init_checks();
        if (err == -EOVERFLOW)
                return err;

#ifdef CONFIG_PROC_FS
        obd_memory = lprocfs_alloc_stats(OBD_STATS_NUM,
                                         LPROCFS_STATS_FLAG_NONE |
                                         LPROCFS_STATS_FLAG_IRQ_SAFE);
        if (obd_memory == NULL) {
                CERROR("kmalloc of 'obd_memory' failed\n");
                return -ENOMEM;
        }

        lprocfs_counter_init(obd_memory, OBD_MEMORY_STAT,
                             LPROCFS_CNTR_AVGMINMAX,
                             "memused", "bytes");
#endif
        err = obd_zombie_impexp_init();
        if (err)
                goto cleanup_obd_memory;

        err = class_handle_init();
        if (err)
                goto cleanup_zombie_impexp;

        err = misc_register(&obd_psdev);
        if (err) {
                CERROR("cannot register %d err %d\n", OBD_DEV_MINOR, err);
                goto cleanup_class_handle;
        }

        /* Default the dirty page cache cap to 1/2 of system memory.
         * For clients with less memory, a larger fraction is needed
         * for other purposes (mostly for BGL). */
        /* 512 << (20 - PAGE_SHIFT) is the number of pages in 512 MiB */
        if (totalram_pages <= 512 << (20 - PAGE_SHIFT))
                obd_max_dirty_pages = totalram_pages / 4;
        else
                obd_max_dirty_pages = totalram_pages / 2;

        err = obd_init_caches();
        if (err)
                goto cleanup_deregister;

        err = class_procfs_init();
        if (err)
                goto cleanup_caches;

        err = lu_global_init();
        if (err)
                goto cleanup_class_procfs;

        err = cl_global_init();
        if (err != 0)
                goto cleanup_lu_global;

#ifdef HAVE_SERVER_SUPPORT
        err = dt_global_init();
        if (err != 0)
                goto cleanup_cl_global;

        err = lu_ucred_global_init();
        if (err != 0)
                goto cleanup_dt_global;
#endif /* HAVE_SERVER_SUPPORT */

        /* the unwind target depends on whether the server-only steps ran */
        err = llog_info_init();
        if (err)
#ifdef HAVE_SERVER_SUPPORT
                goto cleanup_lu_ucred_global;
#else /* !HAVE_SERVER_SUPPORT */
                goto cleanup_cl_global;
#endif /* HAVE_SERVER_SUPPORT */

        err = lustre_register_fs();

        /* simulate a late OOM situation now to require all
         * alloc'ed/initialized resources to be freed */
        if (OBD_FAIL_CHECK(OBD_FAIL_OBDCLASS_MODULE_LOAD)) {
                /* fake error but filesystem has been registered */
                lustre_unregister_fs();
                /* force error to ensure module will be unloaded/cleaned */
                err = -ENOMEM;
        }

        if (err)
                goto cleanup_llog_info;

        return 0;

        /* Error unwind: each label undoes one step and falls through to
         * the labels for the steps that preceded it. */
cleanup_llog_info:
        llog_info_fini();

#ifdef HAVE_SERVER_SUPPORT
cleanup_lu_ucred_global:
        lu_ucred_global_fini();

cleanup_dt_global:
        dt_global_fini();
#endif /* HAVE_SERVER_SUPPORT */

cleanup_cl_global:
        cl_global_fini();

cleanup_lu_global:
        lu_global_fini();

cleanup_class_procfs:
        obd_sysctl_clean();
        class_procfs_clean();

cleanup_caches:
        obd_cleanup_caches();

cleanup_deregister:
        misc_deregister(&obd_psdev);

cleanup_class_handle:
        class_handle_cleanup();

cleanup_zombie_impexp:
        obd_zombie_impexp_stop();

cleanup_obd_memory:
#ifdef CONFIG_PROC_FS
        lprocfs_free_stats(&obd_memory);
#endif

        return err;
}
553
554 void obd_update_maxusage(void)
555 {
556         __u64 max;
557
558         max = obd_memory_sum();
559
560         spin_lock(&obd_updatemax_lock);
561         if (max > obd_max_alloc)
562                 obd_max_alloc = max;
563         spin_unlock(&obd_updatemax_lock);
564 }
565 EXPORT_SYMBOL(obd_update_maxusage);
566
#ifdef CONFIG_PROC_FS
/*
 * Return the peak memory usage recorded so far.  The current usage is
 * folded into the peak first, so the result is never lower than what
 * obd_update_maxusage() has just observed.
 */
__u64 obd_memory_max(void)
{
        __u64 peak;

        /* bring obd_max_alloc up to date before sampling it */
        obd_update_maxusage();

        spin_lock(&obd_updatemax_lock);
        peak = obd_max_alloc;
        spin_unlock(&obd_updatemax_lock);

        return peak;
}
#endif /* CONFIG_PROC_FS */
580
/*
 * Module unload entry point for obdclass.
 *
 * Unregisters the filesystem and the misc device, shuts down the
 * subsystems started by obdclass_init(), and, with CONFIG_PROC_FS, reports
 * the peak and the still-outstanding memory usage before releasing the
 * statistics.
 */
static void __exit obdclass_exit(void)
{
#ifdef CONFIG_PROC_FS
        __u64 memory_leaked;
        __u64 memory_max;
#endif /* CONFIG_PROC_FS */
        ENTRY;

        lustre_unregister_fs();

        misc_deregister(&obd_psdev);
        llog_info_fini();
#ifdef HAVE_SERVER_SUPPORT
        lu_ucred_global_fini();
        dt_global_fini();
#endif /* HAVE_SERVER_SUPPORT */
        cl_global_fini();
        lu_global_fini();

        obd_cleanup_caches();
        obd_sysctl_clean();

        class_procfs_clean();

        class_handle_cleanup();
        class_del_uuid(NULL); /* Delete all UUIDs. */
        obd_zombie_impexp_stop();

#ifdef CONFIG_PROC_FS
        /* sample both counters before the stats they come from are freed;
         * whatever is still accounted at this point is reported as leaked */
        memory_leaked = obd_memory_sum();
        memory_max = obd_memory_max();

        lprocfs_free_stats(&obd_memory);
        /* a non-zero leak is logged at D_ERROR, otherwise at D_INFO */
        CDEBUG((memory_leaked) ? D_ERROR : D_INFO,
               "obd_memory max: %llu, leaked: %llu\n",
               memory_max, memory_leaked);
#endif /* CONFIG_PROC_FS */

        EXIT;
}
621
/* Module metadata. */
MODULE_AUTHOR("OpenSFS, Inc. <http://www.lustre.org/>");
MODULE_DESCRIPTION("Lustre Class Driver");
MODULE_VERSION(LUSTRE_VERSION_STRING);
MODULE_LICENSE("GPL");

/* obdclass_init() runs at module load, obdclass_exit() at module unload. */
module_init(obdclass_init);
module_exit(obdclass_exit);