Whamcloud - gitweb
293b55980ee0412f45c3d9b93836325ccf2267f6
[fs/lustre-release.git] / lustre / obdclass / class_obd.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.gnu.org/licenses/gpl-2.0.html
19  *
20  * GPL HEADER END
21  */
22 /*
23  * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Use is subject to license terms.
25  *
26  * Copyright (c) 2011, 2015, Intel Corporation.
27  */
28 /*
29  * This file is part of Lustre, http://www.lustre.org/
30  * Lustre is a trademark of Sun Microsystems, Inc.
31  */
32
33 #define DEBUG_SUBSYSTEM S_CLASS
34
35 #include <linux/user_namespace.h>
36 #ifdef HAVE_UIDGID_HEADER
37 # include <linux/uidgid.h>
38 #endif
39 #include <linux/atomic.h>
40 #include <linux/list.h>
41
42 #include <obd_support.h>
43 #include <obd_class.h>
44 #include <lnet/lnetctl.h>
45 #include <lustre_debug.h>
46 #include <lprocfs_status.h>
47 #include <lustre_ver.h>
48 #include <cl_object.h>
49 #ifdef HAVE_SERVER_SUPPORT
50 # include <dt_object.h>
51 # include <md_object.h>
52 #endif /* HAVE_SERVER_SUPPORT */
53 #include <lustre_ioctl.h>
54 #include "llog_internal.h"
55
56 struct obd_device *obd_devs[MAX_OBD_DEVICES];
57 struct list_head obd_types;
58 DEFINE_RWLOCK(obd_dev_lock);
59
60 #ifdef CONFIG_PROC_FS
61 static __u64 obd_max_alloc;
62 #else
63 __u64 obd_max_alloc;
64 #endif
65
66 static DEFINE_SPINLOCK(obd_updatemax_lock);
67
68 /* The following are visible and mutable through /proc/sys/lustre/. */
69 unsigned int obd_debug_peer_on_timeout;
70 EXPORT_SYMBOL(obd_debug_peer_on_timeout);
71 unsigned int obd_dump_on_timeout;
72 EXPORT_SYMBOL(obd_dump_on_timeout);
73 unsigned int obd_dump_on_eviction;
74 EXPORT_SYMBOL(obd_dump_on_eviction);
75 unsigned long obd_max_dirty_pages;
76 EXPORT_SYMBOL(obd_max_dirty_pages);
77 atomic_long_t obd_dirty_pages;
78 EXPORT_SYMBOL(obd_dirty_pages);
79 unsigned int obd_timeout = OBD_TIMEOUT_DEFAULT;   /* seconds */
80 EXPORT_SYMBOL(obd_timeout);
81 unsigned int ldlm_timeout = LDLM_TIMEOUT_DEFAULT; /* seconds */
82 EXPORT_SYMBOL(ldlm_timeout);
83 unsigned int obd_timeout_set;
84 EXPORT_SYMBOL(obd_timeout_set);
85 unsigned int ldlm_timeout_set;
86 EXPORT_SYMBOL(ldlm_timeout_set);
87 /* bulk transfer timeout, give up after 100s by default */
88 unsigned int bulk_timeout = 100; /* seconds */
89 EXPORT_SYMBOL(bulk_timeout);
90 /* Adaptive timeout defs here instead of ptlrpc module for /proc/sys/ access */
91 unsigned int at_min = 0;
92 EXPORT_SYMBOL(at_min);
93 unsigned int at_max = 600;
94 EXPORT_SYMBOL(at_max);
95 unsigned int at_history = 600;
96 EXPORT_SYMBOL(at_history);
97 int at_early_margin = 5;
98 EXPORT_SYMBOL(at_early_margin);
99 int at_extra = 30;
100 EXPORT_SYMBOL(at_extra);
101
102 atomic_long_t obd_dirty_transit_pages;
103 EXPORT_SYMBOL(obd_dirty_transit_pages);
104
105 char obd_jobid_var[JOBSTATS_JOBID_VAR_MAX_LEN + 1] = JOBSTATS_DISABLE;
106
107 #ifdef CONFIG_PROC_FS
108 struct lprocfs_stats *obd_memory = NULL;
109 EXPORT_SYMBOL(obd_memory);
110 #endif
111
112 char obd_jobid_node[LUSTRE_JOBID_SIZE + 1];
113
114 /* Get jobid of current process by reading the environment variable
115  * stored in between the "env_start" & "env_end" of task struct.
116  *
117  * TODO:
118  * It's better to cache the jobid for later use if there is any
119  * efficient way, the cl_env code probably could be reused for this
120  * purpose.
121  *
122  * If some job scheduler doesn't store jobid in the "env_start/end",
123  * then an upcall could be issued here to get the jobid by utilizing
124  * the userspace tools/api. Then, the jobid must be cached.
125  */
126 int lustre_get_jobid(char *jobid)
127 {
128         int jobid_len = LUSTRE_JOBID_SIZE;
129         int rc = 0;
130         ENTRY;
131
132         memset(jobid, 0, LUSTRE_JOBID_SIZE);
133         /* Jobstats isn't enabled */
134         if (strcmp(obd_jobid_var, JOBSTATS_DISABLE) == 0)
135                 RETURN(0);
136
137         /* Whole node dedicated to single job */
138         if (strcmp(obd_jobid_var, JOBSTATS_NODELOCAL) == 0) {
139                 memcpy(jobid, obd_jobid_node, LUSTRE_JOBID_SIZE);
140                 RETURN(0);
141         }
142
143         /* Use process name + fsuid as jobid */
144         if (strcmp(obd_jobid_var, JOBSTATS_PROCNAME_UID) == 0) {
145                 snprintf(jobid, LUSTRE_JOBID_SIZE, "%s.%u",
146                          current_comm(),
147                          from_kuid(&init_user_ns, current_fsuid()));
148                 RETURN(0);
149         }
150
151         rc = cfs_get_environ(obd_jobid_var, jobid, &jobid_len);
152         if (rc) {
153                 if (rc == -EOVERFLOW) {
154                         /* For the PBS_JOBID and LOADL_STEP_ID keys (which are
155                          * variable length strings instead of just numbers), it
156                          * might make sense to keep the unique parts for JobID,
157                          * instead of just returning an error.  That means a
158                          * larger temp buffer for cfs_get_environ(), then
159                          * truncating the string at some separator to fit into
160                          * the specified jobid_len.  Fix later if needed. */
161                         static bool printed;
162                         if (unlikely(!printed)) {
163                                 LCONSOLE_ERROR_MSG(0x16b, "%s value too large "
164                                                    "for JobID buffer (%d)\n",
165                                                    obd_jobid_var, jobid_len);
166                                 printed = true;
167                         }
168                 } else {
169                         CDEBUG((rc == -ENOENT || rc == -EINVAL ||
170                                 rc == -EDEADLK) ? D_INFO : D_ERROR,
171                                "Get jobid for (%s) failed: rc = %d\n",
172                                obd_jobid_var, rc);
173                 }
174         }
175         RETURN(rc);
176 }
177 EXPORT_SYMBOL(lustre_get_jobid);
178
179 static int class_resolve_dev_name(__u32 len, const char *name)
180 {
181         int rc;
182         int dev;
183
184         ENTRY;
185         if (!len || !name) {
186                 CERROR("No name passed,!\n");
187                 GOTO(out, rc = -EINVAL);
188         }
189         if (name[len - 1] != 0) {
190                 CERROR("Name not nul terminated!\n");
191                 GOTO(out, rc = -EINVAL);
192         }
193
194         CDEBUG(D_IOCTL, "device name %s\n", name);
195         dev = class_name2dev(name);
196         if (dev == -1) {
197                 CDEBUG(D_IOCTL, "No device for name %s!\n", name);
198                 GOTO(out, rc = -EINVAL);
199         }
200
201         CDEBUG(D_IOCTL, "device name %s, dev %d\n", name, dev);
202         rc = dev;
203
204 out:
205         RETURN(rc);
206 }
207
208 int class_handle_ioctl(unsigned int cmd, unsigned long arg)
209 {
210         char *buf = NULL;
211         struct obd_ioctl_data *data;
212         struct libcfs_debug_ioctl_data *debug_data;
213         struct obd_device *obd = NULL;
214         int err = 0, len = 0;
215         ENTRY;
216
217         /* only for debugging */
218         if (cmd == LIBCFS_IOC_DEBUG_MASK) {
219                 debug_data = (struct libcfs_debug_ioctl_data*)arg;
220                 libcfs_subsystem_debug = debug_data->subs;
221                 libcfs_debug = debug_data->debug;
222                 return 0;
223         }
224
225         CDEBUG(D_IOCTL, "cmd = %x\n", cmd);
226         if (obd_ioctl_getdata(&buf, &len, (void __user *)arg)) {
227                 CERROR("OBD ioctl: data error\n");
228                 RETURN(-EINVAL);
229         }
230         data = (struct obd_ioctl_data *)buf;
231
232         switch (cmd) {
233         case OBD_IOC_PROCESS_CFG: {
234                 struct lustre_cfg *lcfg;
235
236                 if (!data->ioc_plen1 || !data->ioc_pbuf1) {
237                         CERROR("No config buffer passed!\n");
238                         GOTO(out, err = -EINVAL);
239                 }
240                 OBD_ALLOC(lcfg, data->ioc_plen1);
241                 if (lcfg == NULL)
242                         GOTO(out, err = -ENOMEM);
243                 err = copy_from_user(lcfg, data->ioc_pbuf1,
244                                          data->ioc_plen1);
245                 if (!err)
246                         err = lustre_cfg_sanity_check(lcfg, data->ioc_plen1);
247                 if (!err)
248                         err = class_process_config(lcfg);
249
250                 OBD_FREE(lcfg, data->ioc_plen1);
251                 GOTO(out, err);
252         }
253
254         case OBD_GET_VERSION:
255                 if (!data->ioc_inlbuf1) {
256                         CERROR("No buffer passed in ioctl\n");
257                         GOTO(out, err = -EINVAL);
258                 }
259
260                 if (strlen(LUSTRE_VERSION_STRING) + 1 > data->ioc_inllen1) {
261                         CERROR("ioctl buffer too small to hold version\n");
262                         GOTO(out, err = -EINVAL);
263                 }
264
265                 memcpy(data->ioc_bulk, LUSTRE_VERSION_STRING,
266                        strlen(LUSTRE_VERSION_STRING) + 1);
267
268                 err = obd_ioctl_popdata((void __user *)arg, data, len);
269                 if (err)
270                         err = -EFAULT;
271                 GOTO(out, err);
272
273         case OBD_IOC_NAME2DEV: {
274                 /* Resolve a device name.  This does not change the
275                  * currently selected device.
276                  */
277                 int dev;
278
279                 dev = class_resolve_dev_name(data->ioc_inllen1,
280                                              data->ioc_inlbuf1);
281                 data->ioc_dev = dev;
282                 if (dev < 0)
283                         GOTO(out, err = -EINVAL);
284
285                 err = obd_ioctl_popdata((void __user *)arg, data,
286                                         sizeof(*data));
287                 if (err)
288                         err = -EFAULT;
289                 GOTO(out, err);
290         }
291
292         case OBD_IOC_UUID2DEV: {
293                 /* Resolve a device uuid.  This does not change the
294                  * currently selected device.
295                  */
296                 int dev;
297                 struct obd_uuid uuid;
298
299                 if (!data->ioc_inllen1 || !data->ioc_inlbuf1) {
300                         CERROR("No UUID passed!\n");
301                         GOTO(out, err = -EINVAL);
302                 }
303                 if (data->ioc_inlbuf1[data->ioc_inllen1 - 1] != 0) {
304                         CERROR("UUID not NUL terminated!\n");
305                         GOTO(out, err = -EINVAL);
306                 }
307
308                 CDEBUG(D_IOCTL, "device name %s\n", data->ioc_inlbuf1);
309                 obd_str2uuid(&uuid, data->ioc_inlbuf1);
310                 dev = class_uuid2dev(&uuid);
311                 data->ioc_dev = dev;
312                 if (dev == -1) {
313                         CDEBUG(D_IOCTL, "No device for UUID %s!\n",
314                                data->ioc_inlbuf1);
315                         GOTO(out, err = -EINVAL);
316                 }
317
318                 CDEBUG(D_IOCTL, "device name %s, dev %d\n", data->ioc_inlbuf1,
319                        dev);
320                 err = obd_ioctl_popdata((void __user *)arg, data,
321                                         sizeof(*data));
322                 if (err)
323                         err = -EFAULT;
324                 GOTO(out, err);
325         }
326
327         case OBD_IOC_GETDEVICE: {
328                 int     index = data->ioc_count;
329                 char    *status, *str;
330
331                 if (!data->ioc_inlbuf1) {
332                         CERROR("No buffer passed in ioctl\n");
333                         GOTO(out, err = -EINVAL);
334                 }
335                 if (data->ioc_inllen1 < 128) {
336                         CERROR("ioctl buffer too small to hold version\n");
337                         GOTO(out, err = -EINVAL);
338                 }
339
340                 obd = class_num2obd(index);
341                 if (!obd)
342                         GOTO(out, err = -ENOENT);
343
344                 if (obd->obd_stopping)
345                         status = "ST";
346                 else if (obd->obd_set_up)
347                         status = "UP";
348                 else if (obd->obd_attached)
349                         status = "AT";
350                 else
351                         status = "--";
352                 str = (char *)data->ioc_bulk;
353                 snprintf(str, len - sizeof(*data), "%3d %s %s %s %s %d",
354                          (int)index, status, obd->obd_type->typ_name,
355                          obd->obd_name, obd->obd_uuid.uuid,
356                          atomic_read(&obd->obd_refcount));
357                 err = obd_ioctl_popdata((void __user *)arg, data, len);
358
359                 GOTO(out, err = 0);
360         }
361
362         }
363
364         if (data->ioc_dev == OBD_DEV_BY_DEVNAME) {
365                 if (data->ioc_inllen4 <= 0 || data->ioc_inlbuf4 == NULL)
366                         GOTO(out, err = -EINVAL);
367                 if (strnlen(data->ioc_inlbuf4, MAX_OBD_NAME) >= MAX_OBD_NAME)
368                         GOTO(out, err = -EINVAL);
369                 obd = class_name2obd(data->ioc_inlbuf4);
370         } else if (data->ioc_dev < class_devno_max()) {
371                 obd = class_num2obd(data->ioc_dev);
372         } else {
373                 CERROR("OBD ioctl: No device\n");
374                 GOTO(out, err = -EINVAL);
375         }
376
377         if (obd == NULL) {
378                 CERROR("OBD ioctl : No Device %d\n", data->ioc_dev);
379                 GOTO(out, err = -EINVAL);
380         }
381         LASSERT(obd->obd_magic == OBD_DEVICE_MAGIC);
382
383         if (!obd->obd_set_up || obd->obd_stopping) {
384                 CERROR("OBD ioctl: device not setup %d \n", data->ioc_dev);
385                 GOTO(out, err = -EINVAL);
386         }
387
388         switch(cmd) {
389         case OBD_IOC_NO_TRANSNO: {
390                 if (!obd->obd_attached) {
391                         CERROR("Device %d not attached\n", obd->obd_minor);
392                         GOTO(out, err = -ENODEV);
393                 }
394                 CDEBUG(D_HA, "%s: disabling committed-transno notification\n",
395                        obd->obd_name);
396                 obd->obd_no_transno = 1;
397                 GOTO(out, err = 0);
398         }
399
400         default: {
401                 err = obd_iocontrol(cmd, obd->obd_self_export, len, data, NULL);
402                 if (err)
403                         GOTO(out, err);
404
405                 err = obd_ioctl_popdata((void __user *)arg, data, len);
406                 if (err)
407                         err = -EFAULT;
408                 GOTO(out, err);
409         }
410         }
411
412  out:
413         if (buf)
414                 obd_ioctl_freedata(buf, len);
415         RETURN(err);
416 } /* class_handle_ioctl */
417
418 #define OBD_INIT_CHECK
419 #ifdef OBD_INIT_CHECK
420 static int obd_init_checks(void)
421 {
422         __u64 u64val, div64val;
423         char buf[64];
424         int len, ret = 0;
425
426         CDEBUG(D_INFO, "OBD_OBJECT_EOF = %#llx\n", (__u64)OBD_OBJECT_EOF);
427
428         u64val = OBD_OBJECT_EOF;
429         CDEBUG(D_INFO, "u64val OBD_OBJECT_EOF = %#llx\n", u64val);
430         if (u64val != OBD_OBJECT_EOF) {
431                 CERROR("__u64 %#llx(%d) != 0xffffffffffffffff\n",
432                        u64val, (int)sizeof(u64val));
433                 ret = -EINVAL;
434         }
435         len = snprintf(buf, sizeof(buf), "%#llx", u64val);
436         if (len != 18) {
437                 CWARN("u64 hex wrong length! strlen(%s)=%d != 18\n", buf, len);
438                 ret = -EINVAL;
439         }
440
441         div64val = OBD_OBJECT_EOF;
442         CDEBUG(D_INFO, "u64val OBD_OBJECT_EOF = %#llx\n", u64val);
443         if (u64val != OBD_OBJECT_EOF) {
444                 CERROR("__u64 %#llx(%d) != 0xffffffffffffffff\n",
445                        u64val, (int)sizeof(u64val));
446                 ret = -EOVERFLOW;
447         }
448         if (u64val >> 8 != OBD_OBJECT_EOF >> 8) {
449                 CERROR("__u64 %#llx(%d) != 0xffffffffffffffff\n",
450                        u64val, (int)sizeof(u64val));
451                 return -EOVERFLOW;
452         }
453         if (do_div(div64val, 256) != (u64val & 255)) {
454                 CERROR("do_div(%#llx,256) != %llu\n", u64val, u64val & 255);
455                 return -EOVERFLOW;
456         }
457         if (u64val >> 8 != div64val) {
458                 CERROR("do_div(%#llx,256) %llu != %llu\n",
459                        u64val, div64val, u64val >> 8);
460                 return -EOVERFLOW;
461         }
462         len = snprintf(buf, sizeof(buf), "%#llx", u64val);
463         if (len != 18) {
464                 CWARN("u64 hex wrong length! strlen(%s)=%d != 18\n", buf, len);
465                 ret = -EINVAL;
466         }
467         len = snprintf(buf, sizeof(buf), "%llu", u64val);
468         if (len != 20) {
469                 CWARN("u64 wrong length! strlen(%s)=%d != 20\n", buf, len);
470                 ret = -EINVAL;
471         }
472         len = snprintf(buf, sizeof(buf), "%lld", u64val);
473         if (len != 2) {
474                 CWARN("s64 wrong length! strlen(%s)=%d != 2\n", buf, len);
475                 ret = -EINVAL;
476         }
477         if ((u64val & ~PAGE_MASK) >= PAGE_SIZE) {
478                 CWARN("mask failed: u64val %llu >= %llu\n", u64val,
479                       (__u64)PAGE_SIZE);
480                 ret = -EINVAL;
481         }
482
483         return ret;
484 }
485 #else
486 #define obd_init_checks() do {} while(0)
487 #endif
488
489 static int __init obdclass_init(void)
490 {
491         int i, err;
492
493         spin_lock_init(&obd_stale_export_lock);
494         INIT_LIST_HEAD(&obd_stale_exports);
495         atomic_set(&obd_stale_export_num, 0);
496
497         LCONSOLE_INFO("Lustre: Build Version: "LUSTRE_VERSION_STRING"\n");
498
499         spin_lock_init(&obd_types_lock);
500         obd_zombie_impexp_init();
501 #ifdef CONFIG_PROC_FS
502         obd_memory = lprocfs_alloc_stats(OBD_STATS_NUM,
503                                          LPROCFS_STATS_FLAG_NONE |
504                                          LPROCFS_STATS_FLAG_IRQ_SAFE);
505         if (obd_memory == NULL) {
506                 CERROR("kmalloc of 'obd_memory' failed\n");
507                 RETURN(-ENOMEM);
508         }
509
510         lprocfs_counter_init(obd_memory, OBD_MEMORY_STAT,
511                              LPROCFS_CNTR_AVGMINMAX,
512                              "memused", "bytes");
513 #endif
514         err = obd_init_checks();
515         if (err == -EOVERFLOW)
516                 return err;
517
518         class_init_uuidlist();
519         err = class_handle_init();
520         if (err)
521                 return err;
522
523         INIT_LIST_HEAD(&obd_types);
524
525         err = misc_register(&obd_psdev);
526         if (err) {
527                 CERROR("cannot register %d err %d\n", OBD_DEV_MINOR, err);
528                 return err;
529         }
530
531         /* This struct is already zeroed for us (static global) */
532         for (i = 0; i < class_devno_max(); i++)
533                 obd_devs[i] = NULL;
534
535         /* Default the dirty page cache cap to 1/2 of system memory.
536          * For clients with less memory, a larger fraction is needed
537          * for other purposes (mostly for BGL). */
538         if (totalram_pages <= 512 << (20 - PAGE_SHIFT))
539                 obd_max_dirty_pages = totalram_pages / 4;
540         else
541                 obd_max_dirty_pages = totalram_pages / 2;
542
543         err = obd_init_caches();
544         if (err)
545                 return err;
546         err = class_procfs_init();
547         if (err)
548                 return err;
549
550         err = lu_global_init();
551         if (err)
552                 return err;
553
554         err = cl_global_init();
555         if (err != 0)
556                 return err;
557
558 #ifdef HAVE_SERVER_SUPPORT
559         err = dt_global_init();
560         if (err != 0)
561                 return err;
562
563         err = lu_ucred_global_init();
564         if (err != 0)
565                 return err;
566 #endif /* HAVE_SERVER_SUPPORT */
567
568         err = llog_info_init();
569         if (err)
570                 return err;
571
572         err = lustre_register_fs();
573
574         return err;
575 }
576
577 void obd_update_maxusage(void)
578 {
579         __u64 max;
580
581         max = obd_memory_sum();
582
583         spin_lock(&obd_updatemax_lock);
584         if (max > obd_max_alloc)
585                 obd_max_alloc = max;
586         spin_unlock(&obd_updatemax_lock);
587 }
588 EXPORT_SYMBOL(obd_update_maxusage);
589
#ifdef CONFIG_PROC_FS
/*
 * Return the peak memory usage seen so far.  The peak is refreshed from
 * the current usage first, then read under obd_updatemax_lock.
 */
__u64 obd_memory_max(void)
{
        __u64 peak;

        obd_update_maxusage();

        spin_lock(&obd_updatemax_lock);
        peak = obd_max_alloc;
        spin_unlock(&obd_updatemax_lock);

        return peak;
}
#endif /* CONFIG_PROC_FS */
603
604 static void __exit obdclass_exit(void)
605 {
606         __u64 memory_leaked;
607         __u64 memory_max;
608         ENTRY;
609
610         lustre_unregister_fs();
611
612         misc_deregister(&obd_psdev);
613         llog_info_fini();
614 #ifdef HAVE_SERVER_SUPPORT
615         lu_ucred_global_fini();
616         dt_global_fini();
617 #endif /* HAVE_SERVER_SUPPORT */
618         cl_global_fini();
619         lu_global_fini();
620
621         obd_cleanup_caches();
622         obd_sysctl_clean();
623
624         class_procfs_clean();
625
626         class_handle_cleanup();
627         class_exit_uuidlist();
628         obd_zombie_impexp_stop();
629         LASSERT(list_empty(&obd_stale_exports));
630
631         memory_leaked = obd_memory_sum();
632
633         memory_max = obd_memory_max();
634
635         lprocfs_free_stats(&obd_memory);
636         CDEBUG((memory_leaked) ? D_ERROR : D_INFO,
637                "obd_memory max: %llu, leaked: %llu\n",
638                memory_max, memory_leaked);
639
640         EXIT;
641 }
642
/* Module metadata: author, description, version string and license. */
MODULE_AUTHOR("OpenSFS, Inc. <http://www.lustre.org/>");
MODULE_DESCRIPTION("Lustre Class Driver");
MODULE_VERSION(LUSTRE_VERSION_STRING);
MODULE_LICENSE("GPL");

/* Register obdclass_init()/obdclass_exit() as the load/unload handlers. */
module_init(obdclass_init);
module_exit(obdclass_exit);