1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
6 * This code is issued under the GNU General Public License.
7 * See the file COPYING in this distribution
9 * These are the only exported functions; they provide some generic
10 * infrastructure for managing object devices.
12 * Object Devices Class Driver */
16 #include <linux/config.h> /* for CONFIG_PROC_FS */
17 #include <linux/module.h>
18 #include <linux/errno.h>
19 #include <linux/kernel.h>
20 #include <linux/major.h>
21 #include <linux/sched.h>
23 #include <linux/slab.h>
24 #include <linux/ioport.h>
25 #include <linux/fcntl.h>
26 #include <linux/delay.h>
27 #include <linux/skbuff.h>
28 #include <linux/proc_fs.h>
30 #include <linux/poll.h>
31 #include <linux/init.h>
32 #include <linux/list.h>
33 #include <linux/highmem.h>
35 #include <asm/ioctls.h>
36 #include <asm/system.h>
38 #include <asm/uaccess.h>
39 #include <linux/miscdevice.h>
41 #define DEBUG_SUBSYSTEM S_CLASS
43 #include <linux/obd_support.h>
44 #include <linux/obd_class.h>
45 #include <linux/lustre_debug.h>
46 #include <linux/smp_lock.h>
47 #include <linux/lprocfs_status.h>
48 #include <portals/lib-types.h> /* for PTL_MD_MAX_IOV */
49 #include <linux/lustre_build_version.h>
51 struct semaphore obd_conf_sem; /* serialize configuration commands */
/* Table of OBD device slots; the ioctl handler selects a slot by index. */
52 struct obd_device obd_dev[MAX_OBD_DEVICES];
/* List head initialised in init_obdclass().
 * NOTE(review): presumably the registry of OBD types that
 * class_nm_to_type() searches - confirm against the type code. */
53 struct list_head obd_types;
/* Byte counter printed by cleanup_obdclass() in its "obd memory leaked"
 * message.  NOTE(review): presumably maintained by the OBD_ALLOC/OBD_FREE
 * macros - confirm. */
54 unsigned long obd_memory;
56 /* The following are visible and mutable through /proc/sys/lustre/. */
57 unsigned long obd_fail_loc;
58 unsigned long obd_timeout = 100;
59 char obd_recovery_upcall[128] = "/usr/lib/lustre/ha_assist";
/* Defined outside this file; OBD_IOC_ATTACH uses it to look up an OBD type
 * by name. */
61 extern struct obd_type *class_nm_to_type(char *nm);
63 /* opening /dev/obd */
/* open() handler installed in obd_psdev_fops.
 *
 * Clears file->private_data, so a freshly opened file has no OBD device
 * selected, and emits a D_IOCTL debug line with the module use count.
 *
 * NOTE(review): the debug text says "MOD_INC_USE", but the statement that
 * takes the module reference and the return statement are not visible in
 * this view - confirm against the full file. */
64 static int obd_class_open(struct inode * inode, struct file * file)
68 file->private_data = NULL;
69 CDEBUG(D_IOCTL, "MOD_INC_USE for open: count = %d\n",
70 atomic_read(&(THIS_MODULE)->uc.usecount));
75 /* closing /dev/obd */
/* release() handler installed in obd_psdev_fops.
 *
 * Drops the file's device selection by resetting file->private_data to NULL
 * (the preceding test is redundant: the pointer ends up NULL either way) and
 * logs the module use count minus one.
 *
 * NOTE(review): the statement that actually drops the module reference and
 * the return statement are not visible in this view.  The XXX note below
 * indicates connections made through this file are not torn down here. */
76 static int obd_class_release(struct inode * inode, struct file * file)
80 // XXX drop lsm, connections here
81 if (file->private_data)
82 file->private_data = NULL;
84 CDEBUG(D_IOCTL, "MOD_DEC_USE for close: count = %d\n",
85 atomic_read(&(THIS_MODULE)->uc.usecount) - 1);
/* Fill a connection handle from ioctl data: copies ioc_addr and ioc_cookie
 * from @data into conn->addr and conn->cookie.  No validation is done. */
90 static inline void obd_data2conn(struct lustre_handle *conn,
91 struct obd_ioctl_data *data)
93 conn->addr = data->ioc_addr;
94 conn->cookie = data->ioc_cookie;
/* Inverse of obd_data2conn(): stores conn->addr and conn->cookie into the
 * ioc_addr and ioc_cookie fields of @data so they can be returned to the
 * ioctl caller. */
97 static inline void obd_conn2data(struct obd_ioctl_data *data,
98 struct lustre_handle *conn)
100 data->ioc_addr = conn->addr;
101 data->ioc_cookie = conn->cookie;
/* Disconnect every export still linked on obd->obd_exports.
 *
 * Used by OBD_IOC_DETACH when the caller asks for a forced detach.  For each
 * export a temporary handle is built from the export's address and its
 * exp_cookie and passed to obd_disconnect(); the outcome is only logged.
 * The list is walked with list_for_each_safe(), which tolerates removal of
 * the current entry during the walk.
 *
 * NOTE(review): the declaration of rc, the list_entry() member argument and
 * the test that chooses between the two log messages are not visible in this
 * view. */
104 static void forcibly_detach_exports(struct obd_device *obd)
107 struct list_head *tmp, *n;
108 struct lustre_handle fake_conn;
110 CDEBUG(D_IOCTL, "OBD device %d (%p) has exports, "
111 "disconnecting them", obd->obd_minor, obd);
112 list_for_each_safe(tmp, n, &obd->obd_exports) {
113 struct obd_export *exp = list_entry(tmp, struct obd_export,
115 fake_conn.addr = (__u64)(unsigned long)exp;
116 fake_conn.cookie = exp->exp_cookie;
117 rc = obd_disconnect(&fake_conn);
119 CDEBUG(D_IOCTL, "disconnecting export %p failed: %d\n",
122 CDEBUG(D_IOCTL, "export %p disconnected\n", exp);
127 /* to control /dev/obd */
/* ioctl() handler installed in obd_psdev_fops.
 *
 * The device currently selected for this open file is kept in
 * filp->private_data.  If no device is selected, only OBD_IOC_DEVICE, TCGETS,
 * OBD_IOC_LIST, OBD_GET_VERSION, OBD_IOC_NAME2DEV and OBD_IOC_NEWDEV are
 * accepted; anything else fails with -EINVAL.  The user argument is then
 * fetched with obd_ioctl_getdata() into a kernel buffer that is treated as a
 * struct obd_ioctl_data.
 *
 * Commands visible in this view:
 *   OBD_IOC_DEVICE     range-check ioc_dev and select obd_dev[ioc_dev]
 *   (device listing)   write one text line per device into ioc_bulk, bounded
 *                      by ioc_inllen1 (the case label is not visible)
 *   OBD_GET_VERSION    copy BUILD_VERSION into ioc_bulk
 *   OBD_IOC_NAME2DEV   resolve a NUL-terminated name with class_name2dev()
 *   OBD_IOC_UUID2DEV   resolve a NUL-terminated uuid with class_uuid2dev()
 *   OBD_IOC_NEWDEV     select the first slot whose obd_type is NULL
 *   OBD_IOC_ATTACH     bind a type, optional name and uuid to the device,
 *                      call the type's attach method, set OBD_ATTACHED
 *   OBD_IOC_DETACH     refuse if set up or not attached; with exports present
 *                      require ioc_inlbuf1 to start with 'F', then force
 *                      disconnect; call detach, free the name, drop the type
 *   OBD_IOC_SETUP      require OBD_ATTACHED and not OBD_SET_UP; obd_setup()
 *   OBD_IOC_CLEANUP    require OBD_ATTACHED; obd_cleanup(), clear OBD_SET_UP
 *   OBD_IOC_CONNECT    obd_connect() with the fixed uuid "OBD_CLASS_UUID"
 *   OBD_IOC_DISCONNECT obd_disconnect() on the handle passed in
 *   OBD_IOC_DEC_USE_COUNT  logs a forced use-count decrement
 *   anything else      forwarded to obd_iocontrol()
 *
 * NOTE(review): many lines of this function (switch headers, several case
 * labels, closing braces, the out: label and the return) are not visible in
 * this view; the notes below should be confirmed against the full file.
 *  - The result of copy_to_user() is stored directly in err.  copy_to_user()
 *    returns the number of bytes NOT copied, so a fault would surface as a
 *    positive value rather than -EFAULT.
 *  - OBD_IOC_UUID2DEV is documented as not changing the selected device, yet
 *    unlike OBD_IOC_NAME2DEV it is absent from the no-device exemption list.
 *  - In OBD_IOC_ATTACH, obd_type is assigned before the name and uuid are
 *    validated; on the visible uuid-too-long path only the name is freed, so
 *    the slot may be left looking typed.
 *  - The "uuid must be < %d bytes long" message passes a sizeof value to %d.
 *  - OBD_IOC_CLEANUP shows no OBD_SET_UP check before typ_refcnt is
 *    decremented.
 *  - serialised is declared here; presumably it tracks whether obd_conf_sem
 *    was taken for this command - the code using it is not visible. */
128 static int obd_class_ioctl (struct inode * inode, struct file * filp,
129 unsigned int cmd, unsigned long arg)
132 struct obd_ioctl_data *data;
133 struct obd_device *obd = filp->private_data;
134 struct lustre_handle conn;
135 int err = 0, len = 0, serialised = 0;
139 case OBD_IOC_BRW_WRITE:
140 case OBD_IOC_BRW_READ:
141 case OBD_IOC_GETATTR:
149 if (!obd && cmd != OBD_IOC_DEVICE && cmd != TCGETS &&
150 cmd != OBD_IOC_LIST && cmd != OBD_GET_VERSION &&
151 cmd != OBD_IOC_NAME2DEV && cmd != OBD_IOC_NEWDEV) {
152 CERROR("OBD ioctl: No device\n");
153 GOTO(out, err = -EINVAL);
155 if (obd_ioctl_getdata(&buf, &len, (void *)arg)) {
156 CERROR("OBD ioctl: data error\n");
157 GOTO(out, err = -EINVAL);
159 data = (struct obd_ioctl_data *)buf;
163 GOTO(out, err=-EINVAL);
164 case OBD_IOC_DEVICE: {
165 CDEBUG(D_IOCTL, "\n");
166 if (data->ioc_dev >= MAX_OBD_DEVICES || data->ioc_dev < 0) {
167 CERROR("OBD ioctl: DEVICE insufficient devices\n");
168 GOTO(out, err=-EINVAL);
170 CDEBUG(D_IOCTL, "device %d\n", data->ioc_dev);
172 filp->private_data = &obd_dev[data->ioc_dev];
178 char *buf2 = data->ioc_bulk;
179 int remains = data->ioc_inllen1;
181 if (!data->ioc_inlbuf1) {
182 CERROR("No buffer passed!\n");
183 GOTO(out, err=-EINVAL);
187 for (i = 0 ; i < MAX_OBD_DEVICES ; i++) {
190 struct obd_device *obd = &obd_dev[i];
193 if (obd->obd_flags & OBD_SET_UP)
195 else if (obd->obd_flags & OBD_ATTACHED)
199 l = snprintf(buf2, remains, "%2d %s %s %s %s %d\n",
200 i, status, obd->obd_type->typ_name,
201 obd->obd_name, obd->obd_uuid, obd->obd_type->typ_refcnt);
205 CERROR("not enough space for device listing\n");
210 err = copy_to_user((void *)arg, data, len);
216 case OBD_GET_VERSION:
217 if (!data->ioc_inlbuf1) {
218 CERROR("No buffer passed in ioctl\n");
219 GOTO(out, err = -EINVAL);
222 if (strlen(BUILD_VERSION) + 1 > data->ioc_inllen1) {
223 CERROR("ioctl buffer too small to hold version\n");
224 GOTO(out, err = -EINVAL);
227 memcpy(data->ioc_bulk, BUILD_VERSION,
228 strlen(BUILD_VERSION) + 1);
230 err = copy_to_user((void *)arg, data, len);
235 case OBD_IOC_NAME2DEV: {
236 /* Resolve a device name. This does not change the
237 * currently selected device.
241 if (!data->ioc_inllen1 || !data->ioc_inlbuf1 ) {
242 CERROR("No name passed,!\n");
243 GOTO(out, err=-EINVAL);
245 if (data->ioc_inlbuf1[data->ioc_inllen1-1] !=0) {
246 CERROR("Name not nul terminated!\n");
247 GOTO(out, err=-EINVAL);
250 CDEBUG(D_IOCTL, "device name %s\n", data->ioc_inlbuf1);
251 dev = class_name2dev(data->ioc_inlbuf1);
254 CDEBUG(D_IOCTL, "No device for name %s!\n",
256 GOTO(out, err=-EINVAL);
259 CDEBUG(D_IOCTL, "device name %s, dev %d\n", data->ioc_inlbuf1,
261 err = copy_to_user((void *)arg, data, sizeof(*data));
267 case OBD_IOC_UUID2DEV: {
268 /* Resolve a device uuid. This does not change the
269 * currently selected device.
273 if (!data->ioc_inllen1 || !data->ioc_inlbuf1) {
274 CERROR("No UUID passed!\n");
275 GOTO(out, err=-EINVAL);
277 if (data->ioc_inlbuf1[data->ioc_inllen1-1] !=0) {
278 CERROR("Name not nul terminated!\n");
279 GOTO(out, err=-EINVAL);
282 CDEBUG(D_IOCTL, "device name %s\n", data->ioc_inlbuf1);
283 dev = class_uuid2dev(data->ioc_inlbuf1);
286 CDEBUG(D_IOCTL, "No device for name %s!\n",
288 GOTO(out, err=-EINVAL);
291 CDEBUG(D_IOCTL, "device name %s, dev %d\n", data->ioc_inlbuf1,
293 err = copy_to_user((void *)arg, data, sizeof(*data));
299 case OBD_IOC_NEWDEV: {
303 filp->private_data = NULL;
304 for (i = 0 ; i < MAX_OBD_DEVICES ; i++) {
305 struct obd_device *obd = &obd_dev[i];
306 if (!obd->obd_type) {
307 filp->private_data = obd;
316 GOTO(out, err=-EINVAL);
318 err = copy_to_user((void *)arg, data, sizeof(*data));
324 case OBD_IOC_ATTACH: {
325 struct obd_type *type;
328 /* have we attached a type to this device */
329 if (obd->obd_flags & OBD_ATTACHED || obd->obd_type) {
330 CERROR("OBD: Device %d already typed as %s.\n",
331 obd->obd_minor, MKSTR(obd->obd_type->typ_name));
332 GOTO(out, err=-EBUSY);
335 if (!data->ioc_inllen1 || !data->ioc_inlbuf1) {
336 CERROR("No type passed!\n");
337 GOTO(out, err=-EINVAL);
339 if (data->ioc_inlbuf1[data->ioc_inllen1-1] !=0) {
340 CERROR("Type not nul terminated!\n");
341 GOTO(out, err=-EINVAL);
344 CDEBUG(D_IOCTL, "attach type %s name: %s uuid: %s\n",
345 MKSTR(data->ioc_inlbuf1),
346 MKSTR(data->ioc_inlbuf2), MKSTR(data->ioc_inlbuf3));
349 type = class_nm_to_type(data->ioc_inlbuf1);
351 CERROR("OBD: unknown type dev %d\n", obd->obd_minor);
352 GOTO(out, err=-EINVAL);
355 minor = obd->obd_minor;
356 memset(obd, 0, sizeof(*obd));
357 obd->obd_minor = minor;
358 obd->obd_type = type;
359 INIT_LIST_HEAD(&obd->obd_exports);
360 INIT_LIST_HEAD(&obd->obd_imports);
361 spin_lock_init(&obd->obd_dev_lock);
363 if (data->ioc_inlbuf2) {
364 int len = strlen(data->ioc_inlbuf2) + 1;
365 OBD_ALLOC(obd->obd_name, len);
366 if (!obd->obd_name) {
367 CERROR("no memory\n");
370 memcpy(obd->obd_name, data->ioc_inlbuf2, len);
372 CERROR("WARNING: unnamed obd device\n");
374 if (data->ioc_inlbuf3) {
375 int len = strlen(data->ioc_inlbuf3);
376 if (len >= sizeof(obd->obd_uuid)) {
377 CERROR("uuid must be < %d bytes long\n",
378 sizeof(obd->obd_uuid));
380 OBD_FREE(obd->obd_name,
381 strlen(obd->obd_name) + 1);
382 GOTO(out, err=-EINVAL);
384 memcpy(obd->obd_uuid, data->ioc_inlbuf3, len);
387 if (OBP(obd, attach))
388 err = OBP(obd,attach)(obd, sizeof(*data), data);
390 if(data->ioc_inlbuf2)
391 OBD_FREE(obd->obd_name, strlen(obd->obd_name)+1);
392 obd->obd_type = NULL;
394 obd->obd_flags |= OBD_ATTACHED;
397 CDEBUG(D_IOCTL, "OBD: dev %d attached type %s\n",
398 obd->obd_minor, data->ioc_inlbuf1);
400 CDEBUG(D_IOCTL, "MOD_INC_USE for attach: count = %d\n",
401 atomic_read(&(THIS_MODULE)->uc.usecount));
408 case OBD_IOC_DETACH: {
410 if (obd->obd_flags & OBD_SET_UP) {
411 CERROR("OBD device %d still set up\n", obd->obd_minor);
412 GOTO(out, err=-EBUSY);
414 if (!(obd->obd_flags & OBD_ATTACHED) ) {
415 CERROR("OBD device %d not attached\n", obd->obd_minor);
416 GOTO(out, err=-ENODEV);
418 if (!list_empty(&obd->obd_exports)) {
419 if (!data->ioc_inlbuf1 || data->ioc_inlbuf1[0] != 'F') {
420 CERROR("OBD device %d (%p) has exports\n",
421 obd->obd_minor, obd);
422 GOTO(out, err=-EBUSY);
424 forcibly_detach_exports(obd);
426 if (OBP(obd, detach))
427 err=OBP(obd,detach)(obd);
430 OBD_FREE(obd->obd_name, strlen(obd->obd_name)+1);
431 obd->obd_name = NULL;
434 obd->obd_flags &= ~OBD_ATTACHED;
435 obd->obd_type->typ_refcnt--;
436 obd->obd_type = NULL;
437 CDEBUG(D_IOCTL, "MOD_DEC_USE for detach: count = %d\n",
438 atomic_read(&(THIS_MODULE)->uc.usecount) - 1);
443 case OBD_IOC_SETUP: {
444 /* have we attached a type to this device? */
445 if (!(obd->obd_flags & OBD_ATTACHED)) {
446 CERROR("Device %d not attached\n", obd->obd_minor);
447 GOTO(out, err=-ENODEV);
450 /* has this been done already? */
451 if ( obd->obd_flags & OBD_SET_UP ) {
452 CERROR("Device %d already setup (type %s)\n",
453 obd->obd_minor, obd->obd_type->typ_name);
454 GOTO(out, err=-EBUSY);
457 if ( OBT(obd) && OBP(obd, setup) )
458 err = obd_setup(obd, sizeof(*data), data);
461 obd->obd_type->typ_refcnt++;
462 obd->obd_flags |= OBD_SET_UP;
467 case OBD_IOC_CLEANUP: {
468 /* have we attached a type to this device? */
469 if (!(obd->obd_flags & OBD_ATTACHED)) {
470 CERROR("Device %d not attached\n", obd->obd_minor);
471 GOTO(out, err=-ENODEV);
474 if ( OBT(obd) && OBP(obd, cleanup) )
475 err = obd_cleanup(obd);
478 obd->obd_flags &= ~OBD_SET_UP;
479 obd->obd_type->typ_refcnt--;
484 case OBD_IOC_CONNECT: {
485 char * cluuid = "OBD_CLASS_UUID";
486 obd_data2conn(&conn, data);
488 err = obd_connect(&conn, obd, cluuid, NULL, NULL);
490 CDEBUG(D_IOCTL, "assigned export "LPX64"\n", conn.addr);
491 obd_conn2data(data, &conn);
495 err = copy_to_user((void *)arg, data, sizeof(*data));
498 // XXX save connection data into file handle
502 case OBD_IOC_DISCONNECT: {
503 obd_data2conn(&conn, data);
504 err = obd_disconnect(&conn);
508 case OBD_IOC_DEC_USE_COUNT: {
509 CDEBUG(D_IOCTL, "MOD_DEC_USE for force dec: count = %d\n",
510 atomic_read(&(THIS_MODULE)->uc.usecount) - 1);
516 obd_data2conn(&conn, data);
518 err = obd_iocontrol(cmd, &conn, len, data, NULL);
522 err = copy_to_user((void *)arg, data, len);
534 } /* obd_class_ioctl */
538 /* declare character device */
/* File operations for the /dev/obd device.  Only ioctl, open and release are
 * supplied, using the older GCC "field: value" initializer form. */
539 static struct file_operations obd_psdev_fops = {
540 ioctl: obd_class_ioctl, /* ioctl */
541 open: obd_class_open, /* open */
542 release: obd_class_release, /* release */
/* Misc-device descriptor that init_obdclass() passes to misc_register() and
 * cleanup_obdclass() passes to misc_deregister().  OBD_MINOR is the number
 * reported when registration fails.
 * NOTE(review): the initializer fields are not visible in this view;
 * presumably they carry OBD_MINOR, a device name and &obd_psdev_fops. */
546 #define OBD_MINOR 241
547 static struct miscdevice obd_psdev = {
/* Global callback pointer taking a struct ptlrpc_connection pointer.  It is
 * not assigned anywhere in the visible part of this file and is exported
 * below.  NOTE(review): presumably another module installs the handler. */
553 void (*class_signal_connection_failure)(struct ptlrpc_connection *);
555 #ifdef CONFIG_HIGHMEM
556 /* Allow at most 3/4 of the kmap mappings to be consumed by vector I/O
557 * requests. This avoids deadlocks on servers which have a lot of clients
558 * doing vector I/O. We don't need to do this for non-vector I/O requests
559 * because singleton requests will just block on the kmap itself and never
560 * deadlock waiting for additional kmaps to complete.
562 * If we are a "server" task, we can have at most a single reservation
563 * in excess of the maximum. This avoids a deadlock when multiple client
564 * threads are on the same machine as the server threads, and the clients
565 * have consumed all of the available mappings. As long as a single server
566 * thread can make progress, we are guaranteed to avoid deadlock. */
/* Reservation budget: three quarters of LAST_PKMAP. */
568 #define OBD_KMAP_MAX (LAST_PKMAP * 3 / 4)
/* Remaining budget.  It starts at OBD_KMAP_MAX, is lowered by obd_kmap_get()
 * and raised again by obd_kmap_put(). */
569 static atomic_t obd_kmap_count = ATOMIC_INIT(OBD_KMAP_MAX);
/* Tasks blocked in obd_kmap_get() sleep here until obd_kmap_put() wakes them. */
570 static DECLARE_WAIT_QUEUE_HEAD(obd_kmap_waitq);
/* Reserve @count kmap slots from obd_kmap_count, sleeping if necessary.
 *
 * One visible path simply decrements the counter by one.  The other path
 * subtracts @count with atomic_add_negative(); while that leaves the counter
 * negative, the reservation is added back, a CDEBUG line records the
 * negative value, a "blocking ..." CERROR is printed at most once per
 * 5*HZ jiffies, and the task sleeps on obd_kmap_waitq until the counter is
 * at least @count before retrying.
 *
 * @server: when non-zero and the counter has not dropped below
 *          -PTL_MD_MAX_IOV, the visible test succeeds.
 *
 * NOTE(review): the condition guarding the atomic_dec() path, the statement
 * executed when the server test succeeds, and the updates to skipped are not
 * visible in this view.  Presumably count == 1 selects the first path and
 * the server case breaks out of the loop keeping its reservation - confirm.
 * next_show and skipped are function-local statics updated without any
 * visible locking. */
572 void obd_kmap_get(int count, int server)
574 //CERROR("getting %d kmap counts (%d/%d)\n", count,
575 // atomic_read(&obd_kmap_count), OBD_KMAP_MAX);
577 atomic_dec(&obd_kmap_count);
578 else while (atomic_add_negative(-count, &obd_kmap_count)) {
579 static long next_show = 0;
580 static int skipped = 0;
582 if (server && atomic_read(&obd_kmap_count) >= -PTL_MD_MAX_IOV)
585 CDEBUG(D_OTHER, "negative kmap reserved count: %d\n",
586 atomic_read(&obd_kmap_count));
587 atomic_add(count, &obd_kmap_count);
589 if (time_after(jiffies, next_show)) {
590 CERROR("blocking %s (and %d others) for kmaps\n",
591 current->comm, skipped);
592 next_show = jiffies + 5*HZ;
596 wait_event(obd_kmap_waitq,
597 atomic_read(&obd_kmap_count) >= count);
/* Return @count kmap reservations: adds @count back to obd_kmap_count and,
 * if the counter is then positive, wakes every task sleeping on
 * obd_kmap_waitq.  Each woken task re-evaluates its own wait condition. */
601 void obd_kmap_put(int count)
603 atomic_add(count, &obd_kmap_count);
604 /* Wake up sleepers. Sadly, this wakes up all of the tasks at once.
605 * We could have something smarter here like:
606 while (atomic_read(&obd_kmap_count) > 0)
607 wake_up_nr(obd_kmap_waitq, 1);
608 although we would need to set somewhere (probably obd_class_init):
609 obd_kmap_waitq.flags |= WQ_FLAG_EXCLUSIVE;
610 For now the wait_event() condition will handle this OK I believe.
612 if (atomic_read(&obd_kmap_count) > 0)
613 wake_up(&obd_kmap_waitq);
/* Symbols exported to other kernel modules.
 *
 * obd_kmap_get/obd_kmap_put, obd_dev, obd_memory, obd_fail_loc, obd_timeout,
 * obd_recovery_upcall and class_signal_connection_failure are defined in
 * this file.  obdo_cachep, ptlrpc_put_connection_superhack and the remaining
 * class_xxx functions are not defined in the visible part of it.
 *
 * NOTE(review): the two kmap exports presumably still sit inside the
 * CONFIG_HIGHMEM conditional opened above; the matching #endif is not
 * visible in this view. */
616 EXPORT_SYMBOL(obd_kmap_get);
617 EXPORT_SYMBOL(obd_kmap_put);
620 EXPORT_SYMBOL(obd_dev);
621 EXPORT_SYMBOL(obdo_cachep);
622 EXPORT_SYMBOL(obd_memory);
623 EXPORT_SYMBOL(obd_fail_loc);
624 EXPORT_SYMBOL(obd_timeout);
625 EXPORT_SYMBOL(obd_recovery_upcall);
626 EXPORT_SYMBOL(ptlrpc_put_connection_superhack);
628 EXPORT_SYMBOL(class_register_type);
629 EXPORT_SYMBOL(class_unregister_type);
630 EXPORT_SYMBOL(class_name2dev);
631 EXPORT_SYMBOL(class_uuid2dev);
632 EXPORT_SYMBOL(class_uuid2obd);
633 EXPORT_SYMBOL(class_new_export);
634 EXPORT_SYMBOL(class_destroy_export);
635 EXPORT_SYMBOL(class_connect);
636 EXPORT_SYMBOL(class_conn2export);
637 EXPORT_SYMBOL(class_conn2obd);
638 EXPORT_SYMBOL(class_conn2cliimp);
639 EXPORT_SYMBOL(class_conn2ldlmimp);
640 EXPORT_SYMBOL(class_disconnect);
641 EXPORT_SYMBOL(class_disconnect_all);
642 EXPORT_SYMBOL(class_uuid_unparse);
644 EXPORT_SYMBOL(class_signal_connection_failure);
645 EXPORT_SYMBOL(class_nm_to_type);
/* Module initialisation (registered with module_init below).
 *
 * Visible steps, in order: print the build-version banner, initialise
 * obd_conf_sem with a count of 1, initialise the obd_types list head,
 * register the misc device (logging OBD_MINOR and the error on failure),
 * walk every slot of obd_dev, then call obd_init_caches() and
 * lprocfs_reg_main().
 *
 * NOTE(review): the declarations of i and err, the body of the obd_dev loop,
 * the handling of each err value and the return statement are not visible in
 * this view, so it cannot be confirmed here whether earlier steps are undone
 * when a later one fails. */
647 static int __init init_obdclass(void)
649 struct obd_device *obd;
653 printk(KERN_INFO "OBD class driver Build Version: " BUILD_VERSION
654 ", info@clusterfs.com\n");
656 sema_init(&obd_conf_sem, 1);
657 INIT_LIST_HEAD(&obd_types);
659 if ((err = misc_register(&obd_psdev))) {
660 CERROR("cannot register %d err %d\n", OBD_MINOR, err);
664 /* This struct is already zerod for us (static global) */
665 for (i = 0, obd = obd_dev; i < MAX_OBD_DEVICES; i++, obd++)
668 err = obd_init_caches();
674 err = lprocfs_reg_main();
/* Module teardown (registered with module_exit below).
 *
 * Deregisters the misc device, then for every obd_dev slot that has a type,
 * is flagged OBD_SET_UP and whose type provides a detach method, calls that
 * detach method.  Afterwards it calls obd_cleanup_caches() and
 * lprocfs_dereg_main() and logs the value of obd_memory as leaked bytes.
 *
 * NOTE(review): only the type's detach method is called, and only for
 * devices still flagged OBD_SET_UP; no cleanup call is visible, and devices
 * that are attached but not set up are skipped.  The condition guarding the
 * leak message and the use of err are not visible in this view. */
679 static void __exit cleanup_obdclass(void)
684 misc_deregister(&obd_psdev);
685 for (i = 0; i < MAX_OBD_DEVICES; i++) {
686 struct obd_device *obd = &obd_dev[i];
687 if (obd->obd_type && (obd->obd_flags & OBD_SET_UP) &&
688 OBT(obd) && OBP(obd, detach)) {
689 /* XXX should this call generic detach otherwise? */
690 OBP(obd, detach)(obd);
694 obd_cleanup_caches();
697 err = lprocfs_dereg_main();
699 CERROR("obd memory leaked: %ld bytes\n", obd_memory);
703 /* Check that we're building against the appropriate version of the Lustre kernel patch */
705 #include <linux/lustre_version.h>
/* Build-time guard: compilation stops with the #error below unless
 * LUSTRE_KERNEL_VERSION is exactly 4.  The matching #endif is not visible
 * in this view. */
706 #if (LUSTRE_KERNEL_VERSION != 4)
707 # error Cannot continue: Your Lustre kernel patch is out of date
/* Module metadata, followed by the registration of init_obdclass() and
 * cleanup_obdclass() as the module's load and unload entry points. */
710 MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
711 MODULE_DESCRIPTION("Lustre Class Driver Build Version: " BUILD_VERSION);
712 MODULE_LICENSE("GPL");
714 module_init(init_obdclass);
715 module_exit(cleanup_obdclass);