1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
6 * This code is issued under the GNU General Public License.
7 * See the file COPYING in this distribution
9 * These are the only exported functions, they provide some generic
10 * infrastructure for managing object devices
12 * Object Devices Class Driver
16 #include <linux/config.h> /* for CONFIG_PROC_FS */
17 #include <linux/module.h>
18 #include <linux/errno.h>
19 #include <linux/kernel.h>
20 #include <linux/major.h>
21 #include <linux/sched.h>
23 #include <linux/slab.h>
24 #include <linux/ioport.h>
25 #include <linux/fcntl.h>
26 #include <linux/delay.h>
27 #include <linux/skbuff.h>
28 #include <linux/proc_fs.h>
30 #include <linux/poll.h>
31 #include <linux/init.h>
32 #include <linux/list.h>
33 #include <linux/highmem.h>
35 #include <asm/ioctls.h>
36 #include <asm/system.h>
38 #include <asm/uaccess.h>
39 #include <linux/miscdevice.h>
41 #define DEBUG_SUBSYSTEM S_CLASS
43 #include <linux/obd_support.h>
44 #include <linux/obd_class.h>
45 #include <linux/lustre_debug.h>
46 #include <linux/smp_lock.h>
47 #include <linux/lprocfs_status.h>
48 #include <portals/lib-types.h> /* for PTL_MD_MAX_IOV */
49 #include <linux/lustre_build_version.h>
// Global state of the OBD class driver.
51 struct semaphore obd_conf_sem; /* serialize configuration commands */
// Table of OBD device slots; the ioctl handler selects entries by index.
52 struct obd_device obd_dev[MAX_OBD_DEVICES];
// List head initialised in init_obdclass(); presumably the registered OBD
// types -- confirm against class_register_type().
53 struct list_head obd_types;
// Counter printed as the number of leaked bytes by cleanup_obdclass().
54 unsigned long obd_memory;
56 /* The following are visible and mutable through /proc/sys/lustre/. */
57 unsigned long obd_fail_loc;
// Defaults to 100. NOTE(review): units are not stated here -- presumably
// seconds; confirm.
58 unsigned long obd_timeout = 100;
// Default upcall path; the buffer holds at most 128 bytes including the NUL.
59 char obd_recovery_upcall[128] = "/usr/lib/lustre/ha_assist";
61 extern struct obd_type *class_nm_to_type(char *nm);
63 /* opening /dev/obd */
// Open handler for the OBD control device.
// Clears file->private_data, so a freshly opened handle has no OBD device
// selected, and logs the current module use count through CDEBUG(D_IOCTL).
// NOTE(review): the debug text mentions MOD_INC_USE, but the statement that
// takes the module reference and the return statement are not visible in
// this excerpt -- confirm against the full file.
64 static int obd_class_open(struct inode * inode, struct file * file)
68 file->private_data = NULL;
69 CDEBUG(D_IOCTL, "MOD_INC_USE for open: count = %d\n",
70 atomic_read(&(THIS_MODULE)->uc.usecount));
75 /* closing /dev/obd */
// Release handler for the OBD control device.
// Drops the device selection stored in file->private_data and logs the
// module use count minus one (the value expected after the reference is
// dropped).
// NOTE(review): the statement that actually drops the module reference and
// the return statement are not visible in this excerpt.
76 static int obd_class_release(struct inode * inode, struct file * file)
80 // XXX drop lsm, connections here
// The NULL test is redundant for a plain assignment; kept as in the original.
81 if (file->private_data)
82 file->private_data = NULL;
84 CDEBUG(D_IOCTL, "MOD_DEC_USE for close: count = %d\n",
85 atomic_read(&(THIS_MODULE)->uc.usecount) - 1);
// Fill a connection handle from ioctl data.
// conn - handle to fill in (addr and cookie are overwritten)
// data - ioctl payload supplying ioc_addr and ioc_cookie
90 static inline void obd_data2conn(struct lustre_handle *conn,
91 struct obd_ioctl_data *data)
93 conn->addr = data->ioc_addr;
94 conn->cookie = data->ioc_cookie;
// Inverse of obd_data2conn(): store a connection handle into ioctl data.
// data - ioctl payload whose ioc_addr and ioc_cookie are overwritten
// conn - handle supplying addr and cookie
97 static inline void obd_conn2data(struct obd_ioctl_data *data,
98 struct lustre_handle *conn)
100 data->ioc_addr = conn->addr;
101 data->ioc_cookie = conn->cookie;
// Disconnect every export still attached to obd.
// For each entry on obd->obd_exports a temporary handle is built from the
// export pointer (as addr) and its exp_cookie, and passed to
// obd_disconnect(). The outcome of each disconnect is only logged; a
// failure does not stop the walk in the visible code.
// NOTE(review): the declaration of rc and the branch separating the two
// per-export CDEBUG calls are not visible in this excerpt.
104 static void forcibly_detach_exports(struct obd_device *obd)
107 struct list_head *tmp, *n;
108 struct lustre_handle fake_conn;
110 CDEBUG(D_IOCTL, "OBD device %d (%p) has exports, "
111 "disconnecting them", obd->obd_minor, obd);
// The _safe iterator is used because the loop body hands each export to
// obd_disconnect(), which presumably unlinks it from the list.
112 list_for_each_safe(tmp, n, &obd->obd_exports) {
113 struct obd_export *exp = list_entry(tmp, struct obd_export,
115 fake_conn.addr = (__u64)(unsigned long)exp;
116 fake_conn.cookie = exp->exp_cookie;
117 rc = obd_disconnect(&fake_conn);
119 CDEBUG(D_IOCTL, "disconnecting export %p failed: %d\n",
122 CDEBUG(D_IOCTL, "export %p disconnected\n", exp);
127 /* to control /dev/obd */
// ioctl handler for the OBD control device. Dispatches configuration and
// control commands, most of them against the device currently selected on
// this file handle (filp->private_data).
//
// inode - not referenced in the visible code
// filp  - open handle; private_data holds the selected struct obd_device
// cmd   - ioctl command (OBD_IOC_*, OBD_GET_VERSION, TCGETS)
// arg   - user-space address of the packed struct obd_ioctl_data
//
// err carries 0 or a negative errno to the out label.
// NOTE(review): this excerpt omits many short lines (braces, some case
// labels, break and return statements), so the comments below describe only
// the statements that are visible.
// NOTE(review): several commands assign the copy_to_user() result directly
// to err; confirm the hidden lines map a non-zero result to -EFAULT.
128 static int obd_class_ioctl (struct inode * inode, struct file * filp,
129 unsigned int cmd, unsigned long arg)
132 struct obd_ioctl_data *data;
133 struct obd_device *obd = filp->private_data;
134 struct lustre_handle conn;
135 int err = 0, len = 0;
// Only the commands listed here may run without a selected device.
// NOTE(review): OBD_IOC_UUID2DEV is not in this list although
// OBD_IOC_NAME2DEV is -- confirm that is intended.
140 if (!obd && cmd != OBD_IOC_DEVICE && cmd != TCGETS &&
141 cmd != OBD_IOC_LIST && cmd != OBD_GET_VERSION &&
142 cmd != OBD_IOC_NAME2DEV && cmd != OBD_IOC_NEWDEV) {
143 CERROR("OBD ioctl: No device\n");
144 GOTO(out, err = -EINVAL);
// Fetch the ioctl payload; obd_ioctl_getdata() fills in buf and len.
146 if (obd_ioctl_getdata(&buf, &len, (void *)arg)) {
147 CERROR("OBD ioctl: data error\n");
148 GOTO(out, err = -EINVAL);
150 data = (struct obd_ioctl_data *)buf;
// The case label for this rejection is not visible; presumably TCGETS.
154 GOTO(out, err=-EINVAL);
// Select device ioc_dev for later ioctls on this handle, after checking
// that the index lies inside obd_dev[].
155 case OBD_IOC_DEVICE: {
156 CDEBUG(D_IOCTL, "\n");
157 if (data->ioc_dev >= MAX_OBD_DEVICES || data->ioc_dev < 0) {
158 CERROR("OBD ioctl: DEVICE insufficient devices\n");
159 GOTO(out, err=-EINVAL);
161 CDEBUG(D_IOCTL, "device %d\n", data->ioc_dev);
163 filp->private_data = &obd_dev[data->ioc_dev];
// Device listing (case label not visible; presumably OBD_IOC_LIST).
// Formats text lines into data->ioc_bulk, bounded by ioc_inllen1, then
// copies the whole payload (len bytes) back to user space.
169 char *buf2 = data->ioc_bulk;
170 int remains = data->ioc_inllen1;
172 if (!data->ioc_inlbuf1) {
173 CERROR("No buffer passed!\n");
174 GOTO(out, err=-EINVAL);
178 for (i = 0 ; i < MAX_OBD_DEVICES ; i++) {
181 struct obd_device *obd = &obd_dev[i];
// status is chosen from the device flags; the assignments are not visible.
184 if (obd->obd_flags & OBD_SET_UP)
186 else if (obd->obd_flags & OBD_ATTACHED)
// Line layout: index, status, type name, device name, uuid, type refcount.
190 l = snprintf(buf2, remains, "%2d %s %s %s %s %d\n",
191 i, status, obd->obd_type->typ_name,
192 obd->obd_name, obd->obd_uuid, obd->obd_type->typ_refcnt);
196 CERROR("not enough space for device listing\n");
201 err = copy_to_user((void *)arg, data, len);
// Return BUILD_VERSION, including its terminating NUL, in ioc_bulk when the
// caller supplied a large enough first input buffer.
207 case OBD_GET_VERSION:
208 if (!data->ioc_inlbuf1) {
209 CERROR("No buffer passed in ioctl\n");
210 GOTO(out, err = -EINVAL);
213 if (strlen(BUILD_VERSION) + 1 > data->ioc_inllen1) {
214 CERROR("ioctl buffer too small to hold version\n");
215 GOTO(out, err = -EINVAL);
218 memcpy(data->ioc_bulk, BUILD_VERSION,
219 strlen(BUILD_VERSION) + 1);
221 err = copy_to_user((void *)arg, data, len);
// Look up a device index by name via class_name2dev(). Only the fixed-size
// header (sizeof(*data)) is copied back.
226 case OBD_IOC_NAME2DEV: {
227 /* Resolve a device name. This does not change the
228 * currently selected device.
232 if (!data->ioc_inllen1 || !data->ioc_inlbuf1 ) {
233 CERROR("No name passed,!\n");
234 GOTO(out, err=-EINVAL);
// The last byte of the buffer must be NUL before it is used as a string.
236 if (data->ioc_inlbuf1[data->ioc_inllen1-1] !=0) {
237 CERROR("Name not nul terminated!\n");
238 GOTO(out, err=-EINVAL);
241 CDEBUG(D_IOCTL, "device name %s\n", data->ioc_inlbuf1);
242 dev = class_name2dev(data->ioc_inlbuf1);
245 CDEBUG(D_IOCTL, "No device for name %s!\n",
247 GOTO(out, err=-EINVAL);
250 CDEBUG(D_IOCTL, "device name %s, dev %d\n", data->ioc_inlbuf1,
252 err = copy_to_user((void *)arg, data, sizeof(*data));
// Same as OBD_IOC_NAME2DEV but the lookup key is a UUID, resolved with
// class_uuid2dev(). The log and error strings still say name.
258 case OBD_IOC_UUID2DEV: {
259 /* Resolve a device uuid. This does not change the
260 * currently selected device.
264 if (!data->ioc_inllen1 || !data->ioc_inlbuf1) {
265 CERROR("No UUID passed!\n");
266 GOTO(out, err=-EINVAL);
268 if (data->ioc_inlbuf1[data->ioc_inllen1-1] !=0) {
269 CERROR("Name not nul terminated!\n");
270 GOTO(out, err=-EINVAL);
273 CDEBUG(D_IOCTL, "device name %s\n", data->ioc_inlbuf1);
274 dev = class_uuid2dev(data->ioc_inlbuf1);
277 CDEBUG(D_IOCTL, "No device for name %s!\n",
279 GOTO(out, err=-EINVAL);
282 CDEBUG(D_IOCTL, "device name %s, dev %d\n", data->ioc_inlbuf1,
284 err = copy_to_user((void *)arg, data, sizeof(*data));
// Pick the first obd_dev[] slot that has no type yet and select it on this
// handle. The selection is cleared first, so a failed search leaves the
// handle with no device.
290 case OBD_IOC_NEWDEV: {
294 filp->private_data = NULL;
295 for (i = 0 ; i < MAX_OBD_DEVICES ; i++) {
296 struct obd_device *obd = &obd_dev[i];
297 if (!obd->obd_type) {
298 filp->private_data = obd;
307 GOTO(out, err=-EINVAL);
309 err = copy_to_user((void *)arg, data, sizeof(*data));
// Bind a type, and optionally a name and uuid, to the selected device.
// Inputs: inlbuf1 = type name (required), inlbuf2 = device name,
// inlbuf3 = uuid.
315 case OBD_IOC_ATTACH: {
316 struct obd_type *type;
319 /* have we attached a type to this device */
// NOTE(review): the error message dereferences obd->obd_type, yet the test
// also fires when only OBD_ATTACHED is set -- confirm obd_type cannot be
// NULL on that path.
320 if (obd->obd_flags & OBD_ATTACHED || obd->obd_type) {
321 CERROR("OBD: Device %d already typed as %s.\n",
322 obd->obd_minor, MKSTR(obd->obd_type->typ_name));
323 GOTO(out, err=-EBUSY);
326 if (!data->ioc_inllen1 || !data->ioc_inlbuf1) {
327 CERROR("No type passed!\n");
328 GOTO(out, err=-EINVAL);
330 if (data->ioc_inlbuf1[data->ioc_inllen1-1] !=0) {
331 CERROR("Type not nul terminated!\n");
332 GOTO(out, err=-EINVAL);
335 CDEBUG(D_IOCTL, "attach type %s name: %s uuid: %s\n",
336 MKSTR(data->ioc_inlbuf1),
337 MKSTR(data->ioc_inlbuf2), MKSTR(data->ioc_inlbuf3));
340 type = class_nm_to_type(data->ioc_inlbuf1);
342 CERROR("OBD: unknown type dev %d\n", obd->obd_minor);
343 GOTO(out, err=-EINVAL);
// Reset the whole slot, preserving only the minor number, then record the
// type and initialise the export/import lists and the device lock.
346 minor = obd->obd_minor;
347 memset(obd, 0, sizeof(*obd));
348 obd->obd_minor = minor;
349 obd->obd_type = type;
350 INIT_LIST_HEAD(&obd->obd_exports);
351 INIT_LIST_HEAD(&obd->obd_imports);
352 spin_lock_init(&obd->obd_dev_lock);
// The name is duplicated into freshly allocated memory (length + NUL).
354 if (data->ioc_inlbuf2) {
355 int len = strlen(data->ioc_inlbuf2) + 1;
356 OBD_ALLOC(obd->obd_name, len);
357 if (!obd->obd_name) {
358 CERROR("no memory\n");
361 memcpy(obd->obd_name, data->ioc_inlbuf2, len);
363 CERROR("WARNING: unnamed obd device\n");
// The uuid is copied without its NUL; the memset above already zeroed
// obd_uuid and the length check leaves room for the terminator.
// NOTE(review): the CERROR below prints a sizeof value with %d -- confirm
// the format matches on all targets.
365 if (data->ioc_inlbuf3) {
366 int len = strlen(data->ioc_inlbuf3);
367 if (len >= sizeof(obd->obd_uuid)) {
368 CERROR("uuid must be < %d bytes long\n",
369 sizeof(obd->obd_uuid));
371 OBD_FREE(obd->obd_name,
372 strlen(obd->obd_name) + 1);
373 GOTO(out, err=-EINVAL);
375 memcpy(obd->obd_uuid, data->ioc_inlbuf3, len);
// Give the type a chance to run its own attach method. The lines that
// follow undo the name/type assignment or mark the device attached; the
// branch structure between them is not visible in this excerpt.
378 if (OBP(obd, attach))
379 err = OBP(obd,attach)(obd, sizeof(*data), data);
381 if(data->ioc_inlbuf2)
382 OBD_FREE(obd->obd_name, strlen(obd->obd_name)+1);
383 obd->obd_type = NULL;
385 obd->obd_flags |= OBD_ATTACHED;
388 CDEBUG(D_IOCTL, "OBD: dev %d attached type %s\n",
389 obd->obd_minor, data->ioc_inlbuf1);
391 CDEBUG(D_IOCTL, "MOD_INC_USE for attach: count = %d\n",
392 atomic_read(&(THIS_MODULE)->uc.usecount));
// Undo OBD_IOC_ATTACH. Refused while the device is still set up (-EBUSY)
// or was never attached (-ENODEV). Remaining exports block the detach
// unless the first input buffer starts with F, in which case they are
// disconnected forcibly.
399 case OBD_IOC_DETACH: {
401 if (obd->obd_flags & OBD_SET_UP) {
402 CERROR("OBD device %d still set up\n", obd->obd_minor);
403 GOTO(out, err=-EBUSY);
405 if (!(obd->obd_flags & OBD_ATTACHED) ) {
406 CERROR("OBD device %d not attached\n", obd->obd_minor);
407 GOTO(out, err=-ENODEV);
409 if (!list_empty(&obd->obd_exports)) {
410 if (!data->ioc_inlbuf1 || data->ioc_inlbuf1[0] != 'F') {
411 CERROR("OBD device %d (%p) has exports\n",
412 obd->obd_minor, obd);
413 GOTO(out, err=-EBUSY);
415 forcibly_detach_exports(obd);
417 if (OBP(obd, detach))
418 err=OBP(obd,detach)(obd);
// Release the name, clear the attached flag, drop the type reference and
// unbind the type. Any guard around the OBD_FREE is not visible here.
421 OBD_FREE(obd->obd_name, strlen(obd->obd_name)+1);
422 obd->obd_name = NULL;
425 obd->obd_flags &= ~OBD_ATTACHED;
426 obd->obd_type->typ_refcnt--;
427 obd->obd_type = NULL;
428 CDEBUG(D_IOCTL, "MOD_DEC_USE for detach: count = %d\n",
429 atomic_read(&(THIS_MODULE)->uc.usecount) - 1);
// Run the setup method of the attached type. Requires an attached device
// that is not yet set up; afterwards the type refcount is raised and
// OBD_SET_UP is set (the condition guarding that is not visible).
434 case OBD_IOC_SETUP: {
435 /* have we attached a type to this device? */
436 if (!(obd->obd_flags & OBD_ATTACHED)) {
437 CERROR("Device %d not attached\n", obd->obd_minor);
438 GOTO(out, err=-ENODEV);
441 /* has this been done already? */
442 if ( obd->obd_flags & OBD_SET_UP ) {
443 CERROR("Device %d already setup (type %s)\n",
444 obd->obd_minor, obd->obd_type->typ_name);
445 GOTO(out, err=-EBUSY);
448 if ( OBT(obd) && OBP(obd, setup) )
449 err = obd_setup(obd, sizeof(*data), data);
452 obd->obd_type->typ_refcnt++;
453 obd->obd_flags |= OBD_SET_UP;
// Counterpart of OBD_IOC_SETUP: run the cleanup method, clear OBD_SET_UP
// and drop the type reference taken at setup.
// NOTE(review): the visible code checks only OBD_ATTACHED, not OBD_SET_UP,
// before cleaning up -- confirm against the hidden lines.
458 case OBD_IOC_CLEANUP: {
459 /* have we attached a type to this device? */
460 if (!(obd->obd_flags & OBD_ATTACHED)) {
461 CERROR("Device %d not attached\n", obd->obd_minor);
462 GOTO(out, err=-ENODEV);
465 if ( OBT(obd) && OBP(obd, cleanup) )
466 err = obd_cleanup(obd);
469 obd->obd_flags &= ~OBD_SET_UP;
470 obd->obd_type->typ_refcnt--;
// Connect to the selected device using the fixed client uuid below; the
// resulting handle is written into data and copied back to user space.
475 case OBD_IOC_CONNECT: {
476 char * cluuid = "OBD_CLASS_UUID";
477 obd_data2conn(&conn, data);
479 err = obd_connect(&conn, obd, cluuid, NULL, NULL);
481 CDEBUG(D_IOCTL, "assigned export "LPX64"\n", conn.addr);
482 obd_conn2data(data, &conn);
486 err = copy_to_user((void *)arg, data, sizeof(*data));
489 // XXX save connection data into file handle
// Disconnect the handle described by ioc_addr and ioc_cookie.
493 case OBD_IOC_DISCONNECT: {
494 obd_data2conn(&conn, data);
495 err = obd_disconnect(&conn);
// Only the debug message is visible; the statement that lowers the module
// use count is not shown in this excerpt.
499 case OBD_IOC_DEC_USE_COUNT: {
500 CDEBUG(D_IOCTL, "MOD_DEC_USE for force dec: count = %d\n",
501 atomic_read(&(THIS_MODULE)->uc.usecount) - 1);
// Fallback (label not visible; presumably default): forward the command to
// obd_iocontrol() on the handle in data and copy the payload back.
507 obd_data2conn(&conn, data);
509 err = obd_iocontrol(cmd, &conn, len, data, NULL);
513 err = copy_to_user((void *)arg, data, len);
524 } /* obd_class_ioctl */
528 /* declare character device */
// File operations of the OBD control device. The visible initialiser wires
// up ioctl, open and release using the old GNU label-colon syntax.
529 static struct file_operations obd_psdev_fops = {
530 ioctl: obd_class_ioctl, /* ioctl */
531 open: obd_class_open, /* open */
532 release: obd_class_release, /* release */
536 #define OBD_MINOR 241
537 static struct miscdevice obd_psdev = {
543 void (*class_signal_connection_failure)(struct ptlrpc_connection *);
545 #ifdef CONFIG_HIGHMEM
546 /* Allow at most 3/4 of the kmap mappings to be consumed by vector I/O
547 * requests. This avoids deadlocks on servers which have a lot of clients
548 * doing vector I/O. We don't need to do this for non-vector I/O requests
549 * because singleton requests will just block on the kmap itself and never
550 * deadlock waiting for additional kmaps to complete.
552 * If we are a "server" task, we can have at most a single reservation
553 * in excess of the maximum. This avoids a deadlock when multiple client
554 * threads are on the same machine as the server threads, and the clients
555 * have consumed all of the available mappings. As long as a single server
556 * thread can make progress, we are guaranteed to avoid deadlock.
558 #define OBD_KMAP_MAX (LAST_PKMAP * 3 / 4)
559 static atomic_t obd_kmap_count = ATOMIC_INIT(OBD_KMAP_MAX);
560 static DECLARE_WAIT_QUEUE_HEAD(obd_kmap_waitq);
// Reserve count kmap slots from the shared obd_kmap_count budget, sleeping
// until enough are available.
// count  - number of slots to reserve
// server - non-zero for server tasks, which are allowed to overdraw the
//          budget down to -PTL_MD_MAX_IOV instead of blocking
// NOTE(review): the condition selecting the plain atomic_dec() fast path and
// the statement taken when the server test succeeds are not visible in this
// excerpt; presumably count == 1 and a loop exit respectively.
562 void obd_kmap_get(int count, int server)
564 //CERROR("getting %d kmap counts (%d/%d)\n", count,
565 // atomic_read(&obd_kmap_count), OBD_KMAP_MAX);
567 atomic_dec(&obd_kmap_count);
// Optimistically subtract count; a negative result means the budget is
// exhausted and the reservation has to be undone before sleeping.
568 else while (atomic_add_negative(-count, &obd_kmap_count)) {
// Function-static state shared by all callers; used only to rate-limit the
// console message below. It is updated without locking in the visible code.
569 static long next_show = 0;
570 static int skipped = 0;
572 if (server && atomic_read(&obd_kmap_count) >= -PTL_MD_MAX_IOV)
575 CDEBUG(D_OTHER, "negative kmap reserved count: %d\n",
576 atomic_read(&obd_kmap_count));
// Give the slots back so other waiters see the true remaining budget.
577 atomic_add(count, &obd_kmap_count);
// Print the blocking notice at most once per 5*HZ jiffies.
579 if (time_after(jiffies, next_show)) {
580 CERROR("blocking %s (and %d others) for kmaps\n",
581 current->comm, skipped);
582 next_show = jiffies + 5*HZ;
// Sleep until the budget could cover this request, then retry the loop.
586 wait_event(obd_kmap_waitq,
587 atomic_read(&obd_kmap_count) >= count);
// Return count kmap slots to the shared budget and, when the budget is
// positive afterwards, wake every task sleeping on obd_kmap_waitq. Each
// woken task re-checks its own wait_event() condition in obd_kmap_get().
// count - number of slots being released
591 void obd_kmap_put(int count)
593 atomic_add(count, &obd_kmap_count);
594 /* Wake up sleepers. Sadly, this wakes up all of the tasks at once.
595 * We could have something smarter here like:
596 while (atomic_read(&obd_kmap_count) > 0)
597 wake_up_nr(obd_kmap_waitq, 1);
598 although we would need to set somewhere (probably obd_class_init):
599 obd_kmap_waitq.flags |= WQ_FLAG_EXCLUSIVE;
600 For now the wait_event() condition will handle this OK I believe.
602 if (atomic_read(&obd_kmap_count) > 0)
603 wake_up(&obd_kmap_waitq);
606 EXPORT_SYMBOL(obd_kmap_get);
607 EXPORT_SYMBOL(obd_kmap_put);
610 EXPORT_SYMBOL(obd_dev);
611 EXPORT_SYMBOL(obdo_cachep);
612 EXPORT_SYMBOL(obd_memory);
613 EXPORT_SYMBOL(obd_fail_loc);
614 EXPORT_SYMBOL(obd_timeout);
615 EXPORT_SYMBOL(obd_recovery_upcall);
616 EXPORT_SYMBOL(ptlrpc_put_connection_superhack);
618 EXPORT_SYMBOL(class_register_type);
619 EXPORT_SYMBOL(class_unregister_type);
620 EXPORT_SYMBOL(class_name2dev);
621 EXPORT_SYMBOL(class_uuid2dev);
622 EXPORT_SYMBOL(class_uuid2obd);
623 EXPORT_SYMBOL(class_new_export);
624 EXPORT_SYMBOL(class_destroy_export);
625 EXPORT_SYMBOL(class_connect);
626 EXPORT_SYMBOL(class_conn2export);
627 EXPORT_SYMBOL(class_conn2obd);
628 EXPORT_SYMBOL(class_conn2cliimp);
629 EXPORT_SYMBOL(class_conn2ldlmimp);
630 EXPORT_SYMBOL(class_disconnect);
631 EXPORT_SYMBOL(class_disconnect_all);
632 EXPORT_SYMBOL(class_uuid_unparse);
634 EXPORT_SYMBOL(class_signal_connection_failure);
635 EXPORT_SYMBOL(class_nm_to_type);
// Module initialisation for the OBD class driver.
// Visible steps, in order: print the build-version banner, initialise the
// configuration semaphore and the type list, register the misc control
// device, walk obd_dev[] (loop body not visible), create the caches with
// obd_init_caches(), and register the lprocfs entries.
// NOTE(review): the error handling after each step and the final return are
// not visible in this excerpt.
637 static int __init init_obdclass(void)
639 struct obd_device *obd;
643 printk(KERN_INFO "OBD class driver Build Version: " BUILD_VERSION
644 ", info@clusterfs.com\n");
// Initial count of 1, so the semaphore starts out available.
646 sema_init(&obd_conf_sem, 1);
647 INIT_LIST_HEAD(&obd_types);
649 if ((err = misc_register(&obd_psdev))) {
650 CERROR("cannot register %d err %d\n", OBD_MINOR, err);
654 /* This struct is already zeroed for us (static global) */
655 for (i = 0, obd = obd_dev; i < MAX_OBD_DEVICES; i++, obd++)
658 err = obd_init_caches();
664 err = lprocfs_reg_main();
// Module teardown for the OBD class driver.
// Deregisters the misc control device, calls the detach method of every
// device that has a type, is flagged OBD_SET_UP and provides one, destroys
// the caches, removes the lprocfs entries and reports leaked memory.
// NOTE(review): the loop tests OBD_SET_UP yet calls detach rather than
// cleanup -- confirm this is intended. The conditions guarding the leak
// message are not visible in this excerpt.
669 static void __exit cleanup_obdclass(void)
674 misc_deregister(&obd_psdev);
675 for (i = 0; i < MAX_OBD_DEVICES; i++) {
676 struct obd_device *obd = &obd_dev[i];
677 if (obd->obd_type && (obd->obd_flags & OBD_SET_UP) &&
678 OBT(obd) && OBP(obd, detach)) {
679 /* XXX should this call generic detach otherwise? */
680 OBP(obd, detach)(obd);
684 obd_cleanup_caches();
687 err = lprocfs_dereg_main();
689 CERROR("obd memory leaked: %ld bytes\n", obd_memory);
693 /* Check that we're building against the appropriate version of the Lustre
695 #include <linux/lustre_version.h>
696 #if (LUSTRE_KERNEL_VERSION != 3)
697 # error Cannot continue: Your Lustre kernel patch is out of date
700 MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
701 MODULE_DESCRIPTION("Lustre Class Driver Build Version: " BUILD_VERSION);
702 MODULE_LICENSE("GPL");
704 module_init(init_obdclass);
705 module_exit(cleanup_obdclass);