1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
6 * This code is issued under the GNU General Public License.
7 * See the file COPYING in this distribution
9 * These are the only exported functions; they provide some generic
10 * infrastructure for managing object devices.
12 * Object Devices Class Driver
16 #include <linux/config.h> /* for CONFIG_PROC_FS */
17 #include <linux/module.h>
18 #include <linux/errno.h>
19 #include <linux/kernel.h>
20 #include <linux/major.h>
21 #include <linux/sched.h>
23 #include <linux/slab.h>
24 #include <linux/ioport.h>
25 #include <linux/fcntl.h>
26 #include <linux/delay.h>
27 #include <linux/skbuff.h>
28 #include <linux/proc_fs.h>
30 #include <linux/poll.h>
31 #include <linux/init.h>
32 #include <linux/list.h>
33 #include <linux/highmem.h>
35 #include <asm/ioctls.h>
36 #include <asm/system.h>
38 #include <asm/uaccess.h>
39 #include <linux/miscdevice.h>
41 #define DEBUG_SUBSYSTEM S_CLASS
43 #include <linux/obd_support.h>
44 #include <linux/obd_class.h>
45 #include <linux/lustre_debug.h>
46 #include <linux/smp_lock.h>
47 #include <linux/lprocfs_status.h>
48 #include <portals/lib-types.h> /* for PTL_MD_MAX_IOV */
49 #include <linux/lustre_build_version.h>
/* Global state of the OBD class driver. */
51 struct semaphore obd_conf_sem; /* serialize configuration commands */
/* Table of device slots; the ioctl handler indexes it by device number. */
52 struct obd_device obd_dev[MAX_OBD_DEVICES];
/* List head initialised in init_obdclass(). */
53 struct list_head obd_types;
57 /* The following are visible and mutable through /proc/sys/lustre/. */
58 unsigned long obd_fail_loc;
59 unsigned long obd_timeout = 100;
60 char obd_recovery_upcall[128] = "/usr/lib/lustre/ha_assist";
62 /* opening /dev/obd */
/* Open handler installed in obd_psdev_fops.  A freshly opened file starts
 * with no OBD device selected: private_data, which the ioctl handler reads
 * as the current device, is cleared.
 * NOTE(review): the remaining statements of this function are not visible
 * in this view. */
63 static int obd_class_open(struct inode * inode, struct file * file)
67 file->private_data = NULL;
72 /* closing /dev/obd */
/* Release handler installed in obd_psdev_fops.  Forgets the device that was
 * selected on this file by clearing private_data when it is set.
 * NOTE(review): the remaining statements of this function are not visible
 * in this view. */
73 static int obd_class_release(struct inode * inode, struct file * file)
77 // XXX drop lsm, connections here
78 if (file->private_data)
79 file->private_data = NULL;
/* Fill a connection handle from the ioctl payload: copies ioc_addr and
 * ioc_cookie from @data into @conn. */
84 static inline void obd_data2conn(struct lustre_handle *conn,
85 struct obd_ioctl_data *data)
87 conn->addr = data->ioc_addr;
88 conn->cookie = data->ioc_cookie;
/* Inverse of obd_data2conn(): stores the handle's addr and cookie from
 * @conn into the ioc_addr and ioc_cookie fields of @data. */
91 static inline void obd_conn2data(struct obd_ioctl_data *data,
92 struct lustre_handle *conn)
94 data->ioc_addr = conn->addr;
95 data->ioc_cookie = conn->cookie;
/* Disconnect every export still attached to @obd.  Called from the detach
 * ioctl path when the caller requested a forced detach.
 * NOTE(review): some lines (the rest of the list_entry() call, the
 * declaration of rc and the branch that picks between the two result
 * messages) are not visible in this view. */
98 static void forcibly_detach_exports(struct obd_device *obd)
101 struct list_head *tmp, *n;
102 struct lustre_handle fake_conn;
104 CDEBUG(D_IOCTL, "OBD device %d (%p) has exports, "
105 "disconnecting them", obd->obd_minor, obd);
/* The _safe iterator keeps the next pointer in n while the current entry
 * is handed to obd_disconnect(). */
106 list_for_each_safe(tmp, n, &obd->obd_exports) {
107 struct obd_export *exp = list_entry(tmp, struct obd_export,
109 fake_conn.addr = (__u64)(unsigned long)exp;
110 fake_conn.cookie = exp->exp_cookie;
/* The handle is synthesised from the export's address and its cookie. */
111 rc = obd_disconnect(&fake_conn);
113 CDEBUG(D_IOCTL, "disconnecting export %p failed: %d\n",
116 CDEBUG(D_IOCTL, "export %p disconnected\n", exp);
121 /* to control /dev/obd */
/* ioctl handler for the OBD control device.
 *
 * The currently selected device is taken from filp->private_data.  The
 * payload is obtained from @arg with obd_ioctl_getdata() and viewed as a
 * struct obd_ioctl_data; the command is then dispatched:
 *   - device selection and lookup (DEVICE, NAME2DEV, UUID2DEV, NEWDEV,
 *     device listing, GET_VERSION),
 *   - device life cycle (ATTACH, DETACH, SETUP, CLEANUP),
 *   - connections (CONNECT, DISCONNECT),
 *   - anything else is forwarded to obd_iocontrol().
 * Failures jump to the "out" label with a negative errno in err.
 *
 * NOTE(review): many lines of this function (braces, break statements,
 * some if-headers and case labels, the "out" label itself) are not visible
 * in this view; the comments below describe only the visible statements.
 * NOTE(review): several paths store the result of copy_to_user() in err;
 * copy_to_user() reports a count of uncopied bytes rather than an errno, so
 * confirm that the hidden lines translate a non-zero result to -EFAULT. */
122 static int obd_class_ioctl (struct inode * inode, struct file * filp,
123 unsigned int cmd, unsigned long arg)
126 struct obd_ioctl_data *data;
127 struct obd_device *obd = filp->private_data;
128 struct lustre_handle conn;
129 int err = 0, len = 0, serialised = 0;
/* NOTE(review): these three commands are singled out ahead of the main
 * dispatch; the enclosing switch is not visible -- presumably they skip the
 * configuration serialisation tracked by 'serialised'. */
133 case OBD_IOC_BRW_WRITE:
134 case OBD_IOC_BRW_READ:
135 case OBD_IOC_GETATTR:
/* Without a selected device only the commands listed here are accepted. */
143 if (!obd && cmd != OBD_IOC_DEVICE && cmd != TCGETS &&
144 cmd != OBD_IOC_LIST && cmd != OBD_GET_VERSION &&
145 cmd != OBD_IOC_NAME2DEV && cmd != OBD_IOC_NEWDEV) {
146 CERROR("OBD ioctl: No device\n");
147 GOTO(out, err = -EINVAL);
/* Obtain the ioctl payload (buffer and its length) for the caller's arg. */
149 if (obd_ioctl_getdata(&buf, &len, (void *)arg)) {
150 CERROR("OBD ioctl: data error\n");
151 GOTO(out, err = -EINVAL);
153 data = (struct obd_ioctl_data *)buf;
157 GOTO(out, err=-EINVAL);
/* Select device slot ioc_dev for subsequent ioctls on this file. */
158 case OBD_IOC_DEVICE: {
159 CDEBUG(D_IOCTL, "\n");
160 if (data->ioc_dev >= MAX_OBD_DEVICES || data->ioc_dev < 0) {
161 CERROR("OBD ioctl: DEVICE insufficient devices\n");
162 GOTO(out, err=-EINVAL);
164 CDEBUG(D_IOCTL, "device %d\n", data->ioc_dev);
166 filp->private_data = &obd_dev[data->ioc_dev];
/* Device listing: one text line per slot is formatted into ioc_bulk,
 * bounded by ioc_inllen1, and the payload is copied back to the caller.
 * NOTE(review): the case label is not visible; presumably OBD_IOC_LIST. */
172 char *buf2 = data->ioc_bulk;
173 int remains = data->ioc_inllen1;
175 if (!data->ioc_inlbuf1) {
176 CERROR("No buffer passed!\n");
177 GOTO(out, err=-EINVAL);
181 for (i = 0 ; i < MAX_OBD_DEVICES ; i++) {
184 struct obd_device *obd = &obd_dev[i];
187 if (obd->obd_flags & OBD_SET_UP)
189 else if (obd->obd_flags & OBD_ATTACHED)
193 l = snprintf(buf2, remains, "%2d %s %s %s %s %d\n",
194 i, status, obd->obd_type->typ_name,
195 obd->obd_name, obd->obd_uuid, obd->obd_type->typ_refcnt);
199 CERROR("not enough space for device listing\n");
204 err = copy_to_user((void *)arg, data, len);
/* Return BUILD_VERSION in ioc_bulk when the caller's buffer can hold the
 * string including its terminating NUL. */
210 case OBD_GET_VERSION:
211 if (!data->ioc_inlbuf1) {
212 CERROR("No buffer passed in ioctl\n");
213 GOTO(out, err = -EINVAL);
216 if (strlen(BUILD_VERSION) + 1 > data->ioc_inllen1) {
217 CERROR("ioctl buffer too small to hold version\n");
218 GOTO(out, err = -EINVAL);
221 memcpy(data->ioc_bulk, BUILD_VERSION,
222 strlen(BUILD_VERSION) + 1);
224 err = copy_to_user((void *)arg, data, len);
229 case OBD_IOC_NAME2DEV: {
230 /* Resolve a device name. This does not change the
231 * currently selected device.
235 if (!data->ioc_inllen1 || !data->ioc_inlbuf1 ) {
236 CERROR("No name passed,!\n");
237 GOTO(out, err=-EINVAL);
239 if (data->ioc_inlbuf1[data->ioc_inllen1-1] !=0) {
240 CERROR("Name not nul terminated!\n");
241 GOTO(out, err=-EINVAL);
244 CDEBUG(D_IOCTL, "device name %s\n", data->ioc_inlbuf1);
245 dev = class_name2dev(data->ioc_inlbuf1);
248 CDEBUG(D_IOCTL, "No device for name %s!\n",
250 GOTO(out, err=-EINVAL);
253 CDEBUG(D_IOCTL, "device name %s, dev %d\n", data->ioc_inlbuf1,
255 err = copy_to_user((void *)arg, data, sizeof(*data));
261 case OBD_IOC_UUID2DEV: {
262 /* Resolve a device uuid. This does not change the
263 * currently selected device.
267 if (!data->ioc_inllen1 || !data->ioc_inlbuf1) {
268 CERROR("No UUID passed!\n");
269 GOTO(out, err=-EINVAL);
271 if (data->ioc_inlbuf1[data->ioc_inllen1-1] !=0) {
272 CERROR("Name not nul terminated!\n");
273 GOTO(out, err=-EINVAL);
276 CDEBUG(D_IOCTL, "device name %s\n", data->ioc_inlbuf1);
277 dev = class_uuid2dev(data->ioc_inlbuf1);
280 CDEBUG(D_IOCTL, "No device for name %s!\n",
282 GOTO(out, err=-EINVAL);
285 CDEBUG(D_IOCTL, "device name %s, dev %d\n", data->ioc_inlbuf1,
287 err = copy_to_user((void *)arg, data, sizeof(*data));
293 case OBD_IOC_NEWDEV: {
297 filp->private_data = NULL;
298 for (i = 0 ; i < MAX_OBD_DEVICES ; i++) {
299 struct obd_device *obd = &obd_dev[i];
300 if (!obd->obd_type) {
301 filp->private_data = obd;
310 GOTO(out, err=-EINVAL);
312 err = copy_to_user((void *)arg, data, sizeof(*data));
318 case OBD_IOC_ATTACH: {
319 struct obd_type *type;
322 /* have we attached a type to this device */
323 if (obd->obd_flags & OBD_ATTACHED || obd->obd_type) {
324 CERROR("OBD: Device %d already typed as %s.\n",
325 obd->obd_minor, MKSTR(obd->obd_type->typ_name));
326 GOTO(out, err = -EBUSY);
329 if (!data->ioc_inllen1 || !data->ioc_inlbuf1) {
330 CERROR("No type passed!\n");
331 GOTO(out, err = -EINVAL);
333 if (data->ioc_inlbuf1[data->ioc_inllen1-1] !=0) {
334 CERROR("Type not nul terminated!\n");
335 GOTO(out, err = -EINVAL);
338 CDEBUG(D_IOCTL, "attach type %s name: %s uuid: %s\n",
339 MKSTR(data->ioc_inlbuf1),
340 MKSTR(data->ioc_inlbuf2), MKSTR(data->ioc_inlbuf3));
/* Look up the requested type by the name passed in inlbuf1. */
343 type = class_get_type(data->ioc_inlbuf1);
345 CERROR("OBD: unknown type dev %d\n", obd->obd_minor);
346 GOTO(out, err = -EINVAL);
/* Reset the slot, preserving only its minor number, then initialise the
 * type pointer, the export/import lists and the device lock. */
349 minor = obd->obd_minor;
350 memset(obd, 0, sizeof(*obd));
351 obd->obd_minor = minor;
352 obd->obd_type = type;
353 INIT_LIST_HEAD(&obd->obd_exports);
354 INIT_LIST_HEAD(&obd->obd_imports);
355 spin_lock_init(&obd->obd_dev_lock);
/* Optional device name (inlbuf2): kept as a private copy in obd_name. */
357 if (data->ioc_inlbuf2) {
358 int len = strlen(data->ioc_inlbuf2) + 1;
359 OBD_ALLOC(obd->obd_name, len);
360 if (!obd->obd_name) {
361 class_put_type(obd->obd_type);
362 GOTO(out, err = -ENOMEM);
364 memcpy(obd->obd_name, data->ioc_inlbuf2, len);
366 CERROR("WARNING: unnamed obd device\n");
/* Optional UUID (inlbuf3): its length must be below sizeof(obd_uuid).
 * NOTE(review): obd_name may be NULL here when no name was passed; confirm
 * that the hidden line before the OBD_FREE below guards against that. */
368 if (data->ioc_inlbuf3) {
369 int len = strlen(data->ioc_inlbuf3);
370 if (len >= sizeof(obd->obd_uuid)) {
371 CERROR("uuid must be < "LPSZ" bytes long\n",
372 sizeof(obd->obd_uuid));
374 OBD_FREE(obd->obd_name,
375 strlen(obd->obd_name) + 1);
376 class_put_type(obd->obd_type);
377 GOTO(out, err=-EINVAL);
379 memcpy(obd->obd_uuid, data->ioc_inlbuf3, len);
/* Run the type's attach method when it provides one.  The statements that
 * follow release the name and the type reference -- presumably on failure
 * only; the guarding condition is not visible -- before the device is
 * flagged as attached. */
382 if (OBP(obd, attach))
383 err = OBP(obd,attach)(obd, sizeof(*data), data);
385 if(data->ioc_inlbuf2)
386 OBD_FREE(obd->obd_name, strlen(obd->obd_name)+1);
387 class_put_type(obd->obd_type);
388 obd->obd_type = NULL;
390 obd->obd_flags |= OBD_ATTACHED;
393 CDEBUG(D_IOCTL, "OBD: dev %d attached type %s\n",
394 obd->obd_minor, data->ioc_inlbuf1);
/* Detach: refused while the device is set up or when it is not attached.
 * If exports remain, the request proceeds only when inlbuf1 starts with
 * 'F', in which case the exports are disconnected first. */
400 case OBD_IOC_DETACH: {
402 if (obd->obd_flags & OBD_SET_UP) {
403 CERROR("OBD device %d still set up\n", obd->obd_minor);
404 GOTO(out, err=-EBUSY);
406 if (!(obd->obd_flags & OBD_ATTACHED) ) {
407 CERROR("OBD device %d not attached\n", obd->obd_minor);
408 GOTO(out, err=-ENODEV);
410 if (!list_empty(&obd->obd_exports)) {
411 if (!data->ioc_inlbuf1 || data->ioc_inlbuf1[0] != 'F') {
412 CERROR("OBD device %d (%p) has exports\n",
413 obd->obd_minor, obd);
414 GOTO(out, err=-EBUSY);
416 forcibly_detach_exports(obd);
418 if (OBP(obd, detach))
419 err = OBP(obd,detach)(obd);
422 OBD_FREE(obd->obd_name, strlen(obd->obd_name)+1);
423 obd->obd_name = NULL;
426 obd->obd_flags &= ~OBD_ATTACHED;
427 obd->obd_type->typ_refcnt--;
428 class_put_type(obd->obd_type);
429 obd->obd_type = NULL;
/* Setup: requires an attached device that has not been set up yet; the
 * type's reference count is raised and OBD_SET_UP is recorded afterwards. */
433 case OBD_IOC_SETUP: {
434 /* have we attached a type to this device? */
435 if (!(obd->obd_flags & OBD_ATTACHED)) {
436 CERROR("Device %d not attached\n", obd->obd_minor);
437 GOTO(out, err=-ENODEV);
440 /* has this been done already? */
441 if ( obd->obd_flags & OBD_SET_UP ) {
442 CERROR("Device %d already setup (type %s)\n",
443 obd->obd_minor, obd->obd_type->typ_name);
444 GOTO(out, err=-EBUSY);
447 if ( OBT(obd) && OBP(obd, setup) )
448 err = obd_setup(obd, sizeof(*data), data);
451 obd->obd_type->typ_refcnt++;
452 obd->obd_flags |= OBD_SET_UP;
/* Cleanup: reverses setup by clearing OBD_SET_UP and dropping the type
 * reference count taken there. */
457 case OBD_IOC_CLEANUP: {
458 /* have we attached a type to this device? */
459 if (!(obd->obd_flags & OBD_ATTACHED)) {
460 CERROR("Device %d not attached\n", obd->obd_minor);
461 GOTO(out, err=-ENODEV);
464 if ( OBT(obd) && OBP(obd, cleanup) )
465 err = obd_cleanup(obd);
468 obd->obd_flags &= ~OBD_SET_UP;
469 obd->obd_type->typ_refcnt--;
/* Connect using a fixed client UUID string and hand the resulting handle
 * back to the caller through the payload. */
474 case OBD_IOC_CONNECT: {
475 char * cluuid = "OBD_CLASS_UUID";
476 obd_data2conn(&conn, data);
478 err = obd_connect(&conn, obd, cluuid, NULL, NULL);
480 CDEBUG(D_IOCTL, "assigned export "LPX64"\n", conn.addr);
481 obd_conn2data(data, &conn);
485 err = copy_to_user((void *)arg, data, sizeof(*data));
488 // XXX save connection data into file handle
/* Disconnect the handle described by the payload. */
492 case OBD_IOC_DISCONNECT: {
493 obd_data2conn(&conn, data);
494 err = obd_disconnect(&conn);
/* NOTE(review): the label is not visible; presumably the default case --
 * any other command is forwarded to obd_iocontrol() and the payload is
 * copied back to the caller. */
499 obd_data2conn(&conn, data);
501 err = obd_iocontrol(cmd, &conn, len, data, NULL);
505 err = copy_to_user((void *)arg, data, len);
517 } /* obd_class_ioctl */
521 /* declare character device */
/* File operations of the OBD control device: only ioctl, open and release
 * are provided, each pointing at the handler defined in this file. */
522 static struct file_operations obd_psdev_fops = {
523 ioctl: obd_class_ioctl, /* ioctl */
524 open: obd_class_open, /* open */
525 release: obd_class_release, /* release */
/* Minor number reported when registration of the misc device fails. */
529 #define OBD_MINOR 241
/* Misc device registered in init_obdclass() and removed again in
 * cleanup_obdclass().  NOTE(review): its initialiser is not visible in this
 * view. */
530 static struct miscdevice obd_psdev = {
/* Hook taking a ptlrpc connection; exported below.  Nothing in this file
 * assigns it. */
536 void (*class_signal_connection_failure)(struct ptlrpc_connection *);
538 #ifdef CONFIG_HIGHMEM
539 /* Allow at most 3/4 of the kmap mappings to be consumed by vector I/O
540 * requests. This avoids deadlocks on servers which have a lot of clients
541 * doing vector I/O. We don't need to do this for non-vector I/O requests
542 * because singleton requests will just block on the kmap itself and never
543 * deadlock waiting for additional kmaps to complete.
545 * If we are a "server" task, we can have at most a single reservation
546 * in excess of the maximum. This avoids a deadlock when multiple client
547 * threads are on the same machine as the server threads, and the clients
548 * have consumed all of the available mappings. As long as a single server
549 * thread can make progress, we are guaranteed to avoid deadlock.
/* Reservation budget: three quarters of LAST_PKMAP. */
551 #define OBD_KMAP_MAX (LAST_PKMAP * 3 / 4)
/* Remaining budget; starts full, lowered by obd_kmap_get() and raised by
 * obd_kmap_put(). */
552 static atomic_t obd_kmap_count = ATOMIC_INIT(OBD_KMAP_MAX);
/* Tasks blocked in obd_kmap_get() sleep here until obd_kmap_put() wakes
 * them. */
553 static DECLARE_WAIT_QUEUE_HEAD(obd_kmap_waitq);
/* Reserve @count kmap slots from obd_kmap_count, sleeping on obd_kmap_waitq
 * when the reservation would drive the counter negative.
 *
 * @count:  number of mappings to reserve.
 * @server: non-zero for a server task, which may overdraw the budget as long
 *          as the counter stays at or above -PTL_MD_MAX_IOV.
 *
 * NOTE(review): the condition selecting the plain atomic_dec() path and the
 * body of the server branch are not visible in this view; presumably the
 * former is the single-mapping case and the latter leaves the loop. */
555 void obd_kmap_get(int count, int server)
557 //CERROR("getting %d kmap counts (%d/%d)\n", count,
558 // atomic_read(&obd_kmap_count), OBD_KMAP_MAX);
560 atomic_dec(&obd_kmap_count);
/* atomic_add_negative() applies the reservation and reports whether the
 * counter went below zero; the loop body handles that overdraft. */
561 else while (atomic_add_negative(-count, &obd_kmap_count)) {
562 static long next_show = 0;
563 static int skipped = 0;
565 if (server && atomic_read(&obd_kmap_count) >= -PTL_MD_MAX_IOV)
568 CDEBUG(D_OTHER, "negative kmap reserved count: %d\n",
569 atomic_read(&obd_kmap_count));
/* Undo the failed reservation before going to sleep. */
570 atomic_add(count, &obd_kmap_count);
/* The blocking message is rate limited: next_show is pushed five seconds
 * (5*HZ jiffies) ahead each time it is printed. */
572 if (time_after(jiffies, next_show)) {
573 CERROR("blocking %s (and %d others) for kmaps\n",
574 current->comm, skipped);
575 next_show = jiffies + 5*HZ;
/* Sleep until the budget could cover this request, then retry the loop. */
579 wait_event(obd_kmap_waitq,
580 atomic_read(&obd_kmap_count) >= count);
/* Return @count kmap slots to obd_kmap_count and, if the budget is positive
 * afterwards, wake every task sleeping on obd_kmap_waitq; each woken task
 * re-checks its own wait_event() condition in obd_kmap_get(). */
584 void obd_kmap_put(int count)
586 atomic_add(count, &obd_kmap_count);
587 /* Wake up sleepers. Sadly, this wakes up all of the tasks at once.
588 * We could have something smarter here like:
589 while (atomic_read(&obd_kmap_count) > 0)
590 wake_up_nr(obd_kmap_waitq, 1);
591 although we would need to set somewhere (probably obd_class_init):
592 obd_kmap_waitq.flags |= WQ_FLAG_EXCLUSIVE;
593 For now the wait_event() condition will handle this OK I believe.
595 if (atomic_read(&obd_kmap_count) > 0)
596 wake_up(&obd_kmap_waitq);
/* Symbols exported by this module.  The kmap reservation helpers defined in
 * this file come first. */
599 EXPORT_SYMBOL(obd_kmap_get);
600 EXPORT_SYMBOL(obd_kmap_put);
/* Global data and tunables. */
603 EXPORT_SYMBOL(obd_dev);
604 EXPORT_SYMBOL(obdo_cachep);
605 EXPORT_SYMBOL(obd_memory);
606 EXPORT_SYMBOL(obd_memmax);
607 EXPORT_SYMBOL(obd_fail_loc);
608 EXPORT_SYMBOL(obd_timeout);
609 EXPORT_SYMBOL(obd_recovery_upcall);
610 EXPORT_SYMBOL(ptlrpc_put_connection_superhack);
/* class_* helpers; their definitions are not in this view. */
612 EXPORT_SYMBOL(class_register_type);
613 EXPORT_SYMBOL(class_unregister_type);
614 EXPORT_SYMBOL(class_get_type);
615 EXPORT_SYMBOL(class_put_type);
616 EXPORT_SYMBOL(class_name2dev);
617 EXPORT_SYMBOL(class_uuid2dev);
618 EXPORT_SYMBOL(class_uuid2obd);
619 EXPORT_SYMBOL(class_new_export);
620 EXPORT_SYMBOL(class_destroy_export);
621 EXPORT_SYMBOL(class_connect);
622 EXPORT_SYMBOL(class_conn2export);
623 EXPORT_SYMBOL(class_conn2obd);
624 EXPORT_SYMBOL(class_conn2cliimp);
625 EXPORT_SYMBOL(class_conn2ldlmimp);
626 EXPORT_SYMBOL(class_disconnect);
627 EXPORT_SYMBOL(class_disconnect_all);
628 EXPORT_SYMBOL(class_uuid_unparse);
630 EXPORT_SYMBOL(class_signal_connection_failure);
/* Module entry point: announces the build version, initialises the
 * configuration semaphore and the type list, registers the misc device,
 * walks the device table, then sets up the object caches and the lprocfs
 * root.
 * NOTE(review): the error handling after each step, the body of the device
 * loop and the return statements are not visible in this view. */
632 static int __init init_obdclass(void)
634 struct obd_device *obd;
638 printk(KERN_INFO "OBD class driver Build Version: " BUILD_VERSION
639 ", info@clusterfs.com\n");
/* Initial count of 1: the semaphore is used as a mutex. */
641 sema_init(&obd_conf_sem, 1);
642 INIT_LIST_HEAD(&obd_types);
644 if ((err = misc_register(&obd_psdev))) {
645 CERROR("cannot register %d err %d\n", OBD_MINOR, err);
649 /* This struct is already zeroed for us (static global) */
650 for (i = 0, obd = obd_dev; i < MAX_OBD_DEVICES; i++, obd++)
653 err = obd_init_caches();
659 err = lprocfs_reg_main();
/* Module exit: unregisters the misc device, calls the type's detach method
 * on devices that are still set up, frees the object caches, removes the
 * lprocfs root and reports memory accounting.
 * NOTE(review): the loop tests OBD_SET_UP but invokes detach, not cleanup;
 * the XXX below questions this path as well.  Lines between the visible
 * statements are missing from this view. */
664 static void __exit cleanup_obdclass(void)
669 misc_deregister(&obd_psdev);
670 for (i = 0; i < MAX_OBD_DEVICES; i++) {
671 struct obd_device *obd = &obd_dev[i];
672 if (obd->obd_type && (obd->obd_flags & OBD_SET_UP) &&
673 OBT(obd) && OBP(obd, detach)) {
674 /* XXX should this call generic detach otherwise? */
675 OBP(obd, detach)(obd);
679 obd_cleanup_caches();
682 err = lprocfs_dereg_main();
/* Peak allocation and the amount still outstanding at unload time. */
684 CERROR("obd mem max: %d leaked: %d\n", obd_memmax,
685 atomic_read(&obd_memory));
689 /* Check that we're building against the appropriate version of the Lustre
691 #include <linux/lustre_version.h>
/* Fail the build unless the kernel patch version is exactly 5. */
692 #if (LUSTRE_KERNEL_VERSION != 5)
693 # error Cannot continue: Your Lustre kernel patch is out of date
/* Module metadata and entry/exit registration. */
696 MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
697 MODULE_DESCRIPTION("Lustre Class Driver Build Version: " BUILD_VERSION);
698 MODULE_LICENSE("GPL");
700 module_init(init_obdclass);
701 module_exit(cleanup_obdclass);