1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
6 * This code is issued under the GNU General Public License.
7 * See the file COPYING in this distribution
9 * These are the only exported functions, they provide some generic
10 * infrastructure for managing object devices
12 * Object Devices Class Driver
16 #include <linux/config.h> /* for CONFIG_PROC_FS */
17 #include <linux/module.h>
18 #include <linux/errno.h>
19 #include <linux/kernel.h>
20 #include <linux/major.h>
21 #include <linux/sched.h>
23 #include <linux/slab.h>
24 #include <linux/ioport.h>
25 #include <linux/fcntl.h>
26 #include <linux/delay.h>
27 #include <linux/skbuff.h>
28 #include <linux/proc_fs.h>
30 #include <linux/poll.h>
31 #include <linux/init.h>
32 #include <linux/list.h>
33 #include <linux/highmem.h>
35 #include <asm/ioctls.h>
36 #include <asm/system.h>
38 #include <asm/uaccess.h>
39 #include <linux/miscdevice.h>
41 #define DEBUG_SUBSYSTEM S_CLASS
43 #include <linux/obd_support.h>
44 #include <linux/obd_class.h>
45 #include <linux/lustre_debug.h>
46 #include <linux/smp_lock.h>
47 #include <linux/lprocfs_status.h>
48 #include <portals/lib-types.h> /* for PTL_MD_MAX_IOV */
/* Global state shared by the OBD class driver. */
50 struct semaphore obd_conf_sem; /* serialize configuration commands */
/* Table of device slots; the ioctl handler indexes it by device number. */
51 struct obd_device obd_dev[MAX_OBD_DEVICES];
/* List head initialised in init_obdclass(); presumably the list of
 * registered obd types -- TODO confirm against class_register_type(). */
52 struct list_head obd_types;
/* Byte count printed in the "obd memory leaked" message at module cleanup. */
53 unsigned long obd_memory;
55 /* The following are visible and mutable through /proc/sys/lustre/. */
56 unsigned long obd_fail_loc;
57 unsigned long obd_timeout = 100;
58 char obd_recovery_upcall[128] = "/usr/lib/lustre/ha_assist";
/* Defined outside this file; the OBD_IOC_ATTACH handler calls it with the
 * type name supplied by the caller to obtain the matching obd_type. */
60 extern struct obd_type *class_nm_to_type(char *nm);
/*
 * open() handler for the OBD control device (installed via obd_psdev_fops).
 * The visible code clears file->private_data, which obd_class_ioctl() reads
 * as the currently selected obd device, so a newly opened file starts with
 * no device selected.  It also logs the module use count.
 * NOTE(review): parts of the body are not visible in this listing; the debug
 * text suggests the module use count is raised here -- confirm.
 */
62 /* opening /dev/obd */
63 static int obd_class_open(struct inode * inode, struct file * file)
67 file->private_data = NULL;
68 CDEBUG(D_IOCTL, "MOD_INC_USE for open: count = %d\n",
69 atomic_read(&(THIS_MODULE)->uc.usecount));
/*
 * release() handler for the OBD control device (installed via
 * obd_psdev_fops).  Drops the file's reference to the selected obd device by
 * clearing file->private_data and logs the use count minus one.
 * NOTE(review): the matching use-count decrement is presumably on a line not
 * visible in this listing -- confirm.
 */
74 /* closing /dev/obd */
75 static int obd_class_release(struct inode * inode, struct file * file)
79 // XXX drop lsm, connections here
80 if (file->private_data)
81 file->private_data = NULL;
83 CDEBUG(D_IOCTL, "MOD_DEC_USE for close: count = %d\n",
84 atomic_read(&(THIS_MODULE)->uc.usecount) - 1);
/*
 * Fill a lustre_handle from an ioctl request.
 *
 * conn: handle to populate (addr and cookie are overwritten).
 * data: ioctl data whose ioc_addr / ioc_cookie fields are copied.
 */
89 static inline void obd_data2conn(struct lustre_handle *conn,
90 struct obd_ioctl_data *data)
92 conn->addr = data->ioc_addr;
93 conn->cookie = data->ioc_cookie;
/*
 * Inverse of obd_data2conn(): store a lustre_handle into an ioctl reply.
 *
 * data: ioctl data whose ioc_addr / ioc_cookie fields are overwritten.
 * conn: handle supplying the addr and cookie values.
 */
96 static inline void obd_conn2data(struct obd_ioctl_data *data,
97 struct lustre_handle *conn)
99 data->ioc_addr = conn->addr;
100 data->ioc_cookie = conn->cookie;
/*
 * Disconnect every export still attached to an obd device.  Called from the
 * OBD_IOC_DETACH handler when the caller requests a forced detach.
 *
 * obd: device whose obd_exports list is walked.
 *
 * No value is returned; the outcome of each disconnect is only logged.
 * NOTE(review): the declaration of rc and the branch around the two result
 * messages are on lines not visible in this listing.
 */
103 static void forcibly_detach_exports(struct obd_device *obd)
106 struct list_head *tmp, *n;
107 struct lustre_handle fake_conn;
109 CDEBUG(D_IOCTL, "OBD device %d (%p) has exports, "
110 "disconnecting them", obd->obd_minor, obd);
/* The _safe iterator keeps a lookahead pointer (n); presumably needed
 * because obd_disconnect() unlinks the export -- confirm. */
111 list_for_each_safe(tmp, n, &obd->obd_exports) {
112 struct obd_export *exp = list_entry(tmp, struct obd_export,
/* Build a handle by hand: the address is the export pointer itself and the
 * cookie is the one stored in the export. */
114 fake_conn.addr = (__u64)(unsigned long)exp;
115 fake_conn.cookie = exp->exp_cookie;
116 rc = obd_disconnect(&fake_conn);
118 CDEBUG(D_IOCTL, "disconnecting export %p failed: %d\n",
121 CDEBUG(D_IOCTL, "export %p disconnected\n", exp);
/*
 * ioctl() handler for the OBD control device; dispatches on cmd.
 *
 * inode: unused in the visible code.
 * filp:  open file; filp->private_data holds the currently selected
 *        obd_device (or NULL when none is selected).
 * cmd:   OBD_IOC_* command (TCGETS is also recognised).
 * arg:   user-space pointer to the request, unpacked by obd_ioctl_getdata()
 *        into a struct obd_ioctl_data and, for several commands, written
 *        back with copy_to_user().
 *
 * Failure paths visible here report -EINVAL, -EBUSY or -ENODEV through
 * GOTO(out, ...).
 * NOTE(review): many lines of this function (case labels, braces, the out:
 * label and the final return) are not visible in this listing, so the notes
 * below describe only what the visible lines establish.
 */
126 /* to control /dev/obd */
127 static int obd_class_ioctl (struct inode * inode, struct file * filp,
128 unsigned int cmd, unsigned long arg)
131 struct obd_ioctl_data *data;
132 struct obd_device *obd = filp->private_data;
133 struct lustre_handle conn;
134 int err = 0, len = 0;
/* Only these commands may run without a selected device.
 * NOTE(review): OBD_IOC_UUID2DEV is not in this list although, like
 * OBD_IOC_NAME2DEV, it only resolves an identifier -- confirm intent. */
139 if (!obd && cmd != OBD_IOC_DEVICE && cmd != TCGETS &&
140 cmd != OBD_IOC_LIST &&
141 cmd != OBD_IOC_NAME2DEV && cmd != OBD_IOC_NEWDEV) {
142 CERROR("OBD ioctl: No device\n");
143 GOTO(out, err = -EINVAL);
/* Unpack the user request; len receives its size, which is later used as
 * the copy-back length for some commands. */
145 if (obd_ioctl_getdata(&buf, &len, (void *)arg)) {
146 CERROR("OBD ioctl: data error\n");
147 GOTO(out, err = -EINVAL);
149 data = (struct obd_ioctl_data *)buf;
153 GOTO(out, err=-EINVAL);
/* Select the device slot with index ioc_dev for this open file. */
154 case OBD_IOC_DEVICE: {
155 CDEBUG(D_IOCTL, "\n");
156 if (data->ioc_dev >= MAX_OBD_DEVICES || data->ioc_dev < 0) {
157 CERROR("OBD ioctl: DEVICE insufficient devices\n");
158 GOTO(out, err=-EINVAL);
160 CDEBUG(D_IOCTL, "device %d\n", data->ioc_dev);
162 filp->private_data = &obd_dev[data->ioc_dev];
/* Device listing (the case label is not visible in this listing):
 * formatted text is written into the ioc_bulk buffer, bounded by
 * ioc_inllen1, and the request is then copied back to user space. */
168 char *buf2 = data->ioc_bulk;
169 int remains = data->ioc_inllen1;
171 if (!data->ioc_inlbuf1) {
172 CERROR("No buffer passed!\n");
173 GOTO(out, err=-EINVAL);
177 for (i = 0 ; i < MAX_OBD_DEVICES ; i++) {
180 struct obd_device *obd = &obd_dev[i];
183 if (obd->obd_flags & OBD_SET_UP)
185 else if (obd->obd_flags & OBD_ATTACHED)
/* One line per device: index, status, type name, name, uuid, refcount. */
189 l = snprintf(buf2, remains, "%2d %s %s %s %s %d\n",
190 i, status, obd->obd_type->typ_name,
191 obd->obd_name, obd->obd_uuid, obd->obd_type->typ_refcnt);
195 CERROR("not enough space for device listing\n");
200 err = copy_to_user((void *)arg, data, len);
207 case OBD_IOC_NAME2DEV: {
208 /* Resolve a device name. This does not change the
209 * currently selected device.
213 if (!data->ioc_inllen1 || !data->ioc_inlbuf1 ) {
214 CERROR("No name passed,!\n");
215 GOTO(out, err=-EINVAL);
217 if (data->ioc_inlbuf1[data->ioc_inllen1-1] !=0) {
218 CERROR("Name not nul terminated!\n");
219 GOTO(out, err=-EINVAL);
222 CDEBUG(D_IOCTL, "device name %s\n", data->ioc_inlbuf1);
223 dev = class_name2dev(data->ioc_inlbuf1);
226 CDEBUG(D_IOCTL, "No device for name %s!\n",
228 GOTO(out, err=-EINVAL);
231 CDEBUG(D_IOCTL, "device name %s, dev %d\n", data->ioc_inlbuf1,
233 err = copy_to_user((void *)arg, data, sizeof(*data));
239 case OBD_IOC_UUID2DEV: {
240 /* Resolve a device uuid. This does not change the
241 * currently selected device.
245 if (!data->ioc_inllen1 || !data->ioc_inlbuf1) {
246 CERROR("No UUID passed!\n");
247 GOTO(out, err=-EINVAL);
249 if (data->ioc_inlbuf1[data->ioc_inllen1-1] !=0) {
250 CERROR("Name not nul terminated!\n");
251 GOTO(out, err=-EINVAL);
254 CDEBUG(D_IOCTL, "device name %s\n", data->ioc_inlbuf1);
255 dev = class_uuid2dev(data->ioc_inlbuf1);
258 CDEBUG(D_IOCTL, "No device for name %s!\n",
260 GOTO(out, err=-EINVAL);
263 CDEBUG(D_IOCTL, "device name %s, dev %d\n", data->ioc_inlbuf1,
265 err = copy_to_user((void *)arg, data, sizeof(*data));
271 case OBD_IOC_NEWDEV: {
275 filp->private_data = NULL;
276 for (i = 0 ; i < MAX_OBD_DEVICES ; i++) {
277 struct obd_device *obd = &obd_dev[i];
278 if (!obd->obd_type) {
279 filp->private_data = obd;
288 GOTO(out, err=-EINVAL);
290 err = copy_to_user((void *)arg, data, sizeof(*data));
296 case OBD_IOC_ATTACH: {
297 struct obd_type *type;
300 /* have we attached a type to this device */
301 if (obd->obd_flags & OBD_ATTACHED || obd->obd_type) {
302 CERROR("OBD: Device %d already typed as %s.\n",
303 obd->obd_minor, MKSTR(obd->obd_type->typ_name));
304 GOTO(out, err=-EBUSY);
/* inlbuf1 carries the type name and must be NUL-terminated. */
307 if (!data->ioc_inllen1 || !data->ioc_inlbuf1) {
308 CERROR("No type passed!\n");
309 GOTO(out, err=-EINVAL);
311 if (data->ioc_inlbuf1[data->ioc_inllen1-1] !=0) {
312 CERROR("Type not nul terminated!\n");
313 GOTO(out, err=-EINVAL);
316 CDEBUG(D_IOCTL, "attach type %s name: %s uuid: %s\n",
317 MKSTR(data->ioc_inlbuf1),
318 MKSTR(data->ioc_inlbuf2), MKSTR(data->ioc_inlbuf3));
321 type = class_nm_to_type(data->ioc_inlbuf1);
323 CERROR("OBD: unknown type dev %d\n", obd->obd_minor);
324 GOTO(out, err=-EINVAL);
/* Reset the whole slot, preserving only its minor number, then bind the
 * type and initialise the export/import lists and the device lock. */
327 minor = obd->obd_minor;
328 memset(obd, 0, sizeof(*obd));
329 obd->obd_minor = minor;
330 obd->obd_type = type;
331 INIT_LIST_HEAD(&obd->obd_exports);
332 INIT_LIST_HEAD(&obd->obd_imports);
333 spin_lock_init(&obd->obd_dev_lock);
/* Optional device name (inlbuf2): copied into a fresh allocation that
 * includes the terminating NUL. */
335 if (data->ioc_inlbuf2) {
336 int len = strlen(data->ioc_inlbuf2) + 1;
337 OBD_ALLOC(obd->obd_name, len);
338 if (!obd->obd_name) {
339 CERROR("no memory\n");
342 memcpy(obd->obd_name, data->ioc_inlbuf2, len);
344 CERROR("WARNING: unnamed obd device\n");
/* Optional uuid (inlbuf3): must fit in obd_uuid with room for a NUL; the
 * terminator comes from the memset above since only len bytes are copied. */
346 if (data->ioc_inlbuf3) {
347 int len = strlen(data->ioc_inlbuf3);
348 if (len >= sizeof(obd->obd_uuid)) {
349 CERROR("uuid must be < %d bytes long\n",
350 sizeof(obd->obd_uuid));
/* NOTE(review): obd_name may be NULL here when no name was passed; a guard
 * may exist on a line not visible in this listing -- confirm. */
352 OBD_FREE(obd->obd_name,
353 strlen(obd->obd_name) + 1);
354 GOTO(out, err=-EINVAL);
356 memcpy(obd->obd_uuid, data->ioc_inlbuf3, len);
/* Give the type a chance to run its own attach method. */
359 if (OBP(obd, attach))
360 err = OBP(obd,attach)(obd, sizeof(*data), data);
/* Presumably the failure branch: undo the name allocation and unbind the
 * type; otherwise mark the device attached -- branch lines not visible. */
362 if(data->ioc_inlbuf2)
363 OBD_FREE(obd->obd_name, strlen(obd->obd_name)+1);
364 obd->obd_type = NULL;
366 obd->obd_flags |= OBD_ATTACHED;
369 CDEBUG(D_IOCTL, "OBD: dev %d attached type %s\n",
370 obd->obd_minor, data->ioc_inlbuf1);
372 CDEBUG(D_IOCTL, "MOD_INC_USE for attach: count = %d\n",
373 atomic_read(&(THIS_MODULE)->uc.usecount));
/* Detach: refused while the device is set up or when it is not attached.
 * Remaining exports block the detach unless inlbuf1 starts with 'F', in
 * which case they are disconnected forcibly first. */
380 case OBD_IOC_DETACH: {
382 if (obd->obd_flags & OBD_SET_UP) {
383 CERROR("OBD device %d still set up\n", obd->obd_minor);
384 GOTO(out, err=-EBUSY);
386 if (!(obd->obd_flags & OBD_ATTACHED) ) {
387 CERROR("OBD device %d not attached\n", obd->obd_minor);
388 GOTO(out, err=-ENODEV);
390 if (!list_empty(&obd->obd_exports)) {
391 if (!data->ioc_inlbuf1 || data->ioc_inlbuf1[0] != 'F') {
392 CERROR("OBD device %d (%p) has exports\n",
393 obd->obd_minor, obd);
394 GOTO(out, err=-EBUSY);
396 forcibly_detach_exports(obd);
398 if (OBP(obd, detach))
399 err=OBP(obd,detach)(obd);
/* Release the name, clear the attached flag, drop the type reference and
 * unbind the type so the slot can be reused. */
402 OBD_FREE(obd->obd_name, strlen(obd->obd_name)+1);
403 obd->obd_name = NULL;
406 obd->obd_flags &= ~OBD_ATTACHED;
407 obd->obd_type->typ_refcnt--;
408 obd->obd_type = NULL;
409 CDEBUG(D_IOCTL, "MOD_DEC_USE for detach: count = %d\n",
410 atomic_read(&(THIS_MODULE)->uc.usecount) - 1);
/* Setup: requires an attached device that has not been set up yet. */
415 case OBD_IOC_SETUP: {
416 /* have we attached a type to this device? */
417 if (!(obd->obd_flags & OBD_ATTACHED)) {
418 CERROR("Device %d not attached\n", obd->obd_minor);
419 GOTO(out, err=-ENODEV);
422 /* has this been done already? */
423 if ( obd->obd_flags & OBD_SET_UP ) {
424 CERROR("Device %d already setup (type %s)\n",
425 obd->obd_minor, obd->obd_type->typ_name);
426 GOTO(out, err=-EBUSY);
429 if ( OBT(obd) && OBP(obd, setup) )
430 err = obd_setup(obd, sizeof(*data), data);
/* Take a type reference and mark the device set up (presumably only when
 * err is zero -- the guarding line is not visible in this listing). */
433 obd->obd_type->typ_refcnt++;
434 obd->obd_flags |= OBD_SET_UP;
/* Cleanup: mirror of setup.
 * NOTE(review): only OBD_ATTACHED is checked in the visible lines, not
 * OBD_SET_UP -- confirm that cleanup of a never-set-up device is intended. */
439 case OBD_IOC_CLEANUP: {
440 /* have we attached a type to this device? */
441 if (!(obd->obd_flags & OBD_ATTACHED)) {
442 CERROR("Device %d not attached\n", obd->obd_minor);
443 GOTO(out, err=-ENODEV);
446 if ( OBT(obd) && OBP(obd, cleanup) )
447 err = obd_cleanup(obd);
450 obd->obd_flags &= ~OBD_SET_UP;
451 obd->obd_type->typ_refcnt--;
/* Connect to the selected device using a fixed client uuid string and
 * return the resulting handle to user space inside the ioctl data. */
456 case OBD_IOC_CONNECT: {
457 char * cluuid = "OBD_CLASS_UUID";
458 obd_data2conn(&conn, data);
460 err = obd_connect(&conn, obd, cluuid, NULL, NULL);
462 CDEBUG(D_IOCTL, "assigned export "LPX64"\n", conn.addr);
463 obd_conn2data(data, &conn);
467 err = copy_to_user((void *)arg, data, sizeof(*data));
470 // XXX save connection data into file handle
/* Disconnect the handle described by ioc_addr / ioc_cookie. */
474 case OBD_IOC_DISCONNECT: {
475 obd_data2conn(&conn, data);
476 err = obd_disconnect(&conn);
480 case OBD_IOC_DEC_USE_COUNT: {
481 CDEBUG(D_IOCTL, "MOD_DEC_USE for force dec: count = %d\n",
482 atomic_read(&(THIS_MODULE)->uc.usecount) - 1);
/* Presumably the default case (label not visible): hand the command to
 * obd_iocontrol() and copy len bytes of the request back to user space. */
488 obd_data2conn(&conn, data);
490 err = obd_iocontrol(cmd, &conn, len, data, NULL);
494 err = copy_to_user((void *)arg, data, len);
505 } /* obd_class_ioctl */
/*
 * File operations of the OBD control device.  Uses the GNU "field: value"
 * initializer syntax to wire up the ioctl, open and release handlers defined
 * earlier in this file.
 */
509 /* declare character device */
510 static struct file_operations obd_psdev_fops = {
511 ioctl: obd_class_ioctl, /* ioctl */
512 open: obd_class_open, /* open */
513 release: obd_class_release, /* release */
/* Minor number reported in the registration error message of
 * init_obdclass(); presumably the minor assigned to obd_psdev (the
 * initializer fields are not visible in this listing). */
517 #define OBD_MINOR 241
/* Misc device registered by init_obdclass() and removed again by
 * cleanup_obdclass(). */
518 static struct miscdevice obd_psdev = {
/* Exported hook taking a ptlrpc_connection; it is only declared here and not
 * assigned anywhere in the visible code. */
524 void (*class_signal_connection_failure)(struct ptlrpc_connection *);
526 #ifdef CONFIG_HIGHMEM
527 /* Allow at most 3/4 of the kmap mappings to be consumed by vector I/O
528 * requests. This avoids deadlocks on servers which have a lot of clients
529 * doing vector I/O. We don't need to do this for non-vector I/O requests
530 * because singleton requests will just block on the kmap itself and never
531 * deadlock waiting for additional kmaps to complete.
533 * If we are a "server" task, we can have at most a single reservation
534 * in excess of the maximum. This avoids a deadlock when multiple client
535 * threads are on the same machine as the server threads, and the clients
536 * have consumed all of the available mappings. As long as a single server
537 * thread can make progress, we are guaranteed to avoid deadlock.
/* Reservation budget: three quarters of LAST_PKMAP. */
539 #define OBD_KMAP_MAX (LAST_PKMAP * 3 / 4)
/* Remaining budget; obd_kmap_get() subtracts from it and obd_kmap_put()
 * adds back. */
540 static atomic_t obd_kmap_count = ATOMIC_INIT(OBD_KMAP_MAX);
/* Tasks blocked in obd_kmap_get() sleep here until obd_kmap_put() wakes
 * them. */
541 static DECLARE_WAIT_QUEUE_HEAD(obd_kmap_waitq);
/*
 * Reserve kmap slots from the obd_kmap_count budget, sleeping on
 * obd_kmap_waitq when the budget is exhausted.
 *
 * count:  number of slots to reserve.
 * server: non-zero lets the caller proceed while the budget is overdrawn by
 *         no more than PTL_MD_MAX_IOV.
 *
 * NOTE(review): the condition selecting the plain atomic_dec() branch is on
 * a line not visible in this listing (presumably a single-slot request).
 */
543 void obd_kmap_get(int count, int server)
545 //CERROR("getting %d kmap counts (%d/%d)\n", count,
546 // atomic_read(&obd_kmap_count), OBD_KMAP_MAX);
548 atomic_dec(&obd_kmap_count);
/* Try to take the slots; a negative result means the budget is exceeded. */
549 else while (atomic_add_negative(-count, &obd_kmap_count)) {
/* Function-static state, shared by all callers, used to rate-limit the
 * "blocking" message below. */
550 static long next_show = 0;
551 static int skipped = 0;
/* Server exemption: keep the reservation as long as the overdraft stays
 * within PTL_MD_MAX_IOV. */
553 if (server && atomic_read(&obd_kmap_count) >= -PTL_MD_MAX_IOV)
556 CDEBUG(D_OTHER, "negative kmap reserved count: %d\n",
557 atomic_read(&obd_kmap_count));
/* Give the slots back before sleeping so other tasks see the true count. */
558 atomic_add(count, &obd_kmap_count);
/* Emit the message again only after next_show, which is pushed 5*HZ
 * jiffies ahead each time. */
560 if (time_after(jiffies, next_show)) {
561 CERROR("blocking %s (and %d others) for kmaps\n",
562 current->comm, skipped);
563 next_show = jiffies + 5*HZ;
/* Sleep until enough budget is available, then retry the reservation. */
567 wait_event(obd_kmap_waitq,
568 atomic_read(&obd_kmap_count) >= count);
/*
 * Return kmap slots to the obd_kmap_count budget and, if the budget is
 * positive afterwards, wake the tasks sleeping in obd_kmap_get().
 *
 * count: number of slots being released.
 */
572 void obd_kmap_put(int count)
574 atomic_add(count, &obd_kmap_count);
575 /* Wake up sleepers. Sadly, this wakes up all of the tasks at once.
576 * We could have something smarter here like:
577 while (atomic_read(&obd_kmap_count) > 0)
578 wake_up_nr(obd_kmap_waitq, 1);
579 although we would need to set somewhere (probably obd_class_init):
580 obd_kmap_waitq.flags |= WQ_FLAG_EXCLUSIVE;
581 For now the wait_event() condition will handle this OK I believe.
583 if (atomic_read(&obd_kmap_count) > 0)
584 wake_up(&obd_kmap_waitq);
/* Symbols made available to other kernel modules. */
/* kmap reservation helpers defined above. */
587 EXPORT_SYMBOL(obd_kmap_get);
588 EXPORT_SYMBOL(obd_kmap_put);
/* Global driver state; obdo_cachep and ptlrpc_put_connection_superhack are
 * not defined in the visible part of this file. */
591 EXPORT_SYMBOL(obd_dev);
592 EXPORT_SYMBOL(obdo_cachep);
593 EXPORT_SYMBOL(obd_memory);
594 EXPORT_SYMBOL(obd_fail_loc);
595 EXPORT_SYMBOL(obd_timeout);
596 EXPORT_SYMBOL(obd_recovery_upcall);
597 EXPORT_SYMBOL(ptlrpc_put_connection_superhack);
/* class_* helpers; their definitions are outside the visible part of this
 * file. */
599 EXPORT_SYMBOL(class_register_type);
600 EXPORT_SYMBOL(class_unregister_type);
601 EXPORT_SYMBOL(class_name2dev);
602 EXPORT_SYMBOL(class_uuid2dev);
603 EXPORT_SYMBOL(class_uuid2obd);
604 EXPORT_SYMBOL(class_new_export);
605 EXPORT_SYMBOL(class_destroy_export);
606 EXPORT_SYMBOL(class_connect);
607 EXPORT_SYMBOL(class_conn2export);
608 EXPORT_SYMBOL(class_conn2obd);
609 EXPORT_SYMBOL(class_conn2cliimp);
610 EXPORT_SYMBOL(class_conn2ldlmimp);
611 EXPORT_SYMBOL(class_disconnect);
612 EXPORT_SYMBOL(class_disconnect_all);
613 EXPORT_SYMBOL(class_uuid_unparse);
/* Function-pointer hook declared in this file, and the type lookup used by
 * the attach ioctl. */
615 EXPORT_SYMBOL(class_signal_connection_failure);
616 EXPORT_SYMBOL(class_nm_to_type);
/*
 * Module entry point (registered with module_init below).  In the order
 * visible here it prints a banner, initialises the configuration semaphore
 * (count 1) and the obd type list, registers the misc device, walks every
 * obd_dev slot, creates the caches via obd_init_caches() and registers the
 * proc entries via lprocfs_reg_main().
 *
 * Returns an int status.
 * NOTE(review): the error-handling and return lines, and the body of the
 * slot loop, are not visible in this listing.
 */
618 static int __init init_obdclass(void)
620 struct obd_device *obd;
624 printk(KERN_INFO "OBD class driver v0.9, info@clusterfs.com\n");
626 sema_init(&obd_conf_sem, 1);
627 INIT_LIST_HEAD(&obd_types);
629 if ((err = misc_register(&obd_psdev))) {
630 CERROR("cannot register %d err %d\n", OBD_MINOR, err);
634 /* This struct is already zeroed for us (static global) */
635 for (i = 0, obd = obd_dev; i < MAX_OBD_DEVICES; i++, obd++)
638 err = obd_init_caches();
644 err = lprocfs_reg_main();
/*
 * Module exit point (registered with module_exit below).  Deregisters the
 * misc device, calls the type-specific detach method on every device slot
 * that is still set up, destroys the caches, removes the proc entries and
 * reports leaked memory.
 *
 * NOTE(review): the loop tests OBD_SET_UP rather than OBD_ATTACHED before
 * calling detach, and the condition guarding the leak message is on a line
 * not visible in this listing -- confirm both.
 */
649 static void __exit cleanup_obdclass(void)
654 misc_deregister(&obd_psdev);
655 for (i = 0; i < MAX_OBD_DEVICES; i++) {
656 struct obd_device *obd = &obd_dev[i];
657 if (obd->obd_type && (obd->obd_flags & OBD_SET_UP) &&
658 OBT(obd) && OBP(obd, detach)) {
659 /* XXX should this call generic detach otherwise? */
660 OBP(obd, detach)(obd);
664 obd_cleanup_caches();
667 err = lprocfs_dereg_main();
669 CERROR("obd memory leaked: %ld bytes\n", obd_memory);
673 /* Check that we're building against the appropriate version of the Lustre
/* Build-time guard: compilation stops with an error unless
 * LUSTRE_KERNEL_VERSION is exactly 2. */
675 #include <linux/lustre_version.h>
676 #if (LUSTRE_KERNEL_VERSION != 2)
677 # error Cannot continue: Your Lustre kernel patch is out of date
/* Module metadata.
 * NOTE(review): the description says v1.0 while the banner printed by
 * init_obdclass() says v0.9 -- confirm which version string is intended. */
680 MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
681 MODULE_DESCRIPTION("Lustre Class Driver v1.0");
682 MODULE_LICENSE("GPL");
/* Register the module entry and exit points defined above. */
684 module_init(init_obdclass);
685 module_exit(cleanup_obdclass);