Whamcloud - gitweb
8888e8051a337fa029d0fef94775ac571e30bddb
[fs/lustre-release.git] / lustre / obdclass / class_obd.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
5  *
6  * This code is issued under the GNU General Public License.
7  * See the file COPYING in this distribution
8  *
9  * These are the only exported functions, they provide some generic
10  * infrastructure for managing object devices
11  *
12  * Object Devices Class Driver
13  */
14
15 #define EXPORT_SYMTAB
16 #include <linux/config.h> /* for CONFIG_PROC_FS */
17 #include <linux/module.h>
18 #include <linux/errno.h>
19 #include <linux/kernel.h>
20 #include <linux/major.h>
21 #include <linux/sched.h>
22 #include <linux/lp.h>
23 #include <linux/slab.h>
24 #include <linux/ioport.h>
25 #include <linux/fcntl.h>
26 #include <linux/delay.h>
27 #include <linux/skbuff.h>
28 #include <linux/proc_fs.h>
29 #include <linux/fs.h>
30 #include <linux/poll.h>
31 #include <linux/init.h>
32 #include <linux/list.h>
33 #include <linux/highmem.h>
34 #include <asm/io.h>
35 #include <asm/ioctls.h>
36 #include <asm/system.h>
37 #include <asm/poll.h>
38 #include <asm/uaccess.h>
39 #include <linux/miscdevice.h>
40
41 #define DEBUG_SUBSYSTEM S_CLASS
42
43 #include <linux/obd_support.h>
44 #include <linux/obd_class.h>
45 #include <linux/lustre_debug.h>
46 #include <linux/smp_lock.h>
47 #include <linux/lprocfs_status.h>
48 #include <portals/lib-types.h> /* for PTL_MD_MAX_IOV */
49
50 struct semaphore obd_conf_sem;   /* serialize configuration commands: taken by every /dev/obd ioctl except BRW read/write and GETATTR */
51 struct obd_device obd_dev[MAX_OBD_DEVICES]; /* device table; init_obdclass() sets obd_dev[i].obd_minor = i */
52 struct list_head obd_types; /* list head, initialised empty in init_obdclass(); NOTE(review): presumably the registered obd types - confirm */
53 unsigned long obd_memory; /* byte counter reported as "leaked" when the module is unloaded */
54
55 /* The following are visible and mutable through /proc/sys/lustre/. */
56 unsigned long obd_fail_loc;
57 unsigned long obd_timeout = 100; /* NOTE(review): units are not visible in this file - confirm */
58 char obd_recovery_upcall[128] = "/usr/lib/lustre/ha_assist";
59
60 extern struct obd_type *class_nm_to_type(char *nm); /* type lookup by name; OBD_IOC_ATTACH treats a NULL result as "unknown type" */
61
62 /*  opening /dev/obd */
63 static int obd_class_open(struct inode * inode, struct file * file)
64 {
65         ENTRY;
66
67         file->private_data = NULL;
68         CDEBUG(D_IOCTL, "MOD_INC_USE for open: count = %d\n",
69                atomic_read(&(THIS_MODULE)->uc.usecount));
70         MOD_INC_USE_COUNT;
71         RETURN(0);
72 }
73
74 /*  closing /dev/obd */
75 static int obd_class_release(struct inode * inode, struct file * file)
76 {
77         ENTRY;
78
79         // XXX drop lsm, connections here
80         if (file->private_data)
81                 file->private_data = NULL;
82
83         CDEBUG(D_IOCTL, "MOD_DEC_USE for close: count = %d\n",
84                atomic_read(&(THIS_MODULE)->uc.usecount) - 1);
85         MOD_DEC_USE_COUNT;
86         RETURN(0);
87 }
88
89
90 static inline void obd_data2conn(struct lustre_handle *conn,
91                                  struct obd_ioctl_data *data)
92 {
93         conn->addr = data->ioc_addr;
94         conn->cookie = data->ioc_cookie;
95 }
96
97
98 static inline void obd_conn2data(struct obd_ioctl_data *data,
99                                  struct lustre_handle *conn)
100 {
101         data->ioc_addr = conn->addr;
102         data->ioc_cookie = conn->cookie;
103 }
104
105 static void forcibly_detach_exports(struct obd_device *obd)
106 {
107         int rc;
108         struct list_head *tmp, *n;
109         struct lustre_handle fake_conn;
110
111         CDEBUG(D_IOCTL, "OBD device %d (%p) has exports, "
112                "disconnecting them", obd->obd_minor, obd);
113         list_for_each_safe(tmp, n, &obd->obd_exports) {
114                 struct obd_export *exp = list_entry(tmp, struct obd_export,
115                                                     exp_obd_chain);
116                 fake_conn.addr = (__u64)(unsigned long)exp;
117                 fake_conn.cookie = exp->exp_cookie;
118                 rc = obd_disconnect(&fake_conn);
119                 if (rc) {
120                         CDEBUG(D_IOCTL, "disconnecting export %p failed: %d\n",
121                                exp, rc);
122                 } else {
123                         CDEBUG(D_IOCTL, "export %p disconnected\n", exp);
124                 }
125         }
126 }
127
128 /* to control /dev/obd */
129 static int obd_class_ioctl (struct inode * inode, struct file * filp,
130                             unsigned int cmd, unsigned long arg)
131 {
132         char *buf = NULL;
133         int len = 0;
134         struct obd_ioctl_data *data;
135         struct obd_device *obd = filp->private_data;
136
137         struct lustre_handle conn;
138         int rw = OBD_BRW_READ;
139         int err = 0;
140         int serialised = 0;
141
142         ENTRY;
143
144         switch (cmd)
145         {
146         case OBD_IOC_BRW_WRITE:
147         case OBD_IOC_BRW_READ:
148         case OBD_IOC_GETATTR:
149                 break;
150         default:
151                 down(&obd_conf_sem);
152                 serialised = 1;
153                 break;
154         }
155
156         if (!obd && cmd != OBD_IOC_DEVICE && cmd != TCGETS &&
157             cmd != OBD_IOC_LIST &&
158             cmd != OBD_IOC_NAME2DEV && cmd != OBD_IOC_NEWDEV) {
159                 CERROR("OBD ioctl: No device\n");
160                 GOTO(out, err=-EINVAL);
161         }
162         if (obd_ioctl_getdata(&buf, &len, (void *)arg)) {
163                 CERROR("OBD ioctl: data error\n");
164                 GOTO(out, err=-EINVAL);
165         }
166         data = (struct obd_ioctl_data *)buf;
167
168         switch (cmd) {
169         case TCGETS:
170                 GOTO(out, err=-EINVAL);
171         case OBD_IOC_DEVICE: {
172                 CDEBUG(D_IOCTL, "\n");
173                 if (data->ioc_dev >= MAX_OBD_DEVICES || data->ioc_dev < 0) {
174                         CERROR("OBD ioctl: DEVICE insufficient devices\n");
175                         GOTO(out, err=-EINVAL);
176                 }
177                 CDEBUG(D_IOCTL, "device %d\n", data->ioc_dev);
178
179                 filp->private_data = &obd_dev[data->ioc_dev];
180                 GOTO(out, err=0);
181         }
182
183         case OBD_IOC_LIST: {
184                 int i;
185                 char *buf2 = data->ioc_bulk;
186                 int remains = data->ioc_inllen1;
187
188                 if (!data->ioc_inlbuf1) {
189                         CERROR("No buffer passed!\n");
190                         GOTO(out, err=-EINVAL);
191                 }
192
193
194                 for (i = 0 ; i < MAX_OBD_DEVICES ; i++) {
195                         int l;
196                         char *status;
197                         struct obd_device *obd = &obd_dev[i];
198                         if (!obd->obd_type)
199                                 continue;
200                         if (obd->obd_flags & OBD_SET_UP)
201                                 status = "UP";
202                         else if (obd->obd_flags & OBD_ATTACHED)
203                                 status = "AT";
204                         else
205                                 status = "-";
206                         l = snprintf(buf2, remains, "%2d %s %s %s %s %d\n",
207                                      i, status, obd->obd_type->typ_name,
208                                      obd->obd_name, obd->obd_uuid, obd->obd_type->typ_refcnt);
209                         buf2 +=l;
210                         remains -=l;
211                         if (remains <= 0) {
212                                 CERROR("not enough space for device listing\n");
213                                 break;
214                         }
215                 }
216
217                 err = copy_to_user((void *)arg, data, len);
218                 if (err)
219                         err = -EFAULT;
220                 GOTO(out, err);
221         }
222
223
224         case OBD_IOC_NAME2DEV: {
225                 /* Resolve a device name.  This does not change the
226                  * currently selected device.
227                  */
228                 int dev;
229
230                 if (!data->ioc_inllen1 || !data->ioc_inlbuf1 ) {
231                         CERROR("No name passed,!\n");
232                         GOTO(out, err=-EINVAL);
233                 }
234                 if (data->ioc_inlbuf1[data->ioc_inllen1-1] !=0) {
235                         CERROR("Name not nul terminated!\n");
236                         GOTO(out, err=-EINVAL);
237                 }
238
239                 CDEBUG(D_IOCTL, "device name %s\n", data->ioc_inlbuf1);
240                 dev = class_name2dev(data->ioc_inlbuf1);
241                 data->ioc_dev = dev;
242                 if (dev == -1) {
243                         CDEBUG(D_IOCTL, "No device for name %s!\n",
244                                data->ioc_inlbuf1);
245                         GOTO(out, err=-EINVAL);
246                 }
247
248                 CDEBUG(D_IOCTL, "device name %s, dev %d\n", data->ioc_inlbuf1,
249                        dev);
250                 err = copy_to_user((void *)arg, data, sizeof(*data));
251                 if (err)
252                         err = -EFAULT;
253                 GOTO(out, err);
254         }
255
256         case OBD_IOC_UUID2DEV: {
257                 /* Resolve a device uuid.  This does not change the
258                  * currently selected device.
259                  */
260                 int dev;
261
262                 if (!data->ioc_inllen1 || !data->ioc_inlbuf1) {
263                         CERROR("No UUID passed!\n");
264                         GOTO(out, err=-EINVAL);
265                 }
266                 if (data->ioc_inlbuf1[data->ioc_inllen1-1] !=0) {
267                         CERROR("Name not nul terminated!\n");
268                         GOTO(out, err=-EINVAL);
269                 }
270
271                 CDEBUG(D_IOCTL, "device name %s\n", data->ioc_inlbuf1);
272                 dev = class_uuid2dev(data->ioc_inlbuf1);
273                 data->ioc_dev = dev;
274                 if (dev == -1) {
275                         CDEBUG(D_IOCTL, "No device for name %s!\n",
276                                data->ioc_inlbuf1);
277                         GOTO(out, err=-EINVAL);
278                 }
279
280                 CDEBUG(D_IOCTL, "device name %s, dev %d\n", data->ioc_inlbuf1,
281                        dev);
282                 err = copy_to_user((void *)arg, data, sizeof(*data));
283                 if (err)
284                         err = -EFAULT;
285                 GOTO(out, err);
286         }
287
288         case OBD_IOC_NEWDEV: {
289                 int dev = -1;
290                 int i;
291
292                 filp->private_data = NULL;
293                 for (i = 0 ; i < MAX_OBD_DEVICES ; i++) {
294                         struct obd_device *obd = &obd_dev[i];
295                         if (!obd->obd_type) {
296                                 filp->private_data = obd;
297                                 dev = i;
298                                 break;
299                         }
300                 }
301
302
303                 data->ioc_dev = dev;
304                 if (dev == -1)
305                         GOTO(out, err=-EINVAL);
306
307                 err = copy_to_user((void *)arg, data, sizeof(*data));
308                 if (err)
309                         err = -EFAULT;
310                 GOTO(out, err);
311         }
312
313         case OBD_IOC_ATTACH: {
314                 struct obd_type *type;
315                 int minor;
316
317                 /* have we attached a type to this device */
318                 if (obd->obd_flags & OBD_ATTACHED || obd->obd_type) {
319                         CERROR("OBD: Device %d already typed as %s.\n",
320                                obd->obd_minor, MKSTR(obd->obd_type->typ_name));
321                         GOTO(out, err=-EBUSY);
322                 }
323
324                 if (!data->ioc_inllen1 || !data->ioc_inlbuf1) {
325                         CERROR("No type passed!\n");
326                         GOTO(out, err=-EINVAL);
327                 }
328                 if (data->ioc_inlbuf1[data->ioc_inllen1-1] !=0) {
329                         CERROR("Type not nul terminated!\n");
330                         GOTO(out, err=-EINVAL);
331                 }
332
333                 CDEBUG(D_IOCTL, "attach type %s name: %s uuid: %s\n",
334                        MKSTR(data->ioc_inlbuf1),
335                        MKSTR(data->ioc_inlbuf2), MKSTR(data->ioc_inlbuf3));
336
337                 /* find the type */
338                 type = class_nm_to_type(data->ioc_inlbuf1);
339                 if (!type) {
340                         CERROR("OBD: unknown type dev %d\n", obd->obd_minor);
341                         GOTO(out, err=-EINVAL);
342                 }
343
344                 minor = obd->obd_minor;
345                 memset(obd, 0, sizeof(*obd));
346                 obd->obd_minor = minor;
347                 obd->obd_type = type;
348                 INIT_LIST_HEAD(&obd->obd_exports);
349                 INIT_LIST_HEAD(&obd->obd_imports);
350                 spin_lock_init(&obd->obd_dev_lock);
351
352                         if (data->ioc_inlbuf2) {
353                                 int len = strlen(data->ioc_inlbuf2) + 1;
354                                 OBD_ALLOC(obd->obd_name, len);
355                                 if (!obd->obd_name) {
356                                         CERROR("no memory\n");
357                                         LBUG();
358                                 }
359                                 memcpy(obd->obd_name, data->ioc_inlbuf2, len);
360                         } else {
361                                 CERROR("WARNING: unnamed obd device\n");
362                         }
363                         if (data->ioc_inlbuf3) {
364                                 int len = strlen(data->ioc_inlbuf3);
365                                 if (len >= sizeof(obd->obd_uuid)) {
366                                         CERROR("uuid must be < %d bytes long\n",
367                                                sizeof(obd->obd_uuid));
368                                         if (obd->obd_name)
369                                                 OBD_FREE(obd->obd_name,
370                                                          strlen(obd->obd_name) + 1);
371                                         GOTO(out, err=-EINVAL);
372                                 }
373                                 memcpy(obd->obd_uuid, data->ioc_inlbuf3, len);
374                         }
375                 /* do the attach */
376                 if (OBP(obd, attach))
377                         err = OBP(obd,attach)(obd, sizeof(*data), data);
378                 if (err) {
379                         if(data->ioc_inlbuf2)
380                                 OBD_FREE(obd->obd_name, strlen(obd->obd_name)+1);
381                         obd->obd_type = NULL;
382
383                         } else {
384                         obd->obd_flags |= OBD_ATTACHED;
385
386                         type->typ_refcnt++;
387                         CDEBUG(D_IOCTL, "OBD: dev %d attached type %s\n",
388                                obd->obd_minor, data->ioc_inlbuf1);
389
390                         CDEBUG(D_IOCTL, "MOD_INC_USE for attach: count = %d\n",
391                                atomic_read(&(THIS_MODULE)->uc.usecount));
392                         MOD_INC_USE_COUNT;
393                 }
394
395                 GOTO(out, err);
396         }
397
398         case OBD_IOC_DETACH: {
399                 ENTRY;
400                 if (obd->obd_flags & OBD_SET_UP) {
401                         CERROR("OBD device %d still set up\n", obd->obd_minor);
402                         GOTO(out, err=-EBUSY);
403                 }
404                 if (!(obd->obd_flags & OBD_ATTACHED) ) {
405                         CERROR("OBD device %d not attached\n", obd->obd_minor);
406                         GOTO(out, err=-ENODEV);
407                 }
408                 if (!list_empty(&obd->obd_exports)) {
409                         if (!data->ioc_inlbuf1 || data->ioc_inlbuf1[0] != 'F') {
410                                 CERROR("OBD device %d (%p) has exports\n",
411                                        obd->obd_minor, obd);
412                                 GOTO(out, err=-EBUSY);
413                         }
414                         forcibly_detach_exports(obd);
415                 }
416                    if (OBP(obd, detach))
417                         err=OBP(obd,detach)(obd);
418
419                 if (obd->obd_name) {
420                         OBD_FREE(obd->obd_name, strlen(obd->obd_name)+1);
421                         obd->obd_name = NULL;
422                 }
423
424                 obd->obd_flags &= ~OBD_ATTACHED;
425                 obd->obd_type->typ_refcnt--;
426                 obd->obd_type = NULL;
427                 CDEBUG(D_IOCTL, "MOD_DEC_USE for detach: count = %d\n",
428                        atomic_read(&(THIS_MODULE)->uc.usecount) - 1);
429                 MOD_DEC_USE_COUNT;
430                 GOTO(out, err = 0);
431         }
432
433         case OBD_IOC_SETUP: {
434                 /* have we attached a type to this device? */
435                 if (!(obd->obd_flags & OBD_ATTACHED)) {
436                         CERROR("Device %d not attached\n", obd->obd_minor);
437                         GOTO(out, err=-ENODEV);
438                 }
439
440                 /* has this been done already? */
441                 if ( obd->obd_flags & OBD_SET_UP ) {
442                         CERROR("Device %d already setup (type %s)\n",
443                                obd->obd_minor, obd->obd_type->typ_name);
444                         GOTO(out, err=-EBUSY);
445                 }
446
447                 if ( OBT(obd) && OBP(obd, setup) )
448                         err = obd_setup(obd, sizeof(*data), data);
449
450                 if (!err) {
451                         obd->obd_type->typ_refcnt++;
452                         obd->obd_flags |= OBD_SET_UP;
453                 }
454
455                 GOTO(out, err);
456         }
457         case OBD_IOC_CLEANUP: {
458                 /* have we attached a type to this device? */
459                 if (!(obd->obd_flags & OBD_ATTACHED)) {
460                         CERROR("Device %d not attached\n", obd->obd_minor);
461                         GOTO(out, err=-ENODEV);
462                 }
463
464                 if ( OBT(obd) && OBP(obd, cleanup) )
465                         err = obd_cleanup(obd);
466
467                 if (!err) {
468                         obd->obd_flags &= ~OBD_SET_UP;
469                         obd->obd_type->typ_refcnt--;
470                 }
471                 GOTO(out, err);
472         }
473
474         case OBD_IOC_CONNECT: {
475                 char * cluuid = "OBD_CLASS_UUID";
476                 obd_data2conn(&conn, data);
477
478                 err = obd_connect(&conn, obd, cluuid, NULL, NULL);
479
480                 CDEBUG(D_IOCTL, "assigned export "LPX64"\n", conn.addr);
481                 obd_conn2data(data, &conn);
482                 if (err)
483                         GOTO(out, err);
484
485                 err = copy_to_user((void *)arg, data, sizeof(*data));
486                 if (err)
487                         err = -EFAULT;
488                 // XXX save connection data into file handle
489                 GOTO(out, err);
490         }
491
492         case OBD_IOC_DISCONNECT: {
493                 obd_data2conn(&conn, data);
494                 err = obd_disconnect(&conn);
495                 GOTO(out, err);
496         }
497
498         case OBD_IOC_DEC_USE_COUNT: {
499                 CDEBUG(D_IOCTL, "MOD_DEC_USE for force dec: count = %d\n",
500                        atomic_read(&(THIS_MODULE)->uc.usecount) - 1);
501                 MOD_DEC_USE_COUNT;
502                 GOTO(out, err=0);
503         }
504
505         case OBD_IOC_CREATE: {
506                 struct lov_stripe_md *lsm = NULL;
507                 obd_data2conn(&conn, data);
508
509 #warning FIXME: save lsm into file handle for other ops, release on close
510                 err = obd_create(&conn, &data->ioc_obdo1, &lsm);
511                 if (!err) {
512                         err = copy_to_user((void *)arg, data, sizeof(*data));
513                         if (err)
514                                 err = -EFAULT;
515                 }
516                 GOTO(out, err);
517         }
518
519         case OBD_IOC_GETATTR: {
520                 obd_data2conn(&conn, data);
521                 err = obd_getattr(&conn, &data->ioc_obdo1, NULL);
522                 if (!err) {
523                         err = copy_to_user((void *)arg, data, sizeof(*data));
524                         if (err)
525                                 err = -EFAULT;
526                 }
527                 GOTO(out, err);
528         }
529
530         case OBD_IOC_SETATTR: {
531                 obd_data2conn(&conn, data);
532                 err = obd_setattr(&conn, &data->ioc_obdo1, NULL);
533                 if (!err) {
534                         err = copy_to_user((void *)arg, data, sizeof(*data));
535                         if (err)
536                                 err = -EFAULT;
537                 }
538                 GOTO(out, err);
539         }
540
541         case OBD_IOC_DESTROY: {
542                 //void *ea;
543                 obd_data2conn(&conn, data);
544
545                 err = obd_destroy(&conn, &data->ioc_obdo1, NULL);
546                 if (!err) {
547                         err = copy_to_user((void *)arg, data, sizeof(*data));
548                         if (err)
549                                 err = -EFAULT;
550                 }
551                 GOTO(out, err);
552         }
553
554         case OBD_IOC_OPEN: {
555                 struct lov_stripe_md *lsm = NULL; // XXX fill in from create
556
557                 obd_data2conn(&conn, data);
558                 err = obd_open(&conn, &data->ioc_obdo1, lsm);
559                 if (!err) {
560                         err = copy_to_user((void *)arg, data, sizeof(*data));
561                         if (err)
562                                 err = -EFAULT;
563                 }
564                 GOTO(out, err);
565         }
566
567         case OBD_IOC_CLOSE: {
568                 struct lov_stripe_md *lsm = NULL; // XXX fill in from create
569
570                 obd_data2conn(&conn, data);
571                 err = obd_close(&conn, &data->ioc_obdo1, lsm);
572                 GOTO(out, err);
573         }
574
575         case OBD_IOC_BRW_WRITE:
576                 rw = OBD_BRW_WRITE;
577         case OBD_IOC_BRW_READ: {
578                 struct lov_stripe_md tmp_lsm; // XXX fill in from create
579                 struct lov_stripe_md *lsm = &tmp_lsm; // XXX fill in from create
580                 struct brw_cb_data *brw_cbd = ll_init_brw_cb_data();
581                 obd_count       pages = 0;
582                 struct brw_page *pga, *pgp;
583                 __u64 id = data->ioc_obdo1.o_id;
584                 int gfp_mask = (id & 1) ? GFP_HIGHUSER : GFP_KERNEL;
585                 int verify = (id != 0);
586                 __u64 off;
587                 int j;
588
589                 if (!brw_cbd)
590                         GOTO(out, err = -ENOMEM);
591
592                 obd_data2conn(&conn, data);
593
594                 pages = data->ioc_count / PAGE_SIZE;
595                 off = data->ioc_offset;
596
597                 CDEBUG(D_INODE, "BRW %s with %d pages @ "LPX64"\n",
598                        rw == OBD_BRW_READ ? "read" : "write", pages, off);
599                 OBD_ALLOC(pga, pages * sizeof(*pga));
600                 if (!pga) {
601                         CERROR("no memory for %d BRW per-page data\n", pages);
602                         GOTO(brw_free, err = -ENOMEM);
603                 }
604
605                 memset(lsm, 0, sizeof(*lsm)); // XXX don't do this later
606                 lsm->lsm_object_id = id; // ensure id == lsm->lsm_object_id
607
608                 for (j = 0, pgp = pga; j < pages; j++, off += PAGE_SIZE, pgp++){
609                         pgp->pg = alloc_pages(gfp_mask, 0);
610                         if (!pgp->pg) {
611                                 CERROR("no memory for brw pages\n");
612                                 GOTO(brw_cleanup, err = -ENOMEM);
613                         }
614                         pgp->count = PAGE_SIZE;
615                         pgp->off = off;
616                         pgp->flag = 0;
617
618                         if (verify) {
619                                 void *addr = kmap(pgp->pg);
620
621                                 if (rw == OBD_BRW_WRITE)
622                                         page_debug_setup(addr, pgp->count,
623                                                          pgp->off, id);
624                                 else
625                                         page_debug_setup(addr, pgp->count,
626                                                          0xdeadbeef00c0ffee,
627                                                          0xdeadbeef00c0ffee);
628                                 kunmap(pgp->pg);
629                         }
630                 }
631
632                 err = obd_brw(rw, &conn, lsm, j, pga, ll_sync_brw_cb, brw_cbd);
633                 if (err)
634                         CERROR("test_brw: error from obd_brw: err = %d\n", err);
635                 EXIT;
636         brw_cleanup:
637                 for (j = 0, pgp = pga; j < pages; j++, pgp++) {
638                         if (pgp->pg != NULL) {
639                                 if (verify && !err) {
640                                         void *addr = kmap(pgp->pg);
641
642                                         err = page_debug_check("test_brw",
643                                                                addr,
644                                                                PAGE_SIZE,
645                                                                pgp->off,id);
646                                         kunmap(pgp->pg);
647                                 }
648                                 __free_pages(pgp->pg, 0);
649                         }
650                 }
651         brw_free:
652                 OBD_FREE(pga, pages * sizeof(*pga));
653                 GOTO(out, err);
654         }
655         default:
656                 obd_data2conn(&conn, data);
657
658                 err = obd_iocontrol(cmd, &conn, len, data, NULL);
659                 if (err)
660                         GOTO(out, err);
661
662                 err = copy_to_user((void *)arg, data, len);
663                 if (err)
664                         err = -EFAULT;
665                 GOTO(out, err);
666         }
667
668  out:
669         if (buf)
670                 OBD_FREE(buf, len);
671         if (serialised)
672                 up(&obd_conf_sem);
673         RETURN(err);
674 } /* obd_class_ioctl */
675
676
677
678 /* declare character device */
679 static struct file_operations obd_psdev_fops = {
680         ioctl: obd_class_ioctl,       /* ioctl */
681         open: obd_class_open,        /* open */
682         release: obd_class_release,     /* release */
683 };
684
685 /* modules setup */
686 #define OBD_MINOR 241
687 static struct miscdevice obd_psdev = {
688         OBD_MINOR,
689         "obd_psdev",
690         &obd_psdev_fops
691 };
692
693 void (*class_signal_connection_failure)(struct ptlrpc_connection *); /* exported hook, NULL until assigned; NOTE(review): presumably installed by another module - confirm */
694
695 #ifdef CONFIG_HIGHMEM
696 #warning "using kmap accounting for deadlock avoidance"
697 /* Allow at most 3/4 of the kmap mappings to be consumed by vector I/O
698  * requests.  This avoids deadlocks on servers which have a lot of clients
699  * doing vector I/O.  We don't need to do this for non-vector I/O requests
700  * because singleton requests will just block on the kmap itself and never
701  * deadlock waiting for additional kmaps to complete.
702  *
703  * If we are a "server" task, we can have at most a single reservation
704  * in excess of the maximum.  This avoids a deadlock when multiple client
705  * threads are on the same machine as the server threads, and the clients
706  * have consumed all of the available mappings.  As long as a single server
707  * thread is can make progress, we are guaranteed to avoid deadlock.
708  */
709 #define OBD_KMAP_MAX (LAST_PKMAP * 3 / 4)
710 static atomic_t obd_kmap_count = ATOMIC_INIT(OBD_KMAP_MAX);
711 static DECLARE_WAIT_QUEUE_HEAD(obd_kmap_waitq);
712
713 void obd_kmap_get(int count, int server)
714 {
715         //CERROR("getting %d kmap counts (%d/%d)\n", count,
716         //       atomic_read(&obd_kmap_count), OBD_KMAP_MAX);
717         if (count == 1)
718                 atomic_dec(&obd_kmap_count);
719         else while (atomic_add_negative(-count, &obd_kmap_count)) {
720                 static long next_show = 0;
721                 static int skipped = 0;
722
723                 if (server && atomic_read(&obd_kmap_count) >= -PTL_MD_MAX_IOV)
724                         break;
725
726                 CDEBUG(D_OTHER, "negative kmap reserved count: %d\n",
727                        atomic_read(&obd_kmap_count));
728                 atomic_add(count, &obd_kmap_count);
729
730                 if (time_after(jiffies, next_show)) {
731                         CERROR("blocking %s (and %d others) for kmaps\n",
732                                current->comm, skipped);
733                         next_show = jiffies + 5*HZ;
734                         skipped = 0;
735                 } else
736                         skipped++;
737                 wait_event(obd_kmap_waitq,
738                            atomic_read(&obd_kmap_count) >= count);
739         }
740 }
741
742 void obd_kmap_put(int count)
743 {
744         atomic_add(count, &obd_kmap_count);
745         /* Wake up sleepers.  Sadly, this wakes up all of the tasks at once.
746          * We could have something smarter here like:
747         while (atomic_read(&obd_kmap_count) > 0)
748                 wake_up_nr(obd_kmap_waitq, 1);
749         although we would need to set somewhere (probably obd_class_init):
750         obd_kmap_waitq.flags |= WQ_FLAG_EXCLUSIVE;
751         For now the wait_event() condition will handle this OK I believe.
752          */
753         if (atomic_read(&obd_kmap_count) > 0)
754                 wake_up(&obd_kmap_waitq);
755 }
756
757 EXPORT_SYMBOL(obd_kmap_get);
758 EXPORT_SYMBOL(obd_kmap_put);
759 #endif
760
761 EXPORT_SYMBOL(obd_dev);
762 EXPORT_SYMBOL(obdo_cachep);
763 EXPORT_SYMBOL(obd_memory);
764 EXPORT_SYMBOL(obd_fail_loc);
765 EXPORT_SYMBOL(obd_timeout);
766 EXPORT_SYMBOL(obd_recovery_upcall);
767 EXPORT_SYMBOL(ptlrpc_put_connection_superhack);
768
769 EXPORT_SYMBOL(class_register_type);
770 EXPORT_SYMBOL(class_unregister_type);
771 EXPORT_SYMBOL(class_name2dev);
772 EXPORT_SYMBOL(class_uuid2dev);
773 EXPORT_SYMBOL(class_uuid2obd);
774 EXPORT_SYMBOL(class_new_export);
775 EXPORT_SYMBOL(class_destroy_export);
776 EXPORT_SYMBOL(class_connect);
777 EXPORT_SYMBOL(class_conn2export);
778 EXPORT_SYMBOL(class_conn2obd);
779 EXPORT_SYMBOL(class_conn2cliimp);
780 EXPORT_SYMBOL(class_conn2ldlmimp);
781 EXPORT_SYMBOL(class_disconnect);
782 EXPORT_SYMBOL(class_disconnect_all);
783 //EXPORT_SYMBOL(class_uuid_parse);
784 EXPORT_SYMBOL(class_uuid_unparse);
785 //EXPORT_SYMBOL(class_multi_setup);
786 //EXPORT_SYMBOL(class_multi_cleanup);
787
788 EXPORT_SYMBOL(class_signal_connection_failure);
789 EXPORT_SYMBOL(ll_sync_brw_cb);
790 EXPORT_SYMBOL(ll_init_brw_cb_data);
791 EXPORT_SYMBOL(class_nm_to_type);
792
793 static int __init init_obdclass(void)
794 {
795         struct obd_device *obd;
796         int err;
797         int i;
798
799         printk(KERN_INFO "OBD class driver  v0.9, info@clusterfs.com\n");
800
801         sema_init(&obd_conf_sem, 1);
802         INIT_LIST_HEAD(&obd_types);
803
804         if ((err = misc_register(&obd_psdev))) {
805                 CERROR("cannot register %d err %d\n", OBD_MINOR, err);
806                 return err;
807         }
808
809         /* This struct is already zerod for us (static global) */
810         for (i = 0, obd = obd_dev; i < MAX_OBD_DEVICES; i++, obd++)
811                 obd->obd_minor = i;
812
813         err = obd_init_caches();
814
815         if (err)
816                 return err;
817         obd_sysctl_init();
818
819         err=lprocfs_reg_main();
820
821         return 0;
822 }
823
824 static void __exit cleanup_obdclass(void)
825 {
826         int i, err;
827         ENTRY;
828
829         misc_deregister(&obd_psdev);
830         for (i = 0; i < MAX_OBD_DEVICES; i++) {
831                 struct obd_device *obd = &obd_dev[i];
832                 if (obd->obd_type && (obd->obd_flags & OBD_SET_UP) &&
833                     OBT(obd) && OBP(obd, detach)) {
834                         /* XXX should this call generic detach otherwise? */
835                         OBP(obd, detach)(obd);
836                 }
837         }
838
839         obd_cleanup_caches();
840         obd_sysctl_clean();
841
842         err = lprocfs_dereg_main();
843
844         CERROR("obd memory leaked: %ld bytes\n", obd_memory);
845         EXIT;
846 }
847
848 /* Check that we're building against the appropriate version of the Lustre
849  * kernel patch */
850 #include <linux/lustre_version.h>
851 #if (LUSTRE_KERNEL_VERSION != 2)
852 # error Cannot continue: Your Lustre kernel patch is out of date
853 #endif
854
855 MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
856 MODULE_DESCRIPTION("Lustre Class Driver v1.0");
857 MODULE_LICENSE("GPL");
858
859 module_init(init_obdclass);
860 module_exit(cleanup_obdclass);