Whamcloud - gitweb
Change object "refcounts" to be dgets instead of igets, where possible,
[fs/lustre-release.git] / lustre / obdfilter / filter.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  *  linux/fs/filter/filter.c
5  *
6  * Copyright (C) 2001  Cluster File Systems, Inc.
7  *
8  * This code is issued under the GNU General Public License.
9  * See the file COPYING in this distribution
10  *
11  * by Peter Braam <braam@clusterfs.com>
12  */
13
14 #define EXPORT_SYMTAB
15 #define DEBUG_SUBSYSTEM S_FILTER
16
17 #include <linux/module.h>
18 #include <linux/obd_filter.h>
19
20 extern struct obd_device obd_dev[MAX_OBD_DEVICES];
21 long filter_memory;
22
23 #define FILTER_ROOTINO 2
24
#define S_SHIFT 12

/*
 * Name of the per-type subdirectory of "O", indexed by
 * (mode & S_IFMT) >> S_SHIFT.  Slots without an initializer are NULL.
 */
static char *obd_type_by_mode[S_IFMT >> S_SHIFT] = {
        [0]                     = NULL,
        [S_IFREG >> S_SHIFT]    = "R",
        [S_IFDIR >> S_SHIFT]    = "D",
        [S_IFCHR >> S_SHIFT]    = "C",
        [S_IFBLK >> S_SHIFT]    = "B",
        [S_IFIFO >> S_SHIFT]    = "F",
        [S_IFSOCK >> S_SHIFT]   = "S",
        [S_IFLNK >> S_SHIFT]    = "L"
};

/*
 * Map a file mode to the name of its object type directory.
 *
 * Returns NULL if the type bits of mode have no entry in the table.  This
 * includes a mode with every S_IFMT bit set, whose index would otherwise
 * land one slot past the end of obd_type_by_mode[].
 */
static inline const char *obd_mode_to_type(int mode)
{
        unsigned int idx = (mode & S_IFMT) >> S_SHIFT;

        if (idx >= sizeof(obd_type_by_mode) / sizeof(obd_type_by_mode[0]))
                return NULL;

        return obd_type_by_mode[idx];
}
41
42 /* write the pathname into the string */
43 static int filter_id(char *buf, obd_id id, obd_mode mode)
44 {
45         return sprintf(buf, "O/%s/%Ld", obd_mode_to_type(mode),
46                        (unsigned long long)id);
47 }
48
49 /* setup the object store with correct subdirectories */
50 static int filter_prep(struct obd_device *obddev)
51 {
52         struct obd_run_ctxt saved;
53         struct filter_obd *filter = &obddev->u.filter;
54         struct dentry *dentry;
55         struct file *file;
56         struct inode *inode;
57         loff_t off;
58         int rc = 0;
59         char rootid[128];
60         __u64 lastino = 2;
61         int mode = 0;
62
63         push_ctxt(&saved, &filter->fo_ctxt);
64         dentry = simple_mkdir(current->fs->pwd, "O", 0700);
65         CDEBUG(D_INODE, "got/created O: %p\n", dentry);
66         if (IS_ERR(dentry)) {
67                 rc = PTR_ERR(dentry);
68                 CERROR("cannot open/create O: rc = %d\n", rc);
69                 GOTO(out, rc);
70         }
71         filter->fo_dentry_O = dentry;
72         dentry = simple_mkdir(current->fs->pwd, "P", 0700);
73         CDEBUG(D_INODE, "got/created P: %p\n", dentry);
74         if (IS_ERR(dentry)) {
75                 rc = PTR_ERR(dentry);
76                 CERROR("cannot open/create P: rc = %d\n", rc);
77                 GOTO(out_O, rc);
78         }
79         CDEBUG(D_INODE, "putting P: %p, count = %d\n", dentry,
80                atomic_read(&dentry->d_count) - 1);
81         dput(dentry);
82         dentry = simple_mkdir(current->fs->pwd, "D", 0700);
83         CDEBUG(D_INODE, "got/created D: %p\n", dentry);
84         if (IS_ERR(dentry)) {
85                 rc = PTR_ERR(dentry);
86                 CERROR("cannot open/create D: rc = %d\n", rc);
87                 GOTO(out_O, rc);
88         }
89         CDEBUG(D_INODE, "putting D: %p, count = %d\n", dentry,
90                atomic_read(&dentry->d_count) - 1);
91         dput(dentry);
92
93         /*
94          * Create directories and/or get dentries for each object type.
95          * This saves us from having to do multiple lookups for each one.
96          */
97         for (mode = 0; mode < (S_IFMT >> S_SHIFT); mode++) {
98                 char *type = obd_type_by_mode[mode];
99
100                 if (!type) {
101                         filter->fo_dentry_O_mode[mode] = NULL;
102                         continue;
103                 }
104                 dentry = simple_mkdir(filter->fo_dentry_O, type, 0700);
105                 CDEBUG(D_INODE, "got/created O/%s: %p\n", type, dentry);
106                 if (IS_ERR(dentry)) {
107                         rc = PTR_ERR(dentry);
108                         CERROR("cannot create O/%s: rc = %d\n", type, rc);
109                         GOTO(out_O_mode, rc);
110                 }
111                 filter->fo_dentry_O_mode[mode] = dentry;
112         }
113
114         filter_id(rootid, FILTER_ROOTINO, S_IFDIR);
115         file = filp_open(rootid, O_RDWR | O_CREAT, 0755);
116         if (IS_ERR(file)) {
117                 rc = PTR_ERR(file);
118                 CERROR("OBD filter: cannot open/create root %s: rc = %d\n",
119                        rootid, rc);
120                 GOTO(out_O_mode, rc);
121         }
122         filp_close(file, 0);
123
124         file = filp_open("D/status", O_RDWR | O_CREAT, 0700);
125         if ( !file || IS_ERR(file) ) {
126                 rc = PTR_ERR(file);
127                 CERROR("OBD filter: cannot open/create status %s: rc = %d\n",
128                        "D/status", rc);
129                 GOTO(out_O_mode, rc);
130         }
131
132         /* steal operations */
133         inode = file->f_dentry->d_inode;
134         filter->fo_fop = file->f_op;
135         filter->fo_iop = inode->i_op;
136         filter->fo_aops = inode->i_mapping->a_ops;
137
138         off = 0;
139         if (inode->i_size == 0) {
140                 ssize_t retval = file->f_op->write(file, (char *)&lastino,
141                                                    sizeof(lastino), &off);
142                 if (retval != sizeof(lastino)) {
143                         CDEBUG(D_INODE, "OBD filter: error writing lastino\n");
144                         filp_close(file, 0);
145                         GOTO(out_O_mode, rc = -EIO);
146                 }
147         } else {
148                 ssize_t retval = file->f_op->read(file, (char *)&lastino,
149                                                   sizeof(lastino), &off);
150                 if (retval != sizeof(lastino)) {
151                         CDEBUG(D_INODE, "OBD filter: error reading lastino\n");
152                         filp_close(file, 0);
153                         GOTO(out_O_mode, rc = -EIO);
154                 }
155         }
156         filter->fo_lastino = lastino;
157         filp_close(file, 0);
158
159         rc = 0;
160  out:
161         pop_ctxt(&saved);
162
163         return(rc);
164
165 out_O_mode:
166         while (--mode >= 0) {
167                 struct dentry *dentry = filter->fo_dentry_O_mode[mode];
168                 if (dentry) {
169                         CDEBUG(D_INODE, "putting O/%s: %p, count = %d\n",
170                                obd_type_by_mode[mode], dentry,
171                                atomic_read(&dentry->d_count) - 1);
172                         dput(dentry);
173                         filter->fo_dentry_O_mode[mode] = NULL;
174                 }
175         }
176 out_O:
177         CDEBUG(D_INODE, "putting O: %p, count = %d\n", filter->fo_dentry_O,
178                atomic_read(&filter->fo_dentry_O->d_count) - 1);
179         dput(filter->fo_dentry_O);
180         filter->fo_dentry_O = NULL;
181         goto out;
182 }
183
184 /* cleanup the filter: write last used object id to status file */
185 static void filter_post(struct obd_device *obddev)
186 {
187         struct obd_run_ctxt saved;
188         struct filter_obd *filter = &obddev->u.filter;
189         long rc;
190         struct file *file;
191         loff_t off = 0;
192         int mode;
193
194         push_ctxt(&saved, &filter->fo_ctxt);
195         file = filp_open("D/status", O_RDWR | O_CREAT, 0700);
196         if (IS_ERR(file)) {
197                 CERROR("OBD filter: cannot create status file\n");
198                 goto out;
199         }
200         rc = file->f_op->write(file, (char *)&filter->fo_lastino,
201                        sizeof(filter->fo_lastino), &off);
202         if (rc != sizeof(filter->fo_lastino))
203                 CERROR("OBD filter: error writing lastino: rc = %ld\n", rc);
204
205         rc = filp_close(file, NULL);
206         if (rc)
207                 CERROR("OBD filter: cannot close status file: rc = %ld\n", rc);
208
209         for (mode = 0; mode < (S_IFMT >> S_SHIFT); mode++) {
210                 struct dentry *dentry = filter->fo_dentry_O_mode[mode];
211                 if (dentry) {
212                         CDEBUG(D_INODE, "putting O/%s: %p, count = %d\n",
213                                obd_type_by_mode[mode], dentry,
214                                atomic_read(&dentry->d_count) - 1);
215                         dput(dentry);
216                         filter->fo_dentry_O_mode[mode] = NULL;
217                 }
218         }
219         CDEBUG(D_INODE, "putting O: %p, count = %d\n", filter->fo_dentry_O,
220                atomic_read(&filter->fo_dentry_O->d_count) - 1);
221         dput(filter->fo_dentry_O);
222 out:
223         pop_ctxt(&saved);
224 }
225
226
227 static __u64 filter_next_id(struct obd_device *obddev)
228 {
229         __u64 id;
230         spin_lock(&obddev->u.filter.fo_lock);
231         obddev->u.filter.fo_lastino++;
232         id =    obddev->u.filter.fo_lastino;
233         spin_unlock(&obddev->u.filter.fo_lock);
234         return id;
235 }
236
237 /* how to get files, dentries, inodes from object id's */
238 /* parent i_sem is already held if needed for exclusivity */
239 static struct dentry *filter_fid2dentry(struct obd_device *obddev,
240                                         struct dentry *dparent,
241                                         __u64 id, __u32 type)
242 {
243         struct super_block *sb = obddev->u.filter.fo_sb;
244         struct dentry *dchild;
245         char name[32];
246         int len;
247         ENTRY;
248
249         if (!sb || !sb->s_dev) {
250                 CERROR("fatal: device not initialized.\n");
251                 RETURN(ERR_PTR(-ENXIO));
252         }
253
254         if (id == 0) {
255                 CERROR("fatal: invalid object #0\n");
256                 RETURN(ERR_PTR(-ESTALE));
257         }
258
259         if (!(type & S_IFMT)) {
260                 CERROR("OBD %s, object %Lu has bad type: %o\n", __FUNCTION__,
261                        (unsigned long long)id, type);
262                 RETURN(ERR_PTR(-EINVAL));
263         }
264
265         len = sprintf(name, "%Ld", id);
266         CDEBUG(D_INODE, "opening object O/%s/%s\n", obd_mode_to_type(type),
267                name);
268         dchild = lookup_one_len(name, dparent, len);
269         CDEBUG(D_INODE, "got child obj O/%s/%s: %p, count = %d\n",
270                obd_mode_to_type(type), name, dchild,
271                atomic_read(&dchild->d_count));
272
273         if (IS_ERR(dchild)) {
274                 CERROR("child lookup error %ld\n", PTR_ERR(dchild));
275                 RETURN(dchild);
276         }
277
278         RETURN(dchild);
279 }
280
281 static struct file *filter_obj_open(struct obd_device *obddev,
282                                     __u64 id, __u32 type)
283 {
284         struct super_block *sb = obddev->u.filter.fo_sb;
285         struct obd_run_ctxt saved;
286         char name[24];
287         struct file *file;
288         ENTRY;
289
290         if (!sb || !sb->s_dev) {
291                 CERROR("fatal: device not initialized.\n");
292                 RETURN(ERR_PTR(-ENXIO));
293         }
294
295         if (!id) {
296                 CERROR("fatal: invalid obdo %Lu\n", (unsigned long long)id);
297                 RETURN(ERR_PTR(-ESTALE));
298         }
299
300         if (!(type & S_IFMT)) {
301                 CERROR("OBD %s, no type (%Ld), mode %o!\n", __FUNCTION__,
302                        (unsigned long long)id, type);
303                 RETURN(ERR_PTR(-EINVAL));
304         }
305
306         filter_id(name, id, type);
307         push_ctxt(&saved, &obddev->u.filter.fo_ctxt);
308         file = filp_open(name, O_RDONLY | O_LARGEFILE, 0 /* type? */);
309         pop_ctxt(&saved);
310
311         CDEBUG(D_INODE, "opening obdo %s: rc = %p\n", name, file);
312
313         if (IS_ERR(file))
314                 file = NULL;
315         RETURN(file);
316 }
317
318 static struct dentry *filter_parent(struct obd_device *obddev, obd_mode mode)
319 {
320         struct filter_obd *filter = &obddev->u.filter;
321
322         return filter->fo_dentry_O_mode[(mode & S_IFMT) >> S_SHIFT];
323 }
324
325
326 static struct inode *filter_inode_from_obj(struct obd_device *obddev,
327                                            __u64 id, __u32 type)
328 {
329         struct dentry *dentry;
330         struct inode *inode;
331
332         dentry = filter_fid2dentry(obddev, filter_parent(obddev, type),
333                                    id, type);
334         if (IS_ERR(dentry)) {
335                 CERROR("%s: lookup failed: rc = %ld\n", __FUNCTION__,
336                        PTR_ERR(dentry));
337                 RETURN(NULL);
338         }
339
340         lock_kernel();
341         inode = iget(dentry->d_inode->i_sb, dentry->d_inode->i_ino);
342         unlock_kernel();
343         CDEBUG(D_INODE, "put child %p, count = %d\n", dentry,
344                atomic_read(&dentry->d_count) - 1);
345         dput(dentry);
346         CDEBUG(D_INODE, "got inode %p (%ld), count = %d\n", inode, inode->i_ino,
347                atomic_read(&inode->i_count));
348         return inode;
349 }
350
351 /* obd methods */
352 static int filter_connect(struct obd_conn *conn)
353 {
354         int rc;
355
356         MOD_INC_USE_COUNT;
357         rc = gen_connect(conn);
358
359         if (rc)
360                 MOD_DEC_USE_COUNT;
361
362         return rc;
363 }
364
365 static int filter_disconnect(struct obd_conn *conn)
366 {
367         int rc;
368
369         rc = gen_disconnect(conn);
370         if (!rc)
371                 MOD_DEC_USE_COUNT;
372
373         /* XXX cleanup preallocated inodes */
374         return rc;
375 }
376
377 /* mount the file system (secretly) */
378 static int filter_setup(struct obd_device *obddev, obd_count len, void *buf)
379 {
380         struct obd_ioctl_data* data = buf;
381         struct filter_obd *filter;
382         struct vfsmount *mnt;
383         int err = 0;
384         ENTRY;
385
386         if (!data->ioc_inlbuf1 || !data->ioc_inlbuf2)
387                 RETURN(-EINVAL);
388
389         MOD_INC_USE_COUNT;
390         mnt = do_kern_mount(data->ioc_inlbuf2, 0, data->ioc_inlbuf1, NULL);
391         err = PTR_ERR(mnt);
392         if (IS_ERR(mnt))
393                 GOTO(err_dec, err);
394
395         filter = &obddev->u.filter;;
396         filter->fo_sb = mnt->mnt_root->d_inode->i_sb;
397         /* XXX is this even possible if do_kern_mount succeeded? */
398         if (!filter->fo_sb)
399                 GOTO(err_put, err = -ENODEV);
400
401         filter->fo_vfsmnt = mnt;
402         filter->fo_fstype = strdup(data->ioc_inlbuf2);
403
404         filter->fo_ctxt.pwdmnt = mnt;
405         filter->fo_ctxt.pwd = mnt->mnt_root;
406         filter->fo_ctxt.fs = KERNEL_DS;
407
408         err = filter_prep(obddev);
409         if (err)
410                 GOTO(err_kfree, err);
411         spin_lock_init(&filter->fo_lock);
412
413         RETURN(0);
414
415 err_kfree:
416         kfree(filter->fo_fstype);
417 err_put:
418         unlock_kernel();
419         mntput(filter->fo_vfsmnt);
420         filter->fo_sb = 0;
421         lock_kernel();
422
423 err_dec:
424         MOD_DEC_USE_COUNT;
425         return err;
426 }
427
428
429 static int filter_cleanup(struct obd_device * obddev)
430 {
431         struct super_block *sb;
432         ENTRY;
433
434         if (!(obddev->obd_flags & OBD_SET_UP))
435                 RETURN(0);
436
437         if (!list_empty(&obddev->obd_gen_clients)) {
438                 CERROR("still has clients!\n");
439                 RETURN(-EBUSY);
440         }
441
442         sb = obddev->u.filter.fo_sb;
443         if (!obddev->u.filter.fo_sb)
444                 RETURN(0);
445
446         filter_post(obddev);
447
448         shrink_dcache_parent(sb->s_root);
449         unlock_kernel();
450         mntput(obddev->u.filter.fo_vfsmnt);
451         obddev->u.filter.fo_sb = 0;
452         kfree(obddev->u.filter.fo_fstype);
453
454         lock_kernel();
455
456         MOD_DEC_USE_COUNT;
457         RETURN(0);
458 }
459
460
461 static inline void filter_from_inode(struct obdo *oa, struct inode *inode)
462 {
463         int type = oa->o_mode & S_IFMT;
464         ENTRY;
465
466         CDEBUG(D_INFO, "src inode %ld, dst obdo %ld valid 0x%08x\n",
467                inode->i_ino, (long)oa->o_id, oa->o_valid);
468         obdo_from_inode(oa, inode);
469         oa->o_mode &= ~S_IFMT;
470         oa->o_mode |= type;
471
472         if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) {
473                 obd_rdev rdev = kdev_t_to_nr(inode->i_rdev);
474                 CDEBUG(D_INODE, "copying device %x from inode to obdo\n",
475                        rdev);
476                 *((obd_rdev *)oa->o_inline) = rdev;
477                 oa->o_obdflags |= OBD_FL_INLINEDATA;
478                 oa->o_valid |= OBD_MD_FLINLINE;
479         }
480
481 #if 0
482         else if (filter_has_inline(inode)) {
483                 CDEBUG(D_INFO, "copying inline from inode to obdo\n");
484                 memcpy(oa->o_inline, inode->u.ext2_i.i_data,
485                        MIN(sizeof(inode->u.ext2_i.i_data),OBD_INLINESZ));
486                 oa->o_obdflags |= OBD_FL_INLINEDATA;
487                 oa->o_valid |= OBD_MD_FLINLINE;
488         }
489
490         if (filter_has_obdmd(inode)) {
491                 /* XXX this will change when we don't store the obdmd in data */
492                 CDEBUG(D_INFO, "copying obdmd from inode to obdo\n");
493                 memcpy(oa->o_obdmd, inode->u.ext2_i.i_data,
494                        MIN(sizeof(inode->u.ext2_i.i_data),OBD_INLINESZ));
495                 oa->o_obdflags |= OBD_FL_OBDMDEXISTS;
496                 oa->o_valid |= OBD_MD_FLOBDMD;
497         }
498 #endif
499         EXIT;
500 }
501
502 static int filter_getattr(struct obd_conn *conn, struct obdo *oa)
503 {
504         struct obd_device *obddev;
505         struct dentry *dentry;
506         ENTRY;
507
508         if (!gen_client(conn)) {
509                 CDEBUG(D_IOCTL, "fatal: invalid client %u\n", conn->oc_id);
510                 RETURN(-EINVAL);
511         }
512
513         obddev = conn->oc_dev;
514         dentry = filter_fid2dentry(obddev, filter_parent(obddev, oa->o_mode),
515                                    oa->o_id, oa->o_mode);
516         if (IS_ERR(dentry))
517                 RETURN(PTR_ERR(dentry));
518
519         oa->o_valid &= ~OBD_MD_FLID;
520         filter_from_inode(oa, dentry->d_inode);
521
522         dput(dentry);
523         RETURN(0);
524 }
525
526 static int filter_setattr(struct obd_conn *conn, struct obdo *oa)
527 {
528         struct obd_run_ctxt saved;
529         struct obd_device *obddev;
530         struct dentry *dentry;
531         struct iattr iattr;
532         struct inode *inode;
533         int rc;
534         ENTRY;
535
536         if (!gen_client(conn)) {
537                 CDEBUG(D_IOCTL, "invalid client %u\n", conn->oc_id);
538                 RETURN(-EINVAL);
539         }
540
541         obddev = conn->oc_dev;
542         dentry = filter_fid2dentry(obddev, filter_parent(obddev, oa->o_mode),
543                                    oa->o_id, oa->o_mode);
544         if (IS_ERR(dentry))
545                 RETURN(PTR_ERR(dentry));
546
547         inode = dentry->d_inode;
548         iattr_from_obdo(&iattr, oa);
549         iattr.ia_mode &= ~S_IFMT;
550         iattr.ia_mode |= S_IFREG;
551         lock_kernel();
552         if (iattr.ia_mode & ATTR_SIZE)
553                 down(&inode->i_sem);
554         push_ctxt(&saved, &conn->oc_dev->u.filter.fo_ctxt);
555         if (inode->i_op->setattr)
556                 rc = inode->i_op->setattr(dentry, &iattr);
557         else
558                 rc = inode_setattr(inode, &iattr);
559         pop_ctxt(&saved);
560         if (iattr.ia_mode & ATTR_SIZE)
561                 up(&inode->i_sem);
562         unlock_kernel();
563
564         CDEBUG(D_INODE, "put dentry %p, count = %d\n", inode,
565                atomic_read(&dentry->d_count) - 1);
566         dput(dentry);
567         RETURN(rc);
568 }
569
570 static int filter_open(struct obd_conn *conn, struct obdo *oa)
571 {
572         struct obd_device *obddev;
573         struct dentry *dentry;
574         /* ENTRY; */
575
576         if (!gen_client(conn)) {
577                 CDEBUG(D_IOCTL, "fatal: invalid client %u\n", conn->oc_id);
578                 RETURN(-EINVAL);
579         }
580
581         obddev = conn->oc_dev;
582         dentry = filter_fid2dentry(obddev, filter_parent(obddev, oa->o_mode),
583                                    oa->o_id, oa->o_mode);
584         if (IS_ERR(dentry))
585                 RETURN(PTR_ERR(dentry));
586
587         return 0;
588 } /* filter_open */
589
590 static int filter_close(struct obd_conn *conn, struct obdo *oa)
591 {
592         struct obd_device *obddev;
593         struct dentry *dentry;
594         /* ENTRY; */
595
596         if (!gen_client(conn)) {
597                 CDEBUG(D_IOCTL, "fatal: invalid client %u\n", conn->oc_id);
598                 RETURN(-EINVAL);
599         }
600
601         obddev = conn->oc_dev;
602         dentry = filter_fid2dentry(obddev, filter_parent(obddev, oa->o_mode),
603                                    oa->o_id, oa->o_mode);
604         if (IS_ERR(dentry))
605                 RETURN(PTR_ERR(dentry));
606
607         CDEBUG(D_INODE, "put dentry %p, count = %d\n", dentry,
608                atomic_read(&dentry->d_count) - 1);
609         dput(dentry);  /* for the close */
610         CDEBUG(D_INODE, "put dentry %p, count = %d\n", dentry,
611                atomic_read(&dentry->d_count) - 1);
612         dput(dentry);  /* for this call */
613         return 0;
614 } /* filter_close */
615
616 static int filter_create(struct obd_conn* conn, struct obdo *oa)
617 {
618         char name[64];
619         struct obd_run_ctxt saved;
620         struct file *file;
621         int mode;
622         struct obd_device *obddev = conn->oc_dev;
623         struct iattr;
624         ENTRY;
625
626         if (!gen_client(conn)) {
627                 CERROR("invalid client %u\n", conn->oc_id);
628                 return -EINVAL;
629         }
630
631         oa->o_id = filter_next_id(conn->oc_dev);
632         if (!(oa->o_mode && S_IFMT)) {
633                 CERROR("filter obd: no type!\n");
634                 return -ENOENT;
635         }
636
637         filter_id(name, oa->o_id, oa->o_mode);
638         mode = (oa->o_mode & ~S_IFMT) | S_IFREG;
639         push_ctxt(&saved, &obddev->u.filter.fo_ctxt);
640         file = filp_open(name, O_RDONLY | O_CREAT, mode);
641         pop_ctxt(&saved);
642         if (IS_ERR(file)) {
643                 CERROR("Error mknod obj %s, err %ld\n", name, PTR_ERR(file));
644                 return -ENOENT;
645         }
646         filp_close(file, 0);
647
648         /* Set flags for fields we have set in the inode struct */
649         oa->o_valid |= OBD_MD_FLID | OBD_MD_FLBLKSZ | OBD_MD_FLBLOCKS |
650                  OBD_MD_FLMTIME | OBD_MD_FLATIME | OBD_MD_FLCTIME |
651                  OBD_MD_FLUID | OBD_MD_FLGID;
652
653         /* XXX Hmm, shouldn't we copy the fields into the obdo here? */
654         return 0;
655 }
656
657 static int filter_destroy(struct obd_conn *conn, struct obdo *oa)
658 {
659         struct obd_run_ctxt saved;
660         struct obd_device *obddev;
661         struct obd_client *cli;
662         struct inode *inode;
663         struct dentry *dir_dentry, *object_dentry;
664         int rc;
665         ENTRY;
666
667         if (!(cli = gen_client(conn))) {
668                 CERROR("invalid client %u\n", conn->oc_id);
669                 RETURN(-EINVAL);
670         }
671
672         CDEBUG(D_INODE, "destroying object %Ld\n",oa->o_id);
673         obddev = conn->oc_dev;
674
675         dir_dentry = filter_parent(obddev, oa->o_mode);
676         down(&dir_dentry->d_inode->i_sem);
677
678         object_dentry = filter_fid2dentry(obddev, dir_dentry, oa->o_id,
679                                           oa->o_mode);
680         if (IS_ERR(object_dentry))
681                 GOTO(out, rc = -ENOENT);
682
683         inode = object_dentry->d_inode;
684         if (inode->i_nlink != 1) {
685                 CERROR("destroying inode with nlink = %d\n", inode->i_nlink);
686                 inode->i_nlink = 1;
687         }
688         inode->i_mode = S_IFREG;
689
690         push_ctxt(&saved, &obddev->u.filter.fo_ctxt);
691         rc = vfs_unlink(dir_dentry->d_inode, object_dentry);
692         pop_ctxt(&saved);
693         CDEBUG(D_INODE, "put child %p, count = %d\n", object_dentry,
694                atomic_read(&object_dentry->d_count) - 1);
695         dput(object_dentry);
696
697         EXIT;
698 out:
699         up(&dir_dentry->d_inode->i_sem);
700         return rc;
701 }
702
703 /* NB count and offset are used for punch, but not truncate */
704 static int filter_truncate(struct obd_conn *conn, struct obdo *oa,
705                            obd_size count, obd_off offset)
706 {
707         int error;
708         ENTRY;
709
710         CDEBUG(D_INODE, "calling truncate for object #%Ld, valid = %x, "
711                "o_size = %Ld\n", oa->o_id, oa->o_valid, oa->o_size);
712         error = filter_setattr(conn, oa);
713         oa->o_valid = OBD_MD_FLBLOCKS | OBD_MD_FLCTIME | OBD_MD_FLMTIME;
714
715         RETURN(error);
716 }
717
718 /* buffer must lie in user memory here */
719 static int filter_read(struct obd_conn *conn, struct obdo *oa, char *buf,
720                         obd_size *count, obd_off offset)
721 {
722         struct file * file;
723         unsigned long retval;
724         int err;
725         ENTRY;
726
727         if (!gen_client(conn)) {
728                 CDEBUG(D_IOCTL, "invalid client %u\n", conn->oc_id);
729                 RETURN(-EINVAL);
730         }
731
732         file = filter_obj_open(conn->oc_dev, oa->o_id, oa->o_mode);
733         if (IS_ERR(file))
734                 RETURN(PTR_ERR(file));
735
736         /* count doubles as retval */
737         retval = file->f_op->read(file, buf, *count, (loff_t *)&offset);
738         filp_close(file, 0);
739
740         if ( retval >= 0 ) {
741                 err = 0;
742                 *count = retval;
743         } else {
744                 err = retval;
745                 *count = 0;
746         }
747
748         return err;
749 }
750
751
752 /* buffer must lie in user memory here */
753 static int filter_write(struct obd_conn *conn, struct obdo *oa, char *buf,
754                          obd_size *count, obd_off offset)
755 {
756         struct obd_run_ctxt saved;
757         int err;
758         struct file * file;
759         unsigned long retval;
760         ENTRY;
761
762         if (!gen_client(conn)) {
763                 CDEBUG(D_IOCTL, "invalid client %u\n", conn->oc_id);
764                 RETURN(-EINVAL);
765         }
766
767         file = filter_obj_open(conn->oc_dev, oa->o_id, oa->o_mode);
768         if (IS_ERR(file))
769                 RETURN(PTR_ERR(file));
770
771         /* count doubles as retval */
772         push_ctxt(&saved, &conn->oc_dev->u.filter.fo_ctxt);
773         retval = file->f_op->write(file, buf, *count, (loff_t *)&offset);
774         pop_ctxt(&saved);
775         filp_close(file, 0);
776
777         if ( retval >= 0 ) {
778                 err = 0;
779                 *count = retval;
780                 EXIT;
781         } else {
782                 err = retval;
783                 *count = 0;
784                 EXIT;
785         }
786
787         return err;
788 } /* filter_write */
789
790 static int filter_pgcache_brw(int rw, struct obd_conn *conn, obd_count num_oa,
791                                struct obdo **oa, obd_count *oa_bufs,
792                                struct page **pages, obd_size *count,
793                                obd_off *offset, obd_flag *flags)
794 {
795         struct obd_run_ctxt      saved;
796         struct super_block      *sb;
797         int                      onum;          /* index to oas */
798         int                      pnum;          /* index to pages (bufs) */
799         unsigned long            retval;
800         int                      error;
801         struct file             *file;
802         ENTRY;
803
804         if (!gen_client(conn)) {
805                 CDEBUG(D_IOCTL, "invalid client %u\n", conn->oc_id);
806                 RETURN(-EINVAL);
807         }
808
809         sb = conn->oc_dev->u.filter.fo_sb;
810         // if (rw == WRITE)
811         push_ctxt(&saved, &conn->oc_dev->u.filter.fo_ctxt);
812         pnum = 0; /* pnum indexes buf 0..num_pages */
813         for (onum = 0; onum < num_oa; onum++) {
814                 int pg;
815
816                 file = filter_obj_open(conn->oc_dev, oa[onum]->o_id,
817                                        oa[onum]->o_mode);
818                 if (IS_ERR(file))
819                         GOTO(out, retval = PTR_ERR(file));
820
821                 /* count doubles as retval */
822                 for (pg = 0; pg < oa_bufs[onum]; pg++) {
823                         CDEBUG(D_INODE, "OP %d obdo no/pno: (%d,%d) (%ld,%ld) "
824                                "off count (%Ld,%Ld)\n",
825                                rw, onum, pnum, file->f_dentry->d_inode->i_ino,
826                                (unsigned long)offset[pnum] >> PAGE_CACHE_SHIFT,
827                                (unsigned long long)offset[pnum],
828                                (unsigned long long)count[pnum]);
829                         if (rw == WRITE) {
830                                 loff_t off;
831                                 char *buffer;
832                                 off = offset[pnum];
833                                 buffer = kmap(pages[pnum]);
834                                 retval = file->f_op->write(file, buffer, count[pnum], &off);
835                                 kunmap(pages[pnum]);
836                                 CDEBUG(D_INODE, "retval %ld\n", retval);
837                         } else {
838                                 loff_t off = offset[pnum];
839                                 char *buffer = kmap(pages[pnum]);
840
841                                 if (off >= file->f_dentry->d_inode->i_size) {
842                                         memset(buffer, 0, count[pnum]);
843                                         retval = count[pnum];
844                                 } else {
845                                         retval = file->f_op->read(file, buffer, count[pnum], &off);
846                                 }
847                                 kunmap(pages[pnum]);
848
849                                 if (retval != count[pnum]) {
850                                         filp_close(file, 0);
851                                         GOTO(out, retval = -EIO);
852                                 }
853                                 CDEBUG(D_INODE, "retval %ld\n", retval);
854                         }
855                         pnum++;
856                 }
857                 /* sizes and blocks are set by generic_file_write */
858                 /* ctimes/mtimes will follow with a setattr call */
859                 filp_close(file, 0);
860         }
861
862         EXIT;
863 out:
864         // if (rw == WRITE)
865         pop_ctxt(&saved);
866         error = (retval >= 0) ? 0 : retval;
867         return error;
868 }
869
870
871 struct inode *ioobj_to_inode(struct obd_conn *conn, struct obd_ioobj *o)
872 {
873         struct super_block *sb = conn->oc_dev->u.filter.fo_sb;
874         struct inode *inode = NULL;
875         ENTRY;
876
877         if (!sb || !sb->s_dev) {
878                 CDEBUG(D_SUPER, "fatal: device not initialized.\n");
879                 RETURN(NULL);
880         }
881
882         if ( !o->ioo_id ) {
883                 CDEBUG(D_INODE, "fatal: invalid obdo %lu\n", (long)o->ioo_id);
884                 RETURN(NULL);
885         }
886
887         inode = filter_inode_from_obj(conn->oc_dev, o->ioo_id, S_IFREG);
888         if (!inode || inode->i_nlink == 0 || is_bad_inode(inode)) {
889                 CERROR("from obdo - fatal: invalid inode %ld (%s).\n",
890                        (long)o->ioo_id, inode ? inode->i_nlink ? "bad inode" :
891                        "no links" : "NULL");
892                 iput(inode);
893                 RETURN(NULL);
894         }
895
896         RETURN(inode);
897 }
898
899 static int filter_preprw(int cmd, struct obd_conn *conn,
900                          int objcount, struct obd_ioobj *obj,
901                          int niocount, struct niobuf_remote *nb,
902                          struct niobuf_local *res)
903 {
904         struct obd_run_ctxt saved;
905         struct obd_ioobj *o = obj;
906         struct niobuf_remote *b = nb;
907         struct niobuf_local *r = res;
908         int i;
909         ENTRY;
910
911         memset(res, 0, sizeof(*res) * niocount);
912
913         // if (cmd == OBD_BRW_WRITE)
914         push_ctxt(&saved, &conn->oc_dev->u.filter.fo_ctxt);
915         for (i = 0; i < objcount; i++, o++) {
916                 int j;
917                 for (j = 0; j < o->ioo_bufcnt; j++, b++, r++) {
918                         unsigned long index = b->offset >> PAGE_SHIFT;
919                         struct inode *inode = ioobj_to_inode(conn, o);
920                         struct page *page;
921
922                         /* FIXME: we need to iput all inodes on error */
923                         if (!inode)
924                                 RETURN(-EINVAL);
925
926                         if (cmd == OBD_BRW_WRITE)
927                                 page = lustre_get_page_write(inode, index);
928                         else
929                                 page = lustre_get_page_read(inode, index);
930                         if (IS_ERR(page))
931                                 RETURN(PTR_ERR(page));
932
933                         r->addr = (__u64)(unsigned long)page_address(page);
934                         r->offset = b->offset;
935                         r->page = page;
936                         r->len = PAGE_SIZE;
937                 }
938         }
939         // if (cmd == OBD_BRW_WRITE)
940         pop_ctxt(&saved);
941         return(0);
942 }
943
944 static int filter_commitrw(int cmd, struct obd_conn *conn,
945                            int objcount, struct obd_ioobj *obj,
946                            int niocount, struct niobuf_local *res)
947 {
948         struct obd_run_ctxt saved;
949         struct obd_ioobj *o = obj;
950         struct niobuf_local *r = res;
951         int i;
952         ENTRY;
953
954         // if (cmd == OBD_BRW_WRITE)
955         push_ctxt(&saved, &conn->oc_dev->u.filter.fo_ctxt);
956         for (i = 0; i < objcount; i++, obj++) {
957                 int j;
958                 for (j = 0 ; j < o->ioo_bufcnt ; j++, r++) {
959                         struct page *page = r->page;
960
961                         if (!r->page)
962                                 LBUG();
963
964                         if (cmd == OBD_BRW_WRITE) {
965                                 int rc = lustre_commit_page(page, 0, PAGE_SIZE);
966
967                                 /* FIXME: still need to iput the other inodes */
968                                 if (rc)
969                                         RETURN(rc);
970                         } else
971                                 lustre_put_page(page);
972
973                         CDEBUG(D_INODE, "put inode %p (%ld), count = %d, nlink = %d\n",
974                                page->mapping->host,
975                                page->mapping->host->i_ino,
976                                atomic_read(&page->mapping->host->i_count) - 1,
977                                page->mapping->host->i_nlink);
978                         iput(page->mapping->host);
979                 }
980         }
981         // if (cmd == OBD_BRW_WRITE)
982         pop_ctxt(&saved);
983         RETURN(0);
984 }
985
986 static int filter_statfs(struct obd_conn *conn, struct statfs * statfs)
987 {
988         struct super_block *sb;
989         int err;
990         ENTRY;
991
992         if (!gen_client(conn)) {
993                 CDEBUG(D_IOCTL, "invalid client %u\n", conn->oc_id);
994                 RETURN(-EINVAL);
995         }
996
997         sb = conn->oc_dev->u.filter.fo_sb;
998
999         err = sb->s_op->statfs(sb, statfs);
1000         RETURN(err);
1001 } /* filter_statfs */
1002
1003
1004 static int filter_get_info(struct obd_conn *conn, obd_count keylen,
1005                            void *key, obd_count *vallen, void **val)
1006 {
1007         struct obd_device *obddev;
1008         struct obd_client * cli;
1009         ENTRY;
1010
1011         if (!(cli = gen_client(conn))) {
1012                 CDEBUG(D_IOCTL, "invalid client %u\n", conn->oc_id);
1013                 RETURN(-EINVAL);
1014         }
1015
1016         obddev = conn->oc_dev;
1017
1018         if ( keylen == strlen("blocksize") &&
1019              memcmp(key, "blocksize", keylen) == 0 ) {
1020                 *vallen = sizeof(long);
1021                 *val = (void *)(long)obddev->u.filter.fo_sb->s_blocksize;
1022                 RETURN(0);
1023         }
1024
1025         if ( keylen == strlen("blocksize_bits") &&
1026              memcmp(key, "blocksize_bits", keylen) == 0 ){
1027                 *vallen = sizeof(long);
1028                 *val = (void *)(long)obddev->u.filter.fo_sb->s_blocksize_bits;
1029                 RETURN(0);
1030         }
1031
1032         if ( keylen == strlen("root_ino") &&
1033              memcmp(key, "root_ino", keylen) == 0 ){
1034                 *vallen = sizeof(long);
1035                 *val = (void *)(long)FILTER_ROOTINO;
1036                 RETURN(0);
1037         }
1038
1039         CDEBUG(D_IOCTL, "invalid key\n");
1040         RETURN(-EINVAL);
1041 }
1042
1043
1044 struct obd_ops filter_obd_ops = {
1045         o_iocontrol:   NULL,
1046         o_get_info:    filter_get_info,
1047         o_setup:       filter_setup,
1048         o_cleanup:     filter_cleanup,
1049         o_connect:     filter_connect,
1050         o_disconnect:  filter_disconnect,
1051         o_statfs:      filter_statfs,
1052         o_getattr:     filter_getattr,
1053         o_create:      filter_create,
1054         o_setattr:     filter_setattr,
1055         o_destroy:     filter_destroy,
1056         o_open:        filter_open,
1057         o_close:       filter_close,
1058         o_read:        filter_read,
1059         o_write:       filter_write,
1060         o_brw:         filter_pgcache_brw,
1061         o_punch:       filter_truncate,
1062         o_preprw:      filter_preprw,
1063         o_commitrw:    filter_commitrw
1064 #if 0
1065         o_preallocate: filter_preallocate_inodes,
1066         o_migrate:     filter_migrate,
1067         o_copy:        gen_copy_data,
1068         o_iterate:     filter_iterate
1069 #endif
1070 };
1071
1072
1073 static int __init obdfilter_init(void)
1074 {
1075         printk(KERN_INFO "Filtering OBD driver  v0.001, braam@clusterfs.com\n");
1076         return obd_register_type(&filter_obd_ops, OBD_FILTER_DEVICENAME);
1077 }
1078
1079 static void __exit obdfilter_exit(void)
1080 {
1081         obd_unregister_type(OBD_FILTER_DEVICENAME);
1082 }
1083
1084 MODULE_AUTHOR("Peter J. Braam <braam@clusterfs.com>");
1085 MODULE_DESCRIPTION("Lustre Filtering OBD driver v1.0");
1086 MODULE_LICENSE("GPL");
1087
1088 module_init(obdfilter_init);
1089 module_exit(obdfilter_exit);