From 68030197dc2c9671afbb58e9ccd6aa262d5d0871 Mon Sep 17 00:00:00 2001 From: braam Date: Thu, 13 Dec 2001 22:21:05 +0000 Subject: [PATCH] - obd filter works over ext2, but bonnie++ is buggy - obd filter doesn't work over reiser - obdctl now works and is taking over from obdcontrol --- lustre/demos/Makefile.in | 4 +- lustre/demos/basesetup.sh | 4 +- lustre/demos/config.sh | 4 +- lustre/demos/obdfsclean.sh | 5 +- lustre/demos/obdfssetup.sh | 8 +- lustre/demos/snap3clean.sh | 2 +- lustre/demos/snap3set.sh | 2 +- lustre/demos/snapclean.sh | 2 +- lustre/demos/snapdel.sh | 2 +- lustre/demos/snaprest.sh | 2 +- lustre/demos/snaprestclean.sh | 2 +- lustre/demos/snapsetup.sh | 2 +- lustre/include/linux/obd_class.h | 4 +- lustre/include/linux/obd_ext2.h | 4 +- lustre/include/linux/obd_filter.h | 3 + lustre/include/linux/obdfs.h | 2 - lustre/obdclass/class_obd.c | 4 +- lustre/obdfilter/filter.c | 319 +++++++++++++------------------------- lustre/obdfs/dir.c | 1 - lustre/obdfs/file.c | 2 +- lustre/obdfs/namei.c | 5 +- lustre/obdfs/rw.c | 52 +++---- lustre/obdfs/super.c | 6 +- lustre/patches/patch-2.4.16 | 161 +++++++++++++++++++ 24 files changed, 331 insertions(+), 271 deletions(-) create mode 100644 lustre/patches/patch-2.4.16 diff --git a/lustre/demos/Makefile.in b/lustre/demos/Makefile.in index 4a6aded..177d824 100644 --- a/lustre/demos/Makefile.in +++ b/lustre/demos/Makefile.in @@ -77,9 +77,7 @@ docdir = @docdir@ moduledir = @moduledir@ modulefsdir = @modulefsdir@ -demo_SCRIPTS = baseclean.sh basesetup.sh obdfsclean.sh obdfssetup.sh \ - obdtest.sh snap3clean.sh snap3set.sh snapclean.sh snapdel.sh \ - snaprest.sh snaprestclean.sh snapsetup.sh snaptest.sh +demo_SCRIPTS = baseclean.sh basesetup.sh obdfsclean.sh obdfssetup.sh obdtest.sh snap3clean.sh snap3set.sh snapclean.sh snapdel.sh snaprest.sh snaprestclean.sh snapsetup.sh snaptest.sh demo_DATA = config.sh EXTRA_DIST = $(demo_SCRIPTS) $(demo_DATA) diff --git a/lustre/demos/basesetup.sh b/lustre/demos/basesetup.sh index 
15c0c73..1e34702 100755 --- a/lustre/demos/basesetup.sh +++ b/lustre/demos/basesetup.sh @@ -58,8 +58,8 @@ fi if [ "$BASEDEV" ]; then - echo "No mke2fs!!" - # mke2fs -r 0 -b 4096 $BASEDEV +# echo "No mke2fs!!" + mke2fs -r 0 -b 4096 $BASEDEV else echo "\$BASEDEV not defined in demos/config.sh. Please fix!" [ "$LOOPDEV" ] && losetup -d $LOOPDEV diff --git a/lustre/demos/config.sh b/lustre/demos/config.sh index 5a5140d..4d19a29 100644 --- a/lustre/demos/config.sh +++ b/lustre/demos/config.sh @@ -25,8 +25,8 @@ OBDMAJ=186 # If LOOPDEV is empty, then it is assumed that BASEDEV is a real block device # that doesn't mind being overwritten - don't use a partition with data on it!! -LOOPDEV="" -BASEDEV="/dev/hda5" +LOOPDEV="/dev/loop0" +BASEDEV="/dev/loop0" # The following are mount points for the filesystems during the test. MNTOBD="/mnt/obd" diff --git a/lustre/demos/obdfsclean.sh b/lustre/demos/obdfsclean.sh index 42fd2b2..58f5470 100755 --- a/lustre/demos/obdfsclean.sh +++ b/lustre/demos/obdfsclean.sh @@ -14,13 +14,14 @@ plog umount $MNTOBD plog rmmod obdfs plog log "CLEANUP/DETACH" -$OBDDIR/class/obdcontrol -f << EOF -device /dev/obd0 +$OBDDIR/utils/obdctl << EOF +device 0 cleanup detach quit EOF +plog rmmod obdfilter plog rmmod obdext2 plog rmmod obdclass diff --git a/lustre/demos/obdfssetup.sh b/lustre/demos/obdfssetup.sh index bff240c..80acc66 100755 --- a/lustre/demos/obdfssetup.sh +++ b/lustre/demos/obdfssetup.sh @@ -20,9 +20,11 @@ fi #insmod $OBDDIR/ext2obd/obdext2.o #insmod $OBDDIR/obdfs/obdfs.o -plog log "ATTACHING /dev/obd0, SETUP $BASEDEV" -$OBDDIR/class/obdcontrol -f << EOF -device /dev/obd0 +plog log "ATTACHING device 0 SETUP $BASEDEV" +$OBDDIR/utils/obdctl << EOF +device 0 +# attach obdfilter +# setup $BASEDEV reiserfs attach obdext2 setup $BASEDEV quit diff --git a/lustre/demos/snap3clean.sh b/lustre/demos/snap3clean.sh index 0897ea9..a660ad2 100755 --- a/lustre/demos/snap3clean.sh +++ b/lustre/demos/snap3clean.sh @@ -12,7 +12,7 @@ OBDDIR="`dirname $0`/.." 
plog umount $MNTSNAP2 plog log "CLEANUP /dev/obd3" -$OBDDIR/class/obdcontrol -f << EOF +$OBDDIR/utils/obdcontrol -f << EOF device /dev/obd3 cleanup detach diff --git a/lustre/demos/snap3set.sh b/lustre/demos/snap3set.sh index 197c5da..d32baba 100755 --- a/lustre/demos/snap3set.sh +++ b/lustre/demos/snap3set.sh @@ -17,7 +17,7 @@ sync sleep 5 # let syslog logs get written plog log "CREATING /dev/obd3 snapshot" -$OBDDIR/class/obdcontrol -f << EOF +$OBDDIR/utils/obdcontrol -f << EOF snaptable $SNAPTABLE a diff --git a/lustre/demos/snapclean.sh b/lustre/demos/snapclean.sh index 4d2d456..ecc94ee 100755 --- a/lustre/demos/snapclean.sh +++ b/lustre/demos/snapclean.sh @@ -13,7 +13,7 @@ plog umount $MNTOBD plog umount $MNTSNAP plog log "CLEANUP /dev/obd2 /dev/obd1" -$OBDDIR/class/obdcontrol -f << EOF +$OBDDIR/utils/obdcontrol -f << EOF device /dev/obd2 cleanup detach diff --git a/lustre/demos/snapdel.sh b/lustre/demos/snapdel.sh index 87607f5..06e33a4 100755 --- a/lustre/demos/snapdel.sh +++ b/lustre/demos/snapdel.sh @@ -19,7 +19,7 @@ plog umount $MNTOBD sync sleep 1 plog log "STARTING snapdelete" -$OBDDIR/class/obdcontrol -f << EOF +$OBDDIR/utils/obdcontrol -f << EOF device /dev/obd2 connect snapdelete diff --git a/lustre/demos/snaprest.sh b/lustre/demos/snaprest.sh index 9a5dd07..b139e39 100755 --- a/lustre/demos/snaprest.sh +++ b/lustre/demos/snaprest.sh @@ -31,7 +31,7 @@ sync plog log "STARTING snaprestore" -$OBDDIR/class/obdcontrol -f << EOF +$OBDDIR/utils/obdcontrol -f << EOF device /dev/obd1 cleanup detach diff --git a/lustre/demos/snaprestclean.sh b/lustre/demos/snaprestclean.sh index a3c4319..4b3b57d 100755 --- a/lustre/demos/snaprestclean.sh +++ b/lustre/demos/snaprestclean.sh @@ -18,7 +18,7 @@ fi rmmod obdfs -$OBDDIR/class/obdcontrol -f << EOF +$OBDDIR/utils/obdcontrol -f << EOF device /dev/obd2 cleanup detach diff --git a/lustre/demos/snapsetup.sh b/lustre/demos/snapsetup.sh index ff95718..7db6ab1 100755 --- a/lustre/demos/snapsetup.sh +++ 
b/lustre/demos/snapsetup.sh @@ -42,7 +42,7 @@ plog log "NEW SNAP SETUP" # second will be a snapshot of the filesystem taken "now" (in obd device 2) # that will remain static (historical read-only) filesystem as changes # are made to the current snapshot. -$OBDDIR/class/obdcontrol -f << EOF +$OBDDIR/utils/obdcontrol -f << EOF snaptable $SNAPTABLE a diff --git a/lustre/include/linux/obd_class.h b/lustre/include/linux/obd_class.h index 0314929..70afae8 100644 --- a/lustre/include/linux/obd_class.h +++ b/lustre/include/linux/obd_class.h @@ -267,9 +267,9 @@ struct obd_ops { obd_count *vallen, void **val); int (*o_set_info)(struct obd_conn *, obd_count keylen, void *key, obd_count vallen, void *val); - int (*o_attach)(struct obd_device *dev, struct obd_ioctl_data *data); + int (*o_attach)(struct obd_device *dev, obd_count len, void *data); int (*o_detach)(struct obd_device *dev); - int (*o_setup) (struct obd_device *dev, struct obd_ioctl_data *data); + int (*o_setup) (struct obd_device *dev, obd_count len, void *data); int (*o_cleanup)(struct obd_device *dev); int (*o_connect)(struct obd_conn *conn); int (*o_disconnect)(struct obd_conn *conn); diff --git a/lustre/include/linux/obd_ext2.h b/lustre/include/linux/obd_ext2.h index 657b103..0cf413b 100644 --- a/lustre/include/linux/obd_ext2.h +++ b/lustre/include/linux/obd_ext2.h @@ -16,8 +16,8 @@ #endif struct ext2_obd { - struct super_block * ext2_sb; - struct vfsmount *vfsmnt; + struct super_block * e2_sb; + struct vfsmount *e2_vfsmnt; }; diff --git a/lustre/include/linux/obd_filter.h b/lustre/include/linux/obd_filter.h index 6fb189f..bdccdfa 100644 --- a/lustre/include/linux/obd_filter.h +++ b/lustre/include/linux/obd_filter.h @@ -24,6 +24,9 @@ struct filter_obd { struct vfsmount *fo_vfsmnt; struct run_ctxt fo_ctxt; __u64 fo_lastino; + struct file_operations *fo_fop; + struct inode_operations *fo_iop; + struct address_space_operations *fo_aops; }; diff --git a/lustre/include/linux/obdfs.h b/lustre/include/linux/obdfs.h 
index 5df8f03..4c839ee 100644 --- a/lustre/include/linux/obdfs.h +++ b/lustre/include/linux/obdfs.h @@ -216,8 +216,6 @@ static void inline obdfs_set_size (struct inode *inode, obd_size size) inode->i_size = size; inode->i_blocks = (inode->i_size + inode->i_sb->s_blocksize - 1) >> inode->i_sb->s_blocksize_bits; - inode->i_bytes = inode->i_size & - ((1 << inode->i_sb->s_blocksize_bits) - 1); } /* obdfs_set_size */ diff --git a/lustre/obdclass/class_obd.c b/lustre/obdclass/class_obd.c index 545f66b..84b6440 100644 --- a/lustre/obdclass/class_obd.c +++ b/lustre/obdclass/class_obd.c @@ -194,7 +194,7 @@ static int obd_class_ioctl (struct inode * inode, struct file * filp, /* do the attach */ if ( OBT(obd) && OBP(obd, attach) ) { - err = OBP(obd, attach)(obd, data); + err = OBP(obd, attach)(obd, sizeof(*data), data); } if ( err ) { @@ -257,7 +257,7 @@ static int obd_class_ioctl (struct inode * inode, struct file * filp, } if ( OBT(obd) && OBP(obd, setup) ) - err = OBP(obd, setup)(obd, data); + err = OBP(obd, setup)(obd, sizeof(*data), data); if (!err) { obd->obd_type->typ_refcnt++; diff --git a/lustre/obdfilter/filter.c b/lustre/obdfilter/filter.c index ca9ee52..e97ab5d 100644 --- a/lustre/obdfilter/filter.c +++ b/lustre/obdfilter/filter.c @@ -1,17 +1,12 @@ /* * linux/fs/ext2_obd/ext2_obd.c * - * Copyright (C) 1999 Stelias Computing, Inc. - * Copyright (C) 1999 Seagate Technology, Inc. * Copyright (C) 2001 Cluster File Systems, Inc. * * This code is issued under the GNU General Public License. 
* See the file COPYING in this distribution * - * This is the object based disk driver based on ext2 - * written by Peter Braam , Phil Schwan - * Andreas Dilger - * + * by Peter Braam */ #define EXPORT_SYMTAB @@ -28,8 +23,6 @@ #include #include - - extern struct obd_device obd_dev[MAX_OBD_DEVICES]; long filter_memory; @@ -55,15 +48,18 @@ void pop_ctxt(struct run_ctxt *saved) static void filter_prep(struct obd_device *obddev) { struct run_ctxt saved; + struct file *file; + struct inode *inode; long rc; int fd; struct stat64 buf; - __u64 lastino = 0; + __u64 lastino = 2; push_ctxt(&saved, &obddev->u.filter.fo_ctxt); rc = sys_mkdir("O", 0700); rc = sys_mkdir("P", 0700); rc = sys_mkdir("D", 0700); + rc = sys_mkdir("O/2", 0755); if ( (fd = sys_open("D/status", O_RDWR | O_CREAT, 0700)) == -1 ) { printk("OBD filter: cannot create status file\n"); goto out; @@ -81,11 +77,23 @@ static void filter_prep(struct obd_device *obddev) } else { rc = sys_read(fd, (char *)&lastino, sizeof(lastino)); if (rc != sizeof(lastino)) { - printk("OBD filter: error writing lastino\n"); + printk("OBD filter: error reading lastino\n"); goto out_close; } } obddev->u.filter.fo_lastino = lastino; + + /* this is also the moment to steal operations */ + file = filp_open("D/status", O_RDONLY, 0); + if (!file || IS_ERR(file)) { + EXIT; + goto out_close; + } + inode = file->f_dentry->d_inode; + obddev->u.filter.fo_fop = file->f_op; + obddev->u.filter.fo_iop = inode->i_op; + obddev->u.filter.fo_aops = inode->i_mapping->a_ops; + filp_close(file, 0); out_close: rc = sys_close(fd); @@ -129,15 +137,12 @@ static int filter_disconnect(struct obd_conn *conn) return gen_disconnect(conn); } /* ext2obd_disconnect */ - - - -/* - * to initialize a particular /dev/obdNNN to simulated OBD type - * *data holds the device of the ext2 disk partition we will use. 
- */ -static int filter_setup(struct obd_device *obddev, struct obd_ioctl_data* data) +/* mount the file system (secretly) */ +static int filter_setup(struct obd_device *obddev, obd_count len, + void *buf) + { + struct obd_ioctl_data* data = buf; struct vfsmount *mnt; int err; ENTRY; @@ -202,20 +207,20 @@ static int filter_cleanup(struct obd_device * obddev) lock_kernel(); - MOD_DEC_USE_COUNT; EXIT; return 0; } -static struct inode *inode_from_obdo(struct obd_device *obddev, + +static struct file *filter_obj_open(struct obd_device *obddev, struct obdo *oa) { + struct file *file; + int error = 0; char id[16]; - struct super_block *sb; - struct inode *inode; struct run_ctxt saved; - struct stat64 st; + struct super_block *sb; sb = obddev->u.filter.fo_sb; if (!sb || !sb->s_dev) { @@ -231,24 +236,40 @@ static struct inode *inode_from_obdo(struct obd_device *obddev, } sprintf(id, "O/%Ld", oa->o_id); - push_ctxt(&saved, &obddev->u.filter.fo_ctxt); - if (sys_stat64(id, &st, 0)) { - EXIT; + file = filp_open(id , O_RDONLY, 0); + pop_ctxt(&saved); + + if (IS_ERR(file)) { + error = PTR_ERR(file); + file = NULL; + } + CDEBUG(D_INODE, "opening obdo %s\n", id); + + if ( file ) { + file->f_op = obddev->u.filter.fo_fop; + file->f_dentry->d_inode->i_op = obddev->u.filter.fo_iop; + file->f_dentry->d_inode->i_mapping->a_ops = obddev->u.filter.fo_aops; + } else { + printk("Error opening object %s, error %d\n", id, error); + } + return file; +} + +static struct inode *inode_from_obdo(struct obd_device *obddev, + struct obdo *oa) +{ + struct file *file; + struct inode *inode; + + file = filter_obj_open(obddev, oa); + if ( !file ) { + printk("inode_from_obdo failed\n"); return NULL; } - pop_ctxt(&saved); - inode = iget(sb, st.st_ino); - if (!inode || inode->i_nlink == 0 || is_bad_inode(inode)) { - printk("from obdo - fatal: invalid inode %ld (%s).\n", - (long)oa->o_id, inode ? inode->i_nlink ? 
"bad inode" : - "no links" : "NULL"); - if (inode) - iput(inode); - EXIT; - return NULL; - } + inode = iget(file->f_dentry->d_inode->i_sb, file->f_dentry->d_inode->i_ino); + filp_close(file, 0); return inode; } @@ -306,7 +327,9 @@ static int filter_getattr(struct obd_conn *conn, struct obdo *oa) return -ENOENT; } + oa->o_valid &= ~OBD_MD_FLID; filter_from_inode(oa, inode); + iput(inode); EXIT; return 0; @@ -350,27 +373,32 @@ static int filter_create (struct obd_conn* conn, struct obdo *oa) struct obd_device *obddev = conn->oc_dev; struct iattr; int rc; + ENTRY; if (!gen_client(conn)) { CDEBUG(D_IOCTL, "invalid client %u\n", conn->oc_id); return -EINVAL; } + CDEBUG(D_IOCTL, "\n"); conn->oc_dev->u.filter.fo_lastino++; oa->o_id = conn->oc_dev->u.filter.fo_lastino; sprintf(name, "O/%Ld", oa->o_id); push_ctxt(&saved, &obddev->u.filter.fo_ctxt); - if (sys_mknod(name, 010644, 0)) { - printk("Error mknod %s\n", name); + CDEBUG(D_IOCTL, "\n"); + if (sys_mknod(name, 0100644, 0)) { + printk("Error mknod obj %s\n", name); return -ENOENT; } pop_ctxt(&saved); + CDEBUG(D_IOCTL, "\n"); rc = filter_setattr(conn, oa); if ( rc ) { EXIT; return -EINVAL; } + CDEBUG(D_IOCTL, "\n"); /* Set flags for fields we have set in ext2_new_inode */ oa->o_valid |= OBD_MD_FLID | OBD_MD_FLBLKSZ | OBD_MD_FLBLOCKS | @@ -417,51 +445,30 @@ static int filter_destroy(struct obd_conn *conn, struct obdo *oa) return 0; } +/* buffer must lie in user memory here */ static int filter_read(struct obd_conn *conn, struct obdo *oa, char *buf, obd_size *count, obd_off offset) { - struct super_block *sb; - struct inode * inode; - struct file * f; - struct file fake_file; - struct dentry fake_dentry; + struct file * file; unsigned long retval; int err; - if (!gen_client(conn)) { CDEBUG(D_IOCTL, "invalid client %u\n", conn->oc_id); EXIT; return -EINVAL; } - sb = conn->oc_dev->u.ext2.ext2_sb; - if ( !(inode = inode_from_obdo(conn->oc_dev, oa)) ) { + file = filter_obj_open(conn->oc_dev, oa); + if (!file || IS_ERR(file)) { 
EXIT; - return -ENOENT; + return -PTR_ERR(file); } - if (!S_ISREG(inode->i_mode)) { - iput(inode); - CDEBUG(D_INODE, "fatal: not regular file %ld (mode=%o).\n", - inode->i_ino, inode->i_mode); - EXIT; - return -EINVAL; - } - - memset(&fake_file, 0, sizeof(fake_file)); - memset(&fake_dentry, 0, sizeof(fake_dentry)); - - f = &fake_file; - f->f_dentry = &fake_dentry; - f->f_dentry->d_inode = inode; - f->f_flags = O_LARGEFILE; - f->f_op = &ext2_file_operations; - inode->i_mapping->a_ops = &ext2_aops; - /* count doubles as retval */ - retval = f->f_op->read(f, buf, *count, &offset); - iput(inode); + retval = file->f_op->read(file, buf, *count, &offset); + filp_close(file, 0); + if ( retval >= 0 ) { err = 0; *count = retval; @@ -474,58 +481,30 @@ static int filter_read(struct obd_conn *conn, struct obdo *oa, char *buf, } /* ext2obd_read */ +/* buffer must lie in user memory here */ static int filter_write(struct obd_conn *conn, struct obdo *oa, char *buf, obd_size *count, obd_off offset) { int err; - struct super_block *sb; - struct inode * inode; - struct file fake_file; - struct dentry fake_dentry; - struct file * f; + struct file * file; unsigned long retval; ENTRY; - if (!gen_client(conn)) { CDEBUG(D_IOCTL, "invalid client %u\n", conn->oc_id); EXIT; return -EINVAL; } - sb = conn->oc_dev->u.ext2.ext2_sb; - if ( !(inode = inode_from_obdo(conn->oc_dev, oa)) ) { + file = filter_obj_open(conn->oc_dev, oa); + if (!file || IS_ERR(file)) { EXIT; - return -ENOENT; + return -PTR_ERR(file); } - if (!S_ISREG(inode->i_mode)) { - CDEBUG(D_INODE, "fatal: not regular file.\n"); - iput(inode); - EXIT; - return -EINVAL; - } - - memset(&fake_file, 0, sizeof(fake_file)); - memset(&fake_dentry, 0, sizeof(fake_dentry)); - - f = &fake_file; - f->f_dentry = &fake_dentry; - f->f_dentry->d_inode = inode; - f->f_op = &ext2_file_operations; - f->f_flags = O_LARGEFILE; - inode->i_mapping->a_ops = &ext2_aops; - /* count doubles as retval */ - if (f->f_op->write) - retval = f->f_op->write(f, buf, 
*count, &(offset)); - else - retval = -EINVAL; - CDEBUG(D_INFO, "Result %ld\n", retval); - - oa->o_valid = OBD_MD_FLBLOCKS | OBD_MD_FLCTIME | OBD_MD_FLMTIME; - obdo_from_inode(oa, inode); - iput(inode); + retval = file->f_op->write(file, buf, *count, &offset); + filp_close(file, 0); if ( retval >= 0 ) { err = 0; @@ -540,56 +519,6 @@ static int filter_write(struct obd_conn *conn, struct obdo *oa, char *buf, return err; } /* ext2obd_write */ -void ___wait_on_page(struct page *page) -{ - struct task_struct *tsk = current; - DECLARE_WAITQUEUE(wait, tsk); - - add_wait_queue(&page->wait, &wait); - do { - run_task_queue(&tq_disk); - set_task_state(tsk, TASK_UNINTERRUPTIBLE); - if (!PageLocked(page)) - break; - schedule(); - } while (PageLocked(page)); - tsk->state = TASK_RUNNING; - remove_wait_queue(&page->wait, &wait); -} - -static inline int actor_from_kernel(char *dst, char *src, size_t len) -{ - ENTRY; - memcpy(dst, src, len); - EXIT; - return 0; -} - -int kernel_read_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size) -{ - char *kaddr; - unsigned long count = desc->count; - ENTRY; - if (desc->buf == NULL) { - printk("ALERT: desc->buf == NULL\n"); - desc->error = -EIO; - return -EIO; - } - - if (size > count) - size = count; - - kaddr = kmap(page); - memcpy(desc->buf, kaddr + offset, size); - kunmap(page); - - desc->count = count - size; - desc->written += size; - desc->buf += size; - EXIT; - return size; -} - static int filter_pgcache_brw(int rw, struct obd_conn *conn, obd_count num_oa, struct obdo **oa, @@ -600,13 +529,12 @@ static int filter_pgcache_brw(int rw, struct obd_conn *conn, obd_flag *flags) { struct super_block *sb; + mm_segment_t oldfs; int onum; /* index to oas */ int pnum; /* index to pages (bufs) */ unsigned long retval; - int err; - struct file fake_file; - struct dentry fake_dentry; - struct file *f; + int error; + struct file *file; ENTRY; @@ -616,37 +544,25 @@ static int filter_pgcache_brw(int rw, struct 
obd_conn *conn, return -EINVAL; } - sb = conn->oc_dev->u.ext2.ext2_sb; + sb = conn->oc_dev->u.filter.fo_sb; + oldfs = get_fs(); + set_fs(KERNEL_DS); pnum = 0; /* pnum indexes buf 0..num_pages */ for (onum = 0; onum < num_oa; onum++) { - struct inode *inode; int pg; - if ( rw == READ ) - *flags &= ~OBD_BRW_CREATE; - - if (! (inode = inode_from_obdo(conn->oc_dev, oa[onum])) ) { - EXIT; - return -ENOENT; - } - - CDEBUG(D_INODE, "ino %ld, i_count %d\n", - inode->i_ino, atomic_read(&inode->i_count)); - memset(&fake_file, 0, sizeof(fake_file)); - memset(&fake_dentry, 0, sizeof(fake_dentry)); - - f = &fake_file; - f->f_dentry = &fake_dentry; - f->f_dentry->d_inode = inode; - f->f_op = &ext2_file_operations; - f->f_flags = O_LARGEFILE; - inode->i_mapping->a_ops = &ext2_aops; + file = filter_obj_open(conn->oc_dev, oa[onum]); + if (!file || IS_ERR(file)) { + EXIT; + error = -ENOENT; + goto ERROR; + } /* count doubles as retval */ for (pg = 0; pg < oa_bufs[onum]; pg++) { CDEBUG(D_INODE, "OP %d obdo no/pno: (%d,%d) (%ld,%ld) off count (%Ld,%Ld)\n", - rw, onum, pnum, inode->i_ino, + rw, onum, pnum, file->f_dentry->d_inode->i_ino, (unsigned long)offset[pnum] >> PAGE_CACHE_SHIFT, offset[pnum], count[pnum]); if (rw == WRITE) { @@ -654,35 +570,24 @@ static int filter_pgcache_brw(int rw, struct obd_conn *conn, char *buffer; off = offset[pnum]; buffer = kmap(pages[pnum]); - retval = do_generic_file_write - (f, buffer, count[pnum], &off, - actor_from_kernel); + retval = file->f_op->write(file, buffer, count[pnum], &off); kunmap(pages[pnum]); CDEBUG(D_INODE, "retval %ld\n", retval); } else { - loff_t off; - read_descriptor_t desc; + loff_t off = offset[pnum]; char *buffer = kmap(pages[pnum]); - desc.written = 0; - desc.count = count[pnum]; - desc.buf = buffer; - desc.error = 0; - off = offset[pnum]; - - off = offset[pnum]; - if (off >= inode->i_size) { - memset(buffer, 0, PAGE_SIZE); + if (off >= file->f_dentry->d_inode->i_size) { + memset(buffer, 0, count[pnum]); + retval = 
count[pnum]; } else { - do_generic_file_read - (f, &off, &desc, - kernel_read_actor); + retval = file->f_op->read(file, buffer, count[pnum], &off); } kunmap(pages[pnum]); - retval = desc.written; - if ( !retval ) { - iput(inode); - retval = desc.error; + + if ( retval != count[pnum] ) { + filp_close(file, 0); + retval = -EIO; EXIT; goto ERROR; } @@ -692,20 +597,14 @@ static int filter_pgcache_brw(int rw, struct obd_conn *conn, } /* sizes and blocks are set by generic_file_write */ /* ctimes/mtimes will follow with a setattr call */ - - //oa[onum]->o_blocks = inode->i_blocks; - //oa[onum]->o_valid = OBD_MD_FLBLOCKS; - /* perform the setattr on the inode */ - //ext2obd_to_inode(inode, oa[onum]); - //inode->i_size = oa[onum]->o_size; - //mark_inode_dirty(inode); - iput(inode); + filp_close(file, 0); } EXIT; ERROR: - err = (retval >= 0) ? 0 : retval; - return err; + set_fs(oldfs); + error = (retval >= 0) ? 0 : retval; + return error; } static int filter_statfs (struct obd_conn *conn, struct statfs * statfs) diff --git a/lustre/obdfs/dir.c b/lustre/obdfs/dir.c index 46f1f0f..4ac7bdf 100644 --- a/lustre/obdfs/dir.c +++ b/lustre/obdfs/dir.c @@ -31,7 +31,6 @@ typedef struct ext2_dir_entry_2 ext2_dirent; #define PageChecked(page) test_bit(PG_checked, &(page)->flags) #define SetPageChecked(page) set_bit(PG_checked, &(page)->flags) -#define PG_checked 13 /* kill me in 2.5.. 
*/ int waitfor_one_page(struct page *page) { diff --git a/lustre/obdfs/file.c b/lustre/obdfs/file.c index 138ca56..a652e42 100644 --- a/lustre/obdfs/file.c +++ b/lustre/obdfs/file.c @@ -64,7 +64,7 @@ obdfs_file_write(struct file *file, const char *buf, size_t count, loff_t *ppos) { ssize_t retval; CDEBUG(D_INFO, "Writing inode %ld, %d bytes, offset %Ld\n", - file->f_dentry->d_inode->i_ino, count, ppos); + file->f_dentry->d_inode->i_ino, count, *ppos); retval = generic_file_write(file, buf, count, ppos); CDEBUG(D_INFO, "Wrote %d\n", retval); diff --git a/lustre/obdfs/namei.c b/lustre/obdfs/namei.c index 1d79362..ff0f3e1 100644 --- a/lustre/obdfs/namei.c +++ b/lustre/obdfs/namei.c @@ -151,8 +151,9 @@ static struct inode *obdfs_new_inode(struct inode *dir, int mode) /* Send a hint to the create method on the type of file to create */ oa->o_mode = mode; oa->o_valid |= OBD_MD_FLMODE; - + CDEBUG(D_INODE, "\n"); err = IOPS(dir, create)(IID(dir), oa); + CDEBUG(D_INODE, "\n"); if ( err ) { printk("new_inode - fatal: err %d\n", err); @@ -160,8 +161,10 @@ static struct inode *obdfs_new_inode(struct inode *dir, int mode) EXIT; return ERR_PTR(err); } + CDEBUG(D_INODE, "\n"); inode = iget4(dir->i_sb, (ino_t)oa->o_id, NULL, oa); + CDEBUG(D_INODE, "\n"); obdo_free(oa); if (!inode) { diff --git a/lustre/obdfs/rw.c b/lustre/obdfs/rw.c index c708bf7..5fcd525 100644 --- a/lustre/obdfs/rw.c +++ b/lustre/obdfs/rw.c @@ -43,56 +43,48 @@ static int cache_writes = 0; /* page cache support stuff */ + /* * Add a page to the dirty page list. 
*/ -void __set_page_dirty(struct page *page) +void set_page_dirty(struct page *page) { - struct address_space *mapping; - spinlock_t *pg_lock; - - pg_lock = PAGECACHE_LOCK(page); - spin_lock(pg_lock); - - mapping = page->mapping; - spin_lock(&mapping->page_lock); - - list_del(&page->list); - list_add(&page->list, &mapping->dirty_pages); - - spin_unlock(&mapping->page_lock); - spin_unlock(pg_lock); - - if (mapping->host) - mark_inode_dirty_pages(mapping->host); + if (!test_and_set_bit(PG_dirty, &page->flags)) { + struct address_space *mapping = page->mapping; + + if (mapping) { + spin_lock(&pagecache_lock); + list_del(&page->list); + list_add(&page->list, &mapping->dirty_pages); + spin_unlock(&pagecache_lock); + + if (mapping->host) + mark_inode_dirty_pages(mapping->host); + } + } } /* - * Add a page to the dirty page list. + * Remove page from dirty list */ void __set_page_clean(struct page *page) { + struct address_space *mapping = page->mapping; struct inode *inode; - struct address_space *mapping; - spinlock_t *pg_lock; - - pg_lock = PAGECACHE_LOCK(page); - spin_lock(pg_lock); - - mapping = page->mapping; - spin_lock(&mapping->page_lock); + + if (!mapping) + return; + spin_lock(&pagecache_lock); list_del(&page->list); list_add(&page->list, &mapping->clean_pages); - spin_unlock(&mapping->page_lock); - spin_unlock(pg_lock); - inode = mapping->host; if (list_empty(&mapping->dirty_pages)) { CDEBUG(D_INODE, "inode clean\n"); inode->i_state &= ~I_DIRTY_PAGES; } + spin_unlock(&pagecache_lock); EXIT; } diff --git a/lustre/obdfs/super.c b/lustre/obdfs/super.c index 031b8f3..9c9ae14 100644 --- a/lustre/obdfs/super.c +++ b/lustre/obdfs/super.c @@ -120,6 +120,7 @@ static struct super_block * obdfs_read_super(struct super_block *sb, struct obd_device *obddev; char *device = NULL; char *version = NULL; + int connected = 0; int devno; int err; unsigned long blocksize; @@ -187,7 +188,7 @@ static struct super_block * obdfs_read_super(struct super_block *sb, EXIT; goto ERR; } - + 
connected = 1; CDEBUG(D_INFO, "\n"); /* list of dirty inodes, and a mutex to hold while modifying it */ INIT_LIST_HEAD(&sbi->osi_inodes); @@ -276,6 +277,9 @@ ERR: OBD_FREE(device, strlen(device) + 1); if (version) OBD_FREE(version, strlen(version) + 1); + if (connected) + sbi->osi_ops->o_disconnect(&sbi->osi_conn); + if (sbi) { sbi->osi_super = NULL; } diff --git a/lustre/patches/patch-2.4.16 b/lustre/patches/patch-2.4.16 new file mode 100644 index 0000000..8e0c915 --- /dev/null +++ b/lustre/patches/patch-2.4.16 @@ -0,0 +1,161 @@ +--- /usr/src/linux-2.4.16/fs/reiserfs/file.c.obd-orig Thu Dec 13 14:35:00 2001 ++++ /usr/src/linux-2.4.16/fs/reiserfs/file.c Thu Dec 13 14:35:08 2001 +@@ -28,8 +28,8 @@ + struct reiserfs_transaction_handle th ; + int windex ; + +- if (!S_ISREG (inode->i_mode)) +- BUG (); ++ //if (!S_ISREG (inode->i_mode)) ++ //BUG (); + + /* fast out for when nothing needs to be done */ + if ((atomic_read(&inode->i_count) > 1 || +--- /usr/src/linux-2.4.16/fs/namespace.c.obd-orig Tue Dec 11 20:44:15 2001 ++++ /usr/src/linux-2.4.16/fs/namespace.c Tue Dec 11 20:44:35 2001 +@@ -332,7 +332,7 @@ + } + } + +-static int do_umount(struct vfsmount *mnt, int flags) ++int do_umount(struct vfsmount *mnt, int flags) + { + struct super_block * sb = mnt->mnt_sb; + int retval = 0; +--- /usr/src/linux-2.4.16/kernel/ksyms.c.obd-orig Tue Dec 11 14:22:09 2001 ++++ /usr/src/linux-2.4.16/kernel/ksyms.c Tue Dec 11 18:05:30 2001 +@@ -268,6 +268,7 @@ + EXPORT_SYMBOL(__pollwait); + EXPORT_SYMBOL(poll_freewait); + EXPORT_SYMBOL(ROOT_DEV); ++EXPORT_SYMBOL(pagecache_lock); + EXPORT_SYMBOL(__find_get_page); + EXPORT_SYMBOL(__find_lock_page); + EXPORT_SYMBOL(grab_cache_page); +--- /usr/src/linux-2.4.16/mm/filemap.c.obd-orig Mon Nov 26 06:29:17 2001 ++++ /usr/src/linux-2.4.16/mm/filemap.c Tue Dec 11 14:29:50 2001 +@@ -2846,8 +2846,7 @@ + * file system has to do this all by itself, unfortunately. 
+ * okir@monad.swb.de + */ +-ssize_t +-generic_file_write(struct file *file,const char *buf,size_t count, loff_t *ppos) ++ssize_t do_generic_file_write(struct file *file,const char *buf,size_t count,loff_t *ppos, int (*actor)(char *, char *, size_t )) + { + struct address_space *mapping = file->f_dentry->d_inode->i_mapping; + struct inode *inode = mapping->host; +@@ -2862,8 +2861,10 @@ + if ((ssize_t) count < 0) + return -EINVAL; + ++#if 0 + if (!access_ok(VERIFY_READ, buf, count)) + return -EFAULT; ++#endif + + cached_page = NULL; + +@@ -2987,10 +2988,12 @@ + * same page as we're writing to, without it being marked + * up-to-date. + */ ++#if 0 + { volatile unsigned char dummy; + __get_user(dummy, buf); + __get_user(dummy, buf+bytes-1); + } ++#endif + + status = -ENOMEM; /* we'll assign it later anyway */ + page = __grab_cache_page(mapping, index, &cached_page); +@@ -3006,7 +3009,7 @@ + status = mapping->a_ops->prepare_write(file, page, offset, offset+bytes); + if (status) + goto unlock; +- page_fault = __copy_from_user(kaddr+offset, buf, bytes); ++ page_fault = actor(kaddr+offset, buf, bytes); + flush_dcache_page(page); + status = mapping->a_ops->commit_write(file, page, offset, offset+bytes); + if (page_fault) +@@ -3072,6 +3075,23 @@ + goto out_status; + } + ++ ++static inline int actor_from_user(char *dst, char *src, size_t len) ++{ ++ if (!access_ok(VERIFY_READ, src, len)) ++ return -EFAULT; ++ ++ return __copy_from_user(dst, src, len); ++} ++ ++ssize_t ++generic_file_write(struct file *file,const char *buf,size_t count,loff_t *ppos) ++{ ++ return do_generic_file_write(file, buf, count, ppos, &actor_from_user); ++} ++ ++ ++ + void __init page_cache_init(unsigned long mempages) + { + unsigned long htable_size, order; +--- /usr/src/linux-2.4.16/include/linux/fs.h.obd-orig Tue Dec 11 14:23:29 2001 ++++ /usr/src/linux-2.4.16/include/linux/fs.h Tue Dec 11 20:45:00 2001 +@@ -978,8 +978,10 @@ + extern int register_filesystem(struct file_system_type *); + extern int 
unregister_filesystem(struct file_system_type *); + extern struct vfsmount *kern_mount(struct file_system_type *); ++struct vfsmount *do_kern_mount(char *type, int flags, char *name, void *data); + extern int may_umount(struct vfsmount *); + extern long do_mount(char *, char *, char *, unsigned long, void *); ++int do_umount(struct vfsmount *mnt, int flags); + + #define kern_umount mntput + +@@ -1044,6 +1046,35 @@ + + asmlinkage long sys_open(const char *, int, int); + asmlinkage long sys_close(unsigned int); /* yes, it's really unsigned */ ++asmlinkage ssize_t sys_read(unsigned int fd, char * buf, size_t count); ++asmlinkage ssize_t sys_write(unsigned int fd, const char * buf, size_t count); ++asmlinkage long sys_truncate64(const char * path, loff_t length); ++asmlinkage long sys_ftruncate64(unsigned int fd, loff_t length); ++asmlinkage long sys_mount(char * dev_name, char * dir_name, char * type, ++ unsigned long flags, void * data); ++asmlinkage long sys_umount(char * name, int flags); ++asmlinkage long sys_stat(char * filename, struct __old_kernel_stat * statbuf); ++asmlinkage long sys_mknod(const char * filename, int mode, dev_t dev); ++asmlinkage long sys_mkdir(const char * pathname, int mode); ++asmlinkage long sys_rmdir(const char * pathname); ++asmlinkage long sys_unlink(const char * pathname); ++asmlinkage long sys_symlink(const char * oldname, const char * newname); ++asmlinkage long sys_link(const char * oldname, const char * newname); ++asmlinkage long sys_rename(const char * oldname, const char * newname); ++asmlinkage long sys_lstat64(char * filename, struct stat64 * statbuf, long flags); ++asmlinkage long sys_stat64(char * filename, struct stat64 * statbuf, long flags); ++asmlinkage long sys_fstat64(unsigned long fd, struct stat64 * statbuf, long flags); ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ + extern int do_truncate(struct dentry *, loff_t start); + + extern struct file *filp_open(const char *, int, int); +@@ -1395,6 +1426,8 @@ + extern ssize_t 
generic_file_read(struct file *, char *, size_t, loff_t *); + extern ssize_t generic_file_write(struct file *, const char *, size_t, loff_t *); + extern void do_generic_file_read(struct file *, loff_t *, read_descriptor_t *, read_actor_t); ++ssize_t do_generic_file_write(struct file *file,const char *buf,size_t count,loff_t *ppos, int (*actor)(char *, char *, size_t )); ++ + extern loff_t no_llseek(struct file *file, loff_t offset, int origin); + extern loff_t generic_file_llseek(struct file *file, loff_t offset, int origin); + extern ssize_t generic_read_dir(struct file *, char *, size_t, loff_t *); -- 1.8.3.1