From: braam Date: Sun, 26 Sep 1999 03:20:37 +0000 (+0000) Subject: Added code for obdfs to do writes to files and reads of directories and X-Git-Tag: v1_7_100~6186 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=1e7d496794cc655eabb30c1edb1a269aec6309dc Added code for obdfs to do writes to files and reads of directories and files. --- diff --git a/lustre/include/linux/obd_class.h b/lustre/include/linux/obd_class.h index a124b36..f7d6f22 100644 --- a/lustre/include/linux/obd_class.h +++ b/lustre/include/linux/obd_class.h @@ -4,12 +4,18 @@ #include #include +#include +/* #include */ + #define OBD_PSDEV_MAJOR 120 #define MAX_OBD_DEVICES 2 +extern struct obd_device obd_dev[MAX_OBD_DEVICES]; + struct obd_conn_info { unsigned int conn_id; /* handle */ unsigned long conn_ino; /* root inode number */ + /* XXX do we really need this */ unsigned long conn_blocksize; unsigned char conn_blocksize_bits; }; @@ -18,21 +24,24 @@ struct obd_type { struct list_head typ_chain; struct obd_ops *typ_ops; char *typ_name; - int typ_refcount; + int typ_refcnt; }; +/* corresponds to one of the obdx */ struct obd_device { struct obd_type *obd_type; - int refcnt; - struct super_block * sb; - unsigned int last_id; - unsigned long prealloc_quota; - struct list_head clients; + int obd_refcnt; + union { + struct sim_obd sim; + /* struct fc_obd fc; */ + } u; }; #define OBD_FL_SETUP 0x1 struct obd_ops { + int (*o_format)(void); + int (*o_partition)(int partition, unsigned int size); int (*o_connect)(int minor, struct obd_conn_info *info); int (*o_disconnect)(unsigned int conn_id); int (*o_setup) (struct obd_device *dev, void *data); @@ -43,7 +52,9 @@ struct obd_ops { int (*o_create)(struct obd_device *, int prealloc_ino, int *er); int (*o_destroy)(unsigned int conn_id, unsigned long ino); unsigned long (*o_read)(unsigned int conn_id, unsigned long ino, char *buf, unsigned long count, loff_t offset, int *err); + unsigned long (*o_read2)(unsigned int conn_id, 
unsigned long ino, char *buf, unsigned long count, loff_t offset, int *err); unsigned long (*o_write)(unsigned int conn_id, unsigned long ino, char *buf, unsigned long count, loff_t offset, int *err); + int (*o_brw)(int rw, int conn, int objectid, struct page *page); long (*o_preallocate)(unsigned int conn_id, int req, long inodes[32], int *err); void (*o_cleanup_device)(int dev); }; @@ -87,10 +98,15 @@ struct oic_rw_s { unsigned long count; loff_t offset; }; +struct oic_partition { + int partition; + unsigned int size; +}; + #define OBD_IOC_CREATE _IOR ('f', 3, long) -#define OBD_IOC_SETUP_SUPER _IOW ('f', 4, long) -#define OBD_IOC_CLEANUP_SUPER _IO ('f', 5 ) +#define OBD_IOC_SETUP_OBDDEV _IOW ('f', 4, long) +#define OBD_IOC_CLEANUP_OBDDEV _IO ('f', 5 ) #define OBD_IOC_DESTROY _IOW ('f', 6, long) #define OBD_IOC_PREALLOCATE _IOWR('f', 7, long) #define OBD_IOC_DEC_USE_COUNT _IO ('f', 8 ) @@ -102,6 +118,9 @@ struct oic_rw_s { #define OBD_IOC_DISCONNECT _IOW ('f', 14, long) #define OBD_IOC_STATFS _IOWR('f', 15, long) #define OBD_IOC_SYNC _IOR ('f', 16, long) +#define OBD_IOC_READ2 _IOWR('f', 17, long) +#define OBD_IOC_FORMAT _IO('f', 17, long) +#define OBD_IOC_READ2 _IOWR('f', 17, long) #define OBD_IOC_DEC_FS_USE_COUNT _IO ('f', 32 ) diff --git a/lustre/include/linux/obd_sim.h b/lustre/include/linux/obd_sim.h index 090efa7..6b56399 100644 --- a/lustre/include/linux/obd_sim.h +++ b/lustre/include/linux/obd_sim.h @@ -1,6 +1,18 @@ #ifndef _OBD_SIM #define _OBD_SIM +struct sim_obd { + struct super_block * sim_sb; + unsigned int sim_last_id; + unsigned long sim_prealloc_quota; + struct list_head sim_clients; +}; + + +/* development definitions */ +extern struct obdfs_sb_info *obd_sbi; +extern struct file_operations *obd_fso; + /* obd_sim.c */ extern struct obd_ops sim_obd_ops; inline long ext2_block_map (struct inode * inode, long block); @@ -43,31 +55,6 @@ struct buffer_head * obd_bread (struct inode * inode, int block, struct buffer_head * obd_getblk (struct inode * 
inode, long block, int create, int * err); -/* interface.c */ -void obd_cleanup_device(int dev); -extern int obd_create (struct obd_device *, int inode_hint, int * err); -extern void obd_unlink (struct inode * inode); -extern struct obd_client * obd_client(int cli_id); -extern void obd_cleanup_client (struct obd_device * obddev, - struct obd_client * cli); -void obd_cleanup_device(int dev); -int obd_cleanup_super(struct obd_device * obddev); -int obd_setup_super(struct obd_device * obddev, void *data); -long obd_preallocate_inodes(unsigned int conn_id, - int req, long inodes[32], int * err); -long obd_preallocate_quota(struct super_block * sb, struct obd_client * cli, - unsigned long req, int * err); -int obd_connect (int minor, struct obd_conn_info * conninfo); -int obd_disconnect (unsigned int conn_id); -int obd_setattr(unsigned int conn_id, unsigned long ino, struct iattr * iattr); -int obd_getattr(unsigned int conn_id, unsigned long ino, struct iattr * iattr); -int obd_destroy(unsigned int conn_id, unsigned long ino); -int obd_statfs(unsigned int conn_id, struct statfs * statfs); -unsigned long obd_read(unsigned int conn_id, unsigned long ino, char * buf, - unsigned long count, loff_t offset, int * err); -unsigned long obd_write (unsigned int conn_id, unsigned long ino, char * buf, - unsigned long count, loff_t offset, int * err); - /* super.c */ #define ext2_warning obd_warning diff --git a/lustre/include/linux/obd_support.h b/lustre/include/linux/obd_support.h index 7313386..2c6e664 100644 --- a/lustre/include/linux/obd_support.h +++ b/lustre/include/linux/obd_support.h @@ -50,6 +50,17 @@ extern int obd_print_entry; #endif /* SIM_OBD_DEBUG */ +#define PDEBUG(page,cmd) {\ + char *command = ( cmd == READ ) ? "read" : "write";\ + char *uptodate = (Page_Uptodate(page)) ? "yes" : "no";\ + char *locked = (PageLocked(page)) ? "yes" : "no";\ + int count = page->count.counter;\ + long ino = (page->inode) ? 
page->inode->i_ino : -1;\ + \ + CDEBUG(D_IOCTL, " ** %s, cmd: %s, ino: %ld, uptodate: %s, "\ + "locked: %s, cnt %d ** \n", __FUNCTION__,\ + command, ino, uptodate, locked, count);\ + } #define OBD_ALLOC(ptr, cast, size) \ diff --git a/lustre/include/linux/obdfs.h b/lustre/include/linux/obdfs.h index b293c40..cee9b7b 100644 --- a/lustre/include/linux/obdfs.h +++ b/lustre/include/linux/obdfs.h @@ -11,22 +11,39 @@ #ifndef _OBDFS_H #define OBDFS_H -#include <../obd/linux/sim_obd.h> +#include <../obd/linux/obd_class.h> +/* file.c */ +ssize_t obdfs_file_write(struct file *file, const char *buf, size_t count, loff_t *ppos); + + +/* rw.c */ +struct page *obdfs_getpage(struct inode *inode, unsigned long offset); +int obdfs_writepage(struct file *file, struct page *page); +int obdfs_write_one_page(struct file *file, struct page *page, unsigned long offset, unsigned long bytes, const char * buf); + +/* namei.c */ +struct dentry *obdfs_lookup(struct inode * dir, struct dentry *dentry); + +/* dir.c */ +int obdfs_readdir(struct file * filp, void * dirent, filldir_t filldir); struct obdfs_sb_info { struct obd_conn_info osi_conn_info; struct super_block *osi_super; - int osi_obd_minor; + struct obd_device *osi_obd; + struct obd_ops *osi_ops; }; - - void obdfs_sysctl_init(void); void obdfs_sysctl_clean(void); struct obdfs_inode_info; +extern struct file_operations obdfs_file_operations; +extern struct inode_perations obdfs_inode_operations; + + #define OBDFS_SUPER_MAGIC 0x4711 #endif diff --git a/lustre/obdclass/class_obd.c b/lustre/obdclass/class_obd.c index b33ee67..56cf3f3 100644 --- a/lustre/obdclass/class_obd.c +++ b/lustre/obdclass/class_obd.c @@ -61,6 +61,7 @@ int obd_debug_level = 4095; struct obd_device obd_dev[MAX_OBD_DEVICES]; struct list_head obd_types; +/* called when opening /dev/obdNNN */ static int obd_class_open(struct inode * inode, struct file * file) { int dev; @@ -71,14 +72,15 @@ static int obd_class_open(struct inode * inode, struct file * file) dev = 
MINOR(inode->i_rdev); if (dev >= MAX_OBD_DEVICES) return -ENODEV; - obd_dev[dev].refcnt++; - CDEBUG(D_PSDEV, "Refcount now %d\n", obd_dev[dev].refcnt++); + obd_dev[dev].obd_refcnt++; + CDEBUG(D_PSDEV, "Refcount now %d\n", obd_dev[dev].obd_refcnt++); MOD_INC_USE_COUNT; EXIT; return 0; } +/* called when closing /dev/obdNNN */ static int obd_class_release(struct inode * inode, struct file * file) { int dev; @@ -90,12 +92,12 @@ static int obd_class_release(struct inode * inode, struct file * file) if (dev >= MAX_OBD_DEVICES) return -ENODEV; fsync_dev(inode->i_rdev); - if (obd_dev[dev].refcnt <= 0) + if (obd_dev[dev].obd_refcnt <= 0) printk(KERN_ALERT "presto_psdev_release: refcount(%d) <= 0\n", - obd_dev[dev].refcnt); - obd_dev[dev].refcnt--; + obd_dev[dev].obd_refcnt); + obd_dev[dev].obd_refcnt--; - CDEBUG(D_PSDEV, "Refcount now %d\n", obd_dev[dev].refcnt++); + CDEBUG(D_PSDEV, "Refcount now %d\n", obd_dev[dev].obd_refcnt++); MOD_DEC_USE_COUNT; @@ -103,6 +105,7 @@ static int obd_class_release(struct inode * inode, struct file * file) return 0; } +/* support function */ static struct obd_type *obd_nm_to_type(char *nm) { struct list_head *tmp; @@ -119,8 +122,9 @@ static struct obd_type *obd_nm_to_type(char *nm) return NULL; } +/* to control /dev/obdNNN */ static int obd_class_ioctl (struct inode * inode, struct file * filp, - unsigned int cmd, unsigned long arg) + unsigned int cmd, unsigned long arg) { int err, i_ino, dev; struct obd_device *obddev; @@ -137,11 +141,11 @@ static int obd_class_ioctl (struct inode * inode, struct file * filp, obddev = &obd_dev[dev]; /* has this minor been registered? 
*/ - if (cmd != OBD_IOC_SETUP_SUPER && !obd_dev[dev].obd_type) + if (cmd != OBD_IOC_SETUP_OBDDEV && !obd_dev[dev].obd_type) return -ENODEV; switch (cmd) { - case OBD_IOC_SETUP_SUPER: { + case OBD_IOC_SETUP_OBDDEV: { struct obd_type *type; struct setup { @@ -156,43 +160,49 @@ static int obd_class_ioctl (struct inode * inode, struct file * filp, } /* get data structures */ - if ( (err= copy_from_user(&input, (void *) arg, sizeof(struct setup))) ) + err= copy_from_user(&input, (void *) arg, sizeof(input)); + if (err) return err; type = obd_nm_to_type(input.setup_type); if ( !type ) { - CDEBUG(D_IOCTL, "Trying to register non existent type %s\n", + printk("Trying to register non existent type %s\n", input.setup_type); return -1; } obddev->obd_type = type; - CDEBUG(D_IOCTL, "Registering %d, type %s\n", - dev, input.setup_type); + CDEBUG(D_IOCTL, "Setup %d, type %s\n", dev, input.setup_type); if ( obddev->obd_type->typ_ops->o_setup(obddev, &input.setup_data)){ obddev->obd_type = NULL; return -1; } else { - type->typ_refcount++; + type->typ_refcnt++; + MOD_INC_USE_COUNT; return 0; } - - } - case OBD_IOC_CLEANUP_SUPER: + case OBD_IOC_CLEANUP_OBDDEV: { + int rc; - /* cleanup has no argument */ - if ( obddev->obd_type->typ_refcount ) - obddev->obd_type->typ_refcount--; - else + if ( !obddev->obd_type->typ_refcnt ) printk("OBD_CLEANUP: refcount wrap!\n"); - if ( obddev->obd_type->typ_ops->o_cleanup ) - return obddev->obd_type->typ_ops->o_cleanup(obddev); - else - return 0; + if ( !obddev->obd_type->typ_ops->o_cleanup ) + goto out; + /* cleanup has no argument */ + rc = obddev->obd_type->typ_ops->o_cleanup(obddev); + if ( rc ) + return rc; + + out: + obddev->obd_type->typ_refcnt--; + obddev->obd_type = NULL; + MOD_DEC_USE_COUNT; + return 0; + } case OBD_IOC_CONNECT: { struct obd_conn_info conninfo; @@ -214,11 +224,11 @@ static int obd_class_ioctl (struct inode * inode, struct file * filp, /* sync doesn't need a connection ID, because it knows * what device it was called on, 
and can thus get the * superblock that it needs. */ - if (!obddev->sb || !obddev->sb->s_dev) { + if (!obddev->u.sim.sim_sb || !obddev->u.sim.sim_sb->s_dev) { CDEBUG(D_IOCTL, "fatal: device not initialized.\n"); err = -EINVAL; } else { - if ((err = fsync_dev(obddev->sb->s_dev))) + if ((err = fsync_dev(obddev->u.sim.sim_sb->s_dev))) CDEBUG(D_IOCTL, "sync: fsync_dev failure\n"); else CDEBUG(D_IOCTL, "sync: success\n"); @@ -228,7 +238,7 @@ static int obd_class_ioctl (struct inode * inode, struct file * filp, case OBD_IOC_CREATE: /* similarly, create doesn't need a connection ID for * the same reasons. */ - if (!obddev->sb) { + if (!obddev->u.sim.sim_sb) { CDEBUG(D_IOCTL, "fatal: device not initialized.\n"); return put_user(-EINVAL, (int *) arg); } @@ -296,6 +306,33 @@ static int obd_class_ioctl (struct inode * inode, struct file * filp, return err; } + case OBD_IOC_READ2: + { + int err; + + err = copy_from_user(&rw_s, (int *)arg, sizeof(struct oic_rw_s)); + if ( err ) + return err; + + if ( !obddev->obd_type->typ_ops || + !obddev->obd_type->typ_ops->o_read ) + return -EINVAL; + + rw_s.count = obddev->obd_type->typ_ops->o_read2(rw_s.conn_id, + rw_s.inode, + rw_s.buf, + rw_s.count, + rw_s.offset, + &err); + if ( err ) + return err; + + err = copy_to_user((int*)arg, &rw_s.count, + sizeof(unsigned long)); + return err; + } + + case OBD_IOC_READ: { int err; @@ -348,7 +385,7 @@ static int obd_class_ioctl (struct inode * inode, struct file * filp, copy_from_user(&prealloc, (int *)arg, sizeof(struct oic_prealloc_s)); - if (!obddev->sb || !obddev->sb->s_dev) { + if (!obddev->u.sim.sim_sb || !obddev->u.sim.sim_sb->s_dev) { CDEBUG(D_IOCTL, "fatal: device not initialized.\n"); return -EINVAL; } @@ -368,14 +405,21 @@ static int obd_class_ioctl (struct inode * inode, struct file * filp, { struct statfs *tmp; unsigned int conn_id; - + struct statfs buf; + int rc; + tmp = (void *)arg + sizeof(unsigned int); get_user(conn_id, (int *) arg); if ( !obddev->obd_type || 
!obddev->obd_type->typ_ops->o_statfs) return -EINVAL; - return obddev->obd_type->typ_ops->o_statfs(conn_id, tmp); + rc = obddev->obd_type->typ_ops->o_statfs(conn_id, &buf); + if ( rc ) + return rc; + rc = copy_to_user(tmp, &buf, sizeof(buf)); + return rc; + } default: printk("invalid ioctl: cmd = %u, arg = %lu\n", cmd, arg); @@ -413,7 +457,7 @@ int obd_unregister_type(char *nm) if ( !type ) return -1; - if ( type->typ_refcount ) + if ( type->typ_refcnt ) return -1; list_del(&type->typ_chain); @@ -459,11 +503,8 @@ int init_obd(void) } for (i = 0; i < MAX_OBD_DEVICES; i++) { - obd_dev[i].obd_type = 0; - obd_dev[i].refcnt = 0; - obd_dev[i].sb = NULL; - obd_dev[i].last_id = 0; - INIT_LIST_HEAD(&obd_dev[i].clients); + memset(&(obd_dev[i]), 0, sizeof(obd_dev[i])); + INIT_LIST_HEAD(&obd_dev[i].u.sim.sim_clients); } obd_sysctl_init(); diff --git a/lustre/obdclass/obdcontrol b/lustre/obdclass/obdcontrol index ab31b64..da661fb 100755 --- a/lustre/obdclass/obdcontrol +++ b/lustre/obdclass/obdcontrol @@ -21,10 +21,10 @@ BEGIN { require "asm/ioctl.ph" }; # p2ph generated invalid macros for ioctl stuff, so I override some of it here eval 'sub OBD_IOC_CREATE () { &_IOC(2, ord(\'f\'), 3, 4);}' unless defined(&OBD_IOC_CREATE); -eval 'sub OBD_IOC_SETUP_SUPER () { &_IOC(1, ord(\'f\'), 4, 4);}' unless - defined(&OBD_IOC_SETUP_SUPER); -eval 'sub OBD_IOC_CLEANUP_SUPER () { &_IOC(0, ord(\'f\'), 5, 0);}' unless - defined(&OBD_IOC_CLEANUP_SUPER); +eval 'sub OBD_IOC_SETUP_OBDDEV () { &_IOC(1, ord(\'f\'), 4, 4);}' unless + defined(&OBD_IOC_SETUP_OBDDEV); +eval 'sub OBD_IOC_CLEANUP_OBDDEV () { &_IOC(0, ord(\'f\'), 5, 0);}' unless + defined(&OBD_IOC_CLEANUP_OBDDEV); eval 'sub OBD_IOC_DESTROY () { &_IOC(1, ord(\'f\'), 6, 4);}' unless defined(&OBD_IOC_DESTROY); eval 'sub OBD_IOC_PREALLOCATE () { &_IOC(3, ord(\'f\'), 7, 4);}' unless @@ -37,6 +37,8 @@ eval 'sub OBD_IOC_GETATTR () { &_IOC(2, ord(\'f\'), 10, 4);}' unless defined(&OBD_IOC_GETATTR); eval 'sub OBD_IOC_READ () { &_IOC(3, ord(\'f\'), 11, 
4);}' unless defined(&OBD_IOC_READ); +eval 'sub OBD_IOC_READ2 () { &_IOC(3, ord(\'f\'), 17, 4);}' unless + defined(&OBD_IOC_READ2); eval 'sub OBD_IOC_WRITE () { &_IOC(3, ord(\'f\'), 12, 4);}' unless defined(&OBD_IOC_WRITE); eval 'sub OBD_IOC_CONNECT () { &_IOC(2, ord(\'f\'), 13, 4);}' unless @@ -91,9 +93,10 @@ my %commands = 'disconnect' => {func => "Disconnect", doc => "disconnect [id]: frees client resources"}, 'sync' => {func => "Sync", doc => "sync: flushes buffers to disk"}, 'destroy' => {func => "Destroy", doc => "setup: destroys an inode"}, - 'cleanup' => {func => "Cleanup", doc => "detach the superblock from this minor obd dev"}, + 'cleanup' => {func => "Cleanup", doc => "cleanup the minor obd device"}, 'dec_use_count' => {func => "Decusecount", doc => "decreases the module use count so that the module can be removed following an oops"}, 'read' => {func => "Read", doc => "read [offset]"}, + 'fsread' => {func => "Read2", doc => "read [offset]"}, 'write' => {func => "Write", doc => "write "}, 'setattr' => {func => "Setattr", doc => "setattr [mode [uid [gid [size [atime [mtime [ctime]]]]]]]"}, 'getattr' => {func => "Getattr", doc => "getattr : displays inode object attributes"}, @@ -174,7 +177,7 @@ sub Setup { my $err = 0; my $packed = pack("La24", $::st->rdev(), "sim_obd"); - my $rc = ioctl(DEV_OBD, &OBD_IOC_SETUP_SUPER, $packed); + my $rc = ioctl(DEV_OBD, &OBD_IOC_SETUP_OBDDEV, $packed); if (!defined $rc) { print STDERR "ioctl failed: $!\n"; @@ -187,7 +190,7 @@ sub Setup { sub Cleanup { my $err = "0"; - my $rc = ioctl(DEV_OBD, &OBD_IOC_CLEANUP_SUPER, $err); + my $rc = ioctl(DEV_OBD, &OBD_IOC_CLEANUP_OBDDEV, $err); if (!defined $rc) { print STDERR "ioctl failed: $!\n"; @@ -494,6 +497,55 @@ sub Read { } } +sub Read2 { + if (!defined($::client_id)) { + print "You must first ``connect''.\n"; + return; + } + + my $inode = shift; + my $count = shift; + my $offset = shift; + + if (!defined($inode) || scalar($inode) < 1 || !defined($count) || + $count < 1 || 
(defined($offset) && $offset < 0)) { + print "invalid arguments; type \"help read\" for a synopsis\n"; + return; + } + + if (!defined($offset)) { + $offset = 0; + } + + print("Reading $count bytes starting at byte $offset from object " . + "$inode...\n"); + + # "allocate" a large enough buffer + my $buf = sprintf("%${count}s", " "); + die "suck" if (length($buf) != $count); + + # the perl we're using doesn't support pack type Q, and offset is 64 bits + my $packed = pack("ILpLLL", $::client_id, $inode, $buf, $count, $offset, 0); + + my $rc = ioctl(DEV_OBD, &OBD_IOC_READ2, $packed); + + $retval = unpack("l", $packed); + + if (!defined $rc) { + print STDERR "ioctl failed: $!\n"; + } elsif ($rc eq "0 but true") { + if ($retval >= 0) { + print substr($buf, 0, $retval); + print "\nRead $retval of an attempted $count bytes.\n"; + print "Finished (success)\n"; + } else { + print "Finished (error $retval)\n"; + } + } else { + print "ioctl returned error code $rc.\n"; + } +} + sub Write { if (!defined($::client_id)) { print "You must first ``connect''.\n"; diff --git a/lustre/obdfs/Makefile b/lustre/obdfs/Makefile index 5ca62b3..46188f1 100644 --- a/lustre/obdfs/Makefile +++ b/lustre/obdfs/Makefile @@ -8,7 +8,7 @@ MODDIR = $(PREFIX)/lib/modules/$(UTS_RELEASE) ifndef CFLAGS # Don't remove "-O2" or bad things will happen! 
-KFLAGS = -O2 -Wall -Wstrict-prototypes -Winline -pipe -I../obd +KFLAGS = -O2 -Wall -Wstrict-prototypes -pipe -I../obd endif CPPFLAGS = $(PCDEBUG) -D__KERNEL__ -DMODULE -I../include \ @@ -17,7 +17,7 @@ COFLAGS = -kv CFLAGS= $(KFLAGS) $(MFLAG) -SRCS=sysctl.c super.c +SRCS=file.c dir.c sysctl.c super.c rw.c namei.c OBJS=$(SRCS:%.c=%.o) MODULES = obdfs.o diff --git a/lustre/obdfs/dir.c b/lustre/obdfs/dir.c new file mode 100644 index 0000000..7e87052 --- /dev/null +++ b/lustre/obdfs/dir.c @@ -0,0 +1,176 @@ +/* + * linux/fs/ext2/dir.c + * + * Copyright (C) 1992, 1993, 1994, 1995 + * Remy Card (card@masi.ibp.fr) + * Laboratoire MASI - Institut Blaise Pascal + * Universite Pierre et Marie Curie (Paris VI) + * + * from + * + * linux/fs/minix/dir.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + * + * ext2 directory handling functions + * + * Big-endian to little-endian byte-swapping/bitmaps by + * David S. Miller (davem@caip.rutgers.edu), 1995 + * + * Changes for use with Object Based Device File System + * + * Copyright (C) 1999, Seagate Technology Inc. + * (author Peter J. 
Braam, braam@stelias.com) + * + */ + +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "obdfs.h" +#include + +#if 0 +static ssize_t obdfs_dir_read (struct file * filp, char * buf, + size_t count, loff_t *ppos) +{ + return -EISDIR; +} + + +int ext2_check_dir_entry (const char * function, struct inode * dir, + struct ext2_dir_entry_2 * de, + struct buffer_head * bh, + unsigned long offset) +{ + const char * error_msg = NULL; + + if (le16_to_cpu(de->rec_len) < EXT2_DIR_REC_LEN(1)) + error_msg = "rec_len is smaller than minimal"; + else if (le16_to_cpu(de->rec_len) % 4 != 0) + error_msg = "rec_len % 4 != 0"; + else if (le16_to_cpu(de->rec_len) < EXT2_DIR_REC_LEN(de->name_len)) + error_msg = "rec_len is too small for name_len"; + else if (dir && ((char *) de - bh->b_data) + le16_to_cpu(de->rec_len) > + dir->i_sb->s_blocksize) + error_msg = "directory entry across blocks"; + else if (dir && le32_to_cpu(de->inode) > le32_to_cpu(dir->i_sb->u.ext2_sb.s_es->s_inodes_count)) + error_msg = "inode out of bounds"; + + if (error_msg != NULL) + ext2_error (dir->i_sb, function, "bad entry in directory #%lu: %s - " + "offset=%lu, inode=%lu, rec_len=%d, name_len=%d", + dir->i_ino, error_msg, offset, + (unsigned long) le32_to_cpu(de->inode), + le16_to_cpu(de->rec_len), de->name_len); + return error_msg == NULL ? 
1 : 0; +} + +#endif + +int obdfs_readdir(struct file * filp, void * dirent, filldir_t filldir) +{ + int error = 0; + unsigned long offset; + int stored; + struct ext2_dir_entry_2 * de; + struct super_block * sb; + struct page *page; + struct inode *inode = filp->f_dentry->d_inode; + + sb = inode->i_sb; + + stored = 0; + offset = filp->f_pos & (PAGE_SIZE - 1); + + while (!error && !stored && filp->f_pos < inode->i_size) { + page = obdfs_getpage(inode, offset); + if (!page) { + ext2_error (sb, "ext2_readdir", + "directory #%lu contains a hole at offset %lu", + inode->i_ino, (unsigned long)filp->f_pos); + filp->f_pos += PAGE_SIZE - offset; + continue; + } + +#if 0 + /* XXX need to do read ahead and support stuff below */ +revalidate: + /* If the dir block has changed since the last call to + * readdir(2), then we might be pointing to an invalid + * dirent right now. Scan from the start of the block + * to make sure. */ + if (filp->f_version != inode->i_version) { + for (i = 0; i < sb->s_blocksize && i < offset; ) { + de = (struct ext2_dir_entry_2 *) + (bh->b_data + i); + /* It's too expensive to do a full + * dirent test each time round this + * loop, but we do have to test at + * least that it is non-zero. A + * failure will be detected in the + * dirent test below. */ + if (le16_to_cpu(de->rec_len) < EXT2_DIR_REC_LEN(1)) + break; + i += le16_to_cpu(de->rec_len); + } + offset = i; + filp->f_pos = (filp->f_pos & ~(sb->s_blocksize - 1)) + | offset; + filp->f_version = inode->i_version; + } +#endif + while (!error && filp->f_pos < inode->i_size + && offset < PAGE_SIZE) { + de = (struct ext2_dir_entry_2 *) ((char *)page_address(page) + offset); +#if 0 + if (!obdfs_check_dir_entry ("ext2_readdir", inode, de, + bh, offset)) { + /* On error, skip the f_pos to the + next block. 
*/ + filp->f_pos = (filp->f_pos & (sb->s_blocksize - 1)) + + sb->s_blocksize; + brelse (bh); + return stored; + } +#endif + offset += le16_to_cpu(de->rec_len); + if (le32_to_cpu(de->inode)) { + /* We might block in the next section + * if the data destination is + * currently swapped out. So, use a + * version stamp to detect whether or + * not the directory has been modified + * during the copy operation. + */ + /* XXX unsigned long version = inode->i_version; + */ + error = filldir(dirent, de->name, + de->name_len, + filp->f_pos, le32_to_cpu(de->inode)); + if (error) + break; +#if 0 + if (version != inode->i_version) + goto revalidate; +#endif + stored ++; + } + filp->f_pos += le16_to_cpu(de->rec_len); + } + offset = 0; + } + UPDATE_ATIME(inode); + return 0; +} diff --git a/lustre/obdfs/file.c b/lustre/obdfs/file.c new file mode 100644 index 0000000..fb6b916 --- /dev/null +++ b/lustre/obdfs/file.c @@ -0,0 +1,70 @@ +/* + * linux/fs/ext2/file.c + * + * Copyright (C) 1992, 1993, 1994, 1995 + * Remy Card (card@masi.ibp.fr) + * Laboratoire MASI - Institut Blaise Pascal + * Universite Pierre et Marie Curie (Paris VI) + * + * from + * + * linux/fs/minix/file.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + * + * ext2 fs regular file handling primitives + * + * 64-bit file support on 64-bit platforms by Jakub Jelinek + * (jj@sunsite.ms.mff.cuni.cz) + */ + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include "obdfs.h" + +static inline void remove_suid(struct inode *inode) +{ + unsigned int mode; + + /* set S_IGID if S_IXGRP is set, and always set S_ISUID */ + mode = (inode->i_mode & S_IXGRP)*(S_ISGID/S_IXGRP) | S_ISUID; + + /* was any of the uid bits set? */ + mode &= inode->i_mode; + if (mode && !capable(CAP_FSETID)) { + inode->i_mode &= ~mode; + mark_inode_dirty(inode); + } +} + +/* + * Write to a file (through the page cache). 
+ */ +ssize_t obdfs_file_write(struct file *file, const char *buf, size_t count, loff_t *ppos) +{ + ssize_t retval; + CDEBUG(D_INODE, "Writing %d bytes, offset %ld\n", count, (long)*ppos); + + retval = generic_file_write(file, buf, count, + ppos, obdfs_write_one_page); + if (retval > 0) { + struct inode *inode = file->f_dentry->d_inode; + remove_suid(inode); + inode->i_ctime = inode->i_mtime = CURRENT_TIME; + mark_inode_dirty(inode); + } + return retval; +} diff --git a/lustre/obdfs/namei.c b/lustre/obdfs/namei.c new file mode 100644 index 0000000..b6ba9c9 --- /dev/null +++ b/lustre/obdfs/namei.c @@ -0,0 +1,867 @@ +/* + * linux/fs/ext2/namei.c + * + * Copyright (C) 1992, 1993, 1994, 1995 + * Remy Card (card@masi.ibp.fr) + * Laboratoire MASI - Institut Blaise Pascal + * Universite Pierre et Marie Curie (Paris VI) + * + * from + * + * linux/fs/minix/namei.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + * + * Big-endian to little-endian byte-swapping/bitmaps by + * David S. Miller (davem@caip.rutgers.edu), 1995 + * Directory entry file type support and forward compatibility hooks + * for B-tree directories by Theodore Ts'o (tytso@mit.edu), 1998 + * + * Changes for use in OBDFS + * Copyright (c) 1999, Seagate Technology Inc. + * + */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "obdfs.h" + +/* + * define how far ahead to read directories while searching them. + */ +#define NAMEI_RA_CHUNKS 2 +#define NAMEI_RA_BLOCKS 4 +#define NAMEI_RA_SIZE (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS) +#define NAMEI_RA_INDEX(c,b) (((c) * NAMEI_RA_BLOCKS) + (b)) + +/* + * NOTE! unlike strncmp, ext2_match returns 1 for success, 0 for failure. + * + * `len <= EXT2_NAME_LEN' is guaranteed by caller. + * `de != NULL' is guaranteed by caller. 
+ */ +static inline int ext2_match (int len, const char * const name, + struct ext2_dir_entry_2 * de) +{ + if (len != de->name_len) + return 0; + if (!de->inode) + return 0; + return !memcmp(name, de->name, len); +} + +/* + * ext2_find_entry() + * + * finds an entry in the specified directory with the wanted name. It + * returns the cache buffer in which the entry was found, and the entry + * itself (as a parameter - res_dir). It does NOT read the inode of the + * entry - you'll have to do that yourself if you want to. + */ +struct page * obdfs_find_entry (struct inode * dir, + const char * const name, int namelen, + struct ext2_dir_entry_2 ** res_dir) +{ + struct super_block * sb; + unsigned long offset; + struct page * page; + + *res_dir = NULL; + sb = dir->i_sb; + + if (namelen > EXT2_NAME_LEN) + return NULL; + + for (page = 0, offset = 0; offset < dir->i_size; page++) { + struct ext2_dir_entry_2 * de; + char * dlimit; + + page = obdfs_getpage(dir, offset); + + de = (struct ext2_dir_entry_2 *) page_address(page); + dlimit = (char *)page_address(page) + PAGE_SIZE; + while ((char *) de < dlimit) { + /* this code is executed quadratically often */ + /* do minimal checking `by hand' */ + int de_len; + + if ((char *) de + namelen <= dlimit && + ext2_match (namelen, name, de)) { + /* found a match - + just to be sure, do a full check */ +#if 0 + if (!ext2_check_dir_entry("ext2_find_entry", + dir, de, bh, offset)) + goto failure; +#endif + *res_dir = de; + return page; + } + /* prevent looping on a bad block */ + de_len = le16_to_cpu(de->rec_len); + if (de_len <= 0) + goto failure; + offset += de_len; + de = (struct ext2_dir_entry_2 *) + ((char *) de + de_len); + } + page_cache_release(page); + } + +failure: + return NULL; +} + +struct dentry *obdfs_lookup(struct inode * dir, struct dentry *dentry) +{ + struct inode * inode; + struct ext2_dir_entry_2 * de; + struct page * page; + + if (dentry->d_name.len > EXT2_NAME_LEN) + return ERR_PTR(-ENAMETOOLONG); + + page = 
obdfs_find_entry (dir, dentry->d_name.name, dentry->d_name.len, &de); + inode = NULL; + if (page) { + unsigned long ino = le32_to_cpu(de->inode); + page_cache_release(page); + inode = iget(dir->i_sb, ino); + + if (!inode) + return ERR_PTR(-EACCES); + } + d_add(dentry, inode); + return NULL; +} + + +#if 0 +/* + * ext2_add_entry() + * + * adds a file entry to the specified directory, using the same + * semantics as ext2_find_entry(). It returns NULL if it failed. + * + * NOTE!! The inode part of 'de' is left at 0 - which means you + * may not sleep between calling this and putting something into + * the entry, as someone else might have used it while you slept. + */ +static struct buffer_head * ext2_add_entry (struct inode * dir, + const char * name, int namelen, + struct ext2_dir_entry_2 ** res_dir, + int *err) +{ + unsigned long offset; + unsigned short rec_len; + struct buffer_head * bh; + struct ext2_dir_entry_2 * de, * de1; + struct super_block * sb; + + *err = -EINVAL; + *res_dir = NULL; + if (!dir || !dir->i_nlink) + return NULL; + sb = dir->i_sb; + + if (!namelen) + return NULL; + /* + * Is this a busy deleted directory? 
Can't create new files if so + */ + if (dir->i_size == 0) + { + *err = -ENOENT; + return NULL; + } + bh = ext2_bread (dir, 0, 0, err); + if (!bh) + return NULL; + rec_len = EXT2_DIR_REC_LEN(namelen); + offset = 0; + de = (struct ext2_dir_entry_2 *) bh->b_data; + *err = -ENOSPC; + while (1) { + if ((char *)de >= sb->s_blocksize + bh->b_data) { + brelse (bh); + bh = NULL; + bh = ext2_bread (dir, offset >> EXT2_BLOCK_SIZE_BITS(sb), 1, err); + if (!bh) + return NULL; + if (dir->i_size <= offset) { + if (dir->i_size == 0) { + *err = -ENOENT; + return NULL; + } + + ext2_debug ("creating next block\n"); + + de = (struct ext2_dir_entry_2 *) bh->b_data; + de->inode = 0; + de->rec_len = le16_to_cpu(sb->s_blocksize); + dir->i_size = offset + sb->s_blocksize; + dir->u.ext2_i.i_flags &= ~EXT2_BTREE_FL; + mark_inode_dirty(dir); + } else { + + ext2_debug ("skipping to next block\n"); + + de = (struct ext2_dir_entry_2 *) bh->b_data; + } + } + if (!ext2_check_dir_entry ("ext2_add_entry", dir, de, bh, + offset)) { + *err = -ENOENT; + brelse (bh); + return NULL; + } + if (ext2_match (namelen, name, de)) { + *err = -EEXIST; + brelse (bh); + return NULL; + } + if ((le32_to_cpu(de->inode) == 0 && le16_to_cpu(de->rec_len) >= rec_len) || + (le16_to_cpu(de->rec_len) >= EXT2_DIR_REC_LEN(de->name_len) + rec_len)) { + offset += le16_to_cpu(de->rec_len); + if (le32_to_cpu(de->inode)) { + de1 = (struct ext2_dir_entry_2 *) ((char *) de + + EXT2_DIR_REC_LEN(de->name_len)); + de1->rec_len = cpu_to_le16(le16_to_cpu(de->rec_len) - + EXT2_DIR_REC_LEN(de->name_len)); + de->rec_len = cpu_to_le16(EXT2_DIR_REC_LEN(de->name_len)); + de = de1; + } + de->inode = 0; + de->name_len = namelen; + de->file_type = 0; + memcpy (de->name, name, namelen); + /* + * XXX shouldn't update any times until successful + * completion of syscall, but too many callers depend + * on this. 
+ * + * XXX similarly, too many callers depend on + * ext2_new_inode() setting the times, but error + * recovery deletes the inode, so the worst that can + * happen is that the times are slightly out of date + * and/or different from the directory change time. + */ + dir->i_mtime = dir->i_ctime = CURRENT_TIME; + dir->u.ext2_i.i_flags &= ~EXT2_BTREE_FL; + mark_inode_dirty(dir); + dir->i_version = ++event; + mark_buffer_dirty(bh, 1); + *res_dir = de; + *err = 0; + return bh; + } + offset += le16_to_cpu(de->rec_len); + de = (struct ext2_dir_entry_2 *) ((char *) de + le16_to_cpu(de->rec_len)); + } + brelse (bh); + return NULL; +} + +/* + * ext2_delete_entry deletes a directory entry by merging it with the + * previous entry + */ +static int ext2_delete_entry (struct ext2_dir_entry_2 * dir, + struct buffer_head * bh) +{ + struct ext2_dir_entry_2 * de, * pde; + int i; + + i = 0; + pde = NULL; + de = (struct ext2_dir_entry_2 *) bh->b_data; + while (i < bh->b_size) { + if (!ext2_check_dir_entry ("ext2_delete_entry", NULL, + de, bh, i)) + return -EIO; + if (de == dir) { + if (pde) + pde->rec_len = + cpu_to_le16(le16_to_cpu(pde->rec_len) + + le16_to_cpu(dir->rec_len)); + else + dir->inode = 0; + return 0; + } + i += le16_to_cpu(de->rec_len); + pde = de; + de = (struct ext2_dir_entry_2 *) ((char *) de + le16_to_cpu(de->rec_len)); + } + return -ENOENT; +} + +static inline void ext2_set_de_type(struct super_block *sb, + struct ext2_dir_entry_2 *de, + umode_t mode) { + if (!EXT2_HAS_INCOMPAT_FEATURE(sb, EXT2_FEATURE_INCOMPAT_FILETYPE)) + return; + if (S_ISCHR(mode)) + de->file_type = EXT2_FT_CHRDEV; + else if (S_ISBLK(mode)) + de->file_type = EXT2_FT_BLKDEV; + else if (S_ISFIFO(mode)) + de->file_type = EXT2_FT_FIFO; + else if (S_ISLNK(mode)) + de->file_type = EXT2_FT_SYMLINK; + else if (S_ISREG(mode)) + de->file_type = EXT2_FT_REG_FILE; + else if (S_ISDIR(mode)) + de->file_type = EXT2_FT_DIR; +} + +/* + * By the time this is called, we already have created + * the directory 
cache entry for the new file, but it
+ * is so far negative - it has no inode.
+ *
+ * If the create succeeds, we fill in the inode information
+ * with d_instantiate(). 
+ *
+ * Returns 0 on success.  When ext2_new_inode() fails its error is
+ * returned as is.  When the name cannot be added to the directory the
+ * new inode's link count is dropped and the inode is released before
+ * the error from ext2_add_entry() is returned.  The directory entry is
+ * always typed as a regular file (S_IFREG), independently of `mode'.
+ */
+int ext2_create (struct inode * dir, struct dentry * dentry, int mode)
+{
+	struct inode * inode;
+	struct buffer_head * bh;
+	struct ext2_dir_entry_2 * de;
+	int err = -EIO;
+
+	/*
+	 * N.B. Several error exits in ext2_new_inode don't set err.
+	 */
+	inode = ext2_new_inode (dir, mode, &err);
+	if (!inode)
+		return err;
+
+	inode->i_op = &ext2_file_inode_operations;
+	inode->i_mode = mode;
+	mark_inode_dirty(inode);
+	bh = ext2_add_entry (dir, dentry->d_name.name, dentry->d_name.len, &de, &err);
+	if (!bh) {
+		inode->i_nlink--;	/* undo the new inode: no name refers to it */
+		mark_inode_dirty(inode);
+		iput (inode);
+		return err;
+	}
+	de->inode = cpu_to_le32(inode->i_ino);
+	ext2_set_de_type(dir->i_sb, de, S_IFREG);
+	dir->i_version = ++event;
+	mark_buffer_dirty(bh, 1);
+	if (IS_SYNC(dir)) {	/* synchronous directory: write the block out now */
+		ll_rw_block (WRITE, 1, &bh);
+		wait_on_buffer (bh);
+	}
+	brelse (bh);
+	d_instantiate(dentry, inode);
+	return 0;
+}
+/*
+ * ext2_mknod creates a special file: the new inode is set up through
+ * init_special_inode() with `mode' and `rdev', then linked into `dir'
+ * under the dentry's name.  Returns 0 on success or the error reported
+ * by ext2_new_inode() / ext2_add_entry(); in the latter case the inode
+ * is released again through the out_no_entry path.
+ */
+int ext2_mknod (struct inode * dir, struct dentry *dentry, int mode, int rdev)
+{
+	struct inode * inode;
+	struct buffer_head * bh;
+	struct ext2_dir_entry_2 * de;
+	int err = -EIO;
+
+	inode = ext2_new_inode (dir, mode, &err);
+	if (!inode)
+		goto out;
+
+	inode->i_uid = current->fsuid;
+	init_special_inode(inode, mode, rdev);
+	bh = ext2_add_entry (dir, dentry->d_name.name, dentry->d_name.len, &de, &err);
+	if (!bh)
+		goto out_no_entry;
+	de->inode = cpu_to_le32(inode->i_ino);
+	dir->i_version = ++event;
+	ext2_set_de_type(dir->i_sb, de, inode->i_mode);
+	mark_inode_dirty(inode);
+	mark_buffer_dirty(bh, 1);
+	if (IS_SYNC(dir)) {
+		ll_rw_block (WRITE, 1, &bh);
+		wait_on_buffer (bh);
+	}
+	d_instantiate(dentry, inode);
+	brelse(bh);
+	err = 0;
+out:
+	return err;
+
+out_no_entry:	/* the name could not be added: drop the inode again */
+	inode->i_nlink--;
+	mark_inode_dirty(inode);
+	iput(inode);
+	goto out;
+}
+/*
+ * ext2_mkdir creates a directory named by `dentry' inside `dir'.
+ * Fails with -EMLINK when `dir' already has EXT2_LINK_MAX links.
+ */
+int ext2_mkdir(struct inode * dir, struct dentry * dentry, int mode)
+{
+	struct inode * inode;
+	struct buffer_head * bh, * dir_block;
+	struct ext2_dir_entry_2 * de;
+	int err;
+
+	err = -EMLINK;
+	if (dir->i_nlink >= EXT2_LINK_MAX)	/* parent cannot take the ".." link */
+		goto out;
+
+	err = -EIO;
+	inode = ext2_new_inode (dir, S_IFDIR, &err);
+	if (!inode)
+		goto out;
+
+	inode->i_op = &ext2_dir_inode_operations;
+	inode->i_size = inode->i_sb->s_blocksize;	/* one block, filled in below */
+	inode->i_blocks = 0;
+	dir_block = ext2_bread (inode, 0, 1, &err);
+	if (!dir_block) {
+		inode->i_nlink--; /* is this nlink == 0? */
+		mark_inode_dirty(inode);
+		iput (inode);
+		return err;
+	}
+	/* first record: "." pointing at the new directory itself */
+	de = (struct ext2_dir_entry_2 *) dir_block->b_data;
+	de->inode = cpu_to_le32(inode->i_ino);
+	de->name_len = 1;
+	de->rec_len = cpu_to_le16(EXT2_DIR_REC_LEN(de->name_len));
+	strcpy (de->name, ".");
+	ext2_set_de_type(dir->i_sb, de, S_IFDIR);
+	/* second record: ".." pointing at the parent, owning the rest of the block */
+	de = (struct ext2_dir_entry_2 *) ((char *) de + le16_to_cpu(de->rec_len));
+	de->inode = cpu_to_le32(dir->i_ino);
+	de->rec_len = cpu_to_le16(inode->i_sb->s_blocksize - EXT2_DIR_REC_LEN(1));
+	de->name_len = 2;
+	strcpy (de->name, "..");
+	ext2_set_de_type(dir->i_sb, de, S_IFDIR);
+	inode->i_nlink = 2;
+	mark_buffer_dirty(dir_block, 1);
+	brelse (dir_block);
+	inode->i_mode = S_IFDIR | mode;
+	if (dir->i_mode & S_ISGID)	/* set-gid parent: the new directory gets the bit too */
+		inode->i_mode |= S_ISGID;
+	mark_inode_dirty(inode);
+	bh = ext2_add_entry (dir, dentry->d_name.name, dentry->d_name.len, &de, &err);
+	if (!bh)
+		goto out_no_entry;
+	de->inode = cpu_to_le32(inode->i_ino);
+	ext2_set_de_type(dir->i_sb, de, S_IFDIR);
+	dir->i_version = ++event;
+	mark_buffer_dirty(bh, 1);
+	if (IS_SYNC(dir)) {
+		ll_rw_block (WRITE, 1, &bh);
+		wait_on_buffer (bh);
+	}
+	dir->i_nlink++;	/* accounts for the ".." record written above */
+	dir->u.ext2_i.i_flags &= ~EXT2_BTREE_FL;
+	mark_inode_dirty(dir);
+	d_instantiate(dentry, inode);
+	brelse (bh);
+	err = 0;
+out:
+	return err;
+
+out_no_entry:	/* the name could not be added: zero the link count and release */
+	inode->i_nlink = 0;
+	mark_inode_dirty(inode);
+	iput (inode);
+	goto out;
+}
+
+/*
+ * routine to check that the specified directory is empty (for rmdir)
+ *
+ * Returns 0 as soon as a record with a non-zero inode number is found
+ * after "." and "..", and 1 otherwise.  Note that 1 is also returned,
+ * after a warning, when the directory has no readable first block or
+ * lacks valid "." / ".." records, and when a record fails
+ * ext2_check_dir_entry().
+ */
+static int empty_dir (struct inode * inode)
+{
+	unsigned long offset;
+	struct buffer_head * bh;
+	struct ext2_dir_entry_2 * de, * de1;
+	struct super_block * sb;
+	int err;
+
+	sb = inode->i_sb;
+	if (inode->i_size < EXT2_DIR_REC_LEN(1) + EXT2_DIR_REC_LEN(2) ||
+	    !(bh = ext2_bread (inode, 0, 0, &err))) {
+	    	ext2_warning (inode->i_sb, "empty_dir",
+			      "bad directory (dir #%lu) - no data block",
+			      inode->i_ino);
+		return 1;
+	}
+	/* de is expected to be ".", de1 the record right after it ("..") */
+	de = (struct ext2_dir_entry_2 *) bh->b_data;
+	de1 = (struct ext2_dir_entry_2 *) ((char *) de + le16_to_cpu(de->rec_len));
+	if (le32_to_cpu(de->inode) != inode->i_ino || !le32_to_cpu(de1->inode) || 
+	    strcmp (".", de->name) || strcmp ("..", de1->name)) {
+	    	ext2_warning (inode->i_sb, "empty_dir",
+			      "bad directory (dir #%lu) - no `.' or `..'",
+			      inode->i_ino);
+		brelse (bh);
+		return 1;
+	}
+	/* continue with the first record after ".." */
+	offset = le16_to_cpu(de->rec_len) + le16_to_cpu(de1->rec_len);
+	de = (struct ext2_dir_entry_2 *) ((char *) de1 + le16_to_cpu(de1->rec_len));
+	while (offset < inode->i_size ) {
+		if (!bh || (void *) de >= (void *) (bh->b_data + sb->s_blocksize)) {
+			/* ran off the current block (or it was a hole): read the next one */
+			brelse (bh);
+			bh = ext2_bread (inode, offset >> EXT2_BLOCK_SIZE_BITS(sb), 0, &err);
+			if (!bh) {
+#if 0
+				ext2_error (sb, "empty_dir",
+					    "directory #%lu contains a hole at offset %lu",
+					    inode->i_ino, offset);
+#endif
+				offset += sb->s_blocksize;	/* unreadable block is skipped silently */
+				continue;
+			}
+			de = (struct ext2_dir_entry_2 *) bh->b_data;
+		}
+		if (!ext2_check_dir_entry ("empty_dir", inode, de, bh,
+					   offset)) {
+			brelse (bh);
+			return 1;
+		}
+		if (le32_to_cpu(de->inode)) {	/* a live entry: not empty */
+			brelse (bh);
+			return 0;
+		}
+		offset += le16_to_cpu(de->rec_len);
+		de = (struct ext2_dir_entry_2 *) ((char *) de + le16_to_cpu(de->rec_len));
+	}
+	brelse (bh);
+	return 1;
+}
+/*
+ * ext2_rmdir removes the directory named by `dentry' from `dir'.
+ * Returns -ENOENT when the name is not found in `dir'.
+ */
+int ext2_rmdir (struct inode * dir, struct dentry *dentry)
+{
+	int retval;
+	struct inode * inode;
+	struct buffer_head * bh;
+	struct ext2_dir_entry_2 * de;
+
+	retval = -ENOENT;
+	bh = ext2_find_entry (dir, dentry->d_name.name, dentry->d_name.len, &de);
+	if (!bh)
+		goto end_rmdir;
+
+	inode = dentry->d_inode;
+	DQUOT_INIT(inode);
+
+	retval = -EIO;
+	if (le32_to_cpu(de->inode) != inode->i_ino)	/* entry and dentry disagree */
+		goto end_rmdir;
+
+	retval = -ENOTEMPTY;
+	if (!empty_dir (inode))
+		goto end_rmdir;
+
+	retval = ext2_delete_entry (de, bh);
+	dir->i_version = ++event;	/* bumped before retval is checked */
+	if (retval)
+		goto end_rmdir;
+	mark_buffer_dirty(bh, 1);
+	if (IS_SYNC(dir)) {
+		ll_rw_block (WRITE, 1, &bh);
+		wait_on_buffer (bh);
+	}
+	if (inode->i_nlink != 2)
+		ext2_warning (inode->i_sb, "ext2_rmdir",
+			      "empty directory has nlink!=2 (%d)",
+			      inode->i_nlink);
+	inode->i_version = ++event;
+	inode->i_nlink = 0;
+	inode->i_size = 0;
+	mark_inode_dirty(inode);
+	dir->i_nlink--;	/* the removed directory's ".." no longer links to dir */
+	inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
+	dir->u.ext2_i.i_flags &= ~EXT2_BTREE_FL;
+	mark_inode_dirty(dir);
+	d_delete(dentry);
+
+end_rmdir:
+	brelse (bh);
+	return retval;
+}
+/*
+ * ext2_unlink removes the name `dentry' from `dir' and drops one link
+ * from the inode it referred to.  Returns 0 on success, -ENOENT when
+ * the name is not found, -EIO when the entry's inode number does not
+ * match the dentry's inode, or the error from ext2_delete_entry().
+ */
+int ext2_unlink(struct inode * dir, struct dentry *dentry)
+{
+	int retval;
+	struct inode * inode;
+	struct buffer_head * bh;
+	struct ext2_dir_entry_2 * de;
+
+	retval = -ENOENT;
+	bh = ext2_find_entry (dir, dentry->d_name.name, dentry->d_name.len, &de);
+	if (!bh)
+		goto end_unlink;
+
+	inode = dentry->d_inode;
+	DQUOT_INIT(inode);
+
+	retval = -EIO;
+	if (le32_to_cpu(de->inode) != inode->i_ino)
+		goto end_unlink;
+	
+	if (!inode->i_nlink) {
+		ext2_warning (inode->i_sb, "ext2_unlink",
+			      "Deleting nonexistent file (%lu), %d",
+			      inode->i_ino, inode->i_nlink);
+		inode->i_nlink = 1;	/* so the decrement below ends at zero */
+	}
+	retval = ext2_delete_entry (de, bh);
+	if (retval)
+		goto end_unlink;
+	dir->i_version = ++event;
+	mark_buffer_dirty(bh, 1);
+	if (IS_SYNC(dir)) {
+		ll_rw_block (WRITE, 1, &bh);
+		wait_on_buffer (bh);
+	}
+	dir->i_ctime = dir->i_mtime = CURRENT_TIME;
+	dir->u.ext2_i.i_flags &= ~EXT2_BTREE_FL;
+	mark_inode_dirty(dir);
+	inode->i_nlink--;
+	mark_inode_dirty(inode);
+	inode->i_ctime = dir->i_ctime;
+	retval = 0;
+	d_delete(dentry);	/* This also frees the inode */
+
+end_unlink:
+	brelse (bh);
+	return retval;
+}
+/*
+ * ext2_symlink creates a symbolic link named by `dentry' in `dir'
+ * whose target text is `symname'.
+ */
+int ext2_symlink (struct inode * dir, struct dentry *dentry, const char *
symname)
+{
+	struct ext2_dir_entry_2 * de;
+	struct inode * inode;
+	struct buffer_head * bh = NULL, * name_block = NULL;
+	char * link;
+	int i, l, err = -EIO;
+	char c;
+
+	if (!(inode = ext2_new_inode (dir, S_IFLNK, &err))) {
+		return err;
+	}
+	inode->i_mode = S_IFLNK | S_IRWXUGO;
+	inode->i_op = &ext2_symlink_inode_operations;
+	/* l = length of symname, capped at one block minus the terminator */
+	for (l = 0; l < inode->i_sb->s_blocksize - 1 &&
+	     symname [l]; l++)
+		;
+	if (l >= sizeof (inode->u.ext2_i.i_data)) {
+
+		/* too long for the inode's i_data area: store it in a data block */
+		ext2_debug ("l=%d, normal symlink\n", l);
+
+		name_block = ext2_bread (inode, 0, 1, &err);
+		if (!name_block) {
+			inode->i_nlink--;
+			mark_inode_dirty(inode);
+			iput (inode);
+			return err;
+		}
+		link = name_block->b_data;
+	} else {
+		link = (char *) inode->u.ext2_i.i_data;	/* target lives inside the inode */
+
+		ext2_debug ("l=%d, fast symlink\n", l);
+
+	}
+	/* copy the target, truncating at one block minus the terminator */
+	i = 0;
+	while (i < inode->i_sb->s_blocksize - 1 && (c = *(symname++)))
+		link[i++] = c;
+	link[i] = 0;
+	if (name_block) {
+		mark_buffer_dirty(name_block, 1);
+		brelse (name_block);
+	}
+	inode->i_size = i;
+	mark_inode_dirty(inode);
+
+	bh = ext2_add_entry (dir, dentry->d_name.name, dentry->d_name.len, &de, &err);
+	if (!bh)
+		goto out_no_entry;
+	de->inode = cpu_to_le32(inode->i_ino);
+	ext2_set_de_type(dir->i_sb, de, S_IFLNK);
+	dir->i_version = ++event;
+	mark_buffer_dirty(bh, 1);
+	if (IS_SYNC(dir)) {
+		ll_rw_block (WRITE, 1, &bh);
+		wait_on_buffer (bh);
+	}
+	brelse (bh);
+	d_instantiate(dentry, inode);
+	err = 0;
+out:
+	return err;
+
+out_no_entry:	/* the name could not be added: drop the inode again */
+	inode->i_nlink--;
+	mark_inode_dirty(inode);
+	iput (inode);
+	goto out;
+}
+/*
+ * ext2_link adds the name `dentry' in `dir' as another hard link to the
+ * inode behind `old_dentry'.  Directories are refused with -EPERM and
+ * an inode already at EXT2_LINK_MAX links with -EMLINK.
+ */
+int ext2_link (struct dentry * old_dentry,
+		struct inode * dir, struct dentry *dentry)
+{
+	struct inode *inode = old_dentry->d_inode;
+	struct ext2_dir_entry_2 * de;
+	struct buffer_head * bh;
+	int err;
+
+	if (S_ISDIR(inode->i_mode))
+		return -EPERM;
+
+	if (inode->i_nlink >= EXT2_LINK_MAX)
+		return -EMLINK;
+
+	bh = ext2_add_entry (dir, dentry->d_name.name, dentry->d_name.len, &de, &err);
+	if (!bh)
+		return err;
+
+	de->inode = cpu_to_le32(inode->i_ino);
+
ext2_set_de_type(dir->i_sb, de, inode->i_mode);
+	dir->i_version = ++event;
+	mark_buffer_dirty(bh, 1);
+	if (IS_SYNC(dir)) {
+		ll_rw_block (WRITE, 1, &bh);
+		wait_on_buffer (bh);
+	}
+	brelse (bh);
+	inode->i_nlink++;
+	inode->i_ctime = CURRENT_TIME;
+	mark_inode_dirty(inode);
+	inode->i_count++;	/* the new dentry holds its own reference on the inode */
+	d_instantiate(dentry, inode);
+	return 0;
+}
+/*
+ * PARENT_INO(buffer) is an lvalue for the inode field of the second
+ * record in a directory block: it skips the first record by its rec_len.
+ * The field is in on-disk byte order.  `buffer' is evaluated twice.
+ */
+#define PARENT_INO(buffer) \
+	((struct ext2_dir_entry_2 *) ((char *) buffer + \
+	le16_to_cpu(((struct ext2_dir_entry_2 *) buffer)->rec_len)))->inode
+
+/*
+ * Anybody can rename anything with this: the permission checks are left to the
+ * higher-level routines.
+ *
+ * Returns 0 on success.  All three buffers (old entry, new entry and,
+ * for a directory, its first block) start out NULL and are released at
+ * the single end_rename exit.
+ */
+int ext2_rename (struct inode * old_dir, struct dentry *old_dentry,
+			   struct inode * new_dir,struct dentry *new_dentry)
+{
+	struct inode * old_inode, * new_inode;
+	struct buffer_head * old_bh, * new_bh, * dir_bh;
+	struct ext2_dir_entry_2 * old_de, * new_de;
+	int retval;
+
+	old_bh = new_bh = dir_bh = NULL;
+
+	old_bh = ext2_find_entry (old_dir, old_dentry->d_name.name, old_dentry->d_name.len, &old_de);
+	/*
+	 *  Check for inode number is _not_ due to possible IO errors.
+	 *  We might rmdir the source, keep it as pwd of some process
+	 *  and merrily kill the link to whatever was created under the
+	 *  same name.
Goodbye sticky bit ;-<
+	 */
+	old_inode = old_dentry->d_inode;
+	retval = -ENOENT;
+	if (!old_bh || le32_to_cpu(old_de->inode) != old_inode->i_ino)
+		goto end_rename;
+
+	new_inode = new_dentry->d_inode;
+	new_bh = ext2_find_entry (new_dir, new_dentry->d_name.name,
+				new_dentry->d_name.len, &new_de);
+	if (new_bh) {
+		if (!new_inode) {
+			/* an entry exists but the dentry is negative: ignore the entry */
+			brelse (new_bh);
+			new_bh = NULL;
+		} else {
+			DQUOT_INIT(new_inode);
+		}
+	}
+	if (S_ISDIR(old_inode->i_mode)) {
+		if (new_inode) {
+			retval = -ENOTEMPTY;
+			if (!empty_dir (new_inode))	/* may only replace an empty directory */
+				goto end_rename;
+		}
+		retval = -EIO;
+		dir_bh = ext2_bread (old_inode, 0, 0, &retval);
+		if (!dir_bh)
+			goto end_rename;
+		if (le32_to_cpu(PARENT_INO(dir_bh->b_data)) != old_dir->i_ino)	/* ".." must point at old_dir */
+			goto end_rename;
+		retval = -EMLINK;
+		if (!new_inode && new_dir!=old_dir &&
+				new_dir->i_nlink >= EXT2_LINK_MAX)
+			goto end_rename;
+	}
+	if (!new_bh) {
+		new_bh = ext2_add_entry (new_dir, new_dentry->d_name.name,
+					new_dentry->d_name.len, &new_de,
+					&retval);
+		if (!new_bh)
+			goto end_rename;
+	}
+	new_dir->i_version = ++event;
+
+	/*
+	 * ok, that's it
+	 */
+	new_de->inode = le32_to_cpu(old_inode->i_ino);	/* NOTE(review): a store to disk; the other stores in this file use cpu_to_le32 - confirm */
+	if (EXT2_HAS_INCOMPAT_FEATURE(new_dir->i_sb,
+				      EXT2_FEATURE_INCOMPAT_FILETYPE))
+		new_de->file_type = old_de->file_type;
+	
+	ext2_delete_entry (old_de, old_bh);	/* return value is not checked */
+
+	old_dir->i_version = ++event;
+	if (new_inode) {
+		new_inode->i_nlink--;	/* the replaced target loses the name */
+		new_inode->i_ctime = CURRENT_TIME;
+		mark_inode_dirty(new_inode);
+	}
+	old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME;
+	old_dir->u.ext2_i.i_flags &= ~EXT2_BTREE_FL;
+	mark_inode_dirty(old_dir);
+	if (dir_bh) {	/* only set when a directory is being moved */
+		PARENT_INO(dir_bh->b_data) = le32_to_cpu(new_dir->i_ino);	/* NOTE(review): same conversion direction question as above */
+		mark_buffer_dirty(dir_bh, 1);
+		old_dir->i_nlink--;
+		mark_inode_dirty(old_dir);
+		if (new_inode) {
+			new_inode->i_nlink--;	/* second decrement for a replaced directory */
+			mark_inode_dirty(new_inode);
+		} else {
+			new_dir->i_nlink++;
+			new_dir->u.ext2_i.i_flags &= ~EXT2_BTREE_FL;
+			mark_inode_dirty(new_dir);
+		}
+	}
+	mark_buffer_dirty(old_bh,  1);
+	if (IS_SYNC(old_dir)) {
+		ll_rw_block (WRITE,
1, &old_bh);
+		wait_on_buffer (old_bh);
+	}
+	mark_buffer_dirty(new_bh, 1);
+	if (IS_SYNC(new_dir)) {
+		ll_rw_block (WRITE, 1, &new_bh);
+		wait_on_buffer (new_bh);
+	}
+
+	retval = 0;
+
+end_rename:
+	brelse (dir_bh);
+	brelse (old_bh);
+	brelse (new_bh);
+	return retval;
+}
+#endif
diff --git a/lustre/obdfs/rw.c b/lustre/obdfs/rw.c
new file mode 100644
index 0000000..389058c
--- /dev/null
+++ b/lustre/obdfs/rw.c
@@ -0,0 +1,204 @@
+/*
+ * OBDFS page read/write operations
+ * 
+ * Copyright (C) 1999 Stelias Computing Inc, 
+ *                (author Peter J. Braam )
+ * Copyright (C) 1999 Seagate Technology Inc.
+ */
+
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include <../obd/linux/obd_support.h>
+#include <../obd/linux/obd_sim.h>
+#include
+
+/* page cache operations */
+
+/*
+ * obdfs_readpage - fill one page from the object store.
+ *
+ * The page arrives locked.  The whole page is requested through the
+ * o_brw method of the device behind the superblock of file's inode,
+ * using the inode number as object id.
+ *
+ * The page is unlocked on every path, successful or not, so that
+ * nobody waiting on it can hang after a failed read; it is marked up to
+ * date only when a full PAGE_SIZE was transferred.  The caller's
+ * reference on the page is left untouched.
+ *
+ * Returns 0 on success, the negative value reported by o_brw on error,
+ * or -EIO when o_brw transferred less than a full page.
+ */
+int obdfs_readpage(struct file *file, struct page *page)
+{
+	struct obdfs_sb_info *sbi;
+	struct super_block *sb = file->f_dentry->d_inode->i_sb;
+	int rc;
+
+	ENTRY;
+
+	/* XXX flush stuff */
+	sbi = sb->u.generic_sbp;
+	PDEBUG(page, READ);
+	rc = sbi->osi_ops->o_brw(READ, sbi->osi_conn_info.conn_id, 
+				 file->f_dentry->d_inode->i_ino, page);
+	if (rc == PAGE_SIZE) {
+		SetPageUptodate(page);
+		rc = 0;
+	} else if (rc >= 0) {
+		/* a short count is not a valid error code for the caller */
+		rc = -EIO;
+	}
+	/* unlock after the flag is set, and on failure as well */
+	UnlockPage(page);
+	PDEBUG(page, READ);
+	return rc;
+
+}
+
+/*
+ * This does the "real" work of the write. The generic routine has
+ * allocated the page, locked it, done all the page alignment stuff
+ * calculations etc. Now we should just copy the data from user
+ * space and write it back to the real medium..
+ *
+ * If the writer ends up delaying the write, the writer needs to
+ * increment the page use counts until he is done with the page.
+ *
+ * Returns the number of bytes copied from user space when the page was
+ * written out completely, -EFAULT when nothing could be copied, the
+ * negative value reported by obdfs_writepage on error, or -EIO when
+ * less than a full page reached the device.
+ */
+int obdfs_write_one_page(struct file *file, struct page *page, unsigned long offset, unsigned long bytes, const char * buf)
+{
+	long status;
+
+	/* copy_from_user returns the number of bytes it could NOT copy */
+	bytes -= copy_from_user((u8*)page_address(page) + offset, buf, bytes);
+	status = -EFAULT;
+
+	/*
+	 * Only sizes and addresses are logged: the page holds arbitrary
+	 * user data with no terminating NUL, so it must not be printed
+	 * with %s.
+	 */
+	CDEBUG(D_INODE, "page offset %ld, bytes %ld, offset %ld, page addr %lx\n", page->offset, bytes, offset, page_address(page));
+	if (bytes) {
+		lock_kernel();
+		status = obdfs_writepage(file, page);
+		unlock_kernel();
+	}
+	if ( status == PAGE_SIZE )
+		return bytes;
+	/* a short count must not be mistaken for a number of bytes written */
+	if ( status >= 0 )
+		status = -EIO;
+	return status;
+}
+
+
+
+
+/*
+ * obdfs_writepage - write one whole page through to the object store.
+ *
+ * The page is handed to the o_brw method of the device behind the
+ * superblock of file's inode, using the inode number as object id.
+ * The page lock and the caller's reference are not touched here.  The
+ * page is marked up to date only when a full PAGE_SIZE was written.
+ *
+ * Returns the value of o_brw: PAGE_SIZE for a complete write.
+ */
+int obdfs_writepage(struct file *file, struct page *page)
+{
+	struct obdfs_sb_info *sbi;
+	struct super_block *sb = file->f_dentry->d_inode->i_sb;
+	int rc;
+
+	ENTRY;
+	PDEBUG(page,WRITE);
+	/* XXX flush stuff */
+	sbi = sb->u.generic_sbp;
+
+	rc = sbi->osi_ops->o_brw(WRITE, sbi->osi_conn_info.conn_id, 
+				 file->f_dentry->d_inode->i_ino, page);
+	/* do not claim the page matches the device after a failed write */
+	if (rc == PAGE_SIZE)
+		SetPageUptodate(page);
+	PDEBUG(page,WRITE);
+	return rc;
+}
+
+
+/*
+   page is returned unlocked, with the up to date flag set,
+   and held, i.e.
caller must do a page_put
+
+   Looks the page up in the page cache first.  A cached page that is
+   already up to date is returned as is.  Otherwise the page (an
+   existing stale one, or a new one inserted into the cache) is filled
+   through o_brw while it is locked.
+
+   Returns NULL when no page could be allocated or when the read did
+   not transfer a full page; in the latter case the reference taken
+   here is dropped again.
+*/
+struct page *obdfs_getpage(struct inode *inode, unsigned long offset)
+{
+	unsigned long new_page;
+	struct page ** hash;
+	struct page * page; 
+	struct obdfs_sb_info *sbi;
+	struct super_block *sb = inode->i_sb;
+	int rc;
+
+	ENTRY;
+
+	sbi = sb->u.generic_sbp;
+	
+repeat:
+	page = find_get_page(inode, offset); 
+	if (page && Page_Uptodate(page)) { 
+		PDEBUG(page,READ);
+		return page;
+	} 
+		
+	if (page) {
+		/*
+		 * Reuse the cached page and the reference we already hold
+		 * on it, instead of allocating a duplicate that could never
+		 * be inserted into the cache.
+		 */
+		CDEBUG(D_INODE, "Page found but not up to date\n");
+		lock_page(page);
+		if (Page_Uptodate(page)) {
+			/* somebody filled it while we waited for the lock */
+			UnlockPage(page);
+			PDEBUG(page,READ);
+			return page;
+		}
+	} else {
+		/* page_cache_alloc returns address of page */
+		new_page = page_cache_alloc();
+		if (!new_page)
+			return NULL;
+	
+		/* corresponding struct page in the mmap */
+		hash = page_hash(inode, offset);
+		page = page_cache_entry(new_page);
+		if (add_to_page_cache_unique(page, inode, offset, hash)) {
+			/*
+			 * We arrive here in the unlikely event that someone 
+			 * raced with us and added our page to the cache first.
+			 * Our copy is not in the cache and holds no data:
+			 * give it back and pick up the cached one.
+			 */
+			CDEBUG(D_INODE, "Page not found. Someone raced us.\n");
+			page_cache_free(new_page);
+			goto repeat;
+		}
+		/* the page is now in the cache, locked and referenced */
+		CDEBUG(D_INODE, "Page not found. Reading it.\n");
+	}
+
+	PDEBUG(page,READ);
+	rc = sbi->osi_ops->o_brw(READ, sbi->osi_conn_info.conn_id, 
+				 inode->i_ino, page);
+	/* set the flag before unlocking, and only for a complete read */
+	if (rc == PAGE_SIZE)
+		SetPageUptodate(page);
+	UnlockPage(page);
+	if (rc != PAGE_SIZE) {
+		page_cache_release(page);
+		return NULL;
+	}
+	return page;
+}
+
+
+
+struct file_operations obdfs_file_operations = {
+	NULL,			/* lseek - default */
+	generic_file_read,      /* read */
+	obdfs_file_write,       /* write  - bad */
+        obdfs_readdir,	        /* readdir */
+	NULL,			/* poll - default */
+	NULL,	                /* ioctl */
+	NULL,			/* mmap */
+	NULL,			/* no special open code */
+	NULL,			/* flush */
+	NULL,			/* no special release code */
+	NULL,			/* fsync */
+	NULL,			/* fasync */
+	NULL,			/* check_media_change */
+	NULL			/* revalidate */
+};
+
+struct inode_operations obdfs_inode_ops = {
+	&obdfs_file_operations,	/* default directory file-ops */
+	NULL,		/* create */
+	obdfs_lookup,   /* lookup */
+	NULL,		/* link */
+	NULL,		/* unlink */
+	NULL,		/* symlink */
+	NULL,		/* mkdir */
+	NULL,		/* rmdir */
+	NULL,		/* mknod */
+	NULL,		/* rename */
+	NULL,		/* readlink */
+	NULL,		/* follow_link */
+	NULL,           /* get_block */
+	obdfs_readpage,	/* readpage */
+	obdfs_writepage, /* writepage */
+	NULL,		/* flushpage */
+	NULL,		/* truncate */
+	NULL,		/* permission */
+	NULL,		/* smap */
+	NULL		/* revalidate */
+};
diff --git a/lustre/obdfs/super.c b/lustre/obdfs/super.c
index cf60d54..c40122b 100644
--- a/lustre/obdfs/super.c
+++ b/lustre/obdfs/super.c
@@ -2,6 +2,9 @@
  * OBDFS Super operations
  *
  * Copryright (C) 1996 Peter J. Braam
+ * Copryright (C) 1999 Stelias Computing Inc. 
+ * Copryright (C) 1999 Seagate Technology Inc.
+ * */ #define EXPORT_SYMTAB @@ -25,7 +28,9 @@ #include #include -#include <../obd/linux/sim_obd.h> +#include +#include +#include /* XXX for development/debugging only */ #include /* VFS super_block ops */ @@ -74,11 +79,15 @@ static struct super_block * obdfs_read_super(struct super_block *sb, return NULL; } - error = obd_connect(obd_minor, &sbi->osi_conn_info); + sbi->osi_obd = &obd_dev[obd_minor]; + sbi->osi_ops = sbi->osi_obd->obd_type->typ_ops; + + error = sbi->osi_ops->o_connect(obd_minor, &sbi->osi_conn_info); if ( error ) { printk("OBDFS: cannot connect to 0x%x.\n", obd_minor); goto error; } + sbi->osi_super = sb; lock_super(sb); @@ -130,7 +139,7 @@ static void obdfs_put_super(struct super_block *sb) /* XXX flush stuff */ sbi = sb->u.generic_sbp; sb->u.generic_sbp = NULL; - obd_disconnect(sbi->osi_conn_info.conn_id); + sbi->osi_ops->o_disconnect(sbi->osi_conn_info.conn_id); sbi->osi_super = NULL; @@ -141,6 +150,8 @@ static void obdfs_put_super(struct super_block *sb) EXIT; } +extern struct inode_operations obdfs_inode_ops; + /* all filling in of inodes postponed until lookup */ static void obdfs_read_inode(struct inode *inode) { @@ -149,14 +160,15 @@ static void obdfs_read_inode(struct inode *inode) struct obdfs_sb_info *sbi = inode->i_sb->u.generic_sbp; ENTRY; - error = obd_getattr(sbi->osi_conn_info.conn_id, inode->i_ino, &attr); + error = sbi->osi_ops->o_getattr(sbi->osi_conn_info.conn_id, + inode->i_ino, &attr); if (error) { printk("obdfs_read_inode: ibd_getattr fails (%d)\n", error); return; } inode_setattr(inode, &attr); - inode->i_op = NULL; + inode->i_op = &obdfs_inode_ops; return; } @@ -179,11 +191,11 @@ static void obdfs_write_inode(struct inode *inode) struct obdfs_sb_info *sbi; struct iattr attr; int error; - ENTRY; inode_to_iattr(inode, &attr); sbi = inode->i_sb->u.generic_sbp; - error = obd_setattr(sbi->osi_conn_info.conn_id, inode->i_ino, &attr); + error = sbi->osi_ops->o_setattr(sbi->osi_conn_info.conn_id, + inode->i_ino, &attr); if (error) { 
printk("obdfs_write_inode: ibd_setattr fails (%d)\n", error); return; @@ -199,7 +211,8 @@ static void obdfs_delete_inode(struct inode *inode) ENTRY; sbi = inode->i_sb->u.generic_sbp; - error = obd_destroy(sbi->osi_conn_info.conn_id , inode->i_ino); + error = sbi->osi_ops->o_destroy(sbi->osi_conn_info.conn_id, + inode->i_ino); if (error) { printk("obdfs_delete_node: ibd_destroy fails (%d)\n", error); return; @@ -217,7 +230,8 @@ static int obdfs_notify_change(struct dentry *de, struct iattr *iattr) ENTRY; sbi = inode->i_sb->u.generic_sbp; - error = obd_setattr(sbi->osi_conn_info.conn_id, inode->i_ino, iattr); + error = sbi->osi_ops->o_setattr(sbi->osi_conn_info.conn_id, + inode->i_ino, iattr); if ( error ) { printk("obdfs_notify_change: obd_setattr fails (%d)\n", error); return error; @@ -239,7 +253,7 @@ static int obdfs_statfs(struct super_block *sb, struct statfs *buf, ENTRY; sbi = sb->u.generic_sbp; - error = obd_statfs(sbi->osi_conn_info.conn_id, &tmp); + error = sbi->osi_ops->o_statfs(sbi->osi_conn_info.conn_id, &tmp); if ( error ) { printk("obdfs_notify_change: obd_statfs fails (%d)\n", error); return error; @@ -260,6 +274,10 @@ int init_obdfs(void) printk(KERN_INFO "OBDFS v0.1, braam@stelias.com\n"); obdfs_sysctl_init(); + + obd_sbi = &obdfs_super_info; + obd_fso = &obdfs_file_operations; + return register_filesystem(&obdfs_fs_type); } diff --git a/lustre/obdfs/sysctl.c b/lustre/obdfs/sysctl.c index 006e34b..e5c263a 100644 --- a/lustre/obdfs/sysctl.c +++ b/lustre/obdfs/sysctl.c @@ -11,10 +11,6 @@ #include #include -#include -#include -#include -#include struct ctl_table_header *obdfs_table_header = NULL;