From f35a4b46939408ba1c1df72cbbe563617bd65a64 Mon Sep 17 00:00:00 2001 From: braam Date: Tue, 10 Aug 1999 16:18:58 +0000 Subject: [PATCH] Numerous patches and fixes to Phil's initial attack. In particular we now have clean tracking of inode counts, so that things attach detach cleanly. Enough there for deliverable 1 & 2. --- lustre/include/linux/obdfs.h | 16 ++++ lustre/include/linux/sim_obd.h | 29 +++--- lustre/obdclass/obdcontrol | 66 ++++++++++---- lustre/obdfs/README | 22 +++++ lustre/obdfs/notes.txt | 198 +++++++++++++++++++++++++++++++++++++++++ 5 files changed, 305 insertions(+), 26 deletions(-) create mode 100644 lustre/obdfs/README create mode 100644 lustre/obdfs/notes.txt diff --git a/lustre/include/linux/obdfs.h b/lustre/include/linux/obdfs.h index 46df61c..b293c40 100644 --- a/lustre/include/linux/obdfs.h +++ b/lustre/include/linux/obdfs.h @@ -1,3 +1,16 @@ +/* object based disk file system + * + * This software is licensed under the GPL. See the file COPYING in the + * top directory of this distribution for details. 
+ * + * Copyright (C), 1999, Stelias Computing Inc + * + * + */ + + +#ifndef _OBDFS_H +#define _OBDFS_H #include <../obd/linux/sim_obd.h> @@ -15,3 +28,6 @@ void obdfs_sysctl_clean(void); struct obdfs_inode_info; #define OBDFS_SUPER_MAGIC 0x4711 + +#endif + diff --git a/lustre/include/linux/sim_obd.h b/lustre/include/linux/sim_obd.h index ad5dcc2..a96cd4d 100755 --- a/lustre/include/linux/sim_obd.h +++ b/lustre/include/linux/sim_obd.h @@ -17,7 +17,7 @@ extern int obd_print_entry; /* debugging masks */ #define D_PSDEV 1 /* debug information from psdev.c */ -#define D_UNUSED1 2 +#define D_INODE 2 #define D_UNUSED2 4 #define D_UNUSED3 8 #define D_UNUSED4 16 @@ -58,7 +58,7 @@ extern int obd_print_entry; #define OBD_ALLOC(ptr, cast, size) \ do { \ if (size <= 4096) { \ - ptr = (cast)kmalloc((unsigned long) size, GFP_KERNEL); \ + ptr = (cast)kmalloc((unsigned long) size, GFP_KERNEL); \ CDEBUG(D_MALLOC, "kmalloced: %x at %x.\n", \ (int) size, (int) ptr); \ } else { \ @@ -87,11 +87,13 @@ do { \ } while (0) -#define MAX_DEVICES 128 + + +#define MAX_OBD_DEVICES 2 struct obd_conn_info { - unsigned int conn_id; - unsigned long conn_ino; - unsigned long conn_blocksize; + unsigned int conn_id; /* handle */ + unsigned long conn_ino; /* root inode number */ + unsigned long conn_blocksize; unsigned char conn_blocksize_bits; }; @@ -141,8 +143,8 @@ struct oic_rw_s { }; #define OBD_IOC_CREATE _IOR ('f', 3, long) -#define OBD_IOC_SETUP _IOW ('f', 4, long) -#define OBD_IOC_SYNC _IOR ('f', 5, long) +#define OBD_IOC_SETUP_SUPER _IOW ('f', 4, long) +#define OBD_IOC_CLEANUP_SUPER _IO ('f', 5 ) #define OBD_IOC_DESTROY _IOW ('f', 6, long) #define OBD_IOC_PREALLOCATE _IOWR('f', 7, long) #define OBD_IOC_DEC_USE_COUNT _IO ('f', 8 ) @@ -153,7 +155,11 @@ struct oic_rw_s { #define OBD_IOC_CONNECT _IOR ('f', 13, long) #define OBD_IOC_DISCONNECT _IOW ('f', 14, long) #define OBD_IOC_STATFS _IOWR('f', 15, long) -#define OBD_IOC_DEC_FS_USE_COUNT _IO ('f', 16 ) +#define OBD_IOC_SYNC _IOR ('f', 16, long) + 
+ + +#define OBD_IOC_DEC_FS_USE_COUNT _IO ('f', 32 ) /* balloc.c */ int ext2_new_block (const struct inode * inode, unsigned long goal, @@ -195,12 +201,15 @@ struct buffer_head * obd_getblk (struct inode * inode, long block, int create, int * err); /* interface.c */ +void obd_cleanup_device(int dev); extern int obd_create (struct super_block * sb, int inode_hint, int * err); extern void obd_unlink (struct inode * inode); extern struct obd_client * obd_client(int cli_id); extern void obd_cleanup_client (struct obd_device * obddev, struct obd_client * cli); void obd_cleanup_device(int dev); +int obd_cleanup_super(struct obd_device * obddev); +int obd_setup_super(struct obd_device * obddev, int sbdev); long obd_preallocate_inodes(unsigned int conn_id, int req, long inodes[32], int * err); long obd_preallocate_quota(struct super_block * sb, struct obd_client * cli, @@ -225,7 +234,6 @@ int obd_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, #undef ext2_error #define ext2_error obd_warning #define ext2_panic obd_warning - #ifdef EXT2FS_DEBUG # undef ext2_debug # define ext2_debug(format, a...) CDEBUG(D_EXT2, format, ## a) @@ -256,5 +264,4 @@ extern struct inode_operations ext2_file_inode_operations; /* super.c */ extern struct super_operations ext2_sops; - #endif /* __LINUX_SIM_OBD_H */ diff --git a/lustre/obdclass/obdcontrol b/lustre/obdclass/obdcontrol index 2d33a4f..5e29bb6 100755 --- a/lustre/obdclass/obdcontrol +++ b/lustre/obdclass/obdcontrol @@ -1,5 +1,19 @@ #!/usr/bin/perl +# +# This code is issued under the GNU General Public License. 
+# See the file COPYING in this distribution +# +# Copyright (C) 1998, Stelias Computing +# +# Modified for InterMezzo from Gordian's HSM bcache device/jcm module +# Copyright (C) 1999, Carnegie Mellon University +# +# Derived from InterMezzo's incontrol, modified for OBD's +# Copyright (C) 1999, Stelias Computing +# +# + #use strict; BEGIN { require "asm/errno.ph" }; BEGIN { require "asm/ioctl.ph" }; @@ -7,10 +21,10 @@ BEGIN { require "asm/ioctl.ph" }; # p2ph generated invalid macros for ioctl stuff, so I override some of it here eval 'sub OBD_IOC_CREATE () { &_IOC(2, ord(\'f\'), 3, 4);}' unless defined(&OBD_IOC_CREATE); -eval 'sub OBD_IOC_SETUP () { &_IOC(1, ord(\'f\'), 4, 4);}' unless - defined(&OBD_IOC_SETUP); -eval 'sub OBD_IOC_SYNC () { &_IOC(2, ord(\'f\'), 5, 4);}' unless - defined(&OBD_IOC_SYNC); +eval 'sub OBD_IOC_SETUP_SUPER () { &_IOC(1, ord(\'f\'), 4, 4);}' unless + defined(&OBD_IOC_SETUP_SUPER); +eval 'sub OBD_IOC_CLEANUP_SUPER () { &_IOC(0, ord(\'f\'), 5, 0);}' unless + defined(&OBD_IOC_CLEANUP_SUPER); eval 'sub OBD_IOC_DESTROY () { &_IOC(1, ord(\'f\'), 6, 4);}' unless defined(&OBD_IOC_DESTROY); eval 'sub OBD_IOC_PREALLOCATE () { &_IOC(3, ord(\'f\'), 7, 4);}' unless @@ -31,6 +45,8 @@ eval 'sub OBD_IOC_DISCONNECT () { &_IOC(1, ord(\'f\'), 14, 4);}' unless defined(&OBD_IOC_DISCONNECT); eval 'sub OBD_IOC_STATFS () { &_IOC(3, ord(\'f\'), 15, 4);}' unless defined(&OBD_IOC_STATFS); +eval 'sub OBD_IOC_SYNC () { &_IOC(2, ord(\'f\'), 16, 4);}' unless + defined(&OBD_IOC_SYNC); eval 'sub ATTR_MODE () {1;}' unless defined(&ATTR_MODE); eval 'sub ATTR_UID () {2;}' unless defined(&ATTR_UID); @@ -51,7 +67,9 @@ my ($device, $filesystem); # startup options (I'll replace these when I have some to replace with) GetOptions("device=s" => \$device, "fs=s" => $filesystem) || die "Getoptions"; +# genuine new simulated OBD device $device = "/dev/obd" unless $device; +# object store in the ext2 formatted block device $filesystem = "/dev/loop0" unless $filesystem; # get a console 
for the app @@ -68,16 +86,17 @@ my $arg; my %commands = ('create' => {func => "Create", doc => "create: creates a new inode"}, - 'setup' => {func => "Setup", doc => "setup: initializes the environment"}, + 'setup' => {func => "Setup", doc => "setup: link the ext2 partition (default /dev/loop0) to this obddev"}, 'connect' => {func => "Connect", doc => "connect: allocates client ID for this session"}, 'disconnect' => {func => "Disconnect", doc => "disconnect [id]: frees client resources"}, 'sync' => {func => "Sync", doc => "sync: flushes buffers to disk"}, 'destroy' => {func => "Destroy", doc => "setup: destroys an inode"}, + 'cleanup' => {func => "Cleanup", doc => "detach the superblock from this minor obd dev"}, 'dec_use_count' => {func => "Decusecount", doc => "decreases the module use count so that the module can be removed following an oops"}, 'read' => {func => "Read", doc => "read [offset]"}, 'write' => {func => "Write", doc => "write "}, - 'setattr' => {func => "Setattr", doc => "setattr [mode [uid [gid [size [atime [mtime [ctime]]]]]]]"}, - 'getattr' => {func => "Getattr", doc => "getattr [inode]: displays inode object attributes"}, + 'setattr' => {func => "Setattr", doc => "setattr [mode [uid [gid [size [atime [mtime [ctime]]]]]]]"}, + 'getattr' => {func => "Getattr", doc => "getattr : displays inode object attributes"}, 'preallocate' => {func => "Preallocate", doc => "preallocate [num]: requests preallocation of num inodes."}, 'statfs' => {func => "Statfs", doc => "statfs: filesystem status information"}, 'help' => {func => \&Help, doc => "help: this message"}, @@ -93,7 +112,7 @@ my @jcm_cmd_list = keys %commands; $attribs->{attempted_completion_function} = \&completeme; #------------------------------------------------------------------------------ # Open the device, as we need an FD for the ioctl -sysopen(DEV_OBD, $device, 0); +sysopen(DEV_OBD, $device, 0) || die "Cannot open $device"; if (!defined($::st = stat($filesystem))) { die "Unable to stat 
$filesystem.\n"; @@ -154,7 +173,20 @@ sub execute_line { sub Setup { my $err = 0; my $packed = pack("L", $::st->rdev()); - my $rc = ioctl(DEV_OBD, &OBD_IOC_SETUP, $packed); + my $rc = ioctl(DEV_OBD, &OBD_IOC_SETUP_SUPER, $packed); + + if (!defined $rc) { + print STDERR "ioctl failed: $!\n"; + } elsif ($rc eq "0 but true") { + print "Finished (success)\n"; + } else { + print "ioctl returned error code $rc.\n"; + } +} + +sub Cleanup { + my $err = "0"; + my $rc = ioctl(DEV_OBD, &OBD_IOC_CLEANUP_SUPER, $err); if (!defined $rc) { print STDERR "ioctl failed: $!\n"; @@ -165,6 +197,7 @@ sub Setup { } } + sub Connect { my $id = 0; my $ino = 0; @@ -279,8 +312,9 @@ sub Sync { } sub Destroy { - if (!defined($id)) { - $id = $::client_id; + if (!defined($::client_id)) { + print "You must first ``connect''.\n"; + return; } my $arg = shift; @@ -328,7 +362,7 @@ sub Getattr { ($valid, $mode, $uid, $gid, $size, $atime, $mtime, $ctime, $flags) = unpack("ISssx2lLLLI", $packed); - printf("Inode: %d Mode: %04d\n", $inode, $mode); + printf("Inode: %d Mode: %o\n", $inode, $mode); printf("User: %6d Group: %6d Size: %d\n", $uid, $gid, $size); printf("ctime: %08lx -- %s\n", $ctime, scalar(gmtime($ctime))); printf("atime: %08lx -- %s\n", $atime, scalar(gmtime($atime))); @@ -348,7 +382,7 @@ sub Setattr { my $inode = shift; my $valid = 0; - my $mode = shift; + my $mode = oct(shift); my $uid = shift; my $gid = shift; my $size = shift; @@ -395,6 +429,8 @@ sub Setattr { # time_t ia_ctime; (32) # unsigned int ia_attr_flags; (32) #}; + + printf "valid is %x, mode is %o\n", $valid, $mode; my $packed = pack("ILLSssx2ILLLL", $::client_id, $inode, $valid, $mode, $uid, $gid, $size, $atime, $mtime, $ctime, 0); my $rc = ioctl(DEV_OBD, &OBD_IOC_SETATTR, $packed); @@ -440,7 +476,7 @@ sub Read { my $rc = ioctl(DEV_OBD, &OBD_IOC_READ, $packed); - $retval = unpack("L", $packed); + $retval = unpack("l", $packed); if (!defined $rc) { print STDERR "ioctl failed: $!\n"; @@ -486,7 +522,7 @@ sub Write { my $packed = 
pack("ILpLLL", $::client_id, $inode, $text, $count, 0, $offset); my $rc = ioctl(DEV_OBD, &OBD_IOC_WRITE, $packed); - $retval = unpack("L", $packed); + $retval = unpack("l", $packed); if (!defined $rc) { print STDERR "ioctl failed: $!\n"; diff --git a/lustre/obdfs/README b/lustre/obdfs/README new file mode 100644 index 0000000..31e0010 --- /dev/null +++ b/lustre/obdfs/README @@ -0,0 +1,22 @@ +OBDFS - v 0.001 + +This version can mount, list and set the attributes of the root +directory. + +1. configure the obd to use /dev/obd with a scratch file system + +2. make + +3. insmod obdfs + +4. mount -t obdfs /dev/obd /mnt + +To verify: type mount (output: +/dev/obd on /mnt type obdfs (rw)) + +5. ls -ld /mnt + +6. chmod 711 /mnt, ls -ld /mnt + +7. chown seagate /mnt , ls -ld /mnt + diff --git a/lustre/obdfs/notes.txt b/lustre/obdfs/notes.txt new file mode 100644 index 0000000..cd177d5 --- /dev/null +++ b/lustre/obdfs/notes.txt @@ -0,0 +1,198 @@ +Considerations for an API between OBD's and OBDFS + + +OBDFS + +Methods needed: + +struct super_operations { + void (*read_inode) (struct inode *); + void (*write_inode) (struct inode *); + void (*put_inode) (struct inode *); + void (*delete_inode) (struct inode *); + int (*notify_change) (struct dentry *, struct iattr *); + void (*put_super) (struct super_block *); + void (*write_super) (struct super_block *); + int (*statfs) (struct super_block *, struct statfs *, int); +* int (*remount_fs) (struct super_block *, int *, char *); + void (*clear_inode) (struct inode *); +* void (*umount_begin) (struct super_block *); +}; + + +read_inode: + +Called from function iget(ino, dev) - through get_new_inode. +Typically called only when a VFS inode is instantiated by FS, i.e. +upon lookup, create, mkdir, or upon mounting for the / inode. 
+ + - executed for new inodes and for existing inodes + - for new inodes, avoid traffic to disk + +E.g: + +lookup("name in dir-inode") +{ + get data from dir-inode; + find ino of "name" in this data; + iget(sb(dev), ino); + ---> calls read_inode + ----> obd_getattr(obj-no = ino) +} + +create("name in dir-inode") +{ + get ino for name from pre-alloced obj-no's + ---> may call obd_prealloc_ids(hint) + + iget(sb(dev), ino) + ---> call read_inode + ---> do not contact OBD, fill in from FS + + change data from dir-inode, to contain ("name, ino"); +} + +mkdir("name in dir-inode") +{ + as above +} + +read_super(dev, data) +{ + struct obdfs_sb *sb = ... ; + obd_connect(dev, &sb->obdfs_conn_info); + + iget(sb, sb->obdfs_conn_info->conn_ino); + + +} + +We currently have: + +struct obd_conn_info { + unsigned int conn_id; + unsigned long conn_ino; + unsigned long conn_blocksize; + unsigned char conn_blocksize_bits; +}; + +read_inode(inode) +{ + struct iattr attr; + + + if ( inode in inode_attr cache ) { + get_attr_from_cache(inode, &attr); + } else { + obd_getattr(conn_id, inode->i_ino, &attr); + } + + inode_setattr(inode, &attr); +} + +Write_inode is called from the bdflush (sync_dev) routines, through +write_inode, sync_inode, sync_list, sync_inodes etc: + +void sync_dev(kdev_t dev) +{ + sync_buffers(dev, 0); + sync_supers(dev); + sync_inodes(dev); + sync_buffers(dev, 0); + DQUOT_SYNC(dev); + /* + * FIXME(eric) we need to sync the physical devices here. + * This is because some (scsi) controllers have huge amounts of + * cache onboard (hundreds of Mb), and we need to instruct + * them to commit all of the dirty memory to disk, and we should + * not return until this has happened. + * + * This would need to get implemented by going through the assorted + * layers so that each block major number can be synced, and this + * would call down into the upper and mid-layer scsi. 
+ */ +} + +This sync_inodes function is responsible (for "block" disk file +systems) for copying the modified inode metadata into the buffer +cache. The sync_buffers call which follows sync_inodes is responsible +for writing back this meta data. For OBD's this is different. + +I expect the following routine to be there: + +sync_inode_pages(dev,0); +sync_supers(dev); +sync_inode_metadata(dev); + + +The statfs function should return simple summary information available +on the disk: %free, total space, etc. May require a new obd_command. + +Similarly write_super would instruct the disk to commit any pending +data. This is called from do_unmount just before put_super (the +latter breaks down the vm super block structure). + +Write_super should: +- undo pre-allocated inode numbers + +The disk itself also needs a cleanup function. + +struct file_operations { + loff_t (*llseek) (struct file *, loff_t, int); + ssize_t (*read) (struct file *, char *, size_t, loff_t *); + ssize_t (*write) (struct file *, const char *, size_t, loff_t *); + int (*readdir) (struct file *, void *, filldir_t); + unsigned int (*poll) (struct file *, struct poll_table_struct *); + int (*ioctl) (struct inode *, struct file *, unsigned int, unsigned long); + int (*mmap) (struct file *, struct vm_area_struct *); + int (*open) (struct inode *, struct file *); + int (*flush) (struct file *); + int (*release) (struct inode *, struct file *); + int (*fsync) (struct file *, struct dentry *); + int (*fasync) (int, struct file *, int); + int (*check_media_change) (kdev_t dev); + int (*revalidate) (kdev_t dev); + int (*lock) (struct file *, int, struct file_lock *); +}; + + +struct inode_operations { + struct file_operations * default_file_ops; + int (*create) (struct inode *,struct dentry *,int); + struct dentry * (*lookup) (struct inode *,struct dentry *); + int (*link) (struct dentry *,struct inode *,struct dentry *); + int (*unlink) (struct inode *,struct dentry *); + int (*symlink) (struct inode 
*,struct dentry *,const char *); + int (*mkdir) (struct inode *,struct dentry *,int); + int (*rmdir) (struct inode *,struct dentry *); + int (*mknod) (struct inode *,struct dentry *,int,int); + int (*rename) (struct inode *, struct dentry *, + struct inode *, struct dentry *); + int (*readlink) (struct dentry *, char *,int); + struct dentry * (*follow_link) (struct dentry *, struct dentry *, unsigned int); + /* + * the order of these functions within the VFS template has been + * changed because SMP locking has changed: from now on all get_block, + * readpage, writepage and flushpage functions are supposed to do + * whatever locking they need to get proper SMP operation - for + * now in most cases this means a lock/unlock_kernel at entry/exit. + * [The new order is also slightly more logical :)] + */ + /* + * Generic block allocator exported by the lowlevel fs. All metadata + * details are handled by the lowlevel fs, all 'logical data content' + * details are handled by the highlevel block layer. + */ + int (*get_block) (struct inode *, long, struct buffer_head *, int); + + int (*readpage) (struct file *, struct page *); + int (*writepage) (struct file *, struct page *); + int (*flushpage) (struct inode *, struct page *, unsigned long); + + void (*truncate) (struct inode *); + int (*permission) (struct inode *, int); + int (*smap) (struct inode *,int); + int (*revalidate) (struct dentry *); +}; + + -- 1.8.3.1