+/* object based disk file system
+ *
+ * This software is licensed under the GPL. See the file COPYING in the
+ * top directory of this distribution for details.
+ *
+ * Copyright (C), 1999, Stelias Computing Inc
+ *
+ *
+ */
+
+
+/* Include guard: the macro defined here must be the one tested by #ifndef,
+ * otherwise a second inclusion of this header is not suppressed. */
+#ifndef _OBDFS_H
+#define _OBDFS_H
#include <../obd/linux/sim_obd.h>
struct obdfs_inode_info;
#define OBDFS_SUPER_MAGIC 0x4711
+
+#endif
+
/* debugging masks */
#define D_PSDEV 1 /* debug information from psdev.c */
-#define D_UNUSED1 2
+#define D_INODE 2
#define D_UNUSED2 4
#define D_UNUSED3 8
#define D_UNUSED4 16
#define OBD_ALLOC(ptr, cast, size) \
do { \
if (size <= 4096) { \
- ptr = (cast)kmalloc((unsigned long) size, GFP_KERNEL); \
+ ptr = (cast)kmalloc((unsigned long) size, GFP_KERNEL); \
CDEBUG(D_MALLOC, "kmalloced: %x at %x.\n", \
(int) size, (int) ptr); \
} else { \
} while (0)
-#define MAX_DEVICES 128
+
+
+#define MAX_OBD_DEVICES 2
struct obd_conn_info {
- unsigned int conn_id;
- unsigned long conn_ino;
- unsigned long conn_blocksize;
+ unsigned int conn_id; /* handle */
+ unsigned long conn_ino; /* root inode number */
+ unsigned long conn_blocksize;
unsigned char conn_blocksize_bits;
};
};
#define OBD_IOC_CREATE _IOR ('f', 3, long)
-#define OBD_IOC_SETUP _IOW ('f', 4, long)
-#define OBD_IOC_SYNC _IOR ('f', 5, long)
+#define OBD_IOC_SETUP_SUPER _IOW ('f', 4, long)
+#define OBD_IOC_CLEANUP_SUPER _IO ('f', 5 )
#define OBD_IOC_DESTROY _IOW ('f', 6, long)
#define OBD_IOC_PREALLOCATE _IOWR('f', 7, long)
#define OBD_IOC_DEC_USE_COUNT _IO ('f', 8 )
#define OBD_IOC_CONNECT _IOR ('f', 13, long)
#define OBD_IOC_DISCONNECT _IOW ('f', 14, long)
#define OBD_IOC_STATFS _IOWR('f', 15, long)
-#define OBD_IOC_DEC_FS_USE_COUNT _IO ('f', 16 )
+#define OBD_IOC_SYNC _IOR ('f', 16, long)
+
+
+
+#define OBD_IOC_DEC_FS_USE_COUNT _IO ('f', 32 )
/* balloc.c */
int ext2_new_block (const struct inode * inode, unsigned long goal,
int create, int * err);
/* interface.c */
+void obd_cleanup_device(int dev);
extern int obd_create (struct super_block * sb, int inode_hint, int * err);
extern void obd_unlink (struct inode * inode);
extern struct obd_client * obd_client(int cli_id);
extern void obd_cleanup_client (struct obd_device * obddev,
struct obd_client * cli);
void obd_cleanup_device(int dev);
+int obd_cleanup_super(struct obd_device * obddev);
+int obd_setup_super(struct obd_device * obddev, int sbdev);
long obd_preallocate_inodes(unsigned int conn_id,
int req, long inodes[32], int * err);
long obd_preallocate_quota(struct super_block * sb, struct obd_client * cli,
#undef ext2_error
#define ext2_error obd_warning
#define ext2_panic obd_warning
-
#ifdef EXT2FS_DEBUG
# undef ext2_debug
# define ext2_debug(format, a...) CDEBUG(D_EXT2, format, ## a)
/* super.c */
extern struct super_operations ext2_sops;
-
#endif /* __LINUX_SIM_OBD_H */
#!/usr/bin/perl
+#
+# This code is issued under the GNU General Public License.
+# See the file COPYING in this distribution
+#
+# Copyright (C) 1998, Stelias Computing
+#
+# Modified for InterMezzo from Gordian's HSM bcache device/jcm module
+# Copyright (C) 1999, Carnegie Mellon University
+#
+# Derived from InterMezzo's incontrol, modified for OBD's
+# Copyright (C) 1999, Stelias Computing
+#
+#
+
#use strict;
BEGIN { require "asm/errno.ph" };
BEGIN { require "asm/ioctl.ph" };
# p2ph generated invalid macros for ioctl stuff, so I override some of it here
eval 'sub OBD_IOC_CREATE () { &_IOC(2, ord(\'f\'), 3, 4);}' unless
defined(&OBD_IOC_CREATE);
-eval 'sub OBD_IOC_SETUP () { &_IOC(1, ord(\'f\'), 4, 4);}' unless
- defined(&OBD_IOC_SETUP);
-eval 'sub OBD_IOC_SYNC () { &_IOC(2, ord(\'f\'), 5, 4);}' unless
- defined(&OBD_IOC_SYNC);
+eval 'sub OBD_IOC_SETUP_SUPER () { &_IOC(1, ord(\'f\'), 4, 4);}' unless
+ defined(&OBD_IOC_SETUP_SUPER);
+eval 'sub OBD_IOC_CLEANUP_SUPER () { &_IOC(0, ord(\'f\'), 5, 0);}' unless
+ defined(&OBD_IOC_CLEANUP_SUPER);
eval 'sub OBD_IOC_DESTROY () { &_IOC(1, ord(\'f\'), 6, 4);}' unless
defined(&OBD_IOC_DESTROY);
eval 'sub OBD_IOC_PREALLOCATE () { &_IOC(3, ord(\'f\'), 7, 4);}' unless
defined(&OBD_IOC_DISCONNECT);
eval 'sub OBD_IOC_STATFS () { &_IOC(3, ord(\'f\'), 15, 4);}' unless
defined(&OBD_IOC_STATFS);
+# OBD_IOC_SYNC is _IOR('f', 16, long) in the C header; command number 5 is
+# now used by OBD_IOC_CLEANUP_SUPER, so the fallback must encode 16 here.
+eval 'sub OBD_IOC_SYNC () { &_IOC(2, ord(\'f\'), 16, 4);}' unless
+ defined(&OBD_IOC_SYNC);
eval 'sub ATTR_MODE () {1;}' unless defined(&ATTR_MODE);
eval 'sub ATTR_UID () {2;}' unless defined(&ATTR_UID);
# startup options (I'll replace these when I have some to replace with)
-GetOptions("device=s" => \$device, "fs=s" => $filesystem) || die "Getoptions";
+# Getopt::Long stores each option value through a reference, so both
+# destinations must be passed as \$var (the "fs" target was missing its "\").
+GetOptions("device=s" => \$device, "fs=s" => \$filesystem) || die "Getoptions";
+# genuine new simulated OBD device
$device = "/dev/obd" unless $device;
+# object store in the ext2 formatted block device
$filesystem = "/dev/loop0" unless $filesystem;
# get a console for the app
my %commands =
('create' => {func => "Create", doc => "create: creates a new inode"},
- 'setup' => {func => "Setup", doc => "setup: initializes the environment"},
+ 'setup' => {func => "Setup", doc => "setup: link the ext2 partition (default /dev/loop0) to this obddev"},
'connect' => {func => "Connect", doc => "connect: allocates client ID for this session"},
'disconnect' => {func => "Disconnect", doc => "disconnect [id]: frees client resources"},
'sync' => {func => "Sync", doc => "sync: flushes buffers to disk"},
- 'destroy' => {func => "Destroy", doc => "setup: destroys an inode"},
+ # Help strings follow the "<command>: <description>" form used by the other entries.
+ 'destroy' => {func => "Destroy", doc => "destroy: destroys an inode"},
+ 'cleanup' => {func => "Cleanup", doc => "cleanup: detach the superblock from this minor obd dev"},
'dec_use_count' => {func => "Decusecount", doc => "decreases the module use count so that the module can be removed following an oops"},
'read' => {func => "Read", doc => "read <inode> <count> [offset]"},
'write' => {func => "Write", doc => "write <inode> <offset> <text>"},
- 'setattr' => {func => "Setattr", doc => "setattr [mode [uid [gid [size [atime [mtime [ctime]]]]]]]"},
- 'getattr' => {func => "Getattr", doc => "getattr [inode]: displays inode object attributes"},
+ 'setattr' => {func => "Setattr", doc => "setattr <inode> [mode [uid [gid [size [atime [mtime [ctime]]]]]]]"},
+ 'getattr' => {func => "Getattr", doc => "getattr <inode>: displays inode object attributes"},
'preallocate' => {func => "Preallocate", doc => "preallocate [num]: requests preallocation of num inodes."},
'statfs' => {func => "Statfs", doc => "statfs: filesystem status information"},
'help' => {func => \&Help, doc => "help: this message"},
$attribs->{attempted_completion_function} = \&completeme;
#------------------------------------------------------------------------------
# Open the device, as we need an FD for the ioctl
-sysopen(DEV_OBD, $device, 0);
+# Abort early if the OBD device cannot be opened; include the OS error ($!)
+# so the cause (missing node, permissions, module not loaded) is visible.
+sysopen(DEV_OBD, $device, 0) || die "Cannot open $device: $!";
if (!defined($::st = stat($filesystem))) {
die "Unable to stat $filesystem.\n";
sub Setup {
my $err = 0;
my $packed = pack("L", $::st->rdev());
- my $rc = ioctl(DEV_OBD, &OBD_IOC_SETUP, $packed);
+ my $rc = ioctl(DEV_OBD, &OBD_IOC_SETUP_SUPER, $packed);
+
+ if (!defined $rc) {
+ print STDERR "ioctl failed: $!\n";
+ } elsif ($rc eq "0 but true") {
+ print "Finished (success)\n";
+ } else {
+ print "ioctl returned error code $rc.\n";
+ }
+}
+
+sub Cleanup {
+ my $err = "0";
+ my $rc = ioctl(DEV_OBD, &OBD_IOC_CLEANUP_SUPER, $err);
if (!defined $rc) {
print STDERR "ioctl failed: $!\n";
}
}
+
sub Connect {
my $id = 0;
my $ino = 0;
}
sub Destroy {
- if (!defined($id)) {
- $id = $::client_id;
+ if (!defined($::client_id)) {
+ print "You must first ``connect''.\n";
+ return;
}
my $arg = shift;
($valid, $mode, $uid, $gid, $size, $atime, $mtime, $ctime, $flags) =
unpack("ISssx2lLLLI", $packed);
- printf("Inode: %d Mode: %04d\n", $inode, $mode);
+ printf("Inode: %d Mode: %o\n", $inode, $mode);
printf("User: %6d Group: %6d Size: %d\n", $uid, $gid, $size);
printf("ctime: %08lx -- %s\n", $ctime, scalar(gmtime($ctime)));
printf("atime: %08lx -- %s\n", $atime, scalar(gmtime($atime)));
my $inode = shift;
my $valid = 0;
- my $mode = shift;
+ my $mode = oct(shift);
my $uid = shift;
my $gid = shift;
my $size = shift;
# time_t ia_ctime; (32)
# unsigned int ia_attr_flags; (32)
#};
+
+ printf "valid is %x, mode is %o\n", $valid, $mode;
my $packed = pack("ILLSssx2ILLLL", $::client_id, $inode, $valid, $mode,
$uid, $gid, $size, $atime, $mtime, $ctime, 0);
my $rc = ioctl(DEV_OBD, &OBD_IOC_SETATTR, $packed);
my $rc = ioctl(DEV_OBD, &OBD_IOC_READ, $packed);
- $retval = unpack("L", $packed);
+ $retval = unpack("l", $packed);
if (!defined $rc) {
print STDERR "ioctl failed: $!\n";
my $packed = pack("ILpLLL", $::client_id, $inode, $text, $count, 0, $offset);
my $rc = ioctl(DEV_OBD, &OBD_IOC_WRITE, $packed);
- $retval = unpack("L", $packed);
+ $retval = unpack("l", $packed);
if (!defined $rc) {
print STDERR "ioctl failed: $!\n";
--- /dev/null
+OBDFS - v 0.001
+
+This version can mount, list and set the attributes of the root
+directory.
+
+1. configure the obd to use /dev/obd with a scratch file system
+
+2. make
+
+3. insmod obdfs
+
+4. mount -t obdfs /dev/obd /mnt
+
+To verify, run "mount"; its output should include the line:
+/dev/obd on /mnt type obdfs (rw)
+
+5. ls -ld /mnt
+
+6. chmod 711 /mnt, ls -ld /mnt
+
+7. chown seagate /mnt , ls -ld /mnt
+
--- /dev/null
+Considerations for an API between OBD's and OBDFS
+
+
+OBDFS
+
+Methods needed:
+
+struct super_operations {
+ void (*read_inode) (struct inode *);
+ void (*write_inode) (struct inode *);
+ void (*put_inode) (struct inode *);
+ void (*delete_inode) (struct inode *);
+ int (*notify_change) (struct dentry *, struct iattr *);
+ void (*put_super) (struct super_block *);
+ void (*write_super) (struct super_block *);
+ int (*statfs) (struct super_block *, struct statfs *, int);
+* int (*remount_fs) (struct super_block *, int *, char *);
+ void (*clear_inode) (struct inode *);
+* void (*umount_begin) (struct super_block *);
+};
+
+
+read_inode:
+
+Called from function iget(ino, dev) - through get_new_inode.
+Typically called only when a VFS inode is instantiated by FS, i.e.
+upon lookup, create, mkdir, or upon mounting for the / inode.
+
+ - executed for new inodes and for existing inodes
+ - for new inodes, avoid traffic to disk
+
+E.g:
+
+lookup("name in dir-inode")
+{
+ get data from dir-inode;
+ find ino of "name" in this data;
+ iget(sb(dev), ino);
+ ---> calls read_inode
+ ----> obd_getattr(obj-no = ino)
+}
+
+create("name in dir-inode")
+{
+ get ino for name from pre-alloced obj-no's
+ ---> may call obd_prealloc_ids(hint)
+
+ iget(sb(dev), ino)
+ ---> call read_inode
+ ---> do not contact OBD, fill in from FS
+
+ change data from dir-inode, to contain ("name, ino");
+}
+
+mkdir("name in dir-inode")
+{
+ as above
+}
+
+read_super(dev, data)
+{
+ struct obdfs_sb *sb = ... ;
+ obd_connect(dev, &sb->obdfs_conn_info);
+
+ iget(sb, sb->obdfs_conn_info.conn_ino);
+
+
+}
+
+We currently have:
+
+struct obd_conn_info {
+ unsigned int conn_id;
+ unsigned long conn_ino;
+ unsigned long conn_blocksize;
+ unsigned char conn_blocksize_bits;
+};
+
+read_inode(inode)
+{
+ struct iattr attr;
+
+
+ if ( inode in inode_attr cache ) {
+ get_attr_from_cache(inode, &attr);
+ } else {
+ obd_getattr(conn_id, inode->i_ino, &attr);
+ }
+
+ inode_setattr(inode, &attr);
+}
+
+The write_inode method is called from the bdflush (sync_dev) routines,
+through the functions write_inode, sync_inode, sync_list, sync_inodes, etc.:
+
+void sync_dev(kdev_t dev)
+{
+ sync_buffers(dev, 0);
+ sync_supers(dev);
+ sync_inodes(dev);
+ sync_buffers(dev, 0);
+ DQUOT_SYNC(dev);
+ /*
+ * FIXME(eric) we need to sync the physical devices here.
+ * This is because some (scsi) controllers have huge amounts of
+ * cache onboard (hundreds of Mb), and we need to instruct
+ * them to commit all of the dirty memory to disk, and we should
+ * not return until this has happened.
+ *
+ * This would need to get implemented by going through the assorted
+ * layers so that each block major number can be synced, and this
+ * would call down into the upper and mid-layer scsi.
+ */
+}
+
+The sync_inodes function is responsible (for "block" disk file
+systems) for copying the modified inode metadata into the buffer
+cache. The sync_buffers call that follows sync_inodes is responsible
+for writing this metadata back. For OBDs this is different.
+
+I expect the following routine to be there:
+
+sync_inode_pages(dev,0);
+sync_supers(dev);
+sync_inode_metadata(dev);
+
+
+The statfs function should return simple summary information available
+on the disk: %free, total space, etc. May require a new obd_command.
+
+Similarly write_super would instruct the disk to commit any pending
+data. This is called from do_unmount just before put_super (the
+latter breaks down the vm super block structure).
+
+Write_super should:
+- undo pre-allocated inode numbers
+
+The disk itself also needs a cleanup function.
+
+struct file_operations {
+ loff_t (*llseek) (struct file *, loff_t, int);
+ ssize_t (*read) (struct file *, char *, size_t, loff_t *);
+ ssize_t (*write) (struct file *, const char *, size_t, loff_t *);
+ int (*readdir) (struct file *, void *, filldir_t);
+ unsigned int (*poll) (struct file *, struct poll_table_struct *);
+ int (*ioctl) (struct inode *, struct file *, unsigned int, unsigned long);
+ int (*mmap) (struct file *, struct vm_area_struct *);
+ int (*open) (struct inode *, struct file *);
+ int (*flush) (struct file *);
+ int (*release) (struct inode *, struct file *);
+ int (*fsync) (struct file *, struct dentry *);
+ int (*fasync) (int, struct file *, int);
+ int (*check_media_change) (kdev_t dev);
+ int (*revalidate) (kdev_t dev);
+ int (*lock) (struct file *, int, struct file_lock *);
+};
+
+
+struct inode_operations {
+ struct file_operations * default_file_ops;
+ int (*create) (struct inode *,struct dentry *,int);
+ struct dentry * (*lookup) (struct inode *,struct dentry *);
+ int (*link) (struct dentry *,struct inode *,struct dentry *);
+ int (*unlink) (struct inode *,struct dentry *);
+ int (*symlink) (struct inode *,struct dentry *,const char *);
+ int (*mkdir) (struct inode *,struct dentry *,int);
+ int (*rmdir) (struct inode *,struct dentry *);
+ int (*mknod) (struct inode *,struct dentry *,int,int);
+ int (*rename) (struct inode *, struct dentry *,
+ struct inode *, struct dentry *);
+ int (*readlink) (struct dentry *, char *,int);
+ struct dentry * (*follow_link) (struct dentry *, struct dentry *, unsigned int);
+ /*
+ * the order of these functions within the VFS template has been
+ * changed because SMP locking has changed: from now on all get_block,
+ * readpage, writepage and flushpage functions are supposed to do
+ * whatever locking they need to get proper SMP operation - for
+ * now in most cases this means a lock/unlock_kernel at entry/exit.
+ * [The new order is also slightly more logical :)]
+ */
+ /*
+ * Generic block allocator exported by the lowlevel fs. All metadata
+ * details are handled by the lowlevel fs, all 'logical data content'
+ * details are handled by the highlevel block layer.
+ */
+ int (*get_block) (struct inode *, long, struct buffer_head *, int);
+
+ int (*readpage) (struct file *, struct page *);
+ int (*writepage) (struct file *, struct page *);
+ int (*flushpage) (struct inode *, struct page *, unsigned long);
+
+ void (*truncate) (struct inode *);
+ int (*permission) (struct inode *, int);
+ int (*smap) (struct inode *,int);
+ int (*revalidate) (struct dentry *);
+};
+
+