memset(*buf, 0, *len);
*hdr = (struct mds_req_hdr *)(*buf);
+ *req = (struct mds_req *)(*buf + sizeof(**hdr));
preq = (struct mds_req_packed *)(*buf + sizeof(**hdr));
ptr = *buf + sizeof(**hdr) + sizeof(*preq);
memset(*buf, 0, *len);
*hdr = (struct mds_rep_hdr *)(*buf);
+ *rep = (struct mds_rep *)(*buf + sizeof(**hdr));
prep = (struct mds_rep_packed *)(*buf + sizeof(**hdr));
ptr = *buf + sizeof(**hdr) + sizeof(*prep);
preq = (struct ost_req_packed *)(*buf + sizeof(**hdr));
ptr = *buf + sizeof(**hdr) + sizeof(*preq);
-
*req = (struct ost_req *)(*buf + sizeof(**hdr));
(*hdr)->type = OST_TYPE_REQ;
EXTRA_PROGRAMS = llight
-llight_SOURCES = mds_pack.c request.c # super.c rw.c file.c dir.c sysctl.c super.c namei.c symlink.c
-
-mds_pack.c:
- ln -s ../lib/mds_pack.c .
-
+llight_SOURCES = super.c rw.c file.c dir.c sysctl.c namei.c symlink.c
include $(top_srcdir)/Rules
--- /dev/null
+/*
+ * linux/fs/ext2/dir.c
+ *
+ * Copyright (C) 1992, 1993, 1994, 1995
+ * Remy Card (card@masi.ibp.fr)
+ * Laboratoire MASI - Institut Blaise Pascal
+ * Universite Pierre et Marie Curie (Paris VI)
+ *
+ * from
+ *
+ * linux/fs/minix/dir.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ *
+ * ext2 directory handling functions
+ *
+ * Big-endian to little-endian byte-swapping/bitmaps by
+ * David S. Miller (davem@caip.rutgers.edu), 1995
+ *
+ * All code that works with directory layout had been switched to pagecache
+ * and moved here. AV
+ */
+
+#include <linux/fs.h>
+#include <linux/ext2_fs.h>
+#include <linux/pagemap.h>
+#include <linux/obd_support.h>
+#include <linux/locks.h>
+
+/* Shorthand used throughout this file for struct ext2_dir_entry_2. */
+typedef struct ext2_dir_entry_2 ext2_dirent;
+
+/* Test / set the PG_checked bit in a page's flags word. */
+#define PageChecked(page) test_bit(PG_checked, &(page)->flags)
+#define SetPageChecked(page) set_bit(PG_checked, &(page)->flags)
+
+/*
+ * Wait on every buffer attached to @page.
+ *
+ * Follows the b_this_page chain from page->buffers until it comes back
+ * to the starting buffer. Returns 0, or -EIO if any buffer was
+ * requested but did not end up up to date.
+ */
+int waitfor_one_page(struct page *page)
+{
+	struct buffer_head *first = page->buffers;
+	struct buffer_head *cur = first;
+	int rc = 0;
+
+	do {
+		wait_on_buffer(cur);
+		if (buffer_req(cur) && !buffer_uptodate(cur))
+			rc = -EIO;
+		cur = cur->b_this_page;
+	} while (cur != first);
+
+	return rc;
+}
+
+/*
+ * ext2 uses block-sized chunks. Arguably, sector-sized ones would be
+ * more robust, but we have what we have.
+ *
+ * NOTE: this variant does not use the superblock block size (that
+ * return is left commented out below); the chunk size is always
+ * PAGE_SIZE and @inode is currently unused.
+ */
+static inline unsigned ext2_chunk_size(struct inode *inode)
+{
+ //return inode->i_sb->s_blocksize;
+ return PAGE_SIZE;
+}
+
+/*
+ * Release a page obtained from ext2_get_page(): drops the kmap taken
+ * there and then the page cache reference.
+ */
+static inline void ext2_put_page(struct page *page)
+{
+ kunmap(page);
+ page_cache_release(page);
+}
+
+/* Number of page-cache pages needed to cover i_size bytes (rounded up). */
+static inline unsigned long dir_pages(struct inode *inode)
+{
+ return (inode->i_size+PAGE_CACHE_SIZE-1)>>PAGE_CACHE_SHIFT;
+}
+
+/*
+ * Commit the range from..to of a directory page.
+ *
+ * Bumps the directory's i_version from the global event counter, calls
+ * the mapping's commit_write hook and, for IS_SYNC directories, waits
+ * for the page's buffers. Returns 0 or the error from
+ * waitfor_one_page(); the return value of commit_write is not examined.
+ */
+static int ext2_commit_chunk(struct page *page, unsigned from, unsigned to)
+{
+	struct inode *dir = page->mapping->host;
+
+	dir->i_version = ++event;
+	page->mapping->a_ops->commit_write(NULL, page, from, to);
+
+	if (!IS_SYNC(dir))
+		return 0;
+	return waitfor_one_page(page);
+}
+
+/*
+ * Validate the directory entries stored in @page and mark it checked.
+ *
+ * For the page that contains the end of the directory, the valid area
+ * is i_size modulo the page size. It must be a multiple of the chunk
+ * size, and each chunk past it gets its rec_len set to chunk_size.
+ * Every entry in the valid area is then checked for minimum length,
+ * 4-byte alignment, room for its name, and not crossing a chunk
+ * boundary; the walk must end exactly at the limit.
+ *
+ * PG_checked is always set on return. On any inconsistency the problem
+ * is reported through ext2_error() and the page is additionally
+ * flagged with SetPageError().
+ */
+static void ext2_check_page(struct page *page)
+{
+	struct inode *dir = page->mapping->host;
+	struct super_block *sb = dir->i_sb;
+	unsigned chunk_size = ext2_chunk_size(dir);
+	char *kaddr = page_address(page);
+	// u32 max_inumber = le32_to_cpu(sb->u.ext2_sb.s_es->s_inodes_count);
+	unsigned offs, rec_len;
+	unsigned limit = PAGE_CACHE_SIZE;
+	ext2_dirent *p;
+	char *error;
+
+	if ((dir->i_size >> PAGE_CACHE_SHIFT) == page->index) {
+		limit = dir->i_size & ~PAGE_CACHE_MASK;
+		if (limit & (chunk_size - 1))
+			goto Ebadsize;
+		/* Give the chunks beyond i_size a sane record length. */
+		for (offs = limit; offs < PAGE_CACHE_SIZE; offs += chunk_size) {
+			p = (ext2_dirent *)(kaddr + offs);
+			p->rec_len = cpu_to_le16(chunk_size);
+		}
+		if (!limit)
+			goto out;
+	}
+	for (offs = 0; offs <= limit - EXT2_DIR_REC_LEN(1); offs += rec_len) {
+		p = (ext2_dirent *)(kaddr + offs);
+		rec_len = le16_to_cpu(p->rec_len);
+
+		if (rec_len < EXT2_DIR_REC_LEN(1))
+			goto Eshort;
+		if (rec_len & 3)
+			goto Ealign;
+		if (rec_len < EXT2_DIR_REC_LEN(p->name_len))
+			goto Enamelen;
+		/* First and last byte of the entry must share a chunk. */
+		if (((offs + rec_len - 1) ^ offs) & ~(chunk_size-1))
+			goto Espan;
+		// if (le32_to_cpu(p->inode) > max_inumber)
+		//goto Einumber;
+	}
+	if (offs != limit)
+		goto Eend;
+out:
+	SetPageChecked(page);
+	return;
+
+	/* Too bad, we had an error */
+
+Ebadsize:
+	ext2_error(sb, "ext2_check_page",
+		"size of directory #%lu is not a multiple of chunk size",
+		dir->i_ino
+	);
+	goto fail;
+Eshort:
+	error = "rec_len is smaller than minimal";
+	goto bad_entry;
+Ealign:
+	error = "unaligned directory entry";
+	goto bad_entry;
+Enamelen:
+	error = "rec_len is too small for name_len";
+	goto bad_entry;
+Espan:
+	error = "directory entry across blocks";
+	goto bad_entry;
+	//Einumber:
+	// error = "inode out of bounds";
+bad_entry:
+	ext2_error (sb, "ext2_check_page", "bad entry in directory #%lu: %s - "
+		"offset=%lu, inode=%lu, rec_len=%d, name_len=%d",
+		dir->i_ino, error, (page->index<<PAGE_CACHE_SHIFT)+offs,
+		(unsigned long) le32_to_cpu(p->inode),
+		rec_len, p->name_len);
+	goto fail;
+Eend:
+	p = (ext2_dirent *)(kaddr + offs);
+	/* " - " keeps "boundary" and "offset=" from running together. */
+	ext2_error (sb, "ext2_check_page",
+		"entry in directory #%lu spans the page boundary - "
+		"offset=%lu, inode=%lu",
+		dir->i_ino, (page->index<<PAGE_CACHE_SHIFT)+offs,
+		(unsigned long) le32_to_cpu(p->inode));
+fail:
+	SetPageChecked(page);
+	SetPageError(page);
+}
+
+/*
+ * Read page @n of directory @dir through the page cache.
+ *
+ * On success the page is returned kmapped and has passed (or previously
+ * passed) ext2_check_page(); release it with ext2_put_page().
+ * Returns the ERR_PTR from read_cache_page() if the read could not be
+ * started, or ERR_PTR(-EIO) if the page is not up to date or carries
+ * the error flag.
+ */
+static struct page * ext2_get_page(struct inode *dir, unsigned long n)
+{
+	struct address_space *mapping = dir->i_mapping;
+	struct page *page;
+
+	page = read_cache_page(mapping, n,
+			       (filler_t*)mapping->a_ops->readpage, NULL);
+	if (IS_ERR(page))
+		return page;
+
+	wait_on_page(page);
+	kmap(page);
+	if (Page_Uptodate(page)) {
+		if (!PageChecked(page))
+			ext2_check_page(page);
+		if (!PageError(page))
+			return page;
+	}
+
+	ext2_put_page(page);
+	return ERR_PTR(-EIO);
+}
+
+/*
+ * Compare a name against a directory entry.
+ *
+ * NOTE! unlike strncmp, ext2_match returns 1 for a match and 0 otherwise.
+ * An entry with a zero inode number never matches.
+ *
+ * len <= EXT2_NAME_LEN and de != NULL are guaranteed by caller.
+ */
+static inline int ext2_match (int len, const char * const name,
+			      struct ext2_dir_entry_2 * de)
+{
+	if (len == de->name_len && de->inode)
+		return !memcmp(name, de->name, len);
+	return 0;
+}
+
+/*
+ * Return the entry that follows @p, i.e. @p advanced by its own
+ * (little-endian) rec_len. No bounds checking is done here.
+ *
+ * p is at least 6 bytes before the end of page
+ */
+static inline ext2_dirent *ext2_next_entry(ext2_dirent *p)
+{
+ return (ext2_dirent *)((char*)p + le16_to_cpu(p->rec_len));
+}
+
+/*
+ * Turn a possibly stale @offset into the offset of a real entry.
+ *
+ * Starts at the chunk boundary (offset & mask) and steps entry by entry
+ * until reaching or passing @offset; returns that entry's offset from
+ * @base.
+ */
+static inline unsigned
+ext2_validate_entry(char *base, unsigned offset, unsigned mask)
+{
+	char *target = base + offset;
+	ext2_dirent *cur = (ext2_dirent*)(base + (offset&mask));
+
+	for ( ; (char*)cur < target; cur = ext2_next_entry(cur))
+		;
+	return (char *)cur - base;
+}
+
+/* Maps an EXT2_FT_* directory-entry type code to the DT_* value given to filldir. */
+static unsigned char ext2_filetype_table[EXT2_FT_MAX] = {
+ [EXT2_FT_UNKNOWN] DT_UNKNOWN,
+ [EXT2_FT_REG_FILE] DT_REG,
+ [EXT2_FT_DIR] DT_DIR,
+ [EXT2_FT_CHRDEV] DT_CHR,
+ [EXT2_FT_BLKDEV] DT_BLK,
+ [EXT2_FT_FIFO] DT_FIFO,
+ [EXT2_FT_SOCK] DT_SOCK,
+ [EXT2_FT_SYMLINK] DT_LNK,
+};
+
+/*
+ * Maps an EXT2_FT_* directory-entry type code to S_IF* format bits.
+ * NOTE(review): the array is sized with DT_WHT + 1 although it is
+ * indexed with EXT2_FT_* values - confirm the sizing is intentional.
+ */
+static unsigned int obdfs_dt2fmt[DT_WHT + 1] = {
+ [EXT2_FT_UNKNOWN] 0,
+ [EXT2_FT_REG_FILE] S_IFREG,
+ [EXT2_FT_DIR] S_IFDIR,
+ [EXT2_FT_CHRDEV] S_IFCHR,
+ [EXT2_FT_BLKDEV] S_IFBLK,
+ [EXT2_FT_FIFO] S_IFIFO,
+ [EXT2_FT_SOCK] S_IFSOCK,
+ [EXT2_FT_SYMLINK] S_IFLNK
+};
+
+/* The S_IFMT file-type bits, shifted right by S_SHIFT, index the table below. */
+#define S_SHIFT 12
+static unsigned char ext2_type_by_mode[S_IFMT >> S_SHIFT] = {
+ [S_IFREG >> S_SHIFT] EXT2_FT_REG_FILE,
+ [S_IFDIR >> S_SHIFT] EXT2_FT_DIR,
+ [S_IFCHR >> S_SHIFT] EXT2_FT_CHRDEV,
+ [S_IFBLK >> S_SHIFT] EXT2_FT_BLKDEV,
+ [S_IFIFO >> S_SHIFT] EXT2_FT_FIFO,
+ [S_IFSOCK >> S_SHIFT] EXT2_FT_SOCK,
+ [S_IFLNK >> S_SHIFT] EXT2_FT_SYMLINK,
+};
+
+/* Store in @de the EXT2_FT_* code matching the file-type bits of @inode's mode. */
+static inline void ext2_set_de_type(ext2_dirent *de, struct inode *inode)
+{
+	mode_t fmt = inode->i_mode;
+
+	fmt &= S_IFMT;
+	de->file_type = ext2_type_by_mode[fmt >> S_SHIFT];
+}
+
+/*
+ * readdir for OBDFS directories.
+ *
+ * Walks the directory page by page starting at filp->f_pos and passes
+ * every entry with a non-zero inode number to @filldir, stopping early
+ * when @filldir returns non-zero. Pages that cannot be obtained from
+ * ext2_get_page() are skipped. On exit f_pos and f_version are updated
+ * and the inode's atime is touched. Always returns 0.
+ */
+int
+new_obdfs_readdir (struct file * filp, void * dirent, filldir_t filldir)
+{
+ loff_t pos = filp->f_pos;
+ struct inode *inode = filp->f_dentry->d_inode;
+ // XXX struct super_block *sb = inode->i_sb;
+ unsigned offset = pos & ~PAGE_CACHE_MASK;
+ unsigned long n = pos >> PAGE_CACHE_SHIFT;
+ unsigned long npages = dir_pages(inode);
+ unsigned chunk_mask = ~(ext2_chunk_size(inode)-1);
+ unsigned char *types = NULL;
+ /* f_version lagging i_version means the saved offset may no longer sit on an entry boundary */
+ int need_revalidate = (filp->f_version != inode->i_version);
+
+ /* no room left for even a minimal entry at this position */
+ if (pos > inode->i_size - EXT2_DIR_REC_LEN(1))
+ goto done;
+
+ types = ext2_filetype_table;
+
+ for ( ; n < npages; n++, offset = 0) {
+ char *kaddr, *limit;
+ ext2_dirent *de;
+ struct page *page = ext2_get_page(inode, n);
+
+ if (IS_ERR(page))
+ continue;
+ kaddr = page_address(page);
+ if (need_revalidate) {
+ /* only the first page visited needs its offset re-derived */
+ offset = ext2_validate_entry(kaddr, offset, chunk_mask);
+ need_revalidate = 0;
+ }
+ de = (ext2_dirent *)(kaddr+offset);
+ limit = kaddr + PAGE_CACHE_SIZE - EXT2_DIR_REC_LEN(1);
+ for ( ;(char*)de <= limit; de = ext2_next_entry(de))
+ if (de->inode) {
+ int over;
+ unsigned char d_type = DT_UNKNOWN;
+
+ if (types && de->file_type < EXT2_FT_MAX)
+ d_type = types[de->file_type];
+
+ offset = (char *)de - kaddr;
+ over = filldir(dirent, de->name, de->name_len,
+ (n<<PAGE_CACHE_SHIFT) | offset,
+ le32_to_cpu(de->inode), d_type);
+ if (over) {
+ /* f_pos computed at done: points at this entry */
+ ext2_put_page(page);
+ goto done;
+ }
+ }
+ ext2_put_page(page);
+ }
+
+done:
+ filp->f_pos = (n << PAGE_CACHE_SHIFT) | offset;
+ filp->f_version = inode->i_version;
+ UPDATE_ATIME(inode);
+ return 0;
+}
+
+/*
+ * ext2_find_entry()
+ *
+ * finds an entry in the specified directory with the wanted name. It
+ * returns the page in which the entry was found, and the entry itself
+ * (as a parameter - res_dir). Page is returned mapped and unlocked.
+ * Entry is guaranteed to be valid.
+ *
+ * Returns NULL with *res_page set to NULL when no entry matches. On
+ * success the caller owns the kmap and page reference taken by
+ * ext2_get_page(). Pages that cannot be read are skipped.
+ */
+struct ext2_dir_entry_2 * ext2_find_entry (struct inode * dir,
+ struct dentry *dentry, struct page ** res_page)
+{
+ const char *name = dentry->d_name.name;
+ int namelen = dentry->d_name.len;
+ unsigned reclen = EXT2_DIR_REC_LEN(namelen);
+ unsigned long start, n;
+ unsigned long npages = dir_pages(dir);
+ struct page *page = NULL;
+ ext2_dirent * de;
+
+ /* OFFSET_CACHE */
+ *res_page = NULL;
+
+ /* the per-inode lookup start hint is disabled; always begin at page 0 */
+ // start = dir->u.ext2_i.i_dir_start_lookup;
+ start = 0;
+ if (start >= npages)
+ start = 0;
+ n = start;
+ do {
+ char *kaddr;
+ page = ext2_get_page(dir, n);
+ if (!IS_ERR(page)) {
+ kaddr = page_address(page);
+ de = (ext2_dirent *) kaddr;
+ /* last position in the page where an entry of reclen bytes still fits */
+ kaddr += PAGE_CACHE_SIZE - reclen;
+ while ((char *) de <= kaddr) {
+ if (ext2_match (namelen, name, de))
+ goto found;
+ de = ext2_next_entry(de);
+ }
+ ext2_put_page(page);
+ }
+ /* wrap around so the scan ends when it returns to start */
+ if (++n >= npages)
+ n = 0;
+ } while (n != start);
+ return NULL;
+
+found:
+ *res_page = page;
+ // dir->u.ext2_i.i_dir_start_lookup = n;
+ return de;
+}
+
+/*
+ * Return the second entry of directory @dir's first page (the ".."
+ * slot as laid out by ext2_make_empty) and store the page in *p.
+ * The page is handed back as obtained from ext2_get_page(); the caller
+ * releases it. Returns NULL, leaving *p untouched, if the page cannot
+ * be read.
+ */
+struct ext2_dir_entry_2 * ext2_dotdot (struct inode *dir, struct page **p)
+{
+	struct page *first = ext2_get_page(dir, 0);
+
+	if (IS_ERR(first))
+		return NULL;
+
+	*p = first;
+	return ext2_next_entry((ext2_dirent *) page_address(first));
+}
+
+/*
+ * Look up @dentry's name in directory @dir.
+ *
+ * Returns the inode number stored in the matching entry, or 0 if there
+ * is none. On a match *type receives the S_IF* bits for the entry's
+ * file_type, or 0 when the stored file_type is not a known EXT2_FT_*
+ * code. *type is left untouched when nothing is found.
+ */
+ino_t obdfs_inode_by_name(struct inode * dir, struct dentry *dentry, int *type)
+{
+	ino_t res = 0;
+	struct ext2_dir_entry_2 * de;
+	struct page *page;
+
+	de = ext2_find_entry (dir, dentry, &page);
+	if (de) {
+		res = le32_to_cpu(de->inode);
+		/*
+		 * file_type is read from the directory page; bound it before
+		 * indexing, the same way new_obdfs_readdir() does.
+		 */
+		if (de->file_type < EXT2_FT_MAX)
+			*type = obdfs_dt2fmt[de->file_type];
+		else
+			*type = 0;
+		ext2_put_page(page);
+	}
+	return res;
+}
+
+/*
+ * Point the existing entry @de (located in @page of directory @dir) at
+ * @inode: rewrites the inode number and file type and updates the
+ * directory's mtime/ctime.
+ *
+ * Releases the page. A prepare_write failure is treated as BUG(); the
+ * result of the commit is stored in err but not reported to the caller.
+ */
+void ext2_set_link(struct inode *dir, struct ext2_dir_entry_2 *de,
+ struct page *page, struct inode *inode)
+{
+ /* byte range of the entry within the page */
+ unsigned from = (char *) de - (char *) page_address(page);
+ unsigned to = from + le16_to_cpu(de->rec_len);
+ int err;
+
+ lock_page(page);
+ err = page->mapping->a_ops->prepare_write(NULL, page, from, to);
+ if (err)
+ BUG();
+ de->inode = cpu_to_le32(inode->i_ino);
+ ext2_set_de_type (de, inode);
+ dir->i_mtime = dir->i_ctime = CURRENT_TIME;
+ err = ext2_commit_chunk(page, from, to);
+ UnlockPage(page);
+ ext2_put_page(page);
+}
+
+/*
+ * Add an entry named after @dentry, pointing at @inode, to the parent
+ * directory of @dentry.
+ *
+ * Parent is locked.
+ *
+ * Scans pages 0..npages (one page past the current end, so the
+ * directory can grow) for either an unused entry that is large enough
+ * or a used entry with enough slack to be split.
+ *
+ * Returns 0 on success, -EEXIST if the name is already present, -EIO if
+ * a zero-length record is met on the expansion page, or the error from
+ * ext2_get_page(), prepare_write or ext2_commit_chunk().
+ */
+int ext2_add_link (struct dentry *dentry, struct inode *inode)
+{
+	struct inode *dir = dentry->d_parent->d_inode;
+	const char *name = dentry->d_name.name;
+	int namelen = dentry->d_name.len;
+	unsigned reclen = EXT2_DIR_REC_LEN(namelen);
+	unsigned short rec_len, name_len;
+	struct page *page = NULL;
+	ext2_dirent * de;
+	unsigned long npages = dir_pages(dir);
+	unsigned long n;
+	char *kaddr;
+	unsigned from, to;
+	int err;
+
+	/* We take care of directory expansion in the same loop */
+	for (n = 0; n <= npages; n++) {
+		page = ext2_get_page(dir, n);
+		err = PTR_ERR(page);
+		if (IS_ERR(page))
+			goto out;
+		kaddr = page_address(page);
+		de = (ext2_dirent *)kaddr;
+		kaddr += PAGE_CACHE_SIZE - reclen;
+		while ((char *)de <= kaddr) {
+			err = -EEXIST;
+			if (ext2_match (namelen, name, de))
+				goto out_page;
+			/* name_len: bytes this entry really needs */
+			name_len = EXT2_DIR_REC_LEN(de->name_len);
+			rec_len = le16_to_cpu(de->rec_len);
+			if ( n==npages && rec_len == 0) {
+				printk("Fatal dir behaviour\n");
+				/*
+				 * A zero rec_len would loop forever; report
+				 * it as an I/O error rather than the stale
+				 * -EEXIST set above.
+				 */
+				err = -EIO;
+				goto out_page;
+			}
+			if (!de->inode && rec_len >= reclen)
+				goto got_it;
+			if (rec_len >= name_len + reclen)
+				goto got_it;
+			de = (ext2_dirent *) ((char *) de + rec_len);
+		}
+		ext2_put_page(page);
+	}
+	BUG();
+	return -EINVAL;
+
+got_it:
+	from = (char*)de - (char*)page_address(page);
+	to = from + rec_len;
+	lock_page(page);
+	err = page->mapping->a_ops->prepare_write(NULL, page, from, to);
+	if (err)
+		goto out_unlock;
+	if (de->inode) {
+		/* Split: the live entry keeps name_len bytes, the rest is ours. */
+		ext2_dirent *de1 = (ext2_dirent *) ((char *) de + name_len);
+		de1->rec_len = cpu_to_le16(rec_len - name_len);
+		de->rec_len = cpu_to_le16(name_len);
+		de = de1;
+	}
+	de->name_len = namelen;
+	memcpy (de->name, name, namelen);
+	de->inode = cpu_to_le32(inode->i_ino);
+	ext2_set_de_type (de, inode);
+	CDEBUG(D_INODE, "type set to %o\n", de->file_type);
+	dir->i_mtime = dir->i_ctime = CURRENT_TIME;
+	err = ext2_commit_chunk(page, from, to);
+
+	// change_inode happens with the commit_chunk
+	// obdfs_change_inode(dir);
+	/* OFFSET_CACHE */
+out_unlock:
+	UnlockPage(page);
+out_page:
+	ext2_put_page(page);
+out:
+	return err;
+}
+
+/*
+ * ext2_delete_entry deletes a directory entry by merging it with the
+ * previous entry. Page is up-to-date. Releases the page.
+ *
+ * Note that @dir is the directory ENTRY to remove, not a directory
+ * inode; the owning inode is taken from page->mapping->host.
+ * Returns the result of ext2_commit_chunk(); a prepare_write failure
+ * is treated as BUG().
+ */
+int ext2_delete_entry (struct ext2_dir_entry_2 * dir, struct page * page )
+{
+ struct address_space *mapping = page->mapping;
+ struct inode *inode = mapping->host;
+ char *kaddr = page_address(page);
+ /* from: start of the chunk holding the entry; to: end of the entry */
+ unsigned from = ((char*)dir - kaddr) & ~(ext2_chunk_size(inode)-1);
+ unsigned to = ((char*)dir - kaddr) + le16_to_cpu(dir->rec_len);
+ ext2_dirent * pde = NULL;
+ ext2_dirent * de = (ext2_dirent *) (kaddr + from);
+ int err;
+
+ /* walk from the chunk start to find the entry just before @dir, if any */
+ while ((char*)de < (char*)dir) {
+ pde = de;
+ de = ext2_next_entry(de);
+ }
+ if (pde)
+ from = (char*)pde - (char*)page_address(page);
+ lock_page(page);
+ err = mapping->a_ops->prepare_write(NULL, page, from, to);
+ if (err)
+ BUG();
+ /* stretch the previous entry over the removed one */
+ if (pde)
+ pde->rec_len = cpu_to_le16(to-from);
+ dir->inode = 0;
+ inode->i_ctime = inode->i_mtime = CURRENT_TIME;
+ err = ext2_commit_chunk(page, from, to);
+ UnlockPage(page);
+ ext2_put_page(page);
+ return err;
+}
+
+/*
+ * Set the first fragment of directory.
+ *
+ * Writes the "." entry (pointing at @inode) and the ".." entry
+ * (pointing at @parent) into the first chunk of page 0 of @inode and
+ * commits it; ".." is given the rest of the chunk.
+ *
+ * Returns 0 on success, -ENOMEM if the page cannot be grabbed, or the
+ * error from prepare_write / ext2_commit_chunk().
+ */
+int ext2_make_empty(struct inode *inode, struct inode *parent)
+{
+	struct address_space *mapping = inode->i_mapping;
+	struct page *page = grab_cache_page(mapping, 0);
+	unsigned chunk_size = ext2_chunk_size(inode);
+	struct ext2_dir_entry_2 * de;
+	char *base;
+	int err;
+	ENTRY;
+
+	if (!page) {
+		EXIT;
+		return -ENOMEM;
+	}
+	err = mapping->a_ops->prepare_write(NULL, page, 0, chunk_size);
+	if (err)
+		goto fail;
+
+	base = page_address(page);
+
+	de = (struct ext2_dir_entry_2 *) base;
+	de->name_len = 1;
+	de->rec_len = cpu_to_le16(EXT2_DIR_REC_LEN(1));
+	memcpy (de->name, ".\0\0", 4);
+	de->inode = cpu_to_le32(inode->i_ino);
+	ext2_set_de_type (de, inode);
+
+	de = (struct ext2_dir_entry_2 *) (base + EXT2_DIR_REC_LEN(1));
+	de->name_len = 2;
+	de->rec_len = cpu_to_le16(chunk_size - EXT2_DIR_REC_LEN(1));
+	de->inode = cpu_to_le32(parent->i_ino);
+	memcpy (de->name, "..\0", 4);
+	/* type is taken from @inode, which like @parent is a directory */
+	ext2_set_de_type (de, inode);
+
+	err = ext2_commit_chunk(page, 0, chunk_size);
+fail:
+	UnlockPage(page);
+	page_cache_release(page);
+	/* was a second ENTRY; pair the trace with EXIT on the way out */
+	EXIT;
+	return err;
+}
+
+/*
+ * routine to check that the specified directory is empty (for rmdir)
+ *
+ * Returns 1 if every in-use entry is "." (pointing back at @inode) or
+ * "..", and 0 as soon as any other in-use entry is found. Pages that
+ * cannot be read are skipped.
+ */
+int ext2_empty_dir (struct inode * inode)
+{
+ struct page *page = NULL;
+ unsigned long i, npages = dir_pages(inode);
+
+ for (i = 0; i < npages; i++) {
+ char *kaddr;
+ ext2_dirent * de;
+ page = ext2_get_page(inode, i);
+
+ if (IS_ERR(page))
+ continue;
+
+ kaddr = page_address(page);
+ de = (ext2_dirent *)kaddr;
+ kaddr += PAGE_CACHE_SIZE-EXT2_DIR_REC_LEN(1);
+
+ while ((char *)de <= kaddr) {
+ if (de->inode != 0) {
+ /* check for . and .. */
+ if (de->name[0] != '.')
+ goto not_empty;
+ if (de->name_len > 2)
+ goto not_empty;
+ if (de->name_len < 2) {
+ /* "." must refer to the directory itself */
+ if (de->inode !=
+ cpu_to_le32(inode->i_ino))
+ goto not_empty;
+ } else if (de->name[1] != '.')
+ goto not_empty;
+ }
+ de = ext2_next_entry(de);
+ }
+ ext2_put_page(page);
+ }
+ return 1;
+
+not_empty:
+ /* only reached from inside the scan, so page is a valid mapped page */
+ ext2_put_page(page);
+ return 0;
+}
+
+/* File operations for OBDFS directories: generic read handler plus the readdir above. */
+struct file_operations obdfs_dir_operations = {
+ read: generic_read_dir,
+ readdir: new_obdfs_readdir
+};
--- /dev/null
+/*
+ * linux/fs/ext2/file.c
+ *
+ * This code is issued under the GNU General Public License.
+ * See the file COPYING in this distribution
+ *
+ * Copyright (C) 1992, 1993, 1994, 1995
+ * Remy Card (card@masi.ibp.fr)
+ * Laboratoire MASI - Institut Blaise Pascal
+ * Universite Pierre et Marie Curie (Paris VI)
+ *
+ * from
+ *
+ * linux/fs/minix/file.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ *
+ * ext2 fs regular file handling primitives
+ *
+ * 64-bit file support on 64-bit platforms by Jakub Jelinek
+ * (jj@sunsite.ms.mff.cuni.cz)
+ */
+
+#include <asm/uaccess.h>
+#include <asm/system.h>
+
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/fcntl.h>
+#include <linux/sched.h>
+#include <linux/stat.h>
+#include <linux/locks.h>
+#include <linux/mm.h>
+#include <linux/pagemap.h>
+#include <linux/smp_lock.h>
+
+#include <linux/obd_support.h>
+#include <linux/obdfs.h>
+
+extern int obdfs_setattr(struct dentry *de, struct iattr *attr);
+void obdfs_change_inode(struct inode *inode);
+
+/*
+ * Strip set-id bits from @inode's mode unless the caller has
+ * CAP_FSETID. S_ISUID is always a candidate; S_ISGID only when
+ * S_IXGRP is set. The change is made in memory only.
+ */
+static inline void obdfs_remove_suid(struct inode *inode)
+{
+	unsigned int mode;
+
+	/* candidate bits: S_ISGID if S_IXGRP is set, and always S_ISUID */
+	mode = (inode->i_mode & S_IXGRP)*(S_ISGID/S_IXGRP) | S_ISUID;
+
+	/* keep only the candidates that are actually set */
+	mode &= inode->i_mode;
+	if (!mode || capable(CAP_FSETID))
+		return;
+
+	inode->i_mode &= ~mode;
+	// XXX careful here - we cannot change the size
+	//obdfs_change_inode(inode);
+}
+
+/*
+ * Write to a file (through the page cache).
+ *
+ * The data transfer is delegated to generic_file_write(). If any bytes
+ * were written, new mtime/ctime/atime are pushed through
+ * obdfs_setattr(); the size is deliberately not part of that update.
+ * Returns whatever generic_file_write() returned.
+ */
+static ssize_t
+obdfs_file_write(struct file *file, const char *buf, size_t count, loff_t *ppos)
+{
+	ssize_t retval;
+
+	/* Casts make the arguments match the conversions on every word size. */
+	CDEBUG(D_INFO, "Writing inode %lu, %lu bytes, offset %Ld\n",
+	       (unsigned long)file->f_dentry->d_inode->i_ino,
+	       (unsigned long)count, (long long)*ppos);
+
+	retval = generic_file_write(file, buf, count, ppos);
+	CDEBUG(D_INFO, "Wrote %ld\n", (long)retval);
+
+	/* update mtime/ctime/atime here, NOT size */
+	if (retval > 0) {
+		struct iattr attr;
+		attr.ia_valid = ATTR_MTIME | ATTR_CTIME | ATTR_ATIME;
+		attr.ia_mtime = attr.ia_ctime = attr.ia_atime =
+			CURRENT_TIME;
+		obdfs_setattr(file->f_dentry, &attr);
+	}
+	EXIT;
+	return retval;
+}
+
+
+/* XXX this does not need to do anything for data, it _does_ need to
+ call setattr.
+ Currently a stub: ignores all arguments and returns 0. */
+int obdfs_fsync(struct file *file, struct dentry *dentry, int data)
+{
+ return 0;
+}
+
+/*
+ * File operations for regular OBDFS files. Reads and mmap use the
+ * generic page-cache helpers; writes go through obdfs_file_write().
+ * NOTE(review): fsync is left NULL even though obdfs_fsync() is
+ * defined above - confirm whether it should be wired in.
+ */
+struct file_operations obdfs_file_operations = {
+ read: generic_file_read,
+ write: obdfs_file_write,
+ mmap: generic_file_mmap,
+ fsync: NULL
+};
+
+
+/* Inode operations for regular OBDFS files: truncate and setattr only. */
+struct inode_operations obdfs_file_inode_operations = {
+ truncate: obdfs_truncate,
+ setattr: obdfs_setattr
+};
+
--- /dev/null
+/*
+ * linux/fs/obdfs/namei.c
+ *
+ * This code is issued under the GNU General Public License.
+ * See the file COPYING in this distribution
+ *
+ * Copyright (C) 1992, 1993, 1994, 1995
+ * Remy Card (card@masi.ibp.fr)
+ * Laboratoire MASI - Institut Blaise Pascal
+ * Universite Pierre et Marie Curie (Paris VI)
+ *
+ * from
+ *
+ * linux/fs/ext2/namei.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ *
+ * Big-endian to little-endian byte-swapping/bitmaps by
+ * David S. Miller (davem@caip.rutgers.edu), 1995
+ * Directory entry file type support and forward compatibility hooks
+ * for B-tree directories by Theodore Ts'o (tytso@mit.edu), 1998
+ *
+ * Changes for use in OBDFS
+ * Copyright (c) 1999, Seagate Technology Inc.
+ * Copyright (C) 2001, Cluster File Systems, Inc.
+ * Rewritten based on recent ext2 page cache use.
+ *
+ */
+
+#include <linux/fs.h>
+#include <linux/locks.h>
+#include <linux/quotaops.h>
+#include <linux/obd_support.h>
+#include <linux/obdfs.h>
+extern struct address_space_operations obdfs_aops;
+
+/* from super.c */
+extern void obdfs_change_inode(struct inode *inode);
+extern int obdfs_setattr(struct dentry *de, struct iattr *attr);
+
+/* from dir.c */
+extern int ext2_add_link (struct dentry *dentry, struct inode *inode);
+ino_t obdfs_inode_by_name(struct inode * dir, struct dentry *dentry, int *typ);
+int ext2_make_empty(struct inode *inode, struct inode *parent);
+struct ext2_dir_entry_2 * ext2_find_entry (struct inode * dir,
+ struct dentry *dentry, struct page ** res_page);
+int ext2_delete_entry (struct ext2_dir_entry_2 * dir, struct page * page );
+int ext2_empty_dir (struct inode * inode);
+struct ext2_dir_entry_2 * ext2_dotdot (struct inode *dir, struct page **p);
+void ext2_set_link(struct inode *dir, struct ext2_dir_entry_2 *de,
+ struct page *page, struct inode *inode);
+
+/*
+ * Couple of helper functions - make the code slightly cleaner.
+ */
+
+/* Raise the link count of @inode and pass the change to obdfs_change_inode(). */
+static inline void ext2_inc_count(struct inode *inode)
+{
+	++inode->i_nlink;
+	obdfs_change_inode(inode);
+}
+
+/*
+ * Drop one link from @inode.
+ * postpone the disk update until the inode really goes away: the change
+ * is only passed to obdfs_change_inode() while links remain.
+ */
+static inline void ext2_dec_count(struct inode *inode)
+{
+	inode->i_nlink -= 1;
+	if (inode->i_nlink == 0)
+		return;
+	obdfs_change_inode(inode);
+}
+
+/*
+ * Link @inode into the parent directory of @dentry and instantiate the
+ * dentry. On failure the link count taken by the caller is dropped,
+ * the inode reference is released, and the error from ext2_add_link()
+ * is returned.
+ */
+static inline int ext2_add_nondir(struct dentry *dentry, struct inode *inode)
+{
+	int err = ext2_add_link(dentry, inode);
+
+	if (err) {
+		ext2_dec_count(inode);
+		iput(inode);
+		return err;
+	}
+
+	d_instantiate(dentry, inode);
+	return 0;
+}
+
+/* methods */
+
+/*
+ * Directory lookup.
+ *
+ * Resolves @dentry's name in @dir. When an entry exists, the object's
+ * attributes are fetched with obdo_fromid() and an inode is obtained
+ * through iget4(); otherwise the dentry is added as negative.
+ *
+ * Returns NULL after d_add() on success (including the negative case),
+ * ERR_PTR(-ENAMETOOLONG) for over-long names, or ERR_PTR(-EACCES) when
+ * the attribute fetch or iget4() fails.
+ */
+static struct dentry *obdfs_lookup(struct inode * dir, struct dentry *dentry)
+{
+	struct obdo *oa;
+	struct inode * inode = NULL;
+	int type;
+	ino_t ino;
+
+	ENTRY;
+	if (dentry->d_name.len > EXT2_NAME_LEN) {
+		EXIT;
+		return ERR_PTR(-ENAMETOOLONG);
+	}
+
+	ino = obdfs_inode_by_name(dir, dentry, &type);
+	if (!ino)
+		goto negative;
+
+	oa = obdo_fromid(IID(dir), ino, type,
+			 OBD_MD_FLNOTOBD | OBD_MD_FLBLOCKS);
+	if ( IS_ERR(oa) ) {
+		printk(__FUNCTION__ ": obdo_fromid failed\n");
+		EXIT;
+		return ERR_PTR(-EACCES);
+	}
+
+	inode = iget4(dir->i_sb, ino, NULL, oa);
+	obdo_free(oa);
+
+	if (!inode) {
+		EXIT;
+		return ERR_PTR(-EACCES);
+	}
+
+ negative:
+	/* inode is still NULL here when no entry was found */
+	d_add(dentry, inode);
+	EXIT;
+	return NULL;
+}
+
+
+/*
+ * Compare a name against a directory entry.
+ *
+ * NOTE! unlike strncmp, ext2_match returns 1 for a match and 0 otherwise.
+ * An entry with a zero inode number never matches.
+ *
+ * `len <= EXT2_NAME_LEN' is guaranteed by caller.
+ * `de != NULL' is guaranteed by caller.
+ */
+static inline int ext2_match (int len, const char * const name,
+			      struct ext2_dir_entry_2 * de)
+{
+	if (len == de->name_len && de->inode)
+		return !memcmp(name, de->name, len);
+	return 0;
+}
+
+/*
+ * Create a new object through the directory's create method and return
+ * an inode for it.
+ *
+ * @mode is passed to the create method as a hint via oa->o_mode.
+ * Returns the new inode, or an ERR_PTR: -EIO if there is no create
+ * method, if iget4() fails, or if the inode already has dentry aliases;
+ * -ENOMEM if no obdo can be allocated; otherwise the create method's
+ * own error. In the two post-create failure cases the freshly created
+ * object is destroyed again.
+ */
+static struct inode *obdfs_new_inode(struct inode *dir, int mode)
+{
+	struct obdo *oa;
+	struct inode *inode;
+	int err;
+
+	ENTRY;
+	if (IOPS(dir, create) == NULL) {
+		printk(KERN_ERR __FUNCTION__ ": no create method!\n");
+		EXIT;
+		return ERR_PTR(-EIO);
+	}
+	oa = obdo_alloc();
+	if (!oa) {
+		EXIT;
+		return ERR_PTR(-ENOMEM);
+	}
+
+	/* Send a hint to the create method on the type of file to create */
+	oa->o_mode = mode;
+	oa->o_valid |= OBD_MD_FLMODE;
+	CDEBUG(D_INODE, "\n");
+	err = IOPS(dir, create)(IID(dir), oa);
+	CDEBUG(D_INODE, "\n");
+
+	if ( err ) {
+		printk("new_inode - fatal: err %d\n", err);
+		obdo_free(oa);
+		EXIT;
+		return ERR_PTR(err);
+	}
+	CDEBUG(D_INODE, "obdo mode %o\n", oa->o_mode);
+
+	inode = iget4(dir->i_sb, (ino_t)oa->o_id, NULL, oa);
+	CDEBUG(D_INODE, "\n");
+
+	/*
+	 * oa must stay allocated until the error paths below are done with
+	 * it: they print oa->o_id and hand oa to the destroy method.
+	 */
+	if (!inode) {
+		printk("new_inode -fatal: %ld\n", (long)oa->o_id);
+		IOPS(dir, destroy)(IID(dir), oa);
+		obdo_free(oa);
+		EXIT;
+		return ERR_PTR(-EIO);
+	}
+
+	if (!list_empty(&inode->i_dentry)) {
+		printk("new_inode -fatal: aliases %ld, ct %d lnk %d\n",
+		       (long)oa->o_id,
+		       atomic_read(&inode->i_count),
+		       inode->i_nlink);
+		IOPS(dir, destroy)(IID(dir), oa);
+		obdo_free(oa);
+		iput(inode);
+		EXIT;
+		return ERR_PTR(-EIO);
+	}
+
+	obdo_free(oa);
+	EXIT;
+	return inode;
+} /* obdfs_new_inode */
+
+
+/*
+ * Create a regular file.
+ *
+ * By the time this is called, we already have created
+ * the directory cache entry for the new file, but it
+ * is so far negative - it has no inode.
+ *
+ * If the create succeeds, we fill in the inode information
+ * with d_instantiate() (done inside ext2_add_nondir()).
+ * Returns 0 or a negative error.
+ */
+static int obdfs_create (struct inode * dir, struct dentry * dentry, int mode)
+{
+	struct inode *inode = obdfs_new_inode (dir, mode);
+
+	if (IS_ERR(inode))
+		return PTR_ERR(inode);
+
+	inode->i_op = &obdfs_file_inode_operations;
+	inode->i_fop = &obdfs_file_operations;
+	inode->i_mapping->a_ops = &obdfs_aops;
+	return ext2_add_nondir(dentry, inode);
+} /* obdfs_create */
+
+
+/*
+ * Create a special file: allocates the inode, initialises it with
+ * init_special_inode(), pushes the change out and links it into @dir.
+ * Returns 0 or a negative error.
+ */
+static int obdfs_mknod (struct inode * dir, struct dentry *dentry, int mode, int rdev)
+{
+	struct inode *inode = obdfs_new_inode (dir, mode);
+
+	if (IS_ERR(inode))
+		return PTR_ERR(inode);
+
+	init_special_inode(inode, mode, rdev);
+	obdfs_change_inode(inode);
+	return ext2_add_nondir(dentry, inode);
+}
+
+/*
+ * Create a symbolic link to @symname.
+ *
+ * Targets that fit (with their NUL) inside the inode's oi_inline area
+ * become fast symlinks stored there; longer ones are written through
+ * block_symlink(). Returns 0, -ENAMETOOLONG if the target (with NUL)
+ * exceeds the block size, or the error from inode creation,
+ * block_symlink() or ext2_add_nondir().
+ */
+static int obdfs_symlink (struct inode * dir, struct dentry * dentry,
+			  const char * symname)
+{
+	struct super_block * sb = dir->i_sb;
+	unsigned l = strlen(symname)+1;
+	struct obdfs_inode_info *oinfo;
+	struct inode * inode;
+	int err;
+
+	if (l > sb->s_blocksize)
+		return -ENAMETOOLONG;
+
+	inode = obdfs_new_inode (dir, S_IFLNK | S_IRWXUGO);
+	if (IS_ERR(inode))
+		return PTR_ERR(inode);
+
+	oinfo = obdfs_i2info(inode);
+	if (l < sizeof(oinfo->oi_inline)) {
+		/* fast symlink */
+		inode->i_op = &obdfs_fast_symlink_inode_operations;
+		memcpy(oinfo->oi_inline, symname, l);
+		inode->i_size = l-1;
+	} else {
+		/* slow symlink */
+		inode->i_op = &page_symlink_inode_operations;
+		inode->i_mapping->a_ops = &obdfs_aops;
+		err = block_symlink(inode, symname, l);
+		if (err) {
+			ext2_dec_count(inode);
+			iput (inode);
+			return err;
+		}
+	}
+	obdfs_change_inode(inode);
+
+	return ext2_add_nondir(dentry, inode);
+}
+
+
+
+/*
+ * Create a hard link @dentry to the inode behind @old_dentry.
+ * Returns -EPERM for directories, -EMLINK when the link count is
+ * already at EXT2_LINK_MAX, otherwise the result of ext2_add_nondir().
+ */
+static int obdfs_link (struct dentry * old_dentry, struct inode * dir,
+		       struct dentry *dentry)
+{
+	struct inode *target = old_dentry->d_inode;
+
+	if (S_ISDIR(target->i_mode))
+		return -EPERM;
+	if (target->i_nlink >= EXT2_LINK_MAX)
+		return -EMLINK;
+
+	target->i_ctime = CURRENT_TIME;
+	ext2_inc_count(target);
+	/* extra reference for the new dentry; ext2_add_nondir drops it on failure */
+	atomic_inc(&target->i_count);
+
+	return ext2_add_nondir(dentry, target);
+}
+
+
+/*
+ * Create a directory.
+ *
+ * Bumps the parent's link count, creates the inode, writes "." and
+ * ".." with ext2_make_empty() and links the new directory into @dir.
+ * Every step is undone on failure. Returns 0, -EMLINK if @dir is
+ * already at EXT2_LINK_MAX, or the error of the failing step.
+ */
+static int obdfs_mkdir(struct inode * dir, struct dentry * dentry, int mode)
+{
+	struct inode * inode;
+	int err = -EMLINK;
+	ENTRY;
+
+	if (dir->i_nlink >= EXT2_LINK_MAX)
+		goto out;
+
+	ext2_inc_count(dir);
+
+	inode = obdfs_new_inode (dir, S_IFDIR | mode);
+	err = PTR_ERR(inode);
+	if (IS_ERR(inode))
+		goto out_dir;
+
+	inode->i_op = &obdfs_dir_inode_operations;
+	inode->i_fop = &obdfs_dir_operations;
+	inode->i_mapping->a_ops = &obdfs_aops;
+
+	ext2_inc_count(inode);
+
+	err = ext2_make_empty(inode, dir);
+	if (err)
+		goto out_fail;
+
+	err = ext2_add_link(dentry, inode);
+	if (err)
+		goto out_fail;
+
+	d_instantiate(dentry, inode);
+out:
+	/* single exit trace; the failure labels below all funnel here */
+	EXIT;
+	return err;
+
+out_fail:
+	ext2_dec_count(inode);
+	ext2_dec_count(inode);
+	iput(inode);
+out_dir:
+	ext2_dec_count(dir);
+	goto out;
+}
+
+/*
+ * Remove @dentry's entry from @dir and drop one link from its inode.
+ * Returns 0, -ENOENT if the entry is not found, or the error from
+ * ext2_delete_entry().
+ */
+static int obdfs_unlink(struct inode * dir, struct dentry *dentry)
+{
+	struct inode * inode = dentry->d_inode;
+	struct ext2_dir_entry_2 * de;
+	struct page * page;
+	int err;
+
+	de = ext2_find_entry (dir, dentry, &page);
+	if (!de)
+		return -ENOENT;
+
+	/* ext2_delete_entry releases the page */
+	err = ext2_delete_entry (de, page);
+	if (err)
+		return err;
+
+	inode->i_ctime = dir->i_ctime;
+	ext2_dec_count(inode);
+	return 0;
+}
+
+
+/*
+ * Remove an empty directory.
+ * Returns -ENOTEMPTY if ext2_empty_dir() says it still has entries,
+ * otherwise the result of obdfs_unlink(). On success the directory's
+ * size is zeroed and one more link is dropped from both it and @dir.
+ */
+static int obdfs_rmdir (struct inode * dir, struct dentry *dentry)
+{
+	struct inode * inode = dentry->d_inode;
+	int err;
+
+	if (!ext2_empty_dir(inode))
+		return -ENOTEMPTY;
+
+	err = obdfs_unlink(dir, dentry);
+	if (err)
+		return err;
+
+	inode->i_size = 0;
+	ext2_dec_count(inode);
+	ext2_dec_count(dir);
+	return 0;
+}
+
+/*
+ * Rename old_dentry (in old_dir) to new_dentry (in new_dir).
+ *
+ * If the target exists its directory entry is redirected to the old
+ * inode; otherwise a new entry is added. The old entry is then deleted
+ * and, when a directory is moved, its ".." entry is repointed at
+ * new_dir. Returns 0 on success; -ENOENT, -EIO, -ENOTEMPTY, -EMLINK or
+ * the ext2_add_link() error on failure, in which case the pages still
+ * held are released at out_dir / out_old.
+ */
+static int obdfs_rename (struct inode * old_dir, struct dentry * old_dentry,
+ struct inode * new_dir, struct dentry * new_dentry )
+{
+ struct inode * old_inode = old_dentry->d_inode;
+ struct inode * new_inode = new_dentry->d_inode;
+ struct page * dir_page = NULL;
+ struct ext2_dir_entry_2 * dir_de = NULL;
+ struct page * old_page;
+ struct ext2_dir_entry_2 * old_de;
+ int err = -ENOENT;
+
+ old_de = ext2_find_entry (old_dir, old_dentry, &old_page);
+ if (!old_de)
+ goto out;
+
+ /* moving a directory: grab its ".." entry so it can be repointed later */
+ if (S_ISDIR(old_inode->i_mode)) {
+ err = -EIO;
+ dir_de = ext2_dotdot(old_inode, &dir_page);
+ if (!dir_de)
+ goto out_old;
+ }
+
+ if (new_inode) {
+ struct page *new_page;
+ struct ext2_dir_entry_2 *new_de;
+
+ err = -ENOTEMPTY;
+ if (dir_de && !ext2_empty_dir (new_inode))
+ goto out_dir;
+
+ err = -ENOENT;
+ new_de = ext2_find_entry (new_dir, new_dentry, &new_page);
+ if (!new_de)
+ goto out_dir;
+ ext2_inc_count(old_inode);
+ /* ext2_set_link releases new_page */
+ ext2_set_link(new_dir, new_de, new_page, old_inode);
+ new_inode->i_ctime = CURRENT_TIME;
+ if (dir_de)
+ new_inode->i_nlink--;
+ ext2_dec_count(new_inode);
+ } else {
+ if (dir_de) {
+ err = -EMLINK;
+ if (new_dir->i_nlink >= EXT2_LINK_MAX)
+ goto out_dir;
+ }
+ ext2_inc_count(old_inode);
+ err = ext2_add_link(new_dentry, old_inode);
+ if (err) {
+ ext2_dec_count(old_inode);
+ goto out_dir;
+ }
+ if (dir_de)
+ ext2_inc_count(new_dir);
+ }
+
+ /* releases old_page; its return value is not checked here */
+ ext2_delete_entry (old_de, old_page);
+ ext2_dec_count(old_inode);
+
+ if (dir_de) {
+ /* repoint ".." of the moved directory; releases dir_page */
+ ext2_set_link(old_inode, dir_de, dir_page, new_dir);
+ ext2_dec_count(old_dir);
+ }
+ return 0;
+
+
+out_dir:
+ if (dir_de) {
+ kunmap(dir_page);
+ page_cache_release(dir_page);
+ }
+out_old:
+ kunmap(old_page);
+ page_cache_release(old_page);
+out:
+ return err;
+}
+
+/* Inode operations for OBDFS directories, wiring up the methods defined above. */
+struct inode_operations obdfs_dir_inode_operations = {
+ create: obdfs_create,
+ lookup: obdfs_lookup,
+ link: obdfs_link,
+ unlink: obdfs_unlink,
+ symlink: obdfs_symlink,
+ mkdir: obdfs_mkdir,
+ rmdir: obdfs_rmdir,
+ mknod: obdfs_mknod,
+ rename: obdfs_rename,
+ setattr: obdfs_setattr
+};
--- /dev/null
+/*
+ * OBDFS Super operations
+ *
+ * This code is issued under the GNU General Public License.
+ * See the file COPYING in this distribution
+ *
+ * Copyright (C) 1996, 1997, Olaf Kirch <okir@monad.swb.de>
+ * Copyright (C) 1999 Stelias Computing Inc,
+ * (author Peter J. Braam <braam@stelias.com>)
+ * Copyright (C) 1999 Seagate Technology Inc.
+*/
+
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/stat.h>
+#include <linux/errno.h>
+#include <linux/locks.h>
+#include <linux/unistd.h>
+
+#include <asm/system.h>
+#include <asm/uaccess.h>
+
+#include <linux/fs.h>
+#include <linux/stat.h>
+#include <asm/uaccess.h>
+#include <linux/vmalloc.h>
+#include <asm/segment.h>
+#include <linux/mm.h>
+#include <linux/pagemap.h>
+#include <linux/smp_lock.h>
+
+#include <linux/obd_support.h>
+#include <linux/obd_ext2.h>
+#include <linux/obdfs.h>
+
+void obdfs_change_inode(struct inode *inode);
+
+static int cache_writes = 0;
+
+
+/* page cache support stuff */
+
+
+/*
+ * Add a page to the dirty page list.
+ *
+ * Does nothing if PG_dirty was already set or the page has no mapping.
+ * Otherwise the page is moved to mapping->dirty_pages under
+ * pagecache_lock and, if the mapping has a host inode, that inode is
+ * marked as having dirty pages.
+ */
+void set_page_dirty(struct page *page)
+{
+	struct address_space *mapping;
+
+	if (test_and_set_bit(PG_dirty, &page->flags))
+		return;
+
+	mapping = page->mapping;
+	if (!mapping)
+		return;
+
+	spin_lock(&pagecache_lock);
+	list_del(&page->list);
+	list_add(&page->list, &mapping->dirty_pages);
+	spin_unlock(&pagecache_lock);
+
+	if (mapping->host)
+		mark_inode_dirty_pages(mapping->host);
+}
+
+/*
+ * Remove page from dirty list
+ *
+ * Moves @page to mapping->clean_pages under pagecache_lock. If that
+ * leaves the dirty list empty and the mapping has a host inode, the
+ * inode's I_DIRTY_PAGES state bit is cleared. Does nothing for a page
+ * without a mapping. The PG_dirty bit itself is not touched here.
+ */
+void __set_page_clean(struct page *page)
+{
+	struct address_space *mapping = page->mapping;
+	struct inode *inode;
+
+	if (!mapping)
+		return;
+
+	spin_lock(&pagecache_lock);
+	list_del(&page->list);
+	list_add(&page->list, &mapping->clean_pages);
+
+	/* host may be NULL, as set_page_dirty() already allows for */
+	inode = mapping->host;
+	if (inode && list_empty(&mapping->dirty_pages)) {
+		CDEBUG(D_INODE, "inode clean\n");
+		inode->i_state &= ~I_DIRTY_PAGES;
+	}
+	spin_unlock(&pagecache_lock);
+	EXIT;
+}
+
+/* Clear the dirty flag of a dirty page and move it to the clean list. */
+inline void set_page_clean(struct page *page)
+{
+        if (!PageDirty(page))
+                return;
+
+        ClearPageDirty(page);
+        __set_page_clean(page);
+}
+
+/*
+ * Synchronous single-page I/O against the object backing an inode.
+ *
+ * rw     - direction, handed straight to the brw method
+ * inode  - inode whose object is read or written
+ * page   - page to transfer; the whole page (PAGE_SIZE bytes) is moved
+ * create - non-zero sets OBD_BRW_CREATE in the flags
+ *
+ * Returns the brw result, -EIO when the device has no brw method, or
+ * -ENOMEM when no obdo could be allocated.
+ */
+static int obdfs_brw(int rw, struct inode *inode, struct page *page, int create)
+{
+        obd_count num_obdo = 1;
+        obd_count bufs_per_obdo = 1;
+        obd_size count = PAGE_SIZE;
+        obd_off offset = ((obd_off)page->index) << PAGE_SHIFT;
+        obd_flag flags = create ? OBD_BRW_CREATE : 0;
+        struct obdo *oa;
+        int err;
+
+        ENTRY;
+        if (IOPS(inode, brw) == NULL) {
+                printk(KERN_ERR __FUNCTION__ ": no brw method!\n");
+                err = -EIO;
+                goto out;
+        }
+
+        oa = obdo_alloc();
+        if (oa == NULL) {
+                err = -ENOMEM;
+                goto out;
+        }
+
+        oa->o_valid = OBD_MD_FLNOTOBD;
+        obdfs_from_inode(oa, inode);
+
+        /* object attributes are not copied back into the inode here */
+        err = IOPS(inode, brw)(rw, IID(inode), num_obdo, &oa, &bufs_per_obdo,
+                               &page, &count, &offset, &flags);
+
+        obdo_free(oa);
+out:
+        EXIT;
+        return err;
+} /* obdfs_brw */
+
+extern void set_page_clean(struct page *);
+
+/*
+ * Synchronously write a page to the inode's object through the brw method.
+ * The transfer starts at the page's file offset and is `to` bytes long;
+ * `from` is only reported in the debug message.  On success the page is
+ * marked up to date and taken off the dirty list.
+ *
+ * Returns the brw result, -EIO when the device has no brw method, or
+ * -ENOMEM when no obdo could be allocated.
+ */
+static int obdfs_commit_page(struct page *page, int create, int from, int to)
+{
+        struct inode *inode = page->mapping->host;
+        obd_count num_obdo = 1;
+        obd_count bufs_per_obdo = 1;
+        obd_size count = to;
+        obd_off offset = (((obd_off)page->index) << PAGE_SHIFT);
+        obd_flag flags = create ? OBD_BRW_CREATE : 0;
+        struct obdo *oa;
+        int err;
+
+        ENTRY;
+        if (IOPS(inode, brw) == NULL) {
+                printk(KERN_ERR __FUNCTION__ ": no brw method!\n");
+                err = -EIO;
+                goto out;
+        }
+
+        oa = obdo_alloc();
+        if (oa == NULL) {
+                err = -ENOMEM;
+                goto out;
+        }
+
+        oa->o_valid = OBD_MD_FLNOTOBD;
+        obdfs_from_inode(oa, inode);
+
+        CDEBUG(D_INODE, "commit_page writing (at %d) to %d, count %Ld\n",
+               from, to, count);
+
+        err = IOPS(inode, brw)(WRITE, IID(inode), num_obdo, &oa, &bufs_per_obdo,
+                               &page, &count, &offset, &flags);
+        if (err == 0) {
+                SetPageUptodate(page);
+                set_page_clean(page);
+        }
+
+        obdo_free(oa);
+out:
+        EXIT;
+        return err;
+} /* obdfs_commit_page */
+
+
+/*
+ * readpage address-space operation: fill `page` from the object store.
+ *
+ * A page lying entirely beyond i_size is zero-filled without any I/O; a page
+ * that is already up to date is left alone.  Otherwise the page is read
+ * synchronously via obdfs_brw().
+ *
+ * The page is returned unlocked on every path, including the read-error
+ * path (previously it was left locked there, which would leave anyone
+ * waiting on the page lock stuck).
+ *
+ * Returns 0 on success or the obdfs_brw() error.
+ */
+int obdfs_readpage(struct file *file, struct page *page)
+{
+        struct inode *inode = page->mapping->host;
+        int rc;
+
+        ENTRY;
+
+        /* index of the first page past EOF <= this page: nothing to read */
+        if ( ((inode->i_size + PAGE_CACHE_SIZE -1)>>PAGE_SHIFT)
+             <= page->index) {
+                memset(kmap(page), 0, PAGE_CACHE_SIZE);
+                kunmap(page);
+                goto readpage_out;
+        }
+
+        if (Page_Uptodate(page))
+                goto readpage_out;
+
+        rc = obdfs_brw(READ, inode, page, 0);
+        if ( rc ) {
+                /* do not mark the page up to date, but do release the lock */
+                obd_unlock_page(page);
+                EXIT;
+                return rc;
+        }
+        /* PDEBUG(page, "READ"); */
+
+ readpage_out:
+        SetPageUptodate(page);
+        obd_unlock_page(page);
+        EXIT;
+        return 0;
+} /* obdfs_readpage */
+
+/*
+ * prepare_write address-space operation.
+ *
+ * Maps the page with kmap() (obdfs_commit_write() does the matching
+ * kunmap()) and makes sure the parts of the page that are not about to be
+ * overwritten hold valid data:
+ *  - page already up to date: just mark it dirty;
+ *  - the write covers the whole page: no read is needed, return 0;
+ *  - otherwise read the page synchronously, then mark it dirty.
+ *
+ * `from` and `to` are byte offsets inside the page, so the whole-page test
+ * compares them with 0 and PAGE_SIZE.  The earlier code compared them with
+ * the page's absolute file offset, which could only match for page index 0.
+ *
+ * Returns 0 on success or the obdfs_brw() error.
+ */
+int obdfs_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to)
+{
+        struct inode *inode = page->mapping->host;
+        int rc = 0;
+        ENTRY;
+
+        kmap(page);
+        if (Page_Uptodate(page))
+                goto prepare_done;
+
+        /* whole page will be overwritten: old contents are irrelevant */
+        if ( (from == 0) && (to >= PAGE_SIZE) ) {
+                EXIT;
+                return 0;
+        }
+
+        rc = obdfs_brw(READ, inode, page, 0);
+        if ( !rc ) {
+                SetPageUptodate(page);
+        }
+
+ prepare_done:
+        set_page_dirty(page);
+        EXIT;
+        return rc;
+}
+
+
+
+
+
+
+static kmem_cache_t *obdfs_pgrq_cachep = NULL;
+
+/*
+ * Create the slab cache used for struct obdfs_pgrq, unless it already
+ * exists.  Returns 0 on success and -ENOMEM when the cache cannot be created.
+ */
+int obdfs_init_pgrqcache(void)
+{
+        ENTRY;
+
+        if (obdfs_pgrq_cachep != NULL) {
+                CDEBUG(D_CACHE, "using existing cache at %p\n",
+                       obdfs_pgrq_cachep);
+                EXIT;
+                return 0;
+        }
+
+        CDEBUG(D_CACHE, "allocating obdfs_pgrq_cache\n");
+        obdfs_pgrq_cachep = kmem_cache_create("obdfs_pgrq",
+                                              sizeof(struct obdfs_pgrq),
+                                              0, SLAB_HWCACHE_ALIGN,
+                                              NULL, NULL);
+        if (obdfs_pgrq_cachep == NULL) {
+                EXIT;
+                return -ENOMEM;
+        }
+
+        CDEBUG(D_CACHE, "allocated cache at %p\n", obdfs_pgrq_cachep);
+        EXIT;
+        return 0;
+} /* obdfs_init_pgrqcache */
+
+/*
+ * Drop one queued page request: decrement the global obdfs_cache_count,
+ * unlink the request from the list it is on (rq_plist), clear the page's
+ * cache flag with OBDClearCachePage() and return the request to the slab.
+ * NOTE(review): no locking is done here; presumably the caller holds the
+ * list mutex -- confirm against callers.
+ */
+inline void obdfs_pgrq_del(struct obdfs_pgrq *pgrq)
+{
+ --obdfs_cache_count;
+ CDEBUG(D_INFO, "deleting page %p from list [count %ld]\n",
+ pgrq->rq_page, obdfs_cache_count);
+ list_del(&pgrq->rq_plist);
+ OBDClearCachePage(pgrq->rq_page);
+ kmem_cache_free(obdfs_pgrq_cachep, pgrq);
+}
+
+/*
+ * Destroy the obdfs_pgrq slab cache.  A failed destroy is only logged; the
+ * cache pointer is reset to NULL either way.  Calling this without a cache
+ * just logs a message.
+ */
+void obdfs_cleanup_pgrqcache(void)
+{
+        ENTRY;
+
+        if (obdfs_pgrq_cachep == NULL) {
+                printk(KERN_INFO __FUNCTION__ ": called with NULL pointer\n");
+                EXIT;
+                return;
+        }
+
+        CDEBUG(D_CACHE, "destroying obdfs_pgrqcache at %p, count %ld\n",
+               obdfs_pgrq_cachep, obdfs_cache_count);
+        if (kmem_cache_destroy(obdfs_pgrq_cachep))
+                printk(KERN_INFO __FUNCTION__
+                       ": unable to free all of cache\n");
+        obdfs_pgrq_cachep = NULL;
+
+        EXIT;
+} /* obdfs_cleanup_pgrqcache */
+
+
+/*
+ * Look through the inode's list of queued page requests for a page with the
+ * given page index.  Returns the page, or NULL when none is queued.
+ * Must be called with the list lock held.
+ */
+static struct page *obdfs_find_page_index(struct inode *inode,
+                                          unsigned long index)
+{
+        struct list_head *head = obdfs_iplist(inode);
+        struct list_head *pos;
+
+        ENTRY;
+
+        CDEBUG(D_INFO, "looking for inode %ld pageindex %ld\n",
+               inode->i_ino, index);
+        OIDEBUG(inode);
+
+        if (list_empty(head)) {
+                EXIT;
+                return NULL;
+        }
+
+        for (pos = head->next; pos != head; pos = pos->next) {
+                struct obdfs_pgrq *rq = list_entry(pos, struct obdfs_pgrq,
+                                                   rq_plist);
+                struct page *candidate = rq->rq_page;
+
+                if (candidate->index != index)
+                        continue;
+
+                CDEBUG(D_INFO,
+                       "INDEX SEARCH found page %p, index %ld\n",
+                       candidate, index);
+                EXIT;
+                return candidate;
+        }
+
+        EXIT;
+        return NULL;
+} /* obdfs_find_page_index */
+
+
+/*
+ * Write a vector of pages with a single brw call, then release the pages
+ * and obdos: called with io lock on inodes.
+ *
+ * inodes    - inodes[0] selects the brw method and connection; inodes[i]
+ *             also receives the size reported in obdos[i]
+ * num_io    - number of entries in pages[]
+ * num_obdos - number of entries in obdos[] (and of inodes[] used below)
+ * bufs      - not used by this function
+ * oa_bufs, counts, offsets, flags - passed through to the brw method
+ *
+ * Returns the brw result, or -EIO when inodes[0] has no brw method.  In the
+ * -EIO case nothing is released; otherwise every page gets a put_page() and
+ * every obdo is freed regardless of the brw result.
+ */
+int obdfs_do_vec_wr(struct inode **inodes, obd_count num_io,
+ obd_count num_obdos, struct obdo **obdos,
+ obd_count *oa_bufs, struct page **pages, char **bufs,
+ obd_size *counts, obd_off *offsets, obd_flag *flags)
+{
+ int err;
+
+ ENTRY;
+ if (IOPS(inodes[0], brw) == NULL) {
+ printk(KERN_ERR __FUNCTION__ ": no brw method!\n");
+ EXIT;
+ return -EIO;
+ }
+
+ CDEBUG(D_INFO, "writing %d page(s), %d obdo(s) in vector\n",
+ num_io, num_obdos);
+ if (obd_debug_level & D_INFO) { /* DEBUGGING */
+ int i;
+ printk("OBDOS: ");
+ for (i = 0; i < num_obdos; i++)
+ printk("%ld:0x%p ", (long)obdos[i]->o_id, obdos[i]);
+
+ printk("\nPAGES: ");
+ for (i = 0; i < num_io; i++)
+ printk("0x%p ", pages[i]);
+ printk("\n");
+ }
+
+ err = IOPS(inodes[0], brw)(WRITE, IID(inodes[0]), num_obdos, obdos,
+ oa_bufs, pages, counts, offsets, flags);
+
+ CDEBUG(D_INFO, "BRW done\n");
+ /* release the pages from the page cache */
+ while ( num_io > 0 ) {
+ --num_io;
+ CDEBUG(D_INFO, "calling put_page for %p, index %ld\n",
+ pages[num_io], pages[num_io]->index);
+ /* PDEBUG(pages[num_io], "do_vec_wr"); */
+ put_page(pages[num_io]);
+ /* PDEBUG(pages[num_io], "do_vec_wr"); */
+ }
+ CDEBUG(D_INFO, "put_page done\n");
+
+ /* walk the obdos back to front, updating inode sizes before freeing */
+ while ( num_obdos > 0) {
+ --num_obdos;
+ CDEBUG(D_INFO, "free obdo %ld\n",(long)obdos[num_obdos]->o_id);
+ /* copy the size reported in the obdo back to the inode */
+ obdfs_set_size (inodes[num_obdos], obdos[num_obdos]->o_size);
+ //obdfs_to_inode(inodes[num_obdos], obdos[num_obdos]);
+ obdo_free(obdos[num_obdos]);
+ }
+ CDEBUG(D_INFO, "obdo_free done\n");
+ EXIT;
+ return err;
+}
+
+
+/*
+ * Add a page to the write request cache list for later writing.
+ * ASYNCHRONOUS write method.
+ *
+ * A new request is queued only when OBDAddCachePage() reports that the page
+ * was not already marked as cached.  The request takes a page reference
+ * (get_page) and, if the inode is not yet on the superblock list, an inode
+ * reference (i_count).
+ *
+ * Returns 0, or -ENOMEM when no request structure could be allocated (the
+ * page's cache mark is cleared again in that case).
+ */
+static int obdfs_add_page_to_cache(struct inode *inode, struct page *page)
+{
+ int err = 0;
+ ENTRY;
+
+ /* The PG_obdcache bit is cleared by obdfs_pgrq_del() BEFORE the page
+ * is written, so at worst we will write the page out twice.
+ *
+ * If the page has the PG_obdcache bit set, then the inode MUST be
+ * on the superblock dirty list so we don't need to check this.
+ * Dirty inodes are removed from the superblock list ONLY when they
+ * don't have any more cached pages. It is possible to have an inode
+ * with no dirty pages on the superblock list, but not possible to
+ * have an inode with dirty pages NOT on the superblock dirty list.
+ */
+ if (!OBDAddCachePage(page)) {
+ struct obdfs_pgrq *pgrq;
+ pgrq = kmem_cache_alloc(obdfs_pgrq_cachep, SLAB_KERNEL);
+ if (!pgrq) {
+ OBDClearCachePage(page);
+ EXIT;
+ return -ENOMEM;
+ }
+ /* not really necessary since we set all pgrq fields here
+ memset(pgrq, 0, sizeof(*pgrq));
+ */
+
+ pgrq->rq_page = page;
+ pgrq->rq_jiffies = jiffies;
+ get_page(pgrq->rq_page);
+
+ /* list and counter updates are done under the per-superblock mutex */
+ obd_down(&obdfs_i2sbi(inode)->osi_list_mutex);
+ list_add(&pgrq->rq_plist, obdfs_iplist(inode));
+ obdfs_cache_count++;
+ //printk("-- count %d\n", obdfs_cache_count);
+
+ /* If inode isn't already on superblock inodes list, add it.
+ *
+ * We increment the reference count on the inode to keep it
+ * from being freed from memory. This _should_ be an iget()
+ * with an iput() in both flush_reqs() and put_inode(), but
+ * since put_inode() is called from iput() we can't call iput()
+ * again there. Instead we just increment/decrement i_count,
+ * which is mostly what iget/iput do for an inode in memory.
+ */
+ if ( list_empty(obdfs_islist(inode)) ) {
+ atomic_inc(&inode->i_count);
+ CDEBUG(D_INFO,
+ "adding inode %ld to superblock list %p\n",
+ inode->i_ino, obdfs_slist(inode));
+ list_add(obdfs_islist(inode), obdfs_slist(inode));
+ }
+ obd_up(&obdfs_i2sbi(inode)->osi_list_mutex);
+
+ }
+
+ /* XXX For testing purposes, we can write out the page here.
+ err = obdfs_flush_reqs(obdfs_slist(inode), ~0UL);
+ */
+
+ EXIT;
+ return err;
+} /* obdfs_add_page_to_cache */
+
+/*
+ * Report the cached-page count once it exceeds 60000.  The flush between
+ * the two messages is commented out, so at present this only logs the same
+ * count twice.
+ */
+void rebalance(void)
+{
+ if (obdfs_cache_count > 60000) {
+ printk("-- count %ld\n", obdfs_cache_count);
+ //obdfs_flush_dirty_pages(~0UL);
+ printk("-- count %ld\n", obdfs_cache_count);
+ }
+}
+
+/*
+ * Write one page, either synchronously (sync != 0, via obdfs_brw with the
+ * create flag) or by queueing it on the write request cache.  On success
+ * the page is marked up to date and clean.  Returns 0 or the error from
+ * the chosen method.
+ */
+int obdfs_do_writepage(struct page *page, int sync)
+{
+        struct inode *inode = page->mapping->host;
+        int err;
+
+        ENTRY;
+        /* PDEBUG(page, "WRITEPAGE"); */
+        if (sync) {
+                err = obdfs_brw(WRITE, inode, page, 1);
+        } else {
+                err = obdfs_add_page_to_cache(inode, page);
+                CDEBUG(D_INFO, "DO_WR ino: %ld, page %p, err %d, uptodate %d\n",
+                       inode->i_ino, page, err, Page_Uptodate(page));
+        }
+
+        if (err) {
+                EXIT;
+                return err;
+        }
+
+        SetPageUptodate(page);
+        set_page_clean(page);
+        /* PDEBUG(page,"WRITEPAGE"); */
+        EXIT;
+        return 0;
+} /* obdfs_do_writepage */
+
+
+
+/*
+ * writepage address-space operation.
+ *
+ * As written, this logs the inode number and then hits BUG(); the code
+ * after BUG() performs a synchronous write via obdfs_do_writepage() and
+ * marks the page clean on success.
+ * NOTE(review): BUG() presumably never returns, which would make the write
+ * below unreachable -- looks like a deliberate trap to catch unexpected
+ * callers; confirm before removing.
+ */
+int obdfs_writepage(struct page *page)
+{
+ int rc;
+ struct inode *inode = page->mapping->host;
+ ENTRY;
+ printk("---> writepage called ino %ld!\n", inode->i_ino);
+ BUG();
+ rc = obdfs_do_writepage(page, 1);
+ if ( !rc ) {
+ set_page_clean(page);
+ } else {
+ CDEBUG(D_INODE, "--> GRR %d\n", rc);
+ }
+ EXIT;
+ return rc;
+}
+
+/*
+ * Call obdfs_writepage() on every page currently on the inode's dirty list.
+ *
+ * A successful write moves the page to the mapping's clean list (see
+ * set_page_clean()), so the successor is fetched BEFORE the page is
+ * written.  Following page->list.next afterwards would continue on the
+ * clean list and never reach the dirty-list head again.
+ *
+ * Write errors are ignored; a page that fails to write stays on the dirty
+ * list and is skipped.
+ */
+void write_inode_pages(struct inode *inode)
+{
+        struct list_head *head = &inode->i_mapping->dirty_pages;
+        struct list_head *pos = head->next;
+
+        while (pos != head) {
+                struct page *page = list_entry(pos, struct page, list);
+
+                /* advance first: obdfs_writepage() may relink this page */
+                pos = pos->next;
+                obdfs_writepage(page);
+        }
+}
+
+
+/*
+ * commit_write address-space operation.
+ *
+ * While cache_writes is 0 the page is written out synchronously through
+ * obdfs_commit_page().  The inode size is then extended if the write ended
+ * beyond the current i_size, and the kmap() taken in obdfs_prepare_write()
+ * is released.  Returns the result of the page write (0 when write caching
+ * is enabled).
+ */
+int obdfs_commit_write(struct file *file, struct page *page, unsigned from, unsigned to)
+{
+        struct inode *inode = page->mapping->host;
+        /* file offset just past the last byte written */
+        loff_t len = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
+        int rc = 0;
+
+        ENTRY;
+        CDEBUG(D_INODE, "commit write ino %ld (end at %Ld) from %d to %d ,ind %ld\n",
+               inode->i_ino, len, from, to, page->index);
+
+        if (cache_writes == 0)
+                rc = obdfs_commit_page(page, 1, from, to);
+
+        if (inode->i_size < len)
+                obdfs_set_size(inode, len);
+
+        kunmap(page);
+        EXIT;
+        return rc;
+}
+
+
+/*
+ * This does the "real" work of the write. The generic routine has
+ * allocated the page, locked it, done all the page alignment stuff
+ * calculations etc. Now we should just copy the data from user
+ * space and write it back to the real medium..
+ *
+ * If the writer ends up delaying the write, the writer needs to
+ * increment the page use counts until he is done with the page.
+ *
+ * offset and bytes describe the region inside the page that receives the
+ * user data from buf.
+ *
+ * Return value is the number of bytes written, or a negative error:
+ * the obdfs_brw() read error, -EFAULT when the user copy fails, or a
+ * negative result from obdfs_writepage().
+ */
+int obdfs_write_one_page(struct file *file, struct page *page,
+ unsigned long offset, unsigned long bytes,
+ const char * buf)
+{
+ struct inode *inode = file->f_dentry->d_inode;
+ int err;
+
+ ENTRY;
+ /* We check for complete page writes here, as we then don't have to
+ * get the page before writing over everything anyways.
+ */
+ if ( !Page_Uptodate(page) && (offset != 0 || bytes != PAGE_SIZE) ) {
+ err = obdfs_brw(READ, inode, page, 0);
+ if ( err )
+ return err;
+ SetPageUptodate(page);
+ }
+
+ if (copy_from_user((u8*)page_address(page) + offset, buf, bytes))
+ return -EFAULT;
+
+ /* the page write is done under the big kernel lock */
+ lock_kernel();
+ err = obdfs_writepage(page);
+ unlock_kernel();
+
+ return (err < 0 ? err : bytes);
+} /* obdfs_write_one_page */
+
+/*
+ * return an up to date page:
+ * - if locked is true then is returned locked
+ * - if create is true the corresponding disk blocks are created
+ * - page is held, i.e. caller must release the page
+ *
+ * Returns NULL only when grab_cache_page() fails.  When the read fails the
+ * page is still returned, but with the error flag set (SetPageError) and
+ * unlocked regardless of `locked`, so callers need to check for that.
+ *
+ * NOTE(review): when the page is already up to date and `locked` is set,
+ * the code only prints "expecting locked page" -- it does not verify the
+ * lock state; confirm the intent.
+ *
+ * modeled on NFS code.
+ */
+struct page *obdfs_getpage(struct inode *inode, unsigned long offset,
+ int create, int locked)
+{
+ struct page * page;
+ int index;
+ int err;
+
+ ENTRY;
+
+ /* round the byte offset down to its page boundary, then to a page index */
+ offset = offset & PAGE_CACHE_MASK;
+ CDEBUG(D_INFO, "ino: %ld, offset %ld, create %d, locked %d\n",
+ inode->i_ino, offset, create, locked);
+ index = offset >> PAGE_CACHE_SHIFT;
+
+ page = grab_cache_page(&inode->i_data, index);
+
+ /* Yuck, no page */
+ if (! page) {
+ printk(KERN_WARNING " grab_cache_page says no dice ...\n");
+ EXIT;
+ return NULL;
+ }
+
+ /* PDEBUG(page, "GETPAGE: got page - before reading\n"); */
+ /* now check if the data in the page is up to date */
+ if ( Page_Uptodate(page)) {
+ if (!locked) {
+ if (PageLocked(page))
+ obd_unlock_page(page);
+ } else {
+ printk("file %s, line %d: expecting locked page\n",
+ __FILE__, __LINE__);
+ }
+ EXIT;
+ return page;
+ }
+
+
+#ifdef EXT2_OBD_DEBUG
+ if ((obd_debug_level & D_INFO) && obdfs_find_page_index(inode, index)) {
+ CDEBUG(D_INFO, "OVERWRITE: found dirty page %p, index %ld\n",
+ page, page->index);
+ }
+#endif
+
+ err = obdfs_brw(READ, inode, page, create);
+
+ if ( err ) {
+ SetPageError(page);
+ obd_unlock_page(page);
+ EXIT;
+ return page;
+ }
+
+ if ( !locked )
+ obd_unlock_page(page);
+ SetPageUptodate(page);
+ /* PDEBUG(page,"GETPAGE - after reading"); */
+ EXIT;
+ return page;
+} /* obdfs_getpage */
+
+
+/*
+ * truncate inode operation: ask the object store to punch the inode's
+ * object according to the size recorded in the inode.
+ *
+ * If no obdo can be allocated, an on-stack obdo is used as a fallback so
+ * the object store is still updated.  The fallback now zeroes the stack
+ * obdo first and passes the punch arguments in the same order as the
+ * normal path (o_size, then 0); previously the two paths disagreed.
+ * NOTE(review): the order (o_size, 0) is taken from the normal path and its
+ * debug message ("%Lu bytes at 0") -- confirm against the punch method's
+ * prototype.
+ *
+ * Errors are only logged; the function has no return value.
+ */
+void obdfs_truncate(struct inode *inode)
+{
+        struct obdo *oa;
+        int err;
+        ENTRY;
+
+        //obdfs_dequeue_pages(inode);
+
+        if (IOPS(inode, punch) == NULL) {
+                printk(KERN_ERR __FUNCTION__ ": no punch method!\n");
+                EXIT;
+                return;
+        }
+
+        oa = obdo_alloc();
+        if ( !oa ) {
+                /* XXX This would give an inconsistent FS, so deal with it as
+                 * best we can for now - an obdo on the stack is not pretty.
+                 */
+                struct obdo obdo;
+
+                printk(__FUNCTION__ ": obdo_alloc failed - using stack!\n");
+
+                /* do not hand stack garbage to the object store */
+                memset(&obdo, 0, sizeof(obdo));
+                obdo.o_valid = OBD_MD_FLNOTOBD;
+                obdfs_from_inode(&obdo, inode);
+
+                err = IOPS(inode, punch)(IID(inode), &obdo, obdo.o_size, 0);
+        } else {
+                oa->o_valid = OBD_MD_FLNOTOBD;
+                obdfs_from_inode(oa, inode);
+
+                CDEBUG(D_INFO, "calling punch for %ld (%Lu bytes at 0)\n",
+                       (long)oa->o_id, oa->o_size);
+                err = IOPS(inode, punch)(IID(inode), oa, oa->o_size, 0);
+
+                obdo_free(oa);
+        }
+
+        if (err) {
+                printk(__FUNCTION__ ": obd_truncate fails (%d)\n", err);
+                EXIT;
+                return;
+        }
+        EXIT;
+} /* obdfs_truncate */
+
+/*
+ * Address-space operations for OBDFS mappings.  Reads, writes and the
+ * prepare/commit pair are the obdfs_* routines; sync_page uses
+ * block_sync_page and no bmap method is provided.
+ */
+struct address_space_operations obdfs_aops = {
+ readpage: obdfs_readpage,
+ writepage: obdfs_writepage,
+ sync_page: block_sync_page,
+ prepare_write: obdfs_prepare_write,
+ commit_write: obdfs_commit_write,
+ bmap: NULL
+};
--- /dev/null
+
+/*
+ * OBDFS Super operations
+ *
+ * This code is issued under the GNU General Public License.
+ * See the file COPYING in this distribution
+ *
+ * Copyright (C) 1996 Peter J. Braam <braam@stelias.com>
+ * Copyright (C) 1999 Stelias Computing Inc. <braam@stelias.com>
+ * Copyright (C) 1999 Seagate Technology Inc.
+ * Copyright (C) 2001 Mountain View Data, Inc.
+ * Copyright (C) 2002 Cluster File Systems, Inc.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/stat.h>
+#include <linux/errno.h>
+#include <linux/locks.h>
+#include <linux/unistd.h>
+
+#include <asm/system.h>
+#include <asm/uaccess.h>
+
+#include <linux/fs.h>
+#include <linux/stat.h>
+#include <asm/uaccess.h>
+#include <linux/vmalloc.h>
+#include <asm/segment.h>
+
+#include <linux/obd_support.h>
+#include <linux/obd_class.h>
+#include <linux/lustre_light.h>
+
+struct list_head ll_super_list;
+extern struct address_space_operations ll_aops;
+struct super_operations ll_super_operations;
+long ll_cache_count = 0;
+long ll_mutex_start = 0;
+long obd_memory = 0;
+
+/*
+ * Parse one "name=value" mount option.
+ *
+ * Returns a freshly allocated copy of the value when `data` begins with the
+ * option name `opt` and contains an '='; the caller frees it with OBD_FREE
+ * using strlen(value) + 1.  Returns NULL when the name does not match, no
+ * '=' is present, or the allocation fails.
+ */
+static char *ll_read_opt(const char *opt, char *data)
+{
+        char *value;
+        char *retval;
+
+        CDEBUG(D_INFO, "option: %s, data %s\n", opt, data);
+
+        /* prefix comparison: data must start with the option name */
+        if (strncmp(opt, data, strlen(opt)) != 0)
+                return NULL;
+
+        value = strchr(data, '=');
+        if (value == NULL)
+                return NULL;
+        value++;                /* step over the '=' */
+
+        OBD_ALLOC(retval, char *, strlen(value) + 1);
+        if (retval == NULL) {
+                printk(KERN_ALERT __FUNCTION__ ": out of memory!\n");
+                return NULL;
+        }
+
+        /* copy including the terminating NUL */
+        memcpy(retval, value, strlen(value)+1);
+        CDEBUG(D_PSDEV, "Assigned option: %s, value %s\n", opt, retval);
+        return retval;
+}
+
+/*
+ * Split a comma-separated mount option string and extract the "device" and
+ * "version" options into *dev and *vers (allocated by ll_read_opt()).
+ *
+ * The option string is tokenized in place with strtok(), so it is modified.
+ * An output that is already non-NULL is not overwritten, so only the first
+ * occurrence of each option is taken.  Unrecognized options are ignored.
+ */
+static void ll_options(char *options, char **dev, char **vers)
+{
+ char *this_char;
+
+ if (!options)
+ return;
+
+ for (this_char = strtok (options, ",");
+ this_char != NULL;
+ this_char = strtok (NULL, ",")) {
+ CDEBUG(D_INFO, "this_char %s\n", this_char);
+ /* short-circuit: try "device" first, then "version" */
+ if ( (!*dev && (*dev = ll_read_opt("device", this_char)))||
+ (!*vers && (*vers = ll_read_opt("version", this_char))) )
+ continue;
+
+ }
+}
+
+/*
+ * Mount entry point: set up the superblock for a Lustre Light mount.
+ *
+ * data - mount option string; must contain "device=<n>", where <n> is an
+ *        index into obd_dev[]; "version=<v>" is parsed but otherwise unused
+ *
+ * Steps: parse options, validate the OBD device (attached and set up),
+ * connect to it, fill in the superblock fields, fetch the root object
+ * (inode number 2) and build the root dentry.
+ *
+ * Returns sb on success and NULL on any failure.  On failure the module use
+ * count is dropped, option strings are freed, the connection is closed if
+ * it was opened, and the root inode is released if it was obtained.
+ *
+ * Fixes over the previous version:
+ *  - oa is tested with IS_ERR() before oa->o_mode is read;
+ *  - a failed d_alloc_root() is treated as a mount failure;
+ *  - a negative device number is rejected before indexing obd_dev[].
+ */
+static struct super_block * ll_read_super(struct super_block *sb,
+                                          void *data, int silent)
+{
+        struct inode *root = 0;
+        struct ll_sb_info *sbi = (struct ll_sb_info *)(&sb->u.generic_sbp);
+        struct obd_device *obddev;
+        char *device = NULL;
+        char *version = NULL;
+        int root_ino = 2;
+        int connected = 0;
+        int devno;
+        int err;
+        struct obdo *oa;
+
+
+        ENTRY;
+        MOD_INC_USE_COUNT;
+        memset(sbi, 0, sizeof(*sbi));
+
+        CDEBUG(D_INFO, "\n");
+        ll_options(data, &device, &version);
+        if ( !device ) {
+                printk(__FUNCTION__ ": no device\n");
+                EXIT;
+                goto ERR;
+        }
+
+        devno = simple_strtoul(device, NULL, 0);
+        CDEBUG(D_INFO, "\n");
+        /* devno is a signed int: a huge value can wrap to a negative index */
+        if ( devno < 0 || devno >= MAX_OBD_DEVICES ) {
+                printk(__FUNCTION__ ": device of %s too high (%d)\n", device, devno);
+                EXIT;
+                goto ERR;
+        }
+
+        CDEBUG(D_INFO, "\n");
+        obddev = &obd_dev[devno];
+
+
+        CDEBUG(D_INFO, "\n");
+        if ( ! (obddev->obd_flags & OBD_ATTACHED) ||
+             ! (obddev->obd_flags & OBD_SET_UP) ){
+                printk("device %s not attached or not set up (%d)\n",
+                       device, MINOR(devno));
+                EXIT;
+                goto ERR;
+        }
+
+        CDEBUG(D_INFO, "\n");
+        sbi->ll_obd = obddev;
+        sbi->ll_ops = sbi->ll_obd->obd_type->typ_ops;
+
+        sbi->ll_conn.oc_dev = obddev;
+        err = sbi->ll_ops->o_connect(&sbi->ll_conn);
+        if ( err ) {
+                printk("OBDFS: cannot connect to %s\n", device);
+                EXIT;
+                goto ERR;
+        }
+
+        /* from here on the error path must disconnect */
+        connected = 1;
+        CDEBUG(D_INFO, "\n");
+        /* list of dirty inodes, and a mutex to hold while modifying it */
+        INIT_LIST_HEAD(&sbi->ll_inodes);
+        init_MUTEX (&sbi->ll_list_mutex);
+
+        CDEBUG(D_INFO, "\n");
+        sbi->ll_super = sb;
+        sbi->ll_rootino = 2;
+
+        CDEBUG(D_INFO, "\n");
+        sb->s_maxbytes = 1LL << 36;
+        printk("Max bytes: %Lx\n", sb->s_maxbytes);
+        sb->s_blocksize = PAGE_SIZE;
+        sb->s_blocksize_bits = (unsigned char)PAGE_SHIFT;
+        sb->s_magic = LL_SUPER_MAGIC;
+        sb->s_op = &ll_super_operations;
+
+        /* make root inode */
+        CDEBUG(D_INFO, "\n");
+        oa = obdo_fromid(&sbi->ll_conn, root_ino, S_IFDIR,
+                         OBD_MD_FLNOTOBD | OBD_MD_FLBLOCKS);
+        /* oa may be an error pointer: check before touching its fields */
+        if ( IS_ERR(oa) ) {
+                printk(__FUNCTION__ ": obdo_fromid failed\n");
+                EXIT;
+                goto ERR;
+        }
+        CDEBUG(D_INFO, "mode %o\n", oa->o_mode);
+        CDEBUG(D_INFO, "\n");
+        root = iget4(sb, root_ino, NULL, oa);
+        obdo_free(oa);
+        CDEBUG(D_INFO, "\n");
+        if (!root) {
+                printk("OBDFS: bad iget4 for root\n");
+                sb->s_dev = 0;
+                err = -ENOENT;
+                EXIT;
+                goto ERR;
+        }
+
+        sb->s_root = d_alloc_root(root);
+        if (!sb->s_root) {
+                /* the ERR path releases the root inode */
+                printk(__FUNCTION__ ": d_alloc_root failed\n");
+                EXIT;
+                goto ERR;
+        }
+        list_add(&sbi->ll_list, &ll_super_list);
+        OBD_FREE(device, strlen(device) + 1);
+        if (version)
+                OBD_FREE(version, strlen(version) + 1);
+        EXIT;
+        return sb;
+
+ERR:
+        MOD_DEC_USE_COUNT;
+        if (device)
+                OBD_FREE(device, strlen(device) + 1);
+        if (version)
+                OBD_FREE(version, strlen(version) + 1);
+        if (connected)
+                sbi->ll_ops->o_disconnect(&sbi->ll_conn);
+
+        if (sbi) {
+                sbi->ll_super = NULL;
+        }
+        if (root) {
+                iput(root);
+        }
+        sb->s_dev = 0;
+        return NULL;
+} /* ll_read_super */
+
+
+/*
+ * Unmount: clear s_dev, disconnect from the OBD device, take the superblock
+ * info off the global list and drop the module use count.  Flushing of
+ * cached requests is currently commented out.
+ */
+static void ll_put_super(struct super_block *sb)
+{
+ struct ll_sb_info *sbi;
+
+ ENTRY;
+ sb->s_dev = 0;
+
+ /* the per-mount info lives inside the superblock's union */
+ sbi = (struct ll_sb_info *) &sb->u.generic_sbp;
+ //ll_flush_reqs(&sbi->ll_inodes, ~0UL);
+
+ OPS(sb,disconnect)(ID(sb));
+ list_del(&sbi->ll_list);
+
+ printk(KERN_INFO "OBDFS: Bye bye.\n");
+
+ MOD_DEC_USE_COUNT;
+ EXIT;
+} /* ll_put_super */
+
+
+/*
+ * Push the inode's attributes to the object store through the setattr
+ * method.  `valid` selects which attribute flags are sent; it is combined
+ * with OBD_MD_FLID and masked by OBD_MD_FLNOTOBD.  Failures (no setattr
+ * method, no memory, setattr error) are only logged.
+ */
+void ll_do_change_inode(struct inode *inode, int valid)
+{
+        struct obdo *attrs;
+        int rc;
+
+        ENTRY;
+        if (IOPS(inode, setattr) == NULL) {
+                printk(KERN_ERR __FUNCTION__ ": no setattr method!\n");
+                EXIT;
+                return;
+        }
+
+        attrs = obdo_alloc();
+        if (attrs == NULL) {
+                printk(__FUNCTION__ ": obdo_alloc failed\n");
+                EXIT;
+                return;
+        }
+
+        attrs->o_valid = OBD_MD_FLNOTOBD & (valid | OBD_MD_FLID);
+        ll_from_inode(attrs, inode);
+        attrs->o_mode = inode->i_mode;
+
+        rc = IOPS(inode, setattr)(IID(inode), attrs);
+        if (rc != 0)
+                printk(__FUNCTION__ ": obd_setattr fails (%d)\n", rc);
+
+        EXIT;
+        obdo_free(attrs);
+} /* ll_do_change_inode */
+
+/* Send the inode's link count to the object store; `mask` is not used. */
+void ll_change_inode(struct inode *inode, int mask)
+{
+        ll_do_change_inode(inode, OBD_MD_FLNLINK);
+}
+
+
+extern void write_inode_pages(struct inode *);
+/* This routine is called from iput() (for each unlink on the inode).
+ * We can't put this call into delete_inode() since that is called only
+ * when i_count == 0, and we need to keep a reference on the inode while
+ * it is in the page cache, which means i_count > 0. Catch 22.
+ *
+ * When the inode still has links and this is the last reference
+ * (i_count == 1), its dirty pages are written out via write_inode_pages().
+ * Otherwise nothing is done (dequeueing of pages is commented out).
+ */
+static void ll_put_inode(struct inode *inode)
+{
+ ENTRY;
+ if (inode->i_nlink && (atomic_read(&inode->i_count) == 1)) {
+ write_inode_pages(inode);
+ EXIT;
+ return;
+ }
+
+ //ll_dequeue_pages(inode);
+ EXIT;
+} /* ll_put_inode */
+
+
+/*
+ * delete_inode: send all attributes (valid mask ~0) to the object store,
+ * then clear the in-core inode.  The object itself is not destroyed here;
+ * the older destroy-based body is kept below under #if 0.
+ */
+static void ll_delete_inode(struct inode *inode)
+{
+ ll_do_change_inode(inode, ~0);
+ clear_inode(inode);
+}
+#if 0
+{
+ struct obdo *oa;
+ int err;
+
+ ENTRY;
+ if (IOPS(inode, destroy) == NULL) {
+ printk(KERN_ERR __FUNCTION__ ": no destroy method!\n");
+ EXIT;
+ return;
+ }
+
+ oa = obdo_alloc();
+ if ( !oa ) {
+ printk(__FUNCTION__ ": obdo_alloc failed\n");
+ EXIT;
+ return;
+ }
+ oa->o_valid = OBD_MD_FLNOTOBD;
+ ll_from_inode(oa, inode);
+
+ /* XXX how do we know that this inode is now clean? */
+ printk("delete_inode ------> link %d\n", inode->i_nlink);
+ ODEBUG(oa);
+ err = IOPS(inode, destroy)(IID(inode), oa);
+ obdo_free(oa);
+ clear_inode(inode);
+ if (err) {
+ printk(__FUNCTION__ ": obd_destroy fails (%d)\n", err);
+ EXIT;
+ return;
+ }
+
+ EXIT;
+} /* ll_delete_inode */
+#endif
+
+
+/*
+ * Apply the attributes selected by attr->ia_valid to the in-core inode.
+ * A size change goes through vmtruncate() first; if that fails its error is
+ * returned and no other attribute is applied.  When the mode is changed,
+ * the setgid bit is stripped unless the caller is in the inode's group or
+ * has CAP_FSETID.  Returns 0 on success.
+ */
+static int ll_attr2inode(struct inode * inode, struct iattr * attr)
+{
+        unsigned int valid = attr->ia_valid;
+
+        if (valid & ATTR_SIZE) {
+                int rc = vmtruncate(inode, attr->ia_size);
+
+                if (rc)
+                        return rc;
+        }
+
+        if (valid & ATTR_UID)
+                inode->i_uid = attr->ia_uid;
+        if (valid & ATTR_GID)
+                inode->i_gid = attr->ia_gid;
+        if (valid & ATTR_ATIME)
+                inode->i_atime = attr->ia_atime;
+        if (valid & ATTR_MTIME)
+                inode->i_mtime = attr->ia_mtime;
+        if (valid & ATTR_CTIME)
+                inode->i_ctime = attr->ia_ctime;
+
+        if (valid & ATTR_MODE) {
+                inode->i_mode = attr->ia_mode;
+                if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
+                        inode->i_mode &= ~S_ISGID;
+        }
+
+        return 0;
+}
+
+/*
+ * setattr inode operation: apply `attr` to the in-core inode and forward
+ * the change to the object store through the setattr method.
+ *
+ * Returns 0 on success, -EIO when the device has no setattr method,
+ * -ENOMEM when no obdo can be allocated, the error from ll_attr2inode()
+ * (e.g. a failed vmtruncate), or the error from the setattr method.
+ *
+ * The ll_attr2inode() result used to be ignored, so a failed size change
+ * was still sent to the object store and reported as success.
+ */
+int ll_setattr(struct dentry *de, struct iattr *attr)
+{
+        struct inode *inode = de->d_inode;
+        struct obdo *oa;
+        int err;
+
+        ENTRY;
+        if (IOPS(inode, setattr) == NULL) {
+                printk(KERN_ERR __FUNCTION__ ": no setattr method!\n");
+                EXIT;
+                return -EIO;
+        }
+        oa = obdo_alloc();
+        if ( !oa ) {
+                printk(__FUNCTION__ ": obdo_alloc failed\n");
+                EXIT;
+                return -ENOMEM;
+        }
+
+        /* update the in-core inode first; stop if that fails */
+        err = ll_attr2inode(inode, attr);
+        if ( err ) {
+                printk(__FUNCTION__ ": ll_attr2inode fails (%d)\n", err);
+                goto out;
+        }
+
+        oa->o_id = inode->i_ino;
+        oa->o_mode = inode->i_mode;
+        obdo_from_iattr(oa, attr);
+        err = IOPS(inode, setattr)(IID(inode), oa);
+
+        if ( err )
+                printk(__FUNCTION__ ": obd_setattr fails (%d)\n", err);
+
+out:
+        EXIT;
+        obdo_free(oa);
+        return err;
+} /* ll_setattr */
+
+
+
+/*
+ * statfs: query the OBD device into a local struct statfs and copy the
+ * result to `buf`.  Returns 0 on success or the device's error, in which
+ * case `buf` is left untouched.
+ */
+static int ll_statfs(struct super_block *sb, struct statfs *buf)
+{
+        struct statfs result;
+        int rc;
+
+        ENTRY;
+
+        rc = OPS(sb,statfs)(ID(sb), &result);
+        if (rc != 0) {
+                printk(__FUNCTION__ ": obd_statfs fails (%d)\n", rc);
+                return rc;
+        }
+
+        memcpy(buf, &result, sizeof(*buf));
+        CDEBUG(D_SUPER, "statfs returns avail %ld\n", result.f_bavail);
+        EXIT;
+
+        return rc;
+}
+
+/*
+ * read_inode2 super operation: initialise a new in-core inode from the obdo
+ * passed as `opaque` (the last argument given to iget4()), then install the
+ * operation tables that match the file type:
+ *  - regular files and directories get their own inode/file operations plus
+ *    ll_aops;
+ *  - symlinks with data blocks use the page-based symlink operations,
+ *    symlinks without blocks use the "fast" (inline) operations;
+ *  - everything else goes through init_special_inode(), taking the device
+ *    number from the first int of the inode's inline data.
+ */
+static inline void ll_read_inode2(struct inode *inode, void *opaque)
+{
+ struct obdo *oa = opaque;
+
+ ENTRY;
+ ll_to_inode(inode, oa);
+
+ INIT_LIST_HEAD(ll_iplist(inode)); /* list of dirty pages on inode */
+ INIT_LIST_HEAD(ll_islist(inode)); /* list of inodes in superblock */
+
+ /* OIDEBUG(inode); */
+
+ if (S_ISREG(inode->i_mode)) {
+ inode->i_op = &ll_file_inode_operations;
+ inode->i_fop = &ll_file_operations;
+ inode->i_mapping->a_ops = &ll_aops;
+ EXIT;
+ } else if (S_ISDIR(inode->i_mode)) {
+ inode->i_op = &ll_dir_inode_operations;
+ inode->i_fop = &ll_dir_operations;
+ inode->i_mapping->a_ops = &ll_aops;
+ EXIT;
+ } else if (S_ISLNK(inode->i_mode)) {
+ if (inode->i_blocks) {
+ inode->i_op = &ll_symlink_inode_operations;
+ inode->i_mapping->a_ops = &ll_aops;
+ }else {
+ inode->i_op = &ll_fast_symlink_inode_operations;
+ }
+ EXIT;
+ } else {
+ init_special_inode(inode, inode->i_mode,
+ ((int *)ll_i2info(inode)->oi_inline)[0]);
+ }
+
+ EXIT;
+ return;
+}
+
+/* exported operations
+ *
+ * Only read_inode2 is wired up at present; put_inode, delete_inode,
+ * put_super and statfs are implemented above but left commented out here.
+ */
+struct super_operations ll_super_operations =
+{
+ read_inode2: ll_read_inode2,
+ // put_inode: ll_put_inode,
+ // delete_inode: ll_delete_inode,
+ // put_super: ll_put_super,
+ // statfs: ll_statfs
+};
+
+
+
+/* Filesystem type "lustre_light": no flags, mounted through ll_read_super. */
+struct file_system_type lustre_light_fs_type = {
+ "lustre_light", 0, ll_read_super, NULL
+};
+
+/* Module init: print a banner and register the filesystem type. */
+static int __init init_lustre_light(void)
+{
+ printk(KERN_INFO "Lustre Light 0.0.1, braam@clusterfs.com\n");
+
+ /* the registration result is the module's init status */
+ return register_filesystem(&lustre_light_fs_type);
+}
+
+/* Module exit: unregister the filesystem type. */
+static void __exit exit_lustre_light(void)
+{
+ unregister_filesystem(&lustre_light_fs_type);
+}
+
+MODULE_AUTHOR("Peter J. Braam <braam@clusterfs.com>");
+MODULE_DESCRIPTION("Lustre Light Client File System v1.0");
+MODULE_LICENSE("GPL");
+
+module_init(init_lustre_light);
+module_exit(exit_lustre_light);
--- /dev/null
+/*
+ * linux/fs/ext2/symlink.c
+ *
+ * This code is issued under the GNU General Public License.
+ * See the file COPYING in this distribution
+ *
+ * Copyright (C) 1992, 1993, 1994, 1995
+ * Remy Card (card@masi.ibp.fr)
+ * Laboratoire MASI - Institut Blaise Pascal
+ * Universite Pierre et Marie Curie (Paris VI)
+ *
+ * from
+ *
+ * linux/fs/minix/symlink.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ *
+ * ext2 symlink handling code
+ *
+ * Modified for OBDFS:
+ * Copyright (C) 1999 Seagate Technology Inc. (author: braam@stelias.com)
+ */
+
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/stat.h>
+#include <linux/locks.h>
+#include <linux/obd_support.h> /* for ENTRY and EXIT only */
+#include <linux/obdfs.h>
+
+/* readlink for "fast" symlinks: the target is the inode's inline data. */
+static int obdfs_fast_readlink(struct dentry *dentry, char *buffer, int buflen)
+{
+        return vfs_readlink(dentry, buffer, buflen,
+                            obdfs_i2info(dentry->d_inode)->oi_inline);
+}
+
+/* follow_link for "fast" symlinks: the target is the inode's inline data. */
+static int obdfs_fast_follow_link(struct dentry *dentry, struct nameidata *nd)
+{
+        return vfs_follow_link(nd, obdfs_i2info(dentry->d_inode)->oi_inline);
+}
+
+extern int obdfs_setattr(struct dentry *de, struct iattr *attr);
+/* Inode operations for symlinks whose target is held in the inode's inline data. */
+struct inode_operations obdfs_fast_symlink_inode_operations = {
+ readlink: obdfs_fast_readlink,
+ follow_link: obdfs_fast_follow_link,
+ setattr: obdfs_setattr
+};
+
+/*
+ * readlink for symlinks whose target is stored in the first page of the
+ * object.  The page is fetched with obdfs_getpage() (no create, unlocked)
+ * and released after the copy.  Returns the vfs_readlink() result, or 0
+ * when no page could be obtained.
+ * NOTE(review): obdfs_getpage() can return a page flagged with an error
+ * after a failed read; that case is not checked here -- confirm.
+ */
+static int obdfs_readlink(struct dentry *dentry, char *buffer, int buflen)
+{
+ struct page *page = NULL;
+ int res;
+
+ ENTRY;
+ OIDEBUG(dentry->d_inode);
+ page = obdfs_getpage(dentry->d_inode, 0, 0, 0);
+ /* PDEBUG(page, "readlink"); */
+ if (!page) {
+ EXIT;
+ return 0;
+ }
+ res = vfs_readlink(dentry, buffer, buflen, (char *)page_address(page));
+ page_cache_release(page);
+ EXIT;
+ return res;
+} /* obdfs_readlink */
+
+/*
+ * follow_link for symlinks whose target is stored in the first page of the
+ * object.  Returns the vfs_follow_link() result; when no page can be
+ * obtained, the dentry in `nd` is released with dput() and -EIO is returned.
+ * NOTE(review): as in obdfs_readlink(), a page returned with its error flag
+ * set is not detected here -- confirm.
+ */
+static int obdfs_follow_link(struct dentry * dentry,
+ struct nameidata *nd)
+{
+ struct page *page = NULL;
+ int res;
+
+ ENTRY;
+ OIDEBUG(dentry->d_inode);
+ page = obdfs_getpage(dentry->d_inode, 0, 0, 0);
+ /* PDEBUG(page, "follow_link"); */
+ if (!page) {
+ dput(nd->dentry);
+ EXIT;
+ return -EIO;
+ }
+ res = vfs_follow_link(nd, (char *)page_address(page));
+ page_cache_release(page);
+ EXIT;
+ return res;
+}
+
+/* Inode operations for symlinks whose target is read from the object's first page. */
+struct inode_operations obdfs_symlink_inode_operations = {
+ readlink: obdfs_readlink,
+ follow_link: obdfs_follow_link,
+ setattr: obdfs_setattr
+};
--- /dev/null
+/*
+ * Copyright (C) 2001 Cluster File Systems, Inc.
+ *
+ * This code is issued under the GNU General Public License.
+ * See the file COPYING in this distribution
+ */
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/sysctl.h>
+#include <linux/swapctl.h>
+#include <linux/proc_fs.h>
+#include <linux/slab.h>
+#include <linux/stat.h>
+#include <linux/ctype.h>
+#include <asm/bitops.h>
+#include <asm/segment.h>
+#include <asm/uaccess.h>
+#include <linux/utsname.h>
+
+
+struct ctl_table_header *obdfs_table_header = NULL;
+
+int obdfs_debug_level = 0;
+int obdfs_print_entry = 1;
+
+
+#define OBDFS_SYSCTL 1
+
+#define OBDFS_DEBUG 1 /* control debugging */
+#define OBDFS_ENTRY 2 /* control enter/leave pattern */
+#define OBDFS_TIMEOUT 3 /* timeout on upcalls to become intrble */
+#define OBDFS_HARD 4 /* mount type "hard" or "soft" */
+#define OBDFS_VARS 5
+#define OBDFS_INDEX 6
+#define OBDFS_RESET 7
+
+#define OBDFS_VARS_SLOT 2
+
+/*
+ * sysctl entries below "obdfs": "debug" exposes obdfs_debug_level and
+ * "trace" exposes obdfs_print_entry, both as integers with mode 0644.
+ * The zero entry terminates the table.
+ */
+static ctl_table obdfs_table[] = {
+ {OBDFS_DEBUG, "debug", &obdfs_debug_level, sizeof(int), 0644, NULL, &proc_dointvec},
+ {OBDFS_ENTRY, "trace", &obdfs_print_entry, sizeof(int), 0644, NULL, &proc_dointvec},
+ { 0 }
+};
+
+/* Top-level sysctl directory "obdfs" (mode 0555) holding obdfs_table. */
+static ctl_table top_table[] = {
+ {OBDFS_SYSCTL, "obdfs", NULL, 0, 0555, obdfs_table},
+ {0}
+};
+
+/*
+ * Register the obdfs sysctl tree once; repeated calls are no-ops while a
+ * table header is held.  Compiles to an empty function without CONFIG_SYSCTL.
+ */
+void obdfs_sysctl_init (void)
+{
+
+#ifdef CONFIG_SYSCTL
+ if ( !obdfs_table_header )
+ obdfs_table_header = register_sysctl_table(top_table, 0);
+#endif
+}
+
+/*
+ * Unregister the obdfs sysctl tree if it was registered and reset the
+ * header pointer so a later obdfs_sysctl_init() can register it again.
+ */
+void obdfs_sysctl_clean (void)
+{
+#ifdef CONFIG_SYSCTL
+ if ( obdfs_table_header )
+ unregister_sysctl_table(obdfs_table_header);
+ obdfs_table_header = NULL;
+#endif
+}
--- /dev/null
+# Copyright (C) 2001 Cluster File Systems, Inc.
+#
+# This code is issued under the GNU General Public License.
+# See the file COPYING in this distribution
+
+DEFS:=
+
+MODULE = mdc
+modulefs_DATA = mdc.o
+EXTRA_PROGRAMS = mdc
+
+
+mdc_SOURCES = mds_pack.c mdc_request.c
+
+mds_pack.c:
+ ln -s ../lib/mds_pack.c .
+
+include $(top_srcdir)/Rules
#include <asm/system.h>
#include <asm/uaccess.h>
+#include <linux/module.h>
#include <linux/fs.h>
#include <linux/stat.h>
#include <linux/lustre_mds.h>
#define REQUEST_MINOR 244
+
extern int mds_queue_req(struct mds_request *);
-static int mds_send_req(struct mds_request *req)
+/*
+ * Allocate an MDS request, pack an empty request buffer into it and set the
+ * opcode in the request header.  `size` is currently unused.
+ *
+ * Returns the new request, or NULL when the allocation or the packing
+ * fails.  The request structure is freed again on a packing failure (it
+ * used to be leaked).
+ */
+struct mds_request *mds_prep_req(int size, int opcode)
{
+ struct mds_request *request;
int rc;
- init_waitqueue_head(&req->rq_wait_for_rep);
- /* XXX replace the following with networking code */
+ ENTRY;
+
+ request = (struct mds_request *)kmalloc(sizeof(*request), GFP_KERNEL);
+ if (!request) {
+ printk("mds_prep_req: request allocation out of memory\n");
+ return NULL;
+ }
+
+ rc = mds_pack_req(NULL, 0, NULL, 0,
+ &request->rq_reqhdr, &request->rq_req,
+ &request->rq_reqlen, &request->rq_reqbuf);
+ if (rc) {
+ printk("llight request: cannot pack request %d\n", rc);
+ /* nothing else owns the request yet: release it before bailing out */
+ kfree(request);
+ return NULL;
+ }
+ request->rq_reqhdr->opc = opcode;
+
+ EXIT;
+ return request;
+}
+
+
+
+
+static int mds_queue_wait(struct mds_request *req)
+{
+ int rc;
+
+ /* XXX fix the race here (wait_for_event?)*/
+ /* hand the packet over to the server */
rc = mds_queue_req(req);
if (rc) {
- EXIT;
- return rc;
+ printk("osc_queue_wait: error %d, opcode %d\n", rc,
+ req->rq_reqhdr->opc);
+ return -rc;
}
+ init_waitqueue_head(&req->rq_wait_for_rep);
printk("-- sleeping\n");
interruptible_sleep_on(&req->rq_wait_for_rep);
printk("-- done\n");
- return 0;
+
+ mds_unpack_rep(req->rq_repbuf, req->rq_replen, &req->rq_rephdr,
+ &req->rq_rep);
+ printk("-->osc_queue_wait: buf %p len %d status %d\n",
+ req->rq_repbuf, req->rq_replen, req->rq_rephdr->status);
+
+ EXIT;
+ return req->rq_rephdr->status;
}
-int llight_getattr(ino_t ino, struct mds_rep *rep)
+void mds_free_req(struct mds_request *request)
+{
+ kfree(request);
+}
+
+/*
+ * Send an MDS_GETATTR request for inode number @ino and wait for the reply.
+ *
+ * If @rep is non-NULL, *rep is set to the unpacked reply body on success;
+ * the reply buffer is then left allocated (presumably *rep points into it --
+ * confirm against mds_unpack_rep).  If @rep is NULL the reply buffer is
+ * freed here.
+ *
+ * Returns 0 on success, -ENOMEM if the request could not be prepared, or
+ * the non-zero value returned by mds_queue_wait().
+ */
+int mdc_getattr(ino_t ino, struct mds_rep **rep)
{
 struct mds_request *request;
 int rc;
- request = (struct mds_request *)kmalloc(sizeof(*request),
- GFP_KERNEL);
+ request = mds_prep_req(sizeof(*request), MDS_GETATTR);
 if (!request) {
- printk("llight request: out of memory\n");
+ printk("llight request: cannot pack\n");
 return -ENOMEM;
 }
- rc = mds_pack_req(NULL, 0, NULL, 0,
- &request->rq_reqhdr, &request->rq_req,
- &request->rq_reqlen, &request->rq_reqbuf);
- if (rc) {
- printk("llight request: cannot pack request %d\n", rc);
- return rc;
- }
 request->rq_req->fid1.id = ino;
- request->rq_reqhdr->opc = MDS_GETATTR;
-
- rc = mds_send_req(request);
+ rc = mds_queue_wait(request);
 if (rc) {
 printk("llight request: error in handling %d\n", rc);
- return rc;
+ /* NOTE(review): a reply buffer received together with a non-zero
+ * status is not released on this path -- confirm ownership */
+ goto out;
 }
- printk("mode: %o\n", request->rq_rep->mode);
- if (rep) {
- memcpy(rep, request->rq_repbuf, sizeof(*rep));
+ printk("mds_getattr: mode: %o\n", request->rq_rep->mode);
+
+ if (!rep) {
+ /* nobody takes over the reply: free it here rather than leak it */
+ kfree(request->rq_repbuf);
+ } else {
+ *rep = request->rq_rep;
 }
- kfree(request->rq_repbuf);
- kfree(request);
- return 0;
-}
+ out:
+ mds_free_req(request);
+ return rc;
+}
static int request_ioctl(struct inode *inode, struct file *file,
unsigned int cmd, unsigned long arg)
switch (cmd) {
case IOC_REQUEST_GETATTR: {
printk("-- getting attr for ino 2\n");
- err = llight_getattr(2, NULL);
+ err = mdc_getattr(2, NULL);
printk("-- done err %d\n", err);
break;
}
};
-int init_request_module(void)
+/*
+ * Module entry point: register the request test device (request_dev) as a
+ * misc device.
+ *
+ * Returns the result of misc_register(), so a failed registration makes
+ * the module load fail instead of being reported as success.
+ */
+static int __init mds_request_init(void)
{
- misc_register( &request_dev );
- return 0 ;
+ return misc_register(&request_dev);
}
-#ifdef MODULE
-MODULE_AUTHOR("Peter J. Braam <braam@clusterfs.com>");
-MODULE_DESCRIPTION("Lustre MDS Request Tester v1.0");
-
-#include <linux/module.h>
-
-int init_module(void)
-{
- return init_request_module();
-}
-void cleanup_module(void)
+/* Module exit point: unregister the misc device registered at init time. */
+static void __exit mds_request_exit(void)
{
 misc_deregister(&request_dev);
- return;
}
-#endif
+/* Module metadata. */
+MODULE_AUTHOR("Peter J. Braam <braam@clusterfs.com>");
+MODULE_DESCRIPTION("Lustre MDS Request Tester v1.0");
+MODULE_LICENSE("GPL");
+
+/* mdc_getattr is exported for use by other modules. */
+EXPORT_SYMBOL(mdc_getattr);
+
+
+/* Hook the init/exit functions into module load and unload. */
+module_init(mds_request_init);
+module_exit(mds_request_exit);
// XXX make this networked!
+/*
+ * Queue a client request on the local MDS.
+ *
+ * A server-side request is allocated and zeroed, given the client's request
+ * buffer pointer and length, tagged with the MDS device and with @req as
+ * its reply handle, added to MDS->mds_reqs, and the MDS wait queue is woken.
+ *
+ * Returns 0 on success, -1 if no MDS is set up, -ENOMEM if the server-side
+ * request cannot be allocated.
+ *
+ * NOTE(review): the client request code declares this function extern while
+ * it is static here -- confirm how the symbol is meant to be resolved.
+ */
static int mds_queue_req(struct mds_request *req)
{
- struct mds_request *srv_request;
+ struct mds_request *srv_req;
 if (!MDS) {
 EXIT;
 return -1;
 }
- srv_request = kmalloc(sizeof(*srv_request), GFP_KERNEL);
- if (!srv_request) {
+ srv_req = kmalloc(sizeof(*srv_req), GFP_KERNEL);
+ if (!srv_req) {
 EXIT;
 return -ENOMEM;
 }
- /* move the request buffer */
- srv_request->rq_reqlen = req->rq_reqlen;
- srv_request->rq_reqbuf = req->rq_reqbuf;
- srv_request->rq_obd = MDS;
+ printk("---> MDS at %d %p, incoming req %p, srv_req %p\n",
+ __LINE__, MDS, req, srv_req);
+
+ /* size the clear by the object actually being cleared */
+ memset(srv_req, 0, sizeof(*srv_req));
- req->rq_reqbuf = NULL;
- req->rq_reqlen = 0;
+ /* move the request buffer */
+ srv_req->rq_reqbuf = req->rq_reqbuf;
+ srv_req->rq_reqlen = req->rq_reqlen;
+ srv_req->rq_obd = MDS;
 /* remember where it came from */
- srv_request->rq_reply_handle = req;
+ srv_req->rq_reply_handle = req;
- /* get the server working on this request */
- spin_lock(&MDS->mds_lock);
- list_add(&srv_request->rq_list, &MDS->mds_reqs);
- spin_unlock(&MDS->mds_lock);
+ /* NOTE(review): the list is modified without the mds_lock that used to
+ * guard it -- confirm the request list cannot be accessed concurrently */
+ list_add(&srv_req->rq_list, &MDS->mds_reqs);
 wake_up(&MDS->mds_waitq);
+ return 0;
+}
- /* put client asleep */
- printk("-- sleeping\n");
- interruptible_sleep_on(&req->rq_wait_for_rep);
- printk("-- done\n");
+/* XXX replace with networking code */
+/*
+ * Deliver the reply held in the server-side request @req to the client
+ * request recorded in req->rq_reply_handle.
+ *
+ * The request buffer is freed, the reply buffer pointer and length are
+ * moved to the client request (and cleared on @req), and the client's
+ * rq_wait_for_rep wait queue is woken.  Always returns 0.
+ */
+int mds_reply(struct mds_request *req)
+{
+ struct mds_request *clnt_req = req->rq_reply_handle;
+
+ ENTRY;
+
+ /* free the request buffer */
+ kfree(req->rq_reqbuf);
+ req->rq_reqbuf = NULL;
+
+ /* move the reply to the client */
+ clnt_req->rq_replen = req->rq_replen;
+ clnt_req->rq_repbuf = req->rq_repbuf;
+ req->rq_repbuf = NULL;
+ req->rq_replen = 0;
+
+ /* wake up the client */
+ wake_up_interruptible(&clnt_req->rq_wait_for_rep);
+ EXIT;
 return 0;
}
+/*
+ * Answer @req with an error reply.
+ *
+ * A bare reply header is allocated and zeroed, filled with the request's
+ * sequence number, the status stored in req->rq_status and the type
+ * MDS_TYPE_ERR, installed as the reply buffer of @req and sent with
+ * mds_reply().
+ *
+ * Returns -ENOMEM if the header cannot be allocated, otherwise the result
+ * of mds_reply().
+ */
+int mds_error(struct mds_request *req)
+{
+ struct mds_rep_hdr *err_hdr;
+
+ ENTRY;
+ err_hdr = kmalloc(sizeof(struct mds_rep_hdr), GFP_KERNEL);
+ if (err_hdr == NULL) {
+ EXIT;
+ return -ENOMEM;
+ }
+ memset(err_hdr, 0, sizeof(struct mds_rep_hdr));
+
+ /* describe the failure in the header */
+ err_hdr->type = MDS_TYPE_ERR;
+ err_hdr->status = req->rq_status;
+ err_hdr->seqno = req->rq_reqhdr->seqno;
+
+ /* the header alone is the whole reply */
+ req->rq_replen = sizeof(struct mds_rep_hdr);
+ req->rq_repbuf = (char *)err_hdr;
+
+ EXIT;
+ return mds_reply(req);
+}
+
+
+
static struct dentry *mds_fid2dentry(struct mds_obd *mds, struct lustre_fid *fid)
{
struct dentry *de;
return 0;
}
-/* XXX replace with networking code */
-int mds_reply(struct mds_request *req)
-{
- struct mds_request *clnt_req = req->rq_reply_handle;
-
- ENTRY;
-
- /* free the request buffer */
- kfree(req->rq_reqbuf);
- req->rq_reqbuf = NULL;
-
- /* move the reply to the client */
- clnt_req->rq_replen = req->rq_replen;
- clnt_req->rq_repbuf = req->rq_repbuf;
- req->rq_repbuf = NULL;
- req->rq_replen = 0;
-
- /* wake up the client */
- wake_up_interruptible(&clnt_req->rq_wait_for_rep);
- EXIT;
- return 0;
-}
-
-int mds_error(struct mds_request *req)
-{
- struct mds_rep_hdr *hdr;
-
- ENTRY;
- hdr = kmalloc(sizeof(*hdr), GFP_KERNEL);
- if (!hdr) {
- EXIT;
- return -ENOMEM;
- }
-
- memset(hdr, 0, sizeof(*hdr));
-
- hdr->seqno = req->rq_reqhdr->seqno;
- hdr->status = req->rq_status;
- hdr->type = MDS_TYPE_ERR;
-
- req->rq_repbuf = (char *)hdr;
- req->rq_replen = sizeof(*hdr);
-
- EXIT;
- return mds_reply(req);
-}
//int mds_handle(struct mds_conn *conn, int len, char *buf)
int mds_handle(struct mds_request *req)
# This code is issued under the GNU General Public License.
# See the file COPYING in this distribution
+DEFS:=
+
MODULE = osc
modulefs_DATA = osc.o
EXTRA_PROGRAMS = osc
request = (struct ost_request *)kmalloc(sizeof(*request), GFP_KERNEL);
if (!request) {
- printk("osc_getattr: request allocation out of memory\n");
+ printk("osc_prep_req: request allocation out of memory\n");
return NULL;
}
__LINE__, ost, req, srv_req);
memset(srv_req, 0, sizeof(*req));
+
+ /* move the request buffer */
srv_req->rq_reqbuf = req->rq_reqbuf;
srv_req->rq_reqlen = req->rq_reqlen;
srv_req->rq_obd = ost;
+
+ /* remember where it came from */
srv_req->rq_reply_handle = req;
list_add(&srv_req->rq_list, &ost->ost_reqs);
hdr->seqno = req->rq_reqhdr->seqno;
hdr->status = req->rq_status;
hdr->type = OST_TYPE_ERR;
+
req->rq_repbuf = (char *)hdr;
+ req->rq_replen = sizeof(*hdr);
EXIT;
return ost_reply(obddev, req);
R=/r
+
insmod /lib/modules/2.4.17/kernel/drivers/block/loop.o
+insmod $R/usr/src/obd/class/obdclass.o
+insmod $R/usr/src/obd/ext2obd/obdext2.o
+insmod $R/usr/src/obd/ost/ost.o
+insmod $R/usr/src/obd/osc/osc.o
+insmod $R/usr/src/obd/mds/mds.o
+insmod $R/usr/src/obd/llight/llight.o
+
dd if=/dev/zero of=/tmp/fs bs=1024 count=10000
mke2fs -F /tmp/fs
losetup /dev/loop/0 /tmp/fs
-insmod $R/usr/src/obd/class/obdclass.o
-insmod $R/usr/src/obd/mds/mds.o
mknod /dev/obd c 10 241
$R/usr/src/obd/utils/obdctl <<EOF
quit
EOF
-
-insmod $R/usr/src/obd/llight/llight.o
mknod /dev/request c 10 244
-$R/usr/src/obd/utils/testreq
+# $R/usr/src/obd/utils/testreq