From: braam Date: Sun, 20 Jan 2002 00:45:19 +0000 (+0000) Subject: Small fixes to the request processing. X-Git-Tag: v1_7_100~6024 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=dec61de39f0afd578d4687811b84981f8c41a6ae Small fixes to the request processing. Split off the metadata client into a separate module, probably easier to test things that way. --- diff --git a/lustre/lib/mds_pack.c b/lustre/lib/mds_pack.c index 7aa43f8..86d78e0 100644 --- a/lustre/lib/mds_pack.c +++ b/lustre/lib/mds_pack.c @@ -69,6 +69,7 @@ int mds_pack_req(char *name, int namelen, char *tgt, int tgtlen, memset(*buf, 0, *len); *hdr = (struct mds_req_hdr *)(*buf); + *req = (struct mds_req *)(*buf + sizeof(**hdr)); preq = (struct mds_req_packed *)(*buf + sizeof(**hdr)); ptr = *buf + sizeof(**hdr) + sizeof(*preq); @@ -153,6 +154,7 @@ int mds_pack_rep(char *name, int namelen, char *tgt, int tgtlen, memset(*buf, 0, *len); *hdr = (struct mds_rep_hdr *)(*buf); + *rep = (struct mds_rep *)(*buf + sizeof(**hdr)); prep = (struct mds_rep_packed *)(*buf + sizeof(**hdr)); ptr = *buf + sizeof(**hdr) + sizeof(*prep); diff --git a/lustre/lib/obd_pack.c b/lustre/lib/obd_pack.c index b0044da..691ae37 100644 --- a/lustre/lib/obd_pack.c +++ b/lustre/lib/obd_pack.c @@ -73,7 +73,6 @@ int ost_pack_req(char *buf1, int buflen1, char *buf2, int buflen2, preq = (struct ost_req_packed *)(*buf + sizeof(**hdr)); ptr = *buf + sizeof(**hdr) + sizeof(*preq); - *req = (struct ost_req *)(*buf + sizeof(**hdr)); (*hdr)->type = OST_TYPE_REQ; diff --git a/lustre/llite/Makefile.am b/lustre/llite/Makefile.am index 0f17467..cd4d49d 100644 --- a/lustre/llite/Makefile.am +++ b/lustre/llite/Makefile.am @@ -8,10 +8,6 @@ modulefs_DATA = llight.o EXTRA_PROGRAMS = llight -llight_SOURCES = mds_pack.c request.c # super.c rw.c file.c dir.c sysctl.c super.c namei.c symlink.c - -mds_pack.c: - ln -s ../lib/mds_pack.c . - +llight_SOURCES = super.c rw.c file.c dir.c sysctl.c namei.c symlink.c include $(top_srcdir)/Rules diff --git a/lustre/llite/dir.c b/lustre/llite/dir.c new file mode 100644 index 0000000..05edd4c --- /dev/null +++ b/lustre/llite/dir.c @@ -0,0 +1,625 @@ +/* + * linux/fs/ext2/dir.c + * + * Copyright (C) 1992, 1993, 1994, 1995 + * Remy Card (card@masi.ibp.fr) + * Laboratoire MASI - Institut Blaise Pascal + * Universite Pierre et Marie Curie (Paris VI) + * + * from + * + * linux/fs/minix/dir.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + * + * ext2 directory handling functions + * + * Big-endian to little-endian byte-swapping/bitmaps by + * David S. Miller (davem@caip.rutgers.edu), 1995 + * + * All code that works with directory layout had been switched to pagecache + * and moved here. AV + */ + +#include +#include +#include +#include +#include + +typedef struct ext2_dir_entry_2 ext2_dirent; + +#define PageChecked(page) test_bit(PG_checked, &(page)->flags) +#define SetPageChecked(page) set_bit(PG_checked, &(page)->flags) + +int waitfor_one_page(struct page *page) +{ + int error = 0; + struct buffer_head *bh, *head = page->buffers; + + bh = head; + do { + wait_on_buffer(bh); + if (buffer_req(bh) && !buffer_uptodate(bh)) + error = -EIO; + } while ((bh = bh->b_this_page) != head); + return error; +} + +/* + * ext2 uses block-sized chunks. Arguably, sector-sized ones would be + * more robust, but we have what we have + */ +static inline unsigned ext2_chunk_size(struct inode *inode) +{ + //return inode->i_sb->s_blocksize; + return PAGE_SIZE; +} + +static inline void ext2_put_page(struct page *page) +{ + kunmap(page); + page_cache_release(page); +} + +static inline unsigned long dir_pages(struct inode *inode) +{ + return (inode->i_size+PAGE_CACHE_SIZE-1)>>PAGE_CACHE_SHIFT; +} + +static int ext2_commit_chunk(struct page *page, unsigned from, unsigned to) +{ + struct inode *dir = page->mapping->host; + int err = 0; + dir->i_version = ++event; + page->mapping->a_ops->commit_write(NULL, page, from, to); + if (IS_SYNC(dir)) + err = waitfor_one_page(page); + return err; +} + +static void ext2_check_page(struct page *page) +{ + struct inode *dir = page->mapping->host; + struct super_block *sb = dir->i_sb; + unsigned chunk_size = ext2_chunk_size(dir); + char *kaddr = page_address(page); + // u32 max_inumber = le32_to_cpu(sb->u.ext2_sb.s_es->s_inodes_count); + unsigned offs, rec_len; + unsigned limit = PAGE_CACHE_SIZE; + ext2_dirent *p; + char *error; + + if ((dir->i_size >> PAGE_CACHE_SHIFT) == page->index) { + limit = dir->i_size & ~PAGE_CACHE_MASK; + if (limit & (chunk_size - 1)) + goto Ebadsize; + for (offs = limit; offsrec_len = cpu_to_le16(chunk_size); + } + if (!limit) + goto out; + } + for (offs = 0; offs <= limit - EXT2_DIR_REC_LEN(1); offs += rec_len) { + p = (ext2_dirent *)(kaddr + offs); + rec_len = le16_to_cpu(p->rec_len); + + if (rec_len < EXT2_DIR_REC_LEN(1)) + goto Eshort; + if (rec_len & 3) + goto Ealign; + if (rec_len < EXT2_DIR_REC_LEN(p->name_len)) + goto Enamelen; + if (((offs + rec_len - 1) ^ offs) & ~(chunk_size-1)) + goto Espan; + // if (le32_to_cpu(p->inode) > max_inumber) + //goto Einumber; + } + if (offs != limit) + goto Eend; +out: + SetPageChecked(page); + return; + + /* Too bad, we had an error */ + +Ebadsize: + ext2_error(sb, "ext2_check_page", + "size of directory #%lu is not a multiple of chunk size", + dir->i_ino + ); + goto fail; +Eshort: + error = "rec_len is smaller than minimal"; + goto bad_entry; +Ealign: + error = "unaligned directory entry"; + goto bad_entry; +Enamelen: + error = "rec_len is too small for name_len"; + goto bad_entry; +Espan: + error = "directory entry across blocks"; + goto bad_entry; + //Einumber: + // error = "inode out of bounds"; +bad_entry: + ext2_error (sb, "ext2_check_page", "bad entry in directory #%lu: %s - " + "offset=%lu, inode=%lu, rec_len=%d, name_len=%d", + dir->i_ino, error, (page->index<inode), + rec_len, p->name_len); + goto fail; +Eend: + p = (ext2_dirent *)(kaddr + offs); + ext2_error (sb, "ext2_check_page", + "entry in directory #%lu spans the page boundary" + "offset=%lu, inode=%lu", + dir->i_ino, (page->index<inode)); +fail: + SetPageChecked(page); + SetPageError(page); +} + +static struct page * ext2_get_page(struct inode *dir, unsigned long n) +{ + struct address_space *mapping = dir->i_mapping; + struct page *page = read_cache_page(mapping, n, + (filler_t*)mapping->a_ops->readpage, NULL); + if (!IS_ERR(page)) { + wait_on_page(page); + kmap(page); + if (!Page_Uptodate(page)) + goto fail; + if (!PageChecked(page)) + ext2_check_page(page); + if (PageError(page)) + goto fail; + } + return page; + +fail: + ext2_put_page(page); + return ERR_PTR(-EIO); +} + +/* + * NOTE! unlike strncmp, ext2_match returns 1 for success, 0 for failure. + * + * len <= EXT2_NAME_LEN and de != NULL are guaranteed by caller. + */ +static inline int ext2_match (int len, const char * const name, + struct ext2_dir_entry_2 * de) +{ + if (len != de->name_len) + return 0; + if (!de->inode) + return 0; + return !memcmp(name, de->name, len); +} + +/* + * p is at least 6 bytes before the end of page + */ +static inline ext2_dirent *ext2_next_entry(ext2_dirent *p) +{ + return (ext2_dirent *)((char*)p + le16_to_cpu(p->rec_len)); +} + +static inline unsigned +ext2_validate_entry(char *base, unsigned offset, unsigned mask) +{ + ext2_dirent *de = (ext2_dirent*)(base + offset); + ext2_dirent *p = (ext2_dirent*)(base + (offset&mask)); + while ((char*)p < (char*)de) + p = ext2_next_entry(p); + return (char *)p - base; +} + +static unsigned char ext2_filetype_table[EXT2_FT_MAX] = { + [EXT2_FT_UNKNOWN] DT_UNKNOWN, + [EXT2_FT_REG_FILE] DT_REG, + [EXT2_FT_DIR] DT_DIR, + [EXT2_FT_CHRDEV] DT_CHR, + [EXT2_FT_BLKDEV] DT_BLK, + [EXT2_FT_FIFO] DT_FIFO, + [EXT2_FT_SOCK] DT_SOCK, + [EXT2_FT_SYMLINK] DT_LNK, +}; + +static unsigned int obdfs_dt2fmt[DT_WHT + 1] = { + [EXT2_FT_UNKNOWN] 0, + [EXT2_FT_REG_FILE] S_IFREG, + [EXT2_FT_DIR] S_IFDIR, + [EXT2_FT_CHRDEV] S_IFCHR, + [EXT2_FT_BLKDEV] S_IFBLK, + [EXT2_FT_FIFO] S_IFIFO, + [EXT2_FT_SOCK] S_IFSOCK, + [EXT2_FT_SYMLINK] S_IFLNK +}; + +#define S_SHIFT 12 +static unsigned char ext2_type_by_mode[S_IFMT >> S_SHIFT] = { + [S_IFREG >> S_SHIFT] EXT2_FT_REG_FILE, + [S_IFDIR >> S_SHIFT] EXT2_FT_DIR, + [S_IFCHR >> S_SHIFT] EXT2_FT_CHRDEV, + [S_IFBLK >> S_SHIFT] EXT2_FT_BLKDEV, + [S_IFIFO >> S_SHIFT] EXT2_FT_FIFO, + [S_IFSOCK >> S_SHIFT] EXT2_FT_SOCK, + [S_IFLNK >> S_SHIFT] EXT2_FT_SYMLINK, +}; + +static inline void ext2_set_de_type(ext2_dirent *de, struct inode *inode) +{ + mode_t mode = inode->i_mode; + de->file_type = ext2_type_by_mode[(mode & S_IFMT)>>S_SHIFT]; +} + +int +new_obdfs_readdir (struct file * filp, void * dirent, filldir_t filldir) +{ + loff_t pos = filp->f_pos; + struct inode *inode = filp->f_dentry->d_inode; + // XXX struct super_block *sb = inode->i_sb; + unsigned offset = pos & ~PAGE_CACHE_MASK; + unsigned long n = pos >> PAGE_CACHE_SHIFT; + unsigned long npages = dir_pages(inode); + unsigned chunk_mask = ~(ext2_chunk_size(inode)-1); + unsigned char *types = NULL; + int need_revalidate = (filp->f_version != inode->i_version); + + if (pos > inode->i_size - EXT2_DIR_REC_LEN(1)) + goto done; + + types = ext2_filetype_table; + + for ( ; n < npages; n++, offset = 0) { + char *kaddr, *limit; + ext2_dirent *de; + struct page *page = ext2_get_page(inode, n); + + if (IS_ERR(page)) + continue; + kaddr = page_address(page); + if (need_revalidate) { + offset = ext2_validate_entry(kaddr, offset, chunk_mask); + need_revalidate = 0; + } + de = (ext2_dirent *)(kaddr+offset); + limit = kaddr + PAGE_CACHE_SIZE - EXT2_DIR_REC_LEN(1); + for ( ;(char*)de <= limit; de = ext2_next_entry(de)) + if (de->inode) { + int over; + unsigned char d_type = DT_UNKNOWN; + + if (types && de->file_type < EXT2_FT_MAX) + d_type = types[de->file_type]; + + offset = (char *)de - kaddr; + over = filldir(dirent, de->name, de->name_len, + (n<inode), d_type); + if (over) { + ext2_put_page(page); + goto done; + } + } + ext2_put_page(page); + } + +done: + filp->f_pos = (n << PAGE_CACHE_SHIFT) | offset; + filp->f_version = inode->i_version; + UPDATE_ATIME(inode); + return 0; +} + +/* + * ext2_find_entry() + * + * finds an entry in the specified directory with the wanted name. It + * returns the page in which the entry was found, and the entry itself + * (as a parameter - res_dir). Page is returned mapped and unlocked. + * Entry is guaranteed to be valid. + */ +struct ext2_dir_entry_2 * ext2_find_entry (struct inode * dir, + struct dentry *dentry, struct page ** res_page) +{ + const char *name = dentry->d_name.name; + int namelen = dentry->d_name.len; + unsigned reclen = EXT2_DIR_REC_LEN(namelen); + unsigned long start, n; + unsigned long npages = dir_pages(dir); + struct page *page = NULL; + ext2_dirent * de; + + /* OFFSET_CACHE */ + *res_page = NULL; + + // start = dir->u.ext2_i.i_dir_start_lookup; + start = 0; + if (start >= npages) + start = 0; + n = start; + do { + char *kaddr; + page = ext2_get_page(dir, n); + if (!IS_ERR(page)) { + kaddr = page_address(page); + de = (ext2_dirent *) kaddr; + kaddr += PAGE_CACHE_SIZE - reclen; + while ((char *) de <= kaddr) { + if (ext2_match (namelen, name, de)) + goto found; + de = ext2_next_entry(de); + } + ext2_put_page(page); + } + if (++n >= npages) + n = 0; + } while (n != start); + return NULL; + +found: + *res_page = page; + // dir->u.ext2_i.i_dir_start_lookup = n; + return de; +} + +struct ext2_dir_entry_2 * ext2_dotdot (struct inode *dir, struct page **p) +{ + struct page *page = ext2_get_page(dir, 0); + ext2_dirent *de = NULL; + + if (!IS_ERR(page)) { + de = ext2_next_entry((ext2_dirent *) page_address(page)); + *p = page; + } + return de; +} + +ino_t obdfs_inode_by_name(struct inode * dir, struct dentry *dentry, int *type) +{ + ino_t res = 0; + struct ext2_dir_entry_2 * de; + struct page *page; + + de = ext2_find_entry (dir, dentry, &page); + if (de) { + res = le32_to_cpu(de->inode); + *type = obdfs_dt2fmt[de->file_type]; + kunmap(page); + page_cache_release(page); + } + return res; +} + +/* Releases the page */ +void ext2_set_link(struct inode *dir, struct ext2_dir_entry_2 *de, + struct page *page, struct inode *inode) +{ + unsigned from = (char *) de - (char *) page_address(page); + unsigned to = from + le16_to_cpu(de->rec_len); + int err; + + lock_page(page); + err = page->mapping->a_ops->prepare_write(NULL, page, from, to); + if (err) + BUG(); + de->inode = cpu_to_le32(inode->i_ino); + ext2_set_de_type (de, inode); + dir->i_mtime = dir->i_ctime = CURRENT_TIME; + err = ext2_commit_chunk(page, from, to); + UnlockPage(page); + ext2_put_page(page); +} + +/* + * Parent is locked. + */ +int ext2_add_link (struct dentry *dentry, struct inode *inode) +{ + struct inode *dir = dentry->d_parent->d_inode; + const char *name = dentry->d_name.name; + int namelen = dentry->d_name.len; + unsigned reclen = EXT2_DIR_REC_LEN(namelen); + unsigned short rec_len, name_len; + struct page *page = NULL; + ext2_dirent * de; + unsigned long npages = dir_pages(dir); + unsigned long n; + char *kaddr; + unsigned from, to; + int err; + + /* We take care of directory expansion in the same loop */ + for (n = 0; n <= npages; n++) { + page = ext2_get_page(dir, n); + err = PTR_ERR(page); + if (IS_ERR(page)) + goto out; + kaddr = page_address(page); + de = (ext2_dirent *)kaddr; + kaddr += PAGE_CACHE_SIZE - reclen; + while ((char *)de <= kaddr) { + err = -EEXIST; + if (ext2_match (namelen, name, de)) + goto out_page; + name_len = EXT2_DIR_REC_LEN(de->name_len); + rec_len = le16_to_cpu(de->rec_len); + if ( n==npages && rec_len == 0) { + printk("Fatal dir behaviour\n"); + goto out_page; + } + if (!de->inode && rec_len >= reclen) + goto got_it; + if (rec_len >= name_len + reclen) + goto got_it; + de = (ext2_dirent *) ((char *) de + rec_len); + } + ext2_put_page(page); + } + BUG(); + return -EINVAL; + +got_it: + from = (char*)de - (char*)page_address(page); + to = from + rec_len; + lock_page(page); + err = page->mapping->a_ops->prepare_write(NULL, page, from, to); + if (err) + goto out_unlock; + if (de->inode) { + ext2_dirent *de1 = (ext2_dirent *) ((char *) de + name_len); + de1->rec_len = cpu_to_le16(rec_len - name_len); + de->rec_len = cpu_to_le16(name_len); + de = de1; + } + de->name_len = namelen; + memcpy (de->name, name, namelen); + de->inode = cpu_to_le32(inode->i_ino); + ext2_set_de_type (de, inode); + CDEBUG(D_INODE, "type set to %o\n", de->file_type); + dir->i_mtime = dir->i_ctime = CURRENT_TIME; + err = ext2_commit_chunk(page, from, to); + + // change_inode happens with the commit_chunk + // obdfs_change_inode(dir); + /* OFFSET_CACHE */ +out_unlock: + UnlockPage(page); +out_page: + ext2_put_page(page); +out: + return err; +} + +/* + * ext2_delete_entry deletes a directory entry by merging it with the + * previous entry. Page is up-to-date. Releases the page. + */ +int ext2_delete_entry (struct ext2_dir_entry_2 * dir, struct page * page ) +{ + struct address_space *mapping = page->mapping; + struct inode *inode = mapping->host; + char *kaddr = page_address(page); + unsigned from = ((char*)dir - kaddr) & ~(ext2_chunk_size(inode)-1); + unsigned to = ((char*)dir - kaddr) + le16_to_cpu(dir->rec_len); + ext2_dirent * pde = NULL; + ext2_dirent * de = (ext2_dirent *) (kaddr + from); + int err; + + while ((char*)de < (char*)dir) { + pde = de; + de = ext2_next_entry(de); + } + if (pde) + from = (char*)pde - (char*)page_address(page); + lock_page(page); + err = mapping->a_ops->prepare_write(NULL, page, from, to); + if (err) + BUG(); + if (pde) + pde->rec_len = cpu_to_le16(to-from); + dir->inode = 0; + inode->i_ctime = inode->i_mtime = CURRENT_TIME; + err = ext2_commit_chunk(page, from, to); + UnlockPage(page); + ext2_put_page(page); + return err; +} + +/* + * Set the first fragment of directory. + */ +int ext2_make_empty(struct inode *inode, struct inode *parent) +{ + struct address_space *mapping = inode->i_mapping; + struct page *page = grab_cache_page(mapping, 0); + unsigned chunk_size = ext2_chunk_size(inode); + struct ext2_dir_entry_2 * de; + char *base; + int err; + ENTRY; + + if (!page) + return -ENOMEM; + err = mapping->a_ops->prepare_write(NULL, page, 0, chunk_size); + if (err) + goto fail; + + base = page_address(page); + + de = (struct ext2_dir_entry_2 *) base; + de->name_len = 1; + de->rec_len = cpu_to_le16(EXT2_DIR_REC_LEN(1)); + memcpy (de->name, ".\0\0", 4); + de->inode = cpu_to_le32(inode->i_ino); + ext2_set_de_type (de, inode); + + de = (struct ext2_dir_entry_2 *) (base + EXT2_DIR_REC_LEN(1)); + de->name_len = 2; + de->rec_len = cpu_to_le16(chunk_size - EXT2_DIR_REC_LEN(1)); + de->inode = cpu_to_le32(parent->i_ino); + memcpy (de->name, "..\0", 4); + ext2_set_de_type (de, inode); + + err = ext2_commit_chunk(page, 0, chunk_size); +fail: + UnlockPage(page); + page_cache_release(page); + ENTRY; + return err; +} + +/* + * routine to check that the specified directory is empty (for rmdir) + */ +int ext2_empty_dir (struct inode * inode) +{ + struct page *page = NULL; + unsigned long i, npages = dir_pages(inode); + + for (i = 0; i < npages; i++) { + char *kaddr; + ext2_dirent * de; + page = ext2_get_page(inode, i); + + if (IS_ERR(page)) + continue; + + kaddr = page_address(page); + de = (ext2_dirent *)kaddr; + kaddr += PAGE_CACHE_SIZE-EXT2_DIR_REC_LEN(1); + + while ((char *)de <= kaddr) { + if (de->inode != 0) { + /* check for . and .. */ + if (de->name[0] != '.') + goto not_empty; + if (de->name_len > 2) + goto not_empty; + if (de->name_len < 2) { + if (de->inode != + cpu_to_le32(inode->i_ino)) + goto not_empty; + } else if (de->name[1] != '.') + goto not_empty; + } + de = ext2_next_entry(de); + } + ext2_put_page(page); + } + return 1; + +not_empty: + ext2_put_page(page); + return 0; +} + +struct file_operations obdfs_dir_operations = { + read: generic_read_dir, + readdir: new_obdfs_readdir +}; diff --git a/lustre/llite/file.c b/lustre/llite/file.c new file mode 100644 index 0000000..a652e42 --- /dev/null +++ b/lustre/llite/file.c @@ -0,0 +1,104 @@ +/* + * linux/fs/ext2/file.c + * + * This code is issued under the GNU General Public License. + * See the file COPYING in this distribution + * + * Copyright (C) 1992, 1993, 1994, 1995 + * Remy Card (card@masi.ibp.fr) + * Laboratoire MASI - Institut Blaise Pascal + * Universite Pierre et Marie Curie (Paris VI) + * + * from + * + * linux/fs/minix/file.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + * + * ext2 fs regular file handling primitives + * + * 64-bit file support on 64-bit platforms by Jakub Jelinek + * (jj@sunsite.ms.mff.cuni.cz) + */ + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +extern int obdfs_setattr(struct dentry *de, struct iattr *attr); +void obdfs_change_inode(struct inode *inode); + +static inline void obdfs_remove_suid(struct inode *inode) +{ + unsigned int mode; + + /* set S_IGID if S_IXGRP is set, and always set S_ISUID */ + mode = (inode->i_mode & S_IXGRP)*(S_ISGID/S_IXGRP) | S_ISUID; + + /* was any of the uid bits set? */ + mode &= inode->i_mode; + if (mode && !capable(CAP_FSETID)) { + inode->i_mode &= ~mode; + // XXX careful here - we cannot change the size + //obdfs_change_inode(inode); + } +} + +/* + * Write to a file (through the page cache). + */ +static ssize_t +obdfs_file_write(struct file *file, const char *buf, size_t count, loff_t *ppos) +{ + ssize_t retval; + CDEBUG(D_INFO, "Writing inode %ld, %d bytes, offset %Ld\n", + file->f_dentry->d_inode->i_ino, count, *ppos); + + retval = generic_file_write(file, buf, count, ppos); + CDEBUG(D_INFO, "Wrote %d\n", retval); + + /* update mtime/ctime/atime here, NOT size */ + if (retval > 0) { + struct iattr attr; + attr.ia_valid = ATTR_MTIME | ATTR_CTIME | ATTR_ATIME; + attr.ia_mtime = attr.ia_ctime = attr.ia_atime = + CURRENT_TIME; + obdfs_setattr(file->f_dentry, &attr); + } + EXIT; + return retval; +} + + +/* XXX this does not need to do anything for data, it _does_ need to + call setattr */ +int obdfs_fsync(struct file *file, struct dentry *dentry, int data) +{ + return 0; +} + +struct file_operations obdfs_file_operations = { + read: generic_file_read, + write: obdfs_file_write, + mmap: generic_file_mmap, + fsync: NULL +}; + + +struct inode_operations obdfs_file_inode_operations = { + truncate: obdfs_truncate, + setattr: obdfs_setattr +}; + diff --git a/lustre/llite/namei.c b/lustre/llite/namei.c new file mode 100644 index 0000000..f0e55c6 --- /dev/null +++ b/lustre/llite/namei.c @@ -0,0 +1,467 @@ +/* + * linux/fs/obdfs/namei.c + * + * This code is issued under the GNU General Public License. + * See the file COPYING in this distribution + * + * Copyright (C) 1992, 1993, 1994, 1995 + * Remy Card (card@masi.ibp.fr) + * Laboratoire MASI - Institut Blaise Pascal + * Universite Pierre et Marie Curie (Paris VI) + * + * from + * + * linux/fs/ext2/namei.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + * + * Big-endian to little-endian byte-swapping/bitmaps by + * David S. Miller (davem@caip.rutgers.edu), 1995 + * Directory entry file type support and forward compatibility hooks + * for B-tree directories by Theodore Ts'o (tytso@mit.edu), 1998 + * + * Changes for use in OBDFS + * Copyright (c) 1999, Seagate Technology Inc. + * Copyright (C) 2001, Cluster File Systems, Inc. + * Rewritten based on recent ext2 page cache use. + * + */ + +#include +#include +#include +#include +#include +extern struct address_space_operations obdfs_aops; + +/* from super.c */ +extern void obdfs_change_inode(struct inode *inode); +extern int obdfs_setattr(struct dentry *de, struct iattr *attr); + +/* from dir.c */ +extern int ext2_add_link (struct dentry *dentry, struct inode *inode); +ino_t obdfs_inode_by_name(struct inode * dir, struct dentry *dentry, int *typ); +int ext2_make_empty(struct inode *inode, struct inode *parent); +struct ext2_dir_entry_2 * ext2_find_entry (struct inode * dir, + struct dentry *dentry, struct page ** res_page); +int ext2_delete_entry (struct ext2_dir_entry_2 * dir, struct page * page ); +int ext2_empty_dir (struct inode * inode); +struct ext2_dir_entry_2 * ext2_dotdot (struct inode *dir, struct page **p); +void ext2_set_link(struct inode *dir, struct ext2_dir_entry_2 *de, + struct page *page, struct inode *inode); + +/* + * Couple of helper functions - make the code slightly cleaner. + */ +static inline void ext2_inc_count(struct inode *inode) +{ + inode->i_nlink++; + obdfs_change_inode(inode); +} + +/* postpone the disk update until the inode really goes away */ +static inline void ext2_dec_count(struct inode *inode) +{ + inode->i_nlink--; + if (inode->i_nlink > 0) + obdfs_change_inode(inode); +} + +static inline int ext2_add_nondir(struct dentry *dentry, struct inode *inode) +{ + int err; + err = ext2_add_link(dentry, inode); + if (!err) { + d_instantiate(dentry, inode); + return 0; + } + ext2_dec_count(inode); + iput(inode); + return err; +} + +/* methods */ +static struct dentry *obdfs_lookup(struct inode * dir, struct dentry *dentry) +{ + struct obdo *oa; + struct inode * inode = NULL; + int type; + ino_t ino; + + ENTRY; + if (dentry->d_name.len > EXT2_NAME_LEN) + return ERR_PTR(-ENAMETOOLONG); + + ino = obdfs_inode_by_name(dir, dentry, &type); + if (!ino) + goto negative; + + oa = obdo_fromid(IID(dir), ino, type, + OBD_MD_FLNOTOBD | OBD_MD_FLBLOCKS); + if ( IS_ERR(oa) ) { + printk(__FUNCTION__ ": obdo_fromid failed\n"); + EXIT; + return ERR_PTR(-EACCES); + } + + inode = iget4(dir->i_sb, ino, NULL, oa); + obdo_free(oa); + + if (!inode) + return ERR_PTR(-EACCES); + + negative: + d_add(dentry, inode); + return NULL; +} + + +/* + * NOTE! unlike strncmp, ext2_match returns 1 for success, 0 for failure. + * + * `len <= EXT2_NAME_LEN' is guaranteed by caller. + * `de != NULL' is guaranteed by caller. + */ +static inline int ext2_match (int len, const char * const name, + struct ext2_dir_entry_2 * de) +{ + if (len != de->name_len) + return 0; + if (!de->inode) + return 0; + return !memcmp(name, de->name, len); +} + +static struct inode *obdfs_new_inode(struct inode *dir, int mode) +{ + struct obdo *oa; + struct inode *inode; + int err; + + ENTRY; + if (IOPS(dir, create) == NULL) { + printk(KERN_ERR __FUNCTION__ ": no create method!\n"); + EXIT; + return ERR_PTR(-EIO); + } + oa = obdo_alloc(); + if (!oa) { + EXIT; + return ERR_PTR(-ENOMEM); + } + + /* Send a hint to the create method on the type of file to create */ + oa->o_mode = mode; + oa->o_valid |= OBD_MD_FLMODE; + CDEBUG(D_INODE, "\n"); + err = IOPS(dir, create)(IID(dir), oa); + CDEBUG(D_INODE, "\n"); + + if ( err ) { + printk("new_inode - fatal: err %d\n", err); + obdo_free(oa); + EXIT; + return ERR_PTR(err); + } + CDEBUG(D_INODE, "obdo mode %o\n", oa->o_mode); + + inode = iget4(dir->i_sb, (ino_t)oa->o_id, NULL, oa); + CDEBUG(D_INODE, "\n"); + obdo_free(oa); + + if (!inode) { + printk("new_inode -fatal: %ld\n", (long)oa->o_id); + IOPS(dir, destroy)(IID(dir), oa); + EXIT; + return ERR_PTR(-EIO); + } + + if (!list_empty(&inode->i_dentry)) { + printk("new_inode -fatal: aliases %ld, ct %d lnk %d\n", + (long)oa->o_id, + atomic_read(&inode->i_count), + inode->i_nlink); + IOPS(dir, destroy)(IID(dir), oa); + iput(inode); + EXIT; + return ERR_PTR(-EIO); + } + + EXIT; + return inode; +} /* obdfs_new_inode */ + + +/* + * By the time this is called, we already have created + * the directory cache entry for the new file, but it + * is so far negative - it has no inode. + * + * If the create succeeds, we fill in the inode information + * with d_instantiate(). + */ +static int obdfs_create (struct inode * dir, struct dentry * dentry, int mode) +{ + struct inode * inode = obdfs_new_inode (dir, mode); + int err = PTR_ERR(inode); + if (!IS_ERR(inode)) { + inode->i_op = &obdfs_file_inode_operations; + inode->i_fop = &obdfs_file_operations; + inode->i_mapping->a_ops = &obdfs_aops; + err = ext2_add_nondir(dentry, inode); + } + return err; +} /* obdfs_create */ + + +static int obdfs_mknod (struct inode * dir, struct dentry *dentry, int mode, int rdev) +{ + struct inode * inode = obdfs_new_inode (dir, mode); + int err = PTR_ERR(inode); + if (!IS_ERR(inode)) { + init_special_inode(inode, mode, rdev); + obdfs_change_inode(inode); + err = ext2_add_nondir(dentry, inode); + } + return err; +} + +static int obdfs_symlink (struct inode * dir, struct dentry * dentry, + const char * symname) +{ + struct super_block * sb = dir->i_sb; + int err = -ENAMETOOLONG; + unsigned l = strlen(symname)+1; + struct inode * inode; + struct obdfs_inode_info *oinfo; + + if (l > sb->s_blocksize) + goto out; + + inode = obdfs_new_inode (dir, S_IFLNK | S_IRWXUGO); + err = PTR_ERR(inode); + if (IS_ERR(inode)) + goto out; + + oinfo = obdfs_i2info(inode); + if (l >= sizeof(oinfo->oi_inline)) { + /* slow symlink */ + inode->i_op = &page_symlink_inode_operations; + inode->i_mapping->a_ops = &obdfs_aops; + err = block_symlink(inode, symname, l); + if (err) + goto out_fail; + } else { + /* fast symlink */ + inode->i_op = &obdfs_fast_symlink_inode_operations; + memcpy(oinfo->oi_inline, symname, l); + inode->i_size = l-1; + } + obdfs_change_inode(inode); + + err = ext2_add_nondir(dentry, inode); +out: + return err; + +out_fail: + ext2_dec_count(inode); + iput (inode); + goto out; +} + + + +static int obdfs_link (struct dentry * old_dentry, struct inode * dir, + struct dentry *dentry) +{ + struct inode *inode = old_dentry->d_inode; + + if (S_ISDIR(inode->i_mode)) + return -EPERM; + + if (inode->i_nlink >= EXT2_LINK_MAX) + return -EMLINK; + + inode->i_ctime = CURRENT_TIME; + ext2_inc_count(inode); + atomic_inc(&inode->i_count); + + return ext2_add_nondir(dentry, inode); +} + + +static int obdfs_mkdir(struct inode * dir, struct dentry * dentry, int mode) +{ + struct inode * inode; + int err = -EMLINK; + ENTRY; + + if (dir->i_nlink >= EXT2_LINK_MAX) + goto out; + + ext2_inc_count(dir); + + inode = obdfs_new_inode (dir, S_IFDIR | mode); + err = PTR_ERR(inode); + if (IS_ERR(inode)) + goto out_dir; + + inode->i_op = &obdfs_dir_inode_operations; + inode->i_fop = &obdfs_dir_operations; + inode->i_mapping->a_ops = &obdfs_aops; + + ext2_inc_count(inode); + + err = ext2_make_empty(inode, dir); + if (err) + goto out_fail; + + err = ext2_add_link(dentry, inode); + if (err) + goto out_fail; + + d_instantiate(dentry, inode); +out: + EXIT; + return err; + +out_fail: + ext2_dec_count(inode); + ext2_dec_count(inode); + iput(inode); + EXIT; +out_dir: + ext2_dec_count(dir); + EXIT; + goto out; +} + +static int obdfs_unlink(struct inode * dir, struct dentry *dentry) +{ + struct inode * inode = dentry->d_inode; + struct ext2_dir_entry_2 * de; + struct page * page; + int err = -ENOENT; + + de = ext2_find_entry (dir, dentry, &page); + if (!de) + goto out; + + err = ext2_delete_entry (de, page); + if (err) + goto out; + + inode->i_ctime = dir->i_ctime; + ext2_dec_count(inode); + err = 0; +out: + return err; +} + + +static int obdfs_rmdir (struct inode * dir, struct dentry *dentry) +{ + struct inode * inode = dentry->d_inode; + int err = -ENOTEMPTY; + + if (ext2_empty_dir(inode)) { + err = obdfs_unlink(dir, dentry); + if (!err) { + inode->i_size = 0; + ext2_dec_count(inode); + ext2_dec_count(dir); + } + } + return err; +} + +static int obdfs_rename (struct inode * old_dir, struct dentry * old_dentry, + struct inode * new_dir, struct dentry * new_dentry ) +{ + struct inode * old_inode = old_dentry->d_inode; + struct inode * new_inode = new_dentry->d_inode; + struct page * dir_page = NULL; + struct ext2_dir_entry_2 * dir_de = NULL; + struct page * old_page; + struct ext2_dir_entry_2 * old_de; + int err = -ENOENT; + + old_de = ext2_find_entry (old_dir, old_dentry, &old_page); + if (!old_de) + goto out; + + if (S_ISDIR(old_inode->i_mode)) { + err = -EIO; + dir_de = ext2_dotdot(old_inode, &dir_page); + if (!dir_de) + goto out_old; + } + + if (new_inode) { + struct page *new_page; + struct ext2_dir_entry_2 *new_de; + + err = -ENOTEMPTY; + if (dir_de && !ext2_empty_dir (new_inode)) + goto out_dir; + + err = -ENOENT; + new_de = ext2_find_entry (new_dir, new_dentry, &new_page); + if (!new_de) + goto out_dir; + ext2_inc_count(old_inode); + ext2_set_link(new_dir, new_de, new_page, old_inode); + new_inode->i_ctime = CURRENT_TIME; + if (dir_de) + new_inode->i_nlink--; + ext2_dec_count(new_inode); + } else { + if (dir_de) { + err = -EMLINK; + if (new_dir->i_nlink >= EXT2_LINK_MAX) + goto out_dir; + } + ext2_inc_count(old_inode); + err = ext2_add_link(new_dentry, old_inode); + if (err) { + ext2_dec_count(old_inode); + goto out_dir; + } + if (dir_de) + ext2_inc_count(new_dir); + } + + ext2_delete_entry (old_de, old_page); + ext2_dec_count(old_inode); + + if (dir_de) { + ext2_set_link(old_inode, dir_de, dir_page, new_dir); + ext2_dec_count(old_dir); + } + return 0; + + +out_dir: + if (dir_de) { + kunmap(dir_page); + page_cache_release(dir_page); + } +out_old: + kunmap(old_page); + page_cache_release(old_page); +out: + return err; +} + +struct inode_operations obdfs_dir_inode_operations = { + create: obdfs_create, + lookup: obdfs_lookup, + link: obdfs_link, + unlink: obdfs_unlink, + symlink: obdfs_symlink, + mkdir: obdfs_mkdir, + rmdir: obdfs_rmdir, + mknod: obdfs_mknod, + rename: obdfs_rename, + setattr: obdfs_setattr +}; diff --git a/lustre/llite/rw.c b/lustre/llite/rw.c new file mode 100644 index 0000000..501a2df --- /dev/null +++ b/lustre/llite/rw.c @@ -0,0 +1,722 @@ +/* + * OBDFS Super operations + * + * This code is issued under the GNU General Public License. + * See the file COPYING in this distribution + * + * Copyright (C) 1996, 1997, Olaf Kirch + * Copryright (C) 1999 Stelias Computing Inc, + * (author Peter J. Braam ) + * Copryright (C) 1999 Seagate Technology Inc. +*/ + + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +void obdfs_change_inode(struct inode *inode); + +static int cache_writes = 0; + + +/* page cache support stuff */ + + +/* + * Add a page to the dirty page list. + */ +void set_page_dirty(struct page *page) +{ + if (!test_and_set_bit(PG_dirty, &page->flags)) { + struct address_space *mapping = page->mapping; + + if (mapping) { + spin_lock(&pagecache_lock); + list_del(&page->list); + list_add(&page->list, &mapping->dirty_pages); + spin_unlock(&pagecache_lock); + + if (mapping->host) + mark_inode_dirty_pages(mapping->host); + } + } +} + +/* + * Remove page from dirty list + */ +void __set_page_clean(struct page *page) +{ + struct address_space *mapping = page->mapping; + struct inode *inode; + + if (!mapping) + return; + + spin_lock(&pagecache_lock); + list_del(&page->list); + list_add(&page->list, &mapping->clean_pages); + + inode = mapping->host; + if (list_empty(&mapping->dirty_pages)) { + CDEBUG(D_INODE, "inode clean\n"); + inode->i_state &= ~I_DIRTY_PAGES; + } + spin_unlock(&pagecache_lock); + EXIT; +} + +inline void set_page_clean(struct page *page) +{ + if (PageDirty(page)) { + ClearPageDirty(page); + __set_page_clean(page); + } +} + +/* SYNCHRONOUS I/O to object storage for an inode -- object attr will be updated too */ +static int obdfs_brw(int rw, struct inode *inode, struct page *page, int create) +{ + obd_count num_obdo = 1; + obd_count bufs_per_obdo = 1; + struct obdo *oa; + obd_size count = PAGE_SIZE; + obd_off offset = ((obd_off)page->index) << PAGE_SHIFT; + obd_flag flags = create ? OBD_BRW_CREATE : 0; + int err; + + ENTRY; + if (IOPS(inode, brw) == NULL) { + printk(KERN_ERR __FUNCTION__ ": no brw method!\n"); + EXIT; + return -EIO; + } + + oa = obdo_alloc(); + if ( !oa ) { + EXIT; + return -ENOMEM; + } + oa->o_valid = OBD_MD_FLNOTOBD; + obdfs_from_inode(oa, inode); + + err = IOPS(inode, brw)(rw, IID(inode), num_obdo, &oa, &bufs_per_obdo, + &page, &count, &offset, &flags); + //if ( !err ) + // obdfs_to_inode(inode, oa); /* copy o_blocks to i_blocks */ + + obdo_free(oa); + EXIT; + return err; +} /* obdfs_brw */ + +extern void set_page_clean(struct page *); + +/* SYNCHRONOUS I/O to object storage for an inode -- object attr will be updated too */ +static int obdfs_commit_page(struct page *page, int create, int from, int to) +{ + struct inode *inode = page->mapping->host; + obd_count num_obdo = 1; + obd_count bufs_per_obdo = 1; + struct obdo *oa; + obd_size count = to; + obd_off offset = (((obd_off)page->index) << PAGE_SHIFT); + obd_flag flags = create ? OBD_BRW_CREATE : 0; + int err; + + ENTRY; + if (IOPS(inode, brw) == NULL) { + printk(KERN_ERR __FUNCTION__ ": no brw method!\n"); + EXIT; + return -EIO; + } + + oa = obdo_alloc(); + if ( !oa ) { + EXIT; + return -ENOMEM; + } + oa->o_valid = OBD_MD_FLNOTOBD; + obdfs_from_inode(oa, inode); + + CDEBUG(D_INODE, "commit_page writing (at %d) to %d, count %Ld\n", + from, to, count); + + err = IOPS(inode, brw)(WRITE, IID(inode), num_obdo, &oa, &bufs_per_obdo, + &page, &count, &offset, &flags); + if ( !err ) { + SetPageUptodate(page); + set_page_clean(page); + } + + //if ( !err ) + // obdfs_to_inode(inode, oa); /* copy o_blocks to i_blocks */ + + obdo_free(oa); + EXIT; + return err; +} /* obdfs_brw */ + + +/* returns the page unlocked, but with a reference */ +int obdfs_readpage(struct file *file, struct page *page) +{ + struct inode *inode = page->mapping->host; + int rc; + + ENTRY; + + if ( ((inode->i_size + PAGE_CACHE_SIZE -1)>>PAGE_SHIFT) + <= page->index) { + memset(kmap(page), 0, PAGE_CACHE_SIZE); + kunmap(page); + goto readpage_out; + } + + if (Page_Uptodate(page)) { + EXIT; + goto readpage_out; + } + + rc = obdfs_brw(READ, inode, page, 0); + if ( rc ) { + EXIT; + return rc; + } + /* PDEBUG(page, "READ"); */ + + readpage_out: + SetPageUptodate(page); + obd_unlock_page(page); + EXIT; + return 0; +} /* obdfs_readpage */ + +int obdfs_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to) +{ + struct inode *inode = page->mapping->host; + obd_off offset = ((obd_off)page->index) << PAGE_SHIFT; + int rc = 0; + ENTRY; + + kmap(page); + if (Page_Uptodate(page)) { + EXIT; + goto prepare_done; + } + + if ( (from <= offset) && (to >= offset + PAGE_SIZE) ) { + EXIT; + return 0; + } + + rc = obdfs_brw(READ, inode, page, 0); + if ( !rc ) { + SetPageUptodate(page); + } + + prepare_done: + set_page_dirty(page); + //SetPageDirty(page); + EXIT; + return rc; +} + + + + + + +static kmem_cache_t *obdfs_pgrq_cachep = NULL; + +int obdfs_init_pgrqcache(void) +{ + ENTRY; + if (obdfs_pgrq_cachep == NULL) { + CDEBUG(D_CACHE, "allocating obdfs_pgrq_cache\n"); + obdfs_pgrq_cachep = kmem_cache_create("obdfs_pgrq", + sizeof(struct obdfs_pgrq), + 0, SLAB_HWCACHE_ALIGN, + NULL, NULL); + if (obdfs_pgrq_cachep == NULL) { + EXIT; + return -ENOMEM; + } else { + CDEBUG(D_CACHE, "allocated cache at %p\n", + obdfs_pgrq_cachep); + } + } else { + CDEBUG(D_CACHE, "using existing cache at %p\n", + obdfs_pgrq_cachep); + } + EXIT; + return 0; +} /* obdfs_init_wreqcache */ + +inline void obdfs_pgrq_del(struct obdfs_pgrq *pgrq) +{ + --obdfs_cache_count; + CDEBUG(D_INFO, "deleting page %p from list [count %ld]\n", + pgrq->rq_page, obdfs_cache_count); + list_del(&pgrq->rq_plist); + OBDClearCachePage(pgrq->rq_page); + kmem_cache_free(obdfs_pgrq_cachep, pgrq); +} + +void obdfs_cleanup_pgrqcache(void) +{ + ENTRY; + if (obdfs_pgrq_cachep != NULL) { + CDEBUG(D_CACHE, "destroying obdfs_pgrqcache at %p, count %ld\n", + obdfs_pgrq_cachep, obdfs_cache_count); + if (kmem_cache_destroy(obdfs_pgrq_cachep)) + printk(KERN_INFO __FUNCTION__ + ": unable to free all of cache\n"); + obdfs_pgrq_cachep = NULL; + } else + printk(KERN_INFO __FUNCTION__ ": called with NULL pointer\n"); + + EXIT; +} /* obdfs_cleanup_wreqcache */ + + +/* called with the list lock held */ +static struct page *obdfs_find_page_index(struct inode *inode, + unsigned long index) +{ + struct list_head *page_list = obdfs_iplist(inode); + struct list_head *tmp; + struct page *page; + + ENTRY; + + CDEBUG(D_INFO, "looking for inode %ld pageindex %ld\n", + inode->i_ino, index); + OIDEBUG(inode); + + if (list_empty(page_list)) { + EXIT; + return NULL; + } + tmp = page_list; + while ( (tmp = tmp->next) != page_list ) { + struct obdfs_pgrq *pgrq; + + pgrq = list_entry(tmp, struct obdfs_pgrq, rq_plist); + page = pgrq->rq_page; + if (index == page->index) { + CDEBUG(D_INFO, + "INDEX SEARCH found page %p, index %ld\n", + page, index); + EXIT; + return page; + } + } + + EXIT; + return NULL; +} /* obdfs_find_page_index */ + + +/* call and free pages from Linux page cache: called with io lock on inodes */ +int obdfs_do_vec_wr(struct inode **inodes, obd_count num_io, + obd_count num_obdos, struct obdo **obdos, + obd_count *oa_bufs, struct page **pages, char **bufs, + obd_size *counts, obd_off *offsets, obd_flag *flags) +{ + int err; + + ENTRY; + if (IOPS(inodes[0], brw) == NULL) { + printk(KERN_ERR __FUNCTION__ ": no brw method!\n"); + EXIT; + return -EIO; + } + + CDEBUG(D_INFO, "writing %d page(s), %d obdo(s) in vector\n", + num_io, num_obdos); + if (obd_debug_level & D_INFO) { /* DEBUGGING */ + int i; + printk("OBDOS: "); + for (i = 0; i < num_obdos; i++) + printk("%ld:0x%p ", (long)obdos[i]->o_id, obdos[i]); + + printk("\nPAGES: "); + for (i = 0; i < num_io; i++) + printk("0x%p ", pages[i]); + printk("\n"); + } + + err = IOPS(inodes[0], brw)(WRITE, IID(inodes[0]), num_obdos, obdos, + oa_bufs, pages, counts, offsets, flags); + + CDEBUG(D_INFO, "BRW done\n"); + /* release the pages from the page cache */ + while ( num_io > 0 ) { + --num_io; + CDEBUG(D_INFO, "calling put_page for %p, index %ld\n", + pages[num_io], pages[num_io]->index); + /* PDEBUG(pages[num_io], "do_vec_wr"); */ + put_page(pages[num_io]); + /* PDEBUG(pages[num_io], "do_vec_wr"); */ + } + CDEBUG(D_INFO, "put_page done\n"); + + while ( num_obdos > 0) { + --num_obdos; + CDEBUG(D_INFO, "free obdo %ld\n",(long)obdos[num_obdos]->o_id); + /* copy o_blocks to i_blocks */ + obdfs_set_size (inodes[num_obdos], obdos[num_obdos]->o_size); + //obdfs_to_inode(inodes[num_obdos], obdos[num_obdos]); + obdo_free(obdos[num_obdos]); + } + CDEBUG(D_INFO, "obdo_free done\n"); + EXIT; + return err; +} + + +/* + * Add a page to the write request cache list for later writing. + * ASYNCHRONOUS write method. + */ +static int obdfs_add_page_to_cache(struct inode *inode, struct page *page) +{ + int err = 0; + ENTRY; + + /* The PG_obdcache bit is cleared by obdfs_pgrq_del() BEFORE the page + * is written, so at worst we will write the page out twice. + * + * If the page has the PG_obdcache bit set, then the inode MUST be + * on the superblock dirty list so we don't need to check this. + * Dirty inodes are removed from the superblock list ONLY when they + * don't have any more cached pages. It is possible to have an inode + * with no dirty pages on the superblock list, but not possible to + * have an inode with dirty pages NOT on the superblock dirty list. + */ + if (!OBDAddCachePage(page)) { + struct obdfs_pgrq *pgrq; + pgrq = kmem_cache_alloc(obdfs_pgrq_cachep, SLAB_KERNEL); + if (!pgrq) { + OBDClearCachePage(page); + EXIT; + return -ENOMEM; + } + /* not really necessary since we set all pgrq fields here + memset(pgrq, 0, sizeof(*pgrq)); + */ + + pgrq->rq_page = page; + pgrq->rq_jiffies = jiffies; + get_page(pgrq->rq_page); + + obd_down(&obdfs_i2sbi(inode)->osi_list_mutex); + list_add(&pgrq->rq_plist, obdfs_iplist(inode)); + obdfs_cache_count++; + //printk("-- count %d\n", obdfs_cache_count); + + /* If inode isn't already on superblock inodes list, add it. + * + * We increment the reference count on the inode to keep it + * from being freed from memory. This _should_ be an iget() + * with an iput() in both flush_reqs() and put_inode(), but + * since put_inode() is called from iput() we can't call iput() + * again there. Instead we just increment/decrement i_count, + * which is mostly what iget/iput do for an inode in memory. + */ + if ( list_empty(obdfs_islist(inode)) ) { + atomic_inc(&inode->i_count); + CDEBUG(D_INFO, + "adding inode %ld to superblock list %p\n", + inode->i_ino, obdfs_slist(inode)); + list_add(obdfs_islist(inode), obdfs_slist(inode)); + } + obd_up(&obdfs_i2sbi(inode)->osi_list_mutex); + + } + + /* XXX For testing purposes, we can write out the page here. + err = obdfs_flush_reqs(obdfs_slist(inode), ~0UL); + */ + + EXIT; + return err; +} /* obdfs_add_page_to_cache */ + +void rebalance(void) +{ + if (obdfs_cache_count > 60000) { + printk("-- count %ld\n", obdfs_cache_count); + //obdfs_flush_dirty_pages(~0UL); + printk("-- count %ld\n", obdfs_cache_count); + } +} + +/* select between SYNC and ASYNC I/O methods */ +int obdfs_do_writepage(struct page *page, int sync) +{ + struct inode *inode = page->mapping->host; + int err; + + ENTRY; + /* PDEBUG(page, "WRITEPAGE"); */ + if ( sync ) + err = obdfs_brw(WRITE, inode, page, 1); + else { + err = obdfs_add_page_to_cache(inode, page); + CDEBUG(D_INFO, "DO_WR ino: %ld, page %p, err %d, uptodate %d\n", + inode->i_ino, page, err, Page_Uptodate(page)); + } + + if ( !err ) { + SetPageUptodate(page); + set_page_clean(page); + } + /* PDEBUG(page,"WRITEPAGE"); */ + EXIT; + return err; +} /* obdfs_do_writepage */ + + + +/* returns the page unlocked, but with a reference */ +int obdfs_writepage(struct page *page) +{ + int rc; + struct inode *inode = page->mapping->host; + ENTRY; + printk("---> writepage called ino %ld!\n", inode->i_ino); + BUG(); + rc = obdfs_do_writepage(page, 1); + if ( !rc ) { + set_page_clean(page); + } else { + CDEBUG(D_INODE, "--> GRR %d\n", rc); + } + EXIT; + return rc; +} + +void write_inode_pages(struct inode *inode) +{ + struct list_head *tmp = &inode->i_mapping->dirty_pages; + + while ( (tmp = tmp->next) != &inode->i_mapping->dirty_pages) { + struct page *page; + page = list_entry(tmp, struct page, list); + obdfs_writepage(page); + } +} + + +int obdfs_commit_write(struct file *file, struct page *page, unsigned from, unsigned to) +{ + struct inode *inode = page->mapping->host; + int rc = 0; + loff_t len = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; + ENTRY; + CDEBUG(D_INODE, "commit write ino %ld (end at %Ld) from %d to %d ,ind %ld\n", + inode->i_ino, len, from, to, page->index); + + + if (cache_writes == 0) { + rc = obdfs_commit_page(page, 1, from, to); + } + + if (len > inode->i_size) { + obdfs_set_size(inode, len); + } + + kunmap(page); + EXIT; + return rc; +} + + +/* + * This does the "real" work of the write. The generic routine has + * allocated the page, locked it, done all the page alignment stuff + * calculations etc. Now we should just copy the data from user + * space and write it back to the real medium.. + * + * If the writer ends up delaying the write, the writer needs to + * increment the page use counts until he is done with the page. + * + * Return value is the number of bytes written. + */ +int obdfs_write_one_page(struct file *file, struct page *page, + unsigned long offset, unsigned long bytes, + const char * buf) +{ + struct inode *inode = file->f_dentry->d_inode; + int err; + + ENTRY; + /* We check for complete page writes here, as we then don't have to + * get the page before writing over everything anyways. + */ + if ( !Page_Uptodate(page) && (offset != 0 || bytes != PAGE_SIZE) ) { + err = obdfs_brw(READ, inode, page, 0); + if ( err ) + return err; + SetPageUptodate(page); + } + + if (copy_from_user((u8*)page_address(page) + offset, buf, bytes)) + return -EFAULT; + + lock_kernel(); + err = obdfs_writepage(page); + unlock_kernel(); + + return (err < 0 ? err : bytes); +} /* obdfs_write_one_page */ + +/* + * return an up to date page: + * - if locked is true then is returned locked + * - if create is true the corresponding disk blocks are created + * - page is held, i.e. caller must release the page + * + * modeled on NFS code. + */ +struct page *obdfs_getpage(struct inode *inode, unsigned long offset, + int create, int locked) +{ + struct page * page; + int index; + int err; + + ENTRY; + + offset = offset & PAGE_CACHE_MASK; + CDEBUG(D_INFO, "ino: %ld, offset %ld, create %d, locked %d\n", + inode->i_ino, offset, create, locked); + index = offset >> PAGE_CACHE_SHIFT; + + page = grab_cache_page(&inode->i_data, index); + + /* Yuck, no page */ + if (! page) { + printk(KERN_WARNING " grab_cache_page says no dice ...\n"); + EXIT; + return NULL; + } + + /* PDEBUG(page, "GETPAGE: got page - before reading\n"); */ + /* now check if the data in the page is up to date */ + if ( Page_Uptodate(page)) { + if (!locked) { + if (PageLocked(page)) + obd_unlock_page(page); + } else { + printk("file %s, line %d: expecting locked page\n", + __FILE__, __LINE__); + } + EXIT; + return page; + } + + +#ifdef EXT2_OBD_DEBUG + if ((obd_debug_level & D_INFO) && obdfs_find_page_index(inode, index)) { + CDEBUG(D_INFO, "OVERWRITE: found dirty page %p, index %ld\n", + page, page->index); + } +#endif + + err = obdfs_brw(READ, inode, page, create); + + if ( err ) { + SetPageError(page); + obd_unlock_page(page); + EXIT; + return page; + } + + if ( !locked ) + obd_unlock_page(page); + SetPageUptodate(page); + /* PDEBUG(page,"GETPAGE - after reading"); */ + EXIT; + return page; +} /* obdfs_getpage */ + + +void obdfs_truncate(struct inode *inode) +{ + struct obdo *oa; + int err; + ENTRY; + + //obdfs_dequeue_pages(inode); + + if (IOPS(inode, punch) == NULL) { + printk(KERN_ERR __FUNCTION__ ": no punch method!\n"); + EXIT; + return; + } + + oa = obdo_alloc(); + if ( !oa ) { + /* XXX This would give an inconsistent FS, so deal with it as + * best we can for now - an obdo on the stack is not pretty. + */ + struct obdo obdo; + + printk(__FUNCTION__ ": obdo_alloc failed - using stack!\n"); + + obdo.o_valid = OBD_MD_FLNOTOBD; + obdfs_from_inode(&obdo, inode); + + err = IOPS(inode, punch)(IID(inode), &obdo, 0, obdo.o_size); + } else { + oa->o_valid = OBD_MD_FLNOTOBD; + obdfs_from_inode(oa, inode); + + CDEBUG(D_INFO, "calling punch for %ld (%Lu bytes at 0)\n", + (long)oa->o_id, oa->o_size); + err = IOPS(inode, punch)(IID(inode), oa, oa->o_size, 0); + + obdo_free(oa); + } + + if (err) { + printk(__FUNCTION__ ": obd_truncate fails (%d)\n", err); + EXIT; + return; + } + EXIT; +} /* obdfs_truncate */ + +struct address_space_operations obdfs_aops = { + readpage: obdfs_readpage, + writepage: obdfs_writepage, + sync_page: block_sync_page, + prepare_write: obdfs_prepare_write, + commit_write: obdfs_commit_write, + bmap: NULL +}; diff --git a/lustre/llite/super.c b/lustre/llite/super.c new file mode 100644 index 0000000..b3f9ba6 --- /dev/null +++ b/lustre/llite/super.c @@ -0,0 +1,491 @@ + +/* + * OBDFS Super operations + * + * This code is issued under the GNU General Public License. + * See the file COPYING in this distribution + * + * Copryright (C) 1996 Peter J. Braam + * Copryright (C) 1999 Stelias Computing Inc. + * Copryright (C) 1999 Seagate Technology Inc. + * Copryright (C) 2001 Mountain View Data, Inc. + * Copryright (C) 2002 Cluster File Systems, Inc. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include + +struct list_head ll_super_list; +extern struct address_space_operations ll_aops; +struct super_operations ll_super_operations; +long ll_cache_count = 0; +long ll_mutex_start = 0; +long obd_memory = 0; + +static char *ll_read_opt(const char *opt, char *data) +{ + char *value; + char *retval; + + CDEBUG(D_INFO, "option: %s, data %s\n", opt, data); + if ( strncmp(opt, data, strlen(opt)) ) + return NULL; + + if ( (value = strchr(data, '=')) == NULL ) + return NULL; + + value++; + OBD_ALLOC(retval, char *, strlen(value) + 1); + if ( !retval ) { + printk(KERN_ALERT __FUNCTION__ ": out of memory!\n"); + return NULL; + } + + memcpy(retval, value, strlen(value)+1); + CDEBUG(D_PSDEV, "Assigned option: %s, value %s\n", opt, retval); + return retval; +} + +static void ll_options(char *options, char **dev, char **vers) +{ + char *this_char; + + if (!options) + return; + + for (this_char = strtok (options, ","); + this_char != NULL; + this_char = strtok (NULL, ",")) { + CDEBUG(D_INFO, "this_char %s\n", this_char); + if ( (!*dev && (*dev = ll_read_opt("device", this_char)))|| + (!*vers && (*vers = ll_read_opt("version", this_char))) ) + continue; + + } +} + +static struct super_block * ll_read_super(struct super_block *sb, + void *data, int silent) +{ + struct inode *root = 0; + struct ll_sb_info *sbi = (struct ll_sb_info *)(&sb->u.generic_sbp); + struct obd_device *obddev; + char *device = NULL; + char *version = NULL; + int root_ino = 2; + int connected = 0; + int devno; + int err; + struct obdo *oa; + + + ENTRY; + MOD_INC_USE_COUNT; + memset(sbi, 0, sizeof(*sbi)); + + CDEBUG(D_INFO, "\n"); + ll_options(data, &device, &version); + if ( !device ) { + printk(__FUNCTION__ ": no device\n"); + EXIT; + goto ERR; + } + + devno = simple_strtoul(device, NULL, 0); + CDEBUG(D_INFO, "\n"); + if ( devno >= MAX_OBD_DEVICES ) { + printk(__FUNCTION__ ": device of %s too high (%d)\n", device, devno); + EXIT; + goto ERR; + } + + CDEBUG(D_INFO, "\n"); + obddev = &obd_dev[devno]; + + + CDEBUG(D_INFO, "\n"); + if ( ! (obddev->obd_flags & OBD_ATTACHED) || + ! (obddev->obd_flags & OBD_SET_UP) ){ + printk("device %s not attached or not set up (%d)\n", + device, MINOR(devno)); + EXIT; + goto ERR;; + } + + CDEBUG(D_INFO, "\n"); + sbi->ll_obd = obddev; + sbi->ll_ops = sbi->ll_obd->obd_type->typ_ops; + + sbi->ll_conn.oc_dev = obddev; + err = sbi->ll_ops->o_connect(&sbi->ll_conn); + if ( err ) { + printk("OBDFS: cannot connect to %s\n", device); + EXIT; + goto ERR; + } + + connected = 1; + CDEBUG(D_INFO, "\n"); + /* list of dirty inodes, and a mutex to hold while modifying it */ + INIT_LIST_HEAD(&sbi->ll_inodes); + init_MUTEX (&sbi->ll_list_mutex); + + CDEBUG(D_INFO, "\n"); + sbi->ll_super = sb; + sbi->ll_rootino = 2; + + CDEBUG(D_INFO, "\n"); + sb->s_maxbytes = 1LL << 36; + printk("Max bytes: %Lx\n", sb->s_maxbytes); + sb->s_blocksize = PAGE_SIZE; + sb->s_blocksize_bits = (unsigned char)PAGE_SHIFT; + sb->s_magic = LL_SUPER_MAGIC; + sb->s_op = &ll_super_operations; + + /* make root inode */ + CDEBUG(D_INFO, "\n"); + oa = obdo_fromid(&sbi->ll_conn, root_ino, S_IFDIR, + OBD_MD_FLNOTOBD | OBD_MD_FLBLOCKS); + CDEBUG(D_INFO, "mode %o\n", oa->o_mode); + if ( IS_ERR(oa) ) { + printk(__FUNCTION__ ": obdo_fromid failed\n"); + iput(root); + EXIT; + goto ERR; + } + CDEBUG(D_INFO, "\n"); + root = iget4(sb, root_ino, NULL, oa); + obdo_free(oa); + CDEBUG(D_INFO, "\n"); + if (!root) { + printk("OBDFS: bad iget4 for root\n"); + sb->s_dev = 0; + err = -ENOENT; + EXIT; + goto ERR; + } + + sb->s_root = d_alloc_root(root); + list_add(&sbi->ll_list, &ll_super_list); + OBD_FREE(device, strlen(device) + 1); + if (version) + OBD_FREE(version, strlen(version) + 1); + EXIT; + return sb; + +ERR: + MOD_DEC_USE_COUNT; + if (device) + OBD_FREE(device, strlen(device) + 1); + if (version) + OBD_FREE(version, strlen(version) + 1); + if (connected) + sbi->ll_ops->o_disconnect(&sbi->ll_conn); + + if (sbi) { + sbi->ll_super = NULL; + } + if (root) { + iput(root); + } + sb->s_dev = 0; + return NULL; +} /* ll_read_super */ + + +static void ll_put_super(struct super_block *sb) +{ + struct ll_sb_info *sbi; + + ENTRY; + sb->s_dev = 0; + + sbi = (struct ll_sb_info *) &sb->u.generic_sbp; + //ll_flush_reqs(&sbi->ll_inodes, ~0UL); + + OPS(sb,disconnect)(ID(sb)); + list_del(&sbi->ll_list); + + printk(KERN_INFO "OBDFS: Bye bye.\n"); + + MOD_DEC_USE_COUNT; + EXIT; +} /* ll_put_super */ + + +void ll_do_change_inode(struct inode *inode, int valid) +{ + struct obdo *oa; + int err; + + ENTRY; + if (IOPS(inode, setattr) == NULL) { + printk(KERN_ERR __FUNCTION__ ": no setattr method!\n"); + EXIT; + return; + } + oa = obdo_alloc(); + if ( !oa ) { + printk(__FUNCTION__ ": obdo_alloc failed\n"); + EXIT; + return; + } + + oa->o_valid = OBD_MD_FLNOTOBD & (valid | OBD_MD_FLID); + ll_from_inode(oa, inode); + oa->o_mode = inode->i_mode; + err = IOPS(inode, setattr)(IID(inode), oa); + + if ( err ) + printk(__FUNCTION__ ": obd_setattr fails (%d)\n", err); + + EXIT; + obdo_free(oa); +} /* ll_write_inode */ + +void ll_change_inode(struct inode *inode, int mask) +{ + return ll_do_change_inode(inode, OBD_MD_FLNLINK); +} + + +extern void write_inode_pages(struct inode *); +/* This routine is called from iput() (for each unlink on the inode). + * We can't put this call into delete_inode() since that is called only + * when i_count == 0, and we need to keep a reference on the inode while + * it is in the page cache, which means i_count > 0. Catch 22. + */ +static void ll_put_inode(struct inode *inode) +{ + ENTRY; + if (inode->i_nlink && (atomic_read(&inode->i_count) == 1)) { + write_inode_pages(inode); + EXIT; + return; + } + + //ll_dequeue_pages(inode); + EXIT; +} /* ll_put_inode */ + + +static void ll_delete_inode(struct inode *inode) +{ + ll_do_change_inode(inode, ~0); + clear_inode(inode); +} +#if 0 +{ + struct obdo *oa; + int err; + + ENTRY; + if (IOPS(inode, destroy) == NULL) { + printk(KERN_ERR __FUNCTION__ ": no destroy method!\n"); + EXIT; + return; + } + + oa = obdo_alloc(); + if ( !oa ) { + printk(__FUNCTION__ ": obdo_alloc failed\n"); + EXIT; + return; + } + oa->o_valid = OBD_MD_FLNOTOBD; + ll_from_inode(oa, inode); + + /* XXX how do we know that this inode is now clean? */ + printk("delete_inode ------> link %d\n", inode->i_nlink); + ODEBUG(oa); + err = IOPS(inode, destroy)(IID(inode), oa); + obdo_free(oa); + clear_inode(inode); + if (err) { + printk(__FUNCTION__ ": obd_destroy fails (%d)\n", err); + EXIT; + return; + } + + EXIT; +} /* ll_delete_inode */ +#endif + + +static int ll_attr2inode(struct inode * inode, struct iattr * attr) +{ + unsigned int ia_valid = attr->ia_valid; + int error = 0; + + if (ia_valid & ATTR_SIZE) { + error = vmtruncate(inode, attr->ia_size); + if (error) + goto out; + } + + if (ia_valid & ATTR_UID) + inode->i_uid = attr->ia_uid; + if (ia_valid & ATTR_GID) + inode->i_gid = attr->ia_gid; + if (ia_valid & ATTR_ATIME) + inode->i_atime = attr->ia_atime; + if (ia_valid & ATTR_MTIME) + inode->i_mtime = attr->ia_mtime; + if (ia_valid & ATTR_CTIME) + inode->i_ctime = attr->ia_ctime; + if (ia_valid & ATTR_MODE) { + inode->i_mode = attr->ia_mode; + if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID)) + inode->i_mode &= ~S_ISGID; + } +out: + return error; +} + +int ll_setattr(struct dentry *de, struct iattr *attr) +{ + struct inode *inode = de->d_inode; + struct obdo *oa; + int err; + + ENTRY; + if (IOPS(inode, setattr) == NULL) { + printk(KERN_ERR __FUNCTION__ ": no setattr method!\n"); + EXIT; + return -EIO; + } + oa = obdo_alloc(); + if ( !oa ) { + printk(__FUNCTION__ ": obdo_alloc failed\n"); + return -ENOMEM; + } + + ll_attr2inode(inode, attr); + oa->o_id = inode->i_ino; + oa->o_mode = inode->i_mode; + obdo_from_iattr(oa, attr); + err = IOPS(inode, setattr)(IID(inode), oa); + + if ( err ) + printk(__FUNCTION__ ": obd_setattr fails (%d)\n", err); + + EXIT; + obdo_free(oa); + return err; +} /* ll_setattr */ + + + +static int ll_statfs(struct super_block *sb, struct statfs *buf) +{ + struct statfs tmp; + int err; + + ENTRY; + + err = OPS(sb,statfs)(ID(sb), &tmp); + if ( err ) { + printk(__FUNCTION__ ": obd_statfs fails (%d)\n", err); + return err; + } + memcpy(buf, &tmp, sizeof(*buf)); + CDEBUG(D_SUPER, "statfs returns avail %ld\n", tmp.f_bavail); + EXIT; + + return err; +} + +static inline void ll_read_inode2(struct inode *inode, void *opaque) +{ + struct obdo *oa = opaque; + + ENTRY; + ll_to_inode(inode, oa); + + INIT_LIST_HEAD(ll_iplist(inode)); /* list of dirty pages on inode */ + INIT_LIST_HEAD(ll_islist(inode)); /* list of inodes in superblock */ + + /* OIDEBUG(inode); */ + + if (S_ISREG(inode->i_mode)) { + inode->i_op = &ll_file_inode_operations; + inode->i_fop = &ll_file_operations; + inode->i_mapping->a_ops = &ll_aops; + EXIT; + } else if (S_ISDIR(inode->i_mode)) { + inode->i_op = &ll_dir_inode_operations; + inode->i_fop = &ll_dir_operations; + inode->i_mapping->a_ops = &ll_aops; + EXIT; + } else if (S_ISLNK(inode->i_mode)) { + if (inode->i_blocks) { + inode->i_op = &ll_symlink_inode_operations; + inode->i_mapping->a_ops = &ll_aops; + }else { + inode->i_op = &ll_fast_symlink_inode_operations; + } + EXIT; + } else { + init_special_inode(inode, inode->i_mode, + ((int *)ll_i2info(inode)->oi_inline)[0]); + } + + EXIT; + return; +} + +/* exported operations */ +struct super_operations ll_super_operations = +{ + read_inode2: ll_read_inode2, + // put_inode: ll_put_inode, + // delete_inode: ll_delete_inode, + // put_super: ll_put_super, + // statfs: ll_statfs +}; + + + +struct file_system_type lustre_light_fs_type = { + "lustre_light", 0, ll_read_super, NULL +}; + +static int __init init_lustre_light(void) +{ + printk(KERN_INFO "Lustre Light 0.0.1, braam@clusterfs.com\n"); + + return register_filesystem(&lustre_light_fs_type); +} + +static void __exit exit_lustre_light(void) +{ + unregister_filesystem(&lustre_light_fs_type); +} + +MODULE_AUTHOR("Peter J. Braam "); +MODULE_DESCRIPTION("Lustre Light Client File System v1.0"); +MODULE_LICENSE("GPL"); + +module_init(init_lustre_light); +module_exit(exit_lustre_light); diff --git a/lustre/llite/symlink.c b/lustre/llite/symlink.c new file mode 100644 index 0000000..01484fd --- /dev/null +++ b/lustre/llite/symlink.c @@ -0,0 +1,94 @@ +/* + * linux/fs/ext2/symlink.c + * + * This code is issued under the GNU General Public License. + * See the file COPYING in this distribution + * + * Copyright (C) 1992, 1993, 1994, 1995 + * Remy Card (card@masi.ibp.fr) + * Laboratoire MASI - Institut Blaise Pascal + * Universite Pierre et Marie Curie (Paris VI) + * + * from + * + * linux/fs/minix/symlink.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + * + * ext2 symlink handling code + * + * Modified for OBDFS: + * Copyright (C) 1999 Seagate Technology Inc. (author: braam@stelias.com) + */ + +#include +#include +#include +#include +#include /* for ENTRY and EXIT only */ +#include + +static int obdfs_fast_readlink(struct dentry *dentry, char *buffer, int buflen) +{ + char *s = obdfs_i2info(dentry->d_inode)->oi_inline; + return vfs_readlink(dentry, buffer, buflen, s); +} + +static int obdfs_fast_follow_link(struct dentry *dentry, struct nameidata *nd) +{ + char *s = obdfs_i2info(dentry->d_inode)->oi_inline; + return vfs_follow_link(nd, s); +} + +extern int obdfs_setattr(struct dentry *de, struct iattr *attr); +struct inode_operations obdfs_fast_symlink_inode_operations = { + readlink: obdfs_fast_readlink, + follow_link: obdfs_fast_follow_link, + setattr: obdfs_setattr +}; + +static int obdfs_readlink(struct dentry *dentry, char *buffer, int buflen) +{ + struct page *page = NULL; + int res; + + ENTRY; + OIDEBUG(dentry->d_inode); + page = obdfs_getpage(dentry->d_inode, 0, 0, 0); + /* PDEBUG(page, "readlink"); */ + if (!page) { + EXIT; + return 0; + } + res = vfs_readlink(dentry, buffer, buflen, (char *)page_address(page)); + page_cache_release(page); + EXIT; + return res; +} /* obdfs_readlink */ + +static int obdfs_follow_link(struct dentry * dentry, + struct nameidata *nd) +{ + struct page *page = NULL; + int res; + + ENTRY; + OIDEBUG(dentry->d_inode); + page = obdfs_getpage(dentry->d_inode, 0, 0, 0); + /* PDEBUG(page, "follow_link"); */ + if (!page) { + dput(nd->dentry); + EXIT; + return -EIO; + } + res = vfs_follow_link(nd, (char *)page_address(page)); + page_cache_release(page); + EXIT; + return res; +} + +struct inode_operations obdfs_symlink_inode_operations = { + readlink: obdfs_readlink, + follow_link: obdfs_follow_link, + setattr: obdfs_setattr +}; diff --git a/lustre/llite/sysctl.c b/lustre/llite/sysctl.c new file mode 100644 index 0000000..c03b0289 --- /dev/null +++ b/lustre/llite/sysctl.c @@ -0,0 +1,66 @@ +/* + * Copyright (C) 2001 Cluster File Systems, Inc. + * + * This code is issued under the GNU General Public License. + * See the file COPYING in this distribution + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +struct ctl_table_header *obdfs_table_header = NULL; + +int obdfs_debug_level = 0; +int obdfs_print_entry = 1; + + +#define OBDFS_SYSCTL 1 + +#define OBDFS_DEBUG 1 /* control debugging */ +#define OBDFS_ENTRY 2 /* control enter/leave pattern */ +#define OBDFS_TIMEOUT 3 /* timeout on upcalls to become intrble */ +#define OBDFS_HARD 4 /* mount type "hard" or "soft" */ +#define OBDFS_VARS 5 +#define OBDFS_INDEX 6 +#define OBDFS_RESET 7 + +#define OBDFS_VARS_SLOT 2 + +static ctl_table obdfs_table[] = { + {OBDFS_DEBUG, "debug", &obdfs_debug_level, sizeof(int), 0644, NULL, &proc_dointvec}, + {OBDFS_ENTRY, "trace", &obdfs_print_entry, sizeof(int), 0644, NULL, &proc_dointvec}, + { 0 } +}; + +static ctl_table top_table[] = { + {OBDFS_SYSCTL, "obdfs", NULL, 0, 0555, obdfs_table}, + {0} +}; + +void obdfs_sysctl_init (void) +{ + +#ifdef CONFIG_SYSCTL + if ( !obdfs_table_header ) + obdfs_table_header = register_sysctl_table(top_table, 0); +#endif +} + +void obdfs_sysctl_clean (void) +{ +#ifdef CONFIG_SYSCTL + if ( obdfs_table_header ) + unregister_sysctl_table(obdfs_table_header); + obdfs_table_header = NULL; +#endif +} diff --git a/lustre/mdc/Makefile.am b/lustre/mdc/Makefile.am new file mode 100644 index 0000000..65234e6 --- /dev/null +++ b/lustre/mdc/Makefile.am @@ -0,0 +1,18 @@ +# Copyright (C) 2001 Cluster File Systems, Inc. +# +# This code is issued under the GNU General Public License. +# See the file COPYING in this distribution + +DEFS:= + +MODULE = mdc +modulefs_DATA = mdc.o +EXTRA_PROGRAMS = mdc + + +mdc_SOURCES = mds_pack.c mdc_request.c + +mds_pack.c: + ln -s ../lib/mds_pack.c . + +include $(top_srcdir)/Rules diff --git a/lustre/llite/request.c b/lustre/mdc/mdc_request.c similarity index 62% rename from lustre/llite/request.c rename to lustre/mdc/mdc_request.c index baf102e..37feabd 100644 --- a/lustre/llite/request.c +++ b/lustre/mdc/mdc_request.c @@ -17,6 +17,7 @@ #include #include +#include #include #include @@ -31,63 +32,98 @@ #include #define REQUEST_MINOR 244 + extern int mds_queue_req(struct mds_request *); -static int mds_send_req(struct mds_request *req) +struct mds_request *mds_prep_req(int size, int opcode) { + struct mds_request *request; int rc; - init_waitqueue_head(&req->rq_wait_for_rep); - /* XXX replace the following with networking code */ + ENTRY; + + request = (struct mds_request *)kmalloc(sizeof(*request), GFP_KERNEL); + if (!request) { + printk("mds_prep_req: request allocation out of memory\n"); + return NULL; + } + + rc = mds_pack_req(NULL, 0, NULL, 0, + &request->rq_reqhdr, &request->rq_req, + &request->rq_reqlen, &request->rq_reqbuf); + if (rc) { + printk("llight request: cannot pack request %d\n", rc); + return NULL; + } + request->rq_reqhdr->opc = opcode; + + EXIT; + return request; +} + + + + +static int mds_queue_wait(struct mds_request *req) +{ + int rc; + + /* XXX fix the race here (wait_for_event?)*/ + /* hand the packet over to the server */ rc = mds_queue_req(req); if (rc) { - EXIT; - return rc; + printk("osc_queue_wait: error %d, opcode %d\n", rc, + req->rq_reqhdr->opc); + return -rc; } + init_waitqueue_head(&req->rq_wait_for_rep); printk("-- sleeping\n"); interruptible_sleep_on(&req->rq_wait_for_rep); printk("-- done\n"); - return 0; + + mds_unpack_rep(req->rq_repbuf, req->rq_replen, &req->rq_rephdr, + &req->rq_rep); + printk("-->osc_queue_wait: buf %p len %d status %d\n", + req->rq_repbuf, req->rq_replen, req->rq_rephdr->status); + + EXIT; + return req->rq_rephdr->status; } -int llight_getattr(ino_t ino, struct mds_rep *rep) +void mds_free_req(struct mds_request *request) +{ + kfree(request); +} + +int mdc_getattr(ino_t ino, struct mds_rep **rep) { struct mds_request *request; int rc; - request = (struct mds_request *)kmalloc(sizeof(*request), - GFP_KERNEL); + request = mds_prep_req(sizeof(*request), MDS_GETATTR); if (!request) { - printk("llight request: out of memory\n"); + printk("llight request: cannot pack\n"); return -ENOMEM; } - rc = mds_pack_req(NULL, 0, NULL, 0, - &request->rq_reqhdr, &request->rq_req, - &request->rq_reqlen, &request->rq_reqbuf); - if (rc) { - printk("llight request: cannot pack request %d\n", rc); - return rc; - } request->rq_req->fid1.id = ino; - request->rq_reqhdr->opc = MDS_GETATTR; - - rc = mds_send_req(request); + rc = mds_queue_wait(request); if (rc) { printk("llight request: error in handling %d\n", rc); - return rc; + goto out; } - printk("mode: %o\n", request->rq_rep->mode); - if (rep) { - memcpy(rep, request->rq_repbuf, sizeof(*rep)); + printk("mds_getattr: mode: %o\n", request->rq_rep->mode); + + if (rep ) { + *rep = request->rq_rep; } - kfree(request->rq_repbuf); - kfree(request); - return 0; -} + out: + mds_free_req(request); + return rc; +} static int request_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg) @@ -114,7 +150,7 @@ static int request_ioctl(struct inode *inode, struct file *file, switch (cmd) { case IOC_REQUEST_GETATTR: { printk("-- getting attr for ino 2\n"); - err = llight_getattr(2, NULL); + err = mdc_getattr(2, NULL); printk("-- done err %d\n", err); break; } @@ -140,27 +176,24 @@ static struct miscdevice request_dev = { }; -int init_request_module(void) +static int __init mds_request_init(void) { - misc_register( &request_dev ); + misc_register(&request_dev); return 0 ; } -#ifdef MODULE -MODULE_AUTHOR("Peter J. Braam "); -MODULE_DESCRIPTION("Lustre MDS Request Tester v1.0"); - -#include - -int init_module(void) -{ - return init_request_module(); -} -void cleanup_module(void) +static void __exit mds_request_exit(void) { misc_deregister(&request_dev); - return; } -#endif +MODULE_AUTHOR("Peter J. Braam "); +MODULE_DESCRIPTION("Lustre MDS Request Tester v1.0"); +MODULE_LICENSE("GPL"); + +EXPORT_SYMBOL(mdc_getattr); + + +module_init(mds_request_init); +module_exit(mds_request_exit); diff --git a/lustre/mds/handler.c b/lustre/mds/handler.c index b0ade44..1e40b3e 100644 --- a/lustre/mds/handler.c +++ b/lustre/mds/handler.c @@ -38,43 +38,86 @@ static struct mds_obd *MDS; // XXX make this networked! static int mds_queue_req(struct mds_request *req) { - struct mds_request *srv_request; + struct mds_request *srv_req; if (!MDS) { EXIT; return -1; } - srv_request = kmalloc(sizeof(*srv_request), GFP_KERNEL); - if (!srv_request) { + srv_req = kmalloc(sizeof(*srv_req), GFP_KERNEL); + if (!srv_req) { EXIT; return -ENOMEM; } - /* move the request buffer */ - srv_request->rq_reqlen = req->rq_reqlen; - srv_request->rq_reqbuf = req->rq_reqbuf; - srv_request->rq_obd = MDS; + printk("---> MDS at %d %p, incoming req %p, srv_req %p\n", + __LINE__, MDS, req, srv_req); + + memset(srv_req, 0, sizeof(*req)); - req->rq_reqbuf = NULL; - req->rq_reqlen = 0; + /* move the request buffer */ + srv_req->rq_reqbuf = req->rq_reqbuf; + srv_req->rq_reqlen = req->rq_reqlen; + srv_req->rq_obd = MDS; /* remember where it came from */ - srv_request->rq_reply_handle = req; + srv_req->rq_reply_handle = req; - /* get the server working on this request */ - spin_lock(&MDS->mds_lock); - list_add(&srv_request->rq_list, &MDS->mds_reqs); - spin_unlock(&MDS->mds_lock); + list_add(&srv_req->rq_list, &MDS->mds_reqs); wake_up(&MDS->mds_waitq); + return 0; +} - /* put client asleep */ - printk("-- sleeping\n"); - interruptible_sleep_on(&req->rq_wait_for_rep); - printk("-- done\n"); +/* XXX replace with networking code */ +int mds_reply(struct mds_request *req) +{ + struct mds_request *clnt_req = req->rq_reply_handle; + + ENTRY; + + /* free the request buffer */ + kfree(req->rq_reqbuf); + req->rq_reqbuf = NULL; + + /* move the reply to the client */ + clnt_req->rq_replen = req->rq_replen; + clnt_req->rq_repbuf = req->rq_repbuf; + req->rq_repbuf = NULL; + req->rq_replen = 0; + + /* wake up the client */ + wake_up_interruptible(&clnt_req->rq_wait_for_rep); + EXIT; return 0; } +int mds_error(struct mds_request *req) +{ + struct mds_rep_hdr *hdr; + + ENTRY; + hdr = kmalloc(sizeof(*hdr), GFP_KERNEL); + if (!hdr) { + EXIT; + return -ENOMEM; + } + + memset(hdr, 0, sizeof(*hdr)); + + hdr->seqno = req->rq_reqhdr->seqno; + hdr->status = req->rq_status; + hdr->type = MDS_TYPE_ERR; + + req->rq_repbuf = (char *)hdr; + req->rq_replen = sizeof(*hdr); + + EXIT; + return mds_reply(req); +} + + + static struct dentry *mds_fid2dentry(struct mds_obd *mds, struct lustre_fid *fid) { struct dentry *de; @@ -134,52 +177,6 @@ int mds_getattr(struct mds_request *req) return 0; } -/* XXX replace with networking code */ -int mds_reply(struct mds_request *req) -{ - struct mds_request *clnt_req = req->rq_reply_handle; - - ENTRY; - - /* free the request buffer */ - kfree(req->rq_reqbuf); - req->rq_reqbuf = NULL; - - /* move the reply to the client */ - clnt_req->rq_replen = req->rq_replen; - clnt_req->rq_repbuf = req->rq_repbuf; - req->rq_repbuf = NULL; - req->rq_replen = 0; - - /* wake up the client */ - wake_up_interruptible(&clnt_req->rq_wait_for_rep); - EXIT; - return 0; -} - -int mds_error(struct mds_request *req) -{ - struct mds_rep_hdr *hdr; - - ENTRY; - hdr = kmalloc(sizeof(*hdr), GFP_KERNEL); - if (!hdr) { - EXIT; - return -ENOMEM; - } - - memset(hdr, 0, sizeof(*hdr)); - - hdr->seqno = req->rq_reqhdr->seqno; - hdr->status = req->rq_status; - hdr->type = MDS_TYPE_ERR; - - req->rq_repbuf = (char *)hdr; - req->rq_replen = sizeof(*hdr); - - EXIT; - return mds_reply(req); -} //int mds_handle(struct mds_conn *conn, int len, char *buf) int mds_handle(struct mds_request *req) diff --git a/lustre/osc/Makefile.am b/lustre/osc/Makefile.am index ce6bcb6..602a073 100644 --- a/lustre/osc/Makefile.am +++ b/lustre/osc/Makefile.am @@ -3,6 +3,8 @@ # This code is issued under the GNU General Public License. # See the file COPYING in this distribution +DEFS:= + MODULE = osc modulefs_DATA = osc.o EXTRA_PROGRAMS = osc diff --git a/lustre/osc/osc_request.c b/lustre/osc/osc_request.c index e8721f0..ae12729 100644 --- a/lustre/osc/osc_request.c +++ b/lustre/osc/osc_request.c @@ -40,7 +40,7 @@ struct ost_request *osc_prep_req(int size, int opcode) request = (struct ost_request *)kmalloc(sizeof(*request), GFP_KERNEL); if (!request) { - printk("osc_getattr: request allocation out of memory\n"); + printk("osc_prep_req: request allocation out of memory\n"); return NULL; } diff --git a/lustre/ost/ost_handler.c b/lustre/ost/ost_handler.c index 6e87dd0..51468c8 100644 --- a/lustre/ost/ost_handler.c +++ b/lustre/ost/ost_handler.c @@ -57,9 +57,13 @@ static int ost_queue_req(struct obd_device *obddev, struct ost_request *req) __LINE__, ost, req, srv_req); memset(srv_req, 0, sizeof(*req)); + + /* move the request buffer */ srv_req->rq_reqbuf = req->rq_reqbuf; srv_req->rq_reqlen = req->rq_reqlen; srv_req->rq_obd = ost; + + /* remember where it came from */ srv_req->rq_reply_handle = req; list_add(&srv_req->rq_list, &ost->ost_reqs); @@ -115,7 +119,9 @@ int ost_error(struct obd_device *obddev, struct ost_request *req) hdr->seqno = req->rq_reqhdr->seqno; hdr->status = req->rq_status; hdr->type = OST_TYPE_ERR; + req->rq_repbuf = (char *)hdr; + req->rq_replen = sizeof(*hdr); EXIT; return ost_reply(obddev, req); diff --git a/lustre/tests/umreq.sh b/lustre/tests/umreq.sh index a8f0bca..94c5cae 100644 --- a/lustre/tests/umreq.sh +++ b/lustre/tests/umreq.sh @@ -2,14 +2,20 @@ R=/r + insmod /lib/modules/2.4.17/kernel/drivers/block/loop.o +insmod $R/usr/src/obd/class/obdclass.o +insmod $R/usr/src/obd/ext2obd/obdext2.o +insmod $R/usr/src/obd/ost/ost.o +insmod $R/usr/src/obd/osc/osc.o +insmod $R/usr/src/obd/mds/mds.o +insmod $R/usr/src/obd/llight/llight.o + dd if=/dev/zero of=/tmp/fs bs=1024 count=10000 mke2fs -F /tmp/fs losetup /dev/loop/0 /tmp/fs -insmod $R/usr/src/obd/class/obdclass.o -insmod $R/usr/src/obd/mds/mds.o mknod /dev/obd c 10 241 $R/usr/src/obd/utils/obdctl <