From: nfshp Date: Fri, 4 Apr 2003 10:39:49 +0000 (+0000) Subject: add rudiment I/O support X-Git-Tag: v1_7_100~1^94~30 X-Git-Url: https://git.whamcloud.com/?a=commitdiff_plain;h=2d6d5b0a2a9cf47c93ef3e12061605ae597a0521;p=fs%2Flustre-release.git add rudiment I/O support --- diff --git a/lustre/liblustre/file.c b/lustre/liblustre/file.c index 9a97c16..15259a9 100644 --- a/lustre/liblustre/file.c +++ b/lustre/liblustre/file.c @@ -137,9 +137,9 @@ static struct inode *llu_create_node(struct inode *dir, const char *name, int llu_create(struct inode *dir, struct pnode_base *pnode, int mode) { struct inode *inode; +#if 0 int rc = 0; -#if 0 CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu,intent=%s\n", dentry->d_name.name, dir->i_ino, LL_IT2STR(dentry->d_it)); @@ -305,7 +305,9 @@ out: static int llu_file_open(struct inode *inode) { +#if 0 struct llu_sb_info *sbi = llu_i2sbi(inode); +#endif struct llu_inode_info *lli = llu_i2info(inode); struct lustre_handle *conn = llu_i2obdconn(inode); struct lookup_intent *it; @@ -375,3 +377,56 @@ int llu_iop_open(struct pnode *pnode, int flags, mode_t mode) LASSERT(pnode->p_base->pb_ino); return llu_file_open(pnode->p_base->pb_ino); } + +int llu_iop_ipreadv(struct inode *ino, + struct io_arguments *ioargs, + struct ioctx **ioctxp) +{ + struct ioctx *ioctx; + + if (!ioargs->ioarg_iovlen) + return 0; + if (ioargs->ioarg_iovlen < 0) + return -EINVAL; + + ioctx = _sysio_ioctx_new(ino, ioargs); + if (!ioctx) + return -ENOMEM; + + ioctx->ioctx_cc = llu_file_read(ino, + ioctx->ioctx_iovec, + ioctx->ioctx_iovlen, + ioctx->ioctx_offset); + if (ioctx->ioctx_cc < 0) + ioctx->ioctx_errno = ioctx->ioctx_cc; + + *ioctxp = ioctx; + return 0; +} + +int llu_iop_ipwritev(struct inode *ino, + struct io_arguments *ioargs, + struct ioctx **ioctxp) +{ + struct ioctx *ioctx; + + if (!ioargs->ioarg_iovlen) + return 0; + if (ioargs->ioarg_iovlen < 0) + return -EINVAL; + + ioctx = _sysio_ioctx_new(ino, ioargs); + if (!ioctx) + return -ENOMEM; + + ioctx->ioctx_cc = llu_file_write(ino, + ioctx->ioctx_iovec, + ioctx->ioctx_iovlen, + ioctx->ioctx_offset); + if (ioctx->ioctx_cc < 0) + ioctx->ioctx_errno = ioctx->ioctx_cc; + + *ioctxp = ioctx; + return 0; +} + diff --git a/lustre/liblustre/llite_lib.h b/lustre/liblustre/llite_lib.h index a454ce6..dbd4f3e 100644 --- a/lustre/liblustre/llite_lib.h +++ b/lustre/liblustre/llite_lib.h @@ -96,5 +96,16 @@ struct inode* llu_new_inode(struct filesys *fs, ino_t ino, mode_t mode); /* file.c */ int llu_create(struct inode *dir, struct pnode_base *pnode, int mode); int llu_iop_open(struct pnode *pnode, int flags, mode_t mode); +int llu_iop_ipreadv(struct inode *ino, + struct io_arguments *ioargs, + struct ioctx **ioctxp); +int llu_iop_ipwritev(struct inode *ino, + struct io_arguments *ioargs, + struct ioctx **ioctxp); + +/* rw.c */ +int llu_iop_iodone(struct ioctx *ioctxp __IS_UNUSED); +ssize_t llu_file_write(struct inode *inode, const struct iovec *iovec, + size_t iovlen, loff_t pos); #endif diff --git a/lustre/liblustre/lltest.c b/lustre/liblustre/lltest.c index 9c74a79..f26e006 100644 --- a/lustre/liblustre/lltest.c +++ b/lustre/liblustre/lltest.c @@ -75,7 +75,8 @@ int main(int argc, char * const argv[]) { struct stat statbuf; - int err, i, fd; + int err, i, fd, written, read; + char pgbuf[4096], readbuf[4096]; if (_sysio_init() != 0) { perror("init sysio"); @@ -104,8 +105,18 @@ main(int argc, char * const argv[]) } #endif #if 1 - fd = fixme_open("/newfile", O_CREAT, 00666); + fd = fixme_open("/newfile5", O_RDWR|O_CREAT|O_TRUNC, 00664); printf("***************** open return %d ****************\n", fd); + + memset(pgbuf, 'A', 4096); + written = fixme_write(fd, pgbuf, 4096); + printf("+++++++++++++++++ %d bytes written ++++++++++++++\n", written); + + fixme_lseek(fd, 0, SEEK_SET); + + memset(readbuf, 'a', 4096); + read = fixme_read(fd, readbuf, 4096); + printf("----------------- %d bytes read --------------\n", read); #endif printf("sysio is about shutdown\n"); /* diff --git a/lustre/liblustre/rw.c b/lustre/liblustre/rw.c new file mode 100644 index 0000000..da692b2 --- /dev/null +++ b/lustre/liblustre/rw.c @@ -0,0 +1,531 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Lustre Light Super operations + * + * Copyright (c) 2002, 2003 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#define DEBUG_SUBSYSTEM S_LLITE + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "llite_lib.h" + +int llu_iop_iodone(struct ioctx *ioctxp __IS_UNUSED) +{ + return 1; +} + +/* + * this grabs a lock and manually implements behaviour that makes it look + * like the OST is returning the file size with each lock acquisition + */ +int llu_extent_lock(struct ll_file_data *fd, struct inode *inode, + struct lov_stripe_md *lsm, + int mode, struct ldlm_extent *extent, + struct lustre_handle *lockh) +{ +#if 0 + struct ll_inode_info *lli = ll_i2info(inode); + int rc; + ENTRY; + + rc = ll_extent_lock_no_validate(fd, inode, lsm, mode, extent, lockh); + if (rc != ELDLM_OK) + RETURN(rc); + + /* always do a getattr for the first person to pop out of lock + * acquisition.. the DID_GETATTR flag and semaphore serialize + * this initial race. we used to make a decision based on whether + * the lock was matched or acquired, but the matcher could win the + * waking race with the first issuer so that was no good.. + */ + if (test_bit(LLI_F_DID_GETATTR, &lli->lli_flags)) + RETURN(ELDLM_OK); + + down(&lli->lli_getattr_sem); + + if (!test_bit(LLI_F_DID_GETATTR, &lli->lli_flags)) { + rc = ll_inode_getattr(inode, lsm, fd ? &fd->fd_ost_och : NULL); + if (rc == 0) { + set_bit(LLI_F_DID_GETATTR, &lli->lli_flags); + } else { + /* XXX can this fail? */ + ll_extent_unlock(fd, inode, lsm, mode, lockh); + } + } + + up(&lli->lli_getattr_sem); + RETURN(rc); +#else + return ELDLM_OK; +#endif +} + +int ll_extent_unlock(struct ll_file_data *fd, struct inode *inode, + struct lov_stripe_md *lsm, int mode, + struct lustre_handle *lockh) +{ +#if 0 + struct ll_sb_info *sbi = ll_i2sbi(inode); + int rc; + ENTRY; + + /* XXX phil: can we do this? won't it screw the file size up? */ + if ((fd && (fd->fd_flags & LL_FILE_IGNORE_LOCK)) || + (sbi->ll_flags & LL_SBI_NOLCK)) + RETURN(0); + + rc = obd_cancel(&sbi->ll_osc_conn, lsm, mode, lockh); + + RETURN(rc); +#else + return 0; +#endif +} + +static int llu_brw(int cmd, struct inode *inode, struct page *page, int flags) +{ + struct llu_inode_info *lli = llu_i2info(inode); + struct lov_stripe_md *lsm = lli->lli_smd; + struct obd_brw_set *set; + struct brw_page pg; + int rc; + ENTRY; + + set = obd_brw_set_new(); + if (set == NULL) + RETURN(-ENOMEM); + + pg.pg = page; + pg.off = ((obd_off)page->index) << PAGE_SHIFT; + + /* FIXME FIXME FIXME FIXME FIXME FIXME FIXME FIXME FIXME */ +#if 0 + if (cmd == OBD_BRW_WRITE && (pg.off + PAGE_SIZE > lli->lli_st_size)) + pg.count = lli->lli_st_size % PAGE_SIZE; + else +#endif + pg.count = PAGE_SIZE; + + CDEBUG(D_PAGE, "%s %d bytes ino %lu at "LPU64"/"LPX64"\n", + cmd & OBD_BRW_WRITE ? "write" : "read", pg.count, lli->lli_st_ino, + pg.off, pg.off); + if (pg.count == 0) { + LBUG(); + } + + pg.flag = flags; + + set->brw_callback = ll_brw_sync_wait; + rc = obd_brw(cmd, llu_i2obdconn(inode), lsm, 1, &pg, set, NULL); + if (rc) { + if (rc != -EIO) + CERROR("error from obd_brw: rc = %d\n", rc); + } else { + rc = ll_brw_sync_wait(set, CB_PHASE_START); + if (rc) + CERROR("error from callback: rc = %d\n", rc); + } + obd_brw_set_decref(set); + + RETURN(rc); +} + +static int llu_prepare_write(struct inode *inode, struct page *page, + unsigned from, unsigned to) +{ + struct llu_inode_info *lli = llu_i2info(inode); + obd_off offset = ((obd_off)page->index) << PAGE_SHIFT; + int rc = 0; + ENTRY; + +#if 0 + if (!PageLocked(page)) + LBUG(); + + if (PageUptodate(page)) + RETURN(0); + + //POISON(addr + from, 0xca, to - from); +#endif + /* We're completely overwriting an existing page, so _don't_ set it up + * to date until commit_write */ + if (from == 0 && to == PAGE_SIZE) + RETURN(0); + + /* If are writing to a new page, no need to read old data. + * the extent locking and getattr procedures in ll_file_write have + * guaranteed that i_size is stable enough for our zeroing needs */ + if (lli->lli_st_size <= offset) { + memset(kmap(page), 0, PAGE_SIZE); + kunmap(page); + GOTO(prepare_done, rc = 0); + } + + rc = llu_brw(OBD_BRW_READ, inode, page, 0); + + EXIT; + + prepare_done: + return rc; +} + +static int llu_commit_write(struct inode *inode, struct page *page, + unsigned from, unsigned to) +{ + struct llu_inode_info *lli = llu_i2info(inode); + loff_t size; + int rc; + ENTRY; +#if 0 + LASSERT(inode == file->f_dentry->d_inode); + LASSERT(PageLocked(page)); + + CDEBUG(D_INODE, "inode %p is writing page %p from %d to %d at %lu\n", + inode, page, from, to, page->index); + CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu,from=%d,to=%d\n", + inode->i_ino, from, to); + /* to match full page case in prepare_write */ + SetPageUptodate(page); + /* mark the page dirty, put it on mapping->dirty, + * mark the inode PAGES_DIRTY, put it on sb->dirty */ + set_page_dirty(page); +#endif + rc = llu_brw(OBD_BRW_WRITE, inode, page, 0); + if (rc) + return rc; + + /* this is matched by a hack in obdo_to_inode at the moment */ + size = (((obd_off)page->index) << PAGE_SHIFT) + to; + if (size > lli->lli_st_size) + lli->lli_st_size = size; + + RETURN(0); +} /* ll_commit_write */ + +ssize_t +llu_generic_file_write(struct inode *inode, const char *buf, + size_t count, loff_t pos) +{ + struct page *page; + ssize_t written; + long status = 0; + int err; + unsigned bytes; + + if ((ssize_t) count < 0) + return -EINVAL; +#if 0 + down(&inode->i_sem); +#endif + if (pos < 0) + return -EINVAL; + + written = 0; + +#if 0 + remove_suid(inode); + update_inode_times(inode); +#endif + do { + unsigned long index, offset; + char *kaddr; + + /* + * Try to find the page in the cache. If it isn't there, + * allocate a free page. + */ + offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */ + index = pos >> PAGE_CACHE_SHIFT; + bytes = PAGE_CACHE_SIZE - offset; + if (bytes > count) { + bytes = count; + } + + status = -ENOMEM; /* we'll assign it later anyway */ + page = __grab_cache_page(index); + if (!page) + break; + + kaddr = kmap(page); + status = llu_prepare_write(inode, page, offset, offset+bytes); + if (status) + goto sync_failure; + + memcpy(kaddr+offset, buf, bytes); + + status = llu_commit_write(inode, page, offset, offset+bytes); + if (!status) + status = bytes; + + if (status >= 0) { + written += status; + count -= status; + pos += status; + buf += status; + } +unlock: + kunmap(page); + page_cache_release(page); + + if (status < 0) + break; + } while (count); +done: + err = written ? written : status; + +#if 0 + up(&inode->i_sem); +#endif + return err; + + status = -EFAULT; + goto unlock; + +sync_failure: + /* + * If blocksize < pagesize, prepare_write() may have instantiated a + * few blocks outside i_size. Trim these off again. + */ + kunmap(page); + page_cache_release(page); + goto done; +} + +ssize_t llu_file_write(struct inode *inode, const struct iovec *iovec, + size_t iovlen, loff_t pos) +{ + struct llu_inode_info *lli = llu_i2info(inode); + struct ll_file_data *fd = lli->lli_file_data; /* XXX not ready don't use it now */ + struct lustre_handle lockh = { 0, 0 }; + struct lov_stripe_md *lsm = lli->lli_smd; + struct ldlm_extent extent; + ldlm_error_t err; + ssize_t retval = 0; + ENTRY; + + /* XXX consider other types later */ + if (!S_ISREG(lli->lli_st_mode)) + LBUG(); +#if 0 + CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu,size="LPSZ",offset=%Ld\n", + inode->i_ino, count, *ppos); + + /* + * sleep doing some writeback work of this mount's dirty data + * if the VM thinks we're low on memory.. other dirtying code + * paths should think about doing this, too, but they should be + * careful not to hold locked pages while they do so. like + * ll_prepare_write. *cough* + */ + ll_check_dirty(inode->i_sb); +#endif + while (iovlen--) { + const char *buf = iovec[iovlen].iov_base; + size_t count = iovec[iovlen].iov_len; + + /* POSIX, but surprised the VFS doesn't check this already */ + if (count == 0) + continue; + +#if 0 + if (!S_ISBLK(lli->lli_st_mode) && file->f_flags & O_APPEND) { + extent.start = 0; + extent.end = OBD_OBJECT_EOF; + } else { + extent.start = *ppos; + extent.end = *ppos + count - 1; + } +#else + extent.start = pos; + extent.end = pos + count - 1; +#endif + + err = llu_extent_lock(fd, inode, lsm, LCK_PW, &extent, &lockh); + if (err != ELDLM_OK) + RETURN(-ENOLCK); + +#if 0 + if (!S_ISBLK(inode->i_mode) && file->f_flags & O_APPEND) + *ppos = inode->i_size; + + CDEBUG(D_INFO, "Writing inode %lu, "LPSZ" bytes, offset %Lu\n", + inode->i_ino, count, *ppos); +#endif + retval += llu_generic_file_write(inode, buf, count, pos); + } + + /* XXX errors? */ + ll_extent_unlock(fd, inode, lsm, LCK_PW, &lockh); + return(retval); +} + +static void llu_update_atime(struct inode *inode) +{ +#if 0 + struct llu_inode_info *lli = llu_i2info(inode); + +#ifdef USE_ATIME + struct iattr attr; + + attr.ia_atime = LTIME_S(CURRENT_TIME); + attr.ia_valid = ATTR_ATIME; + + if (lli->lli_st_atime == attr.ia_atime) return; + if (IS_RDONLY(inode)) return; + if (IS_NOATIME(inode)) return; + + /* ll_inode_setattr() sets inode->i_atime from attr.ia_atime */ + llu_inode_setattr(inode, &attr, 0); +#else + /* update atime, but don't explicitly write it out just this change */ + inode->i_atime = CURRENT_TIME; +#endif +#endif +} + +static size_t llu_generic_file_read(struct inode *inode, char *buf, + size_t count, loff_t pos) +{ + struct llu_inode_info *lli = llu_i2info(inode); + unsigned long index, offset; + int error = 0; + size_t readed = 0; + + index = pos >> PAGE_CACHE_SHIFT; + offset = pos & ~PAGE_CACHE_MASK; + + do { + struct page *page; + unsigned long end_index, nr; + + end_index = lli->lli_st_size >> PAGE_CACHE_SHIFT; + + if (index > end_index) + break; + nr = PAGE_CACHE_SIZE; + if (index == end_index) { + nr = lli->lli_st_size & ~PAGE_CACHE_MASK; + if (nr <= offset) + break; + } + + nr = nr - offset; + if (nr > count) + nr = count; + + page = grab_cache_page(index); + if (!page) { + error = -ENOMEM; + break; + } + + error = llu_brw(OBD_BRW_READ, inode, page, 0); + if (error) { + page_cache_release(page); + break; + } + + memcpy(buf, kmap(page)+offset, nr); + offset += nr; + index += offset >> PAGE_CACHE_SHIFT; + offset &= ~PAGE_CACHE_MASK; + readed += nr; + count -= nr; + + page_cache_release(page); + } while (count); + + if (error) + return error; + return readed; +} + +ssize_t llu_file_read(struct inode *inode, const struct iovec *iovec, + size_t iovlen, loff_t pos) +{ + struct llu_inode_info *lli = llu_i2info(inode); + struct ll_file_data *fd = lli->lli_file_data; + struct lov_stripe_md *lsm = lli->lli_smd; + struct lustre_handle lockh = { 0, 0 }; +#if 0 + struct ll_read_extent rextent; +#else + struct ldlm_extent extent; +#endif + ldlm_error_t err; + ssize_t retval = 0; + ENTRY; + + while (iovlen--) { + char *buf = iovec[iovlen].iov_base; + size_t count = iovec[iovlen].iov_len; + + /* "If nbyte is 0, read() will return 0 and have no other results." + * -- Single Unix Spec */ + if (count == 0) + RETURN(0); + +#if 0 + rextent.re_extent.start = pos; + rextent.re_extent.end = pos + count - 1; +#else + extent.start = pos; + extent.end = pos + count - 1; +#endif + err = llu_extent_lock(fd, inode, lsm, LCK_PR, &extent, &lockh); + if (err != ELDLM_OK) + RETURN(-ENOLCK); +#if 0 + rextent.re_task = current; + spin_lock(&lli->lli_read_extent_lock); + list_add(&rextent.re_lli_item, &lli->lli_read_extents); + spin_unlock(&lli->lli_read_extent_lock); +#endif + CDEBUG(D_INFO, "Reading inode %lu, "LPSZ" bytes, offset %Ld\n", + lli->lli_st_ino, count, pos); + retval = llu_generic_file_read(inode, buf, count, pos); +#if 0 + spin_lock(&lli->lli_read_extent_lock); + list_del(&rextent.re_lli_item); + spin_unlock(&lli->lli_read_extent_lock); +#endif + } + + if (retval > 0) + llu_update_atime(inode); + + /* XXX errors? */ + ll_extent_unlock(fd, inode, lsm, LCK_PR, &lockh); + RETURN(retval); +} + diff --git a/lustre/liblustre/super.c b/lustre/liblustre/super.c index d5b6623..48bcd62 100644 --- a/lustre/liblustre/super.c +++ b/lustre/liblustre/super.c @@ -402,6 +402,9 @@ static struct inode_ops llu_inode_ops = { inop_lookup: llu_iop_lookup, inop_getattr: llu_iop_getattr, inop_open: llu_iop_open, + inop_ipreadv: llu_iop_ipreadv, + inop_ipwritev: llu_iop_ipwritev, + inop_iodone: llu_iop_iodone, };