Whamcloud - gitweb
Fix Eric's extremely well-spotted locking bug. It's not clear that we even
[fs/lustre-release.git] / lustre / llite / rw.c
index c2b736a..823d18a 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Lustre Lite I/O Page Cache
  *
- * Copyright (C) 2002 Cluster File Systems, Inc. 
+ * Copyright (C) 2002 Cluster File Systems, Inc.
  */
 
 #include <linux/config.h>
 #include <linux/lustre_lite.h>
 #include <linux/lustre_lib.h>
 
+
 /* SYNCHRONOUS I/O to object storage for an inode */
 static int ll_brw(int rw, struct inode *inode, struct page *page, int create)
 {
-        obd_count        num_obdo = 1;
-        obd_count        bufs_per_obdo = 1;
-        struct obdo     *oa;
-        obd_size         count = PAGE_SIZE;
-        obd_off          offset = ((obd_off)page->index) << PAGE_SHIFT;
-        obd_flag         flags = create ? OBD_BRW_CREATE : 0;
-        int              err;
+        struct ll_inode_info *lli = ll_i2info(inode);
+        struct lov_stripe_md *md = lli->lli_smd;
+        struct brw_page pg;
+        int err;
+        struct io_cb_data *cbd = ll_init_cb();
         ENTRY;
 
-        oa = ll_i2info(inode)->lli_obdo;
-        err = obd_brw(rw, ll_i2obdconn(inode), num_obdo, &oa, &bufs_per_obdo,
-                      &page, &count, &offset, &flags, NULL);
+        if (!cbd)
+                RETURN(-ENOMEM);
+
+        pg.pg = page;
+        pg.count = PAGE_SIZE;
+        pg.off = ((obd_off)page->index) << PAGE_SHIFT;
+        pg.flag = create ? OBD_BRW_CREATE : 0;
+
+        err = obd_brw(rw, ll_i2obdconn(inode), md, 1, &pg, ll_sync_io_cb, cbd);
+
         RETURN(err);
 } /* ll_brw */
 
@@ -55,13 +61,14 @@ static int ll_brw(int rw, struct inode *inode, struct page *page, int create)
 static int ll_readpage(struct file *file, struct page *page)
 {
         struct inode *inode = page->mapping->host;
+        obd_off offset = ((obd_off)page->index) << PAGE_SHIFT;
         int rc = 0;
         ENTRY;
 
         if (!PageLocked(page))
                 LBUG();
 
-        if (((inode->i_size + PAGE_SIZE - 1) >> PAGE_SHIFT) <= page->index) {
+        if (inode->i_size <= offset) {
                 memset(kmap(page), 0, PAGE_SIZE);
                 kunmap(page);
                 GOTO(readpage_out, rc);
@@ -87,11 +94,11 @@ static int ll_prepare_write(struct file *file, struct page *page, unsigned from,
                             unsigned to)
 {
         struct inode *inode = page->mapping->host;
-        //obd_off offset = ((obd_off)page->index) << PAGE_SHIFT;
+        obd_off offset = ((obd_off)page->index) << PAGE_SHIFT;
         int rc = 0;
         char *addr;
-        ENTRY; 
-        
+        ENTRY;
+
         addr = kmap(page);
         if (!PageLocked(page))
                 LBUG();
@@ -99,13 +106,17 @@ static int ll_prepare_write(struct file *file, struct page *page, unsigned from,
         if (Page_Uptodate(page))
                 GOTO(prepare_done, rc);
 
-        memset(addr, 0, PAGE_SIZE);
-
         /* We're completely overwriting an existing page, so _don't_ set it up
          * to date until commit_write */
         if (from == 0 && to == PAGE_SIZE)
                 RETURN(0);
 
+        /* The page starts at or beyond EOF, so there is no old data to read */
+        if (inode->i_size <= offset) {
+                memset(addr, 0, PAGE_SIZE);
+                goto prepare_done;
+        }
+
         rc = ll_brw(OBD_BRW_READ, inode, page, 0);
 
         EXIT;
@@ -133,10 +144,11 @@ static int ll_writepage(struct page *page)
         } else {
                 CERROR("ll_brw failure %d\n", err);
         }
-        UnlockPage(page); 
+        unlock_page(page);
         RETURN(err);
 }
 
+
 /* SYNCHRONOUS I/O to object storage for an inode -- object attr will be updated
  * too */
 static int ll_commit_write(struct file *file, struct page *page,
@@ -144,62 +156,83 @@ static int ll_commit_write(struct file *file, struct page *page,
 {
         int create = 1;
         struct inode *inode = page->mapping->host;
-        obd_count        num_obdo = 1;
-        obd_count        bufs_per_obdo = 1;
-        struct obdo     *oa;
-        obd_size         count = to;
-        obd_off          offset = (((obd_off)page->index) << PAGE_SHIFT);
-        obd_flag         flags = create ? OBD_BRW_CREATE : 0;
-        int              err;
-        struct iattr     iattr;
+        struct ll_inode_info *lli = ll_i2info(inode);
+        struct lov_stripe_md *md = lli->lli_smd;
+        struct brw_page pg;
+        int err;
+        loff_t size;
+        struct io_cb_data *cbd = ll_init_cb();
+
+        pg.pg = page;
+        pg.count = to;
+        pg.off = (((obd_off)page->index) << PAGE_SHIFT);
+        pg.flag = create ? OBD_BRW_CREATE : 0;
 
         ENTRY;
-        oa = ll_i2info(inode)->lli_obdo;
+        if (!cbd)
+                RETURN(-ENOMEM);
 
         SetPageUptodate(page);
 
         if (!PageLocked(page))
                 LBUG();
 
-        CDEBUG(D_INODE, "commit_page writing (at %d) to %d, count %Ld\n", 
-               from, to, (unsigned long long)count);
+        CDEBUG(D_INODE, "commit_page writing (at %d) to %d, count %Ld\n",
+               from, to, (unsigned long long)pg.count);
 
-        err = obd_brw(OBD_BRW_WRITE, ll_i2obdconn(inode), num_obdo, &oa,
-                      &bufs_per_obdo, &page, &count, &offset, &flags, NULL);
+        err = obd_brw(OBD_BRW_WRITE, ll_i2obdconn(inode), md,
+                      1, &pg, ll_sync_io_cb, cbd);
         kunmap(page);
 
-        iattr.ia_size = offset + to;
-        if (iattr.ia_size > inode->i_size) {
-                /* do NOT truncate when writing in the middle of a file */
-                inode->i_size = iattr.ia_size;
-                iattr.ia_valid = ATTR_SIZE;
-#if 0
-                err = ll_inode_setattr(inode, &iattr, 0);
-                if (err) {
-                        CERROR("failed - %d.\n", err);
-                        err = -EIO;
-                }
-#endif
-        }
+        size = pg.off + pg.count;
+        /* do NOT truncate when writing in the middle of a file */
+        if (size > inode->i_size)
+                inode->i_size = size;
 
         RETURN(err);
 } /* ll_commit_write */
 
 void ll_truncate(struct inode *inode)
 {
-        struct obdo *oa;
+        struct obdo oa = {0};
+        struct lov_stripe_md *md = ll_i2info(inode)->lli_smd;
+        struct lustre_handle *lockhs = NULL;
         int err;
         ENTRY;
 
-        oa = ll_i2info(inode)->lli_obdo;
-        
-        CDEBUG(D_INFO, "calling punch for %ld (%Lu bytes at 0)\n",
-               (long)oa->o_id, (unsigned long long)oa->o_size);
-        err = obd_punch(ll_i2obdconn(inode), oa, oa->o_size, 0);
+        if (!md) {
+                /* object not yet allocated */
+                inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+                return;
+        }
+
+        oa.o_id = md->lmd_object_id;
+        oa.o_size = inode->i_size;
 
+        CDEBUG(D_INFO, "calling punch for %ld (all bytes after %Ld)\n",
+               (long)oa.o_id, (unsigned long long)oa.o_size);
+
+        err = ll_size_lock(inode, md, oa.o_size, LCK_PW, &lockhs);
         if (err) {
-                CERROR("obd_truncate fails (%d)\n", err);
+                CERROR("ll_size_lock failed: %d\n", err);
+                /* FIXME: What to do here?  It's too late to back out... */
+                LBUG();
         }
+
+        oa.o_valid = OBD_MD_FLID;
+        /* truncate == punch from i_size to the end of the object:
+           pass an end offset of -1 (all bits set) for that. */
+        err = obd_punch(ll_i2obdconn(inode), &oa, md, inode->i_size,
+                        0xffffffffffffffff);
+        if (err)
+                CERROR("obd_truncate fails (%d)\n", err);
+        else
+                obdo_to_inode(inode, &oa, oa.o_valid);
+
+        err = ll_size_unlock(inode, md, LCK_PW, lockhs);
+        if (err)
+                CERROR("ll_size_unlock failed: %d\n", err);
+
         EXIT;
         return;
 } /* ll_truncate */
@@ -207,62 +240,55 @@ void ll_truncate(struct inode *inode)
 int ll_direct_IO(int rw, struct inode *inode, struct kiobuf *iobuf,
                  unsigned long blocknr, int blocksize)
 {
-        int i;
-        obd_count        num_obdo = 1;
         obd_count        bufs_per_obdo = iobuf->nr_pages;
-        struct obdo     *oa = NULL;
-        obd_size         *count = NULL;
-        obd_off          *offset = NULL;
-        obd_flag         *flags = NULL;
+        struct ll_inode_info *lli = ll_i2info(inode);
+        struct lov_stripe_md *md = lli->lli_smd;
+        struct brw_page *pga;
         int              rc = 0;
+        int i;
+        struct io_cb_data *cbd = ll_init_cb();
 
         ENTRY;
+        if (!cbd)
+                RETURN(-ENOMEM);
 
         if (blocksize != PAGE_SIZE) {
-                CERROR("direct_IO blocksize != PAGE_SIZE, what to do?\n");
-                LBUG();
+                CERROR("direct_IO blocksize != PAGE_SIZE\n");
+                return -EINVAL;
         }
 
-        OBD_ALLOC(count, sizeof(obd_size) * bufs_per_obdo);
-        OBD_ALLOC(offset, sizeof(obd_off) * bufs_per_obdo);
-        OBD_ALLOC(flags, sizeof(obd_flag) * bufs_per_obdo);
-        if (!count || !offset || !flags)
+        OBD_ALLOC(pga, sizeof(*pga) * bufs_per_obdo);
+        if (!pga)
                 GOTO(out, rc = -ENOMEM);
 
         /* NB: we can't use iobuf->maplist[i]->index for the offset
          * instead of "blocknr" because ->index contains garbage.
          */
         for (i = 0; i < bufs_per_obdo; i++, blocknr++) {
-                count[i] = PAGE_SIZE;
-                offset[i] = (obd_off)blocknr << PAGE_SHIFT;
-                flags[i] = OBD_BRW_CREATE;
+                pga[i].pg = iobuf->maplist[i];
+                pga[i].count = PAGE_SIZE;
+                pga[i].off = (obd_off)blocknr << PAGE_SHIFT;
+                pga[i].flag = OBD_BRW_CREATE;
         }
 
-        oa = ll_i2info(inode)->lli_obdo;
-        if (!oa)
+        if (!md || !md->lmd_object_id)
                 GOTO(out, rc = -ENOMEM);
-        rc = obd_brw(rw, ll_i2obdconn(inode), num_obdo, &oa, &bufs_per_obdo,
-                      iobuf->maplist, count, offset, flags, NULL);
-        if (rc == 0) 
+
+        rc = obd_brw(rw == WRITE ? OBD_BRW_WRITE : OBD_BRW_READ,
+                     ll_i2obdconn(inode), md, bufs_per_obdo, pga,
+                     ll_sync_io_cb, cbd);
+        if (rc == 0)
                 rc = bufs_per_obdo * PAGE_SIZE;
 
- out:
-        if (flags) 
-                OBD_FREE(flags, sizeof(obd_flag) * bufs_per_obdo); 
-        if (count) 
-                OBD_FREE(count, sizeof(obd_count) * bufs_per_obdo); 
-        if (offset) 
-                OBD_FREE(offset, sizeof(obd_off) * bufs_per_obdo); 
+out:
+        OBD_FREE(pga, sizeof(*pga) * bufs_per_obdo);
         RETURN(rc);
 }
 
 
 int ll_flush_inode_pages(struct inode * inode)
 {
-        //int i;
-        //        obd_count        num_obdo = 1;
         obd_count        bufs_per_obdo = 0;
-        struct obdo     *oa = NULL;
         obd_size         *count = NULL;
         obd_off          *offset = NULL;
         obd_flag         *flags = NULL;
@@ -275,43 +301,29 @@ int ll_flush_inode_pages(struct inode * inode)
         spin_unlock(&pagecache_lock);
 
 
-        OBD_ALLOC(count, sizeof(obd_size) * bufs_per_obdo); 
-        if (!count)
-                GOTO(out, err=-ENOMEM); 
-
-        OBD_ALLOC(offset, sizeof(obd_off) * bufs_per_obdo); 
-        if (!offset)
-                GOTO(out, err=-ENOMEM); 
-
-        OBD_ALLOC(flags, sizeof(obd_flag) * bufs_per_obdo); 
-        if (!flags)
-                GOTO(out, err=-ENOMEM); 
+        OBD_ALLOC(count, sizeof(*count) * bufs_per_obdo);
+        OBD_ALLOC(offset, sizeof(*offset) * bufs_per_obdo);
+        OBD_ALLOC(flags, sizeof(*flags) * bufs_per_obdo);
+        if (!count || !offset || !flags)
+                GOTO(out, err=-ENOMEM);
 
 #if 0
-        for (i = 0 ; i < bufs_per_obdo ; i++) { 
+        for (i = 0 ; i < bufs_per_obdo ; i++) {
                 count[i] = PAGE_SIZE;
                 offset[i] = ((obd_off)(iobuf->maplist[i])->index) << PAGE_SHIFT;
                 flags[i] = OBD_BRW_CREATE;
         }
 
-        oa = ll_oa_from_inode(inode, OBD_MD_FLNOTOBD);
-        if (!oa)
-                RETURN(-ENOMEM);
-
-        err = obd_brw(rw, ll_i2obdconn(inode), num_obdo, &oa, &bufs_per_obdo,
-                      iobuf->maplist, count, offset, flags);
-        if (err == 0) 
+        err = obd_brw(OBD_BRW_WRITE, ll_i2obdconn(inode),
+                      ll_i2info(inode)->lli_smd, bufs_per_obdo,
+                      iobuf->maplist, count, offset, flags, NULL, NULL);
+        if (err == 0)
                 err = bufs_per_obdo * 4096;
 #endif
  out:
-        if (oa) 
-                obdo_free(oa);
-        if (flags) 
-                OBD_FREE(flags, sizeof(obd_flag) * bufs_per_obdo); 
-        if (count) 
-                OBD_FREE(count, sizeof(obd_count) * bufs_per_obdo); 
-        if (offset) 
-                OBD_FREE(offset, sizeof(obd_off) * bufs_per_obdo); 
+        OBD_FREE(flags, sizeof(*flags) * bufs_per_obdo);
+        OBD_FREE(count, sizeof(*count) * bufs_per_obdo);
+        OBD_FREE(offset, sizeof(*offset) * bufs_per_obdo);
         RETURN(err);
 }
 
@@ -324,7 +336,7 @@ struct address_space_operations ll_aops = {
         direct_IO: ll_direct_IO,
 #endif
         sync_page: block_sync_page,
-        prepare_write: ll_prepare_write, 
+        prepare_write: ll_prepare_write,
         commit_write: ll_commit_write,
         bmap: NULL
 };