Whamcloud - gitweb
Branch HEAD
[fs/lustre-release.git] / lustre / llite / rw.c
index 435b145..dc99195 100644 (file)
@@ -21,7 +21,7 @@
  *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
-#ifdef HAVE_KERNEL_CONFIG_H
+#ifndef AUTOCONF_INCLUDED
 #include <linux/config.h>
 #endif
 #include <linux/kernel.h>
@@ -73,8 +73,8 @@ static int ll_brw(int cmd, struct inode *inode, struct obdo *oa,
         pg.pg = page;
         pg.off = ((obd_off)page->index) << CFS_PAGE_SHIFT;
 
-        if ((cmd & OBD_BRW_WRITE) && (pg.off + CFS_PAGE_SIZE > inode->i_size))
-                pg.count = inode->i_size % CFS_PAGE_SIZE;
+        if ((cmd & OBD_BRW_WRITE) && (pg.off+CFS_PAGE_SIZE>i_size_read(inode)))
+                pg.count = i_size_read(inode) % CFS_PAGE_SIZE;
         else
                 pg.count = CFS_PAGE_SIZE;
 
@@ -83,9 +83,9 @@ static int ll_brw(int cmd, struct inode *inode, struct obdo *oa,
                        inode->i_ino, pg.off, pg.off);
         if (pg.count == 0) {
                 CERROR("ZERO COUNT: ino %lu: size %p:%Lu(%p:%Lu) idx %lu off "
-                       LPU64"\n",
-                       inode->i_ino, inode, inode->i_size, page->mapping->host,
-                       page->mapping->host->i_size, page->index, pg.off);
+                       LPU64"\n", inode->i_ino, inode, i_size_read(inode),
+                       page->mapping->host, i_size_read(page->mapping->host),
+                       page->index, pg.off);
         }
 
         pg.flag = flags;
@@ -100,7 +100,7 @@ static int ll_brw(int cmd, struct inode *inode, struct obdo *oa,
         oinfo.oi_md = lsm;
         /* NB partial write, so we might not have CAPA_OPC_OSS_READ capa */
         opc = cmd & OBD_BRW_WRITE ? CAPA_OPC_OSS_WRITE : CAPA_OPC_OSS_RW;
-        oinfo.oi_capa = ll_osscapa_get(inode, current->fsuid, opc);
+        oinfo.oi_capa = ll_osscapa_get(inode, opc);
         rc = obd_brw(cmd, ll_i2dtexp(inode), &oinfo, 1, &pg, NULL);
         capa_put(oinfo.oi_capa);
         if (rc == 0)
@@ -125,7 +125,8 @@ void ll_truncate(struct inode *inode)
         int rc;
         ENTRY;
         CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p) to %Lu=%#Lx\n",inode->i_ino,
-               inode->i_generation, inode, inode->i_size, inode->i_size);
+               inode->i_generation, inode, i_size_read(inode),
+               i_size_read(inode));
 
         ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_TRUNC, 1);
         if (lli->lli_size_sem_owner != current) {
@@ -146,38 +147,42 @@ void ll_truncate(struct inode *inode)
         lov_stripe_lock(lli->lli_smd);
         inode_init_lvb(inode, &lvb);
         obd_merge_lvb(ll_i2dtexp(inode), lli->lli_smd, &lvb, 0);
-        if (lvb.lvb_size == inode->i_size) {
+        if (lvb.lvb_size == i_size_read(inode)) {
                 CDEBUG(D_VFSTRACE, "skipping punch for obj "LPX64", %Lu=%#Lx\n",
-                       lli->lli_smd->lsm_object_id, inode->i_size, inode->i_size);
+                       lli->lli_smd->lsm_object_id, i_size_read(inode),
+                       i_size_read(inode));
                 lov_stripe_unlock(lli->lli_smd);
                 GOTO(out_unlock, 0);
         }
 
-        obd_adjust_kms(ll_i2dtexp(inode), lli->lli_smd, inode->i_size, 1);
+        obd_adjust_kms(ll_i2dtexp(inode), lli->lli_smd, i_size_read(inode), 1);
         lov_stripe_unlock(lli->lli_smd);
 
         if (unlikely((ll_i2sbi(inode)->ll_flags & LL_SBI_CHECKSUM) &&
-                     (inode->i_size & ~CFS_PAGE_MASK))) {
+                     (i_size_read(inode) & ~CFS_PAGE_MASK))) {
                 /* If the truncate leaves behind a partial page, update its
                  * checksum. */
                 struct page *page = find_get_page(inode->i_mapping,
-                                                  inode->i_size >> CFS_PAGE_SHIFT);
+                                                  i_size_read(inode) >>
+                                                  CFS_PAGE_SHIFT);
                 if (page != NULL) {
                         struct ll_async_page *llap = llap_cast_private(page);
                         if (llap != NULL) {
                                 llap->llap_checksum =
-                                        crc32_le(0, kmap(page), CFS_PAGE_SIZE);
-                                kunmap(page);
+                                        crc32_le(0,
+                                                 kmap_atomic(page, KM_USER0),
+                                                 CFS_PAGE_SIZE);
+                                kunmap_atomic(page, KM_USER0);
                         }
                         page_cache_release(page);
                 }
         }
 
         CDEBUG(D_INFO, "calling punch for "LPX64" (new size %Lu=%#Lx)\n",
-               lli->lli_smd->lsm_object_id, inode->i_size, inode->i_size);
+               lli->lli_smd->lsm_object_id, i_size_read(inode), i_size_read(inode));
 
         oinfo.oi_md = lli->lli_smd;
-        oinfo.oi_policy.l_extent.start = inode->i_size;
+        oinfo.oi_policy.l_extent.start = i_size_read(inode);
         oinfo.oi_policy.l_extent.end = OBD_OBJECT_EOF;
         oinfo.oi_oa = &oa;
         oa.o_id = lli->lli_smd->lsm_object_id;
@@ -190,7 +195,7 @@ void ll_truncate(struct inode *inode)
 
         ll_inode_size_unlock(inode, 0);
 
-        oinfo.oi_capa = ll_osscapa_get(inode, 0, CAPA_OPC_OSS_TRUNC);
+        oinfo.oi_capa = ll_osscapa_get(inode, CAPA_OPC_OSS_TRUNC);
         rc = obd_punch_rqset(ll_i2dtexp(inode), &oinfo, NULL);
         ll_truncate_free_capa(oinfo.oi_capa);
         if (rc)
@@ -264,8 +269,8 @@ int ll_prepare_write(struct file *file, struct page *page, unsigned from,
         if (lvb.lvb_size <= offset) {
                 LL_CDEBUG_PAGE(D_PAGE, page, "kms "LPU64" <= offset "LPU64"\n",
                                lvb.lvb_size, offset);
-                memset(kmap(page), 0, CFS_PAGE_SIZE);
-                kunmap(page);
+                memset(kmap_atomic(page, KM_USER0), 0, CFS_PAGE_SIZE);
+                kunmap_atomic(page, KM_USER0);
                 GOTO(prepare_done, rc = 0);
         }
 
@@ -310,16 +315,12 @@ static int ll_ap_make_ready(void *data, int cmd)
          * we got the page cache list we'd create a lock inversion
          * with the removepage path which gets the page lock then the
          * cli lock */
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
-        clear_page_dirty(page);
-#else
         LASSERTF(!PageWriteback(page),"cmd %x page %p ino %lu index %lu\n", cmd, page,
                  page->mapping->host->i_ino, page->index);
         clear_page_dirty_for_io(page);
 
         /* This actually clears the dirty bit in the radix tree.*/
         set_page_writeback(page);
-#endif
 
         LL_CDEBUG_PAGE(D_PAGE, page, "made ready\n");
         page_cache_get(page);
@@ -430,8 +431,7 @@ static struct obd_capa *ll_ap_lookup_capa(void *data, int cmd)
         struct ll_async_page *llap = LLAP_FROM_COOKIE(data);
         int opc = cmd & OBD_BRW_WRITE ? CAPA_OPC_OSS_WRITE : CAPA_OPC_OSS_RW;
 
-        return ll_osscapa_get(llap->llap_page->mapping->host, llap->llap_fsuid,
-                              opc);
+        return ll_osscapa_get(llap->llap_page->mapping->host, opc);
 }
 
 static struct obd_async_page_ops ll_async_page_ops = {
@@ -513,18 +513,16 @@ int llap_shrink_cache(struct ll_sb_info *sbi, int shrink_fraction)
                         continue;
                 }
 
-                if (llap->llap_write_queued || PageDirty(page) ||
-                    (!PageUptodate(page) &&
-                     llap->llap_origin != LLAP_ORIGIN_READAHEAD))
-                        keep = 1;
-                else
-                        keep = 0;
+               keep = (llap->llap_write_queued || PageDirty(page) ||
+                      PageWriteback(page) || (!PageUptodate(page) &&
+                      llap->llap_origin != LLAP_ORIGIN_READAHEAD));
 
-                LL_CDEBUG_PAGE(D_PAGE, page,"%s LRU page: %s%s%s%s origin %s\n",
+                LL_CDEBUG_PAGE(D_PAGE, page,"%s LRU page: %s%s%s%s%s origin %s\n",
                                keep ? "keep" : "drop",
                                llap->llap_write_queued ? "wq " : "",
                                PageDirty(page) ? "pd " : "",
                                PageUptodate(page) ? "" : "!pu ",
+                               PageWriteback(page) ? "wb" : "",
                                llap->llap_defer_uptodate ? "" : "!du",
                                llap_origins[llap->llap_origin]);
 
@@ -614,7 +612,7 @@ struct ll_async_page *llap_from_page(struct page *page, unsigned origin)
         if (sbi->ll_async_page_count >= sbi->ll_async_page_max)
                 llap_shrink_cache(sbi, 0);
 
-        OBD_SLAB_ALLOC(llap, ll_async_page_slab, GFP_KERNEL,
+        OBD_SLAB_ALLOC(llap, ll_async_page_slab, CFS_ALLOC_STD,
                        ll_async_page_slab_size);
         if (llap == NULL)
                 RETURN(ERR_PTR(-ENOMEM));
@@ -645,8 +643,9 @@ struct ll_async_page *llap_from_page(struct page *page, unsigned origin)
  out:
         if (unlikely(sbi->ll_flags & LL_SBI_CHECKSUM)) {
                 __u32 csum = 0;
-                csum = crc32_le(csum, kmap(page), CFS_PAGE_SIZE);
-                kunmap(page);
+                csum = crc32_le(csum, kmap_atomic(page, KM_USER0),
+                                CFS_PAGE_SIZE);
+                kunmap_atomic(page, KM_USER0);
                 if (origin == LLAP_ORIGIN_READAHEAD ||
                     origin == LLAP_ORIGIN_READPAGE) {
                         llap->llap_checksum = 0;
@@ -673,7 +672,7 @@ static int queue_or_sync_write(struct obd_export *exp, struct inode *inode,
                                struct ll_async_page *llap,
                                unsigned to, obd_flag async_flags)
 {
-        unsigned long size_index = inode->i_size >> CFS_PAGE_SHIFT;
+        unsigned long size_index = i_size_read(inode) >> CFS_PAGE_SHIFT;
         struct obd_io_group *oig;
         struct ll_sb_info *sbi = ll_i2sbi(inode);
         int rc, noquot = llap->llap_ignore_quota ? OBD_BRW_NOQUOTA : 0;
@@ -704,7 +703,7 @@ static int queue_or_sync_write(struct obd_export *exp, struct inode *inode,
                                size_index, to);
                 to = CFS_PAGE_SIZE;
         } else if (to != CFS_PAGE_SIZE && llap->llap_page->index == size_index) {
-                int size_to = inode->i_size & ~CFS_PAGE_MASK;
+                int size_to = i_size_read(inode) & ~CFS_PAGE_MASK;
                 LL_CDEBUG_PAGE(D_PAGE, llap->llap_page,
                                "sync write at EOF: size_index %lu, to %d/%d\n",
                                size_index, to, size_to);
@@ -717,8 +716,9 @@ static int queue_or_sync_write(struct obd_export *exp, struct inode *inode,
                      llap->llap_checksum != 0)) {
                 __u32 csum = 0;
                 struct page *page = llap->llap_page;
-                csum = crc32_le(csum, kmap(page), CFS_PAGE_SIZE);
-                kunmap(page);
+                csum = crc32_le(csum, kmap_atomic(page, KM_USER0),
+                                CFS_PAGE_SIZE);
+                kunmap_atomic(page, KM_USER0);
                 if (llap->llap_checksum == csum) {
                         CDEBUG(D_PAGE, "page %p cksum %x confirmed\n",
                                page, csum);
@@ -836,10 +836,10 @@ out:
                 lov_stripe_lock(lsm);
                 obd_adjust_kms(exp, lsm, size, 0);
                 lov_stripe_unlock(lsm);
-                if (size > inode->i_size)
-                        inode->i_size = size;
+                if (size > i_size_read(inode))
+                        i_size_write(inode, size);
                 SetPageUptodate(page);
-        } else if (size > inode->i_size) {
+        } else if (size > i_size_read(inode)) {
                 /* this page beyond the pales of i_size, so it can't be
                  * truncated in ll_p_r_e during lock revoking. we must
                  * teardown our book-keeping here. */
@@ -902,11 +902,12 @@ int ll_ap_completion(void *data, int cmd, struct obdo *oa, int rc)
         } else {
                 if (cmd & OBD_BRW_READ) {
                         llap->llap_defer_uptodate = 0;
-                } else {
-                        ll_redirty_page(page);
-                        ret = 1;
                 }
                 SetPageError(page);
+                if (rc == -ENOSPC)
+                        set_bit(AS_ENOSPC, &page->mapping->flags);
+                else
+                        set_bit(AS_EIO, &page->mapping->flags);
         }
 
         unlock_page(page);
@@ -1341,7 +1342,8 @@ static void ras_update(struct ll_sb_info *sbi, struct inode *inode,
         if (ras->ras_requests == 2 && !ras->ras_request_index) {
                 __u64 kms_pages;
 
-                kms_pages = (inode->i_size + CFS_PAGE_SIZE - 1) >> CFS_PAGE_SHIFT;
+                kms_pages = (i_size_read(inode) + CFS_PAGE_SIZE - 1) >>
+                            CFS_PAGE_SHIFT;
 
                 CDEBUG(D_READA, "kmsp "LPU64" mwp %lu mp %lu\n", kms_pages,
                        ra->ra_max_read_ahead_whole_pages, ra->ra_max_pages);
@@ -1436,7 +1438,9 @@ out:
                 if (PageWriteback(page)) {
                         end_page_writeback(page);
                 }
-                ll_redirty_page(page);
+                /* resend the page only if its IO was not started */
+                if (!PageError(page))
+                        ll_redirty_page(page);
                 unlock_page(page);
         }
         RETURN(rc);
@@ -1493,8 +1497,6 @@ int ll_readpage(struct file *filp, struct page *page)
         if (IS_ERR(llap))
                 GOTO(out, rc = PTR_ERR(llap));
 
-        llap->llap_fsuid = current->fsuid;
-
         if (ll_i2sbi(inode)->ll_ra_info.ra_max_pages)
                 ras_update(ll_i2sbi(inode), inode, &fd->fd_ras, page->index,
                            llap->llap_defer_uptodate);