Whamcloud - gitweb
- set SCSI_MAX_PHYS_SEGMENTS to 256 for 1MB requests
[fs/lustre-release.git] / lustre / snapfs / file.c
index 78cc9ab..a8a8860 100644 (file)
 #include <linux/slab.h>
 #include <linux/stat.h>
 #include <linux/unistd.h>
+#include <linux/pagemap.h>
 #include <linux/jbd.h>
 #include <linux/ext3_fs.h>
 #include <linux/snap.h>
 
 #include "snapfs_internal.h" 
 
-/* instantiate a file handle to the cache file */
-static void currentfs_prepare_snapfile(struct inode *inode,
-                                    struct file *clone_file, 
-                                    struct inode *cache_inode,
-                                    struct file *cache_file,
-                                    struct dentry *cache_dentry)
+/*
+ * Report whether any filesystem block behind page @index of @inode is
+ * mapped, as answered by the mapping's bmap operation.
+ *
+ * Returns 1 as soon as bmap gives a non-zero result for one block of the
+ * page, and 0 otherwise (including when the mapping provides no bmap).
+ */
+static int has_pages(struct inode *inode, int index)
 {
-        cache_file->f_pos = clone_file->f_pos;
-        cache_file->f_mode = clone_file->f_mode;
-        cache_file->f_flags = clone_file->f_flags;
-        cache_file->f_count  = clone_file->f_count;
-        cache_file->f_owner  = clone_file->f_owner;
-       cache_file->f_dentry = cache_dentry;
-        cache_file->f_dentry->d_inode = cache_inode;
+       /* Widen before shifting so the byte offset is not computed in int. */
+       unsigned long offset = (unsigned long)index << PAGE_CACHE_SHIFT;
+       unsigned long blk_start = offset >> inode->i_sb->s_blocksize_bits;
+       /*
+        * Exclusive bound: this is the block holding the first byte after
+        * the page (assumes block size <= page size -- TODO confirm).
+        */
+       unsigned long blk_end = (offset + PAGE_CACHE_SIZE) >>
+                               inode->i_sb->s_blocksize_bits;
+
+       /* Without a bmap operation nothing can be reported as mapped. */
+       if (!inode->i_mapping || !inode->i_mapping->a_ops ||
+           !inode->i_mapping->a_ops->bmap)
+               return 0;
+
+       /*
+        * Strictly less-than: including blk_end would test a block of the
+        * following page and could report a hole page as populated.
+        */
+       while (blk_start < blk_end) {
+               if (inode->i_mapping->a_ops->bmap(inode->i_mapping,
+                                                 blk_start))
+                       return 1;
+               blk_start++;
+       }
+       return 0;
 }
 
-/* update the currentfs file struct after IO in cache file */
-static void currentfs_restore_snapfile(struct inode *cache_inode,
-                                  struct file *cache_file, 
-                                  struct inode *clone_inode,
-                                  struct file *clone_file)
+/*
+ * Copy the page containing byte offset @start from inode @src into the
+ * page cache of inode @dst, and push the range [@start, @end) through the
+ * cache filesystem's prepare_write/commit_write on the destination page.
+ *
+ * Returns 1 when the page was copied, 0 when nothing was done (has_pages()
+ * finds no mapped block behind the page in @src, or @end - @start exceeds
+ * one page), and a negative errno on failure.
+ */
+static int copy_back_page(struct inode *dst,
+                         struct inode *src,
+                         unsigned long start,
+                         unsigned long end)
 {
-        cache_file->f_pos = clone_file->f_pos;
-}
+       char *kaddr_src, *kaddr_dst;
+        struct snap_cache *cache;
+       struct address_space_operations *c_aops;
+       struct page *src_page = NULL, *dst_page = NULL;
+       unsigned long index, offset, bytes;
+       int    err = 0;
+       ENTRY;
 
+       offset = (start & (PAGE_CACHE_SIZE -1)); /* Within page */
+       bytes = end - start;
+        index = start >> PAGE_CACHE_SHIFT;
+
+       /*
+        * bytes is unsigned, so the size check also rejects end < start.
+        * Use the page size rather than a literal 4096 so the guard agrees
+        * with the PAGE_CACHE_SIZE copy below.
+        */
+       if (!has_pages(src, index) || bytes > PAGE_CACHE_SIZE)
+               RETURN(0);
+
+       cache = snap_find_cache(src->i_dev);
+       if (!cache)
+               RETURN(-EINVAL);
+       c_aops = filter_c2cfaops(cache->cache_filter);
+
+       if (!c_aops)
+               RETURN(-EINVAL);
+
+       src_page = grab_cache_page(src->i_mapping, index);
+       if (!src_page) {
+               CERROR("copy block %lu from %lu to %lu ENOMEM \n",
+                         index, src->i_ino, dst->i_ino);
+               RETURN(-ENOMEM);
+       }
+
+       /*
+        * Do not wait on a read that was never started. On failure the
+        * page is released without unlocking, as currentfs_readpage() in
+        * this file does -- NOTE(review): confirm readpage unlocks the
+        * page on error.
+        */
+       err = c_aops->readpage(NULL, src_page);
+       if (err) {
+               CERROR("readpage failed for page index %lu of inode %lu: "
+                      "rc %d\n", index, src->i_ino, err);
+               /* A positive value must not be mistaken for "copied". */
+               if (err > 0)
+                       err = -EIO;
+               goto release_src_page;
+       }
+       wait_on_page(src_page);
+
+       kaddr_src = kmap(src_page);
+       if (!Page_Uptodate(src_page)) {
+               CERROR("Can not read page index %lu of inode %lu\n",
+                         index, src->i_ino);
+               err = -EIO;
+               goto unlock_src_page;
+       }
+       dst_page = grab_cache_page(dst->i_mapping, index);
+       if (!dst_page) {
+               CERROR("copy block %lu from %lu to %lu ENOMEM \n",
+                         index, src->i_ino, dst->i_ino);
+               err = -ENOMEM;
+               goto unlock_src_page;
+       }
+       kaddr_dst = kmap(dst_page);
+
+       err = c_aops->prepare_write(NULL, dst_page, offset, offset + bytes);
+       if (err)
+               goto unlock_dst_page;
+       /*
+        * NOTE(review): the whole page is copied although only
+        * [offset, offset + bytes) is prepared and committed -- confirm
+        * this is intended.
+        */
+       memcpy(kaddr_dst, kaddr_src, PAGE_CACHE_SIZE);
+       flush_dcache_page(dst_page);
+
+       err = c_aops->commit_write(NULL, dst_page, offset, offset + bytes);
+       CDEBUG(D_SNAP, "copy back pages %p index %lu src %lu dst %lu \n",
+              dst_page, dst_page->index, src->i_ino, dst->i_ino);
+       if (err)
+               goto unlock_dst_page;
+       err = 1;        /* tell the caller the page was copied */
+unlock_dst_page:
+       kunmap(dst_page);
+       UnlockPage(dst_page);
+       page_cache_release(dst_page);
+unlock_src_page:
+       kunmap(src_page);
+release_src_page:
+       /* Reached directly when src_page was never kmap()ed. */
+       page_cache_release(src_page);
+       RETURN(err);
+}
 
 static ssize_t currentfs_write (struct file *filp, const char *buf, 
                                size_t count, loff_t *ppos)
 {
         struct snap_cache *cache;
        struct inode *inode = filp->f_dentry->d_inode;
-        ssize_t rc;
         struct file_operations *fops;
-       loff_t pos;
-       long block[2]={-1,-1}, mask, i;
+       long   start[2]={-1,-1}, end[2]={-1,-1};
        struct snap_table *table;
-       int slot = 0;
-       int index = 0;
-       struct address_space_operations *aops;
        struct inode *cache_inode = NULL;
-       struct snapshot_operations *snapops;
+       int slot = 0, index = 0, result = 0;
+       long i;
+        ssize_t rc;
+       loff_t pos;
   
        ENTRY;
 
@@ -67,84 +134,98 @@ static ssize_t currentfs_write (struct file *filp, const char *buf,
         if ( !cache ) 
                 RETURN(-EINVAL);
 
+       down(&inode->i_sem);
+
         if ( snap_needs_cow(inode) != -1 ) {
                 CDEBUG(D_SNAP, "snap_needs_cow for ino %lu \n",inode->i_ino);
                 snap_do_cow(inode, filp->f_dentry->d_parent->d_inode->i_ino, 0);
        }
 
         fops = filter_c2cffops(cache->cache_filter); 
-        if (!fops || !fops->write) 
-                RETURN(-EINVAL);
-
+        if (!fops || !fops->write) { 
+                up(&inode->i_sem); 
+               RETURN(-EINVAL);
+       }
         if (filp->f_flags & O_APPEND)
                 pos = inode->i_size;
         else {
                 pos = *ppos;
-                if (pos != *ppos)
+                if (pos != *ppos){
+                       up(&inode->i_sem); 
                         RETURN(-EINVAL);
+               }
         }
-
-       /*
-        * we only need to copy back the first and last blocks
-        */
-       mask = inode->i_sb->s_blocksize-1;
-       if( pos & mask )
-               block[0] = pos >> inode->i_sb->s_blocksize_bits;
-       pos += count - 1;
-       if( (pos+1) &  mask )
-               block[1] = pos >> inode->i_sb->s_blocksize_bits;
-       if( block[0] == block[1] )
-               block[1] = -1;
        
-       aops = filter_c2cfaops(cache->cache_filter);
-       snapops = filter_c2csnapops(cache->cache_filter);
-
-       for( i=0; i<2; i++ ){
-               if(block[i]!=-1 && aops->bmap(inode->i_mapping, block[i])) {
-                       table = &snap_tables[cache->cache_snap_tableno];
-                       for (slot = table->tbl_count - 1; slot >= 1; slot--) {
-                               struct address_space_operations *c_aops = 
-                                       cache_inode->i_mapping->a_ops;
-                               cache_inode = NULL;
-                               index = table->snap_items[slot].index;
-                               cache_inode = snap_get_indirect(inode, NULL, index);
-
-                               if ( !cache_inode )  continue;
-
-                               if (c_aops->bmap(cache_inode->i_mapping, block[i])) {
-                                       CDEBUG(D_SNAP, "find cache_ino %lu\n",
-                                               cache_inode->i_ino);
-                                       if( snapops && snapops->copy_block) {
-                                               snapops->copy_block(inode, 
-                                                               cache_inode, block[i]);
-                                       }
-                                       iput(cache_inode);
-                                       break;
-                               }
-                                       iput(cache_inode);
-                       }
-               }
+       CDEBUG(D_SNAP, "write offset %lld count %u \n", pos, count);
+       
+       if (pos & (PAGE_CACHE_SIZE - 1)) {
+               start[0] = pos & PAGE_CACHE_MASK;
+               end[0] = pos;
+       }
+       pos += count - 1;
+       if ((pos+1) & (PAGE_CACHE_SIZE - 1)) {
+               start[1] = pos;  
+               end[1] = PAGE_CACHE_ALIGN(pos);
        }
-        rc = fops->write(filp, buf, count, ppos);
+
+       if (((start[0] >> PAGE_CACHE_SHIFT) == (start[1] >> PAGE_CACHE_SHIFT)) ||
+           pos > inode->i_size) 
+               start[1] = -1;
+
+       CDEBUG(D_SNAP, "copy back start[0] %ld end[0] %ld start[1] %ld end[1] %ld \n",
+              start[0], end[0], start[1], end[1]);     
+       for (i = 0; i < 2; i++) {
+               if (start[i] == -1) 
+                       continue;
+               table = &snap_tables[cache->cache_snap_tableno];
+               /* Find the nearest page in the snap table and copy it back */
+               for (slot = table->tbl_count - 1; slot >= 1; slot--) {
+                       cache_inode = NULL;
+                               index = table->snap_items[slot].index;
+                       cache_inode = snap_get_indirect(inode, NULL, index);
+
+                       if (!cache_inode)  continue;
+
+                       CDEBUG(D_SNAP, "find cache_ino %lu\n", cache_inode->i_ino);
+               
+                       result = copy_back_page(inode, cache_inode, start[i], end[i]);
+                       if (result == 1) {
+                               CDEBUG(D_SNAP, "copy page%lu back from ind %lu to %lu\n", 
+                                      (start[i] >> PAGE_CACHE_SHIFT), 
+                                      cache_inode->i_ino, 
+                                      inode->i_ino);
+                                       iput(cache_inode);
+                               result = 0;
+                               break;
+                       }
+                       if (result < 0) {
+                               iput(cache_inode);
+                               rc = result;
+                               up(&inode->i_sem);
+                               goto exit;
+                       }
+                               iput(cache_inode);
+               }
+       }
+       
+        up(&inode->i_sem); 
+       rc = fops->write(filp, buf, count, ppos);
+exit:
         RETURN(rc);
 }
 
 static int currentfs_readpage(struct file *file, struct page *page)
 {
-       int result = 0;
        struct inode *inode = file->f_dentry->d_inode;
        unsigned long ind_ino = inode->i_ino;
        struct inode *pri_inode = NULL;
        struct inode *cache_inode = NULL;
-       struct file open_file;
-       struct dentry open_dentry ;
        struct address_space_operations *c_aops;
        struct snap_cache *cache;
-       long block;
        struct snap_table *table;
-       int slot = 0;
-       int index = 0;
-       int search_older = 0;
+       struct page *cache_page = NULL;
+       int rc = 0, slot = 0, index = 0, search_older = 0;
+       long block;
 
        ENTRY;
 
@@ -155,14 +236,14 @@ static int currentfs_readpage(struct file *file, struct page *page)
        
        c_aops = filter_c2cfaops(cache->cache_filter);
 
-       block = page->index >> inode->i_sb->s_blocksize_bits;
+       block = (page->index << PAGE_CACHE_SHIFT) >> inode->i_sb->s_blocksize_bits;
 
        /* if there is a block in the cache, return the cache readpage */
-       if( inode->i_blocks && c_aops->bmap(inode->i_mapping, block) ) {
+       if(c_aops->bmap(inode->i_mapping, block) ) {
                CDEBUG(D_SNAP, "block %lu in cache, ino %lu\n", 
                                block, inode->i_ino);
-               result = c_aops->readpage(file, page);
-               RETURN(result);
+               rc = c_aops->readpage(file, page);
+               RETURN(rc);
        }
 
        /*
@@ -179,11 +260,7 @@ static int currentfs_readpage(struct file *file, struct page *page)
 
        table = &snap_tables[cache->cache_snap_tableno];
 
-        for (slot = table->tbl_count - 1; slot >= 1; slot--)
-        {
-               struct address_space_operations *c_aops = 
-                                       cache_inode->i_mapping->a_ops;
-               cache_inode = NULL;
+        for (slot = table->tbl_count - 1; slot >= 1; slot--) {
                 index = table->snap_items[slot].index;
                cache_inode = snap_get_indirect(inode, NULL, index);
 
@@ -196,31 +273,61 @@ static int currentfs_readpage(struct file *file, struct page *page)
                 if (!search_older && c_aops->bmap(cache_inode->i_mapping, block)) 
                         break;
                 iput(cache_inode);
+               cache_inode = NULL;
         }
-       if( pri_inode )
-               iput(pri_inode);
+       if (pri_inode) iput(pri_inode);
+
+       if (!cache_inode) {
+               CDEBUG(D_SNAP, "block %lu is a hole of inode %lu \n", 
+                      block, inode->i_ino);
+               memset(kmap(page), 0, PAGE_CACHE_SIZE);
+               flush_dcache_page(page);
+               GOTO(exit, rc = 0);
+       }
+       CDEBUG(D_INODE, "readpage ino %lu icount %d \n", cache_inode->i_ino, 
+              atomic_read(&cache_inode->i_count));
+       down(&cache_inode->i_sem);
 
-       if ( !cache_inode )  
-               RETURN(-EINVAL);
+       /* The page has to be read from cache_inode rather than from the
+        * inode that "file" refers to, so the generic file read path is
+        * re-implemented by hand here.
+        * FIXME: clean this code up later.
+        */
+       
+       cache_page = grab_cache_page(cache_inode->i_mapping, page->index);
+       if (!cache_page) 
+               GOTO(exit_release, rc = -ENOMEM);
+       if ((rc = c_aops->readpage(file, cache_page)))
+               GOTO(exit_release, 0);
+       
+       wait_on_page(cache_page);
 
-       currentfs_prepare_snapfile(inode, file, cache_inode, &open_file,
-                             &open_dentry);
+       if (!Page_Uptodate(cache_page))
+               GOTO(exit_release, rc = -EIO);
 
-       down(&cache_inode->i_sem);
+       memcpy(kmap(page), kmap(cache_page), PAGE_CACHE_SIZE);
+       flush_dcache_page(page);
+
+       kunmap(cache_page);
+       page_cache_release(cache_page);
 
-       if( c_aops->readpage ) {
-               CDEBUG(D_SNAP, "block %lu NOT in cache, use redirected ino %lu\n", 
-                      block, cache_inode->i_ino );
-               result = c_aops->readpage(&open_file, page);
-       }else {
-               CDEBUG(D_SNAP, "cache ino %lu, readpage is NULL\n", 
-                      cache_inode->i_ino);
-       }
        up(&cache_inode->i_sem);
-       currentfs_restore_snapfile(inode, file, cache_inode, &open_file);
        iput(cache_inode);
-       RETURN(result);
+exit:  
+       kunmap(page);
+       SetPageUptodate(page);
+       UnlockPage(page);
+
+       RETURN(rc);
+
+exit_release:
+       if (cache_page) 
+               page_cache_release(cache_page);
+       up(&cache_inode->i_sem);
+       iput(cache_inode);
+       UnlockPage(page);
+       RETURN(rc);
 }
+
 struct address_space_operations currentfs_file_aops = { /* only readpage is set here */
        readpage:       currentfs_readpage, /* page reads go through currentfs_readpage() */
 };
@@ -230,6 +337,8 @@ struct file_operations currentfs_file_fops = {
 };
                                                                                                                                                                                                      
 struct inode_operations currentfs_file_iops = { /* inode operations table; handlers are not visible in this view */
-       revalidate:     NULL,
+       setattr:        currentfs_setattr, /* replaces the former NULL revalidate entry */
+       setxattr:       currentfs_setxattr, /* extended-attribute set hook */
+       removexattr:    currentfs_removexattr,   /* extended-attribute remove hook */
 };