Whamcloud - gitweb
Landing b_bug974 onto HEAD (20040213_1538).
[fs/lustre-release.git] / lustre / lvfs / fsfilt_ext3.c
index 7c21ba4..91513f8 100644 (file)
@@ -74,6 +74,7 @@ static void *fsfilt_ext3_start(struct inode *inode, int op, void *desc_private)
 {
         /* For updates to the last recieved file */
         int nblocks = EXT3_DATA_TRANS_BLOCKS;
+        int blocksize, block_count = 0;
         void *handle;
 
         if (current->journal_info) {
@@ -119,6 +120,13 @@ static void *fsfilt_ext3_start(struct inode *inode, int op, void *desc_private)
                 /* Setattr on inode */
                 nblocks += 1;
                 break;
+        case FSFILT_OP_CANCEL_UNLINK_LOG:
+                blocksize = 1 << inode->i_blkbits;
+                block_count = (blocksize - 1) + LLOG_CHUNK_SIZE;
+                block_count = (block_count + blocksize - 1) >> inode->i_blkbits;
+                block_count = block_count * EXT3_DATA_TRANS_BLOCKS + 2;
+                nblocks = 2 * 2 * block_count;
+                break;
         default: CERROR("unknown transaction start op %d\n", op);
                  LBUG();
         }
@@ -159,28 +167,44 @@ static void *fsfilt_ext3_start(struct inode *inode, int op, void *desc_private)
  *
  * 1 EXT3_DATA_TRANS_BLOCKS for the last_rcvd update.
  */
-static int fsfilt_ext3_credits_needed(int objcount, struct fsfilt_objinfo *fso)
+static int fsfilt_ext3_credits_needed(int objcount, struct fsfilt_objinfo *fso,
+                                      int niocount, struct niobuf_local *nb)
 {
         struct super_block *sb = fso->fso_dentry->d_inode->i_sb;
-        int blockpp = 1 << (PAGE_CACHE_SHIFT - sb->s_blocksize_bits);
-        int addrpp = EXT3_ADDR_PER_BLOCK(sb) * blockpp;
-        int nbitmaps = 0;
-        int ngdblocks = 0;
-        int needed = objcount + 1;
-        int i;
-
-        for (i = 0; i < objcount; i++, fso++) {
-                int nblocks = fso->fso_bufcnt * blockpp;
-                int ndindirect = min(nblocks, addrpp + 1);
-                int nindir = nblocks + ndindirect + 1;
-
-                nbitmaps += nindir + nblocks;
-                ngdblocks += nindir + nblocks;
-
-                needed += nindir;
+        __u64 next_indir;
+        const int blockpp = 1 << (PAGE_CACHE_SHIFT - sb->s_blocksize_bits);
+        int nbitmaps = 0, ngdblocks;
+        int needed = objcount + 1; /* inodes + superblock */
+        int i, j;
+
+        for (i = 0, j = 0; i < objcount; i++, fso++) {
+                /* two or more dindirect blocks in case we cross boundary */
+                int ndind = (long)((nb[j + fso->fso_bufcnt - 1].offset -
+                                    nb[j].offset) >>
+                                   sb->s_blocksize_bits) /
+                        (EXT3_ADDR_PER_BLOCK(sb) * EXT3_ADDR_PER_BLOCK(sb));
+                nbitmaps += min(fso->fso_bufcnt, ndind > 0 ? ndind : 2);
+
+                /* leaf, indirect, tindirect blocks for first block */
+                nbitmaps += blockpp + 2;
+
+                j += fso->fso_bufcnt;
+        }
+
+        next_indir = nb[0].offset +
+                (EXT3_ADDR_PER_BLOCK(sb) << sb->s_blocksize_bits);
+        for (i = 1; i < niocount; i++) {
+                if (nb[i].offset >= next_indir) {
+                        nbitmaps++;     /* additional indirect */
+                        next_indir = nb[i].offset +
+                                (EXT3_ADDR_PER_BLOCK(sb)<<sb->s_blocksize_bits);
+                } else if (nb[i].offset != nb[i - 1].offset + sb->s_blocksize) {
+                        nbitmaps++;     /* additional indirect */
+                }
+                nbitmaps += blockpp;    /* each leaf in different group? */
         }
 
-        /* Assumes ext3 and ext3 have same sb_info layout at the start. */
+        ngdblocks = nbitmaps;
         if (nbitmaps > EXT3_SB(sb)->s_groups_count)
                 nbitmaps = EXT3_SB(sb)->s_groups_count;
         if (ngdblocks > EXT3_SB(sb)->s_gdb_count)
@@ -191,7 +215,7 @@ static int fsfilt_ext3_credits_needed(int objcount, struct fsfilt_objinfo *fso)
         /* last_rcvd update */
         needed += EXT3_DATA_TRANS_BLOCKS;
 
-#ifdef CONFIG_QUOTA
+#if defined(CONFIG_QUOTA) && !defined(__x86_64__) /* XXX */
         /* We assume that there will be 1 bit set in s_dquot.flags for each
          * quota file that is active.  This is at least true for now.
          */
@@ -217,7 +241,8 @@ static int fsfilt_ext3_credits_needed(int objcount, struct fsfilt_objinfo *fso)
  * the pages have been written.
  */
 static void *fsfilt_ext3_brw_start(int objcount, struct fsfilt_objinfo *fso,
-                                   int niocount, void *desc_private)
+                                   int niocount, struct niobuf_local *nb,
+                                   void *desc_private)
 {
         journal_t *journal;
         handle_t *handle;
@@ -226,7 +251,7 @@ static void *fsfilt_ext3_brw_start(int objcount, struct fsfilt_objinfo *fso,
 
         LASSERT(current->journal_info == desc_private);
         journal = EXT3_SB(fso->fso_dentry->d_inode->i_sb)->s_journal;
-        needed = fsfilt_ext3_credits_needed(objcount, fso);
+        needed = fsfilt_ext3_credits_needed(objcount, fso, niocount, nb);
 
         /* The number of blocks we could _possibly_ dirty can very large.
          * We reduce our request if it is absurd (and we couldn't get that
@@ -298,11 +323,14 @@ static int fsfilt_ext3_commit_async(struct inode *inode, void *h,
                 unlock_kernel();
                 return rc;
         }
-
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)
         rtid = log_start_commit(journal, transaction);
         if (rtid != tid)
                 CERROR("strange race: %lu != %lu\n",
                        (unsigned long) tid, (unsigned long) rtid);
+#else
+        log_start_commit(journal, transaction->t_tid);
+#endif
         unlock_kernel();
 
         *wait_handle = (void *) tid;
@@ -636,7 +664,11 @@ static int fsfilt_ext3_add_journal_cb(struct obd_device *obd, __u64 last_rcvd,
 static int fsfilt_ext3_statfs(struct super_block *sb, struct obd_statfs *osfs)
 {
         struct kstatfs sfs;
-        int rc = vfs_statfs(sb, &sfs);
+        int rc;
+
+        memset(&sfs, 0, sizeof(sfs));
+
+        rc = sb->s_op->statfs(sb, &sfs);
 
         if (!rc && sfs.f_bfree < sfs.f_ffree) {
                 sfs.f_files = (sfs.f_files - sfs.f_ffree) + sfs.f_bfree;
@@ -671,102 +703,108 @@ static int fsfilt_ext3_prep_san_write(struct inode *inode, long *blocks,
 static int fsfilt_ext3_read_record(struct file * file, void *buf,
                                    int size, loff_t *offs)
 {
-        struct buffer_head *bh;
-        unsigned long block, boffs;
         struct inode *inode = file->f_dentry->d_inode;
-        int err;
+        unsigned long block;
+        struct buffer_head *bh;
+        int err, blocksize, csize, boffs;
 
+        /* clamp the request so that it never reads past EOF */
+        lock_kernel();
         if (inode->i_size < *offs + size) {
                 size = inode->i_size - *offs;
+                unlock_kernel();
                 if (size < 0) {
                         CERROR("size %llu is too short for read %u@%llu\n",
-                                        inode->i_size, size, *offs);
+                               inode->i_size, size, *offs);
                         return -EIO;
-                } else if (size == 0)
+                } else if (size == 0) {
                         return 0;
+                }
+        } else {
+                unlock_kernel();
         }
 
-        block = *offs >> inode->i_blkbits;
-        bh = ext3_bread(NULL, inode, block, 0, &err);
-        if (!bh) {
-                CERROR("can't read block: %d\n", err);
-                return err;
-        }
+        blocksize = 1 << inode->i_blkbits;
+
+        while (size > 0) {
+                block = *offs >> inode->i_blkbits;
+                boffs = *offs & (blocksize - 1);
+                csize = min(blocksize - boffs, size);
+                bh = ext3_bread(NULL, inode, block, 0, &err);
+                if (!bh) {
+                        CERROR("can't read block: %d\n", err);
+                        return err;
+                }
 
-        boffs = (unsigned)*offs % bh->b_size;
-        if (boffs + size > bh->b_size) {
-                CERROR("request crosses block's border. offset %llu, size %u\n",
-                       *offs, size);
+                memcpy(buf, bh->b_data + boffs, csize);
                 brelse(bh);
-                return -EIO;
-        }
 
-        memcpy(buf, bh->b_data + boffs, size);
-        brelse(bh);
-        *offs += size;
+                *offs += csize;
+                buf += csize;
+                size -= csize;
+        }
         return 0;
 }
 
-static int fsfilt_ext3_write_record(struct file *file, void *buf, int size,
+static int fsfilt_ext3_write_record(struct file *file, void *buf, int bufsize,
                                     loff_t *offs, int force_sync)
 {
-        struct buffer_head *bh;
-        unsigned long block, boffs;
+        struct buffer_head *bh = NULL;
+        unsigned long block;
         struct inode *inode = file->f_dentry->d_inode;
-        loff_t old_size = inode->i_size;
+        loff_t old_size = inode->i_size, offset = *offs;
+        loff_t new_size = inode->i_size;
         journal_t *journal;
         handle_t *handle;
-        int err;
+        int err, block_count = 0, blocksize, size, boffs;
+
+        /* Determine how many transaction credits are needed */
+        blocksize = 1 << inode->i_blkbits;
+        block_count = (*offs & (blocksize - 1)) + bufsize;
+        block_count = (block_count + blocksize - 1) >> inode->i_blkbits;
 
         journal = EXT3_SB(inode->i_sb)->s_journal;
-        handle = journal_start(journal, EXT3_DATA_TRANS_BLOCKS + 2);
+        lock_kernel();
+        handle = journal_start(journal,
+                               block_count * EXT3_DATA_TRANS_BLOCKS + 2);
+        unlock_kernel();
         if (IS_ERR(handle)) {
                 CERROR("can't start transaction\n");
                 return PTR_ERR(handle);
         }
 
-        block = *offs >> inode->i_blkbits;
-        if (*offs + size > inode->i_size) {
-                down(&inode->i_sem);
-                if (*offs + size > inode->i_size)
-                        inode->i_size = *offs + size;
-                if (inode->i_size > EXT3_I(inode)->i_disksize)
-                        EXT3_I(inode)->i_disksize = inode->i_size;
-                up(&inode->i_sem);
-        }
-
-        bh = ext3_bread(handle, inode, block, 1, &err);
-        if (!bh) {
-                CERROR("can't read/create block: %d\n", err);
-                goto out;
-        }
-
-        /* This is a hack only needed because ext3_get_block_handle() updates
-         * i_disksize after marking the inode dirty in ext3_splice_branch().
-         * We will fix that when we get a chance, as ext3_mark_inode_dirty()
-         * is not without cost, nor is it even exported.
-         */
-        if (inode->i_size > old_size)
-                mark_inode_dirty(inode);
-
-        boffs = (unsigned)*offs % bh->b_size;
-        if (boffs + size > bh->b_size) {
-                CERROR("request crosses block's border. offset %llu, size %u\n",
-                       *offs, size);
-                err = -EIO;
-                goto out;
-        }
+        while (bufsize > 0) {
+                if (bh != NULL)
+                        brelse(bh);
+
+                block = offset >> inode->i_blkbits;
+                boffs = offset & (blocksize - 1);
+                size = min(blocksize - boffs, bufsize);
+                bh = ext3_bread(handle, inode, block, 1, &err);
+                if (!bh) {
+                        CERROR("can't read/create block: %d\n", err);
+                        goto out;
+                }
 
-        err = ext3_journal_get_write_access(handle, bh);
-        if (err) {
-                CERROR("journal_get_write_access() returned error %d\n", err);
-                goto out;
-        }
-        memcpy(bh->b_data + boffs, buf, size);
-        err = ext3_journal_dirty_metadata(handle, bh);
-        if (err) {
-                CERROR("journal_dirty_metadata() returned error %d\n", err);
-                goto out;
+                err = ext3_journal_get_write_access(handle, bh);
+                if (err) {
+                        CERROR("journal_get_write_access() returned error %d\n",
+                               err);
+                        goto out;
+                }
+                LASSERT(bh->b_data + boffs + size <= bh->b_data + bh->b_size);
+                memcpy(bh->b_data + boffs, buf, size);
+                err = ext3_journal_dirty_metadata(handle, bh);
+                if (err) {
+                        CERROR("journal_dirty_metadata() returned error %d\n",
+                               err);
+                        goto out;
+                }
+                if (offset + size > new_size)
+                        new_size = offset + size;
+                offset += size;
+                bufsize -= size;
+                buf += size;
         }
 
         if (force_sync)
@@ -774,9 +812,25 @@ static int fsfilt_ext3_write_record(struct file *file, void *buf, int size,
 out:
         if (bh)
                 brelse(bh);
+
+        /* extend the in-core and on-disk sizes if we wrote past them */
+        if (new_size > inode->i_size) {
+                lock_kernel();
+                if (new_size > inode->i_size)
+                        inode->i_size = new_size;
+                if (inode->i_size > EXT3_I(inode)->i_disksize)
+                        EXT3_I(inode)->i_disksize = inode->i_size;
+                if (inode->i_size > old_size)
+                        mark_inode_dirty(inode);
+                unlock_kernel();
+        }
+
+        lock_kernel();
         journal_stop(handle);
+        unlock_kernel();
+
         if (err == 0)
-                *offs += size;
+                *offs = offset;
         return err;
 }