X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Flvfs%2Ffsfilt_ext3.c;h=91513f8749e379fef0907372b6c0e13d8d49ce24;hp=7c21ba4ec1b0351bd54e6a06cb40a84938f9c7a1;hb=30c3a18963d1d6d70175fbbbdd9554e1eb2fa40d;hpb=c5050e412572b00cbe93d8517d2d1f767bebfa92 diff --git a/lustre/lvfs/fsfilt_ext3.c b/lustre/lvfs/fsfilt_ext3.c index 7c21ba4..91513f8 100644 --- a/lustre/lvfs/fsfilt_ext3.c +++ b/lustre/lvfs/fsfilt_ext3.c @@ -74,6 +74,7 @@ static void *fsfilt_ext3_start(struct inode *inode, int op, void *desc_private) { /* For updates to the last recieved file */ int nblocks = EXT3_DATA_TRANS_BLOCKS; + int blocksize, block_count = 0; void *handle; if (current->journal_info) { @@ -119,6 +120,13 @@ static void *fsfilt_ext3_start(struct inode *inode, int op, void *desc_private) /* Setattr on inode */ nblocks += 1; break; + case FSFILT_OP_CANCEL_UNLINK_LOG: + blocksize = 1 << inode->i_blkbits; + block_count = (blocksize - 1) + LLOG_CHUNK_SIZE; + block_count = (block_count + blocksize - 1) >> inode->i_blkbits; + block_count = block_count * EXT3_DATA_TRANS_BLOCKS + 2; + nblocks = 2 * 2 * block_count; + break; default: CERROR("unknown transaction start op %d\n", op); LBUG(); } @@ -159,28 +167,44 @@ static void *fsfilt_ext3_start(struct inode *inode, int op, void *desc_private) * * 1 EXT3_DATA_TRANS_BLOCKS for the last_rcvd update. 
*/ -static int fsfilt_ext3_credits_needed(int objcount, struct fsfilt_objinfo *fso) +static int fsfilt_ext3_credits_needed(int objcount, struct fsfilt_objinfo *fso, + int niocount, struct niobuf_local *nb) { struct super_block *sb = fso->fso_dentry->d_inode->i_sb; - int blockpp = 1 << (PAGE_CACHE_SHIFT - sb->s_blocksize_bits); - int addrpp = EXT3_ADDR_PER_BLOCK(sb) * blockpp; - int nbitmaps = 0; - int ngdblocks = 0; - int needed = objcount + 1; - int i; - - for (i = 0; i < objcount; i++, fso++) { - int nblocks = fso->fso_bufcnt * blockpp; - int ndindirect = min(nblocks, addrpp + 1); - int nindir = nblocks + ndindirect + 1; - - nbitmaps += nindir + nblocks; - ngdblocks += nindir + nblocks; - - needed += nindir; + __u64 next_indir; + const int blockpp = 1 << (PAGE_CACHE_SHIFT - sb->s_blocksize_bits); + int nbitmaps = 0, ngdblocks; + int needed = objcount + 1; /* inodes + superblock */ + int i, j; + + for (i = 0, j = 0; i < objcount; i++, fso++) { + /* two or more dindirect blocks in case we cross boundary */ + int ndind = (long)((nb[j + fso->fso_bufcnt - 1].offset - + nb[j].offset) >> + sb->s_blocksize_bits) / + (EXT3_ADDR_PER_BLOCK(sb) * EXT3_ADDR_PER_BLOCK(sb)); + nbitmaps += min(fso->fso_bufcnt, ndind > 0 ? ndind : 2); + + /* leaf, indirect, tindirect blocks for first block */ + nbitmaps += blockpp + 2; + + j += fso->fso_bufcnt; + } + + next_indir = nb[0].offset + + (EXT3_ADDR_PER_BLOCK(sb) << sb->s_blocksize_bits); + for (i = 1; i < niocount; i++) { + if (nb[i].offset >= next_indir) { + nbitmaps++; /* additional indirect */ + next_indir = nb[i].offset + + (EXT3_ADDR_PER_BLOCK(sb)<<sb->s_blocksize_bits); + } else if (nb[i].offset != nb[i - 1].offset + sb->s_blocksize) { + nbitmaps++; /* additional indirect */ + } + nbitmaps += blockpp; /* each leaf in different group? */ } - /* Assumes ext3 and ext3 have same sb_info layout at the start. 
*/ + ngdblocks = nbitmaps; if (nbitmaps > EXT3_SB(sb)->s_groups_count) nbitmaps = EXT3_SB(sb)->s_groups_count; if (ngdblocks > EXT3_SB(sb)->s_gdb_count) @@ -191,7 +215,7 @@ static int fsfilt_ext3_credits_needed(int objcount, struct fsfilt_objinfo *fso) /* last_rcvd update */ needed += EXT3_DATA_TRANS_BLOCKS; -#ifdef CONFIG_QUOTA +#if defined(CONFIG_QUOTA) && !defined(__x86_64__) /* XXX */ /* We assume that there will be 1 bit set in s_dquot.flags for each * quota file that is active. This is at least true for now. */ @@ -217,7 +241,8 @@ static int fsfilt_ext3_credits_needed(int objcount, struct fsfilt_objinfo *fso) * the pages have been written. */ static void *fsfilt_ext3_brw_start(int objcount, struct fsfilt_objinfo *fso, - int niocount, void *desc_private) + int niocount, struct niobuf_local *nb, + void *desc_private) { journal_t *journal; handle_t *handle; @@ -226,7 +251,7 @@ static void *fsfilt_ext3_brw_start(int objcount, struct fsfilt_objinfo *fso, LASSERT(current->journal_info == desc_private); journal = EXT3_SB(fso->fso_dentry->d_inode->i_sb)->s_journal; - needed = fsfilt_ext3_credits_needed(objcount, fso); + needed = fsfilt_ext3_credits_needed(objcount, fso, niocount, nb); /* The number of blocks we could _possibly_ dirty can very large. 
* We reduce our request if it is absurd (and we couldn't get that @@ -298,11 +323,14 @@ static int fsfilt_ext3_commit_async(struct inode *inode, void *h, unlock_kernel(); return rc; } - +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0) rtid = log_start_commit(journal, transaction); if (rtid != tid) CERROR("strange race: %lu != %lu\n", (unsigned long) tid, (unsigned long) rtid); +#else + log_start_commit(journal, transaction->t_tid); +#endif unlock_kernel(); *wait_handle = (void *) tid; @@ -636,7 +664,11 @@ static int fsfilt_ext3_add_journal_cb(struct obd_device *obd, __u64 last_rcvd, static int fsfilt_ext3_statfs(struct super_block *sb, struct obd_statfs *osfs) { struct kstatfs sfs; - int rc = vfs_statfs(sb, &sfs); + int rc; + + memset(&sfs, 0, sizeof(sfs)); + + rc = sb->s_op->statfs(sb, &sfs); if (!rc && sfs.f_bfree < sfs.f_ffree) { sfs.f_files = (sfs.f_files - sfs.f_ffree) + sfs.f_bfree; @@ -671,102 +703,108 @@ static int fsfilt_ext3_prep_san_write(struct inode *inode, long *blocks, static int fsfilt_ext3_read_record(struct file * file, void *buf, int size, loff_t *offs) { - struct buffer_head *bh; - unsigned long block, boffs; struct inode *inode = file->f_dentry->d_inode; - int err; + unsigned long block; + struct buffer_head *bh; + int err, blocksize, csize, boffs; + /* prevent reading after eof */ + lock_kernel(); if (inode->i_size < *offs + size) { size = inode->i_size - *offs; + unlock_kernel(); if (size < 0) { CERROR("size %llu is too short for read %u@%llu\n", - inode->i_size, size, *offs); + inode->i_size, size, *offs); return -EIO; - } else if (size == 0) + } else if (size == 0) { return 0; + } + } else { + unlock_kernel(); } - block = *offs >> inode->i_blkbits; - bh = ext3_bread(NULL, inode, block, 0, &err); - if (!bh) { - CERROR("can't read block: %d\n", err); - return err; - } + blocksize = 1 << inode->i_blkbits; + + while (size > 0) { + block = *offs >> inode->i_blkbits; + boffs = *offs & (blocksize - 1); + csize = min(blocksize - boffs, size); + bh = 
ext3_bread(NULL, inode, block, 0, &err); + if (!bh) { + CERROR("can't read block: %d\n", err); + return err; + } - boffs = (unsigned)*offs % bh->b_size; - if (boffs + size > bh->b_size) { - CERROR("request crosses block's border. offset %llu, size %u\n", - *offs, size); + memcpy(buf, bh->b_data + boffs, csize); brelse(bh); - return -EIO; - } - memcpy(buf, bh->b_data + boffs, size); - brelse(bh); - *offs += size; + *offs += csize; + buf += csize; + size -= csize; + } return 0; } -static int fsfilt_ext3_write_record(struct file *file, void *buf, int size, +static int fsfilt_ext3_write_record(struct file *file, void *buf, int bufsize, loff_t *offs, int force_sync) { - struct buffer_head *bh; - unsigned long block, boffs; + struct buffer_head *bh = NULL; + unsigned long block; struct inode *inode = file->f_dentry->d_inode; - loff_t old_size = inode->i_size; + loff_t old_size = inode->i_size, offset = *offs; + loff_t new_size = inode->i_size; journal_t *journal; handle_t *handle; - int err; + int err, block_count = 0, blocksize, size, boffs; + + /* Determine how many transaction credits are needed */ + blocksize = 1 << inode->i_blkbits; + block_count = (*offs & (blocksize - 1)) + bufsize; + block_count = (block_count + blocksize - 1) >> inode->i_blkbits; journal = EXT3_SB(inode->i_sb)->s_journal; - handle = journal_start(journal, EXT3_DATA_TRANS_BLOCKS + 2); + lock_kernel(); + handle = journal_start(journal, + block_count * EXT3_DATA_TRANS_BLOCKS + 2); + unlock_kernel(); if (IS_ERR(handle)) { CERROR("can't start transaction\n"); return PTR_ERR(handle); } - block = *offs >> inode->i_blkbits; - if (*offs + size > inode->i_size) { - down(&inode->i_sem); - if (*offs + size > inode->i_size) - inode->i_size = *offs + size; - if (inode->i_size > EXT3_I(inode)->i_disksize) - EXT3_I(inode)->i_disksize = inode->i_size; - up(&inode->i_sem); - } - - bh = ext3_bread(handle, inode, block, 1, &err); - if (!bh) { - CERROR("can't read/create block: %d\n", err); - goto out; - } - - /* 
This is a hack only needed because ext3_get_block_handle() updates - * i_disksize after marking the inode dirty in ext3_splice_branch(). - * We will fix that when we get a chance, as ext3_mark_inode_dirty() - * is not without cost, nor is it even exported. - */ - if (inode->i_size > old_size) - mark_inode_dirty(inode); - - boffs = (unsigned)*offs % bh->b_size; - if (boffs + size > bh->b_size) { - CERROR("request crosses block's border. offset %llu, size %u\n", - *offs, size); - err = -EIO; - goto out; - } + while (bufsize > 0) { + if (bh != NULL) + brelse(bh); + + block = offset >> inode->i_blkbits; + boffs = offset & (blocksize - 1); + size = min(blocksize - boffs, bufsize); + bh = ext3_bread(handle, inode, block, 1, &err); + if (!bh) { + CERROR("can't read/create block: %d\n", err); + goto out; + } - err = ext3_journal_get_write_access(handle, bh); - if (err) { - CERROR("journal_get_write_access() returned error %d\n", err); - goto out; - } - memcpy(bh->b_data + boffs, buf, size); - err = ext3_journal_dirty_metadata(handle, bh); - if (err) { - CERROR("journal_dirty_metadata() returned error %d\n", err); - goto out; + err = ext3_journal_get_write_access(handle, bh); + if (err) { + CERROR("journal_get_write_access() returned error %d\n", + err); + goto out; + } + LASSERT(bh->b_data + boffs + size <= bh->b_data + bh->b_size); + memcpy(bh->b_data + boffs, buf, size); + err = ext3_journal_dirty_metadata(handle, bh); + if (err) { + CERROR("journal_dirty_metadata() returned error %d\n", + err); + goto out; + } + if (offset + size > new_size) + new_size = offset + size; + offset += size; + bufsize -= size; + buf += size; } if (force_sync) @@ -774,9 +812,25 @@ static int fsfilt_ext3_write_record(struct file *file, void *buf, int size, out: if (bh) brelse(bh); + + /* correct in-core and on-disk sizes */ + if (new_size > inode->i_size) { + lock_kernel(); + if (new_size > inode->i_size) + inode->i_size = new_size; + if (inode->i_size > EXT3_I(inode)->i_disksize) + 
EXT3_I(inode)->i_disksize = inode->i_size; + if (inode->i_size > old_size) + mark_inode_dirty(inode); + unlock_kernel(); + } + + lock_kernel(); journal_stop(handle); + unlock_kernel(); + if (err == 0) - *offs += size; + *offs = offset; return err; }