-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
+/*
* GPL HEADER START
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* GPL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Whamcloud, Inc.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/quotaops.h>
-#ifdef HAVE_EXT4_LDISKFS
+#ifdef HAVE_LINUX_EXPORTFS_H
+#include <linux/exportfs.h>
+#endif
#include <ext4/ext4.h>
#include <ext4/ext4_jbd2.h>
-#else
-#include <linux/jbd.h>
-#include <linux/ext3_fs.h>
-#include <linux/ext3_jbd.h>
-#endif
#include <linux/version.h>
#include <linux/bitops.h>
#include <linux/quota.h>
-#include <linux/quotaio_v1.h>
-#include <linux/quotaio_v2.h>
-#if defined(HAVE_EXT3_XATTR_H)
-#include <ext3/xattr.h>
+#ifdef HAVE_QUOTAIO_H
+# include <linux/quotaio_v2.h>
+#elif defined(HAVE_FS_QUOTA_QUOTAIO_H)
+# include <quota/quotaio_v2.h>
+# include <quota/quota_tree.h>
+# define V2_DQTREEOFF QT_TREEOFF
+#elif defined(HAVE_FS_QUOTAIO_H)
+# include <quotaio_v2.h>
+# include <quota_tree.h>
+# define V2_DQTREEOFF QT_TREEOFF
+# define V2_INITQVERSIONS_R1 V2_INITQVERSIONS
+#endif
+
+#ifdef QFMT_VFS_V1
+#define QFMT_LUSTRE QFMT_VFS_V1
#else
+#define QFMT_LUSTRE QFMT_VFS_V0
+#endif
+
+#if defined(HAVE_EXT3_XATTR_H)
+# include <ext3/xattr.h>
+#elif !defined(EXT3_XATTR_INDEX_TRUSTED)
/* ext3 xattr.h not available in rh style kernel-devel rpm */
+/* CHAOS kernel-devel package will not include fs/ldiskfs/xattr.h */
+# define EXT3_XATTR_INDEX_TRUSTED 4
extern int ext3_xattr_get(struct inode *, int, const char *, void *, size_t);
extern int ext3_xattr_set_handle(handle_t *, struct inode *, int, const char *, const void *, size_t, int);
#endif
#include <linux/lustre_compat25.h>
#include <linux/lprocfs_status.h>
-#ifdef EXT3_MULTIBLOCK_ALLOCATOR
-#ifdef HAVE_EXT4_LDISKFS
#include <ext4/ext4_extents.h>
-#else
-#include <linux/ext3_extents.h>
-#endif
-#endif
#include "lustre_quota_fmt.h"
#define FSFILT_DELETE_TRANS_BLOCKS(sb) EXT3_DELETE_TRANS_BLOCKS(sb)
#endif
-#ifdef EXT3_SINGLEDATA_TRANS_BLOCKS_HAS_SB
/* for kernels 2.6.18 and later */
#define FSFILT_SINGLEDATA_TRANS_BLOCKS(sb) EXT3_SINGLEDATA_TRANS_BLOCKS(sb)
-#else
-#define FSFILT_SINGLEDATA_TRANS_BLOCKS(sb) EXT3_SINGLEDATA_TRANS_BLOCKS
-#endif
-#define fsfilt_ext3_journal_start(inode, nblocks) ext3_journal_start(inode, nblocks)
-#define fsfilt_ext3_journal_stop(handle) ext3_journal_stop(handle)
+#define fsfilt_ext3_ext_insert_extent(handle, inode, path, newext, flag) \
+ ext3_ext_insert_extent(handle, inode, path, newext, flag)
+
+#define ext3_mb_discard_inode_preallocations(inode) \
+ ext3_discard_preallocations(inode)
+
+#define fsfilt_log_start_commit(journal, tid) jbd2_log_start_commit(journal, tid)
+#define fsfilt_log_wait_commit(journal, tid) jbd2_log_wait_commit(journal, tid)
+
+#ifdef HAVE_EXT4_JOURNAL_CALLBACK_ADD
+# define journal_callback ext4_journal_cb_entry
+# define fsfilt_journal_callback_set(handle, func, jcb) \
+ ext4_journal_callback_add(handle, func, jcb)
+#elif defined(HAVE_JBD2_JOURNAL_CALLBACK_SET)
+# define fsfilt_journal_callback_set(handle, func, jcb) \
+ jbd2_journal_callback_set(handle, func, jcb)
+#elif defined(HAVE_JOURNAL_CALLBACK_SET)
+# define fsfilt_journal_callback_set(handle, func, jcb) \
+ journal_callback_set(handle, func, jcb)
+#else
+# error missing journal commit callback
+#endif /* HAVE_EXT4_JOURNAL_CALLBACK_ADD */
static cfs_mem_cache_t *fcb_cache;
void *cb_data; /* MDS/OST completion function data */
};
-#ifndef EXT3_XATTR_INDEX_TRUSTED /* temporary until we hit l28 kernel */
-#define EXT3_XATTR_INDEX_TRUSTED 4
+#ifndef ext3_find_next_bit
+#define ext3_find_next_bit ext2_find_next_bit
#endif
-#ifdef HAVE_EXT4_LDISKFS
-#define fsfilt_log_start_commit(journal, tid) jbd2_log_start_commit(journal, tid)
-#define fsfilt_log_wait_commit(journal, tid) jbd2_log_wait_commit(journal, tid)
-#define fsfilt_journal_callback_set(handle, func, jcb) jbd2_journal_callback_set(handle, func, jcb)
+#ifndef ext2_find_next_bit
+#ifdef __LITTLE_ENDIAN
+#define ext2_find_next_bit(addr, size, off) find_next_bit((unsigned long *)(addr), (size), (off))
#else
-#define fsfilt_log_start_commit(journal, tid) log_start_commit(journal, tid)
-#define fsfilt_log_wait_commit(journal, tid) log_wait_commit(journal, tid)
-#define fsfilt_journal_callback_set(handle, func, jcb) journal_callback_set(handle, func, jcb)
-#define ext_pblock(ex) le32_to_cpu((ex)->ee_start)
-#define ext3_ext_store_pblock(ex, pblock) ((ex)->ee_start = cpu_to_le32(pblock))
-#define ext3_inode_bitmap(sb,desc) le32_to_cpu((desc)->bg_inode_bitmap)
-#endif
+error "Need implementation of find_next_bit on big-endian systems"
+#endif /* __LITTLE_ENDIAN */
+#endif /* !ext2_find_next_le_bit */
static char *fsfilt_ext3_get_label(struct super_block *sb)
{
static __u64 get_i_version(struct inode *inode)
{
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,27)) && defined(HAVE_EXT4_LDISKFS)
- return inode->i_version;
-#else
return EXT3_I(inode)->i_fs_version;
-#endif
}
static void set_i_version(struct inode *inode, __u64 new_version)
{
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,27)) && defined(HAVE_EXT4_LDISKFS)
- inode->i_version = new_version;
-#else
(EXT3_I(inode))->i_fs_version = new_version;
-#endif
}
/*
nblocks += 3;
/* no break */
case FSFILT_OP_CREATE: {
-#if defined(EXT3_EXTENTS_FL) && defined(EXT3_INDEX_FL) && !defined(HAVE_EXT4_LDISKFS)
- static int warned;
- if (!warned) {
- if (!test_opt(inode->i_sb, EXTENTS)) {
- warned = 1;
- } else if (((EXT3_I(inode)->i_flags &
- cpu_to_le32(EXT3_EXTENTS_FL | EXT3_INDEX_FL)) ==
- cpu_to_le32(EXT3_EXTENTS_FL | EXT3_INDEX_FL))) {
- CWARN("extent-mapped directory found with "
- "ext3-based ldiskfs - contact "
- "http://bugzilla.lustre.org/\n");
- warned = 1;
- }
- }
-#endif
/* no break */
}
case FSFILT_OP_MKDIR:
nblocks = (LLOG_CHUNK_SIZE >> inode->i_blkbits) +
FSFILT_DELETE_TRANS_BLOCKS(inode->i_sb) * logs;
break;
- case FSFILT_OP_JOIN:
- /* delete 2 file(file + array id) + create 1 file (array id)
- * create/update logs for each stripe */
- nblocks += 2 * FSFILT_DELETE_TRANS_BLOCKS(inode->i_sb);
-
- /*create array log for head file*/
- nblocks += 3;
- nblocks += (EXT3_INDEX_EXTRA_TRANS_BLOCKS +
- FSFILT_SINGLEDATA_TRANS_BLOCKS(inode->i_sb));
- /*update head file array */
- nblocks += EXT3_INDEX_EXTRA_TRANS_BLOCKS +
- FSFILT_DATA_TRANS_BLOCKS(inode->i_sb);
- break;
default: CERROR("unknown transaction start op %d\n", op);
LBUG();
}
journal_start:
LASSERTF(nblocks > 0, "can't start %d credit transaction\n", nblocks);
- handle = fsfilt_ext3_journal_start(inode, nblocks);
+ handle = ext3_journal_start(inode, nblocks);
if (!IS_ERR(handle))
LASSERT(current->journal_info == handle);
/* We assume that there will be 1 bit set in s_dquot.flags for each
* quota file that is active. This is at least true for now.
*/
- needed += hweight32(sb_any_quota_enabled(sb)) *
+ needed += hweight32(ll_sb_any_quota_active(sb)) *
FSFILT_SINGLEDATA_TRANS_BLOCKS(sb);
#endif
}
LASSERTF(needed > 0, "can't start %d credit transaction\n", needed);
- handle = fsfilt_ext3_journal_start(fso->fso_dentry->d_inode, needed);
+ handle = ext3_journal_start(fso->fso_dentry->d_inode, needed);
if (IS_ERR(handle)) {
CERROR("can't get handle for %d credits: rc = %ld\n", needed,
PTR_ERR(handle));
if (force_sync)
handle->h_sync = 1; /* recovery likes this */
- rc = fsfilt_ext3_journal_stop(handle);
+ rc = ext3_journal_stop(handle);
return rc;
}
tid = transaction->t_tid;
/* we don't want to be blocked */
handle->h_sync = 0;
- rc = fsfilt_ext3_journal_stop(handle);
+ rc = ext3_journal_stop(handle);
if (rc) {
CERROR("error while stopping transaction: %d\n", rc);
return rc;
struct inode *inode = dentry->d_inode;
int rc = 0;
+#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2,7,50,0)
+ /* Try to correct for a bug in 2.1.0 (LU-221) that caused negative
+ * timestamps to appear to be in the far future, due to the old timestamp
+ * being stored on disk as an unsigned value. This fixes up any
+ * bad values held by the client before storing them on disk,
+ * and ensures any timestamp updates are correct. LU-1042 */
+ if (unlikely(LTIME_S(inode->i_atime) == LU221_BAD_TIME &&
+ !(iattr->ia_valid & ATTR_ATIME))) {
+ iattr->ia_valid |= ATTR_ATIME;
+ LTIME_S(iattr->ia_atime) = 0;
+ }
+ if (unlikely(LTIME_S(inode->i_mtime) == LU221_BAD_TIME &&
+ !(iattr->ia_valid & ATTR_MTIME))) {
+ iattr->ia_valid |= ATTR_MTIME;
+ LTIME_S(iattr->ia_mtime) = 0;
+ }
+ if (unlikely((LTIME_S(inode->i_ctime) == LU221_BAD_TIME ||
+ LTIME_S(inode->i_ctime) == 0) &&
+ !(iattr->ia_valid & ATTR_CTIME))) {
+ iattr->ia_valid |= ATTR_CTIME;
+ LTIME_S(iattr->ia_ctime) = 0;
+ }
+#else
+#warning "remove old LU-221/LU-1042 workaround code"
+#endif
+
+ /* When initializing timestamps for new inodes, use the filesystem
+ * mkfs time for ctime to avoid e2fsck ibadness incorrectly thinking
+ * that this is potentially an invalid inode. Files with an old ctime
+ * migrated to a newly-formatted OST with a newer s_mkfs_time will not
+ * hit this check, since it is only for ctime == 0. LU-1010/LU-1042 */
+ if ((iattr->ia_valid & ATTR_CTIME) && LTIME_S(iattr->ia_ctime) == 0)
+ LTIME_S(iattr->ia_ctime) =
+ EXT4_SB(inode->i_sb)->s_es->s_mkfs_time;
+
/* Avoid marking the inode dirty on the superblock list unnecessarily.
* We are already writing the inode to disk as part of this
* transaction and want to avoid a lot of extra inode writeout
if (iattr->ia_valid & ATTR_MODE) {
inode->i_mode = iattr->ia_mode;
- if (!in_group_p(inode->i_gid) &&
+ if (!cfs_curproc_is_in_groups(inode->i_gid) &&
!cfs_capable(CFS_CAP_FSETID))
inode->i_mode &= ~S_ISGID;
}
/* We set these flags on the client, but have already checked perms
* so don't confuse inode_change_ok. */
- iattr->ia_valid &= ~(ATTR_MTIME_SET | ATTR_ATIME_SET);
+ iattr->ia_valid &= ~TIMES_SET_FLAGS;
if (inode->i_op->setattr) {
rc = inode->i_op->setattr(dentry, iattr);
} else {
+#ifndef HAVE_SIMPLE_SETATTR /* simple_setattr() already call it */
rc = inode_change_ok(inode, iattr);
if (!rc)
- rc = inode_setattr(inode, iattr);
+#endif
+ rc = simple_setattr(dentry, iattr);
}
out:
ENTRY;
/* FIXME: Can't do this because of nested transaction deadlock */
- if (cmd == EXT3_IOC_SETFLAGS && (*(int *)arg) & EXT3_JOURNAL_DATA_FL) {
- CERROR("can't set data journal flag on file\n");
- RETURN(-EPERM);
+ if (cmd == EXT3_IOC_SETFLAGS) {
+ /* We can't enable data journaling on OST objects, because
+ * this forces the transaction to be closed in order to
+ * flush the journal, but the caller will already have a
+ * compound transaction open to update the last_rcvd file,
+ * and this thread would deadlock trying to set the flag. */
+ if ((*(int *)arg) & EXT3_JOURNAL_DATA_FL) {
+ CERROR("can't set data journal flag on file\n");
+ RETURN(-EPERM);
+ }
+ /* Because the MDS does not see the EXTENTS_FL set on the
+ * OST objects, mask this flag into all set flags. It is
+ * not legal to clear this flag in any case, so we are not
+ * changing the functionality by doing this. b=22911 */
+ *(int *)arg |= EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL;
}
-#ifdef HAVE_EXT4_LDISKFS
/* ext4_ioctl does not have a inode argument */
if (inode->i_fop->unlocked_ioctl)
rc = inode->i_fop->unlocked_ioctl(file, cmd, arg);
-#else
- if (inode->i_fop->ioctl)
- rc = inode->i_fop->ioctl(inode, file, cmd, arg);
-#endif
else
RETURN(-ENOTTY);
return rc;
}
+#ifdef HAVE_EXT4_JOURNAL_CALLBACK_ADD
+static void fsfilt_ext3_cb_func(struct super_block *sb,
+ struct journal_callback *jcb, int error)
+#else
static void fsfilt_ext3_cb_func(struct journal_callback *jcb, int error)
+#endif
{
- struct fsfilt_cb_data *fcb = (struct fsfilt_cb_data *)jcb;
+ struct fsfilt_cb_data *fcb = container_of(jcb, typeof(*fcb), cb_jcb);
fcb->cb_func(fcb->cb_obd, fcb->cb_last_rcvd, fcb->cb_data, error);
fcb->cb_data = cb_data;
CDEBUG(D_EXT2, "set callback for last_rcvd: "LPD64"\n", last_rcvd);
- fsfilt_journal_callback_set(handle, fsfilt_ext3_cb_func,
- (struct journal_callback *)fcb);
+ fsfilt_journal_callback_set(handle, fsfilt_ext3_cb_func, &fcb->cb_jcb);
return 0;
}
-/*
- * We need to hack the return value for the free inode counts because
- * the current EA code requires one filesystem block per inode with EAs,
- * so it is possible to run out of blocks before we run out of inodes.
- *
- * This can be removed when the ext3 EA code is fixed.
- */
static int fsfilt_ext3_statfs(struct super_block *sb, struct obd_statfs *osfs)
{
struct kstatfs sfs;
memset(&sfs, 0, sizeof(sfs));
rc = ll_do_statfs(sb, &sfs);
- if (!rc && sfs.f_bfree < sfs.f_ffree) {
- sfs.f_files = (sfs.f_files - sfs.f_ffree) + sfs.f_bfree;
- sfs.f_ffree = sfs.f_bfree;
- }
-
statfs_pack(osfs, &sfs);
return rc;
}
return ext3_force_commit(sb);
}
-#if defined(EXT3_MULTIBLOCK_ALLOCATOR) && (!defined(EXT3_EXT_CACHE_NO) || defined(EXT_CACHE_MARK))
-#warning "kernel code has old extents/mballoc patch, disabling"
-#undef EXT3_MULTIBLOCK_ALLOCATOR
-#endif
#ifndef EXT3_EXTENTS_FL
#define EXT3_EXTENTS_FL 0x00080000 /* Inode uses extents */
#endif
-#ifdef EXT3_MULTIBLOCK_ALLOCATOR
#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,17))
-#define fsfilt_up_truncate_sem(inode) up(&EXT3_I(inode)->truncate_sem);
-#define fsfilt_down_truncate_sem(inode) down(&EXT3_I(inode)->truncate_sem);
+# define fsfilt_up_truncate_sem(inode) up(&LDISKFS_I(inode)->truncate_sem);
+# define fsfilt_down_truncate_sem(inode) down(&LDISKFS_I(inode)->truncate_sem);
#else
-#ifdef HAVE_EXT4_LDISKFS
-#define fsfilt_up_truncate_sem(inode) up_write((&EXT4_I(inode)->i_data_sem));
-#define fsfilt_down_truncate_sem(inode) down_write((&EXT4_I(inode)->i_data_sem));
-#else
-#define fsfilt_up_truncate_sem(inode) mutex_unlock(&EXT3_I(inode)->truncate_mutex);
-#define fsfilt_down_truncate_sem(inode) mutex_lock(&EXT3_I(inode)->truncate_mutex);
-#endif
+# define fsfilt_up_truncate_sem(inode) do{ }while(0)
+# define fsfilt_down_truncate_sem(inode) do{ }while(0)
#endif
#ifndef EXT_ASSERT
#ifdef EXT3_EXT_HAS_NO_TREE
/* for kernels 2.6.18 and later */
-#ifdef HAVE_EXT4_LDISKFS
#define EXT_GENERATION(inode) (EXT4_I(inode)->i_ext_generation)
-#else
-#define EXT_GENERATION(inode) ext_generation(inode)
-#endif
#define ext3_ext_base inode
#define ext3_ext_base2inode(inode) (inode)
#define EXT_DEPTH(inode) ext_depth(inode)
ext3_ext_walk_space(tree, block, num, cb);
#endif
-#include <linux/lustre_version.h>
-
struct bpointers {
unsigned long *blocks;
int *created;
unsigned long count;
handle_t *handle;
- i = EXT_DEPTH(base);
- EXT_ASSERT(i == path->p_depth);
- EXT_ASSERT(path[i].p_hdr);
-
if (cex->ec_type == EXT3_EXT_CACHE_EXTENT) {
err = EXT_CONTINUE;
goto map;
count = ext3_ext_calc_credits_for_insert(base, path);
fsfilt_up_truncate_sem(inode);
- handle = fsfilt_ext3_journal_start(inode, count+EXT3_ALLOC_NEEDED+1);
+ handle = ext3_journal_start(inode, count+EXT3_ALLOC_NEEDED+1);
if (IS_ERR(handle)) {
fsfilt_down_truncate_sem(inode);
return PTR_ERR(handle);
fsfilt_down_truncate_sem(inode);
if (tgen != EXT_GENERATION(base)) {
/* the tree has changed. so path can be invalid at moment */
- fsfilt_ext3_journal_stop(handle);
+ ext3_journal_stop(handle);
+ return EXT_REPEAT;
+ }
+
+ /* In the 2.6.32 kernel, ext4_ext_walk_space()'s callback func is not
+ * protected by i_data_sem as a whole, so we patch it to store the
+ * generation in the path and verify here that the tree hasn't changed */
+ down_write((&EXT4_I(inode)->i_data_sem));
+
+ /* validate extent, make sure the extent tree has not changed */
+ if (EXT_GENERATION(base) != path[0].p_generation) {
+ /* cex is invalid, try again */
+ up_write(&EXT4_I(inode)->i_data_sem);
+ ext3_journal_stop(handle);
return EXT_REPEAT;
}
nex.ee_block = cpu_to_le32(cex->ec_block);
ext3_ext_store_pblock(&nex, pblock);
nex.ee_len = cpu_to_le16(count);
- err = ext3_ext_insert_extent(handle, base, path, &nex);
+ err = fsfilt_ext3_ext_insert_extent(handle, base, path, &nex, 0);
if (err) {
/* free data blocks we just allocated */
/* not a good idea to call discard here directly,
BUG_ON(le32_to_cpu(nex.ee_block) != cex->ec_block);
out:
- fsfilt_ext3_journal_stop(handle);
+ up_write((&EXT4_I(inode)->i_data_sem));
+ ext3_journal_stop(handle);
map:
if (err >= 0) {
/* map blocks */
cleanup:
return rc;
}
-#endif /* EXT3_MULTIBLOCK_ALLOCATOR */
extern int ext3_map_inode_page(struct inode *inode, struct page *page,
unsigned long *blocks, int *created, int create);
int fsfilt_ext3_map_inode_pages(struct inode *inode, struct page **page,
int pages, unsigned long *blocks,
int *created, int create,
- struct semaphore *optional_sem)
+ cfs_mutex_t *optional_mutex)
{
int rc;
-#ifdef EXT3_MULTIBLOCK_ALLOCATOR
+
if (EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL) {
rc = fsfilt_ext3_map_ext_inode_pages(inode, page, pages,
blocks, created, create);
return rc;
}
-#endif
- if (optional_sem != NULL)
- down(optional_sem);
+ if (optional_mutex != NULL)
+ cfs_mutex_lock(optional_mutex);
rc = fsfilt_ext3_map_bm_inode_pages(inode, page, pages, blocks,
created, create);
- if (optional_sem != NULL)
- up(optional_sem);
+ if (optional_mutex != NULL)
+ cfs_mutex_unlock(optional_mutex);
return rc;
}
int err, blocksize, csize, boffs, osize = size;
/* prevent reading after eof */
- lock_kernel();
+ spin_lock(&inode->i_lock);
if (i_size_read(inode) < *offs + size) {
size = i_size_read(inode) - *offs;
- unlock_kernel();
+ spin_unlock(&inode->i_lock);
if (size < 0) {
- CERROR("size %llu is too short for read %u@%llu\n",
- i_size_read(inode), size, *offs);
- return -EIO;
+ CDEBUG(D_EXT2, "size %llu is too short for read @%llu\n",
+ i_size_read(inode), *offs);
+ return -EBADR;
} else if (size == 0) {
return 0;
}
} else {
- unlock_kernel();
+ spin_unlock(&inode->i_lock);
}
blocksize = 1 << inode->i_blkbits;
/* correct in-core and on-disk sizes */
if (new_size > i_size_read(inode)) {
- lock_kernel();
+ spin_lock(&inode->i_lock);
if (new_size > i_size_read(inode))
i_size_write(inode, new_size);
if (i_size_read(inode) > EXT3_I(inode)->i_disksize)
EXT3_I(inode)->i_disksize = i_size_read(inode);
- if (i_size_read(inode) > old_size)
+ if (i_size_read(inode) > old_size) {
+ spin_unlock(&inode->i_lock);
mark_inode_dirty(inode);
- unlock_kernel();
+ } else {
+ spin_unlock(&inode->i_lock);
+ }
}
if (err == 0)
block_count = (*offs & (blocksize - 1)) + bufsize;
block_count = (block_count + blocksize - 1) >> inode->i_blkbits;
- handle = fsfilt_ext3_journal_start(inode,
+ handle = ext3_journal_start(inode,
block_count * FSFILT_DATA_TRANS_BLOCKS(inode->i_sb) + 2);
if (IS_ERR(handle)) {
CERROR("can't start transaction for %d blocks (%d bytes)\n",
if (!err && force_sync)
handle->h_sync = 1; /* recovery likes this */
- fsfilt_ext3_journal_stop(handle);
+ ext3_journal_stop(handle);
return err;
}
sbi->dx_unlock = fsfilt_ext3_dx_unlock;
#endif
#endif
+ if (!EXT3_HAS_COMPAT_FEATURE(sb,
+ EXT3_FEATURE_COMPAT_HAS_JOURNAL)) {
+ CERROR("ext3 mounted without journal\n");
+ return -EINVAL;
+ }
+
#ifdef S_PDIROPS
CWARN("Enabling PDIROPS\n");
set_opt(sbi->s_mount_opt, PDIROPS);
sbi->s_qf_names[USRQUOTA] = NULL;
return -ENOMEM;
}
- sbi->s_jquota_fmt = QFMT_VFS_V0;
+ sbi->s_jquota_fmt = QFMT_LUSTRE;
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,13))
set_opt(sbi->s_mount_opt, QUOTA);
#endif
struct obd_quotactl *oqc)
{
int i, rc = 0, error = 0;
- struct quotactl_ops *qcop;
+ const struct quotactl_ops *qcop;
struct if_dqinfo *info;
struct if_dqblk *dqblk;
ENTRY;
LASSERT(oqc->qc_id == LUSTRE_QUOTA_V2);
- if (!qcop->quota_on)
- GOTO(out, rc = -ENOSYS);
-
- rc = qcop->quota_on(sb, i, QFMT_VFS_V0,
- name[i]);
+ rc = ll_quota_on(sb, i, QFMT_LUSTRE,
+ name[i], 0);
} else if (oqc->qc_cmd == Q_QUOTAOFF) {
- if (!qcop->quota_off)
- GOTO(out, rc = -ENOSYS);
- rc = qcop->quota_off(sb, i);
+ rc = ll_quota_off(sb, i, 0);
}
if (rc == -EBUSY)
}
struct chk_dqblk{
- struct hlist_node dqb_hash; /** quotacheck hash */
- struct list_head dqb_list; /** in list also */
+ cfs_hlist_node_t dqb_hash; /** quotacheck hash */
+ cfs_list_t dqb_list; /** in list also */
qid_t dqb_id; /** uid/gid */
short dqb_type; /** USRQUOTA/GRPQUOTA */
qsize_t dqb_bhardlimit; /** block hard limit */
qsize_t dqb_ihardlimit; /** inode hard limit */
qsize_t dqb_isoftlimit; /** inode soft limit */
qsize_t dqb_curinodes; /** current inodes */
- __u64 dqb_btime; /** block grace time */
- __u64 dqb_itime; /** inode grace time */
+ obd_time dqb_btime; /** block grace time */
+ obd_time dqb_itime; /** inode grace time */
__u32 dqb_valid; /** flag for above fields */
};
}
static inline struct chk_dqblk *
-find_chkquot(struct hlist_head *head, qid_t id, int type)
+find_chkquot(cfs_hlist_head_t *head, qid_t id, int type)
{
- struct hlist_node *node;
+ cfs_hlist_node_t *node;
struct chk_dqblk *cdqb;
- hlist_for_each(node, head) {
- cdqb = hlist_entry(node, struct chk_dqblk, dqb_hash);
+ cfs_hlist_for_each(node, head) {
+ cdqb = cfs_hlist_entry(node, struct chk_dqblk, dqb_hash);
if (cdqb->dqb_id == id && cdqb->dqb_type == type)
return cdqb;
}
OBD_ALLOC_PTR(cdqb);
if (cdqb) {
- INIT_HLIST_NODE(&cdqb->dqb_hash);
- INIT_LIST_HEAD(&cdqb->dqb_list);
+ CFS_INIT_HLIST_NODE(&cdqb->dqb_hash);
+ CFS_INIT_LIST_HEAD(&cdqb->dqb_list);
cdqb->dqb_id = id;
cdqb->dqb_type = type;
}
}
static struct chk_dqblk *
-cqget(struct super_block *sb, struct hlist_head *hash, struct list_head *list,
- qid_t id, int type, int first_check)
+cqget(struct super_block *sb, cfs_hlist_head_t *hash,
+ cfs_list_t *list, qid_t id, int type, int first_check)
{
- struct hlist_head *head = hash + chkquot_hash(id, type);
+ cfs_hlist_head_t *head = hash + chkquot_hash(id, type);
struct if_dqblk dqb;
struct chk_dqblk *cdqb;
int rc;
}
}
- hlist_add_head(&cdqb->dqb_hash, head);
- list_add_tail(&cdqb->dqb_list, list);
+ cfs_hlist_add_head(&cdqb->dqb_hash, head);
+ cfs_list_add_tail(&cdqb->dqb_list, list);
return cdqb;
}
RETURN(rc);
}
-static inline struct ext3_group_desc *
-get_group_desc(struct super_block *sb, int group)
-{
- unsigned long desc_block, desc;
- struct ext3_group_desc *gdp;
-
- desc_block = group / EXT3_DESC_PER_BLOCK(sb);
- desc = group % EXT3_DESC_PER_BLOCK(sb);
- gdp = (struct ext3_group_desc *)
- EXT3_SB(sb)->s_group_desc[desc_block]->b_data;
-
- return gdp + desc;
-}
-
-
-#ifndef HAVE_EXT4_LDISKFS
-static inline struct buffer_head *
-ext3_read_inode_bitmap(struct super_block *sb, unsigned long group)
-{
- struct ext3_group_desc *desc;
- struct buffer_head *bh;
-
- desc = get_group_desc(sb, group);
- bh = sb_bread(sb, ext3_inode_bitmap(sb, desc));
- return bh;
-}
-#endif
-
-static inline struct inode *ext3_iget_inuse(struct super_block *sb,
- struct buffer_head *bitmap_bh,
- int index, unsigned long ino)
-{
- struct inode *inode = NULL;
-
-
- if (ext3_test_bit(index, bitmap_bh->b_data))
-#ifdef HAVE_EXT4_LDISKFS
- inode = ext4_iget(sb, ino);
- if (IS_ERR(inode))
- /* Newer kernels return an error instead of a NULL pointer */
- inode = NULL;
-#else
- inode = iget(sb, ino);
-#endif
- return inode;
-}
-
struct qchk_ctxt {
- struct hlist_head qckt_hash[NR_DQHASH]; /* quotacheck hash */
- struct list_head qckt_list; /* quotacheck list */
+ cfs_hlist_head_t qckt_hash[NR_DQHASH]; /* quotacheck hash */
+ cfs_list_t qckt_list; /* quotacheck list */
int qckt_first_check[MAXQUOTAS]; /* 1 if no old quotafile */
- struct if_dqinfo qckt_dqinfo[MAXQUOTAS]; /* old dqinfo */
+ struct if_dqinfo qckt_dqinfo[MAXQUOTAS]; /* old dqinfo */
};
static int add_inode_quota(struct inode *inode, struct qchk_ctxt *qctxt,
static int v3_write_dqheader(struct file *f, int type)
{
static const __u32 quota_magics[] = V2_INITQMAGICS;
- static const __u32 quota_versions[] = V2_INITQVERSIONS_R1;
+ static const __u32 quota_versions[] = LUSTRE_INITQVERSIONS_V2;
struct v2_disk_dqheader dqhead;
loff_t offset = 0;
GOTO(out, rc = -EINVAL);
}
- DQUOT_DROP(file->f_dentry->d_inode);
+ ll_vfs_dq_drop(file->f_dentry->d_inode);
rc = v3_write_dqheader(file, i);
if (rc) {
struct chk_dqblk *cdqb, *tmp;
int rc;
- list_for_each_entry_safe(cdqb, tmp, &qctxt->qckt_list, dqb_list) {
+ cfs_list_for_each_entry_safe(cdqb, tmp, &qctxt->qckt_list, dqb_list) {
if (!error) {
rc = commit_chkquot(sb, qctxt, cdqb);
if (rc)
error = rc;
}
- hlist_del_init(&cdqb->dqb_hash);
- list_del(&cdqb->dqb_list);
+ cfs_hlist_del_init(&cdqb->dqb_hash);
+ cfs_list_del(&cdqb->dqb_list);
OBD_FREE_PTR(cdqb);
}
return error;
}
+#ifndef EXT3_FEATURE_RO_COMPAT_GDT_CSUM
+#define EXT3_FEATURE_RO_COMPAT_GDT_CSUM 0x0010
+#endif
+
static int fsfilt_ext3_quotacheck(struct super_block *sb,
struct obd_quotactl *oqc)
{
struct ext3_sb_info *sbi = EXT3_SB(sb);
- int i, group;
+ int i, group, uninit_feat = 0;
struct qchk_ctxt *qctxt;
struct buffer_head *bitmap_bh = NULL;
- unsigned long ino;
+ unsigned long ino, inode_inuse;
struct inode *inode;
int rc = 0;
ENTRY;
}
for (i = 0; i < NR_DQHASH; i++)
- INIT_HLIST_HEAD(&qctxt->qckt_hash[i]);
- INIT_LIST_HEAD(&qctxt->qckt_list);
+ CFS_INIT_HLIST_HEAD(&qctxt->qckt_hash[i]);
+ CFS_INIT_LIST_HEAD(&qctxt->qckt_list);
for (i = 0; i < MAXQUOTAS; i++) {
if (!Q_TYPESET(oqc, i))
GOTO(out, rc);
}
}
+ if (EXT3_HAS_RO_COMPAT_FEATURE(sb, EXT3_FEATURE_RO_COMPAT_GDT_CSUM))
+ /* This filesystem supports the uninit group feature */
+ uninit_feat = 1;
+
+ /* number of inodes that have been allocated */
+ inode_inuse = sbi->s_inodes_per_group * sbi->s_groups_count -
+ percpu_counter_sum(&sbi->s_freeinodes_counter);
/* check quota and update in hash */
- for (group = 0; group < sbi->s_groups_count; group++) {
+ for (group = 0; group < sbi->s_groups_count && inode_inuse > 0;
+ group++) {
+ unsigned long used_count = sbi->s_inodes_per_group;
+
+ if (uninit_feat) {
+ struct ext3_group_desc *desc;
+ desc = ext3_get_group_desc(sb, group, NULL);
+ if (!desc)
+ GOTO(out, -EIO);
+
+ /* we don't really need to take the group lock here,
+ * but it may be useful if one day we support online
+ * quotacheck */
+ ext4_lock_group(sb, group);
+ if (desc->bg_flags & cpu_to_le16(EXT3_BG_INODE_UNINIT)) {
+ /* no inode in use in this group, just skip it */
+ ext3_unlock_group(sb, group);
+ continue;
+ }
+
+ used_count -= ext3_itable_unused_count(sb, desc);
+ ext3_unlock_group(sb, group);
+ }
+
ino = group * sbi->s_inodes_per_group + 1;
bitmap_bh = ext3_read_inode_bitmap(sb, group);
if (!bitmap_bh) {
- CERROR("ext3_read_inode_bitmap group %d failed", group);
- GOTO(out, rc = -EIO);
+ CERROR("%s: ext3_read_inode_bitmap group %d failed\n",
+ sb->s_id, group);
+ GOTO(out, -EIO);
}
- for (i = 0; i < sbi->s_inodes_per_group; i++, ino++) {
+ i = 0;
+ while (i < used_count &&
+ (i = ext3_find_next_bit(bitmap_bh->b_data,
+ used_count, i)) < used_count) {
+ inode_inuse--;
+ i++;
+ ino = i + group * sbi->s_inodes_per_group;
if (ino < sbi->s_first_ino)
continue;
+ inode = ext3_iget(sb, ino);
+ if (!inode || IS_ERR(inode))
+ continue;
- inode = ext3_iget_inuse(sb, bitmap_bh, i, ino);
rc = add_inode_quota(inode, qctxt, oqc);
iput(inode);
if (rc) {
GOTO(out, rc);
}
}
-
brelse(bitmap_bh);
}
* has limits but hasn't file) */
#ifdef HAVE_QUOTA_SUPPORT
for (i = 0; i < MAXQUOTAS; i++) {
- struct list_head id_list;
+ cfs_list_t id_list;
struct dquot_id *dqid, *tmp;
if (!Q_TYPESET(oqc, i))
continue;
- LASSERT(sb_dqopt(sb)->files[i] != NULL);
- INIT_LIST_HEAD(&id_list);
-#ifndef KERNEL_SUPPORTS_QUOTA_READ
- rc = lustre_get_qids(sb_dqopt(sb)->files[i], NULL, i, &id_list);
-#else
- rc = lustre_get_qids(NULL, sb_dqopt(sb)->files[i], i, &id_list);
-#endif
- if (rc)
- CERROR("read old limits failed. (rc:%d)\n", rc);
+ LASSERT(sb_dqopt(sb)->files[i] != NULL);
+ CFS_INIT_LIST_HEAD(&id_list);
+ rc = lustre_get_qids(NULL, sb_dqopt(sb)->files[i], i, &id_list);
+ if (rc)
+ CERROR("read old limits failed. (rc:%d)\n", rc);
- list_for_each_entry_safe(dqid, tmp, &id_list, di_link) {
- list_del_init(&dqid->di_link);
+ cfs_list_for_each_entry_safe(dqid, tmp, &id_list, di_link) {
+ cfs_list_del_init(&dqid->di_link);
if (!rc)
cqget(sb, qctxt->qckt_hash, &qctxt->qckt_list,
if (lqi->qi_files[type] == NULL) {
CERROR("operate qinfo before it's enabled!\n");
- RETURN(-EIO);
+ RETURN(-ESRCH);
}
switch (cmd) {
}
static int fsfilt_ext3_qids(struct file *file, struct inode *inode, int type,
- struct list_head *list)
+ cfs_list_t *list)
{
return lustre_get_qids(file, inode, type, list);
}
if (dquot->dq_info->qi_files[dquot->dq_type] == NULL) {
CERROR("operate dquot before it's enabled!\n");
- RETURN(-EIO);
+ RETURN(-ESRCH);
}
switch (cmd) {
dquot->dq_dqb.dqb_isoftlimit ||
dquot->dq_dqb.dqb_bhardlimit ||
dquot->dq_dqb.dqb_bsoftlimit)
- clear_bit(DQ_FAKE_B, &dquot->dq_flags);
+ cfs_clear_bit(DQ_FAKE_B, &dquot->dq_flags);
else
- set_bit(DQ_FAKE_B, &dquot->dq_flags);
+ cfs_set_bit(DQ_FAKE_B, &dquot->dq_flags);
rc = lustre_commit_dquot(dquot);
if (rc >= 0)
}
EXPORT_SYMBOL(fsfilt_ext3_journal_sbdev);
+/**
+ * Read \a count bytes of quota data at offset \a pos into \a buf.
+ *
+ * When \a f is supplied the data is read with fsfilt_ext3_read_record();
+ * otherwise it is read through the quota_read method of the superblock
+ * owning \a inode.  At least one of \a f and \a inode must be non-NULL.
+ *
+ * \retval -EINVAL   neither \a f nor \a inode was supplied
+ * \retval negative  error reported by fsfilt_ext3_read_record()
+ * \retval otherwise how far the offset advanced (file path), or the value
+ *                   returned by quota_read (inode path)
+ */
+ssize_t lustre_read_quota(struct file *f, struct inode *inode, int type,
+                          char *buf, int count, loff_t pos)
+{
+        struct super_block *sb;
+        loff_t cur = pos;
+        int rc;
+
+        /* Support for both adm and op quota files must be provided */
+        if (f != NULL) {
+                /* adm file: the record reader advances the cursor itself */
+                rc = fsfilt_ext3_read_record(f, buf, count, &cur);
+                if (rc >= 0)
+                        rc = cur - pos;
+                return rc;
+        }
+
+        if (inode == NULL) {
+                CERROR("lustre_read_quota failed for no quota file!\n");
+                libcfs_debug_dumpstack(NULL);
+                return -EINVAL;
+        }
+
+        /* op file: go through the filesystem's own quota reader */
+        sb = inode->i_sb;
+        rc = sb->s_op->quota_read(sb, type, buf, count, pos);
+        return rc;
+}
+
+/**
+ * Write \a count bytes from \a buf to quota file \a f at offset \a pos
+ * using fsfilt_ext3_write_record().
+ *
+ * \retval negative  error reported by fsfilt_ext3_write_record()
+ * \retval otherwise how far the file offset advanced
+ */
+ssize_t lustre_write_quota(struct file *f, char *buf, int count, loff_t pos)
+{
+        loff_t cur = pos;
+        int rc;
+
+        /* Only adm quota files are supported, op updates are handled by vfs */
+        rc = fsfilt_ext3_write_record(f, buf, count, &cur, 0);
+        if (rc < 0)
+                return rc;
+
+        rc = cur - pos;
+        return rc;
+}
+
+/**
+ * Start a journal transaction sized for one quota-file update on \a inode.
+ *
+ * \param delete  non-zero when a dquot entry is being removed, zero when
+ *                one is being added or updated; selects the block estimate
+ *
+ * \return the handle produced by ext3_journal_start(), passed back as an
+ *         opaque pointer.  Other callers in this file test that result
+ *         with IS_ERR(), so the value returned here should be checked the
+ *         same way before use.
+ */
+void *lustre_quota_journal_start(struct inode *inode, int delete)
+{
+        unsigned block_count;
+
+        if (delete)
+                /* each indirect block (+4) may become free, attaching to the
+                 * header list of free blocks (+1); the data block (+1) may
+                 * become a free block (+0) or a block with free dqentries
+                 * (+0) */
+                block_count = (4 + 1) + 1;
+        else
+                /* indirect blocks are touched (+4), each causes file
+                 * expansion (+0) or freeblk reusage with a header update
+                 * (+1); dqentry is either reused causing update of the entry
+                 * block (+1), prev (+1) and next (+1) or a new block
+                 * allocation (+1) with a header update (+1) */
+                block_count = (4 + 1) + 3;
+
+        return ext3_journal_start(inode,
+                    block_count * FSFILT_DATA_TRANS_BLOCKS(inode->i_sb) + 2);
+}
+
+/**
+ * Stop the journal transaction identified by \a handle, the opaque pointer
+ * obtained from lustre_quota_journal_start().  The result of
+ * ext3_journal_stop() is not reported to the caller.
+ */
+void lustre_quota_journal_stop(void *handle)
+{
+        handle_t *jh = handle;
+
+        ext3_journal_stop(jh);
+}
+
+/* Acceptance callback handed to the file-handle decoder: it accepts every
+ * candidate dentry, so neither \a context nor \a de is examined. */
+static int ll_decode_fh_accept(void *context, struct dentry *de)
+{
+        return 1;
+}
+
+/* ll_exportfs_decode_fh() turns a file handle into a dentry.
+ *
+ * With HAVE_EXPORTFS_DECODE_FH defined it forwards to exportfs_decode_fh(),
+ * viewing \a fid as a struct fid.  Otherwise it calls the decode_fh method
+ * of export_op_default on the superblock of \a mnt, passing the handle
+ * starting at fid->ino, and supplies FILEID_INO32_GEN itself. */
+#ifdef HAVE_EXPORTFS_DECODE_FH
+# define ll_exportfs_decode_fh(mnt, fid, len, type, acceptable, context)     \
+         exportfs_decode_fh(mnt, (struct fid*)(fid), len, type,               \
+                            acceptable, context)
+#else
+extern struct export_operations export_op_default;
+# define FILEID_INO32_GEN 1
+# define ll_exportfs_decode_fh(mnt, fid, len, type, acceptable, context)     \
+         export_op_default.decode_fh((mnt)->mnt_sb, &(fid)->ino, len,         \
+                                     type, acceptable, context)
+#endif
+
+/**
+ * Translate the (ino, gen) pair in \a fid into a dentry on mount \a mnt.
+ *
+ * The handle is decoded with ll_exportfs_decode_fh(); a decode failure is
+ * returned to the caller unchanged.  A decoded dentry is then dropped and
+ * replaced by ERR_PTR(-ENOENT) when:
+ *  - it has no inode attached;
+ *  - the inode has zero nlink, zero mode and zero ctime seconds (also
+ *    reported on the console as possible disk corruption);
+ *  - fid->gen is non-zero and differs from the inode's generation.
+ *
+ * \a ignore_gen is not read by this function; a zero fid->gen is what
+ * disables the generation comparison.
+ */
+struct dentry *fsfilt_ext3_fid2dentry(struct vfsmount *mnt,
+                                      struct fsfilt_fid *fid, int ignore_gen)
+{
+        struct dentry *de;
+        struct inode *obj;
+
+        de = ll_exportfs_decode_fh(mnt, fid, 2, FILEID_INO32_GEN,
+                                   ll_decode_fh_accept, NULL);
+        if (IS_ERR(de)) {
+                CDEBUG(D_DENTRY, "%s of %u/%u failed %ld\n", __func__,
+                       fid->ino, fid->gen, PTR_ERR(de));
+                return de;
+        }
+
+        CDEBUG(D_DENTRY, "%s of %u/%u succeeded\n", __func__,
+               fid->ino, fid->gen);
+
+        obj = de->d_inode;
+        if (obj == NULL) {
+                goto reject;
+        } else if (obj->i_nlink == 0 && obj->i_mode == 0 &&
+                   LTIME_S(obj->i_ctime) == 0) {
+                LCONSOLE_WARN("Found inode with zero nlink, mode and"
+                              " ctime -- this may indicate disk "
+                              "corruption (inode: %lu, link: %lu, "
+                              "count: %d)\n", obj->i_ino,
+                              (unsigned long)obj->i_nlink,
+                              atomic_read(&obj->i_count));
+                goto reject;
+        } else if (fid->gen && obj->i_generation != fid->gen) {
+                /* we didn't find the right inode.. */
+                CDEBUG(D_INODE, "found wrong generation: inode %lu, link: %lu, "
+                       "count: %d, generation %u/%u\n",
+                       obj->i_ino, (unsigned long)obj->i_nlink,
+                       atomic_read(&obj->i_count), obj->i_generation,
+                       fid->gen);
+                goto reject;
+        }
+
+        return de;
+
+reject:
+        /* release the reference taken by the decode before failing */
+        l_dput(de);
+        return ERR_PTR(-ENOENT);
+}
+
static struct fsfilt_operations fsfilt_ext3_ops = {
.fs_type = "ext3",
.fs_owner = THIS_MODULE,
.fs_get_mblk = fsfilt_ext3_get_mblk,
#endif
.fs_journal_sbdev = fsfilt_ext3_journal_sbdev,
+ .fs_fid2dentry = fsfilt_ext3_fid2dentry,
};
static int __init fsfilt_ext3_init(void)