* GPL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
+ *
+ * Copyright (c) 2011 Whamcloud, Inc.
+ *
*/
/*
* This file is part of Lustre, http://www.lustre.org/
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/quotaops.h>
+#ifdef HAVE_LINUX_EXPORTFS_H
+#include <linux/exportfs.h>
+#endif
#ifdef HAVE_EXT4_LDISKFS
#include <ext4/ext4.h>
#include <ext4/ext4_jbd2.h>
#include <linux/version.h>
#include <linux/bitops.h>
#include <linux/quota.h>
-#ifdef HAVE_QUOTAIO_V1_H
-# include <linux/quotaio_v1.h>
+#ifdef HAVE_QUOTAIO_H
# include <linux/quotaio_v2.h>
-#else
-# include <quotaio_v1.h>
+#elif defined(HAVE_FS_QUOTA_QUOTAIO_H)
+# include <quota/quotaio_v2.h>
+# include <quota/quota_tree.h>
+# define V2_DQTREEOFF QT_TREEOFF
+#elif defined(HAVE_FS_QUOTAIO_V1_H)
# include <quotaio_v2.h>
# include <quota_tree.h>
# define V2_DQTREEOFF QT_TREEOFF
+# define V2_INITQVERSIONS_R1 V2_INITQVERSIONS
+#endif
+
+#ifdef QFMT_VFS_V1
+#define QFMT_LUSTRE QFMT_VFS_V1
+#else
+#define QFMT_LUSTRE QFMT_VFS_V0
#endif
#if defined(HAVE_EXT3_XATTR_H)
#include <linux/lustre_compat25.h>
#include <linux/lprocfs_status.h>
-#ifdef EXT3_MULTIBLOCK_ALLOCATOR
#ifdef HAVE_EXT4_LDISKFS
#include <ext4/ext4_extents.h>
#else
#include <linux/ext3_extents.h>
#endif
-#endif
#include "lustre_quota_fmt.h"
#define FSFILT_SINGLEDATA_TRANS_BLOCKS(sb) EXT3_SINGLEDATA_TRANS_BLOCKS
#endif
+#ifdef EXT_INSERT_EXTENT_WITH_5ARGS /* ext3_ext_insert_extent() takes a fifth "flag" argument */
+#define fsfilt_ext3_ext_insert_extent(handle, inode, path, newext, flag) \
+ ext3_ext_insert_extent(handle, inode, path, newext, flag)
+#else /* four-argument ext3_ext_insert_extent(): "flag" is accepted but dropped */
+#define fsfilt_ext3_ext_insert_extent(handle, inode, path, newext, flag) \
+ ext3_ext_insert_extent(handle, inode, path, newext)
+#endif /* EXT_INSERT_EXTENT_WITH_5ARGS */
+
+#ifdef EXT3_DISCARD_PREALLOCATIONS /* alias the mb-prefixed name to ext3_discard_preallocations() */
+#define ext3_mb_discard_inode_preallocations(inode) \
+ ext3_discard_preallocations(inode)
+#endif /* EXT3_DISCARD_PREALLOCATIONS */
+
+
static cfs_mem_cache_t *fcb_cache;
struct fsfilt_cb_data {
static __u64 get_i_version(struct inode *inode)
{
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,27)) && defined(HAVE_EXT4_LDISKFS)
- return inode->i_version;
-#else
return EXT3_I(inode)->i_fs_version;
-#endif
}
static void set_i_version(struct inode *inode, __u64 new_version)
{
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,27)) && defined(HAVE_EXT4_LDISKFS)
- inode->i_version = new_version;
-#else
(EXT3_I(inode))->i_fs_version = new_version;
-#endif
}
/*
cpu_to_le32(EXT3_EXTENTS_FL | EXT3_INDEX_FL))) {
CWARN("extent-mapped directory found with "
"ext3-based ldiskfs - contact "
- "http://bugzilla.lustre.org/\n");
+ "http://bugs.whamcloud.com/\n");
warned = 1;
}
}
ENTRY;
/* FIXME: Can't do this because of nested transaction deadlock */
- if (cmd == EXT3_IOC_SETFLAGS && (*(int *)arg) & EXT3_JOURNAL_DATA_FL) {
- CERROR("can't set data journal flag on file\n");
- RETURN(-EPERM);
+ if (cmd == EXT3_IOC_SETFLAGS) {
+ /* We can't enable data journaling on OST objects, because
+ * this forces the transaction to be closed in order to
+ * flush the journal, but the caller will already have a
+ * compound transaction open to update the last_rcvd file,
+ * and this thread would deadlock trying to set the flag. */
+ if ((*(int *)arg) & EXT3_JOURNAL_DATA_FL) {
+ CERROR("can't set data journal flag on file\n");
+ RETURN(-EPERM);
+ }
+ /* Because the MDS does not see the EXTENTS_FL set on the
+ * OST objects, mask this flag into all set flags. It is
+ * not legal to clear this flag in any case, so we are not
+ * changing the functionality by doing this. b=22911 */
+ *(int *)arg |= EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL;
}
#ifdef HAVE_EXT4_LDISKFS
return 0;
}
-/*
- * We need to hack the return value for the free inode counts because
- * the current EA code requires one filesystem block per inode with EAs,
- * so it is possible to run out of blocks before we run out of inodes.
- *
- * This can be removed when the ext3 EA code is fixed.
- */
static int fsfilt_ext3_statfs(struct super_block *sb, struct obd_statfs *osfs)
{
struct kstatfs sfs;
memset(&sfs, 0, sizeof(sfs));
rc = ll_do_statfs(sb, &sfs);
- if (!rc && sfs.f_bfree < sfs.f_ffree) {
- sfs.f_files = (sfs.f_files - sfs.f_ffree) + sfs.f_bfree;
- sfs.f_ffree = sfs.f_bfree;
- }
-
statfs_pack(osfs, &sfs);
return rc;
}
return ext3_force_commit(sb);
}
-#if defined(EXT3_MULTIBLOCK_ALLOCATOR) && (!defined(EXT3_EXT_CACHE_NO) || defined(EXT_CACHE_MARK))
-#warning "kernel code has old extents/mballoc patch, disabling"
-#undef EXT3_MULTIBLOCK_ALLOCATOR
-#endif
#ifndef EXT3_EXTENTS_FL
#define EXT3_EXTENTS_FL 0x00080000 /* Inode uses extents */
#endif
-#ifdef EXT3_MULTIBLOCK_ALLOCATOR
+/* fsfilt_{up,down}_truncate_sem(inode): release/acquire the per-inode lock
+ * selected below by kernel version and ldiskfs flavour.  Every variant
+ * expands WITHOUT a trailing semicolon, so call sites must supply their own
+ * ';' and the macros stay safe inside unbraced if/else statements. */
#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,17))
-#define fsfilt_up_truncate_sem(inode) up(&EXT3_I(inode)->truncate_sem);
-#define fsfilt_down_truncate_sem(inode) down(&EXT3_I(inode)->truncate_sem);
+# define fsfilt_up_truncate_sem(inode) up(&LDISKFS_I(inode)->truncate_sem)
+# define fsfilt_down_truncate_sem(inode) down(&LDISKFS_I(inode)->truncate_sem)
#else
-#ifdef HAVE_EXT4_LDISKFS
-#define fsfilt_up_truncate_sem(inode) up_write((&EXT4_I(inode)->i_data_sem));
-#define fsfilt_down_truncate_sem(inode) down_write((&EXT4_I(inode)->i_data_sem));
-#else
-#define fsfilt_up_truncate_sem(inode) mutex_unlock(&EXT3_I(inode)->truncate_mutex);
-#define fsfilt_down_truncate_sem(inode) mutex_lock(&EXT3_I(inode)->truncate_mutex);
-#endif
+# ifdef HAVE_EXT4_LDISKFS
+# ifdef WALK_SPACE_HAS_DATA_SEM /* We only use it in fsfilt_map_nblocks() for now */
+# define fsfilt_up_truncate_sem(inode) do{ }while(0)
+# define fsfilt_down_truncate_sem(inode) do{ }while(0)
+# else
+# define fsfilt_up_truncate_sem(inode) up_write((&EXT4_I(inode)->i_data_sem))
+# define fsfilt_down_truncate_sem(inode) down_write((&EXT4_I(inode)->i_data_sem))
+# endif
+# else
+# define fsfilt_up_truncate_sem(inode) mutex_unlock(&EXT3_I(inode)->truncate_mutex)
+# define fsfilt_down_truncate_sem(inode) mutex_lock(&EXT3_I(inode)->truncate_mutex)
+# endif
#endif
#ifndef EXT_ASSERT
ext3_ext_walk_space(tree, block, num, cb);
#endif
+#ifdef EXT_INSERT_EXTENT_WITH_5ARGS /* NOTE(review): identical definition also appears earlier in this file */
+#define fsfilt_ext3_ext_insert_extent(handle, inode, path, newext, flag) \
+ ext3_ext_insert_extent(handle, inode, path, newext, flag)
+#else /* four-argument ext3_ext_insert_extent(): "flag" is accepted but dropped */
+#define fsfilt_ext3_ext_insert_extent(handle, inode, path, newext, flag) \
+ ext3_ext_insert_extent(handle, inode, path, newext)
+#endif /* EXT_INSERT_EXTENT_WITH_5ARGS */
+
#include <linux/lustre_version.h>
struct bpointers {
#endif
struct inode *inode = ext3_ext_base2inode(base);
struct ext3_extent nex;
+#if defined(HAVE_EXT4_LDISKFS) && defined(WALK_SPACE_HAS_DATA_SEM)
+ struct ext4_ext_path *tmppath = NULL;
+ struct ext4_extent *tmpex;
+#endif
unsigned long pblock;
unsigned long tgen;
- int err, i;
+ int err, i, depth;
unsigned long count;
handle_t *handle;
- i = EXT_DEPTH(base);
+ i = depth = EXT_DEPTH(base);
EXT_ASSERT(i == path->p_depth);
EXT_ASSERT(path[i].p_hdr);
return EXT_REPEAT;
}
+#if defined(HAVE_EXT4_LDISKFS) && defined(WALK_SPACE_HAS_DATA_SEM)
+ /* In the 2.6.32 kernel, ext4_ext_walk_space()'s callback function is
+ * not protected by i_data_sem, so we need to revalidate the extent to
+ * be created. */
+ down_write((&EXT4_I(inode)->i_data_sem));
+
+ /* validate extent, make sure the extent tree has not changed */
+ tmppath = ext4_ext_find_extent(inode, cex->ec_block, NULL);
+ if (IS_ERR(tmppath)) {
+ up_write(&EXT4_I(inode)->i_data_sem);
+ ext3_journal_stop(handle);
+ return PTR_ERR(tmppath);
+ }
+ tmpex = tmppath[depth].p_ext;
+ if (tmpex != ex) {
+ /* cex is invalid, try again */
+ ext4_ext_drop_refs(tmppath);
+ kfree(tmppath);
+ up_write(&EXT4_I(inode)->i_data_sem);
+ ext3_journal_stop(handle);
+ return EXT_REPEAT;
+ }
+#endif
+
count = cex->ec_len;
pblock = new_blocks(handle, base, path, cex->ec_block, &count, &err);
if (!pblock)
nex.ee_block = cpu_to_le32(cex->ec_block);
ext3_ext_store_pblock(&nex, pblock);
nex.ee_len = cpu_to_le16(count);
- err = ext3_ext_insert_extent(handle, base, path, &nex);
+ err = fsfilt_ext3_ext_insert_extent(handle, base, path, &nex, 0);
if (err) {
/* free data blocks we just allocated */
/* not a good idea to call discard here directly,
BUG_ON(le32_to_cpu(nex.ee_block) != cex->ec_block);
out:
+#if defined(HAVE_EXT4_LDISKFS) && defined(WALK_SPACE_HAS_DATA_SEM)
+ ext4_ext_drop_refs(tmppath);
+ kfree(tmppath);
+ up_write((&EXT4_I(inode)->i_data_sem));
+#endif
ext3_journal_stop(handle);
map:
if (err >= 0) {
cleanup:
return rc;
}
-#endif /* EXT3_MULTIBLOCK_ALLOCATOR */
extern int ext3_map_inode_page(struct inode *inode, struct page *page,
unsigned long *blocks, int *created, int create);
cfs_semaphore_t *optional_sem)
{
int rc;
-#ifdef EXT3_MULTIBLOCK_ALLOCATOR
+
if (EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL) {
rc = fsfilt_ext3_map_ext_inode_pages(inode, page, pages,
blocks, created, create);
return rc;
}
-#endif
if (optional_sem != NULL)
cfs_down(optional_sem);
rc = fsfilt_ext3_map_bm_inode_pages(inode, page, pages, blocks,
sbi->s_qf_names[USRQUOTA] = NULL;
return -ENOMEM;
}
- sbi->s_jquota_fmt = QFMT_VFS_V0;
+ sbi->s_jquota_fmt = QFMT_LUSTRE;
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,13))
set_opt(sbi->s_mount_opt, QUOTA);
#endif
struct obd_quotactl *oqc)
{
int i, rc = 0, error = 0;
- struct quotactl_ops *qcop;
+ const struct quotactl_ops *qcop;
struct if_dqinfo *info;
struct if_dqblk *dqblk;
ENTRY;
LASSERT(oqc->qc_id == LUSTRE_QUOTA_V2);
- rc = ll_quota_on(sb, i, QFMT_VFS_V0,
+ rc = ll_quota_on(sb, i, QFMT_LUSTRE,
name[i], 0);
} else if (oqc->qc_cmd == Q_QUOTAOFF) {
rc = ll_quota_off(sb, i, 0);
qsize_t dqb_ihardlimit; /** inode hard limit */
qsize_t dqb_isoftlimit; /** inode soft limit */
qsize_t dqb_curinodes; /** current inodes */
- __u64 dqb_btime; /** block grace time */
- __u64 dqb_itime; /** inode grace time */
+ obd_time dqb_btime; /** block grace time */
+ obd_time dqb_itime; /** inode grace time */
__u32 dqb_valid; /** flag for above fields */
};
static int v3_write_dqheader(struct file *f, int type)
{
static const __u32 quota_magics[] = V2_INITQMAGICS;
- static const __u32 quota_versions[] = V2_INITQVERSIONS_R1;
+ static const __u32 quota_versions[] = LUSTRE_INITQVERSIONS_V2;
struct v2_disk_dqheader dqhead;
loff_t offset = 0;
GOTO(out, rc = -EINVAL);
}
- DQUOT_DROP(file->f_dentry->d_inode);
+ ll_vfs_dq_drop(file->f_dentry->d_inode);
rc = v3_write_dqheader(file, i);
if (rc) {
/* we don't really need to take the group lock here,
* but it may be useful if one day we support online
* quotacheck */
+#ifdef HAVE_EXT4_LDISKFS
+ ext4_lock_group(sb, group);
+#else
spin_lock(sb_bgl_lock(sbi, group));
+#endif
if (desc->bg_flags & cpu_to_le16(EXT3_BG_INODE_UNINIT)) {
/* no inode in use in this group, just skip it */
+#ifdef HAVE_EXT4_LDISKFS
+ ext3_unlock_group(sb, group);
+#else
spin_unlock(sb_bgl_lock(sbi, group));
+#endif
continue;
}
+
used_count -= ext3_itable_unused_count(sb, desc);
+#ifdef HAVE_EXT4_LDISKFS
+ ext3_unlock_group(sb, group);
+#else
spin_unlock(sb_bgl_lock(sbi, group));
+#endif
}
ino = group * sbi->s_inodes_per_group + 1;
loff_t p = pos;
int rc;
+ if (!f && !inode) {
+ CERROR("lustre_read_quota failed for no quota file!\n");
+ libcfs_debug_dumpstack(NULL);
+ return -EINVAL;
+ }
+
/* Support for both adm and op quota files must be provided */
if (f) {
rc = fsfilt_ext3_read_record(f, buf, count, &p);
ext3_journal_stop((handle_t *)handle);
}
+static int ll_decode_fh_accept(void *context, struct dentry *de)
+{
+ return 1; /* accept every dentry; neither argument is examined */
+}
+
+/* ll_exportfs_decode_fh(mnt, fh, len, type, acceptable, context):
+ * turn the file handle @fh into a dentry.  With HAVE_EXPORTFS_DECODE_FH this
+ * calls exportfs_decode_fh() on @mnt; otherwise it calls
+ * export_op_default.decode_fh() on the mount's superblock, starting at the
+ * handle's "ino" member.
+ *
+ * The handle parameter is deliberately NOT named "fid": the preprocessor
+ * would also substitute it inside the "(struct fid *)" cast, so the macro
+ * would only expand correctly for callers whose variable is called "fid". */
+#ifdef HAVE_EXPORTFS_DECODE_FH
+# define ll_exportfs_decode_fh(mnt, fh, len, type, acceptable, context) \
+        exportfs_decode_fh(mnt, (struct fid*)(fh), len, type, \
+                           acceptable, context)
+#else
+# define ll_exportfs_decode_fh(mnt, fh, len, type, acceptable, context) \
+        export_op_default.decode_fh((mnt)->mnt_sb, &(fh)->ino, len, \
+                                    type, acceptable, context)
+# define FILEID_INO32_GEN 1
+extern struct export_operations export_op_default;
+#endif
+
+/*
+ * Look up the dentry identified by @fid on mount @mnt.
+ *
+ * The handle is decoded with ll_exportfs_decode_fh(); a decode failure is
+ * returned to the caller unchanged.  A decoded dentry is rejected, i.e. its
+ * reference is dropped with l_dput() and ERR_PTR(-ENOENT) is returned, when:
+ *   - it has no inode attached,
+ *   - the inode has zero nlink, zero mode and zero ctime, or
+ *   - fid->gen is non-zero and differs from the inode's generation.
+ *
+ * @ignore_gen is not consulted by this function.
+ */
+struct dentry *fsfilt_ext3_fid2dentry(struct vfsmount *mnt,
+                                      struct fsfilt_fid *fid, int ignore_gen)
+{
+        struct dentry *de;
+        struct inode *found;
+
+        de = ll_exportfs_decode_fh(mnt, fid, 2, FILEID_INO32_GEN,
+                                   ll_decode_fh_accept, NULL);
+        if (IS_ERR(de)) {
+                CDEBUG(D_DENTRY, "%s of %u/%u failed %ld\n", __func__,
+                       fid->ino, fid->gen, PTR_ERR(de));
+                return de;
+        }
+
+        CDEBUG(D_DENTRY, "%s of %u/%u succeeded\n", __func__,
+               fid->ino, fid->gen);
+
+        found = de->d_inode;
+        if (found != NULL) {
+                if (found->i_nlink == 0 && found->i_mode == 0 &&
+                    LTIME_S(found->i_ctime) == 0) {
+                        LCONSOLE_WARN("Found inode with zero nlink, mode and"
+                                      " ctime -- this may indicate disk "
+                                      "corruption (inode: %lu, link: %lu, "
+                                      "count: %d)\n", found->i_ino,
+                                      (unsigned long)found->i_nlink,
+                                      atomic_read(&found->i_count));
+                } else if (fid->gen && found->i_generation != fid->gen) {
+                        /* we didn't find the right inode.. */
+                        CDEBUG(D_INODE, "found wrong generation: inode %lu, link: %lu, "
+                               "count: %d, generation %u/%u\n",
+                               found->i_ino, (unsigned long)found->i_nlink,
+                               atomic_read(&found->i_count),
+                               found->i_generation, fid->gen);
+                } else {
+                        /* inode present, sane and of the requested generation */
+                        return de;
+                }
+        }
+
+        /* every rejection path ends here: drop the reference, report ENOENT */
+        l_dput(de);
+        return ERR_PTR(-ENOENT);
+}
+
static struct fsfilt_operations fsfilt_ext3_ops = {
.fs_type = "ext3",
.fs_owner = THIS_MODULE,
.fs_get_mblk = fsfilt_ext3_get_mblk,
#endif
.fs_journal_sbdev = fsfilt_ext3_journal_sbdev,
+ .fs_fid2dentry = fsfilt_ext3_fid2dentry,
};
static int __init fsfilt_ext3_init(void)