Whamcloud - gitweb
LU-653 i_version shouldn't be used for VBR
[fs/lustre-release.git] / lustre / lvfs / fsfilt_ext3.c
index c89de20..271f194 100644 (file)
@@ -28,6 +28,9 @@
 /*
  * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
  * Use is subject to license terms.
+ *
+ * Copyright (c) 2011 Whamcloud, Inc.
+ *
  */
 /*
  * This file is part of Lustre, http://www.lustre.org/
@@ -46,6 +49,9 @@
 #include <linux/slab.h>
 #include <linux/pagemap.h>
 #include <linux/quotaops.h>
+#ifdef HAVE_LINUX_EXPORTFS_H
+#include <linux/exportfs.h>
+#endif
 #ifdef HAVE_EXT4_LDISKFS
 #include <ext4/ext4.h>
 #include <ext4/ext4_jbd2.h>
 #include <linux/version.h>
 #include <linux/bitops.h>
 #include <linux/quota.h>
-#ifdef HAVE_QUOTAIO_V1_H
-# include <linux/quotaio_v1.h>
+#ifdef HAVE_QUOTAIO_H
 # include <linux/quotaio_v2.h>
-#elif defined(HAVE_FS_QUOTA_QUOTAIO_V1_H)
-# include <quota/quotaio_v1.h>
+#elif defined(HAVE_FS_QUOTA_QUOTAIO_H)
 # include <quota/quotaio_v2.h>
 # include <quota/quota_tree.h>
 # define V2_DQTREEOFF    QT_TREEOFF
-#else
-# include <quotaio_v1.h>
+#elif defined(HAVE_FS_QUOTAIO_V1_H)
 # include <quotaio_v2.h>
 # include <quota_tree.h>
 # define V2_DQTREEOFF    QT_TREEOFF
+# define V2_INITQVERSIONS_R1 V2_INITQVERSIONS
+#endif
+
+#ifdef QFMT_VFS_V1
+#define QFMT_LUSTRE QFMT_VFS_V1
+#else
+#define QFMT_LUSTRE QFMT_VFS_V0
 #endif
 
 #if defined(HAVE_EXT3_XATTR_H)
@@ -118,6 +128,11 @@ extern int ext3_xattr_set_handle(handle_t *, struct inode *, int, const char *,
                ext3_ext_insert_extent(handle, inode, path, newext)
 #endif
 
+#ifdef EXT3_DISCARD_PREALLOCATIONS
+#define ext3_mb_discard_inode_preallocations(inode) \
+                 ext3_discard_preallocations(inode)
+#endif
+
 
 static cfs_mem_cache_t *fcb_cache;
 
@@ -201,20 +216,12 @@ static char *fsfilt_ext3_uuid(struct super_block *sb)
 
 static __u64 get_i_version(struct inode *inode)
 {
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,27)) && defined(HAVE_EXT4_LDISKFS)
-        return inode->i_version;
-#else
         return EXT3_I(inode)->i_fs_version;
-#endif
 }
 
 static void set_i_version(struct inode *inode, __u64 new_version)
 {
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,27)) && defined(HAVE_EXT4_LDISKFS)
-        inode->i_version = new_version;
-#else
         (EXT3_I(inode))->i_fs_version = new_version;
-#endif
 }
 
 /*
@@ -292,7 +299,7 @@ static void *fsfilt_ext3_start(struct inode *inode, int op, void *desc_private,
                               cpu_to_le32(EXT3_EXTENTS_FL | EXT3_INDEX_FL))) {
                                 CWARN("extent-mapped directory found with "
                                       "ext3-based ldiskfs - contact "
-                                      "http://bugzilla.lustre.org/\n");
+                                      "http://bugs.whamcloud.com/\n");
                                 warned = 1;
                         }
                 }
@@ -801,13 +808,6 @@ static int fsfilt_ext3_add_journal_cb(struct obd_device *obd, __u64 last_rcvd,
         return 0;
 }
 
-/*
- * We need to hack the return value for the free inode counts because
- * the current EA code requires one filesystem block per inode with EAs,
- * so it is possible to run out of blocks before we run out of inodes.
- *
- * This can be removed when the ext3 EA code is fixed.
- */
 static int fsfilt_ext3_statfs(struct super_block *sb, struct obd_statfs *osfs)
 {
         struct kstatfs sfs;
@@ -815,11 +815,6 @@ static int fsfilt_ext3_statfs(struct super_block *sb, struct obd_statfs *osfs)
 
         memset(&sfs, 0, sizeof(sfs));
         rc = ll_do_statfs(sb, &sfs);
-        if (!rc && sfs.f_bfree < sfs.f_ffree) {
-                sfs.f_files = (sfs.f_files - sfs.f_ffree) + sfs.f_bfree;
-                sfs.f_ffree = sfs.f_bfree;
-        }
-
         statfs_pack(osfs, &sfs);
         return rc;
 }
@@ -996,13 +991,17 @@ static int ext3_ext_new_extent_cb(struct ext3_ext_base *base,
 #endif
         struct inode *inode = ext3_ext_base2inode(base);
         struct ext3_extent nex;
+#if defined(HAVE_EXT4_LDISKFS) && defined(WALK_SPACE_HAS_DATA_SEM)
+        struct ext4_ext_path *tmppath = NULL;
+        struct ext4_extent *tmpex;
+#endif
         unsigned long pblock;
         unsigned long tgen;
-        int err, i;
+        int err, i, depth;
         unsigned long count;
         handle_t *handle;
 
-        i = EXT_DEPTH(base);
+        i = depth = EXT_DEPTH(base);
         EXT_ASSERT(i == path->p_depth);
         EXT_ASSERT(path[i].p_hdr);
 
@@ -1047,6 +1046,29 @@ static int ext3_ext_new_extent_cb(struct ext3_ext_base *base,
                 return EXT_REPEAT;
         }
 
+#if defined(HAVE_EXT4_LDISKFS) && defined(WALK_SPACE_HAS_DATA_SEM)
+        /* In 2.6.32 kernel, ext4_ext_walk_space()'s callback func is not
+         * protected by i_data_sem, we need revalidate extent to be created */
+        down_write((&EXT4_I(inode)->i_data_sem));
+
+        /* validate extent, make sure the extent tree does not changed */
+        tmppath = ext4_ext_find_extent(inode, cex->ec_block, NULL);
+        if (IS_ERR(tmppath)) {
+                up_write(&EXT4_I(inode)->i_data_sem);
+                ext3_journal_stop(handle);
+                return PTR_ERR(tmppath);
+        }
+        tmpex = tmppath[depth].p_ext;
+        if (tmpex != ex) {
+                /* cex is invalid, try again */
+                ext4_ext_drop_refs(tmppath);
+                kfree(tmppath);
+                up_write(&EXT4_I(inode)->i_data_sem);
+                ext3_journal_stop(handle);
+                return EXT_REPEAT;
+        }
+#endif
+
         count = cex->ec_len;
         pblock = new_blocks(handle, base, path, cex->ec_block, &count, &err);
         if (!pblock)
@@ -1081,6 +1103,11 @@ static int ext3_ext_new_extent_cb(struct ext3_ext_base *base,
         BUG_ON(le32_to_cpu(nex.ee_block) != cex->ec_block);
 
 out:
+#if defined(HAVE_EXT4_LDISKFS) && defined(WALK_SPACE_HAS_DATA_SEM)
+        ext4_ext_drop_refs(tmppath);
+        kfree(tmppath);
+        up_write((&EXT4_I(inode)->i_data_sem));
+#endif
         ext3_journal_stop(handle);
 map:
         if (err >= 0) {
@@ -1428,7 +1455,7 @@ static int fsfilt_ext3_setup(struct super_block *sb)
                 sbi->s_qf_names[USRQUOTA] = NULL;
                 return -ENOMEM;
         }
-        sbi->s_jquota_fmt = QFMT_VFS_V0;
+        sbi->s_jquota_fmt = QFMT_LUSTRE;
 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,13))
         set_opt(sbi->s_mount_opt, QUOTA);
 #endif
@@ -1524,7 +1551,7 @@ static int fsfilt_ext3_quotactl(struct super_block *sb,
 
                                 LASSERT(oqc->qc_id == LUSTRE_QUOTA_V2);
 
-                                rc = ll_quota_on(sb, i, QFMT_VFS_V0,
+                                rc = ll_quota_on(sb, i, QFMT_LUSTRE,
                                                  name[i], 0);
                         } else if (oqc->qc_cmd == Q_QUOTAOFF) {
                                 rc = ll_quota_off(sb, i, 0);
@@ -2322,6 +2349,69 @@ void lustre_quota_journal_stop(void *handle)
         ext3_journal_stop((handle_t *)handle);
 }
 
+static int ll_decode_fh_accept(void *context, struct dentry *de)
+{
+        return 1;
+}
+
+#ifdef HAVE_EXPORTFS_DECODE_FH
+# define ll_exportfs_decode_fh(mnt, fid, len, type, acceptable, context) \
+         exportfs_decode_fh(mnt, (struct fid*)(fid), len, type,          \
+                            acceptable, context)
+#else
+# define ll_exportfs_decode_fh(mnt, fid, len, type, acceptable, context) \
+         export_op_default.decode_fh((mnt)->mnt_sb, &(fid)->ino, len,    \
+                                     type, acceptable, context)
+# define FILEID_INO32_GEN 1
+extern struct export_operations export_op_default;
+#endif
+
+struct dentry *fsfilt_ext3_fid2dentry(struct vfsmount *mnt,
+                                      struct fsfilt_fid *fid, int ignore_gen)
+{
+        struct inode  *inode;
+        struct dentry *result;
+        
+        result = ll_exportfs_decode_fh(mnt, fid, 2, FILEID_INO32_GEN,
+                                       ll_decode_fh_accept, NULL);
+        if (IS_ERR(result)) {
+                CDEBUG(D_DENTRY, "%s of %u/%u failed %ld\n", __func__,
+                       fid->ino, fid->gen, PTR_ERR(result));
+                return result;
+        }
+
+        CDEBUG(D_DENTRY, "%s of %u/%u succeeded\n", __func__,
+               fid->ino, fid->gen);
+        inode = result->d_inode;
+        if (inode == NULL)
+                goto err_out;
+
+        if (inode->i_nlink == 0 &&
+            inode->i_mode == 0 && LTIME_S(inode->i_ctime) == 0) {
+                LCONSOLE_WARN("Found inode with zero nlink, mode and"
+                              " ctime -- this may indicate disk "
+                              "corruption (inode: %lu, link: %lu, "
+                              "count: %d)\n", inode->i_ino,
+                              (unsigned long)inode->i_nlink,
+                              atomic_read(&inode->i_count));
+                goto err_out;
+        }
+        if (fid->gen && inode->i_generation != fid->gen) {
+                /* we didn't find the right inode.. */
+                CDEBUG(D_INODE, "found wrong generation: inode %lu, link: %lu, "
+                       "count: %d, generation %u/%u\n",
+                       inode->i_ino, (unsigned long)inode->i_nlink,
+                       atomic_read(&inode->i_count), inode->i_generation,
+                       fid->gen);
+                goto err_out;
+        }
+
+        return result;
+err_out:
+        l_dput(result);
+        return ERR_PTR(-ENOENT);
+}
+
 static struct fsfilt_operations fsfilt_ext3_ops = {
         .fs_type                = "ext3",
         .fs_owner               = THIS_MODULE,
@@ -2361,6 +2451,7 @@ static struct fsfilt_operations fsfilt_ext3_ops = {
         .fs_get_mblk            = fsfilt_ext3_get_mblk,
 #endif
         .fs_journal_sbdev       = fsfilt_ext3_journal_sbdev,
+        .fs_fid2dentry          = fsfilt_ext3_fid2dentry,
 };
 
 static int __init fsfilt_ext3_init(void)