LU-709 build: clean up quota_read, follow_link and RCU

[fs/lustre-release.git] / lustre / lvfs / fsfilt_ext3.c
diff --git a/lustre/lvfs/fsfilt_ext3.c b/lustre/lvfs/fsfilt_ext3.c

index af1f719..88fb592 100644 (file)
--- a/lustre/lvfs/fsfilt_ext3.c
+++ b/lustre/lvfs/fsfilt_ext3.c
@@ -1,6 +1,4 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
+/*
   * GPL HEADER START
   *
   * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
@@ -29,11 +27,7 @@
   * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
   * Use is subject to license terms.
   *
- * Copyright (c) 2011 Whamcloud, Inc.
- *
- */
-/*
- * Copyright (c) 2011 Whamcloud, Inc.
+ * Copyright (c) 2011, 2012, Whamcloud, Inc.
   */
  /*
   * This file is part of Lustre, http://www.lustre.org/
@@ -55,14 +49,8 @@
  #ifdef HAVE_LINUX_EXPORTFS_H
  #include <linux/exportfs.h>
  #endif
-#ifdef HAVE_EXT4_LDISKFS
  #include <ext4/ext4.h>
  #include <ext4/ext4_jbd2.h>
-#else
-#include <linux/jbd.h>
-#include <linux/ext3_fs.h>
-#include <linux/ext3_jbd.h>
-#endif
  #include <linux/version.h>
  #include <linux/bitops.h>
  #include <linux/quota.h>
@@ -72,7 +60,7 @@
  # include <quota/quotaio_v2.h>
  # include <quota/quota_tree.h>
  # define V2_DQTREEOFF    QT_TREEOFF
-#elif defined(HAVE_FS_QUOTAIO_V1_H)
+#elif defined(HAVE_FS_QUOTAIO_H)
  # include <quotaio_v2.h>
  # include <quota_tree.h>
  # define V2_DQTREEOFF    QT_TREEOFF
@@ -102,11 +90,7 @@ extern int ext3_xattr_set_handle(handle_t *, struct inode *, int, const char *,
  #include <linux/lustre_compat25.h>
  #include <linux/lprocfs_status.h>
  
-#ifdef HAVE_EXT4_LDISKFS
  #include <ext4/ext4_extents.h>
-#else
-#include <linux/ext3_extents.h>
-#endif
  
  #include "lustre_quota_fmt.h"
  
@@ -127,16 +111,8 @@ extern int ext3_xattr_set_handle(handle_t *, struct inode *, int, const char *,
  #define ext3_mb_discard_inode_preallocations(inode) \
                   ext3_discard_preallocations(inode)
  
-#ifdef HAVE_EXT4_LDISKFS
  #define fsfilt_log_start_commit(journal, tid) jbd2_log_start_commit(journal, tid)
  #define fsfilt_log_wait_commit(journal, tid) jbd2_log_wait_commit(journal, tid)
-#else
-#define fsfilt_log_start_commit(journal, tid) log_start_commit(journal, tid)
-#define fsfilt_log_wait_commit(journal, tid) log_wait_commit(journal, tid)
-#define ext_pblock(ex) le32_to_cpu((ex)->ee_start)
-#define ext3_ext_store_pblock(ex, pblock)  ((ex)->ee_start = cpu_to_le32(pblock))
-#define ext3_inode_bitmap(sb,desc) le32_to_cpu((desc)->bg_inode_bitmap)
-#endif
  
  #ifdef HAVE_EXT4_JOURNAL_CALLBACK_ADD
  # define journal_callback ext4_journal_cb_entry
@@ -290,21 +266,6 @@ static void *fsfilt_ext3_start(struct inode *inode, int op, void *desc_private,
                  nblocks += 3;
                  /* no break */
          case FSFILT_OP_CREATE: {
-#if defined(EXT3_EXTENTS_FL) && defined(EXT3_INDEX_FL) && !defined(HAVE_EXT4_LDISKFS)
-                static int warned;
-                if (!warned) {
-                        if (!test_opt(inode->i_sb, EXTENTS)) {
-                                warned = 1;
-                        } else if (((EXT3_I(inode)->i_flags &
-                              cpu_to_le32(EXT3_EXTENTS_FL | EXT3_INDEX_FL)) ==
-                              cpu_to_le32(EXT3_EXTENTS_FL | EXT3_INDEX_FL))) {
-                                CWARN("extent-mapped directory found with "
-                                      "ext3-based ldiskfs - contact "
-                                      "http://bugs.whamcloud.com/\n");
-                                warned = 1;
-                        }
-                }
-#endif
                  /* no break */
          }
          case FSFILT_OP_MKDIR:
@@ -581,6 +542,41 @@ static int fsfilt_ext3_setattr(struct dentry *dentry, void *handle,
          struct inode *inode = dentry->d_inode;
          int rc = 0;
  
+#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2,7,50,0)
+        /* Try to correct for a bug in 2.1.0 (LU-221) that caused negative
+         * timestamps to appear to be in the far future, due to old timestamp
+         * being stored on disk as an unsigned value.  This fixes up any
+         * bad values held by the client before storing them on disk,
+         * and ensures any timestamp updates are correct.  LU-1042 */
+        if (unlikely(LTIME_S(inode->i_atime) == LU221_BAD_TIME &&
+                     !(iattr->ia_valid & ATTR_ATIME))) {
+                iattr->ia_valid |= ATTR_ATIME;
+                LTIME_S(iattr->ia_atime) = 0;
+        }
+        if (unlikely(LTIME_S(inode->i_mtime) == LU221_BAD_TIME &&
+                     !(iattr->ia_valid & ATTR_MTIME))) {
+                iattr->ia_valid |= ATTR_MTIME;
+                LTIME_S(iattr->ia_mtime) = 0;
+        }
+        if (unlikely((LTIME_S(inode->i_ctime) == LU221_BAD_TIME ||
+                      LTIME_S(inode->i_ctime) == 0) &&
+                     !(iattr->ia_valid & ATTR_CTIME))) {
+                iattr->ia_valid |= ATTR_CTIME;
+                LTIME_S(iattr->ia_ctime) = 0;
+        }
+#else
+#warning "remove old LU-221/LU-1042 workaround code"
+#endif
+
+        /* When initializing timestamps for new inodes, use the filesystem
+         * mkfs time for ctime to avoid e2fsck ibadness incorrectly thinking
+         * that this is potentially an invalid inode.  Files with an old ctime
+         * migrated to a newly-formatted OST with a newer s_mkfs_time will not
+         * hit this check, since it is only for ctime == 0.  LU-1010/LU-1042 */
+        if ((iattr->ia_valid & ATTR_CTIME) && LTIME_S(iattr->ia_ctime) == 0)
+                LTIME_S(iattr->ia_ctime) =
+                        EXT4_SB(inode->i_sb)->s_es->s_mkfs_time;
+
          /* Avoid marking the inode dirty on the superblock list unnecessarily.
           * We are already writing the inode to disk as part of this
           * transaction and want to avoid a lot of extra inode writeout
@@ -621,14 +617,16 @@ static int fsfilt_ext3_setattr(struct dentry *dentry, void *handle,
  
          /* We set these flags on the client, but have already checked perms
           * so don't confuse inode_change_ok. */
-        iattr->ia_valid &= ~(ATTR_MTIME_SET | ATTR_ATIME_SET);
+        iattr->ia_valid &= ~TIMES_SET_FLAGS;
  
          if (inode->i_op->setattr) {
                  rc = inode->i_op->setattr(dentry, iattr);
          } else {
+#ifndef HAVE_SIMPLE_SETATTR /* simple_setattr() already calls it */
                  rc = inode_change_ok(inode, iattr);
                  if (!rc)
-                        rc = inode_setattr(inode, iattr);
+#endif
+                        rc = simple_setattr(dentry, iattr);
          }
  
   out:
@@ -659,14 +657,9 @@ static int fsfilt_ext3_iocontrol(struct inode *inode, struct file *file,
                  *(int *)arg |= EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL;
          }
  
-#ifdef HAVE_EXT4_LDISKFS
          /* ext4_ioctl does not have a inode argument */
          if (inode->i_fop->unlocked_ioctl)
                  rc = inode->i_fop->unlocked_ioctl(file, cmd, arg);
-#else
-        if (inode->i_fop->ioctl)
-                rc = inode->i_fop->ioctl(inode, file, cmd, arg);
-#endif
          else
                  RETURN(-ENOTTY);
  
@@ -837,13 +830,8 @@ static int fsfilt_ext3_sync(struct super_block *sb)
  # define fsfilt_up_truncate_sem(inode)  up(&LDISKFS_I(inode)->truncate_sem);
  # define fsfilt_down_truncate_sem(inode)  down(&LDISKFS_I(inode)->truncate_sem);
  #else
-# ifdef HAVE_EXT4_LDISKFS
-#   define fsfilt_up_truncate_sem(inode) do{ }while(0)
-#   define fsfilt_down_truncate_sem(inode) do{ }while(0)
-# else
-#  define fsfilt_up_truncate_sem(inode)  mutex_unlock(&EXT3_I(inode)->truncate_mutex)
-#  define fsfilt_down_truncate_sem(inode)  mutex_lock(&EXT3_I(inode)->truncate_mutex)
-# endif
+# define fsfilt_up_truncate_sem(inode) do{ }while(0)
+# define fsfilt_down_truncate_sem(inode) do{ }while(0)
  #endif
  
  #ifndef EXT_ASSERT
@@ -852,11 +840,7 @@ static int fsfilt_ext3_sync(struct super_block *sb)
  
  #ifdef EXT3_EXT_HAS_NO_TREE
  /* for kernels 2.6.18 and later */
-#ifdef HAVE_EXT4_LDISKFS
  #define EXT_GENERATION(inode)           (EXT4_I(inode)->i_ext_generation)
-#else
-#define EXT_GENERATION(inode)           ext_generation(inode)
-#endif
  #define ext3_ext_base                   inode
  #define ext3_ext_base2inode(inode)      (inode)
  #define EXT_DEPTH(inode)                ext_depth(inode)
@@ -869,8 +853,6 @@ static int fsfilt_ext3_sync(struct super_block *sb)
                          ext3_ext_walk_space(tree, block, num, cb);
  #endif
  
-#include <linux/lustre_version.h>
-
  struct bpointers {
          unsigned long *blocks;
          int *created;
@@ -983,20 +965,12 @@ static int ext3_ext_new_extent_cb(struct ext3_ext_base *base,
  #endif
          struct inode *inode = ext3_ext_base2inode(base);
          struct ext3_extent nex;
-#if defined(HAVE_EXT4_LDISKFS)
-        struct ext4_ext_path *tmppath = NULL;
-        struct ext4_extent *tmpex;
-#endif
          unsigned long pblock;
          unsigned long tgen;
-        int err, i, depth;
+        int err, i;
          unsigned long count;
          handle_t *handle;
  
-        i = depth = EXT_DEPTH(base);
-        EXT_ASSERT(i == path->p_depth);
-        EXT_ASSERT(path[i].p_hdr);
-
          if (cex->ec_type == EXT3_EXT_CACHE_EXTENT) {
                  err = EXT_CONTINUE;
                  goto map;
@@ -1038,28 +1012,18 @@ static int ext3_ext_new_extent_cb(struct ext3_ext_base *base,
                  return EXT_REPEAT;
          }
  
-#if defined(HAVE_EXT4_LDISKFS)
          /* In 2.6.32 kernel, ext4_ext_walk_space()'s callback func is not
-         * protected by i_data_sem, we need revalidate extent to be created */
+         * protected by i_data_sem as a whole, so we patch it to store the
+         * generation in the path and now verify the tree hasn't changed */
          down_write((&EXT4_I(inode)->i_data_sem));
  
          /* validate extent, make sure the extent tree does not changed */
-        tmppath = ext4_ext_find_extent(inode, cex->ec_block, NULL);
-        if (IS_ERR(tmppath)) {
-                up_write(&EXT4_I(inode)->i_data_sem);
-                ext3_journal_stop(handle);
-                return PTR_ERR(tmppath);
-        }
-        tmpex = tmppath[depth].p_ext;
-        if (tmpex != ex) {
+       if (EXT_GENERATION(base) != path[0].p_generation) {
                  /* cex is invalid, try again */
-                ext4_ext_drop_refs(tmppath);
-                kfree(tmppath);
                  up_write(&EXT4_I(inode)->i_data_sem);
                  ext3_journal_stop(handle);
                  return EXT_REPEAT;
          }
-#endif
  
          count = cex->ec_len;
          pblock = new_blocks(handle, base, path, cex->ec_block, &count, &err);
@@ -1095,11 +1059,7 @@ static int ext3_ext_new_extent_cb(struct ext3_ext_base *base,
          BUG_ON(le32_to_cpu(nex.ee_block) != cex->ec_block);
  
  out:
-#if defined(HAVE_EXT4_LDISKFS)
-        ext4_ext_drop_refs(tmppath);
-        kfree(tmppath);
          up_write((&EXT4_I(inode)->i_data_sem));
-#endif
          ext3_journal_stop(handle);
  map:
          if (err >= 0) {
@@ -1251,7 +1211,7 @@ int fsfilt_ext3_map_bm_inode_pages(struct inode *inode, struct page **page,
  int fsfilt_ext3_map_inode_pages(struct inode *inode, struct page **page,
                                  int pages, unsigned long *blocks,
                                  int *created, int create,
-                                cfs_semaphore_t *optional_sem)
+                                cfs_mutex_t *optional_mutex)
  {
          int rc;
  
@@ -1260,12 +1220,12 @@ int fsfilt_ext3_map_inode_pages(struct inode *inode, struct page **page,
                                                       blocks, created, create);
                  return rc;
          }
-        if (optional_sem != NULL)
-                cfs_down(optional_sem);
+        if (optional_mutex != NULL)
+                cfs_mutex_lock(optional_mutex);
          rc = fsfilt_ext3_map_bm_inode_pages(inode, page, pages, blocks,
                                              created, create);
-        if (optional_sem != NULL)
-                cfs_up(optional_sem);
+        if (optional_mutex != NULL)
+                cfs_mutex_unlock(optional_mutex);
  
          return rc;
  }
@@ -1277,10 +1237,10 @@ int fsfilt_ext3_read(struct inode *inode, void *buf, int size, loff_t *offs)
          int err, blocksize, csize, boffs, osize = size;
  
          /* prevent reading after eof */
-        cfs_lock_kernel();
+       spin_lock(&inode->i_lock);
          if (i_size_read(inode) < *offs + size) {
                  size = i_size_read(inode) - *offs;
-                cfs_unlock_kernel();
+               spin_unlock(&inode->i_lock);
                  if (size < 0) {
                          CDEBUG(D_EXT2, "size %llu is too short for read @%llu\n",
                                 i_size_read(inode), *offs);
@@ -1289,7 +1249,7 @@ int fsfilt_ext3_read(struct inode *inode, void *buf, int size, loff_t *offs)
                          return 0;
                  }
          } else {
-                cfs_unlock_kernel();
+               spin_unlock(&inode->i_lock);
          }
  
          blocksize = 1 << inode->i_blkbits;
@@ -1372,14 +1332,17 @@ int fsfilt_ext3_write_handle(struct inode *inode, void *buf, int bufsize,
  
          /* correct in-core and on-disk sizes */
          if (new_size > i_size_read(inode)) {
-                cfs_lock_kernel();
+               spin_lock(&inode->i_lock);
                  if (new_size > i_size_read(inode))
                          i_size_write(inode, new_size);
                  if (i_size_read(inode) > EXT3_I(inode)->i_disksize)
                          EXT3_I(inode)->i_disksize = i_size_read(inode);
-                if (i_size_read(inode) > old_size)
+                if (i_size_read(inode) > old_size) {
+                       spin_unlock(&inode->i_lock);
                          mark_inode_dirty(inode);
-                cfs_unlock_kernel();
+                } else {
+                       spin_unlock(&inode->i_lock);
+                }
          }
  
          if (err == 0)
@@ -1428,6 +1391,12 @@ static int fsfilt_ext3_setup(struct super_block *sb)
          sbi->dx_unlock = fsfilt_ext3_dx_unlock;
  #endif
  #endif
+        if (!EXT3_HAS_COMPAT_FEATURE(sb,
+                                EXT3_FEATURE_COMPAT_HAS_JOURNAL)) {
+                CERROR("ext3 mounted without journal\n");
+                return -EINVAL;
+        }
+
  #ifdef S_PDIROPS
          CWARN("Enabling PDIROPS\n");
          set_opt(sbi->s_mount_opt, PDIROPS);
@@ -1751,40 +1720,6 @@ static inline int read_old_dqinfo(struct super_block *sb, int type,
          RETURN(rc);
  }
  
-#ifndef HAVE_EXT4_LDISKFS
-static inline struct ext3_group_desc *
-get_group_desc(struct super_block *sb, int group, struct buffer_head **bh)
-{
-        unsigned long desc_block, desc;
-        struct ext3_group_desc *gdp;
-
-        desc_block = group / EXT3_DESC_PER_BLOCK(sb);
-        desc = group % EXT3_DESC_PER_BLOCK(sb);
-        gdp = (struct ext3_group_desc *)
-              EXT3_SB(sb)->s_group_desc[desc_block]->b_data;
-
-        return gdp + desc;
-}
-
-static inline struct buffer_head *
-ext3_read_inode_bitmap(struct super_block *sb, unsigned long group)
-{
-        struct ext3_group_desc *desc;
-        struct buffer_head *bh;
-
-        desc = get_group_desc(sb, group, NULL);
-        bh = sb_bread(sb, ext3_inode_bitmap(sb, desc));
-        return bh;
-}
-
-static __u32 ext3_itable_unused_count(struct super_block *sb,
-                               struct ext3_group_desc *bg) {
-       return le16_to_cpu(bg->bg_itable_unused);
-}
-#else
-#define get_group_desc ext3_get_group_desc
-#endif
-
  struct qchk_ctxt {
          cfs_hlist_head_t        qckt_hash[NR_DQHASH];      /* quotacheck hash */
          cfs_list_t              qckt_list;                 /* quotacheck list */
@@ -2046,34 +1981,22 @@ static int fsfilt_ext3_quotacheck(struct super_block *sb,
  
                  if (uninit_feat) {
                          struct ext3_group_desc *desc;
-                        desc = get_group_desc(sb, group, NULL);
+                        desc = ext3_get_group_desc(sb, group, NULL);
                          if (!desc)
                                  GOTO(out, -EIO);
  
                          /* we don't really need to take the group lock here,
                           * but it may be useful if one day we support online
                           * quotacheck */
-#ifdef HAVE_EXT4_LDISKFS
                          ext4_lock_group(sb, group);
-#else
-                        spin_lock(sb_bgl_lock(sbi, group));
-#endif
                          if (desc->bg_flags & cpu_to_le16(EXT3_BG_INODE_UNINIT)) {
                                  /* no inode in use in this group, just skip it */
-#ifdef HAVE_EXT4_LDISKFS
                                  ext3_unlock_group(sb, group);
-#else
-                                spin_unlock(sb_bgl_lock(sbi, group));
-#endif
                                  continue;
                          }
  
                          used_count -= ext3_itable_unused_count(sb, desc);
-#ifdef HAVE_EXT4_LDISKFS
                          ext3_unlock_group(sb, group);
-#else
-                        spin_unlock(sb_bgl_lock(sbi, group));
-#endif
                  }
  
                  ino = group * sbi->s_inodes_per_group + 1;
@@ -2093,11 +2016,7 @@ static int fsfilt_ext3_quotacheck(struct super_block *sb,
                          ino = i + group * sbi->s_inodes_per_group;
                          if (ino < sbi->s_first_ino)
                                  continue;
-#if defined(HAVE_EXT4_LDISKFS) || !defined(HAVE_READ_INODE_IN_SBOPS)
                          inode = ext3_iget(sb, ino);
-#else
-                        inode = iget(sb, ino);
-#endif
                          if (!inode || IS_ERR(inode))
                                  continue;
  
@@ -2125,15 +2044,11 @@ static int fsfilt_ext3_quotacheck(struct super_block *sb,
                          continue;
  
  
-                LASSERT(sb_dqopt(sb)->files[i] != NULL);
-                CFS_INIT_LIST_HEAD(&id_list);
-#ifndef KERNEL_SUPPORTS_QUOTA_READ
-                rc = lustre_get_qids(sb_dqopt(sb)->files[i], NULL, i, &id_list);
-#else
-                rc = lustre_get_qids(NULL, sb_dqopt(sb)->files[i], i, &id_list);
-#endif
-                if (rc)
-                        CERROR("read old limits failed. (rc:%d)\n", rc);
+               LASSERT(sb_dqopt(sb)->files[i] != NULL);
+               CFS_INIT_LIST_HEAD(&id_list);
+               rc = lustre_get_qids(NULL, sb_dqopt(sb)->files[i], i, &id_list);
+               if (rc)
+                       CERROR("read old limits failed. (rc:%d)\n", rc);
  
                  cfs_list_for_each_entry_safe(dqid, tmp, &id_list, di_link) {
                          cfs_list_del_init(&dqid->di_link);