From 3c357081f5c0af79d76e2f556c14ca74ca47cf3b Mon Sep 17 00:00:00 2001
From: Fan Yong <fan.yong@intel.com>
Date: Fri, 14 Nov 2014 00:45:52 +0800
Subject: [PATCH] LU-5510 scrub: ldiskfs_create_inode returns locked inode

There was race condition between creating new inode and OI scrub:
the OI scrub may find the new created inode just after the creator
creating it but before setting the LMA EA. Originally, to resolve
such trouble, the creator will set the new created inode's state
as LDISKFS_STATE_LUSTRE_NOSCRUB. But such state is set after the
new inode unlocked. So the OI scrub still has some chance to find
the new created inode with neither LDISKFS_STATE_LUSTRE_NOSCRUB
nor LMA EA.

Be as improvement, this patch makes the ldiskfs_create_inode() to
return the new created inode with lock. The caller can set more
state (not only for LFSCK, but also for other purposes in future)
on the new created inode before unlock it.

Signed-off-by: Fan Yong <fan.yong@intel.com>
Change-Id: Idc1a8fbd3701f7e431ef4b7858cfdf4674d74add
Reviewed-on: http://review.whamcloud.com/13187
Tested-by: Jenkins
Reviewed-by: Alex Zhuravlev <alexey.zhuravlev@intel.com>
Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
Tested-by: Maloo <hpdd-maloo@intel.com>
---
 .../patches/rhel6.3/ext4-osd-iop-common.patch      | 18 +++++++++--------
 .../patches/sles11sp2/ext4-osd-iop-common.patch    | 15 +++++++-------
 lustre/osd-ldiskfs/osd_handler.c                   | 23 +++++++++++++---------
 lustre/osd-ldiskfs/osd_oi.c                        |  3 +++
 4 files changed, 35 insertions(+), 24 deletions(-)

diff --git a/ldiskfs/kernel_patches/patches/rhel6.3/ext4-osd-iop-common.patch b/ldiskfs/kernel_patches/patches/rhel6.3/ext4-osd-iop-common.patch
index 1f1fb2a..bfa6b13 100644
--- a/ldiskfs/kernel_patches/patches/rhel6.3/ext4-osd-iop-common.patch
+++ b/ldiskfs/kernel_patches/patches/rhel6.3/ext4-osd-iop-common.patch
@@ -94,18 +94,21 @@
  /*
   * DIR_NLINK feature is set if 1) nlinks > EXT4_LINK_MAX or 2) nlinks == 2,
   * since this indicates that nlinks count was previously 1.
-@@ -1806,6 +1809,27 @@ static unsigned ext4_dentry_goal(struct
+@@ -1806,6 +1809,29 @@ static unsigned ext4_dentry_goal(struct
  	return inum;
  }
  
-+struct inode * ext4_create_inode(handle_t *handle, struct inode * dir, int mode)
++ /* Return locked inode, then the caller can modify the inode's states/flags
++  * before others finding it. The caller should unlock the inode by itself. */
++struct inode *ext4_create_inode(handle_t *handle, struct inode *dir, int mode)
 +{
 +	struct inode *inode;
 +
-+	inode = ext4_new_inode(handle, dir, mode, 0, EXT4_SB(dir->i_sb)->s_inode_goal);
++	inode = ext4_new_inode(handle, dir, mode, 0,
++			       EXT4_SB(dir->i_sb)->s_inode_goal);
 +	if (!IS_ERR(inode)) {
 +		if (S_ISCHR(mode) || S_ISBLK(mode) || S_ISFIFO(mode)) {
-+#ifdef CONFIG_LDISKFS_FS_XATTR
++#ifdef CONFIG_EXT4_FS_XATTR
 +			inode->i_op = &ext4_special_inode_operations;
 +#endif
 +		} else {
@@ -113,7 +116,6 @@
 +			inode->i_fop = &ext4_file_operations;
 +			ext4_set_aops(inode);
 +		}
-+		unlock_new_inode(inode);
 +	}
 +	return inode;
 +}
@@ -122,7 +124,7 @@
  /*
   * By the time this is called, we already have created
   * the directory cache entry for the new file, but it
-@@ -1882,44 +1906,32 @@ retry:
+@@ -1882,44 +1908,32 @@ retry:
  	return err;
  }
  
@@ -176,7 +178,7 @@
  	de = (struct ext4_dir_entry_2 *) dir_block->b_data;
  	de->inode = cpu_to_le32(inode->i_ino);
  	de->name_len = 1;
-@@ -1938,18 +1950,47 @@ retry:
+@@ -1938,18 +1952,47 @@ retry:
  	BUFFER_TRACE(dir_block, "call ext4_handle_dirty_metadata");
  	err = ext4_handle_dirty_metadata(handle, inode, dir_block);
  	if (err)
@@ -234,7 +236,7 @@
  	ext4_inc_count(handle, dir);
  	ext4_update_dx_flag(dir);
  	err = ext4_mark_inode_dirty(handle, dir);
-@@ -1958,11 +1999,16 @@ out_clear_inode:
+@@ -1958,11 +2001,16 @@ out_clear_inode:
  	d_instantiate(dentry, inode);
  	unlock_new_inode(inode);
  out_stop:
diff --git a/ldiskfs/kernel_patches/patches/sles11sp2/ext4-osd-iop-common.patch b/ldiskfs/kernel_patches/patches/sles11sp2/ext4-osd-iop-common.patch
index 3581df0..9c48794 100644
--- a/ldiskfs/kernel_patches/patches/sles11sp2/ext4-osd-iop-common.patch
+++ b/ldiskfs/kernel_patches/patches/sles11sp2/ext4-osd-iop-common.patch
@@ -99,11 +99,13 @@
  /*
   * DIR_NLINK feature is set if 1) nlinks > EXT4_LINK_MAX or 2) nlinks == 2,
   * since this indicates that nlinks count was previously 1.
-@@ -1808,6 +1811,28 @@ static unsigned ext4_dentry_goal(struct
+@@ -1808,6 +1811,29 @@ static unsigned ext4_dentry_goal(struct
  	return inum;
  }
 
-+struct inode *ext4_create_inode(handle_t *handle, struct inode * dir, int mode)
++ /* Return locked inode, then the caller can modify the inode's states/flags
++  * before others finding it. The caller should unlock the inode by itself. */
++struct inode *ext4_create_inode(handle_t *handle, struct inode *dir, int mode)
 +{
 +	struct inode *inode;
 +
@@ -111,7 +113,7 @@
 +			       EXT4_SB(dir->i_sb)->s_inode_goal);
 +	if (!IS_ERR(inode)) {
 +		if (S_ISCHR(mode) || S_ISBLK(mode) || S_ISFIFO(mode)) {
-+#ifdef CONFIG_LDISKFS_FS_XATTR
++#ifdef CONFIG_EXT4_FS_XATTR
 +			inode->i_op = &ext4_special_inode_operations;
 +#endif
 +		} else {
@@ -119,7 +121,6 @@
 +			inode->i_fop = &ext4_file_operations;
 +			ext4_set_aops(inode);
 +		}
-+		unlock_new_inode(inode);
 +	}
 +	return inode;
 +}
@@ -128,7 +129,7 @@
  /*
   * By the time this is called, we already have created
   * the directory cache entry for the new file, but it
-@@ -1886,46 +1911,32 @@ retry:
+@@ -1886,46 +1912,32 @@ retry:
  	return err;
  }
 
@@ -183,7 +184,7 @@
  	de = (struct ext4_dir_entry_2 *) dir_block->b_data;
  	de->inode = cpu_to_le32(inode->i_ino);
  	de->name_len = 1;
-@@ -1944,18 +1955,47 @@ retry:
+@@ -1944,18 +1956,47 @@ retry:
  	BUFFER_TRACE(dir_block, "call ext4_handle_dirty_metadata");
  	err = ext4_handle_dirty_metadata(handle, inode, dir_block);
  	if (err)
@@ -241,7 +242,7 @@
  	ext4_inc_count(handle, dir);
  	ext4_update_dx_flag(dir);
  	err = ext4_mark_inode_dirty(handle, dir);
-@@ -1964,11 +2004,16 @@ out_clear_inode:
+@@ -1964,11 +2005,16 @@ out_clear_inode:
  	d_instantiate(dentry, inode);
  	unlock_new_inode(inode);
  out_stop:
diff --git a/lustre/osd-ldiskfs/osd_handler.c b/lustre/osd-ldiskfs/osd_handler.c
index 086a812..ef1799b 100644
--- a/lustre/osd-ldiskfs/osd_handler.c
+++ b/lustre/osd-ldiskfs/osd_handler.c
@@ -2092,14 +2092,13 @@ static int osd_mkfile(struct osd_thread_info *info, struct osd_object *obj,
                                               osd_sb(osd)->s_root->d_inode,
                                      mode);
         if (!IS_ERR(inode)) {
-                /* Do not update file c/mtime in ldiskfs.
-                 * NB: don't need any lock because no contention at this
-                 * early stage */
-                inode->i_flags |= S_NOCMTIME;
+		/* Do not update file c/mtime in ldiskfs. */
+		inode->i_flags |= S_NOCMTIME;
 
 		/* For new created object, it must be consistent,
 		 * and it is unnecessary to scrub against it. */
 		ldiskfs_set_inode_state(inode, LDISKFS_STATE_LUSTRE_NOSCRUB);
+
                 obj->oo_inode = inode;
                 result = 0;
         } else {
@@ -2326,13 +2325,16 @@ static int __osd_object_create(struct osd_thread_info *info,
 
 	result = osd_create_type_f(dof->dof_type)(info, obj, attr, hint, dof,
 						  th);
-        if (result == 0) {
+	if (result == 0) {
 		osd_attr_init(info, obj, attr, dof);
 		osd_object_init0(obj);
-		/* bz 24037 */
-		if (obj->oo_inode && (obj->oo_inode->i_state & I_NEW))
-			unlock_new_inode(obj->oo_inode);
-        }
+	}
+
+	if (obj->oo_inode != NULL) {
+		LASSERT(obj->oo_inode->i_state & I_NEW);
+
+		unlock_new_inode(obj->oo_inode);
+	}
 
 	/* restore previous umask value */
 	current->fs->umask = umask;
@@ -2704,6 +2706,9 @@ static struct inode *osd_create_local_agent_inode(const struct lu_env *env,
 		RETURN(local);
 	}
 
+	ldiskfs_set_inode_state(local, LDISKFS_STATE_LUSTRE_NOSCRUB);
+	unlock_new_inode(local);
+
 	/* Set special LMA flag for local agent inode */
 	rc = osd_ea_fid_set(info, local, fid, 0, LMAI_AGENT);
 	if (rc != 0) {
diff --git a/lustre/osd-ldiskfs/osd_oi.c b/lustre/osd-ldiskfs/osd_oi.c
index 1704dab..9926dd1 100644
--- a/lustre/osd-ldiskfs/osd_oi.c
+++ b/lustre/osd-ldiskfs/osd_oi.c
@@ -139,6 +139,9 @@ static int osd_oi_index_create_one(struct osd_thread_info *info,
 		return PTR_ERR(inode);
 	}
 
+	ldiskfs_set_inode_state(inode, LDISKFS_STATE_LUSTRE_NOSCRUB);
+	unlock_new_inode(inode);
+
 	if (feat->dif_flags & DT_IND_VARKEY)
 		rc = iam_lvar_create(inode, feat->dif_keysize_max,
 				     feat->dif_ptrsize, feat->dif_recsize_max,
-- 
1.8.3.1