LU-6475 mdt: race between open and migrate

author wang di <di.wang@intel.com>

Sun, 10 May 2015 23:40:46 +0000 (16:40 -0700)

committer Oleg Drokin <oleg.drokin@intel.com>

Mon, 24 Aug 2015 14:12:22 +0000 (14:12 +0000)
author wang di <di.wang@intel.com>
Sun, 10 May 2015 23:40:46 +0000 (16:40 -0700)
committer Oleg Drokin <oleg.drokin@intel.com>
Mon, 24 Aug 2015 14:12:22 +0000 (14:12 +0000)
diff --git a/lustre/include/dt_object.h b/lustre/include/dt_object.h

index dd3f40b..8fc9c66 100644 (file)
--- a/lustre/include/dt_object.h
+++ b/lustre/include/dt_object.h
@@ -1825,7 +1825,9 @@ struct thandle {
         /* local transation, no need to inform other layers */
                                 th_local:1,
         /* Whether we need wait the transaction to be submitted */
-                               th_wait_submit:1;
+                               th_wait_submit:1,
+       /* complex transaction which will track updates on all targets */
+                               th_complex:1;
  };
  
  /**
diff --git a/lustre/include/lustre_req_layout.h b/lustre/include/lustre_req_layout.h

index 9d859a8..bf4e8b3 100644 (file)
--- a/lustre/include/lustre_req_layout.h
+++ b/lustre/include/lustre_req_layout.h
@@ -185,6 +185,7 @@ extern struct req_format RQF_MDS_REINT_SETXATTR;
  extern struct req_format RQF_MDS_QUOTACTL;
  extern struct req_format RQF_QUOTA_DQACQ;
  extern struct req_format RQF_MDS_SWAP_LAYOUTS;
+extern struct req_format RQF_MDS_REINT_MIGRATE;
  /* MDS hsm formats */
  extern struct req_format RQF_MDS_HSM_STATE_GET;
  extern struct req_format RQF_MDS_HSM_STATE_SET;
diff --git a/lustre/include/md_object.h b/lustre/include/md_object.h

index 0085000..9abf055 100644 (file)
--- a/lustre/include/md_object.h
+++ b/lustre/include/md_object.h
@@ -146,8 +146,8 @@ struct md_op_spec {
         unsigned int no_create:1,
                      sp_cr_lookup:1, /* do lookup sanity check or not. */
                      sp_rm_entry:1,  /* only remove name entry */
-                    sp_permitted:1; /* do not check permission */
-
+                    sp_permitted:1, /* do not check permission */
+                    sp_migrate_close:1; /* close the file during migrate */
         /** Current lock mode for parent dir where create is performing. */
          mdl_mode_t sp_cr_mode;
  
diff --git a/lustre/llite/dir.c b/lustre/llite/dir.c

index c038694..a098c62 100644 (file)
--- a/lustre/llite/dir.c
+++ b/lustre/llite/dir.c
@@ -1140,7 +1140,7 @@ static long ll_dir_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
                         GOTO(out_free, rc = -EINVAL);
                 }
  
-               rc = ll_get_fid_by_name(inode, filename, namelen, NULL);
+               rc = ll_get_fid_by_name(inode, filename, namelen, NULL, NULL);
                 if (rc < 0) {
                         CERROR("%s: lookup %.*s failed: rc = %d\n",
                                ll_get_fsname(inode->i_sb, NULL, 0), namelen,
diff --git a/lustre/llite/file.c b/lustre/llite/file.c

index 97ad193..f71cfb2 100644 (file)
--- a/lustre/llite/file.c
+++ b/lustre/llite/file.c
@@ -2901,7 +2901,8 @@ ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock)
  }
  
  int ll_get_fid_by_name(struct inode *parent, const char *name,
-                      int namelen, struct lu_fid *fid)
+                      int namelen, struct lu_fid *fid,
+                      struct inode **inode)
  {
         struct md_op_data       *op_data = NULL;
         struct mdt_body         *body;
@@ -2914,7 +2915,7 @@ int ll_get_fid_by_name(struct inode *parent, const char *name,
         if (IS_ERR(op_data))
                 RETURN(PTR_ERR(op_data));
  
-       op_data->op_valid = OBD_MD_FLID;
+       op_data->op_valid = OBD_MD_FLID | OBD_MD_FLTYPE;
         rc = md_getattr_name(ll_i2sbi(parent)->ll_md_exp, op_data, &req);
         ll_finish_md_op_data(op_data);
         if (rc < 0)
@@ -2925,6 +2926,9 @@ int ll_get_fid_by_name(struct inode *parent, const char *name,
                 GOTO(out_req, rc = -EFAULT);
         if (fid != NULL)
                 *fid = body->mbo_fid1;
+
+       if (inode != NULL)
+               rc = ll_prep_inode(inode, req, parent->i_sb, NULL);
  out_req:
         ptlrpc_req_finished(req);
         RETURN(rc);
@@ -2937,8 +2941,11 @@ int ll_migrate(struct inode *parent, struct file *file, int mdtidx,
         struct inode          *child_inode = NULL;
         struct md_op_data     *op_data;
         struct ptlrpc_request *request = NULL;
+       struct obd_client_handle *och = NULL;
         struct qstr           qstr;
+       struct mdt_body         *body;
         int                    rc;
+       __u64                   data_version = 0;
         ENTRY;
  
         CDEBUG(D_VFSTRACE, "migrate %s under "DFID" to MDT%04x\n",
@@ -2955,22 +2962,23 @@ int ll_migrate(struct inode *parent, struct file *file, int mdtidx,
         qstr.len = namelen;
         dchild = d_lookup(file->f_path.dentry, &qstr);
         if (dchild != NULL) {
-               if (dchild->d_inode != NULL) {
+               if (dchild->d_inode != NULL)
                         child_inode = igrab(dchild->d_inode);
-                       if (child_inode != NULL) {
-                               mutex_lock(&child_inode->i_mutex);
-                               op_data->op_fid3 = *ll_inode2fid(child_inode);
-                               ll_invalidate_aliases(child_inode);
-                       }
-               }
                 dput(dchild);
-       } else {
+       }
+
+       if (child_inode == NULL) {
                 rc = ll_get_fid_by_name(parent, name, namelen,
-                                       &op_data->op_fid3);
+                                       &op_data->op_fid3, &child_inode);
                 if (rc != 0)
                         GOTO(out_free, rc);
         }
  
+       if (child_inode == NULL)
+               GOTO(out_free, rc = -EINVAL);
+
+       mutex_lock(&child_inode->i_mutex);
+       op_data->op_fid3 = *ll_inode2fid(child_inode);
         if (!fid_is_sane(&op_data->op_fid3)) {
                 CERROR("%s: migrate %s , but fid "DFID" is insane\n",
                        ll_get_fsname(parent->i_sb, NULL, 0), name,
@@ -2987,6 +2995,26 @@ int ll_migrate(struct inode *parent, struct file *file, int mdtidx,
                        PFID(&op_data->op_fid3), mdtidx);
                 GOTO(out_free, rc = 0);
         }
+again:
+       if (S_ISREG(child_inode->i_mode)) {
+               och = ll_lease_open(child_inode, NULL, FMODE_WRITE, 0);
+               if (IS_ERR(och)) {
+                       rc = PTR_ERR(och);
+                       och = NULL;
+                       GOTO(out_free, rc);
+               }
+
+               rc = ll_data_version(child_inode, &data_version,
+                                    LL_DV_WR_FLUSH);
+               if (rc != 0)
+                       GOTO(out_free, rc);
+
+               op_data->op_handle = och->och_fh;
+               op_data->op_data = och->och_mod;
+               op_data->op_data_version = data_version;
+               op_data->op_lease_handle = och->och_lease_handle;
+               op_data->op_bias |= MDS_RENAME_MIGRATE;
+       }
  
         op_data->op_mds = mdtidx;
         op_data->op_cli_flags = CLI_MIGRATE;
@@ -2995,12 +3023,28 @@ int ll_migrate(struct inode *parent, struct file *file, int mdtidx,
         if (rc == 0)
                 ll_update_times(request, parent);
  
-       ptlrpc_req_finished(request);
-       if (rc != 0)
-               GOTO(out_free, rc);
+       body = req_capsule_server_get(&request->rq_pill, &RMF_MDT_BODY);
+       if (body == NULL)
+               GOTO(out_free, rc = -EPROTO);
+
+       /* If the server does release the layout lock, then we clean up
+        * the client och here; otherwise release it in out_free: */
+       if (och != NULL && body->mbo_valid & OBD_MD_CLOSE_INTENT_EXECED) {
+               obd_mod_put(och->och_mod);
+               md_clear_open_replay_data(ll_i2sbi(parent)->ll_md_exp, och);
+               och->och_fh.cookie = DEAD_HANDLE_MAGIC;
+               OBD_FREE_PTR(och);
+               och = NULL;
+       }
  
+       ptlrpc_req_finished(request);
+       /* Try again if the file layout has changed. */
+       if (rc == -EAGAIN && S_ISREG(child_inode->i_mode))
+               goto again;
  out_free:
         if (child_inode != NULL) {
+               if (och != NULL) /* close the file */
+                       ll_lease_close(och, child_inode, NULL);
                 clear_nlink(child_inode);
                 mutex_unlock(&child_inode->i_mutex);
                 iput(child_inode);
diff --git a/lustre/llite/llite_internal.h b/lustre/llite/llite_internal.h

index 8d6dfb8..1936f69 100644 (file)
--- a/lustre/llite/llite_internal.h
+++ b/lustre/llite/llite_internal.h
@@ -844,7 +844,7 @@ struct posix_acl *ll_get_acl(struct inode *inode, int type);
  int ll_migrate(struct inode *parent, struct file *file, int mdtidx,
                const char *name, int namelen);
  int ll_get_fid_by_name(struct inode *parent, const char *name,
-                      int namelen, struct lu_fid *fid);
+                      int namelen, struct lu_fid *fid, struct inode **inode);
  #ifdef HAVE_GENERIC_PERMISSION_4ARGS
  int ll_inode_permission(struct inode *inode, int mask, unsigned int flags);
  #else
diff --git a/lustre/lod/lod_object.c b/lustre/lod/lod_object.c

index 95b5982..255b8c8 100644 (file)
--- a/lustre/lod/lod_object.c
+++ b/lustre/lod/lod_object.c
@@ -2035,16 +2035,19 @@ static int lod_dir_declare_xattr_set(const struct lu_env *env,
         if (rc != 0)
                 RETURN(rc);
  
+       /* Note: Do not set LinkEA on sub-stripes, otherwise
+        * it will confuse the fid2path process(see mdt_path_current()).
+        * The linkEA between master and sub-stripes is set in
+        * lod_xattr_set_lmv(). */
+       if (strcmp(name, XATTR_NAME_LINK) == 0)
+               RETURN(0);
+
         /* set xattr to each stripes, if needed */
         rc = lod_load_striping(env, lo);
         if (rc != 0)
                 RETURN(rc);
  
-       /* Note: Do not set LinkEA on sub-stripes, otherwise
-        * it will confuse the fid2path process(see mdt_path_current()).
-        * The linkEA between master and sub-stripes is set in
-        * lod_xattr_set_lmv(). */
-       if (lo->ldo_stripenr == 0 || strcmp(name, XATTR_NAME_LINK) == 0)
+       if (lo->ldo_stripenr == 0)
                 RETURN(0);
  
         for (i = 0; i < lo->ldo_stripenr; i++) {
@@ -2060,6 +2063,85 @@ static int lod_dir_declare_xattr_set(const struct lu_env *env,
  }
  
  /**
+ * Reset parent FID on OST object
+ *
+ * Replace parent FID with @dt object FID, which is only called during migration
+ * to reset the parent FID after the MDT object is migrated to the new MDT, i.e.
+ * the FID is changed.
+ *
+ * \param[in] env execution environment
+ * \param[in] dt dt_object whose stripes' parent FID will be reset
+ * \param[in] th thandle
+ * \param[in] declare if it is declare
+ *
+ * \retval     0 if reset succeeds
+ * \retval     negative errno if reset fails
+ */
+static int lod_object_replace_parent_fid(const struct lu_env *env,
+                                        struct dt_object *dt,
+                                        struct thandle *th, bool declare)
+{
+       struct lod_object *lo = lod_dt_obj(dt);
+       struct lod_thread_info  *info = lod_env_info(env);
+       struct lu_buf *buf = &info->lti_buf;
+       struct filter_fid *ff;
+       int i, rc;
+       ENTRY;
+
+       LASSERT(S_ISREG(dt->do_lu.lo_header->loh_attr));
+
+       /* set xattr on each stripe, if needed */
+       rc = lod_load_striping(env, lo);
+       if (rc != 0)
+               RETURN(rc);
+
+       if (lo->ldo_stripenr == 0)
+               RETURN(0);
+
+       if (info->lti_ea_store_size < sizeof(*ff)) {
+               rc = lod_ea_store_resize(info, sizeof(*ff));
+               if (rc != 0)
+                       RETURN(rc);
+       }
+
+       buf->lb_buf = info->lti_ea_store;
+       buf->lb_len = info->lti_ea_store_size;
+
+       for (i = 0; i < lo->ldo_stripenr; i++) {
+               if (lo->ldo_stripe[i] == NULL)
+                       continue;
+
+               rc = dt_xattr_get(env, lo->ldo_stripe[i], buf,
+                                 XATTR_NAME_FID);
+               if (rc < 0) {
+                       rc = 0;
+                       continue;
+               }
+
+               ff = buf->lb_buf;
+               fid_le_to_cpu(&ff->ff_parent, &ff->ff_parent);
+               ff->ff_parent.f_seq = lu_object_fid(&dt->do_lu)->f_seq;
+               ff->ff_parent.f_oid = lu_object_fid(&dt->do_lu)->f_oid;
+               fid_cpu_to_le(&ff->ff_parent, &ff->ff_parent);
+
+               if (declare) {
+                       rc = lod_sub_object_declare_xattr_set(env,
+                                               lo->ldo_stripe[i], buf,
+                                               XATTR_NAME_FID,
+                                               LU_XATTR_REPLACE, th);
+               } else {
+                       rc = lod_sub_object_xattr_set(env, lo->ldo_stripe[i],
+                                                     buf, XATTR_NAME_FID,
+                                                     LU_XATTR_REPLACE, th);
+               }
+               if (rc < 0)
+                       break;
+       }
+
+       RETURN(rc);
+}
+
+/**
   * Implementation of dt_object_operations::do_declare_xattr_set.
   *
   * \see dt_object_operations::do_declare_xattr_set() in the API description
@@ -2105,6 +2187,8 @@ static int lod_declare_xattr_set(const struct lu_env *env,
                 rc = lod_declare_striped_object(env, dt, attr, buf, th);
         } else if (S_ISDIR(mode)) {
                 rc = lod_dir_declare_xattr_set(env, dt, buf, name, fl, th);
+       } else if (strcmp(name, XATTR_NAME_FID) == 0) {
+               rc = lod_object_replace_parent_fid(env, dt, th, true);
         } else {
                 rc = lod_sub_object_declare_xattr_set(env, next, buf, name,
                                                       fl, th);
@@ -2757,6 +2841,10 @@ static int lod_xattr_set(const struct lu_env *env,
                         rc = lod_striping_create(env, dt, NULL, NULL, th);
                 }
                 RETURN(rc);
+       } else if (strcmp(name, XATTR_NAME_FID) == 0) {
+               rc = lod_object_replace_parent_fid(env, dt, th, false);
+
+               RETURN(rc);
         }
  
         /* then all other xattr */
diff --git a/lustre/lod/lod_sub_object.c b/lustre/lod/lod_sub_object.c

index cb4f8fd..75271b2 100644 (file)
--- a/lustre/lod/lod_sub_object.c
+++ b/lustre/lod/lod_sub_object.c
@@ -95,7 +95,9 @@ struct thandle *lod_sub_get_thandle(const struct lu_env *env,
         if (rc < 0)
                 RETURN(ERR_PTR(rc));
  
-       if (type == LU_SEQ_RANGE_OST)
+       /* th_complex means we need to track all updates for this
+        * transaction, including changes on OSTs */
+       if (type == LU_SEQ_RANGE_OST && !th->th_complex)
                 RETURN(tth->tt_master_sub_thandle);
  
         sub_th = thandle_get_sub(env, th, sub_obj);
diff --git a/lustre/mdc/mdc_lib.c b/lustre/mdc/mdc_lib.c

index d3a9365..db7cbb5 100644 (file)
--- a/lustre/mdc/mdc_lib.c
+++ b/lustre/mdc/mdc_lib.c
@@ -397,6 +397,31 @@ void mdc_link_pack(struct ptlrpc_request *req, struct md_op_data *op_data)
         mdc_pack_name(req, &RMF_NAME, op_data->op_name, op_data->op_namelen);
  }
  
+static void mdc_intent_close_pack(struct ptlrpc_request *req,
+                                 struct md_op_data *op_data)
+{
+       struct close_data       *data;
+       struct ldlm_lock        *lock;
+       enum mds_op_bias         bias = op_data->op_bias;
+
+       if (!(bias & (MDS_HSM_RELEASE | MDS_CLOSE_LAYOUT_SWAP |
+                     MDS_RENAME_MIGRATE)))
+               return;
+
+       data = req_capsule_client_get(&req->rq_pill, &RMF_CLOSE_DATA);
+       LASSERT(data != NULL);
+
+       lock = ldlm_handle2lock(&op_data->op_lease_handle);
+       if (lock != NULL) {
+               data->cd_handle = lock->l_remote_handle;
+               LDLM_LOCK_PUT(lock);
+       }
+       ldlm_cli_cancel(&op_data->op_lease_handle, LCF_LOCAL);
+
+       data->cd_data_version = op_data->op_data_version;
+       data->cd_fid = op_data->op_fid2;
+}
+
  void mdc_rename_pack(struct ptlrpc_request *req, struct md_op_data *op_data,
                      const char *old, size_t oldlen,
                      const char *new, size_t newlen)
@@ -424,6 +449,15 @@ void mdc_rename_pack(struct ptlrpc_request *req, struct md_op_data *op_data,
  
         if (new != NULL)
                 mdc_pack_name(req, &RMF_SYMTGT, new, newlen);
+
+       if (op_data->op_cli_flags & CLI_MIGRATE &&
+           op_data->op_bias & MDS_RENAME_MIGRATE) {
+               struct mdt_ioepoch *epoch;
+
+               mdc_intent_close_pack(req, op_data);
+               epoch = req_capsule_client_get(&req->rq_pill, &RMF_MDT_EPOCH);
+               mdc_ioepoch_pack(epoch, op_data);
+       }
  }
  
  void mdc_getattr_pack(struct ptlrpc_request *req, __u64 valid, __u32 flags,
@@ -450,30 +484,6 @@ void mdc_getattr_pack(struct ptlrpc_request *req, __u64 valid, __u32 flags,
                               op_data->op_namelen);
  }
  
-static void mdc_intent_close_pack(struct ptlrpc_request *req,
-                                 struct md_op_data *op_data)
-{
-       struct close_data       *data;
-       struct ldlm_lock        *lock;
-       enum mds_op_bias         bias = op_data->op_bias;
-
-       if (!(bias & (MDS_HSM_RELEASE | MDS_CLOSE_LAYOUT_SWAP)))
-               return;
-
-       data = req_capsule_client_get(&req->rq_pill, &RMF_CLOSE_DATA);
-       LASSERT(data != NULL);
-
-       lock = ldlm_handle2lock(&op_data->op_lease_handle);
-       if (lock != NULL) {
-               data->cd_handle = lock->l_remote_handle;
-               LDLM_LOCK_PUT(lock);
-       }
-       ldlm_cli_cancel(&op_data->op_lease_handle, LCF_LOCAL);
-
-       data->cd_data_version = op_data->op_data_version;
-       data->cd_fid = op_data->op_fid2;
-}
-
  void mdc_close_pack(struct ptlrpc_request *req, struct md_op_data *op_data)
  {
          struct mdt_ioepoch *epoch;
diff --git a/lustre/mdc/mdc_reint.c b/lustre/mdc/mdc_reint.c

index 31985a2..1ca6276 100644 (file)
--- a/lustre/mdc/mdc_reint.c
+++ b/lustre/mdc/mdc_reint.c
@@ -378,12 +378,13 @@ int mdc_rename(struct obd_export *exp, struct md_op_data *op_data,
                                                   &cancels, LCK_EX,
                                                   MDS_INODELOCK_FULL);
  
-        req = ptlrpc_request_alloc(class_exp2cliimp(exp),
-                                   &RQF_MDS_REINT_RENAME);
-        if (req == NULL) {
-                ldlm_lock_list_put(&cancels, l_bl_ast, count);
-                RETURN(-ENOMEM);
-        }
+       req = ptlrpc_request_alloc(class_exp2cliimp(exp),
+                          op_data->op_cli_flags & CLI_MIGRATE ?
+                          &RQF_MDS_REINT_MIGRATE : &RQF_MDS_REINT_RENAME);
+       if (req == NULL) {
+               ldlm_lock_list_put(&cancels, l_bl_ast, count);
+               RETURN(-ENOMEM);
+       }
  
          req_capsule_set_size(&req->rq_pill, &RMF_NAME, RCL_CLIENT, oldlen + 1);
          req_capsule_set_size(&req->rq_pill, &RMF_SYMTGT, RCL_CLIENT, newlen+1);
@@ -394,6 +395,21 @@ int mdc_rename(struct obd_export *exp, struct md_op_data *op_data,
                 RETURN(rc);
         }
  
+       if (op_data->op_cli_flags & CLI_MIGRATE && op_data->op_data != NULL) {
+               struct md_open_data *mod = op_data->op_data;
+
+               LASSERTF(mod->mod_open_req != NULL &&
+                        mod->mod_open_req->rq_type != LI_POISON,
+                        "POISONED open %p!\n", mod->mod_open_req);
+
+               DEBUG_REQ(D_HA, mod->mod_open_req, "matched open");
+               /* We no longer want to preserve this open for replay even
+                * though the open was committed. b=3632, b=3633 */
+               spin_lock(&mod->mod_open_req->rq_lock);
+               mod->mod_open_req->rq_replay = 0;
+               spin_unlock(&mod->mod_open_req->rq_lock);
+       }
+
          if (exp_connect_cancelset(exp) && req)
                  ldlm_cli_cancel_list(&cancels, count, req, 0);
  
diff --git a/lustre/mdd/mdd_dir.c b/lustre/mdd/mdd_dir.c

index 67b95a6..17b654d 100644 (file)
--- a/lustre/mdd/mdd_dir.c
+++ b/lustre/mdd/mdd_dir.c
@@ -3205,6 +3205,7 @@ static int mdd_migrate_xattrs(const struct lu_env *env,
         int                     list_xsize;
         struct lu_buf           list_xbuf;
         int                     rc;
+       int                     rc1;
  
         /* retrieve xattr list from the old object */
         list_xsize = mdo_xattr_list(env, mdd_sobj, &LU_BUF_NULL);
@@ -3279,7 +3280,9 @@ static int mdd_migrate_xattrs(const struct lu_env *env,
                 if (rc != 0)
                         GOTO(stop_trans, rc);
  stop_trans:
-               mdd_trans_stop(env, mdd, rc, handle);
+               rc1 = mdd_trans_stop(env, mdd, rc, handle);
+               if (rc == 0)
+                       rc = rc1;
                 if (rc != 0)
                         GOTO(out, rc);
  next:
@@ -3397,7 +3400,7 @@ static int mdd_migrate_create(const struct lu_env *env,
                         RETURN(rc);
                 }
                 spec->u.sp_symname = link_buf.lb_buf;
-       } else if S_ISREG(la->la_mode) {
+       } else if (S_ISREG(la->la_mode)) {
                 /* retrieve lov of the old object */
                 rc = mdd_get_lov_ea(env, mdd_sobj, &lmm_buf);
                 if (rc != 0 && rc != -ENODATA)
@@ -3473,8 +3476,13 @@ static int mdd_migrate_create(const struct lu_env *env,
         la_flag->la_flags = la->la_flags | LUSTRE_IMMUTABLE_FL;
         rc = mdo_attr_set(env, mdd_sobj, la_flag, handle);
  stop_trans:
-       if (handle != NULL)
-               mdd_trans_stop(env, mdd, rc, handle);
+       if (handle != NULL) {
+               int rc1;
+
+               rc1 = mdd_trans_stop(env, mdd, rc, handle);
+               if (rc == 0)
+                       rc = rc1;
+       }
  out_free:
         if (lmm_buf.lb_buf != NULL)
                 OBD_FREE(lmm_buf.lb_buf, lmm_buf.lb_len);
@@ -3529,6 +3537,7 @@ static int mdd_migrate_entries(const struct lu_env *env,
                 int                     recsize;
                 int                     is_dir;
                 bool                    target_exist = false;
+               int                     rc1;
  
                 len = iops->key_size(env, it);
                 if (len == 0)
@@ -3556,6 +3565,7 @@ static int mdd_migrate_entries(const struct lu_env *env,
                 if (IS_ERR(child))
                         GOTO(out, rc = PTR_ERR(child));
  
+               mdd_write_lock(env, child, MOR_SRC_CHILD);
                 is_dir = S_ISDIR(mdd_object_type(child));
  
                 snprintf(name, ent->lde_namelen + 1, "%s", ent->lde_name);
@@ -3658,8 +3668,12 @@ static int mdd_migrate_entries(const struct lu_env *env,
                                              strlen(name), handle);
  
  out_put:
+               mdd_write_unlock(env, child);
                 mdd_object_put(env, child);
-               mdd_trans_stop(env, mdd, rc, handle);
+               rc1 = mdd_trans_stop(env, mdd, rc, handle);
+               if (rc == 0)
+                       rc = rc1;
+
                 if (rc != 0)
                         GOTO(out, rc);
  next:
@@ -3734,6 +3748,13 @@ static int mdd_declare_migrate_update_name(const struct lu_env *env,
                                            handle);
                 if (rc != 0)
                         return rc;
+
+               handle->th_complex = 1;
+               rc = mdo_declare_xattr_set(env, mdd_tobj, NULL,
+                                          XATTR_NAME_FID,
+                                          LU_XATTR_REPLACE, handle);
+               if (rc < 0)
+                       return rc;
         }
  
         if (S_ISDIR(mdd_object_type(mdd_sobj))) {
@@ -3857,6 +3878,12 @@ static int mdd_migrate_update_name(const struct lu_env *env,
                                            handle);
                         if (rc != 0 && rc != -ENODATA)
                                 GOTO(stop_trans, rc);
+
+                       rc = mdo_xattr_set(env, mdd_tobj, NULL,
+                                          XATTR_NAME_FID,
+                                          LU_XATTR_REPLACE, handle);
+                       if (rc < 0)
+                               GOTO(stop_trans, rc);
                 }
         }
  
@@ -3871,7 +3898,18 @@ static int mdd_migrate_update_name(const struct lu_env *env,
         if (rc != 0)
                 GOTO(stop_trans, rc);
  
-       mdd_write_lock(env, mdd_sobj, MOR_SRC_CHILD);
+       mdd_write_lock(env, mdd_sobj, MOR_TGT_CHILD);
+
+       /* Increase mod_count to add the source object to the orphan list,
+        * so if other clients still send RPC to the old object, then these
+        * objects can help the request to find the new object, see
+        * mdt_reint_open() */
+       mdd_sobj->mod_count++;
+       rc = mdd_finish_unlink(env, mdd_sobj, ma, mdd_pobj, lname, handle);
+       mdd_sobj->mod_count--;
+       if (rc != 0)
+               GOTO(out_unlock, rc);
+
         mdo_ref_del(env, mdd_sobj, handle);
         if (is_dir)
                 mdo_ref_del(env, mdd_sobj, handle);
@@ -3883,9 +3921,6 @@ static int mdd_migrate_update_name(const struct lu_env *env,
  
         ma->ma_attr = *so_attr;
         ma->ma_valid |= MA_INODE;
-       rc = mdd_finish_unlink(env, mdd_sobj, ma, mdd_pobj, lname, handle);
-       if (rc != 0)
-               GOTO(out_unlock, rc);
  
         rc = mdd_attr_set_internal(env, mdd_pobj, p_la, handle, 0);
         if (rc != 0)
@@ -3962,15 +3997,15 @@ static int mdd_migrate_sanity_check(const struct lu_env *env,
         if (rc != 0) {
                 /* For multiple links files, if there are no linkEA data at all,
                  * means the file might be created before linkEA is enabled, and
-                * all all of its links should not be migrated yet, otherwise
-                * it should have some linkEA there */
+                * all of its links should not be migrated yet, otherwise it
+                * should have some linkEA there */
                 if (rc == -ENOENT || rc == -ENODATA)
                         RETURN(1);
                 RETURN(rc);
         }
  
-       /* If it is mulitple links file, we need update the name entry for
-        * all parent */
+       /* If there are still links locally, then the file will not be
+        * migrated. */
         LASSERT(ldata->ld_leh != NULL);
         ldata->ld_lee = (struct link_ea_entry *)(ldata->ld_leh + 1);
         for (count = 0; count < ldata->ld_leh->leh_reccount; count++) {
@@ -4025,7 +4060,7 @@ static int mdd_migrate(const struct lu_env *env, struct md_object *pobj,
         /* If the file will being migrated, it will check whether
          * the file is being opened by someone else right now */
         mdd_read_lock(env, mdd_sobj, MOR_SRC_CHILD);
-       if (mdd_sobj->mod_count >= 1) {
+       if (mdd_sobj->mod_count > 0) {
                 CERROR("%s: "DFID"%s is already opened count %d: rc = %d\n",
                        mdd2obd_dev(mdd)->obd_name,
                        PFID(mdd_object_fid(mdd_sobj)), lname->ln_name,
@@ -4080,6 +4115,7 @@ static int mdd_migrate(const struct lu_env *env, struct md_object *pobj,
                         GOTO(put, rc);
         }
  
+       LASSERT(mdd_object_exists(mdd_tobj));
         /* step 2: migrate xattr */
         rc = mdd_migrate_xattrs(env, mdd_sobj, mdd_tobj);
         if (rc != 0)
@@ -4097,6 +4133,7 @@ static int mdd_migrate(const struct lu_env *env, struct md_object *pobj,
                 OBD_FAIL_TIMEOUT(OBD_FAIL_MIGRATE_DELAY, cfs_fail_val);
         }
  
+       LASSERT(mdd_object_exists(mdd_tobj));
         /* step 4: update name entry to the new object */
         rc = mdd_migrate_update_name(env, mdd_pobj, mdd_sobj, mdd_tobj, lname,
                                      ma);
diff --git a/lustre/mdt/mdt_handler.c b/lustre/mdt/mdt_handler.c

index 2642a12..e1ffb3d 100644 (file)
--- a/lustre/mdt/mdt_handler.c
+++ b/lustre/mdt/mdt_handler.c
@@ -2741,6 +2741,7 @@ void mdt_thread_info_init(struct ptlrpc_request *req,
          info->mti_spec.no_create = 0;
         info->mti_spec.sp_rm_entry = 0;
         info->mti_spec.sp_permitted = 0;
+       info->mti_spec.sp_migrate_close = 0;
  
         info->mti_spec.u.sp_ea.eadata = NULL;
         info->mti_spec.u.sp_ea.eadatalen = 0;
diff --git a/lustre/mdt/mdt_internal.h b/lustre/mdt/mdt_internal.h

index eb1ad98..0ebcac2 100644 (file)
--- a/lustre/mdt/mdt_internal.h
+++ b/lustre/mdt/mdt_internal.h
@@ -720,6 +720,8 @@ int mdt_remote_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
  int mdt_links_read(struct mdt_thread_info *info,
                    struct mdt_object *mdt_obj,
                    struct linkea_data *ldata);
+int mdt_close_internal(struct mdt_thread_info *info, struct ptlrpc_request *req,
+                      struct mdt_body *repbody);
  /* mdt_idmap.c */
  int mdt_init_idmap(struct tgt_session_info *tsi);
  void mdt_cleanup_idmap(struct mdt_export_data *);
@@ -872,6 +874,9 @@ int mdt_hsm_update_request_state(struct mdt_thread_info *mti,
                                  struct hsm_progress_kernel *pgs,
                                  const int update_record);
  
+int mdt_close_swap_layouts(struct mdt_thread_info *info,
+                          struct mdt_object *o, struct md_attr *ma);
+
  extern struct lu_context_key       mdt_thread_key;
  
  /* debug issues helper starts here*/
diff --git a/lustre/mdt/mdt_lib.c b/lustre/mdt/mdt_lib.c

index 984cfa2..fca8a40 100644 (file)
--- a/lustre/mdt/mdt_lib.c
+++ b/lustre/mdt/mdt_lib.c
@@ -1286,6 +1286,14 @@ static int mdt_rename_unpack(struct mdt_thread_info *info)
          else
                  ma->ma_attr_flags &= ~MDS_VTX_BYPASS;
  
+       if (rec->rn_bias & MDS_RENAME_MIGRATE) {
+               req_capsule_extend(info->mti_pill, &RQF_MDS_REINT_MIGRATE);
+               rc = mdt_close_handle_unpack(info);
+               if (rc < 0)
+                       RETURN(rc);
+               info->mti_spec.sp_migrate_close = 1;
+       }
+
          info->mti_spec.no_create = !!req_is_replay(mdt_info_req(info));
  
  
diff --git a/lustre/mdt/mdt_open.c b/lustre/mdt/mdt_open.c

index 4af70ff..38e7eb0 100644 (file)
--- a/lustre/mdt/mdt_open.c
+++ b/lustre/mdt/mdt_open.c
@@ -1273,6 +1273,7 @@ int mdt_reint_open(struct mdt_thread_info *info, struct mdt_lock_handle *lhc)
         if (!lu_name_is_valid(&rr->rr_name))
                 GOTO(out, result = -EPROTO);
  
+again:
          lh = &info->mti_lh[MDT_LH_PARENT];
         mdt_lock_pdo_init(lh,
                           (create_flags & MDS_OPEN_CREAT) ? LCK_PW : LCK_PR,
@@ -1306,16 +1307,45 @@ int mdt_reint_open(struct mdt_thread_info *info, struct mdt_lock_handle *lhc)
          if (result != 0 && result != -ENOENT && result != -ESTALE)
                  GOTO(out_parent, result);
  
-        if (result == -ENOENT || result == -ESTALE) {
-                mdt_set_disposition(info, ldlm_rep, DISP_LOOKUP_NEG);
-                if (result == -ESTALE) {
-                        /*
-                         * -ESTALE means the parent is a dead(unlinked) dir, so
-                         * it should return -ENOENT to in accordance with the
-                         * original mds implementaion.
-                         */
-                        GOTO(out_parent, result = -ENOENT);
-                }
+       if (result == -ENOENT || result == -ESTALE) {
+               /* If the object is dead, let's check if the object
+                * is being migrated to a new object */
+               if (result == -ESTALE) {
+                       struct lu_buf lmv_buf;
+
+                       lmv_buf.lb_buf = info->mti_xattr_buf;
+                       lmv_buf.lb_len = sizeof(info->mti_xattr_buf);
+                       rc = mo_xattr_get(info->mti_env,
+                                         mdt_object_child(parent),
+                                         &lmv_buf, XATTR_NAME_LMV);
+                       if (rc > 0) {
+                               struct lmv_mds_md_v1 *lmv;
+
+                               lmv = lmv_buf.lb_buf;
+                               if (le32_to_cpu(lmv->lmv_hash_type) &
+                                               LMV_HASH_FLAG_MIGRATION) {
+                                       /* Get the new parent FID and retry */
+                                       mdt_object_unlock_put(info, parent,
+                                                             lh, 1);
+                                       mdt_lock_handle_init(lh);
+                                       fid_le_to_cpu(
+                                               (struct lu_fid *)rr->rr_fid1,
+                                               &lmv->lmv_stripe_fids[1]);
+                                       goto again;
+                               }
+                       }
+               }
+
+               mdt_set_disposition(info, ldlm_rep, DISP_LOOKUP_NEG);
+               if (result == -ESTALE) {
+                       /*
+                        * -ESTALE means the parent is a dead (unlinked) dir, so
+                        * return -ENOENT in accordance with the original MDS
+                        * implementation.
+                        */
+                       GOTO(out_parent, result = -ENOENT);
+               }
+
                  if (!(create_flags & MDS_OPEN_CREAT))
                          GOTO(out_parent, result);
                 if (exp_connect_flags(req->rq_export) & OBD_CONNECT_RDONLY)
@@ -1760,8 +1790,8 @@ out_reprocess:
         return rc;
  }
  
-static int mdt_close_swap_layouts(struct mdt_thread_info *info,
-                                 struct mdt_object *o, struct md_attr *ma)
+int mdt_close_swap_layouts(struct mdt_thread_info *info,
+                          struct mdt_object *o, struct md_attr *ma)
  {
         struct mdt_lock_handle  *lh1 = &info->mti_lh[MDT_LH_NEW];
         struct mdt_lock_handle  *lh2 = &info->mti_lh[MDT_LH_OLD];
@@ -1888,8 +1918,10 @@ out_obj:
  out_lease:
         LDLM_LOCK_PUT(lease);
  
-       ma->ma_valid = 0;
-       ma->ma_need = 0;
+       if (ma != NULL) {
+               ma->ma_valid = 0;
+               ma->ma_need = 0;
+       }
  
         return rc;
  }
@@ -1970,13 +2002,48 @@ int mdt_mfd_close(struct mdt_thread_info *info, struct mdt_file_data *mfd)
         RETURN(rc);
  }
  
+int mdt_close_internal(struct mdt_thread_info *info, struct ptlrpc_request *req,
+                      struct mdt_body *repbody)
+{      /* Look up the open handle in info->mti_close_handle on req's export and close it. */
+       struct mdt_export_data *med;
+       struct mdt_file_data   *mfd;
+       struct mdt_object      *o;
+       struct md_attr         *ma = &info->mti_attr;
+       int                     ret = 0; /* status of mdt_mfd_close() */
+       int                     rc = 0; /* -ESTALE if no handle, else mdt_handle_last_unlink() status */
+       ENTRY;
+
+       med = &req->rq_export->exp_mdt_data;
+       spin_lock(&med->med_open_lock); /* held across the lookup and the unhash/list removal */
+       mfd = mdt_handle2mfd(med, &info->mti_close_handle, req_is_replay(req));
+       if (mdt_mfd_closed(mfd)) {
+               spin_unlock(&med->med_open_lock);
+               CDEBUG(D_INODE, "no handle for file close: fid = "DFID
+                      ": cookie = "LPX64"\n", PFID(info->mti_rr.rr_fid1),
+                      info->mti_close_handle.cookie);
+               /** no handle for this close; not a serious error since bug 3633 */
+               rc = -ESTALE;
+       } else {
+               class_handle_unhash(&mfd->mfd_handle);
+               list_del_init(&mfd->mfd_list);
+               spin_unlock(&med->med_open_lock);
+
+               /* Hold a reference on the object across mdt_mfd_close() and the last-unlink handling. */
+               o = mfd->mfd_object;
+               mdt_object_get(info->mti_env, o);
+               ret = mdt_mfd_close(info, mfd);
+               if (repbody != NULL) /* last-unlink handling only when the caller supplies a reply body */
+                       rc = mdt_handle_last_unlink(info, o, ma);
+               mdt_object_put(info->mti_env, o);
+       }
+
+       RETURN(rc ? rc : ret); /* rc takes precedence; otherwise report the mdt_mfd_close() status */
+}
+
  int mdt_close(struct tgt_session_info *tsi)
  {
         struct mdt_thread_info  *info = tsi2mdt_info(tsi);
         struct ptlrpc_request   *req = tgt_ses_req(tsi);
-        struct mdt_export_data *med;
-        struct mdt_file_data   *mfd;
-        struct mdt_object      *o;
          struct md_attr         *ma = &info->mti_attr;
          struct mdt_body        *repbody = NULL;
          int rc, ret = 0;
@@ -2019,30 +2086,10 @@ int mdt_close(struct tgt_session_info *tsi)
                  rc = err_serious(rc);
          }
  
-        med = &req->rq_export->exp_mdt_data;
-       spin_lock(&med->med_open_lock);
-       mfd = mdt_handle2mfd(med, &info->mti_close_handle, req_is_replay(req));
-       if (mdt_mfd_closed(mfd)) {
-               spin_unlock(&med->med_open_lock);
-               CDEBUG(D_INODE, "no handle for file close: fid = "DFID
-                      ": cookie = "LPX64"\n", PFID(info->mti_rr.rr_fid1),
-                      info->mti_close_handle.cookie);
-               /** not serious error since bug 3633 */
-               rc = -ESTALE;
-       } else {
-               class_handle_unhash(&mfd->mfd_handle);
-               list_del_init(&mfd->mfd_list);
-               spin_unlock(&med->med_open_lock);
+       rc = mdt_close_internal(info, req, repbody);
+       if (rc != -ESTALE)
+               mdt_empty_transno(info, rc);
  
-                /* Do not lose object before last unlink. */
-                o = mfd->mfd_object;
-                mdt_object_get(info->mti_env, o);
-                ret = mdt_mfd_close(info, mfd);
-                if (repbody != NULL)
-                        rc = mdt_handle_last_unlink(info, o, ma);
-                mdt_empty_transno(info, rc);
-                mdt_object_put(info->mti_env, o);
-        }
          if (repbody != NULL) {
                  mdt_client_compatibility(info);
                  rc = mdt_fix_reply(info);
diff --git a/lustre/mdt/mdt_reint.c b/lustre/mdt/mdt_reint.c

index 647fd69..67e6a33 100644 (file)
--- a/lustre/mdt/mdt_reint.c
+++ b/lustre/mdt/mdt_reint.c
@@ -1345,6 +1345,8 @@ static int mdt_reint_migrate_internal(struct mdt_thread_info *info,
         struct lu_fid           *old_fid = &info->mti_tmp_fid1;
         struct list_head        lock_list;
         __u64                   lock_ibits;
+       struct ldlm_lock        *lease = NULL;
+       bool                    lock_open_sem = false;
         int                     rc;
         ENTRY;
  
@@ -1414,6 +1416,55 @@ static int mdt_reint_migrate_internal(struct mdt_thread_info *info,
         if (rc != 0)
                 GOTO(out_put_child, rc);
  
+       if (info->mti_spec.sp_migrate_close) {
+               struct close_data *data;
+               struct mdt_body  *repbody;
+               bool lease_broken = false;
+
+               if (!req_capsule_field_present(info->mti_pill, &RMF_MDT_EPOCH,
+                                     RCL_CLIENT) ||
+                   !req_capsule_field_present(info->mti_pill, &RMF_CLOSE_DATA,
+                                     RCL_CLIENT))
+                       GOTO(out_lease, rc = -EPROTO);
+
+               data = req_capsule_client_get(info->mti_pill, &RMF_CLOSE_DATA);
+               if (data == NULL)
+                       GOTO(out_lease, rc = -EPROTO);
+
+               lease = ldlm_handle2lock(&data->cd_handle);
+               if (lease == NULL)
+                       GOTO(out_lease, rc = -ESTALE);
+
+               /* try to hold open_sem so that nobody else can open the file */
+               if (!down_write_trylock(&mold->mot_open_sem)) {
+                       ldlm_lock_cancel(lease);
+                       GOTO(out_lease, rc = -EBUSY);
+               }
+
+               lock_open_sem = true;
+               /* Check if the open lease has already been canceled */
+               lock_res_and_lock(lease);
+               lease_broken = ldlm_is_cancel(lease);
+               unlock_res_and_lock(lease);
+
+               LDLM_DEBUG(lease, DFID " lease broken? %d\n",
+                          PFID(mdt_object_fid(mold)), lease_broken);
+
+               /* Cancel server side lease. Client side counterpart should
+                * have been cancelled. It's okay to cancel it now as we've
+                * held mot_open_sem. */
+               ldlm_lock_cancel(lease);
+
+               if (lease_broken)
+                       GOTO(out_lease, rc = -EAGAIN);
+out_lease:
+               rc = mdt_close_internal(info, mdt_info_req(info), NULL);
+               repbody = req_capsule_server_get(info->mti_pill, &RMF_MDT_BODY);
+               repbody->mbo_valid |= OBD_MD_CLOSE_INTENT_EXECED;
+               if (rc != 0)
+                       GOTO(out_unlock_list, rc);
+       }
+
         /* 4: lock of the object migrated object */
         lh_childp = &info->mti_lh[MDT_LH_OLD];
         mdt_lock_reg_init(lh_childp, LCK_EX);
@@ -1505,6 +1556,7 @@ static int mdt_reint_migrate_internal(struct mdt_thread_info *info,
                          mdt_object_child(mnew), ma);
         if (rc != 0)
                 GOTO(out_unlock_new, rc);
+
  out_unlock_new:
         if (lh_tgtp != NULL)
                 mdt_object_unlock(info, mnew, lh_tgtp, rc);
@@ -1515,6 +1567,13 @@ out_unlock_child:
         mdt_object_unlock(info, mold, lh_childp, rc);
  out_unlock_list:
         mdt_unlock_list(info, &lock_list, rc);
+       if (lease != NULL) {
+               ldlm_reprocess_all(lease->l_resource);
+               LDLM_LOCK_PUT(lease);
+       }
+
+       if (lock_open_sem)
+               up_write(&mold->mot_open_sem);
  out_put_child:
         mdt_object_put(info->mti_env, mold);
  out_unlock_parent:
diff --git a/lustre/osp/osp_trans.c b/lustre/osp/osp_trans.c

index df648fc..e700acf 100644 (file)
--- a/lustre/osp/osp_trans.c
+++ b/lustre/osp/osp_trans.c
@@ -982,9 +982,11 @@ static int osp_send_update_req(const struct lu_env *env,
                 if (top_device->ld_obd->obd_recovering)
                         req->rq_allow_replay = 1;
  
-               osp_get_rpc_lock(osp);
+               if (osp->opd_connect_mdt)
+                       osp_get_rpc_lock(osp);
                 rc = ptlrpc_queue_wait(req);
-               osp_put_rpc_lock(osp);
+               if (osp->opd_connect_mdt)
+                       osp_put_rpc_lock(osp);
                 if ((rc == -ENOMEM && req->rq_set == NULL) ||
                     (req->rq_transno == 0 && !req->rq_committed)) {
                         if (args->oaua_update != NULL) {
@@ -1321,6 +1323,7 @@ int osp_trans_stop(const struct lu_env *env, struct dt_device *dt,
         }
  
         if (!osp->opd_connect_mdt) {
+               osp_trans_callback(env, oth, th->th_result);
                 rc = osp_send_update_req(env, osp, oth->ot_our);
                 GOTO(out, rc);
         }
diff --git a/lustre/ptlrpc/layout.c b/lustre/ptlrpc/layout.c

index 575f4c8..3a0d7dd 100644 (file)
--- a/lustre/ptlrpc/layout.c
+++ b/lustre/ptlrpc/layout.c
@@ -279,6 +279,18 @@ static const struct req_msg_field *mds_reint_rename_client[] = {
          &RMF_DLM_REQ
  };
  
+static const struct req_msg_field *mds_reint_migrate_client[] = { /* client-side fields of RQF_MDS_REINT_MIGRATE */
+       &RMF_PTLRPC_BODY,
+       &RMF_REC_REINT,
+       &RMF_CAPA1,
+       &RMF_CAPA2,
+       &RMF_NAME,
+       &RMF_SYMTGT,
+       &RMF_DLM_REQ,
+       &RMF_MDT_EPOCH, /* presence is checked by mdt_reint_migrate_internal() */
+       &RMF_CLOSE_DATA /* close_data; its cd_handle names the lease lock in mdt_reint_migrate_internal() */
+};
+
  static const struct req_msg_field *mds_last_unlink_server[] = {
          &RMF_PTLRPC_BODY,
          &RMF_MDT_BODY,
@@ -731,6 +743,7 @@ static struct req_format *req_formats[] = {
          &RQF_MDS_REINT_UNLINK,
          &RQF_MDS_REINT_LINK,
          &RQF_MDS_REINT_RENAME,
+       &RQF_MDS_REINT_MIGRATE,
          &RQF_MDS_REINT_SETATTR,
          &RQF_MDS_REINT_SETXATTR,
          &RQF_MDS_QUOTACTL,
@@ -1367,6 +1380,11 @@ struct req_format RQF_MDS_REINT_RENAME =
                          mds_last_unlink_server);
  EXPORT_SYMBOL(RQF_MDS_REINT_RENAME);
  
+struct req_format RQF_MDS_REINT_MIGRATE = /* format mdt_rename_unpack() extends the capsule to for MDS_RENAME_MIGRATE */
+       DEFINE_REQ_FMT0("MDS_REINT_MIGRATE", mds_reint_migrate_client,
+                       mds_last_unlink_server); /* same server-side field list as RQF_MDS_REINT_RENAME */
+EXPORT_SYMBOL(RQF_MDS_REINT_MIGRATE);
+
  struct req_format RQF_MDS_REINT_SETATTR =
          DEFINE_REQ_FMT0("MDS_REINT_SETATTR",
                          mds_reint_setattr_client, mds_setattr_server);
diff --git a/lustre/target/update_trans.c b/lustre/target/update_trans.c

index 82e67fc..013b1d9 100644 (file)
--- a/lustre/target/update_trans.c
+++ b/lustre/target/update_trans.c
@@ -122,11 +122,12 @@ static int sub_declare_updates_write(const struct lu_env *env,
          * for example if the the OSP is used to connect to OST */
         ctxt = llog_get_context(dt->dd_lu_dev.ld_obd,
                                 LLOG_UPDATELOG_ORIG_CTXT);
-       LASSERT(ctxt != NULL);
  
         /* Not ready to record updates yet. */
-       if (ctxt->loc_handle == NULL)
-               GOTO(out_put, rc = 0);
+       if (ctxt == NULL || ctxt->loc_handle == NULL) {
+               llog_ctxt_put(ctxt);
+               return 0;
+       }
  
         rc = llog_declare_add(env, ctxt->loc_handle,
                               &record->lur_hdr, sub_th);
@@ -184,12 +185,14 @@ static int sub_updates_write(const struct lu_env *env,
  
         ctxt = llog_get_context(dt->dd_lu_dev.ld_obd,
                                 LLOG_UPDATELOG_ORIG_CTXT);
-       LASSERT(ctxt != NULL);
-
-       /* Not ready to record updates yet, usually happens
-        * in error handler path */
-       if (ctxt->loc_handle == NULL)
-               GOTO(llog_put, rc = 0);
+       /* If ctxt == NULL, then it means updates on OST (only happens
+        * during migration), and we do not track those updates for now */
+       /* If ctxt->loc_handle == NULL, then it does not need to record
+        * update, usually happens in error handler path */
+       if (ctxt == NULL || ctxt->loc_handle == NULL) {
+               llog_ctxt_put(ctxt);
+               RETURN(0);
+       }
  
         /* Since the cross-MDT updates will includes both local
          * and remote updates, the update ops count must > 1 */
@@ -1234,7 +1237,8 @@ static int distribute_txn_cancel_records(const struct lu_env *env,
  
                 obd = st->st_dt->dd_lu_dev.ld_obd;
                 ctxt = llog_get_context(obd, LLOG_UPDATELOG_ORIG_CTXT);
-               LASSERT(ctxt);
+               if (ctxt == NULL)
+                       continue;
                 list_for_each_entry(stc, &st->st_cookie_list, stc_list) {
                         cookie = &stc->stc_cookie;
                         if (fid_is_zero(&cookie->lgc_lgl.lgl_oi.oi_fid))
diff --git a/lustre/tests/sanity-lfsck.sh b/lustre/tests/sanity-lfsck.sh

index 5ddd8d8..fa5f59d 100644 (file)
--- a/lustre/tests/sanity-lfsck.sh
+++ b/lustre/tests/sanity-lfsck.sh
@@ -1590,6 +1590,9 @@ test_15c() {
         [ $MDSCOUNT -lt 2 ] &&
                 skip "We need at least 2 MDSes for this test" && return
  
+       [ $(lustre_version_code $SINGLEMDS) -ge $(version_code 2.7.55) ] &&
+               skip "Skip the test after 2.7.55 see LU-6437" && return
+
         echo "#####"
         echo "According to current metadata migration implementation,"
         echo "before the old MDT-object is removed, both the new MDT-object"
diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh

index 6e906ef..cc0166c 100644 (file)
--- a/lustre/tests/sanity.sh
+++ b/lustre/tests/sanity.sh
@@ -12609,7 +12609,7 @@ test_230b() {
         ln -s $other_dir/$tfile $migrate_dir/${tfile}_ln_other
  
         $LFS migrate -m $MDTIDX $migrate_dir ||
-               error "migrate remote dir error"
+               error "fails on migrating remote dir to MDT1"
  
         echo "migratate to MDT1, then checking.."
         for ((i = 0; i < 10; i++)); do
@@ -12672,8 +12672,9 @@ test_230b() {
  
         #migrate back to MDT0
         MDTIDX=0
+
         $LFS migrate -m $MDTIDX $migrate_dir ||
-               error "migrate remote dir error"
+               error "fails on migrating remote dir to MDT0"
  
         echo "migrate back to MDT0, checking.."
         for file in $(find $migrate_dir); do
@@ -12752,6 +12753,12 @@ test_230c() {
         local t=$(ls $migrate_dir | wc -l)
         $LFS migrate --mdt-index $MDTIDX $migrate_dir &&
                 error "migrate should fail after 5 entries"
+
+       mkdir $migrate_dir/dir &&
+               error "mkdir succeeds under migrating directory"
+       touch $migrate_dir/file &&
+               error "touch file succeeds under migrating directory"
+
         local u=$(ls $migrate_dir | wc -l)
         [ "$u" == "$t" ] || error "$u != $t during migration"
  
diff --git a/lustre/tests/sanityn.sh b/lustre/tests/sanityn.sh

index 9acd1cb..4fa584a 100644 (file)
--- a/lustre/tests/sanityn.sh
+++ b/lustre/tests/sanityn.sh
@@ -3081,7 +3081,7 @@ test_77g() {
  }
  run_test 77g "Change TBF type directly"
  
-test_80() {
+test_80a() {
         [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return
         local MDTIDX=1
         local mdt_index
@@ -3121,7 +3121,130 @@ test_80() {
  
         rm -rf $DIR1/$tdir || error "rm dir failed after migration"
  }
-run_test 80 "migrate directory when some children is being opened"
+run_test 80a "migrate directory when some children is being opened"
+
+cleanup_80b() { # EXIT-trap handler of test_80b, which also calls it directly when it finishes
+       trap 0 # reset the EXIT trap installed by test_80b
+       kill -9 $migrate_pid # stop the background migration loop started by test_80b
+}
+
+test_80b() {
+       [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return
+       local migrate_dir1=$DIR1/$tdir/migrate_dir
+       local migrate_dir2=$DIR2/$tdir/migrate_dir
+       local migrate_run=$LUSTRE/tests/migrate.sh
+       local start_time
+       local end_time
+       local show_time=1
+       local mdt_idx
+       local rc=0
+       local rc1=0
+
+       trap cleanup_80b EXIT
+       #prepare migrate directory
+       mkdir -p $migrate_dir1
+       for F in {1,2,3,4,5}; do
+               echo "$F$F$F$F$F" > $migrate_dir1/file$F
+               echo "$F$F$F$F$F" > $DIR/$tdir/file$F
+       done
+
+       #migrate the directories among MDTs
+       (
+               while true; do
+                       mdt_idx=$((RANDOM % MDSCOUNT))
+                       $LFS migrate -m $mdt_idx $migrate_dir1 2&>/dev/null ||
+                               rc=$?
+                       [ $rc -ne 0 -o $rc -ne 16 ] || break
+               done
+       ) &
+       migrate_pid=$!
+
+       echo "start migration thread $migrate_pid"
+       #Access the files at the same time
+       start_time=$(date +%s)
+       echo "accessing the migrating directory for 5 minutes..."
+       while true; do
+               ls $migrate_dir2 > /dev/null || {
+                       echo "read dir fails"
+                       break
+               }
+               diff -u $DIR2/$tdir/file1 $migrate_dir2/file1 || {
+                       echo "access file1 fails"
+                       break
+               }
+
+               cat $migrate_dir2/file2 > $migrate_dir2/file3 || {
+                       echo "access file2/3 fails"
+                       break
+               }
+
+               echo "aaaaa" > $migrate_dir2/file4 > /dev/null || {
+                       echo "access file4 fails"
+                       break
+               }
+
+               stat $migrate_dir2/file5 > /dev/null || {
+                       echo "stat file5 fails"
+                       break
+               }
+
+               touch $migrate_dir2/source_file > /dev/null || rc1=$?
+               [ $rc1 -ne 0 -o $rc1 -ne 1 ] || {
+                       echo "touch file failed with $rc1"
+                       break;
+               }
+
+               if [ -e $migrate_dir2/source_file ]; then
+                       ln $migrate_dir2/source_file $migrate_dir2/link_file \
+                                       2&>/dev/null || rc1=$?
+                       if [ -e $migrate_dir2/link_file ]; then
+                               rm -rf $migrate_dir2/link_file
+                       fi
+
+                       mrename $migrate_dir2/source_file \
+                               $migrate_dir2/target_file 2&>/dev/null || rc1=$?
+                       [ $rc1 -ne 0 -o $rc1 -ne 1 ] || {
+                               echo "rename failed with $rc1"
+                               break
+                       }
+
+                       if [ -e $migrate_dir2/target_file ]; then
+                               rm -rf $migrate_dir2/target_file 2&>/dev/null ||
+                                                               rc1=$?
+                       else
+                               rm -rf $migrate_dir2/source_file 2&>/dev/null ||
+                                                               rc1=$?
+                       fi
+                       [ $rc1 -ne 0 -o $rc1 -ne 1 ] || {
+                               echo "unlink failed with $rc1"
+                               break
+                       }
+               fi
+
+               end_time=$(date +%s)
+               duration=$((end_time - start_time))
+               if [ $((duration % 10)) -eq 0 ]; then
+                       if [ $show_time -eq 1 ]; then
+                               echo "...$duration seconds"
+                               show_time=0
+                       fi
+               else
+                       show_time=1
+               fi
+
+               kill -0 $migrate_pid || {
+                       echo "migration stopped 1"
+                       break
+               }
+
+               [ $duration -ge 300 ] && break
+       done
+
+       #check migration are still there
+       kill -0 $migrate_pid || error "migration stopped 2"
+       cleanup_80b
+}
+run_test 80b "Accessing directory during migration"
  
  test_81() {
         [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return
author	wang di <di.wang@intel.com>
	Sun, 10 May 2015 23:40:46 +0000 (16:40 -0700)
committer	Oleg Drokin <oleg.drokin@intel.com>
	Mon, 24 Aug 2015 14:12:22 +0000 (14:12 +0000)
lustre/include/dt_object.h		patch \| blob \| history
lustre/include/lustre_req_layout.h		patch \| blob \| history
lustre/include/md_object.h		patch \| blob \| history
lustre/llite/dir.c		patch \| blob \| history
lustre/llite/file.c		patch \| blob \| history
lustre/llite/llite_internal.h		patch \| blob \| history
lustre/lod/lod_object.c		patch \| blob \| history
lustre/lod/lod_sub_object.c		patch \| blob \| history
lustre/mdc/mdc_lib.c		patch \| blob \| history
lustre/mdc/mdc_reint.c		patch \| blob \| history
lustre/mdd/mdd_dir.c		patch \| blob \| history
lustre/mdt/mdt_handler.c		patch \| blob \| history
lustre/mdt/mdt_internal.h		patch \| blob \| history
lustre/mdt/mdt_lib.c		patch \| blob \| history
lustre/mdt/mdt_open.c		patch \| blob \| history
lustre/mdt/mdt_reint.c		patch \| blob \| history
lustre/osp/osp_trans.c		patch \| blob \| history
lustre/ptlrpc/layout.c		patch \| blob \| history
lustre/target/update_trans.c		patch \| blob \| history
lustre/tests/sanity-lfsck.sh		patch \| blob \| history
lustre/tests/sanity.sh		patch \| blob \| history
lustre/tests/sanityn.sh		patch \| blob \| history