Whamcloud - gitweb
LU-3677 mdt: Set HSM dirty open-for-write file when evicted.
[fs/lustre-release.git] / lustre / mdt / mdt_handler.c
index 276ddfd..4ceb146 100644 (file)
@@ -62,7 +62,6 @@
 /* lu2dt_dev() */
 #include <dt_object.h>
 #include <lustre_mds.h>
-#include <lustre_mdt.h>
 #include <lustre_log.h>
 #include "mdt_internal.h"
 #include <lustre_acl.h>
@@ -101,6 +100,12 @@ static const struct lu_object_operations mdt_obj_ops;
 /* Slab for MDT object allocation */
 static struct kmem_cache *mdt_object_kmem;
 
+/* Slab cache for struct cdt_restore_handle (HSM restore handles);
+ * registered in mdt_caches[] under the name "mdt_cdt_restore_handle".
+ * Not static -- presumably used from other MDT source files (confirm). */
+struct kmem_cache *mdt_hsm_cdt_kmem;
+
+/* Slab cache for struct cdt_agent_req (HSM request handles);
+ * registered in mdt_caches[] under the name "mdt_cdt_agent_req". */
+struct kmem_cache *mdt_hsm_car_kmem;
+
 static struct lu_kmem_descr mdt_caches[] = {
        {
                .ckd_cache = &mdt_object_kmem,
@@ -108,6 +113,16 @@ static struct lu_kmem_descr mdt_caches[] = {
                .ckd_size  = sizeof(struct mdt_object)
        },
        {
+               .ckd_cache      = &mdt_hsm_cdt_kmem,
+               .ckd_name       = "mdt_cdt_restore_handle",
+               .ckd_size       = sizeof(struct cdt_restore_handle)
+       },
+       {
+               .ckd_cache      = &mdt_hsm_car_kmem,
+               .ckd_name       = "mdt_cdt_agent_req",
+               .ckd_size       = sizeof(struct cdt_agent_req)
+       },
+       {
                .ckd_cache = NULL
        }
 };
@@ -1329,35 +1344,33 @@ static int mdt_getattr_name_lock(struct mdt_thread_info *info,
          */
         child = mdt_object_find(info->mti_env, info->mti_mdt, child_fid);
 
-        if (unlikely(IS_ERR(child)))
-                GOTO(out_parent, rc = PTR_ERR(child));
-        if (is_resent) {
-                /* Do not take lock for resent case. */
-                lock = ldlm_handle2lock(&lhc->mlh_reg_lh);
-                LASSERTF(lock != NULL, "Invalid lock handle "LPX64"\n",
-                         lhc->mlh_reg_lh.cookie);
-
-                res_id = &lock->l_resource->lr_name;
-                if (!fid_res_name_eq(mdt_object_fid(child),
-                                    &lock->l_resource->lr_name)) {
-                         LASSERTF(fid_res_name_eq(mdt_object_fid(parent),
-                                                 &lock->l_resource->lr_name),
-                                 "Lock res_id: %lu/%lu/%lu, Fid: "DFID".\n",
-                                 (unsigned long)res_id->name[0],
-                                 (unsigned long)res_id->name[1],
-                                 (unsigned long)res_id->name[2],
-                                 PFID(mdt_object_fid(parent)));
-                          CWARN("Although resent, but still not get child lock"
-                                "parent:"DFID" child:"DFID"\n",
-                                PFID(mdt_object_fid(parent)),
-                                PFID(mdt_object_fid(child)));
-                          lustre_msg_clear_flags(req->rq_reqmsg, MSG_RESENT);
-                          LDLM_LOCK_PUT(lock);
-                          GOTO(relock, 0);
-                }
-                LDLM_LOCK_PUT(lock);
-                rc = 0;
-        } else {
+       if (unlikely(IS_ERR(child)))
+               GOTO(out_parent, rc = PTR_ERR(child));
+       if (is_resent) {
+               /* Do not take lock for resent case. */
+               lock = ldlm_handle2lock(&lhc->mlh_reg_lh);
+               LASSERTF(lock != NULL, "Invalid lock handle "LPX64"\n",
+                        lhc->mlh_reg_lh.cookie);
+
+               res_id = &lock->l_resource->lr_name;
+               if (!fid_res_name_eq(mdt_object_fid(child),
+                                    &lock->l_resource->lr_name)) {
+                       LASSERTF(fid_res_name_eq(mdt_object_fid(parent),
+                                                &lock->l_resource->lr_name),
+                                "Lock res_id: "DLDLMRES", fid: "DFID"\n",
+                                PLDLMRES(lock->l_resource),
+                                PFID(mdt_object_fid(parent)));
+                       CWARN("Although resent, but still not get child lock"
+                             "parent:"DFID" child:"DFID"\n",
+                             PFID(mdt_object_fid(parent)),
+                             PFID(mdt_object_fid(child)));
+                       lustre_msg_clear_flags(req->rq_reqmsg, MSG_RESENT);
+                       LDLM_LOCK_PUT(lock);
+                       GOTO(relock, 0);
+               }
+               LDLM_LOCK_PUT(lock);
+               rc = 0;
+       } else {
                bool try_layout = false;
 
 relock:
@@ -1439,17 +1452,15 @@ relock:
         rc = mdt_getattr_internal(info, child, ma_need);
         if (unlikely(rc != 0)) {
                 mdt_object_unlock(info, child, lhc, 1);
-        } else if (lock) {
-                /* Debugging code. */
-                res_id = &lock->l_resource->lr_name;
-                LDLM_DEBUG(lock, "Returning lock to client");
-                LASSERTF(fid_res_name_eq(mdt_object_fid(child),
-                                         &lock->l_resource->lr_name),
-                         "Lock res_id: %lu/%lu/%lu, Fid: "DFID".\n",
-                         (unsigned long)res_id->name[0],
-                         (unsigned long)res_id->name[1],
-                         (unsigned long)res_id->name[2],
-                         PFID(mdt_object_fid(child)));
+       } else if (lock) {
+               /* Debugging code. */
+               res_id = &lock->l_resource->lr_name;
+               LDLM_DEBUG(lock, "Returning lock to client");
+               LASSERTF(fid_res_name_eq(mdt_object_fid(child),
+                                        &lock->l_resource->lr_name),
+                        "Lock res_id: "DLDLMRES", fid: "DFID"\n",
+                        PLDLMRES(lock->l_resource),
+                        PFID(mdt_object_fid(child)));
                if (mdt_object_exists(child) && !mdt_object_remote(child))
                        mdt_pack_size2body(info, child);
         }
@@ -1571,6 +1582,9 @@ int mdt_set_info(struct mdt_thread_info *info)
         RETURN(0);
 }
 
+int mdt_connect_check_sptlrpc(struct mdt_device *mdt, struct obd_export *exp,
+                             struct ptlrpc_request *req);
+
 /**
  * Top-level handler for MDT connection requests.
  */
@@ -1581,32 +1595,39 @@ int mdt_connect(struct mdt_thread_info *info)
        struct obd_export *exp;
        struct ptlrpc_request *req = mdt_info_req(info);
 
+       ENTRY;
+
        rc = target_handle_connect(req);
        if (rc != 0)
-               return err_serious(rc);
+               RETURN(err_serious(rc));
 
        LASSERT(req->rq_export != NULL);
-       info->mti_mdt = mdt_dev(req->rq_export->exp_obd->obd_lu_dev);
+       exp = req->rq_export;
+       info->mti_exp = exp;
+       info->mti_mdt = mdt_dev(exp->exp_obd->obd_lu_dev);
        rc = mdt_init_sec_level(info);
-       if (rc != 0) {
-               obd_disconnect(class_export_get(req->rq_export));
-               return rc;
-       }
+       if (rc != 0)
+               GOTO(err, rc);
+
+       rc = mdt_connect_check_sptlrpc(info->mti_mdt, exp, req);
+       if (rc)
+               GOTO(err, rc);
 
        /* To avoid exposing partially initialized connection flags, changes up
         * to this point have been staged in reply->ocd_connect_flags. Now that
         * connection handling has completed successfully, atomically update
         * the connect flags in the shared export data structure. LU-1623 */
        reply = req_capsule_server_get(info->mti_pill, &RMF_CONNECT_DATA);
-       exp = req->rq_export;
        spin_lock(&exp->exp_lock);
        *exp_connect_flags_ptr(exp) = reply->ocd_connect_flags;
        spin_unlock(&exp->exp_lock);
 
        rc = mdt_init_idmap(info);
        if (rc != 0)
-               obd_disconnect(class_export_get(req->rq_export));
-
+               GOTO(err, rc);
+       RETURN(0);
+err:
+       obd_disconnect(class_export_get(req->rq_export));
        return rc;
 }
 
@@ -3081,12 +3102,11 @@ void mdt_lock_handle_fini(struct mdt_lock_handle *lh)
  * uninitialized state, because it's too expensive to zero out whole
  * mdt_thread_info (> 1K) on each request arrival.
  */
-static void mdt_thread_info_init(struct ptlrpc_request *req,
-                                 struct mdt_thread_info *info)
+void mdt_thread_info_init(struct ptlrpc_request *req,
+                         struct mdt_thread_info *info)
 {
         int i;
 
-        req_capsule_init(&req->rq_pill, req, RCL_SERVER);
         info->mti_pill = &req->rq_pill;
 
         /* lock handle */
@@ -3119,11 +3139,10 @@ static void mdt_thread_info_init(struct ptlrpc_request *req,
        info->mti_spec.sp_rm_entry = 0;
 }
 
-static void mdt_thread_info_fini(struct mdt_thread_info *info)
+void mdt_thread_info_fini(struct mdt_thread_info *info)
 {
        int i;
 
-       req_capsule_fini(info->mti_pill);
        if (info->mti_object != NULL) {
                mdt_object_put(info->mti_env, info->mti_object);
                info->mti_object = NULL;
@@ -3132,11 +3151,41 @@ static void mdt_thread_info_fini(struct mdt_thread_info *info)
        for (i = 0; i < ARRAY_SIZE(info->mti_lh); i++)
                mdt_lock_handle_fini(&info->mti_lh[i]);
        info->mti_env = NULL;
+       info->mti_pill = NULL;
+       info->mti_exp = NULL;
 
        if (unlikely(info->mti_big_buf.lb_buf != NULL))
                lu_buf_free(&info->mti_big_buf);
 }
 
+/**
+ * Target-layer handler for MDS_CONNECT.
+ *
+ * Runs the generic tgt_connect() and then performs the MDT idmap setup
+ * through a temporarily initialized mdt_thread_info. If any step after
+ * tgt_connect() fails, the export is disconnected again.
+ *
+ * \param[in] tsi      target session info of the request being handled
+ *
+ * \retval 0           on success
+ * \retval non-zero    error returned by tgt_connect(), lu_env_refill()
+ *                     or mdt_init_idmap()
+ */
+int mdt_tgt_connect(struct tgt_session_info *tsi)
+{
+       struct ptlrpc_request   *req = tgt_ses_req(tsi);
+       struct mdt_thread_info  *mti;
+       int                      rc;
+
+       ENTRY;
+
+       rc = tgt_connect(tsi);
+       if (rc != 0)
+               RETURN(rc);
+
+       /* XXX: switch mdt_init_idmap() to use tgt_session_info */
+       /* Do not ignore a refill failure: the key lookup below would
+        * presumably come back empty and trip the LASSERT. Leave through
+        * the same disconnect path as an idmap failure instead. */
+       rc = lu_env_refill((void *)tsi->tsi_env);
+       if (rc != 0)
+               GOTO(err, rc);
+
+       mti = lu_context_key_get(&tsi->tsi_env->le_ctx, &mdt_thread_key);
+       LASSERT(mti != NULL);
+
+       /* mti is only borrowed for the idmap setup; release it before
+        * looking at the result so both outcomes share the cleanup */
+       mdt_thread_info_init(req, mti);
+       rc = mdt_init_idmap(mti);
+       mdt_thread_info_fini(mti);
+       if (rc != 0)
+               GOTO(err, rc);
+       RETURN(0);
+err:
+       obd_disconnect(class_export_get(req->rq_export));
+       return rc;
+}
+
 static int mdt_filter_recovery_request(struct ptlrpc_request *req,
                                        struct obd_device *obd, int *process)
 {
@@ -3411,11 +3460,13 @@ int mdt_handle_common(struct ptlrpc_request *req,
         info = lu_context_key_get(&env->le_ctx, &mdt_thread_key);
         LASSERT(info != NULL);
 
+       req_capsule_init(&req->rq_pill, req, RCL_SERVER);
         mdt_thread_info_init(req, info);
 
         rc = mdt_handle0(req, info, supported);
 
         mdt_thread_info_fini(info);
+       req_capsule_fini(&req->rq_pill);
         RETURN(rc);
 }
 
@@ -3425,22 +3476,13 @@ int mdt_handle_common(struct ptlrpc_request *req,
  */
 int mdt_recovery_handle(struct ptlrpc_request *req)
 {
-        int rc;
-        ENTRY;
+       int rc;
 
-        switch (lustre_msg_get_opc(req->rq_reqmsg)) {
-        case FLD_QUERY:
-                rc = mdt_handle_common(req, mdt_fld_handlers);
-                break;
-        case SEQ_QUERY:
-                rc = mdt_handle_common(req, mdt_seq_handlers);
-                break;
-        default:
-                rc = mdt_handle_common(req, mdt_regular_handlers);
-                break;
-        }
+       ENTRY;
 
-        RETURN(rc);
+       rc = mdt_handle_common(req, mdt_regular_handlers);
+
+       RETURN(rc);
 }
 
 enum mdt_it_code {
@@ -3462,6 +3504,12 @@ static int mdt_intent_getattr(enum mdt_it_code opcode,
                               struct mdt_thread_info *info,
                               struct ldlm_lock **,
                              __u64);
+
+static int mdt_intent_getxattr(enum mdt_it_code opcode,
+                               struct mdt_thread_info *info,
+                               struct ldlm_lock **lockp,
+                               __u64 flags);
+
 static int mdt_intent_layout(enum mdt_it_code opcode,
                             struct mdt_thread_info *info,
                             struct ldlm_lock **,
@@ -3526,9 +3574,9 @@ static struct mdt_it_flavor {
                 .it_act   = NULL
         },
         [MDT_IT_GETXATTR] = {
-                .it_fmt   = NULL,
+               .it_fmt   = &RQF_LDLM_INTENT_GETXATTR,
                 .it_flags = 0,
-                .it_act   = NULL
+               .it_act   = mdt_intent_getxattr
         },
        [MDT_IT_LAYOUT] = {
                .it_fmt   = &RQF_LDLM_INTENT_LAYOUT,
@@ -3679,6 +3727,44 @@ static void mdt_intent_fixup_resent(struct mdt_thread_info *info,
                   remote_hdl.cookie);
 }
 
+/**
+ * Intent handler for getxattr requests (MDT_IT_GETXATTR).
+ *
+ * Takes an MDS_INODELOCK_XATTR lock on info->mti_object, unless
+ * mdt_intent_fixup_resent() already supplied a lock handle for a resent
+ * request, then runs mdt_getxattr() and replaces the original intent lock
+ * with that lock. The result of mdt_getxattr() is stored in
+ * lock_policy_res2 of the DLM reply instead of being returned.
+ *
+ * \param[in]     opcode  intent code; not used by this handler
+ * \param[in]     info    thread info of the request
+ * \param[in,out] lockp   intent lock, handed to mdt_intent_lock_replace()
+ * \param[in]     flags   passed through to mdt_intent_lock_replace()
+ *
+ * \retval result of mdt_intent_lock_replace() once the reply is filled in
+ * \retval error from mdt_object_lock() if the xattr lock cannot be taken
+ * \retval err_serious(-EFAULT) if no DLM reply buffer is available
+ */
+static int mdt_intent_getxattr(enum mdt_it_code opcode,
+                               struct mdt_thread_info *info,
+                               struct ldlm_lock **lockp,
+                               __u64 flags)
+{
+       struct mdt_lock_handle *lhc = &info->mti_lh[MDT_LH_RMT];
+       struct ldlm_reply      *ldlm_rep = NULL;
+       int rc, grc;
+
+       ENTRY;
+
+       /*
+        * Initialize lhc->mlh_reg_lh either from a previously granted lock
+        * (for the resend case) or a new lock. Below we will use it to
+        * replace the original lock.
+        */
+       mdt_intent_fixup_resent(info, *lockp, NULL, lhc);
+       if (!lustre_handle_is_used(&lhc->mlh_reg_lh)) {
+               mdt_lock_reg_init(lhc, (*lockp)->l_req_mode);
+               rc = mdt_object_lock(info, info->mti_object, lhc,
+                                       MDS_INODELOCK_XATTR,
+                                       MDT_LOCAL_LOCK);
+               if (rc)
+                       RETURN(rc);
+       }
+
+       /* keep the getxattr result apart from rc: it travels in the reply */
+       grc = mdt_getxattr(info);
+
+       rc = mdt_intent_lock_replace(info, lockp, NULL, lhc, flags);
+
+       /* only look up the DLM reply when a reply message exists */
+       if (mdt_info_req(info)->rq_repmsg != NULL)
+               ldlm_rep = req_capsule_server_get(info->mti_pill, &RMF_DLM_REP);
+       if (ldlm_rep == NULL)
+               RETURN(err_serious(-EFAULT));
+
+       ldlm_rep->lock_policy_res2 = grc;
+
+       RETURN(rc);
+}
+
 static int mdt_intent_getattr(enum mdt_it_code opcode,
                               struct mdt_thread_info *info,
                               struct ldlm_lock **lockp,
@@ -4600,6 +4686,56 @@ static void mdt_quota_fini(const struct lu_env *env, struct mdt_device *mdt)
        EXIT;
 }
 
+/* MDS opcodes served through the tgt_handler tables: MDS_CONNECT is
+ * handled by mdt_tgt_connect() and MDS_DISCONNECT by tgt_disconnect().
+ * This table backs the MDS_FIRST_OPC..MDS_LAST_OPC range of
+ * mdt_common_slice[].
+ * NOTE(review): the first TGT_RPC_HANDLER argument is presumably the base
+ * opcode of the range and the second a flags word -- confirm against the
+ * macro definition. */
+static struct tgt_handler mdt_tgt_handlers[] = {
+TGT_RPC_HANDLER(MDS_FIRST_OPC,
+               0,                      MDS_CONNECT,    mdt_tgt_connect,
+               &RQF_CONNECT, LUSTRE_OBD_VERSION),
+TGT_RPC_HANDLER(MDS_FIRST_OPC,
+               0,                      MDS_DISCONNECT, tgt_disconnect,
+               &RQF_MDS_DISCONNECT, LUSTRE_OBD_VERSION),
+};
+
+/* Opcode ranges and the handler table serving each of them; handed to
+ * tgt_init() from mdt_init0(). Each entry pairs an opcode range
+ * (tos_opc_start / tos_opc_end) with a handler array (tos_hs). The list
+ * ends with an entry whose tos_hs is NULL.
+ * NOTE(review): whether tos_opc_end is inclusive is not visible here --
+ * confirm against the dispatcher. */
+static struct tgt_opc_slice mdt_common_slice[] = {
+       {
+               /* MDS requests: local table, connect/disconnect only */
+               .tos_opc_start  = MDS_FIRST_OPC,
+               .tos_opc_end    = MDS_LAST_OPC,
+               .tos_hs         = mdt_tgt_handlers
+       },
+       {
+               .tos_opc_start  = OBD_FIRST_OPC,
+               .tos_opc_end    = OBD_LAST_OPC,
+               .tos_hs         = tgt_obd_handlers
+       },
+       {
+               .tos_opc_start  = LDLM_FIRST_OPC,
+               .tos_opc_end    = LDLM_LAST_OPC,
+               .tos_hs         = tgt_dlm_handlers
+       },
+       {
+               .tos_opc_start  = SEC_FIRST_OPC,
+               .tos_opc_end    = SEC_LAST_OPC,
+               .tos_hs         = tgt_sec_ctx_handlers
+       },
+       {
+               .tos_opc_start  = UPDATE_OBJ,
+               .tos_opc_end    = UPDATE_LAST_OPC,
+               .tos_hs         = tgt_out_handlers
+       },
+       {
+               .tos_opc_start  = FLD_FIRST_OPC,
+               .tos_opc_end    = FLD_LAST_OPC,
+               .tos_hs         = fld_handlers
+       },
+       {
+               .tos_opc_start  = SEQ_FIRST_OPC,
+               .tos_opc_end    = SEQ_LAST_OPC,
+               .tos_hs         = seq_handlers
+       },
+       {
+               /* terminator */
+               .tos_hs         = NULL
+       }
+};
+
 static void mdt_fini(const struct lu_env *env, struct mdt_device *m)
 {
        struct md_device  *next = m->mdt_child;
@@ -4613,6 +4749,11 @@ static void mdt_fini(const struct lu_env *env, struct mdt_device *m)
 
        mdt_stack_pre_fini(env, m, md2lu_dev(m->mdt_child));
 
+       if (m->mdt_opts.mo_coordinator)
+               mdt_hsm_cdt_stop(m);
+
+       mdt_hsm_cdt_fini(m);
+
        mdt_llog_ctxt_unclone(env, m, LLOG_AGENT_ORIG_CTXT);
         mdt_llog_ctxt_unclone(env, m, LLOG_CHANGELOG_ORIG_CTXT);
         obd_exports_barrier(obd);
@@ -4731,6 +4872,10 @@ static int mdt_init0(const struct lu_env *env, struct mdt_device *m,
 
         m->mdt_opts.mo_cos = MDT_COS_DEFAULT;
 
+       /* default is coordinator off, it is started through conf_param
+        * or /proc */
+       m->mdt_opts.mo_coordinator = 0;
+
        lmi = server_get_mount(dev);
         if (lmi == NULL) {
                 CERROR("Cannot get mount info for %s!\n", dev);
@@ -4744,7 +4889,7 @@ static int mdt_init0(const struct lu_env *env, struct mdt_device *m,
         }
 
        rwlock_init(&m->mdt_sptlrpc_lock);
-        sptlrpc_rule_set_init(&m->mdt_sptlrpc_rset);
+       sptlrpc_rule_set_init(&m->mdt_sptlrpc_rset);
 
        spin_lock_init(&m->mdt_ioepoch_lock);
         m->mdt_opts.mo_compat_resname = 0;
@@ -4801,13 +4946,9 @@ static int mdt_init0(const struct lu_env *env, struct mdt_device *m,
                }
        }
 
-        rc = tgt_init(env, &m->mdt_lut, obd, m->mdt_bottom);
-        if (rc)
-                GOTO(err_fini_stack, rc);
-
        rc = mdt_fld_init(env, mdt_obd_name(m), m);
        if (rc)
-               GOTO(err_lut, rc);
+               GOTO(err_fini_stack, rc);
 
        rc = mdt_seq_init(env, mdt_obd_name(m), m);
        if (rc)
@@ -4831,13 +4972,26 @@ static int mdt_init0(const struct lu_env *env, struct mdt_device *m,
 
         cfs_timer_init(&m->mdt_ck_timer, mdt_ck_timer_callback, m);
 
-        rc = mdt_ck_thread_start(m);
-        if (rc)
+       rc = mdt_hsm_cdt_init(m);
+       if (rc != 0) {
+               CERROR("%s: error initializing coordinator, rc %d\n",
+                      mdt_obd_name(m), rc);
                 GOTO(err_free_ns, rc);
+       }
 
-        rc = mdt_fs_setup(env, m, obd, lsi);
+        rc = mdt_ck_thread_start(m);
         if (rc)
-                GOTO(err_capa, rc);
+                GOTO(err_free_hsm, rc);
+
+       rc = tgt_init(env, &m->mdt_lut, obd, m->mdt_bottom, mdt_common_slice,
+                     OBD_FAIL_MDS_ALL_REQUEST_NET,
+                     OBD_FAIL_MDS_ALL_REPLY_NET);
+       if (rc)
+               GOTO(err_capa, rc);
+
+       rc = mdt_fs_setup(env, m, obd, lsi);
+       if (rc)
+               GOTO(err_tgt, rc);
 
         mdt_adapt_sptlrpc_conf(obd, 1);
 
@@ -4845,13 +4999,17 @@ static int mdt_init0(const struct lu_env *env, struct mdt_device *m,
         rc = next->md_ops->mdo_iocontrol(env, next, OBD_IOC_GET_MNTOPT, 0,
                                          &mntopts);
         if (rc)
-               GOTO(err_llog_cleanup, rc);
+               GOTO(err_fs_cleanup, rc);
 
         if (mntopts & MNTOPT_USERXATTR)
                 m->mdt_opts.mo_user_xattr = 1;
         else
                 m->mdt_opts.mo_user_xattr = 0;
 
+       rc = next->md_ops->mdo_maxeasize_get(env, next, &m->mdt_max_ea_size);
+       if (rc)
+               GOTO(err_fs_cleanup, rc);
+
         if (mntopts & MNTOPT_ACL)
                 m->mdt_opts.mo_acl = 1;
         else
@@ -4868,7 +5026,7 @@ static int mdt_init0(const struct lu_env *env, struct mdt_device *m,
        if (IS_ERR(m->mdt_identity_cache)) {
                rc = PTR_ERR(m->mdt_identity_cache);
                m->mdt_identity_cache = NULL;
-               GOTO(err_llog_cleanup, rc);
+               GOTO(err_fs_cleanup, rc);
        }
 
         rc = mdt_procfs_init(m, dev);
@@ -4885,7 +5043,7 @@ static int mdt_init0(const struct lu_env *env, struct mdt_device *m,
        ptlrpc_init_client(LDLM_CB_REQUEST_PORTAL, LDLM_CB_REPLY_PORTAL,
                           "mdt_ldlm_client", m->mdt_ldlm_client);
 
-        ping_evictor_start();
+       ping_evictor_start();
 
        /* recovery will be started upon mdt_prepare()
         * when the whole stack is complete and ready
@@ -4900,20 +5058,21 @@ static int mdt_init0(const struct lu_env *env, struct mdt_device *m,
                 ldlm_timeout = MDS_LDLM_TIMEOUT_DEFAULT;
 
         RETURN(0);
-
 err_procfs:
        mdt_procfs_fini(m);
 err_recovery:
        target_recovery_fini(obd);
        upcall_cache_cleanup(m->mdt_identity_cache);
        m->mdt_identity_cache = NULL;
-err_llog_cleanup:
-       mdt_llog_ctxt_unclone(env, m, LLOG_AGENT_ORIG_CTXT);
-       mdt_llog_ctxt_unclone(env, m, LLOG_CHANGELOG_ORIG_CTXT);
+err_fs_cleanup:
        mdt_fs_cleanup(env, m);
+err_tgt:
+       tgt_fini(env, &m->mdt_lut);
 err_capa:
        cfs_timer_disarm(&m->mdt_ck_timer);
        mdt_ck_thread_stop(m);
+err_free_hsm:
+       mdt_hsm_cdt_fini(m);
 err_free_ns:
        ldlm_namespace_free(m->mdt_namespace, NULL, 0);
        obd->obd_namespace = m->mdt_namespace = NULL;
@@ -4921,8 +5080,6 @@ err_fini_seq:
        mdt_seq_fini(env, m);
 err_fini_fld:
        mdt_fld_fini(env, m);
-err_lut:
-       tgt_fini(env, &m->mdt_lut);
 err_fini_stack:
        mdt_stack_fini(env, m, md2lu_dev(m->mdt_child));
 err_lmi:
@@ -5032,6 +5189,7 @@ static struct lu_object *mdt_object_alloc(const struct lu_env *env,
                mutex_init(&mo->mot_ioepoch_mutex);
                mutex_init(&mo->mot_lov_mutex);
                init_rwsem(&mo->mot_open_sem);
+               init_rwsem(&mo->mot_xattr_sem);
                RETURN(o);
        }
        RETURN(NULL);
@@ -5283,12 +5441,13 @@ static int mdt_connect_internal(struct obd_export *exp,
                }
        }
 
+       data->ocd_max_easize = mdt->mdt_max_ea_size;
+
        return 0;
 }
 
-static int mdt_connect_check_sptlrpc(struct mdt_device *mdt,
-                                    struct obd_export *exp,
-                                    struct ptlrpc_request *req)
+int mdt_connect_check_sptlrpc(struct mdt_device *mdt, struct obd_export *exp,
+                             struct ptlrpc_request *req)
 {
        struct sptlrpc_flavor   flvr;
        int                     rc = 0;
@@ -5337,11 +5496,9 @@ static int mdt_obd_connect(const struct lu_env *env,
                            struct obd_connect_data *data,
                            void *localdata)
 {
-        struct mdt_thread_info *info;
         struct obd_export      *lexp;
         struct lustre_handle    conn = { 0 };
         struct mdt_device      *mdt;
-        struct ptlrpc_request  *req;
         int                     rc;
         ENTRY;
 
@@ -5349,9 +5506,7 @@ static int mdt_obd_connect(const struct lu_env *env,
         if (!exp || !obd || !cluuid)
                 RETURN(-EINVAL);
 
-        info = lu_context_key_get(&env->le_ctx, &mdt_thread_key);
-        req = info->mti_pill->rc_req;
-        mdt = mdt_dev(obd->obd_lu_dev);
+       mdt = mdt_dev(obd->obd_lu_dev);
 
        /*
         * first, check whether the stack is ready to handle requests
@@ -5373,26 +5528,17 @@ static int mdt_obd_connect(const struct lu_env *env,
         lexp = class_conn2export(&conn);
         LASSERT(lexp != NULL);
 
-        rc = mdt_connect_check_sptlrpc(mdt, lexp, req);
-        if (rc)
-                GOTO(out, rc);
-
-        if (OBD_FAIL_CHECK(OBD_FAIL_TGT_RCVG_FLAG))
-                lustre_msg_add_op_flags(req->rq_repmsg, MSG_CONNECT_RECOVERING);
-
         rc = mdt_connect_internal(lexp, mdt, data);
         if (rc == 0) {
                 struct lsd_client_data *lcd = lexp->exp_target_data.ted_lcd;
 
                 LASSERT(lcd);
-               info->mti_exp = lexp;
                memcpy(lcd->lcd_uuid, cluuid, sizeof lcd->lcd_uuid);
                rc = tgt_client_new(env, lexp);
                 if (rc == 0)
                         mdt_export_stats_init(obd, lexp, localdata);
         }
 
-out:
         if (rc != 0) {
                 class_disconnect(lexp);
                 *exp = NULL;
@@ -5409,23 +5555,12 @@ static int mdt_obd_reconnect(const struct lu_env *env,
                              struct obd_connect_data *data,
                              void *localdata)
 {
-        struct mdt_thread_info *info;
-        struct mdt_device      *mdt;
-        struct ptlrpc_request  *req;
         int                     rc;
         ENTRY;
 
         if (exp == NULL || obd == NULL || cluuid == NULL)
                 RETURN(-EINVAL);
 
-        info = lu_context_key_get(&env->le_ctx, &mdt_thread_key);
-        req = info->mti_pill->rc_req;
-        mdt = mdt_dev(obd->obd_lu_dev);
-
-        rc = mdt_connect_check_sptlrpc(mdt, exp, req);
-        if (rc)
-                RETURN(rc);
-
         rc = mdt_connect_internal(exp, mdt_dev(obd->obd_lu_dev), data);
         if (rc == 0)
                 mdt_export_stats_init(obd, exp, localdata);
@@ -5433,6 +5568,31 @@ static int mdt_obd_reconnect(const struct lu_env *env,
         RETURN(rc);
 }
 
+/**
+ * Call mdt_add_dirty_flag() on the object of \a mfd under a temporary
+ * session context.
+ *
+ * An LCT_SESSION context is set up on the stack, installed as the session
+ * of \a env for the duration of the call and removed again afterwards, so
+ * \a env is left with no session (le_ses == NULL) on return.
+ *
+ * \param[in] env      environment that temporarily carries the session
+ * \param[in] info     thread info supplying the ucred and attribute buffer
+ * \param[in] mfd      open file data whose mfd_object gets flagged
+ *
+ * \retval result of mdt_add_dirty_flag(), or the error from
+ *         lu_context_init() if the session could not be created
+ */
+static int mdt_ctxt_add_dirty_flag(struct lu_env *env,
+                                  struct mdt_thread_info *info,
+                                  struct mdt_file_data *mfd)
+{
+       struct lu_context session;
+       int rc;
+       ENTRY;
+
+       rc = lu_context_init(&session, LCT_SESSION);
+       if (rc != 0)
+               RETURN(rc);
+
+       /* install and enter the session before the ucred is touched;
+        * presumably mdt_ucred() resolves through it (confirm) */
+       env->le_ses = &session;
+       lu_context_enter(&session);
+
+       mdt_ucred(info)->uc_valid = UCRED_OLD;
+       rc = mdt_add_dirty_flag(info, mfd->mfd_object, &info->mti_attr);
+
+       /* leave the session, destroy it, then detach it from env */
+       lu_context_exit(&session);
+       lu_context_fini(&session);
+       env->le_ses = NULL;
+
+       RETURN(rc);
+}
+
 static int mdt_export_cleanup(struct obd_export *exp)
 {
         struct mdt_export_data *med = &exp->exp_mdt_data;
@@ -5477,6 +5637,24 @@ static int mdt_export_cleanup(struct obd_export *exp)
                 cfs_list_for_each_entry_safe(mfd, n, &closing_list, mfd_list) {
                         cfs_list_del_init(&mfd->mfd_list);
                        ma->ma_need = ma->ma_valid = 0;
+
+                       /* This file is being closed because of an eviction;
+                        * it may have been modified and may now be dirty with
+                        * respect to its HSM archive, so check for that.
+                        * The logic here is to mark a file dirty if there's a
+                        * chance it was dirtied before the client was evicted,
+                        * so that we don't have to wait for a release attempt
+                        * before finding out the file was actually dirty and
+                        * fail the release. Aggressively marking it dirty here
+                        * will cause the policy engine to attempt to
+                        * re-archive it; when rearchiving, we can compare the
+                        * current version to the HSM data_version and make the
+                        * archive request into a noop if it's not actually
+                        * dirty.
+                        */
+                       if (mfd->mfd_mode & (FMODE_WRITE|MDS_FMODE_TRUNC))
+                               rc = mdt_ctxt_add_dirty_flag(&env, info, mfd);
+
                        /* Don't unlink orphan on failover umount, LU-184 */
                        if (exp->exp_flags & OBD_OPT_FAILOVER) {
                                ma->ma_valid = MA_FLAGS;