Whamcloud - gitweb
LU-4843 mdt: disallow old clients access striped dir
[fs/lustre-release.git] / lustre / mdt / mdt_handler.c
index 669d778..8ae3794 100644 (file)
@@ -53,6 +53,7 @@
  * struct OBD_{ALLOC,FREE}*()
  */
 #include <obd_support.h>
+#include <lustre_ioctl.h>
 /* struct ptlrpc_request */
 #include <lustre_net.h>
 /* struct obd_export */
@@ -262,7 +263,7 @@ static void mdt_lock_pdo_mode(struct mdt_thread_info *info, struct mdt_object *o
         EXIT;
 }
 
-int mdt_getstatus(struct tgt_session_info *tsi)
+static int mdt_getstatus(struct tgt_session_info *tsi)
 {
        struct mdt_thread_info  *info = tsi2mdt_info(tsi);
        struct mdt_device       *mdt = info->mti_mdt;
@@ -306,7 +307,7 @@ out:
        return rc;
 }
 
-int mdt_statfs(struct tgt_session_info *tsi)
+static int mdt_statfs(struct tgt_session_info *tsi)
 {
        struct ptlrpc_request           *req = tgt_ses_req(tsi);
        struct mdt_thread_info          *info = tsi2mdt_info(tsi);
@@ -481,8 +482,34 @@ void mdt_client_compatibility(struct mdt_thread_info *info)
         EXIT;
 }
 
-static int mdt_big_xattr_get(struct mdt_thread_info *info, struct mdt_object *o,
-                            const char *name)
+int mdt_attr_get_eabuf_size(struct mdt_thread_info *info, struct mdt_object *o)
+{
+       const struct lu_env *env = info->mti_env;
+       int rc, rc2;
+       /* Size of the EA buffer a reply for @o needs, or a negative errno. */
+       rc = mo_xattr_get(env, mdt_object_child(o), &LU_BUF_NULL,
+                         XATTR_NAME_LOV);
+       /* A missing LOV EA is not an error here; treat it as size 0. */
+       if (rc == -ENODATA)
+               rc = 0;
+       /* Any other negative rc is handed back to the caller unchanged. */
+       if (rc < 0)
+               goto out;
+       /* rc is now the non-negative result of the LOV query. */
+       /* Is it a directory? Let's check for the LMV as well */
+       if (S_ISDIR(lu_object_attr(&mdt_object_child(o)->mo_lu))) {
+               rc2 = mo_xattr_get(env, mdt_object_child(o), &LU_BUF_NULL,
+                                  XATTR_NAME_LMV);
+               if ((rc2 < 0 && rc2 != -ENODATA) || (rc2 > rc))
+                       rc = rc2;
+       }
+       /* rc: the larger of the LOV/LMV results, or the first real error. */
+out:
+       return rc;
+}
+
+int mdt_big_xattr_get(struct mdt_thread_info *info, struct mdt_object *o,
+                     const char *name)
 {
        const struct lu_env *env = info->mti_env;
        int rc;
@@ -591,8 +618,8 @@ int mdt_stripe_get(struct mdt_thread_info *info, struct mdt_object *o,
        return rc;
 }
 
-int mdt_attr_get_pfid(struct mdt_thread_info *info,
-                     struct mdt_object *o, struct lu_fid *pfid)
+static int mdt_attr_get_pfid(struct mdt_thread_info *info,
+                            struct mdt_object *o, struct lu_fid *pfid)
 {
        struct lu_buf           *buf = &info->mti_buf;
        struct link_ea_header   *leh;
@@ -758,22 +785,24 @@ static int mdt_getattr_internal(struct mdt_thread_info *info,
 
        if (mdt_object_remote(o)) {
                /* This object is located on remote node.*/
-               /* Return -EIO for old client */
+               /* Return -ENOTSUPP for old client */
                if (!mdt_is_dne_client(req->rq_export))
-                       GOTO(out, rc = -EIO);
+                       GOTO(out, rc = -ENOTSUPP);
 
                repbody->fid1 = *mdt_object_fid(o);
                repbody->valid = OBD_MD_FLID | OBD_MD_MDS;
                GOTO(out, rc = 0);
        }
 
-       buffer->lb_len = reqbody->eadatasize;
-       if (buffer->lb_len > 0) {
+       if (reqbody->eadatasize > 0) {
                buffer->lb_buf = req_capsule_server_get(pill, &RMF_MDT_MD);
                if (buffer->lb_buf == NULL)
                        GOTO(out, rc = -EPROTO);
+               buffer->lb_len = req_capsule_get_size(pill, &RMF_MDT_MD,
+                                                     RCL_SERVER);
        } else {
                buffer->lb_buf = NULL;
+               buffer->lb_len = 0;
                ma_need &= ~(MA_LOV | MA_LMV);
                CDEBUG(D_INFO, "%s: RPC from %s: does not need LOVEA.\n",
                       mdt_obd_name(info->mti_mdt),
@@ -860,20 +889,27 @@ static int mdt_getattr_internal(struct mdt_thread_info *info,
         if (mdt_body_has_lov(la, reqbody)) {
                 if (ma->ma_valid & MA_LOV) {
                         LASSERT(ma->ma_lmm_size);
-                        mdt_dump_lmm(D_INFO, ma->ma_lmm);
                         repbody->eadatasize = ma->ma_lmm_size;
                         if (S_ISDIR(la->la_mode))
                                 repbody->valid |= OBD_MD_FLDIREA;
                         else
                                 repbody->valid |= OBD_MD_FLEASIZE;
+                        mdt_dump_lmm(D_INFO, ma->ma_lmm, repbody->valid);
                 }
                if (ma->ma_valid & MA_LMV) {
+                       /* Return -ENOTSUPP for old client */
+                       if (!mdt_is_striped_client(req->rq_export))
+                               RETURN(-ENOTSUPP);
+
                        LASSERT(S_ISDIR(la->la_mode));
                        mdt_dump_lmv(D_INFO, ma->ma_lmv);
                        repbody->eadatasize = ma->ma_lmv_size;
                        repbody->valid |= (OBD_MD_FLDIREA|OBD_MD_MEA);
                }
                if (ma->ma_valid & MA_LMV_DEF) {
+                       /* Return -ENOTSUPP for old client */
+                       if (!mdt_is_striped_client(req->rq_export))
+                               RETURN(-ENOTSUPP);
                        LASSERT(S_ISDIR(la->la_mode));
                        repbody->eadatasize = ma->ma_lmv_size;
                        repbody->valid |= (OBD_MD_FLDIREA|OBD_MD_DEFAULT_MEA);
@@ -1026,14 +1062,13 @@ static int mdt_renew_capa(struct mdt_thread_info *info)
         RETURN(rc);
 }
 
-int mdt_getattr(struct tgt_session_info *tsi)
+static int mdt_getattr(struct tgt_session_info *tsi)
 {
        struct mdt_thread_info  *info = tsi2mdt_info(tsi);
         struct mdt_object       *obj = info->mti_object;
         struct req_capsule      *pill = info->mti_pill;
         struct mdt_body         *reqbody;
         struct mdt_body         *repbody;
-        mode_t                   mode;
         int rc, rc2;
         ENTRY;
 
@@ -1051,13 +1086,36 @@ int mdt_getattr(struct tgt_session_info *tsi)
         LASSERT(obj != NULL);
        LASSERT(lu_object_assert_exists(&obj->mot_obj));
 
-       mode = lu_object_attr(&obj->mot_obj);
+       /* Unlike intent case where we need to pre-fill out buffers early on
+        * in intent policy for ldlm reasons, here we can have a much better
+        * guess at EA size by just reading it from disk.
+        * Exceptions are readlink and (missing) directory striping */
+       /* Readlink */
+       if (reqbody->valid & OBD_MD_LINKNAME) {
+               /* No easy way to know how long the symlink is, but it cannot
+                * be more than PATH_MAX, so we allocate +1 */
+               rc = PATH_MAX + 1;
+
+       /* A special case for fs ROOT: getattr there might fetch
+        * default EA for entire fs, not just for this dir!
+        */
+       } else if (lu_fid_eq(mdt_object_fid(obj),
+                            &info->mti_mdt->mdt_md_root_fid) &&
+                  (reqbody->valid & OBD_MD_FLDIREA) &&
+                  (lustre_msg_get_opc(mdt_info_req(info)->rq_reqmsg) ==
+                                                                MDS_GETATTR)) {
+               /* Should the default striping be bigger, mdt_fix_reply
+                * will reallocate */
+               rc = DEF_REP_MD_SIZE;
+       } else {
+               /* Read the actual EA size from disk */
+               rc = mdt_attr_get_eabuf_size(info, obj);
+       }
+
+       if (rc < 0)
+               GOTO(out_shrink, rc);
 
-       /* old clients may not report needed easize, use max value then */
-       req_capsule_set_size(pill, &RMF_MDT_MD, RCL_SERVER,
-                            reqbody->eadatasize == 0 ?
-                            info->mti_mdt->mdt_max_mdsize :
-                            reqbody->eadatasize);
+       req_capsule_set_size(pill, &RMF_MDT_MD, RCL_SERVER, rc);
 
        rc = req_capsule_server_pack(pill);
        if (unlikely(rc != 0))
@@ -1096,7 +1154,7 @@ out:
        return rc;
 }
 
-int mdt_is_subdir(struct tgt_session_info *tsi)
+static int mdt_is_subdir(struct tgt_session_info *tsi)
 {
        struct mdt_thread_info  *info = tsi2mdt_info(tsi);
         struct mdt_object     *o = info->mti_object;
@@ -1125,7 +1183,7 @@ int mdt_is_subdir(struct tgt_session_info *tsi)
        RETURN(rc);
 }
 
-int mdt_swap_layouts(struct tgt_session_info *tsi)
+static int mdt_swap_layouts(struct tgt_session_info *tsi)
 {
        struct mdt_thread_info  *info;
        struct ptlrpc_request   *req = tgt_ses_req(tsi);
@@ -1282,88 +1340,36 @@ static int mdt_getattr_name_lock(struct mdt_thread_info *info,
         struct lu_name         *lname     = NULL;
         struct mdt_lock_handle *lhp       = NULL;
         struct ldlm_lock       *lock;
-        struct ldlm_res_id     *res_id;
-        int                     is_resent;
-        int                     ma_need = 0;
-        int                     rc;
-
-        ENTRY;
-
-        is_resent = lustre_handle_is_used(&lhc->mlh_reg_lh);
-        LASSERT(ergo(is_resent,
-                     lustre_msg_get_flags(req->rq_reqmsg) & MSG_RESENT));
+       bool                    is_resent;
+       bool                    try_layout;
+       int                     ma_need = 0;
+       int                     rc;
+       ENTRY;
 
-        LASSERT(parent != NULL);
+       is_resent = lustre_handle_is_used(&lhc->mlh_reg_lh);
+       LASSERT(ergo(is_resent,
+                    lustre_msg_get_flags(req->rq_reqmsg) & MSG_RESENT));
 
-       lname = &info->mti_name;
-       mdt_name_unpack(info->mti_pill, &RMF_NAME, lname, MNF_FIX_ANON);
+       LASSERT(parent != NULL);
 
-       if (!info->mti_cross_ref) {
-               /*
-                * XXX: Check for anonymous name is for getattr by fid
-                * (OBD_CONNECT_ATTRFID), otherwise do not allow empty name,
-                * that is the name must contain at least one character and
-                * the terminating '\0'.
-                */
-               if (!lu_name_is_valid(lname)) {
-                       reqbody = req_capsule_client_get(info->mti_pill,
-                                                        &RMF_MDT_BODY);
-                       if (unlikely(reqbody == NULL))
-                               RETURN(err_serious(-EFAULT));
-
-                       if (unlikely(!fid_is_sane(&reqbody->fid2)))
-                               RETURN(err_serious(-EINVAL));
-
-                       CDEBUG(D_INODE, "getattr with lock for "DFID"/"DFID", "
-                              "ldlm_rep = %p\n",
-                              PFID(mdt_object_fid(parent)),
-                              PFID(&reqbody->fid2), ldlm_rep);
+       if (info->mti_cross_ref) {
+               /* Only getattr on the child. Parent is on another node. */
+               mdt_set_disposition(info, ldlm_rep,
+                                   DISP_LOOKUP_EXECD | DISP_LOOKUP_POS);
+               child = parent;
+               CDEBUG(D_INODE, "partial getattr_name child_fid = "DFID", "
+                      "ldlm_rep = %p\n",
+                      PFID(mdt_object_fid(child)), ldlm_rep);
+
+               if (is_resent) {
+                       /* Do not take lock for resent case. */
+                       lock = ldlm_handle2lock(&lhc->mlh_reg_lh);
+                       LASSERTF(lock != NULL, "Invalid lock handle "LPX64"\n",
+                                lhc->mlh_reg_lh.cookie);
+                       LASSERT(fid_res_name_eq(mdt_object_fid(child),
+                                               &lock->l_resource->lr_name));
+                       LDLM_LOCK_PUT(lock);
                } else {
-                       CDEBUG(D_INODE, "getattr with lock for "DFID"/"DNAME", "
-                              "ldlm_rep = %p\n", PFID(mdt_object_fid(parent)),
-                              PNAME(lname), ldlm_rep);
-               }
-       }
-
-        mdt_set_disposition(info, ldlm_rep, DISP_LOOKUP_EXECD);
-
-       if (unlikely(!mdt_object_exists(parent)) && lu_name_is_valid(lname)) {
-               LU_OBJECT_DEBUG(D_INODE, info->mti_env,
-                               &parent->mot_obj,
-                               "Parent doesn't exist!\n");
-               RETURN(-ESTALE);
-       } else if (!info->mti_cross_ref) {
-               LASSERTF(!mdt_object_remote(parent),
-                        "Parent "DFID" is on remote server\n",
-                        PFID(mdt_object_fid(parent)));
-       }
-
-       if (lu_name_is_valid(lname)) {
-                rc = mdt_raw_lookup(info, parent, lname, ldlm_rep);
-                if (rc != 0) {
-                        if (rc > 0)
-                                rc = 0;
-                        RETURN(rc);
-                }
-        }
-
-        if (info->mti_cross_ref) {
-                /* Only getattr on the child. Parent is on another node. */
-                mdt_set_disposition(info, ldlm_rep, DISP_LOOKUP_POS);
-                child = parent;
-                CDEBUG(D_INODE, "partial getattr_name child_fid = "DFID", "
-                       "ldlm_rep=%p\n", PFID(mdt_object_fid(child)), ldlm_rep);
-
-                if (is_resent) {
-                        /* Do not take lock for resent case. */
-                        lock = ldlm_handle2lock(&lhc->mlh_reg_lh);
-                        LASSERTF(lock != NULL, "Invalid lock handle "LPX64"\n",
-                                 lhc->mlh_reg_lh.cookie);
-                        LASSERT(fid_res_name_eq(mdt_object_fid(child),
-                                                &lock->l_resource->lr_name));
-                        LDLM_LOCK_PUT(lock);
-                        rc = 0;
-                } else {
                        mdt_lock_handle_init(lhc);
                        mdt_lock_reg_init(lhc, LCK_PR);
 
@@ -1377,27 +1383,74 @@ static int mdt_getattr_name_lock(struct mdt_thread_info *info,
 
                        rc = mdt_object_lock(info, child, lhc, child_bits,
                                             MDT_LOCAL_LOCK);
+                       if (rc < 0)
+                               RETURN(rc);
                }
-                if (rc == 0) {
-                        /* Finally, we can get attr for child. */
-                       if (!mdt_object_exists(child)) {
-                               LU_OBJECT_DEBUG(D_INFO, info->mti_env,
-                                               &child->mot_obj,
-                                            "remote object doesn't exist.\n");
-                                mdt_object_unlock(info, child, lhc, 1);
-                               RETURN(-ENOENT);
-                       }
 
-                        mdt_set_capainfo(info, 0, mdt_object_fid(child),
-                                         BYPASS_CAPA);
-                        rc = mdt_getattr_internal(info, child, 0);
-                        if (unlikely(rc != 0))
-                                mdt_object_unlock(info, child, lhc, 1);
-                }
+               /* Finally, we can get attr for child. */
+               if (!mdt_object_exists(child)) {
+                       LU_OBJECT_DEBUG(D_INFO, info->mti_env,
+                                       &child->mot_obj,
+                                       "remote object doesn't exist.\n");
+                       mdt_object_unlock(info, child, lhc, 1);
+                       RETURN(-ENOENT);
+               }
+
+               mdt_set_capainfo(info, 0, mdt_object_fid(child), BYPASS_CAPA);
+               rc = mdt_getattr_internal(info, child, 0);
+               if (unlikely(rc != 0))
+                       mdt_object_unlock(info, child, lhc, 1);
+
                 RETURN(rc);
         }
 
+       lname = &info->mti_name;
+       mdt_name_unpack(info->mti_pill, &RMF_NAME, lname, MNF_FIX_ANON);
+
+       if (lu_name_is_valid(lname)) {
+               CDEBUG(D_INODE, "getattr with lock for "DFID"/"DNAME", "
+                      "ldlm_rep = %p\n", PFID(mdt_object_fid(parent)),
+                      PNAME(lname), ldlm_rep);
+       } else {
+               reqbody = req_capsule_client_get(info->mti_pill, &RMF_MDT_BODY);
+               if (unlikely(reqbody == NULL))
+                       RETURN(err_serious(-EPROTO));
+
+               *child_fid = reqbody->fid2;
+
+               if (unlikely(!fid_is_sane(child_fid)))
+                       RETURN(err_serious(-EINVAL));
+
+               CDEBUG(D_INODE, "getattr with lock for "DFID"/"DFID", "
+                      "ldlm_rep = %p\n",
+                      PFID(mdt_object_fid(parent)),
+                      PFID(&reqbody->fid2), ldlm_rep);
+       }
+
+       mdt_set_disposition(info, ldlm_rep, DISP_LOOKUP_EXECD);
+
+       if (unlikely(!mdt_object_exists(parent)) && lu_name_is_valid(lname)) {
+               LU_OBJECT_DEBUG(D_INODE, info->mti_env,
+                               &parent->mot_obj,
+                               "Parent doesn't exist!\n");
+               RETURN(-ESTALE);
+       }
+
+       if (mdt_object_remote(parent)) {
+               CERROR("%s: parent "DFID" is on remote target\n",
+                      mdt_obd_name(info->mti_mdt),
+                      PFID(mdt_object_fid(parent)));
+               RETURN(-EIO);
+       }
+
        if (lu_name_is_valid(lname)) {
+               rc = mdt_raw_lookup(info, parent, lname, ldlm_rep);
+               if (rc != 0) {
+                       if (rc > 0)
+                               rc = 0;
+                       RETURN(rc);
+               }
+
                /* step 1: lock parent only if parent is a directory */
                if (S_ISDIR(lu_object_attr(&parent->mot_obj))) {
                        lhp = &info->mti_lh[MDT_LH_PARENT];
@@ -1413,18 +1466,14 @@ static int mdt_getattr_name_lock(struct mdt_thread_info *info,
                 fid_zero(child_fid);
                 rc = mdo_lookup(info->mti_env, next, lname, child_fid,
                                 &info->mti_spec);
+               if (rc == -ENOENT)
+                       mdt_set_disposition(info, ldlm_rep, DISP_LOOKUP_NEG);
 
-                if (rc != 0) {
-                        if (rc == -ENOENT)
-                                mdt_set_disposition(info, ldlm_rep,
-                                                    DISP_LOOKUP_NEG);
-                        GOTO(out_parent, rc);
-                } else
-                        mdt_set_disposition(info, ldlm_rep, DISP_LOOKUP_POS);
-        } else {
-                *child_fid = reqbody->fid2;
-                mdt_set_disposition(info, ldlm_rep, DISP_LOOKUP_POS);
-        }
+               if (rc != 0)
+                       GOTO(out_parent, rc);
+       }
+
+       mdt_set_disposition(info, ldlm_rep, DISP_LOOKUP_POS);
 
        /*
         *step 3: find the child object by fid & lock it.
@@ -1451,7 +1500,6 @@ static int mdt_getattr_name_lock(struct mdt_thread_info *info,
                LASSERTF(lock != NULL, "Invalid lock handle "LPX64"\n",
                         lhc->mlh_reg_lh.cookie);
 
-               res_id = &lock->l_resource->lr_name;
                if (!fid_res_name_eq(mdt_object_fid(child),
                                     &lock->l_resource->lr_name)) {
                        LASSERTF(fid_res_name_eq(mdt_object_fid(parent),
@@ -1468,14 +1516,12 @@ static int mdt_getattr_name_lock(struct mdt_thread_info *info,
                        GOTO(relock, 0);
                }
                LDLM_LOCK_PUT(lock);
-               rc = 0;
        } else {
-               bool try_layout = false;
-
 relock:
                 OBD_FAIL_TIMEOUT(OBD_FAIL_MDS_RESEND, obd_timeout*2);
                 mdt_lock_handle_init(lhc);
                mdt_lock_reg_init(lhc, LCK_PR);
+               try_layout = false;
 
                if (!mdt_object_exists(child)) {
                        LU_OBJECT_DEBUG(D_INODE, info->mti_env,
@@ -1557,7 +1603,6 @@ relock:
                 mdt_object_unlock(info, child, lhc, 1);
        } else if (lock) {
                /* Debugging code. */
-               res_id = &lock->l_resource->lr_name;
                LDLM_DEBUG(lock, "Returning lock to client");
                LASSERTF(fid_res_name_eq(mdt_object_fid(child),
                                         &lock->l_resource->lr_name),
@@ -1580,7 +1625,7 @@ out_parent:
 }
 
 /* normal handler: should release the child lock */
-int mdt_getattr_name(struct tgt_session_info *tsi)
+static int mdt_getattr_name(struct tgt_session_info *tsi)
 {
        struct mdt_thread_info  *info = tsi2mdt_info(tsi);
         struct mdt_lock_handle *lhc = &info->mti_lh[MDT_LH_CHILD];
@@ -1621,7 +1666,7 @@ out_shrink:
 static int mdt_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
                          void *karg, void *uarg);
 
-int mdt_set_info(struct tgt_session_info *tsi)
+static int mdt_set_info(struct tgt_session_info *tsi)
 {
        struct ptlrpc_request   *req = tgt_ses_req(tsi);
        char                    *key;
@@ -1679,7 +1724,7 @@ int mdt_set_info(struct tgt_session_info *tsi)
        RETURN(rc);
 }
 
-int mdt_readpage(struct tgt_session_info *tsi)
+static int mdt_readpage(struct tgt_session_info *tsi)
 {
        struct mdt_thread_info  *info = mdt_th_info(tsi->tsi_env);
        struct mdt_object       *object = mdt_obj(tsi->tsi_corpus);
@@ -1768,7 +1813,7 @@ static int mdt_reint_internal(struct mdt_thread_info *info,
         /* for replay (no_create) lmm is not needed, client has it already */
         if (req_capsule_has_field(pill, &RMF_MDT_MD, RCL_SERVER))
                 req_capsule_set_size(pill, &RMF_MDT_MD, RCL_SERVER,
-                                     info->mti_rr.rr_eadatalen);
+                                    DEF_REP_MD_SIZE);
 
        /* llog cookies are always 0, the field is kept for compatibility */
         if (req_capsule_has_field(pill, &RMF_LOGCOOKIES, RCL_SERVER))
@@ -1845,7 +1890,7 @@ static long mdt_reint_opcode(struct ptlrpc_request *req,
        return opc;
 }
 
-int mdt_reint(struct tgt_session_info *tsi)
+static int mdt_reint(struct tgt_session_info *tsi)
 {
        long opc;
        int  rc;
@@ -1909,7 +1954,7 @@ static int mdt_object_sync(struct mdt_thread_info *info)
         RETURN(rc);
 }
 
-int mdt_sync(struct tgt_session_info *tsi)
+static int mdt_sync(struct tgt_session_info *tsi)
 {
        struct ptlrpc_request   *req = tgt_ses_req(tsi);
        struct req_capsule      *pill = tsi->tsi_pill;
@@ -1955,7 +2000,7 @@ int mdt_sync(struct tgt_session_info *tsi)
  * Handle quota control requests to consult current usage/limit, but also
  * to configure quota enforcement
  */
-int mdt_quotactl(struct tgt_session_info *tsi)
+static int mdt_quotactl(struct tgt_session_info *tsi)
 {
        struct obd_export       *exp  = tsi->tsi_exp;
        struct req_capsule      *pill = tsi->tsi_pill;
@@ -2107,7 +2152,7 @@ static int mdt_llog_ctxt_unclone(const struct lu_env *env,
 /*
  * sec context handlers
  */
-int mdt_sec_ctx_handle(struct tgt_session_info *tsi)
+static int mdt_sec_ctx_handle(struct tgt_session_info *tsi)
 {
        int rc;
 
@@ -2129,7 +2174,7 @@ int mdt_sec_ctx_handle(struct tgt_session_info *tsi)
 /*
  * quota request handlers
  */
-int mdt_quota_dqacq(struct tgt_session_info *tsi)
+static int mdt_quota_dqacq(struct tgt_session_info *tsi)
 {
        struct mdt_device       *mdt = mdt_exp2dev(tsi->tsi_exp);
        struct lu_device        *qmt = mdt->mdt_qmt_dev;
@@ -2430,9 +2475,10 @@ static int mdt_object_local_lock(struct mdt_thread_info *info,
         RETURN(rc);
 }
 
-int mdt_object_lock_internal(struct mdt_thread_info *info, struct mdt_object *o,
-                            struct mdt_lock_handle *lh, __u64 ibits,
-                            bool nonblock, int locality)
+static int
+mdt_object_lock_internal(struct mdt_thread_info *info, struct mdt_object *o,
+                        struct mdt_lock_handle *lh, __u64 ibits,
+                        bool nonblock, int locality)
 {
        int rc;
        ENTRY;
@@ -2702,7 +2748,7 @@ static int mdt_unpack_req_pack_rep(struct mdt_thread_info *info, __u32 flags)
                 /* Pack reply. */
                 if (req_capsule_has_field(pill, &RMF_MDT_MD, RCL_SERVER))
                         req_capsule_set_size(pill, &RMF_MDT_MD, RCL_SERVER,
-                                             info->mti_body->eadatasize);
+                                            DEF_REP_MD_SIZE);
                 if (req_capsule_has_field(pill, &RMF_LOGCOOKIES, RCL_SERVER))
                        req_capsule_set_size(pill, &RMF_LOGCOOKIES,
                                             RCL_SERVER, 0);
@@ -2832,7 +2878,7 @@ struct mdt_thread_info *tsi2mdt_info(struct tgt_session_info *tsi)
        return mti;
 }
 
-int mdt_tgt_connect(struct tgt_session_info *tsi)
+static int mdt_tgt_connect(struct tgt_session_info *tsi)
 {
        struct ptlrpc_request   *req = tgt_ses_req(tsi);
        int                      rc;
@@ -2959,11 +3005,10 @@ static struct mdt_it_flavor {
        }
 };
 
-int mdt_intent_lock_replace(struct mdt_thread_info *info,
-                            struct ldlm_lock **lockp,
-                            struct ldlm_lock *new_lock,
-                            struct mdt_lock_handle *lh,
-                           __u64 flags)
+static int
+mdt_intent_lock_replace(struct mdt_thread_info *info, struct ldlm_lock **lockp,
+                       struct ldlm_lock *new_lock, struct mdt_lock_handle *lh,
+                       __u64 flags)
 {
         struct ptlrpc_request  *req = mdt_info_req(info);
         struct ldlm_lock       *lock = *lockp;
@@ -3157,7 +3202,6 @@ static int mdt_intent_getattr(enum mdt_it_code opcode,
         struct ldlm_lock       *new_lock = NULL;
         __u64                   child_bits;
         struct ldlm_reply      *ldlm_rep;
-        struct ptlrpc_request  *req;
         struct mdt_body        *reqbody;
         struct mdt_body        *repbody;
         int                     rc, rc2;
@@ -3190,7 +3234,6 @@ static int mdt_intent_getattr(enum mdt_it_code opcode,
         if (rc)
                 GOTO(out_shrink, rc);
 
-        req = info->mti_pill->rc_req;
         ldlm_rep = req_capsule_server_get(info->mti_pill, &RMF_DLM_REP);
         mdt_set_disposition(info, ldlm_rep, DISP_IT_EXECD);
 
@@ -3228,8 +3271,7 @@ static int mdt_intent_layout(enum mdt_it_code opcode,
        struct layout_intent *layout;
        struct lu_fid *fid;
        struct mdt_object *obj = NULL;
-       struct md_object *child = NULL;
-       int rc;
+       int rc = 0;
        ENTRY;
 
        if (opcode != MDT_IT_LAYOUT) {
@@ -3246,11 +3288,10 @@ static int mdt_intent_layout(enum mdt_it_code opcode,
                RETURN(PTR_ERR(obj));
 
        if (mdt_object_exists(obj) && !mdt_object_remote(obj)) {
-               child = mdt_object_child(obj);
-
                /* get the length of lsm */
-               rc = mo_xattr_get(info->mti_env, child, &LU_BUF_NULL,
-                                 XATTR_NAME_LOV);
+               rc = mdt_attr_get_eabuf_size(info, obj);
+               if (rc < 0)
+                       RETURN(rc);
 
                if (rc > info->mti_mdt->mdt_max_mdsize)
                        info->mti_mdt->mdt_max_mdsize = rc;
@@ -3259,8 +3300,7 @@ static int mdt_intent_layout(enum mdt_it_code opcode,
        mdt_object_put(info->mti_env, obj);
 
        (*lockp)->l_lvb_type = LVB_T_LAYOUT;
-       req_capsule_set_size(info->mti_pill, &RMF_DLM_LVB, RCL_SERVER,
-                       ldlm_lvbo_size(*lockp));
+       req_capsule_set_size(info->mti_pill, &RMF_DLM_LVB, RCL_SERVER, rc);
        rc = req_capsule_server_pack(info->mti_pill);
        if (rc != 0)
                RETURN(-EINVAL);
@@ -3549,10 +3589,8 @@ static void mdt_seq_fini_cli(struct mdt_device *mdt)
        if (ss == NULL)
                return;
 
-       if (ss->ss_server_seq == NULL)
+       if (ss->ss_server_seq != NULL)
                seq_server_set_cli(NULL, ss->ss_server_seq, NULL);
-
-       return;
 }
 
 static int mdt_seq_fini(const struct lu_env *env, struct mdt_device *mdt)
@@ -3769,7 +3807,6 @@ static int mdt_fld_init(const struct lu_env *env,
 static void mdt_stack_pre_fini(const struct lu_env *env,
                           struct mdt_device *m, struct lu_device *top)
 {
-       struct obd_device       *obd;
        struct lustre_cfg_bufs  *bufs;
        struct lustre_cfg       *lcfg;
        struct mdt_thread_info  *info;
@@ -3784,7 +3821,6 @@ static void mdt_stack_pre_fini(const struct lu_env *env,
 
        LASSERT(m->mdt_child_exp);
        LASSERT(m->mdt_child_exp->exp_obd);
-       obd = m->mdt_child_exp->exp_obd;
 
        /* process cleanup, pass mdt obd name to get obd umount flags */
        /* XXX: this is needed because all layers are referenced by
@@ -4306,6 +4342,10 @@ static void mdt_fini(const struct lu_env *env, struct mdt_device *m)
        struct lfsck_stop        stop;
        ENTRY;
 
+       stop.ls_status = LS_PAUSED;
+       stop.ls_flags = 0;
+       next->md_ops->mdo_iocontrol(env, next, OBD_IOC_STOP_LFSCK, 0, &stop);
+
        target_recovery_fini(obd);
        ping_evictor_stop();
        mdt_stack_pre_fini(env, m, md2lu_dev(m->mdt_child));
@@ -4335,16 +4375,7 @@ static void mdt_fini(const struct lu_env *env, struct mdt_device *m)
 
        mdt_quota_fini(env, m);
 
-        cfs_free_nidlist(&m->mdt_nosquash_nids);
-        if (m->mdt_nosquash_str) {
-                OBD_FREE(m->mdt_nosquash_str, m->mdt_nosquash_strlen);
-                m->mdt_nosquash_str = NULL;
-                m->mdt_nosquash_strlen = 0;
-        }
-
-       stop.ls_status = LS_PAUSED;
-       stop.ls_flags = 0;
-       next->md_ops->mdo_iocontrol(env, next, OBD_IOC_STOP_LFSCK, 0, &stop);
+       cfs_free_nidlist(&m->mdt_squash.rsi_nosquash_nids);
 
         mdt_seq_fini(env, m);
         mdt_fld_fini(env, m);
@@ -4360,12 +4391,12 @@ static void mdt_fini(const struct lu_env *env, struct mdt_device *m)
 
        LASSERT(atomic_read(&d->ld_ref) == 0);
 
-       server_put_mount(mdt_obd_name(m));
+       server_put_mount(mdt_obd_name(m), true);
 
        EXIT;
 }
 
-int mdt_postrecov(const struct lu_env *, struct mdt_device *);
+static int mdt_postrecov(const struct lu_env *, struct mdt_device *);
 
 static int mdt_init0(const struct lu_env *env, struct mdt_device *m,
                      struct lu_device_type *ldt, struct lustre_cfg *cfg)
@@ -4430,12 +4461,10 @@ static int mdt_init0(const struct lu_env *env, struct mdt_device *m,
         m->mdt_capa_timeout = CAPA_TIMEOUT;
         m->mdt_capa_alg = CAPA_HMAC_ALG_SHA1;
         m->mdt_ck_timeout = CAPA_KEY_TIMEOUT;
-        m->mdt_squash_uid = 0;
-        m->mdt_squash_gid = 0;
-        CFS_INIT_LIST_HEAD(&m->mdt_nosquash_nids);
-        m->mdt_nosquash_str = NULL;
-        m->mdt_nosquash_strlen = 0;
-       init_rwsem(&m->mdt_squash_sem);
+       m->mdt_squash.rsi_uid = 0;
+       m->mdt_squash.rsi_gid = 0;
+       INIT_LIST_HEAD(&m->mdt_squash.rsi_nosquash_nids);
+       init_rwsem(&m->mdt_squash.rsi_sem);
        spin_lock_init(&m->mdt_osfs_lock);
        m->mdt_osfs_age = cfs_time_shift_64(-1000);
        m->mdt_enable_remote_dir = 0;
@@ -4616,7 +4645,7 @@ err_fini_stack:
        mdt_stack_fini(env, m, md2lu_dev(m->mdt_child));
 err_lmi:
        if (lmi)
-               server_put_mount(dev);
+               server_put_mount(dev, true);
        return(rc);
 }
 
@@ -4644,7 +4673,6 @@ static int mdt_process_config(const struct lu_env *env,
 
        switch (cfg->lcfg_command) {
        case LCFG_PARAM: {
-               struct lprocfs_static_vars  lvars;
                struct obd_device          *obd = d->ld_obd;
 
                /* For interoperability */
@@ -4679,14 +4707,13 @@ static int mdt_process_config(const struct lu_env *env,
                        }
                }
 
-               lprocfs_mdt_init_vars(&lvars);
-               rc = class_process_proc_param(PARAM_MDT, lvars.obd_vars,
-                                             cfg, obd);
+               rc = class_process_proc_seq_param(PARAM_MDT, obd->obd_vars,
+                                                       cfg, obd);
                if (rc > 0 || rc == -ENOSYS) {
                        /* is it an HSM var ? */
-                       rc = class_process_proc_param(PARAM_HSM,
-                                                     hsm_cdt_get_proc_vars(),
-                                                     cfg, obd);
+                       rc = class_process_proc_seq_param(PARAM_HSM,
+                                                       hsm_cdt_get_proc_vars(),
+                                                       cfg, obd);
                        if (rc > 0 || rc == -ENOSYS)
                                /* we don't understand; pass it on */
                                rc = next->ld_ops->ldo_process_config(env, next,
@@ -4792,7 +4819,6 @@ static int mdt_prepare(const struct lu_env *env,
        struct mdt_device *mdt = mdt_dev(cdev);
        struct lu_device *next = &mdt->mdt_child->md_lu_dev;
        struct obd_device *obd = cdev->ld_obd;
-       struct lfsck_start_param lsp;
        int rc;
 
        ENTRY;
@@ -4816,17 +4842,6 @@ static int mdt_prepare(const struct lu_env *env,
         * register the namespace to such instance. */
        LASSERTF(rc == 0, "register namespace failed: rc = %d\n", rc);
 
-       lsp.lsp_start = NULL;
-       lsp.lsp_index_valid = 0;
-       rc = mdt->mdt_child->md_ops->mdo_iocontrol(env, mdt->mdt_child,
-                                                  OBD_IOC_START_LFSCK,
-                                                  0, &lsp);
-       if (rc != 0) {
-               CWARN("%s: auto trigger paused LFSCK failed: rc = %d\n",
-                     mdt_obd_name(mdt), rc);
-               rc = 0;
-       }
-
        if (mdt->mdt_seq_site.ss_node_id == 0) {
                rc = mdt->mdt_child->md_ops->mdo_root_get(env, mdt->mdt_child,
                                                         &mdt->mdt_md_root_fid);
@@ -5330,19 +5345,46 @@ static int mdt_path_current(struct mdt_thread_info *info,
        --ptr;
        pli->pli_fidcount = 0;
        pli->pli_fids[0] = *(struct lu_fid *)mdt_object_fid(pli->pli_mdt_obj);
-
+       *tmpfid = pli->pli_fids[0];
        /* root FID only exists on MDT0, and fid2path should also ends at MDT0,
         * so checking root_fid can only happen on MDT0. */
        while (!lu_fid_eq(&mdt->mdt_md_root_fid,
                          &pli->pli_fids[pli->pli_fidcount])) {
-               mdt_obj = mdt_object_find(info->mti_env, mdt,
-                                         &pli->pli_fids[pli->pli_fidcount]);
+               struct lu_buf           lmv_buf;
+
+               mdt_obj = mdt_object_find(info->mti_env, mdt, tmpfid);
                if (IS_ERR(mdt_obj))
                        GOTO(out, rc = PTR_ERR(mdt_obj));
+
                if (mdt_object_remote(mdt_obj)) {
                        mdt_object_put(info->mti_env, mdt_obj);
                        GOTO(remote_out, rc = -EREMOTE);
                }
+
+               lmv_buf.lb_buf = info->mti_xattr_buf;
+               lmv_buf.lb_len = sizeof(info->mti_xattr_buf);
+
+               /* Check whether this object is a slave stripe */
+               rc = mo_xattr_get(info->mti_env, mdt_object_child(mdt_obj),
+                                 &lmv_buf, XATTR_NAME_LMV);
+               if (rc > 0) {
+                       union lmv_mds_md *lmm = lmv_buf.lb_buf;
+
+                       /* For a slave stripe, look up its master */
+                       if (le32_to_cpu(lmm->lmv_magic) == LMV_MAGIC_STRIPE) {
+                               struct lmv_mds_md_v1 *lmm1 = &lmm->lmv_md_v1;
+
+                               fid_le_to_cpu(tmpfid, &lmm1->lmv_master_fid);
+                               if (!fid_is_sane(tmpfid)) {
+                                       mdt_object_put(info->mti_env, mdt_obj);
+                                       GOTO(out, rc = -EINVAL);
+                               }
+                               mdt_object_put(info->mti_env, mdt_obj);
+                               pli->pli_fids[pli->pli_fidcount] = *tmpfid;
+                               continue;
+                       }
+               }
+
                if (!mdt_object_exists(mdt_obj)) {
                        mdt_object_put(info->mti_env, mdt_obj);
                        GOTO(out, rc = -ENOENT);
@@ -5725,17 +5767,27 @@ static int mdt_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
         RETURN(rc);
 }
 
-int mdt_postrecov(const struct lu_env *env, struct mdt_device *mdt)
+static int mdt_postrecov(const struct lu_env *env, struct mdt_device *mdt)
 {
-        struct lu_device *ld = md2lu_dev(mdt->mdt_child);
-        int rc;
-        ENTRY;
+       struct lu_device *ld = md2lu_dev(mdt->mdt_child);
+       struct lfsck_start_param lsp;
+       int rc;
+       ENTRY;
 
-        rc = ld->ld_ops->ldo_recovery_complete(env, ld);
-        RETURN(rc);
+       lsp.lsp_start = NULL;
+       lsp.lsp_index_valid = 0;
+       rc = mdt->mdt_child->md_ops->mdo_iocontrol(env, mdt->mdt_child,
+                                                  OBD_IOC_START_LFSCK,
+                                                  0, &lsp);
+       if (rc != 0 && rc != -EALREADY)
+               CWARN("%s: auto trigger paused LFSCK failed: rc = %d\n",
+                     mdt_obd_name(mdt), rc);
+
+       rc = ld->ld_ops->ldo_recovery_complete(env, ld);
+       RETURN(rc);
 }
 
-int mdt_obd_postrecov(struct obd_device *obd)
+static int mdt_obd_postrecov(struct obd_device *obd)
 {
         struct lu_env env;
         int rc;
@@ -5885,7 +5937,6 @@ static struct lu_device_type mdt_device_type = {
 
 static int __init mdt_mod_init(void)
 {
-       struct lprocfs_static_vars lvars;
        int rc;
 
        CLASSERT(sizeof("0x0123456789ABCDEF:0x01234567:0x01234567") ==
@@ -5900,12 +5951,11 @@ static int __init mdt_mod_init(void)
        if (rc)
                GOTO(lu_fini, rc);
 
-       lprocfs_mdt_init_vars(&lvars);
-       rc = class_register_type(&mdt_obd_device_ops, NULL, NULL,
+       rc = class_register_type(&mdt_obd_device_ops, NULL, true, NULL,
 #ifndef HAVE_ONLY_PROCFS_SEQ
-                               lvars.module_vars,
+                                NULL,
 #endif
-                               LUSTRE_MDT_NAME, &mdt_device_type);
+                                LUSTRE_MDT_NAME, &mdt_device_type);
        if (rc)
                GOTO(mds_fini, rc);
 lu_fini: