Whamcloud - gitweb
LU-4008 mdt: Shrink default LOVEA reply buffers 22/9322/10
author Oleg Drokin <oleg.drokin@intel.com>
Thu, 20 Feb 2014 05:00:15 +0000 (00:00 -0500)
committer Oleg Drokin <oleg.drokin@intel.com>
Fri, 11 Apr 2014 19:21:49 +0000 (19:21 +0000)
Instead of allocating the maximum possible LOVEA for intent requests,
allocate a sane default that fits up to 100 stripes. Should the size
be bigger, the buffer reallocation code will kick in.

Additionally, since quite a while ago we no longer send
unlink cookies or EA data to the clients from unlinks/renames,
so we no longer need to allocate reply buffers for them in
mdt_close() and other such places.
Also clean up unused fields related to these buffers in
the md_attr structure.

Change-Id: I4c5bfb5f2d39653d5612c90967d5e1ac83d441e6
Signed-off-by: Oleg Drokin <oleg.drokin@intel.com>
Reviewed-on: http://review.whamcloud.com/9322
Tested-by: Jenkins
Reviewed-by: Matt Ezell <ezellma@ornl.gov>
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: James Simmons <uja.ornl@gmail.com>
Reviewed-by: Mike Pershin <mike.pershin@intel.com>
lustre/include/lustre/lustre_idl.h
lustre/include/md_object.h
lustre/mdt/mdt_handler.c
lustre/mdt/mdt_internal.h
lustre/mdt/mdt_lib.c
lustre/mdt/mdt_open.c

index aa2c175..eccf646 100644 (file)
@@ -1699,6 +1699,12 @@ static inline void lmm_oi_cpu_to_le(struct ost_id *dst_oi,
 #define MAX_MD_SIZE (sizeof(struct lov_mds_md) + 4 * sizeof(struct lov_ost_data))
 #define MIN_MD_SIZE (sizeof(struct lov_mds_md) + 1 * sizeof(struct lov_ost_data))
 
+/* This is the default MDT reply size allocated, should the striping be bigger,
+ * it will be reallocated in mdt_fix_reply.
+ * 100 stripes is a bit less than 2.5k of data */
+#define DEF_REP_MD_SIZE (sizeof(struct lov_mds_md) + \
+                        100 * sizeof(struct lov_ost_data))
+
 #define XATTR_NAME_ACL_ACCESS   "system.posix_acl_access"
 #define XATTR_NAME_ACL_DEFAULT  "system.posix_acl_default"
 #define XATTR_USER_PREFIX       "user."
index caf1e16..ba88aed 100644 (file)
@@ -139,13 +139,11 @@ struct md_attr {
         struct lov_mds_md      *ma_lmm;
        union lmv_mds_md       *ma_lmv;
         void                   *ma_acl;
-        struct llog_cookie     *ma_cookie;
         struct lustre_capa     *ma_capa;
         struct md_som_data     *ma_som;
         int                     ma_lmm_size;
         int                     ma_lmv_size;
         int                     ma_acl_size;
-        int                     ma_cookie_size;
         __u16                   ma_layout_gen;
 };
 
index fb3ba0a..abb526a 100644 (file)
@@ -481,6 +481,32 @@ void mdt_client_compatibility(struct mdt_thread_info *info)
         EXIT;
 }
 
+int mdt_attr_get_eabuf_size(struct mdt_thread_info *info, struct mdt_object *o)
+{
+       const struct lu_env *env = info->mti_env;
+       int rc, rc2;
+
+       rc = mo_xattr_get(env, mdt_object_child(o), &LU_BUF_NULL,
+                         XATTR_NAME_LOV);
+
+       if (rc == -ENODATA)
+               rc = 0;
+
+       if (rc < 0)
+               goto out;
+
+       /* Is it a directory? Let's check for the LMV as well */
+       if (S_ISDIR(lu_object_attr(&mdt_object_child(o)->mo_lu))) {
+               rc2 = mo_xattr_get(env, mdt_object_child(o), &LU_BUF_NULL,
+                                  XATTR_NAME_LMV);
+               if ((rc2 < 0 && rc2 != -ENODATA) || (rc2 > rc))
+                       rc = rc2;
+       }
+
+out:
+       return rc;
+}
+
 static int mdt_big_xattr_get(struct mdt_thread_info *info, struct mdt_object *o,
                             const char *name)
 {
@@ -767,13 +793,15 @@ static int mdt_getattr_internal(struct mdt_thread_info *info,
                GOTO(out, rc = 0);
        }
 
-       buffer->lb_len = reqbody->eadatasize;
-       if (buffer->lb_len > 0) {
+       if (reqbody->eadatasize > 0) {
                buffer->lb_buf = req_capsule_server_get(pill, &RMF_MDT_MD);
                if (buffer->lb_buf == NULL)
                        GOTO(out, rc = -EPROTO);
+               buffer->lb_len = req_capsule_get_size(pill, &RMF_MDT_MD,
+                                                     RCL_SERVER);
        } else {
                buffer->lb_buf = NULL;
+               buffer->lb_len = 0;
                ma_need &= ~(MA_LOV | MA_LMV);
                CDEBUG(D_INFO, "%s: RPC from %s: does not need LOVEA.\n",
                       mdt_obd_name(info->mti_mdt),
@@ -1053,11 +1081,34 @@ int mdt_getattr(struct tgt_session_info *tsi)
 
        mode = lu_object_attr(&obj->mot_obj);
 
+       /* Readlink */
+       if (reqbody->valid & OBD_MD_LINKNAME) {
+               /* No easy way to know how long is the symlink, but it cannot
+                * be more than PATH_MAX, so we allocate +1 */
+               rc = PATH_MAX + 1;
+
+       /* A special case for fs ROOT: getattr there might fetch
+        * default EA for entire fs, not just for this dir!
+        */
+       } else if (lu_fid_eq(mdt_object_fid(obj),
+                            &info->mti_mdt->mdt_md_root_fid) &&
+                  (reqbody->valid & OBD_MD_FLDIREA) &&
+                  (lustre_msg_get_opc(mdt_info_req(info)->rq_reqmsg) ==
+                                                                MDS_GETATTR)) {
+               /* Should the default striping be bigger, mdt_fix_reply
+                * will reallocate */
+               rc = DEF_REP_MD_SIZE;
+       } else {
+               /* Hopefully no race in EA change for either file or directory?
+                */
+               rc = mdt_attr_get_eabuf_size(info, obj);
+       }
+
+       if (rc < 0)
+               GOTO(out_shrink, rc);
+
        /* old clients may not report needed easize, use max value then */
-       req_capsule_set_size(pill, &RMF_MDT_MD, RCL_SERVER,
-                            reqbody->eadatasize == 0 ?
-                            info->mti_mdt->mdt_max_mdsize :
-                            reqbody->eadatasize);
+       req_capsule_set_size(pill, &RMF_MDT_MD, RCL_SERVER, rc);
 
        rc = req_capsule_server_pack(pill);
        if (unlikely(rc != 0))
@@ -1768,7 +1819,7 @@ static int mdt_reint_internal(struct mdt_thread_info *info,
         /* for replay (no_create) lmm is not needed, client has it already */
         if (req_capsule_has_field(pill, &RMF_MDT_MD, RCL_SERVER))
                 req_capsule_set_size(pill, &RMF_MDT_MD, RCL_SERVER,
-                                     info->mti_rr.rr_eadatalen);
+                                    DEF_REP_MD_SIZE);
 
        /* llog cookies are always 0, the field is kept for compatibility */
         if (req_capsule_has_field(pill, &RMF_LOGCOOKIES, RCL_SERVER))
@@ -2702,7 +2753,7 @@ static int mdt_unpack_req_pack_rep(struct mdt_thread_info *info, __u32 flags)
                 /* Pack reply. */
                 if (req_capsule_has_field(pill, &RMF_MDT_MD, RCL_SERVER))
                         req_capsule_set_size(pill, &RMF_MDT_MD, RCL_SERVER,
-                                             info->mti_body->eadatasize);
+                                            DEF_REP_MD_SIZE);
                 if (req_capsule_has_field(pill, &RMF_LOGCOOKIES, RCL_SERVER))
                        req_capsule_set_size(pill, &RMF_LOGCOOKIES,
                                             RCL_SERVER, 0);
index 7970b86..6a19f9f 100644 (file)
@@ -193,7 +193,6 @@ struct mdt_device {
         /* these values should be updated from lov if necessary.
          * or should be placed somewhere else. */
         int                        mdt_max_mdsize;
-        int                        mdt_max_cookiesize;
 
        int                        mdt_max_ea_size;
 
index 0b0339c..ae30238 100644 (file)
@@ -628,16 +628,19 @@ int mdt_fix_reply(struct mdt_thread_info *info)
          * buffers before growing it */
        if (info->mti_big_lmm_used) {
                 LASSERT(req_capsule_has_field(pill, &RMF_MDT_MD, RCL_SERVER));
-                md_packed = req_capsule_get_size(pill, &RMF_MDT_MD,
-                                                 RCL_SERVER);
-                LASSERT(md_packed > 0);
-                /* buffer must be allocated separately */
-                LASSERT(info->mti_attr.ma_lmm !=
-                        req_capsule_server_get(pill, &RMF_MDT_MD));
-                req_capsule_shrink(pill, &RMF_MDT_MD, 0, RCL_SERVER);
+
                 /* free big lmm if md_size is not needed */
-                if (md_size == 0)
+               if (md_size == 0) {
                        info->mti_big_lmm_used = 0;
+               } else {
+                       md_packed = req_capsule_get_size(pill, &RMF_MDT_MD,
+                                                        RCL_SERVER);
+                       LASSERT(md_packed > 0);
+                       /* buffer must be allocated separately */
+                       LASSERT(info->mti_attr.ma_lmm !=
+                               req_capsule_server_get(pill, &RMF_MDT_MD));
+                       req_capsule_shrink(pill, &RMF_MDT_MD, 0, RCL_SERVER);
+               }
         } else if (req_capsule_has_field(pill, &RMF_MDT_MD, RCL_SERVER)) {
                 req_capsule_shrink(pill, &RMF_MDT_MD, md_size, RCL_SERVER);
         }
@@ -725,11 +728,6 @@ int mdt_handle_last_unlink(struct mdt_thread_info *info, struct mdt_object *mo,
         }
        repbody->eadatasize = 0;
 
-        if (ma->ma_cookie_size && (ma->ma_valid & MA_COOKIE)) {
-                repbody->aclsize = ma->ma_cookie_size;
-                repbody->valid |= OBD_MD_FLCOOKIE;
-        }
-
        if (info->mti_mdt->mdt_lut.lut_oss_capa &&
            exp_connect_flags(info->mti_exp) & OBD_CONNECT_OSS_CAPA &&
            repbody->valid & OBD_MD_FLEASIZE) {
index fc67526..62933b8 100644 (file)
@@ -1892,7 +1892,6 @@ int mdt_reint_open(struct mdt_thread_info *info, struct mdt_lock_handle *lhc)
                if (created) {
                        ma->ma_need = 0;
                        ma->ma_valid = 0;
-                       ma->ma_cookie_size = 0;
                        rc = mdo_unlink(info->mti_env,
                                        mdt_object_child(parent),
                                        mdt_object_child(child),
@@ -2296,10 +2295,12 @@ int mdt_close(struct tgt_session_info *tsi)
 
         LASSERT(info->mti_ioepoch);
 
+       /* These fields are no longer used and are left for compatibility.
+        * size is always zero */
         req_capsule_set_size(info->mti_pill, &RMF_MDT_MD, RCL_SERVER,
-                             info->mti_mdt->mdt_max_mdsize);
+                            0);
         req_capsule_set_size(info->mti_pill, &RMF_LOGCOOKIES, RCL_SERVER,
-                             info->mti_mdt->mdt_max_cookiesize);
+                            0);
         rc = req_capsule_server_pack(info->mti_pill);
         if (mdt_check_resent(info, mdt_reconstruct_generic, NULL)) {
                 mdt_client_compatibility(info);
@@ -2318,11 +2319,6 @@ int mdt_close(struct tgt_session_info *tsi)
                 ma->ma_lmm_size = req_capsule_get_size(info->mti_pill,
                                                        &RMF_MDT_MD,
                                                        RCL_SERVER);
-                ma->ma_cookie = req_capsule_server_get(info->mti_pill,
-                                                       &RMF_LOGCOOKIES);
-                ma->ma_cookie_size = req_capsule_get_size(info->mti_pill,
-                                                          &RMF_LOGCOOKIES,
-                                                          RCL_SERVER);
                 ma->ma_need = MA_INODE | MA_LOV | MA_COOKIE;
                 repbody->eadatasize = 0;
                 repbody->aclsize = 0;