Whamcloud - gitweb
LU-4921 lmv: try all stripes for unknown hash functions 41/10041/12
authorWang Di <di.wang@intel.com>
Thu, 26 Jun 2014 05:35:52 +0000 (22:35 -0700)
committerOleg Drokin <oleg.drokin@intel.com>
Tue, 1 Jul 2014 04:23:44 +0000 (04:23 +0000)
For an unknown hash type, LMV should try all stripes to locate
the name entry. But it will only do so for lookup and unlink, i.e.
we can only list and unlink entries under a striped dir with an
unknown hash type.

Signed-off-by: wang di <di.wang@intel.com>
Change-Id: Ifeed7131c24e48277a6cc8fd4c09b7534e31079f
Reviewed-on: http://review.whamcloud.com/10041
Tested-by: Jenkins
Reviewed-by: John L. Hammond <john.hammond@intel.com>
Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
Tested-by: Maloo <hpdd-maloo@intel.com>
lustre/include/lustre/lustre_user.h
lustre/include/obd_support.h
lustre/lmv/lmv_intent.c
lustre/lmv/lmv_internal.h
lustre/lmv/lmv_obd.c
lustre/tests/sanity.sh

index 6a828f3..e6f07c0 100644 (file)
@@ -405,6 +405,7 @@ struct lmv_user_mds_data {
 };
 
 enum lmv_hash_type {
+       LMV_HASH_TYPE_UNKNOWN   = 0,    /* 0 is reserved for testing purpose */
        LMV_HASH_TYPE_ALL_CHARS = 1,
        LMV_HASH_TYPE_FNV_1A_64 = 2,
 };
index 343343a..a20ec4c 100644 (file)
@@ -534,6 +534,9 @@ int obd_alloc_fail(const void *ptr, const char *name, const char *type,
 #define OBD_FAIL_MIGRATE_ENTRIES               0x1801
 #define OBD_FAIL_MIGRATE_LINKEA                        0x1802
 
+/* LMV */
+#define OBD_FAIL_UNKNOWN_LMV_STRIPE            0x1901
+
 /* Assign references to moved code to reduce code changes */
 #define OBD_FAIL_PRECHECK(id)                   CFS_FAIL_PRECHECK(id)
 #define OBD_FAIL_CHECK(id)                      CFS_FAIL_CHECK(id)
index fa0f194..061025d 100644 (file)
@@ -422,10 +422,26 @@ lmv_intent_lookup(struct obd_export *exp, struct md_op_data *op_data,
        int                     rc = 0;
        ENTRY;
 
+       /* If it returns ERR_PTR(-EBADFD) then it is an unknown hash type,
+        * and it will try all stripes to locate the object */
        tgt = lmv_locate_mds(lmv, op_data, &op_data->op_fid1);
-       if (IS_ERR(tgt))
+       if (IS_ERR(tgt) && (PTR_ERR(tgt) != -EBADFD))
                RETURN(PTR_ERR(tgt));
 
+       /* Both migrating dir and unknown hash dir need to try
+        * all of sub-stripes */
+       if (lsm != NULL && !lmv_is_known_hash_type(lsm)) {
+               struct lmv_oinfo *oinfo;
+
+               oinfo = &lsm->lsm_md_oinfo[0];
+
+               op_data->op_fid1 = oinfo->lmo_fid;
+               op_data->op_mds = oinfo->lmo_mds;
+               tgt = lmv_get_target(lmv, oinfo->lmo_mds, NULL);
+               if (IS_ERR(tgt))
+                       RETURN(PTR_ERR(tgt));
+       }
+
        if (!fid_is_sane(&op_data->op_fid2))
                fid_zero(&op_data->op_fid2);
 
@@ -454,26 +470,38 @@ lmv_intent_lookup(struct obd_export *exp, struct md_op_data *op_data,
                }
                RETURN(rc);
        } else if (it_disposition(it, DISP_LOOKUP_NEG) && lsm != NULL &&
-                  lsm->lsm_md_hash_type & LMV_HASH_FLAG_MIGRATION) {
-               /* For migrating directory, if it can not find the child in
-                * the source directory(master stripe), try the targeting
-                * directory(stripe 1) */
-               tgt = lmv_find_target(lmv, &lsm->lsm_md_oinfo[1].lmo_fid);
-               if (IS_ERR(tgt))
-                       RETURN(PTR_ERR(tgt));
-
-               ptlrpc_req_finished(*reqp);
-               it->d.lustre.it_data = NULL;
-               *reqp = NULL;
-
-               CDEBUG(D_INODE, "For migrating dir, try target dir "DFID"\n",
-                      PFID(&lsm->lsm_md_oinfo[1].lmo_fid));
-
-               op_data->op_fid1 = lsm->lsm_md_oinfo[1].lmo_fid;
-               it->d.lustre.it_disposition &= ~DISP_ENQ_COMPLETE;
-               rc = md_intent_lock(tgt->ltd_exp, op_data, it, reqp,
-                                   cb_blocking, extra_lock_flags);
+                  lmv_need_try_all_stripes(lsm)) {
+               /* For migrating and unknown hash type directory, it will
+                * try to target the entry on other stripes */
+               int stripe_index;
+
+               for (stripe_index = 1;
+                    stripe_index < lsm->lsm_md_stripe_count &&
+                    it_disposition(it, DISP_LOOKUP_NEG); stripe_index++) {
+                       struct lmv_oinfo *oinfo;
+
+                       /* release the previous request */
+                       ptlrpc_req_finished(*reqp);
+                       it->d.lustre.it_data = NULL;
+                       *reqp = NULL;
+
+                       oinfo = &lsm->lsm_md_oinfo[stripe_index];
+                       tgt = lmv_find_target(lmv, &oinfo->lmo_fid);
+                       if (IS_ERR(tgt))
+                               RETURN(PTR_ERR(tgt));
+
+                       CDEBUG(D_INODE, "Try other stripes " DFID"\n",
+                              PFID(&oinfo->lmo_fid));
+
+                       op_data->op_fid1 = oinfo->lmo_fid;
+                       it->d.lustre.it_disposition &= ~DISP_ENQ_COMPLETE;
+                       rc = md_intent_lock(tgt->ltd_exp, op_data, it, reqp,
+                                           cb_blocking, extra_lock_flags);
+                       if (rc != 0)
+                               RETURN(rc);
+               }
        }
+
        /*
         * MDS has returned success. Probably name has been resolved in
         * remote inode. Let's check this.
index daf6a7b..63b55bc 100644 (file)
@@ -156,6 +156,18 @@ lsm_name_to_stripe_info(const struct lmv_stripe_md *lsm, const char *name,
        return &lsm->lsm_md_oinfo[stripe_index];
 }
 
+static inline bool lmv_is_known_hash_type(const struct lmv_stripe_md *lsm)
+{
+       return lsm->lsm_md_hash_type == LMV_HASH_TYPE_FNV_1A_64 ||
+              lsm->lsm_md_hash_type == LMV_HASH_TYPE_ALL_CHARS;
+}
+
+static inline bool lmv_need_try_all_stripes(const struct lmv_stripe_md *lsm)
+{
+       return !lmv_is_known_hash_type(lsm) ||
+              lsm->lsm_md_hash_type & LMV_HASH_FLAG_MIGRATION;
+}
+
 struct lmv_tgt_desc
 *lmv_locate_mds(struct lmv_obd *lmv, struct md_op_data *op_data,
                struct lu_fid *fid);
index 4994986..368057a 100644 (file)
@@ -112,8 +112,8 @@ int lmv_name_to_stripe_index(__u32 lmv_hash_type, unsigned int stripe_count,
                idx = lmv_hash_fnv1a(stripe_count, name, namelen);
                break;
        default:
-               CERROR("Unknown hash type 0x%x\n", hash_type);
-               return -EINVAL;
+               idx = -EBADFD;
+               break;
        }
 
        CDEBUG(D_INFO, "name %.*s hash_type %d idx %d\n", namelen, name,
@@ -1783,7 +1783,7 @@ lmv_locate_target_for_name(struct lmv_obd *lmv, struct lmv_stripe_md *lsm,
 
        oinfo = lsm_name_to_stripe_info(lsm, name, namelen);
        if (IS_ERR(oinfo))
-               RETURN((void *)oinfo);
+               RETURN(ERR_CAST(oinfo));
        *fid = oinfo->lmo_fid;
        *mds = oinfo->lmo_mds;
        tgt = lmv_get_target(lmv, *mds, NULL);
@@ -1792,6 +1792,23 @@ lmv_locate_target_for_name(struct lmv_obd *lmv, struct lmv_stripe_md *lsm,
        return tgt;
 }
 
+/**
+ * Locate mds by fid or name
+ *
+ * For striped directory (lsm != NULL), it will locate the stripe
+ * by name hash (see lsm_name_to_stripe_info()). Note: if the hash_type
+ * is unknown, it will return -EBADFD, and lmv_intent_lookup might need
+ * to walk through all of the stripes to locate the entry.
+ *
+ * For a normal directory, it will locate the MDS by FID directly.
+ * \param[in] lmv      LMV device
+ * \param[in] op_data  client MD stack parameters, name, namelen
+ *                      mds_num etc.
+ * \param[in] fid      object FID used to locate MDS.
+ *
+ * retval              pointer to the lmv_tgt_desc if succeed.
+ *                      ERR_PTR(errno) if failed.
+ */
 struct lmv_tgt_desc
 *lmv_locate_mds(struct lmv_obd *lmv, struct md_op_data *op_data,
                struct lu_fid *fid)
@@ -2664,6 +2681,30 @@ int lmv_read_page(struct obd_export *exp, struct md_op_data *op_data,
        RETURN(rc);
 }
 
+/**
+ * Unlink a file/directory
+ *
+ * Unlink a file or directory under the parent dir. The unlink request
+ * usually will be sent to the MDT where the child is located, but if
+ * the client does not have the child FID then request will be sent to the
+ * MDT where the parent is located.
+ *
+ * If the parent is a striped directory then it also needs to locate which
+ * stripe the name of the child is located, and replace the parent FID
+ * (@op->op_fid1) with the stripe FID. Note: if the stripe is unknown,
+ * it will walk through all of the sub-stripes until the child is
+ * finally unlinked.
+ *
+ * \param[in] exp      export refer to LMV
+ * \param[in] op_data  different parameters transferred between client
+ *                      MD stacks, name, namelen, FIDs etc.
+ *                      op_fid1 is the parent FID, op_fid2 is the child
+ *                      FID.
+ * \param[out] request point to the request of unlink.
+ *
+ * retval              0 if succeed
+ *                      negative errno if failed.
+ */
 static int lmv_unlink(struct obd_export *exp, struct md_op_data *op_data,
                       struct ptlrpc_request **request)
 {
@@ -2673,38 +2714,58 @@ static int lmv_unlink(struct obd_export *exp, struct md_op_data *op_data,
        struct lmv_tgt_desc     *parent_tgt = NULL;
        struct mdt_body         *body;
        int                     rc;
+       int                     stripe_index = 0;
+       struct lmv_stripe_md    *lsm = op_data->op_mea1;
        ENTRY;
 
        rc = lmv_check_connect(obd);
        if (rc)
                RETURN(rc);
-retry:
+retry_unlink:
+       /* For striped dir, we need to locate the parent as well */
+       if (lsm != NULL) {
+               struct lmv_tgt_desc *tmp;
+
+               LASSERT(op_data->op_name != NULL &&
+                       op_data->op_namelen != 0);
+
+               tmp = lmv_locate_target_for_name(lmv, lsm,
+                                                op_data->op_name,
+                                                op_data->op_namelen,
+                                                &op_data->op_fid1,
+                                                &op_data->op_mds);
+
+               /* -EBADFD means an unknown hash type; we might
+                * need to try all sub-stripes here */
+               if (IS_ERR(tmp) && PTR_ERR(tmp) != -EBADFD)
+                       RETURN(PTR_ERR(tmp));
+
+               /* Note: both migrating dir and unknown hash dir need to
+                * try all of the sub-stripes, so we need to start searching
+                * for the name from stripe 0, but migrating dir is already
+                * handled inside lmv_locate_target_for_name(), so we only
+                * check unknown hash type directory here */
+               if (!lmv_is_known_hash_type(lsm)) {
+                       struct lmv_oinfo *oinfo;
+
+                       oinfo = &lsm->lsm_md_oinfo[stripe_index];
+
+                       op_data->op_fid1 = oinfo->lmo_fid;
+                       op_data->op_mds = oinfo->lmo_mds;
+               }
+       }
+
+try_next_stripe:
        /* Send unlink requests to the MDT where the child is located */
-       if (likely(!fid_is_zero(&op_data->op_fid2))) {
+       if (likely(!fid_is_zero(&op_data->op_fid2)))
                tgt = lmv_find_target(lmv, &op_data->op_fid2);
-               if (IS_ERR(tgt))
-                       RETURN(PTR_ERR(tgt));
-
-               /* For striped dir, we need to locate the parent as well */
-               if (op_data->op_mea1 != NULL) {
-                       struct lmv_tgt_desc *tmp;
-
-                       LASSERT(op_data->op_name != NULL &&
-                               op_data->op_namelen != 0);
-                       tmp = lmv_locate_target_for_name(lmv,
-                                                  op_data->op_mea1,
-                                                  op_data->op_name,
-                                                  op_data->op_namelen,
-                                                  &op_data->op_fid1,
-                                                  &op_data->op_mds);
-                       if (IS_ERR(tmp))
-                               RETURN(PTR_ERR(tmp));
-               }
-       } else {
+       else if (lsm != NULL)
+               tgt = lmv_get_target(lmv, op_data->op_mds, NULL);
+       else
                tgt = lmv_locate_mds(lmv, op_data, &op_data->op_fid1);
-               if (IS_ERR(tgt))
-                       RETURN(PTR_ERR(tgt));
-       }
+
+       if (IS_ERR(tgt))
+               RETURN(PTR_ERR(tgt));
 
        op_data->op_fsuid = from_kuid(&init_user_ns, current_fsuid());
        op_data->op_fsgid = from_kgid(&init_user_ns, current_fsgid());
@@ -2741,9 +2802,28 @@ retry:
               PFID(&op_data->op_fid1), PFID(&op_data->op_fid2), tgt->ltd_idx);
 
        rc = md_unlink(tgt->ltd_exp, op_data, request);
-       if (rc != 0 && rc != -EREMOTE)
+       if (rc != 0 && rc != -EREMOTE && rc != -ENOENT)
                RETURN(rc);
 
+       /* Try next stripe if it is needed. */
+       if (rc == -ENOENT && lsm != NULL && lmv_need_try_all_stripes(lsm)) {
+               struct lmv_oinfo *oinfo;
+
+               stripe_index++;
+               if (stripe_index >= lsm->lsm_md_stripe_count)
+                       RETURN(rc);
+
+               oinfo = &lsm->lsm_md_oinfo[stripe_index];
+
+               op_data->op_fid1 = oinfo->lmo_fid;
+               op_data->op_mds = oinfo->lmo_mds;
+
+               ptlrpc_req_finished(*request);
+               *request = NULL;
+
+               goto try_next_stripe;
+       }
+
        body = req_capsule_server_get(&(*request)->rq_pill, &RMF_MDT_BODY);
        if (body == NULL)
                RETURN(-EPROTO);
@@ -2778,7 +2858,7 @@ retry:
        ptlrpc_req_finished(*request);
        *request = NULL;
 
-       goto retry;
+       goto retry_unlink;
 }
 
 static int lmv_precleanup(struct obd_device *obd, enum obd_cleanup_stage stage)
@@ -3003,7 +3083,10 @@ static int lmv_unpack_md_v1(struct obd_export *exp, struct lmv_stripe_md *lsm,
        lsm->lsm_md_magic = le32_to_cpu(lmm1->lmv_magic);
        lsm->lsm_md_stripe_count = le32_to_cpu(lmm1->lmv_stripe_count);
        lsm->lsm_md_master_mdt_index = le32_to_cpu(lmm1->lmv_master_mdt_index);
-       lsm->lsm_md_hash_type = le32_to_cpu(lmm1->lmv_hash_type);
+       if (OBD_FAIL_CHECK(OBD_FAIL_UNKNOWN_LMV_STRIPE))
+               lsm->lsm_md_hash_type = LMV_HASH_TYPE_UNKNOWN;
+       else
+               lsm->lsm_md_hash_type = le32_to_cpu(lmm1->lmv_hash_type);
        lsm->lsm_md_layout_version = le32_to_cpu(lmm1->lmv_layout_version);
        fid_le_to_cpu(&lsm->lsm_md_master_fid, &lmm1->lmv_master_fid);
        cplen = strlcpy(lsm->lsm_md_pool_name, lmm1->lmv_pool_name,
index 050dde2..8401a76 100644 (file)
@@ -13004,6 +13004,46 @@ test_300g() {
 }
 run_test 300g "check default striped directory for striped directory"
 
+test_300h() {
+       [ $PARALLEL == "yes" ] && skip "skip parallel run" && return
+       [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return
+       local stripe_count
+       local file
+
+       mkdir $DIR/$tdir
+
+       $LFS setdirstripe -i 0 -c$MDSCOUNT -t all_char $DIR/$tdir/striped_dir ||
+               error "set striped dir error"
+
+       createmany -o $DIR/$tdir/striped_dir/f- 10 ||
+               error "create files under striped dir failed"
+
+       # unfortunately, we need to umount to clear dir layout cache for now
+       # once we fully implement dir layout, we can drop this
+       umount_client $MOUNT || error "umount failed"
+       mount_client $MOUNT || error "mount failed"
+
+       #set the stripe to be unknown hash type
+       #define OBD_FAIL_UNKNOWN_LMV_STRIPE     0x1901
+       $LCTL set_param fail_loc=0x1901
+       for ((i = 0; i < 10; i++)); do
+               $CHECKSTAT -t file $DIR/$tdir/striped_dir/f-$i ||
+                       error "stat f-$i failed"
+               rm $DIR/$tdir/striped_dir/f-$i || error "unlink f-$i failed"
+       done
+
+       touch $DIR/$tdir/striped_dir/f0 &&
+               error "create under striped dir with unknown hash should fail"
+
+       $LCTL set_param fail_loc=0
+
+       umount_client $MOUNT || error "umount failed"
+       mount_client $MOUNT || error "mount failed"
+
+       return 0
+}
+run_test 300h "client handle unknown hash type striped directory"
+
 test_400a() { # LU-1606, was conf-sanity test_74
        local extra_flags=''
        local out=$TMP/$tfile