X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Flmv%2Flmv_obd.c;h=8e9d844c58baf364149783230d52900f0428d457;hb=47c31bb3174a4f0ba81e088bed0d3dd5cb1223f4;hp=49949861ab84ecdbfc5c25e8ea7932cea74148c1;hpb=dee25c13da7c50c5d3279803bdd315e20863a533;p=fs%2Flustre-release.git diff --git a/lustre/lmv/lmv_obd.c b/lustre/lmv/lmv_obd.c index 4994986..8e9d844 100644 --- a/lustre/lmv/lmv_obd.c +++ b/lustre/lmv/lmv_obd.c @@ -35,7 +35,6 @@ */ #define DEBUG_SUBSYSTEM S_LMV -#ifdef __KERNEL__ #include #include #include @@ -45,9 +44,6 @@ #include #include #include -#else -#include -#endif #include #include @@ -112,8 +108,8 @@ int lmv_name_to_stripe_index(__u32 lmv_hash_type, unsigned int stripe_count, idx = lmv_hash_fnv1a(stripe_count, name, namelen); break; default: - CERROR("Unknown hash type 0x%x\n", hash_type); - return -EINVAL; + idx = -EBADFD; + break; } CDEBUG(D_INFO, "name %.*s hash_type %d idx %d\n", namelen, name, @@ -303,8 +299,10 @@ static int lmv_connect(const struct lu_env *env, obd->obd_proc_entry, NULL, NULL); if (IS_ERR(lmv->targets_proc_entry)) { - CERROR("could not register /proc/fs/lustre/%s/%s/target_obds.", - obd->obd_type->typ_name, obd->obd_name); + CERROR("%s: cannot register " + "/proc/fs/lustre/%s/%s/target_obds\n", + obd->obd_name, obd->obd_type->typ_name, + obd->obd_name); lmv->targets_proc_entry = NULL; } } @@ -390,7 +388,7 @@ static int lmv_init_ea_size(struct obd_export *exp, int easize, cookiesize, def_cookiesize); if (rc) { CERROR("%s: obd_init_ea_size() failed on MDT target %d:" - " rc = %d.\n", obd->obd_name, i, rc); + " rc = %d\n", obd->obd_name, i, rc); break; } } @@ -490,8 +488,8 @@ int lmv_connect_mdc(struct obd_device *obd, struct lmv_tgt_desc *tgt) mdc_obd->obd_type->typ_name, mdc_obd->obd_name); if (mdc_symlink == NULL) { - CERROR("Could not register LMV target " - "/proc/fs/lustre/%s/%s/target_obds/%s.", + CERROR("cannot register LMV target " + "/proc/fs/lustre/%s/%s/target_obds/%s\n", obd->obd_type->typ_name, obd->obd_name, 
mdc_obd->obd_name); } @@ -1122,6 +1120,19 @@ static int lmv_iocontrol(unsigned int cmd, struct obd_export *exp, rc = obd_iocontrol(cmd, tgt->ltd_exp, len, karg, uarg); break; } + case LL_IOC_FID2MDTIDX: { + struct lu_fid *fid = karg; + int mdt_index; + + rc = lmv_fld_lookup(lmv, fid, &mdt_index); + if (rc != 0) + RETURN(rc); + + /* Note: this is from llite(see ll_dir_ioctl()), @uarg does not + * point to user space memory for FID2MDTIDX. */ + *(__u32 *)uarg = mdt_index; + break; + } case OBD_IOC_FID2PATH: { rc = lmv_fid2path(exp, len, karg, uarg); break; @@ -1783,7 +1794,7 @@ lmv_locate_target_for_name(struct lmv_obd *lmv, struct lmv_stripe_md *lsm, oinfo = lsm_name_to_stripe_info(lsm, name, namelen); if (IS_ERR(oinfo)) - RETURN((void *)oinfo); + RETURN(ERR_CAST(oinfo)); *fid = oinfo->lmo_fid; *mds = oinfo->lmo_mds; tgt = lmv_get_target(lmv, *mds, NULL); @@ -1792,13 +1803,58 @@ lmv_locate_target_for_name(struct lmv_obd *lmv, struct lmv_stripe_md *lsm, return tgt; } -struct lmv_tgt_desc -*lmv_locate_mds(struct lmv_obd *lmv, struct md_op_data *op_data, - struct lu_fid *fid) +/** + * Locate mds by fid or name + * + * For striped directory (lsm != NULL), it will locate the stripe + * by name hash (see lsm_name_to_stripe_info()). Note: if the hash_type + * is unknown, it will return -EBADFD, and lmv_intent_lookup might need + * to walk through all of the stripes to locate the entry. + * + * For normal directory, it will locate MDS by FID directly. + * \param[in] lmv LMV device + * \param[in] op_data client MD stack parameters, name, namelen + * mds_num etc. + * \param[in] fid object FID used to locate MDS. + * + * retval pointer to the lmv_tgt_desc if succeed. + * ERR_PTR(errno) if failed. 
+ */ +struct lmv_tgt_desc* +lmv_locate_mds(struct lmv_obd *lmv, struct md_op_data *op_data, + struct lu_fid *fid) { struct lmv_stripe_md *lsm = op_data->op_mea1; struct lmv_tgt_desc *tgt; + /* During creating VOLATILE file, it should honor the mdt + * index if the file under striped dir is being restored, see + * ct_restore(). */ + if (op_data->op_bias & MDS_CREATE_VOLATILE && + (int)op_data->op_mds != -1 && lsm != NULL) { + int i; + tgt = lmv_get_target(lmv, op_data->op_mds, NULL); + if (IS_ERR(tgt)) + return tgt; + + /* refill the right parent fid */ + for (i = 0; i < lsm->lsm_md_stripe_count; i++) { + struct lmv_oinfo *oinfo; + + oinfo = &lsm->lsm_md_oinfo[i]; + if (oinfo->lmo_mds == op_data->op_mds) { + *fid = oinfo->lmo_fid; + break; + } + } + + /* Hmm, can not find the stripe by mdt_index(op_mds) */ + if (i == lsm->lsm_md_stripe_count) + tgt = ERR_PTR(-EINVAL); + + return tgt; + } + if (lsm == NULL || op_data->op_namelen == 0) { tgt = lmv_find_target(lmv, fid); if (IS_ERR(tgt)) @@ -2244,109 +2300,6 @@ static int lmv_fsync(struct obd_export *exp, const struct lu_fid *fid, RETURN(rc); } -/* - * Adjust a set of pages, each page containing an array of lu_dirpages, - * so that each page can be used as a single logical lu_dirpage. - * - * A lu_dirpage is laid out as follows, where s = ldp_hash_start, - * e = ldp_hash_end, f = ldp_flags, p = padding, and each "ent" is a - * struct lu_dirent. It has size up to LU_PAGE_SIZE. The ldp_hash_end - * value is used as a cookie to request the next lu_dirpage in a - * directory listing that spans multiple pages (two in this example): - * ________ - * | | - * .|--------v------- -----. - * |s|e|f|p|ent|ent| ... |ent| - * '--|-------------- -----' Each CFS_PAGE contains a single - * '------. lu_dirpage. - * .---------v------- -----. - * |s|e|f|p|ent| 0 | ... 
| 0 | - * '----------------- -----' - * - * However, on hosts where the native VM page size (PAGE_CACHE_SIZE) is - * larger than LU_PAGE_SIZE, a single host page may contain multiple - * lu_dirpages. After reading the lu_dirpages from the MDS, the - * ldp_hash_end of the first lu_dirpage refers to the one immediately - * after it in the same CFS_PAGE (arrows simplified for brevity, but - * in general e0==s1, e1==s2, etc.): - * - * .-------------------- -----. - * |s0|e0|f0|p|ent|ent| ... |ent| - * |---v---------------- -----| - * |s1|e1|f1|p|ent|ent| ... |ent| - * |---v---------------- -----| Here, each CFS_PAGE contains - * ... multiple lu_dirpages. - * |---v---------------- -----| - * |s'|e'|f'|p|ent|ent| ... |ent| - * '---|---------------- -----' - * v - * .----------------------------. - * | next CFS_PAGE | - * - * This structure is transformed into a single logical lu_dirpage as follows: - * - * - Replace e0 with e' so the request for the next lu_dirpage gets the page - * labeled 'next CFS_PAGE'. - * - * - Copy the LDF_COLLIDE flag from f' to f0 to correctly reflect whether - * a hash collision with the next page exists. - * - * - Adjust the lde_reclen of the ending entry of each lu_dirpage to span - * to the first entry of the next lu_dirpage. - */ -#if PAGE_CACHE_SIZE > LU_PAGE_SIZE -static void lmv_adjust_dirpages(struct page **pages, int ncfspgs, int nlupgs) -{ - int i; - - for (i = 0; i < ncfspgs; i++) { - struct lu_dirpage *dp = kmap(pages[i]); - struct lu_dirpage *first = dp; - struct lu_dirent *end_dirent = NULL; - struct lu_dirent *ent; - __u64 hash_end = dp->ldp_hash_end; - __u32 flags = dp->ldp_flags; - - while (--nlupgs > 0) { - ent = lu_dirent_start(dp); - for (end_dirent = ent; ent != NULL; - end_dirent = ent, ent = lu_dirent_next(ent)); - - /* Advance dp to next lu_dirpage. */ - dp = (struct lu_dirpage *)((char *)dp + LU_PAGE_SIZE); - - /* Check if we've reached the end of the CFS_PAGE. 
*/ - if (!((unsigned long)dp & ~CFS_PAGE_MASK)) - break; - - /* Save the hash and flags of this lu_dirpage. */ - hash_end = dp->ldp_hash_end; - flags = dp->ldp_flags; - - /* Check if lu_dirpage contains no entries. */ - if (!end_dirent) - break; - - /* Enlarge the end entry lde_reclen from 0 to - * first entry of next lu_dirpage. */ - LASSERT(le16_to_cpu(end_dirent->lde_reclen) == 0); - end_dirent->lde_reclen = - cpu_to_le16((char *)(dp->ldp_entries) - - (char *)end_dirent); - } - - first->ldp_hash_end = hash_end; - first->ldp_flags &= ~cpu_to_le32(LDF_COLLIDE); - first->ldp_flags |= flags & cpu_to_le32(LDF_COLLIDE); - - kunmap(pages[i]); - } - LASSERTF(nlupgs == 0, "left = %d", nlupgs); -} -#else -#define lmv_adjust_dirpages(pages, ncfspgs, nlupgs) do {} while (0) -#endif /* PAGE_CACHE_SIZE > LU_PAGE_SIZE */ - /** * Get current minimum entry from striped directory * @@ -2664,6 +2617,30 @@ int lmv_read_page(struct obd_export *exp, struct md_op_data *op_data, RETURN(rc); } +/** + * Unlink a file/directory + * + * Unlink a file or directory under the parent dir. The unlink request + * usually will be sent to the MDT where the child is located, but if + * the client does not have the child FID then request will be sent to the + * MDT where the parent is located. + * + * If the parent is a striped directory then it also needs to locate which + * stripe the name of the child is located, and replace the parent FID + * (@op_data->op_fid1) with the stripe FID. Note: if the stripe is unknown, + * it will walk through all of sub-stripes until the child is being + * unlinked finally. + * + * \param[in] exp export refer to LMV + * \param[in] op_data different parameters transferred between client + * MD stacks, name, namelen, FIDs etc. + * op_fid1 is the parent FID, op_fid2 is the child + * FID. + * \param[out] request point to the request of unlink. + * + * retval 0 if succeed + * negative errno if failed. 
+ */ static int lmv_unlink(struct obd_export *exp, struct md_op_data *op_data, struct ptlrpc_request **request) { @@ -2673,38 +2650,58 @@ static int lmv_unlink(struct obd_export *exp, struct md_op_data *op_data, struct lmv_tgt_desc *parent_tgt = NULL; struct mdt_body *body; int rc; + int stripe_index = 0; + struct lmv_stripe_md *lsm = op_data->op_mea1; ENTRY; rc = lmv_check_connect(obd); if (rc) RETURN(rc); -retry: +retry_unlink: + /* For striped dir, we need to locate the parent as well */ + if (lsm != NULL) { + struct lmv_tgt_desc *tmp; + + LASSERT(op_data->op_name != NULL && + op_data->op_namelen != 0); + + tmp = lmv_locate_target_for_name(lmv, lsm, + op_data->op_name, + op_data->op_namelen, + &op_data->op_fid1, + &op_data->op_mds); + + /* return -EBADFD means unknown hash type, might + * need try all sub-stripe here */ + if (IS_ERR(tmp) && PTR_ERR(tmp) != -EBADFD) + RETURN(PTR_ERR(tmp)); + + /* Note: both migrating dir and unknown hash dir need to + * try all of sub-stripes, so we need start search the + * name from stripe 0, but migrating dir is already handled + * inside lmv_locate_target_for_name(), so we only check + * unknown hash type directory here */ + if (!lmv_is_known_hash_type(lsm)) { + struct lmv_oinfo *oinfo; + + oinfo = &lsm->lsm_md_oinfo[stripe_index]; + + op_data->op_fid1 = oinfo->lmo_fid; + op_data->op_mds = oinfo->lmo_mds; + } + } + +try_next_stripe: /* Send unlink requests to the MDT where the child is located */ - if (likely(!fid_is_zero(&op_data->op_fid2))) { + if (likely(!fid_is_zero(&op_data->op_fid2))) tgt = lmv_find_target(lmv, &op_data->op_fid2); - if (IS_ERR(tgt)) - RETURN(PTR_ERR(tgt)); - - /* For striped dir, we need to locate the parent as well */ - if (op_data->op_mea1 != NULL) { - struct lmv_tgt_desc *tmp; - - LASSERT(op_data->op_name != NULL && - op_data->op_namelen != 0); - tmp = lmv_locate_target_for_name(lmv, - op_data->op_mea1, - op_data->op_name, - op_data->op_namelen, - &op_data->op_fid1, - &op_data->op_mds); - if 
(IS_ERR(tmp)) - RETURN(PTR_ERR(tmp)); - } - } else { + else if (lsm != NULL) + tgt = lmv_get_target(lmv, op_data->op_mds, NULL); + else tgt = lmv_locate_mds(lmv, op_data, &op_data->op_fid1); - if (IS_ERR(tgt)) - RETURN(PTR_ERR(tgt)); - } + + if (IS_ERR(tgt)) + RETURN(PTR_ERR(tgt)); op_data->op_fsuid = from_kuid(&init_user_ns, current_fsuid()); op_data->op_fsgid = from_kgid(&init_user_ns, current_fsgid()); @@ -2741,16 +2738,35 @@ retry: PFID(&op_data->op_fid1), PFID(&op_data->op_fid2), tgt->ltd_idx); rc = md_unlink(tgt->ltd_exp, op_data, request); - if (rc != 0 && rc != -EREMOTE) + if (rc != 0 && rc != -EREMOTE && rc != -ENOENT) RETURN(rc); + /* Try next stripe if it is needed. */ + if (rc == -ENOENT && lsm != NULL && lmv_need_try_all_stripes(lsm)) { + struct lmv_oinfo *oinfo; + + stripe_index++; + if (stripe_index >= lsm->lsm_md_stripe_count) + RETURN(rc); + + oinfo = &lsm->lsm_md_oinfo[stripe_index]; + + op_data->op_fid1 = oinfo->lmo_fid; + op_data->op_mds = oinfo->lmo_mds; + + ptlrpc_req_finished(*request); + *request = NULL; + + goto try_next_stripe; + } + body = req_capsule_server_get(&(*request)->rq_pill, &RMF_MDT_BODY); if (body == NULL) RETURN(-EPROTO); /* Not cross-ref case, just get out of here. 
*/ if (likely(!(body->mbo_valid & OBD_MD_MDS))) - RETURN(0); + RETURN(rc); CDEBUG(D_INODE, "%s: try unlink to another MDT for "DFID"\n", exp->exp_obd->obd_name, PFID(&body->mbo_fid1)); @@ -2778,7 +2794,7 @@ retry: ptlrpc_req_finished(*request); *request = NULL; - goto retry; + goto retry_unlink; } static int lmv_precleanup(struct obd_device *obd, enum obd_cleanup_stage stage) @@ -3003,15 +3019,14 @@ static int lmv_unpack_md_v1(struct obd_export *exp, struct lmv_stripe_md *lsm, lsm->lsm_md_magic = le32_to_cpu(lmm1->lmv_magic); lsm->lsm_md_stripe_count = le32_to_cpu(lmm1->lmv_stripe_count); lsm->lsm_md_master_mdt_index = le32_to_cpu(lmm1->lmv_master_mdt_index); - lsm->lsm_md_hash_type = le32_to_cpu(lmm1->lmv_hash_type); + if (OBD_FAIL_CHECK(OBD_FAIL_UNKNOWN_LMV_STRIPE)) + lsm->lsm_md_hash_type = LMV_HASH_TYPE_UNKNOWN; + else + lsm->lsm_md_hash_type = le32_to_cpu(lmm1->lmv_hash_type); lsm->lsm_md_layout_version = le32_to_cpu(lmm1->lmv_layout_version); - fid_le_to_cpu(&lsm->lsm_md_master_fid, &lmm1->lmv_master_fid); cplen = strlcpy(lsm->lsm_md_pool_name, lmm1->lmv_pool_name, sizeof(lsm->lsm_md_pool_name)); - if (!fid_is_sane(&lsm->lsm_md_master_fid)) - RETURN(-EPROTO); - if (cplen >= sizeof(lsm->lsm_md_pool_name)) RETURN(-E2BIG); @@ -3049,7 +3064,6 @@ int lmv_unpack_md(struct obd_export *exp, struct lmv_stripe_md **lsmp, lsm = *lsmp; /* Free memmd */ if (lsm != NULL && lmm == NULL) { -#ifdef __KERNEL__ int i; for (i = 0; i < lsm->lsm_md_stripe_count; i++) { /* For migrating inode, the master stripe and master @@ -3059,7 +3073,6 @@ int lmv_unpack_md(struct obd_export *exp, struct lmv_stripe_md **lsmp, i == 0) && lsm->lsm_md_oinfo[i].lmo_root != NULL) iput(lsm->lsm_md_oinfo[i].lmo_root); } -#endif lsm_size = lmv_stripe_md_size(lsm->lsm_md_stripe_count); OBD_FREE(lsm, lsm_size); *lsmp = NULL; @@ -3512,7 +3525,6 @@ int lmv_update_lsm_md(struct obd_export *exp, struct lmv_stripe_md *lsm, int lmv_merge_attr(struct obd_export *exp, const struct lmv_stripe_md *lsm, struct 
cl_attr *attr) { -#ifdef __KERNEL__ int i; for (i = 0; i < lsm->lsm_md_stripe_count; i++) { @@ -3541,7 +3553,6 @@ int lmv_merge_attr(struct obd_export *exp, const struct lmv_stripe_md *lsm, if (attr->cat_mtime < LTIME_S(inode->i_mtime)) attr->cat_mtime = LTIME_S(inode->i_mtime); } -#endif return 0; } @@ -3611,7 +3622,6 @@ int __init lmv_init(void) LUSTRE_LMV_NAME, NULL); } -#ifdef __KERNEL__ static void lmv_exit(void) { class_unregister_type(LUSTRE_LMV_NAME); @@ -3623,4 +3633,3 @@ MODULE_LICENSE("GPL"); module_init(lmv_init); module_exit(lmv_exit); -#endif