*/
#define DEBUG_SUBSYSTEM S_LMV
-#ifdef __KERNEL__
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/math64.h>
#include <linux/seq_file.h>
#include <linux/namei.h>
-#else
-#include <liblustre.h>
-#endif
#include <lustre/lustre_idl.h>
#include <obd_support.h>
idx = lmv_hash_fnv1a(stripe_count, name, namelen);
break;
default:
- CERROR("Unknown hash type 0x%x\n", hash_type);
- return -EINVAL;
+ idx = -EBADFD;
+ break;
}
CDEBUG(D_INFO, "name %.*s hash_type %d idx %d\n", namelen, name,
obd->obd_proc_entry,
NULL, NULL);
if (IS_ERR(lmv->targets_proc_entry)) {
- CERROR("could not register /proc/fs/lustre/%s/%s/target_obds.",
- obd->obd_type->typ_name, obd->obd_name);
+ CERROR("%s: cannot register "
+ "/proc/fs/lustre/%s/%s/target_obds\n",
+ obd->obd_name, obd->obd_type->typ_name,
+ obd->obd_name);
lmv->targets_proc_entry = NULL;
}
}
cookiesize, def_cookiesize);
if (rc) {
CERROR("%s: obd_init_ea_size() failed on MDT target %d:"
- " rc = %d.\n", obd->obd_name, i, rc);
+ " rc = %d\n", obd->obd_name, i, rc);
break;
}
}
mdc_obd->obd_type->typ_name,
mdc_obd->obd_name);
if (mdc_symlink == NULL) {
- CERROR("Could not register LMV target "
- "/proc/fs/lustre/%s/%s/target_obds/%s.",
+ CERROR("cannot register LMV target "
+ "/proc/fs/lustre/%s/%s/target_obds/%s\n",
obd->obd_type->typ_name, obd->obd_name,
mdc_obd->obd_name);
}
oinfo = lsm_name_to_stripe_info(lsm, name, namelen);
if (IS_ERR(oinfo))
- RETURN((void *)oinfo);
+ RETURN(ERR_CAST(oinfo));
*fid = oinfo->lmo_fid;
*mds = oinfo->lmo_mds;
tgt = lmv_get_target(lmv, *mds, NULL);
return tgt;
}
+/**
+ * Locate mds by fid or name
+ *
+ * For a striped directory (lsm != NULL), it will locate the stripe
+ * by name hash (see lsm_name_to_stripe_info()). Note: if the hash_type
+ * is unknown, it will return -EBADFD, and lmv_intent_lookup might need
+ * to walk through all of the stripes to locate the entry.
+ *
+ * For a normal directory, it will locate the MDS by FID directly.
+ * \param[in] lmv LMV device
+ * \param[in] op_data client MD stack parameters, name, namelen
+ * mds_num etc.
+ * \param[in] fid object FID used to locate MDS.
+ *
+ * \retval pointer to the lmv_tgt_desc on success.
+ * \retval ERR_PTR(errno) on failure.
+ */
struct lmv_tgt_desc
*lmv_locate_mds(struct lmv_obd *lmv, struct md_op_data *op_data,
struct lu_fid *fid)
RETURN(rc);
}
-/*
- * Adjust a set of pages, each page containing an array of lu_dirpages,
- * so that each page can be used as a single logical lu_dirpage.
- *
- * A lu_dirpage is laid out as follows, where s = ldp_hash_start,
- * e = ldp_hash_end, f = ldp_flags, p = padding, and each "ent" is a
- * struct lu_dirent. It has size up to LU_PAGE_SIZE. The ldp_hash_end
- * value is used as a cookie to request the next lu_dirpage in a
- * directory listing that spans multiple pages (two in this example):
- * ________
- * | |
- * .|--------v------- -----.
- * |s|e|f|p|ent|ent| ... |ent|
- * '--|-------------- -----' Each CFS_PAGE contains a single
- * '------. lu_dirpage.
- * .---------v------- -----.
- * |s|e|f|p|ent| 0 | ... | 0 |
- * '----------------- -----'
- *
- * However, on hosts where the native VM page size (PAGE_CACHE_SIZE) is
- * larger than LU_PAGE_SIZE, a single host page may contain multiple
- * lu_dirpages. After reading the lu_dirpages from the MDS, the
- * ldp_hash_end of the first lu_dirpage refers to the one immediately
- * after it in the same CFS_PAGE (arrows simplified for brevity, but
- * in general e0==s1, e1==s2, etc.):
- *
- * .-------------------- -----.
- * |s0|e0|f0|p|ent|ent| ... |ent|
- * |---v---------------- -----|
- * |s1|e1|f1|p|ent|ent| ... |ent|
- * |---v---------------- -----| Here, each CFS_PAGE contains
- * ... multiple lu_dirpages.
- * |---v---------------- -----|
- * |s'|e'|f'|p|ent|ent| ... |ent|
- * '---|---------------- -----'
- * v
- * .----------------------------.
- * | next CFS_PAGE |
- *
- * This structure is transformed into a single logical lu_dirpage as follows:
- *
- * - Replace e0 with e' so the request for the next lu_dirpage gets the page
- * labeled 'next CFS_PAGE'.
- *
- * - Copy the LDF_COLLIDE flag from f' to f0 to correctly reflect whether
- * a hash collision with the next page exists.
- *
- * - Adjust the lde_reclen of the ending entry of each lu_dirpage to span
- * to the first entry of the next lu_dirpage.
- */
-#if PAGE_CACHE_SIZE > LU_PAGE_SIZE
-static void lmv_adjust_dirpages(struct page **pages, int ncfspgs, int nlupgs)
-{
- int i;
-
- for (i = 0; i < ncfspgs; i++) {
- struct lu_dirpage *dp = kmap(pages[i]);
- struct lu_dirpage *first = dp;
- struct lu_dirent *end_dirent = NULL;
- struct lu_dirent *ent;
- __u64 hash_end = dp->ldp_hash_end;
- __u32 flags = dp->ldp_flags;
-
- while (--nlupgs > 0) {
- ent = lu_dirent_start(dp);
- for (end_dirent = ent; ent != NULL;
- end_dirent = ent, ent = lu_dirent_next(ent));
-
- /* Advance dp to next lu_dirpage. */
- dp = (struct lu_dirpage *)((char *)dp + LU_PAGE_SIZE);
-
- /* Check if we've reached the end of the CFS_PAGE. */
- if (!((unsigned long)dp & ~CFS_PAGE_MASK))
- break;
-
- /* Save the hash and flags of this lu_dirpage. */
- hash_end = dp->ldp_hash_end;
- flags = dp->ldp_flags;
-
- /* Check if lu_dirpage contains no entries. */
- if (!end_dirent)
- break;
-
- /* Enlarge the end entry lde_reclen from 0 to
- * first entry of next lu_dirpage. */
- LASSERT(le16_to_cpu(end_dirent->lde_reclen) == 0);
- end_dirent->lde_reclen =
- cpu_to_le16((char *)(dp->ldp_entries) -
- (char *)end_dirent);
- }
-
- first->ldp_hash_end = hash_end;
- first->ldp_flags &= ~cpu_to_le32(LDF_COLLIDE);
- first->ldp_flags |= flags & cpu_to_le32(LDF_COLLIDE);
-
- kunmap(pages[i]);
- }
- LASSERTF(nlupgs == 0, "left = %d", nlupgs);
-}
-#else
-#define lmv_adjust_dirpages(pages, ncfspgs, nlupgs) do {} while (0)
-#endif /* PAGE_CACHE_SIZE > LU_PAGE_SIZE */
-
/**
* Get current minimum entry from striped directory
*
RETURN(rc);
}
+/**
+ * Unlink a file/directory
+ *
+ * Unlink a file or directory under the parent dir. The unlink request
+ * usually will be sent to the MDT where the child is located, but if
+ * the client does not have the child FID then request will be sent to the
+ * MDT where the parent is located.
+ *
+ * If the parent is a striped directory then it also needs to locate the
+ * stripe in which the name of the child is located, and replace the
+ * parent FID (@op_data->op_fid1) with the stripe FID. Note: if the stripe
+ * is unknown, it will walk through all of the sub-stripes until the child
+ * is finally unlinked.
+ *
+ * \param[in] exp export referring to LMV
+ * \param[in] op_data different parameters transferred between client
+ * MD stacks, name, namelen, FIDs etc.
+ * op_fid1 is the parent FID, op_fid2 is the child
+ * FID.
+ * \param[out] request pointer to the unlink request.
+ *
+ * \retval 0 on success
+ * \retval negative errno on failure.
+ */
static int lmv_unlink(struct obd_export *exp, struct md_op_data *op_data,
struct ptlrpc_request **request)
{
struct lmv_tgt_desc *parent_tgt = NULL;
struct mdt_body *body;
int rc;
+ int stripe_index = 0;
+ struct lmv_stripe_md *lsm = op_data->op_mea1;
ENTRY;
rc = lmv_check_connect(obd);
if (rc)
RETURN(rc);
-retry:
+retry_unlink:
+ /* For striped dir, we need to locate the parent as well */
+ if (lsm != NULL) {
+ struct lmv_tgt_desc *tmp;
+
+ LASSERT(op_data->op_name != NULL &&
+ op_data->op_namelen != 0);
+
+ tmp = lmv_locate_target_for_name(lmv, lsm,
+ op_data->op_name,
+ op_data->op_namelen,
+ &op_data->op_fid1,
+ &op_data->op_mds);
+
+ /* -EBADFD means the hash type is unknown, so we might
+ * need to try all of the sub-stripes here */
+ if (IS_ERR(tmp) && PTR_ERR(tmp) != -EBADFD)
+ RETURN(PTR_ERR(tmp));
+
+ /* Note: both migrating and unknown-hash directories need to
+ * try all of the sub-stripes, so we need to start searching
+ * for the name from stripe 0. A migrating directory is already
+ * handled inside lmv_locate_target_for_name(), so we only check
+ * for an unknown hash type directory here */
+ if (!lmv_is_known_hash_type(lsm)) {
+ struct lmv_oinfo *oinfo;
+
+ oinfo = &lsm->lsm_md_oinfo[stripe_index];
+
+ op_data->op_fid1 = oinfo->lmo_fid;
+ op_data->op_mds = oinfo->lmo_mds;
+ }
+ }
+
+try_next_stripe:
/* Send unlink requests to the MDT where the child is located */
- if (likely(!fid_is_zero(&op_data->op_fid2))) {
+ if (likely(!fid_is_zero(&op_data->op_fid2)))
tgt = lmv_find_target(lmv, &op_data->op_fid2);
- if (IS_ERR(tgt))
- RETURN(PTR_ERR(tgt));
-
- /* For striped dir, we need to locate the parent as well */
- if (op_data->op_mea1 != NULL) {
- struct lmv_tgt_desc *tmp;
-
- LASSERT(op_data->op_name != NULL &&
- op_data->op_namelen != 0);
- tmp = lmv_locate_target_for_name(lmv,
- op_data->op_mea1,
- op_data->op_name,
- op_data->op_namelen,
- &op_data->op_fid1,
- &op_data->op_mds);
- if (IS_ERR(tmp))
- RETURN(PTR_ERR(tmp));
- }
- } else {
+ else if (lsm != NULL)
+ tgt = lmv_get_target(lmv, op_data->op_mds, NULL);
+ else
tgt = lmv_locate_mds(lmv, op_data, &op_data->op_fid1);
- if (IS_ERR(tgt))
- RETURN(PTR_ERR(tgt));
- }
+
+ if (IS_ERR(tgt))
+ RETURN(PTR_ERR(tgt));
op_data->op_fsuid = from_kuid(&init_user_ns, current_fsuid());
op_data->op_fsgid = from_kgid(&init_user_ns, current_fsgid());
PFID(&op_data->op_fid1), PFID(&op_data->op_fid2), tgt->ltd_idx);
rc = md_unlink(tgt->ltd_exp, op_data, request);
- if (rc != 0 && rc != -EREMOTE)
+ if (rc != 0 && rc != -EREMOTE && rc != -ENOENT)
RETURN(rc);
+ /* Try next stripe if it is needed. */
+ if (rc == -ENOENT && lsm != NULL && lmv_need_try_all_stripes(lsm)) {
+ struct lmv_oinfo *oinfo;
+
+ stripe_index++;
+ if (stripe_index >= lsm->lsm_md_stripe_count)
+ RETURN(rc);
+
+ oinfo = &lsm->lsm_md_oinfo[stripe_index];
+
+ op_data->op_fid1 = oinfo->lmo_fid;
+ op_data->op_mds = oinfo->lmo_mds;
+
+ ptlrpc_req_finished(*request);
+ *request = NULL;
+
+ goto try_next_stripe;
+ }
+
body = req_capsule_server_get(&(*request)->rq_pill, &RMF_MDT_BODY);
if (body == NULL)
RETURN(-EPROTO);
ptlrpc_req_finished(*request);
*request = NULL;
- goto retry;
+ goto retry_unlink;
}
static int lmv_precleanup(struct obd_device *obd, enum obd_cleanup_stage stage)
lsm->lsm_md_magic = le32_to_cpu(lmm1->lmv_magic);
lsm->lsm_md_stripe_count = le32_to_cpu(lmm1->lmv_stripe_count);
lsm->lsm_md_master_mdt_index = le32_to_cpu(lmm1->lmv_master_mdt_index);
- lsm->lsm_md_hash_type = le32_to_cpu(lmm1->lmv_hash_type);
+ if (OBD_FAIL_CHECK(OBD_FAIL_UNKNOWN_LMV_STRIPE))
+ lsm->lsm_md_hash_type = LMV_HASH_TYPE_UNKNOWN;
+ else
+ lsm->lsm_md_hash_type = le32_to_cpu(lmm1->lmv_hash_type);
lsm->lsm_md_layout_version = le32_to_cpu(lmm1->lmv_layout_version);
- fid_le_to_cpu(&lsm->lsm_md_master_fid, &lmm1->lmv_master_fid);
cplen = strlcpy(lsm->lsm_md_pool_name, lmm1->lmv_pool_name,
sizeof(lsm->lsm_md_pool_name));
- if (!fid_is_sane(&lsm->lsm_md_master_fid))
- RETURN(-EPROTO);
-
if (cplen >= sizeof(lsm->lsm_md_pool_name))
RETURN(-E2BIG);
lsm = *lsmp;
/* Free memmd */
if (lsm != NULL && lmm == NULL) {
-#ifdef __KERNEL__
int i;
for (i = 0; i < lsm->lsm_md_stripe_count; i++) {
/* For migrating inode, the master stripe and master
i == 0) && lsm->lsm_md_oinfo[i].lmo_root != NULL)
iput(lsm->lsm_md_oinfo[i].lmo_root);
}
-#endif
lsm_size = lmv_stripe_md_size(lsm->lsm_md_stripe_count);
OBD_FREE(lsm, lsm_size);
*lsmp = NULL;
int lmv_merge_attr(struct obd_export *exp, const struct lmv_stripe_md *lsm,
struct cl_attr *attr)
{
-#ifdef __KERNEL__
int i;
for (i = 0; i < lsm->lsm_md_stripe_count; i++) {
if (attr->cat_mtime < LTIME_S(inode->i_mtime))
attr->cat_mtime = LTIME_S(inode->i_mtime);
}
-#endif
return 0;
}
LUSTRE_LMV_NAME, NULL);
}
-#ifdef __KERNEL__
static void lmv_exit(void)
{
class_unregister_type(LUSTRE_LMV_NAME);
module_init(lmv_init);
module_exit(lmv_exit);
-#endif