X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Fmdt%2Fmdt_handler.c;h=f786f008bb777e335d8df482d028786f6c6609ef;hp=ef4eeda1c87628bec21fd1b00e8ebf102ae19f4c;hb=06072de19cf490f52b9f20dd5a8d9dc8509ceb3f;hpb=42e786f742fa3d13f7d6b66afaba63e77707015c diff --git a/lustre/mdt/mdt_handler.c b/lustre/mdt/mdt_handler.c index ef4eeda..f786f00 100644 --- a/lustre/mdt/mdt_handler.c +++ b/lustre/mdt/mdt_handler.c @@ -49,31 +49,30 @@ #define DEBUG_SUBSYSTEM S_MDS #include -/* - * struct OBD_{ALLOC,FREE}*() - */ -#include + +#include +#include +#include #include -/* struct ptlrpc_request */ +#include +#include #include -/* struct obd_export */ -#include -/* struct obd_device */ -#include -/* lu2dt_dev() */ -#include +#include #include -#include -#include "mdt_internal.h" -#include #include #include -#include -#include +#include +#include +#include + +#include + +#include "mdt_internal.h" + static unsigned int max_mod_rpcs_per_client = 8; -CFS_MODULE_PARM(max_mod_rpcs_per_client, "i", uint, 0644, - "maximum number of modify RPCs in flight allowed per client"); +module_param(max_mod_rpcs_per_client, uint, 0644); +MODULE_PARM_DESC(max_mod_rpcs_per_client, "maximum number of modify RPCs in flight allowed per client"); mdl_mode_t mdt_mdl_lock_modes[] = { [LCK_MINMODE] = MDL_MINMODE, @@ -173,6 +172,7 @@ void mdt_lock_pdo_init(struct mdt_lock_handle *lh, enum ldlm_mode lock_mode, const struct lu_name *lname) { lh->mlh_reg_mode = lock_mode; + lh->mlh_pdo_mode = LCK_MINMODE; lh->mlh_rreg_mode = lock_mode; lh->mlh_type = MDT_PDO_LOCK; @@ -268,12 +268,98 @@ static void mdt_lock_pdo_mode(struct mdt_thread_info *info, struct mdt_object *o EXIT; } -static int mdt_getstatus(struct tgt_session_info *tsi) +static int mdt_lookup_fileset(struct mdt_thread_info *info, const char *fileset, + struct lu_fid *fid) +{ + struct mdt_device *mdt = info->mti_mdt; + struct lu_name *lname = &info->mti_name; + char *name = NULL; + struct mdt_object *parent; + u32 mode; + int rc = 0; + + LASSERT(!info->mti_cross_ref); + + OBD_ALLOC(name, NAME_MAX + 1); + if (name == NULL) + return -ENOMEM; + lname->ln_name = name; + + /* + * We may want to allow this to mount a completely separate + * fileset from the MDT in the future, but keeping it to + * ROOT/ only for now avoid potential security issues. + */ + *fid = mdt->mdt_md_root_fid; + + while (rc == 0 && fileset != NULL && *fileset != '\0') { + const char *s1 = fileset; + const char *s2; + + while (*++s1 == '/') + ; + s2 = s1; + while (*s2 != '/' && *s2 != '\0') + s2++; + + if (s2 == s1) + break; + + fileset = s2; + + lname->ln_namelen = s2 - s1; + if (lname->ln_namelen > NAME_MAX) { + rc = -EINVAL; + break; + } + + /* reject .. as a path component */ + if (lname->ln_namelen == 2 && + strncmp(s1, "..", 2) == 0) { + rc = -EINVAL; + break; + } + + strncpy(name, s1, lname->ln_namelen); + name[lname->ln_namelen] = '\0'; + + parent = mdt_object_find(info->mti_env, mdt, fid); + if (IS_ERR(parent)) { + rc = PTR_ERR(parent); + break; + } + /* Only got the fid of this obj by name */ + fid_zero(fid); + rc = mdo_lookup(info->mti_env, mdt_object_child(parent), lname, + fid, &info->mti_spec); + mdt_object_put(info->mti_env, parent); + } + if (!rc) { + parent = mdt_object_find(info->mti_env, mdt, fid); + if (IS_ERR(parent)) + rc = PTR_ERR(parent); + else { + mode = lu_object_attr(&parent->mot_obj); + mdt_object_put(info->mti_env, parent); + if (!S_ISDIR(mode)) + rc = -ENOTDIR; + } + } + + OBD_FREE(name, NAME_MAX + 1); + + return rc; +} + +static int mdt_get_root(struct tgt_session_info *tsi) { struct mdt_thread_info *info = tsi2mdt_info(tsi); struct mdt_device *mdt = info->mti_mdt; struct mdt_body *repbody; + char *fileset = NULL, *buffer = NULL; int rc; + struct obd_export *exp = info->mti_exp; + char *nodemap_fileset; ENTRY; @@ -281,16 +367,48 @@ static int mdt_getstatus(struct tgt_session_info *tsi) if (rc) GOTO(out, rc = err_serious(rc)); - if (OBD_FAIL_CHECK(OBD_FAIL_MDS_GETSTATUS_PACK)) + if (OBD_FAIL_CHECK(OBD_FAIL_MDS_GET_ROOT_PACK)) GOTO(out, rc = err_serious(-ENOMEM)); repbody = req_capsule_server_get(info->mti_pill, &RMF_MDT_BODY); - repbody->mbo_fid1 = mdt->mdt_md_root_fid; + if (req_capsule_get_size(info->mti_pill, &RMF_NAME, RCL_CLIENT) > 0) { + fileset = req_capsule_client_get(info->mti_pill, &RMF_NAME); + if (fileset == NULL) + GOTO(out, rc = err_serious(-EFAULT)); + } + + nodemap_fileset = nodemap_get_fileset(exp->exp_target_data.ted_nodemap); + if (nodemap_fileset && nodemap_fileset[0]) { + if (fileset) { + /* consider fileset from client as a sub-fileset + * of the nodemap one */ + OBD_ALLOC(buffer, PATH_MAX + 1); + if (buffer == NULL) + GOTO(out, rc = err_serious(-ENOMEM)); + if (snprintf(buffer, PATH_MAX + 1, "%s/%s", + nodemap_fileset, fileset) >= PATH_MAX + 1) + GOTO(out, rc = err_serious(-EINVAL)); + fileset = buffer; + } else { + /* enforce fileset as specified in the nodemap */ + fileset = nodemap_fileset; + } + } + + if (fileset) { + rc = mdt_lookup_fileset(info, fileset, &repbody->mbo_fid1); + if (rc < 0) + GOTO(out, rc = err_serious(rc)); + } else { + repbody->mbo_fid1 = mdt->mdt_md_root_fid; + } repbody->mbo_valid |= OBD_MD_FLID; EXIT; out: mdt_thread_info_fini(info); + if (buffer) + OBD_FREE(buffer, PATH_MAX+1); return rc; } @@ -407,9 +525,9 @@ int mdt_pack_acl2body(struct mdt_thread_info *info, struct mdt_body *repbody, void mdt_pack_attr2body(struct mdt_thread_info *info, struct mdt_body *b, const struct lu_attr *attr, const struct lu_fid *fid) { - struct md_attr *ma = &info->mti_attr; - struct obd_export *exp = info->mti_exp; - struct lu_nodemap *nodemap = exp->exp_target_data.ted_nodemap; + struct md_attr *ma = &info->mti_attr; + struct obd_export *exp = info->mti_exp; + struct lu_nodemap *nodemap = NULL; LASSERT(ma->ma_valid & MA_INODE); @@ -433,6 +551,11 @@ void mdt_pack_attr2body(struct mdt_thread_info *info, struct mdt_body *b, b->mbo_nlink = attr->la_nlink; b->mbo_valid |= OBD_MD_FLNLINK; } + if (attr->la_valid & (LA_UID|LA_GID)) { + nodemap = nodemap_get_from_exp(exp); + if (IS_ERR(nodemap)) + goto out; + } if (attr->la_valid & LA_UID) { b->mbo_uid = nodemap_map_id(nodemap, NODEMAP_UID, NODEMAP_FS_TO_CLIENT, @@ -445,6 +568,7 @@ void mdt_pack_attr2body(struct mdt_thread_info *info, struct mdt_body *b, attr->la_gid); b->mbo_valid |= OBD_MD_FLGID; } + b->mbo_mode = attr->la_mode; if (attr->la_valid & LA_MODE) b->mbo_valid |= OBD_MD_FLMODE; @@ -494,6 +618,10 @@ void mdt_pack_attr2body(struct mdt_thread_info *info, struct mdt_body *b, if (fid != NULL && (b->mbo_valid & OBD_MD_FLSIZE)) CDEBUG(D_VFSTRACE, DFID": returning size %llu\n", PFID(fid), (unsigned long long)b->mbo_size); + +out: + if (!IS_ERR_OR_NULL(nodemap)) + nodemap_putref(nodemap); } static inline int mdt_body_has_lov(const struct lu_attr *la, @@ -821,7 +949,6 @@ static int mdt_getattr_internal(struct mdt_thread_info *info, struct mdt_body *repbody; struct lu_buf *buffer = &info->mti_buf; struct obd_export *exp = info->mti_exp; - struct lu_nodemap *nodemap = exp->exp_target_data.ted_nodemap; int rc; int is_root; ENTRY; @@ -1028,8 +1155,14 @@ static int mdt_getattr_internal(struct mdt_thread_info *info, } #ifdef CONFIG_FS_POSIX_ACL else if ((exp_connect_flags(req->rq_export) & OBD_CONNECT_ACL) && - (reqbody->mbo_valid & OBD_MD_FLACL)) + (reqbody->mbo_valid & OBD_MD_FLACL)) { + struct lu_nodemap *nodemap = nodemap_get_from_exp(exp); + if (IS_ERR(nodemap)) + RETURN(PTR_ERR(nodemap)); + rc = mdt_pack_acl2body(info, repbody, o, nodemap); + nodemap_putref(nodemap); + } #endif out: @@ -1328,7 +1461,7 @@ static int mdt_getattr_name_lock(struct mdt_thread_info *info, if (!mdt_object_exists(child)) { LU_OBJECT_DEBUG(D_INFO, info->mti_env, &child->mot_obj, - "remote object doesn't exist.\n"); + "remote object doesn't exist."); mdt_object_unlock(info, child, lhc, 1); RETURN(-ENOENT); } @@ -1368,7 +1501,7 @@ static int mdt_getattr_name_lock(struct mdt_thread_info *info, if (unlikely(!mdt_object_exists(parent)) && lu_name_is_valid(lname)) { LU_OBJECT_DEBUG(D_INODE, info->mti_env, &parent->mot_obj, - "Parent doesn't exist!\n"); + "Parent doesn't exist!"); RETURN(-ESTALE); } @@ -1436,6 +1569,13 @@ static int mdt_getattr_name_lock(struct mdt_thread_info *info, GOTO(out_parent, rc = PTR_ERR(child)); OBD_FAIL_TIMEOUT(OBD_FAIL_MDS_RESEND, obd_timeout * 2); + if (!mdt_object_exists(child)) { + LU_OBJECT_DEBUG(D_INODE, info->mti_env, + &child->mot_obj, + "Object doesn't exist!"); + GOTO(out_child, rc = -ENOENT); + } + rc = mdt_check_resent_lock(info, child, lhc); if (rc < 0) { GOTO(out_child, rc); @@ -1444,13 +1584,6 @@ static int mdt_getattr_name_lock(struct mdt_thread_info *info, mdt_lock_reg_init(lhc, LCK_PR); try_layout = false; - if (!mdt_object_exists(child)) { - LU_OBJECT_DEBUG(D_INODE, info->mti_env, - &child->mot_obj, - "Object doesn't exist!\n"); - GOTO(out_child, rc = -ENOENT); - } - if (!(child_bits & MDS_INODELOCK_UPDATE) && mdt_object_exists(child) && !mdt_object_remote(child)) { struct md_attr *ma = &info->mti_attr; @@ -1555,7 +1688,7 @@ static int mdt_getattr_name(struct tgt_session_info *tsi) repbody->mbo_eadatasize = 0; repbody->mbo_aclsize = 0; - rc = mdt_init_ucred(info, reqbody); + rc = mdt_init_ucred_intent_getattr(info, reqbody); if (unlikely(rc)) GOTO(out_shrink, rc); @@ -1576,7 +1709,7 @@ out_shrink: } static int mdt_iocontrol(unsigned int cmd, struct obd_export *exp, int len, - void *karg, void *uarg); + void *karg, void __user *uarg); static int mdt_set_info(struct tgt_session_info *tsi) { @@ -1927,7 +2060,7 @@ static int mdt_quotactl(struct tgt_session_info *tsi) int id, rc; struct mdt_device *mdt = mdt_exp2dev(exp); struct lu_device *qmt = mdt->mdt_qmt_dev; - struct lu_nodemap *nodemap = exp->exp_target_data.ted_nodemap; + struct lu_nodemap *nodemap; ENTRY; oqctl = req_capsule_client_get(pill, &RMF_OBD_QUOTACTL); @@ -1938,23 +2071,27 @@ static int mdt_quotactl(struct tgt_session_info *tsi) if (rc) RETURN(err_serious(rc)); + nodemap = nodemap_get_from_exp(exp); + if (IS_ERR(nodemap)) + RETURN(PTR_ERR(nodemap)); + switch (oqctl->qc_cmd) { /* master quotactl */ case Q_SETINFO: case Q_SETQUOTA: if (!nodemap_can_setquota(nodemap)) - RETURN(-EPERM); + GOTO(out_nodemap, rc = -EPERM); case Q_GETINFO: case Q_GETQUOTA: if (qmt == NULL) - RETURN(-EOPNOTSUPP); + GOTO(out_nodemap, rc = -EOPNOTSUPP); /* slave quotactl */ case Q_GETOINFO: case Q_GETOQUOTA: break; default: CERROR("Unsupported quotactl command: %d\n", oqctl->qc_cmd); - RETURN(-EFAULT); + GOTO(out_nodemap, rc = -EFAULT); } /* map uid/gid for remote client */ @@ -1966,7 +2103,7 @@ static int mdt_quotactl(struct tgt_session_info *tsi) if (unlikely(oqctl->qc_cmd != Q_GETQUOTA && oqctl->qc_cmd != Q_GETINFO)) - RETURN(-EPERM); + GOTO(out_nodemap, rc = -EPERM); if (oqctl->qc_type == USRQUOTA) id = lustre_idmap_lookup_uid(NULL, idmap, 0, @@ -1975,11 +2112,11 @@ static int mdt_quotactl(struct tgt_session_info *tsi) id = lustre_idmap_lookup_gid(NULL, idmap, 0, oqctl->qc_id); else - RETURN(-EINVAL); + GOTO(out_nodemap, rc = -EINVAL); if (id == CFS_IDMAP_NOTFOUND) { CDEBUG(D_QUOTA, "no mapping for id %u\n", oqctl->qc_id); - RETURN(-EACCES); + GOTO(out_nodemap, rc = -EACCES); } } @@ -1992,7 +2129,7 @@ static int mdt_quotactl(struct tgt_session_info *tsi) repoqc = req_capsule_server_get(pill, &RMF_OBD_QUOTACTL); if (repoqc == NULL) - RETURN(err_serious(-EFAULT)); + GOTO(out_nodemap, rc = err_serious(-EFAULT)); if (oqctl->qc_id != id) swap(oqctl->qc_id, id); @@ -2016,14 +2153,20 @@ static int mdt_quotactl(struct tgt_session_info *tsi) default: CERROR("Unsupported quotactl command: %d\n", oqctl->qc_cmd); - RETURN(-EFAULT); + GOTO(out_nodemap, rc = -EFAULT); } if (oqctl->qc_id != id) swap(oqctl->qc_id, id); *repoqc = *oqctl; - RETURN(rc); + + EXIT; + +out_nodemap: + nodemap_putref(nodemap); + + return rc; } /** clone llog ctxt from child (mdd) @@ -2158,11 +2301,14 @@ static void mdt_device_commit_async(const struct lu_env *env, { struct dt_device *dt = mdt->mdt_bottom; int rc; + ENTRY; rc = dt->dd_ops->dt_commit_async(env, dt); if (unlikely(rc != 0)) CWARN("%s: async commit start failed: rc = %d\n", mdt_obd_name(mdt), rc); + atomic_inc(&mdt->mdt_async_commit_count); + EXIT; } /** @@ -2218,17 +2364,22 @@ int mdt_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc, if (flag == LDLM_CB_CANCELING) RETURN(0); + lock_res_and_lock(lock); if (lock->l_blocking_ast != mdt_blocking_ast) { unlock_res_and_lock(lock); RETURN(0); } - if (mdt_cos_is_enabled(mdt) && - lock->l_req_mode & (LCK_PW | LCK_EX) && - lock->l_blocking_lock != NULL && - lock->l_client_cookie != lock->l_blocking_lock->l_client_cookie) { - mdt_set_lock_sync(lock); - } + if (lock->l_req_mode & (LCK_PW | LCK_EX) && + lock->l_blocking_lock != NULL) { + if (mdt_cos_is_enabled(mdt) && + lock->l_client_cookie != + lock->l_blocking_lock->l_client_cookie) + mdt_set_lock_sync(lock); + else if (mdt_slc_is_enabled(mdt) && + ldlm_is_cos_incompat(lock->l_blocking_lock)) + mdt_set_lock_sync(lock); + } rc = ldlm_blocking_ast_nocheck(lock); /* There is no lock conflict if l_blocking_lock == NULL, @@ -2249,28 +2400,45 @@ int mdt_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc, RETURN(rc); } -/* Used for cross-MDT lock */ +/* + * Blocking AST for cross-MDT lock + * + * Discard lock from uncommitted_slc_locks and cancel it. + * + * \param lock the lock which blocks a request or cancelling lock + * \param desc unused + * \param data unused + * \param flag indicates whether this cancelling or blocking callback + * \retval 0 on success + * \retval negative number on error + */ int mdt_remote_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc, void *data, int flag) { struct lustre_handle lockh; int rc; + ENTRY; switch (flag) { case LDLM_CB_BLOCKING: ldlm_lock2handle(lock, &lockh); - rc = ldlm_cli_cancel(&lockh, LCF_ASYNC); + rc = ldlm_cli_cancel(&lockh, + ldlm_is_atomic_cb(lock) ? 0 : LCF_ASYNC); if (rc < 0) { CDEBUG(D_INODE, "ldlm_cli_cancel: %d\n", rc); RETURN(rc); } break; case LDLM_CB_CANCELING: - LDLM_DEBUG(lock, "Revoke remote lock\n"); + LDLM_DEBUG(lock, "Revoke remote lock"); + /* discard slc lock here so that it can be cleaned anytime, + * especially for cleanup_resource() */ + tgt_discard_slc_lock(lock); break; default: LBUG(); } + RETURN(0); } @@ -2289,7 +2457,7 @@ int mdt_check_resent_lock(struct mdt_thread_info *info, /* Lock is pinned by ldlm_handle_enqueue0() as it is * a resend case, however, it could be already destroyed * due to client eviction or a raced cancel RPC. */ - LDLM_DEBUG_NOLOCK("Invalid lock handle "LPX64"\n", + LDLM_DEBUG_NOLOCK("Invalid lock handle "LPX64, lhc->mlh_reg_lh.cookie); RETURN(-ESTALE); } @@ -2344,12 +2512,12 @@ int mdt_remote_object_lock(struct mdt_thread_info *mti, struct mdt_object *o, static int mdt_object_local_lock(struct mdt_thread_info *info, struct mdt_object *o, struct mdt_lock_handle *lh, __u64 ibits, - bool nonblock) + bool nonblock, bool cos_incompat) { struct ldlm_namespace *ns = info->mti_mdt->mdt_namespace; union ldlm_policy_data *policy = &info->mti_policy; struct ldlm_res_id *res_id = &info->mti_res_id; - __u64 dlmflags; + __u64 dlmflags = 0; int rc; ENTRY; @@ -2358,6 +2526,14 @@ static int mdt_object_local_lock(struct mdt_thread_info *info, LASSERT(lh->mlh_reg_mode != LCK_MINMODE); LASSERT(lh->mlh_type != MDT_NUL_LOCK); + if (cos_incompat) { + LASSERT(lh->mlh_reg_mode == LCK_PW || + lh->mlh_reg_mode == LCK_EX); + dlmflags |= LDLM_FL_COS_INCOMPAT; + } else if (mdt_cos_is_enabled(info->mti_mdt)) { + dlmflags |= LDLM_FL_COS_ENABLED; + } + /* Only enqueue LOOKUP lock for remote object */ if (mdt_object_remote(o)) LASSERT(ibits == MDS_INODELOCK_LOOKUP); @@ -2377,7 +2553,7 @@ static int mdt_object_local_lock(struct mdt_thread_info *info, memset(policy, 0, sizeof(*policy)); fid_build_reg_res_name(mdt_object_fid(o), res_id); - dlmflags = LDLM_FL_ATOMIC_CB; + dlmflags |= LDLM_FL_ATOMIC_CB; if (nonblock) dlmflags |= LDLM_FL_BLOCK_NOWAIT; @@ -2434,15 +2610,18 @@ out_unlock: static int mdt_object_lock_internal(struct mdt_thread_info *info, struct mdt_object *o, - struct mdt_lock_handle *lh, __u64 ibits, - bool nonblock) + struct mdt_lock_handle *lh, __u64 ibits, bool nonblock, + bool cos_incompat) { struct mdt_lock_handle *local_lh = NULL; int rc; ENTRY; - if (!mdt_object_remote(o)) - return mdt_object_local_lock(info, o, lh, ibits, nonblock); + if (!mdt_object_remote(o)) { + rc = mdt_object_local_lock(info, o, lh, ibits, nonblock, + cos_incompat); + RETURN(rc); + } /* XXX do not support PERM/LAYOUT/XATTR lock for remote object yet */ ibits &= ~(MDS_INODELOCK_PERM | MDS_INODELOCK_LAYOUT | @@ -2450,9 +2629,8 @@ mdt_object_lock_internal(struct mdt_thread_info *info, struct mdt_object *o, /* Only enqueue LOOKUP lock for remote object */ if (ibits & MDS_INODELOCK_LOOKUP) { - rc = mdt_object_local_lock(info, o, lh, - MDS_INODELOCK_LOOKUP, - nonblock); + rc = mdt_object_local_lock(info, o, lh, MDS_INODELOCK_LOOKUP, + nonblock, cos_incompat); if (rc != ELDLM_OK) RETURN(rc); @@ -2490,7 +2668,16 @@ mdt_object_lock_internal(struct mdt_thread_info *info, struct mdt_object *o, int mdt_object_lock(struct mdt_thread_info *info, struct mdt_object *o, struct mdt_lock_handle *lh, __u64 ibits) { - return mdt_object_lock_internal(info, o, lh, ibits, false); + return mdt_object_lock_internal(info, o, lh, ibits, false, false); +} + +int mdt_reint_object_lock(struct mdt_thread_info *info, struct mdt_object *o, + struct mdt_lock_handle *lh, __u64 ibits, + bool cos_incompat) +{ + LASSERT(lh->mlh_reg_mode == LCK_PW || lh->mlh_reg_mode == LCK_EX); + return mdt_object_lock_internal(info, o, lh, ibits, false, + cos_incompat); } int mdt_object_lock_try(struct mdt_thread_info *info, struct mdt_object *o, @@ -2499,7 +2686,22 @@ int mdt_object_lock_try(struct mdt_thread_info *info, struct mdt_object *o, struct mdt_lock_handle tmp = *lh; int rc; - rc = mdt_object_lock_internal(info, o, &tmp, ibits, true); + rc = mdt_object_lock_internal(info, o, &tmp, ibits, true, false); + if (rc == 0) + *lh = tmp; + + return rc == 0; +} + +int mdt_reint_object_lock_try(struct mdt_thread_info *info, + struct mdt_object *o, struct mdt_lock_handle *lh, + __u64 ibits, bool cos_incompat) +{ + struct mdt_lock_handle tmp = *lh; + int rc; + + LASSERT(lh->mlh_reg_mode == LCK_PW || lh->mlh_reg_mode == LCK_EX); + rc = mdt_object_lock_internal(info, o, &tmp, ibits, true, cos_incompat); if (rc == 0) *lh = tmp; @@ -2532,24 +2734,27 @@ static void mdt_save_lock(struct mdt_thread_info *info, struct lustre_handle *h, struct mdt_device *mdt = info->mti_mdt; struct ldlm_lock *lock = ldlm_handle2lock(h); struct ptlrpc_request *req = mdt_info_req(info); - int no_ack = 0; + int cos; + + cos = (mdt_cos_is_enabled(mdt) || + mdt_slc_is_enabled(mdt)); LASSERTF(lock != NULL, "no lock for cookie "LPX64"\n", h->cookie); + /* there is no request if mdt_object_unlock() is called * from mdt_export_cleanup()->mdt_add_dirty_flag() */ if (likely(req != NULL)) { CDEBUG(D_HA, "request = %p reply state = %p" " transno = "LPD64"\n", req, req->rq_reply_state, req->rq_transno); - if (mdt_cos_is_enabled(mdt)) { - no_ack = 1; + if (cos) { ldlm_lock_downgrade(lock, LCK_COS); mode = LCK_COS; } - ptlrpc_save_lock(req, h, mode, no_ack); + ptlrpc_save_lock(req, h, mode, cos); } else { - ldlm_lock_decref(h, mode); + mdt_fid_unlock(h, mode); } if (mdt_is_lock_sync(lock)) { CDEBUG(D_HA, "found sync-lock," @@ -2566,6 +2771,41 @@ static void mdt_save_lock(struct mdt_thread_info *info, struct lustre_handle *h, } /** + * Save cross-MDT lock in uncommitted_slc_locks + * + * Keep the lock referenced until transaction commit happens or release the lock + * immediately depending on input parameters. + * + * \param info thead info object + * \param h lock handle + * \param mode lock mode + * \param decref force immediate lock releasing + */ +static void mdt_save_remote_lock(struct mdt_thread_info *info, + struct lustre_handle *h, enum ldlm_mode mode, + int decref) +{ + ENTRY; + + if (lustre_handle_is_used(h)) { + if (decref || !info->mti_has_trans || + !(mode & (LCK_PW | LCK_EX))) { + ldlm_lock_decref_and_cancel(h, mode); + } else { + struct ldlm_lock *lock = ldlm_handle2lock(h); + struct ptlrpc_request *req = mdt_info_req(info); + + LASSERT(req != NULL); + tgt_save_slc_lock(lock, req->rq_transno); + ldlm_lock_decref(h, mode); + } + h->cookie = 0ull; + } + + EXIT; +} + +/** * Unlock mdt object. * * Immeditely release the regular lock and the PDO lock or save the @@ -2578,17 +2818,15 @@ static void mdt_save_lock(struct mdt_thread_info *info, struct lustre_handle *h, * \param decref force immediate lock releasing */ void mdt_object_unlock(struct mdt_thread_info *info, struct mdt_object *o, - struct mdt_lock_handle *lh, int decref) + struct mdt_lock_handle *lh, int decref) { - ENTRY; - - mdt_save_lock(info, &lh->mlh_pdo_lh, lh->mlh_pdo_mode, decref); - mdt_save_lock(info, &lh->mlh_reg_lh, lh->mlh_reg_mode, decref); + ENTRY; - if (lustre_handle_is_used(&lh->mlh_rreg_lh)) - ldlm_lock_decref(&lh->mlh_rreg_lh, lh->mlh_rreg_mode); + mdt_save_lock(info, &lh->mlh_pdo_lh, lh->mlh_pdo_mode, decref); + mdt_save_lock(info, &lh->mlh_reg_lh, lh->mlh_reg_mode, decref); + mdt_save_remote_lock(info, &lh->mlh_rreg_lh, lh->mlh_rreg_mode, decref); - EXIT; + EXIT; } struct mdt_object *mdt_object_find_lock(struct mdt_thread_info *info, @@ -2932,7 +3170,7 @@ static int mdt_intent_lock_replace(struct mdt_thread_info *info, struct ldlm_lock **lockp, struct mdt_lock_handle *lh, - __u64 flags) + __u64 flags, int result) { struct ptlrpc_request *req = mdt_info_req(info); struct ldlm_lock *lock = *lockp; @@ -2946,8 +3184,19 @@ mdt_intent_lock_replace(struct mdt_thread_info *info, RETURN(0); } - LASSERTF(new_lock != NULL, - "lockh "LPX64"\n", lh->mlh_reg_lh.cookie); + if (new_lock == NULL && (flags & LDLM_FL_RESENT)) { + /* Lock is pinned by ldlm_handle_enqueue0() as it is + * a resend case, however, it could be already destroyed + * due to client eviction or a raced cancel RPC. */ + LDLM_DEBUG_NOLOCK("Invalid lock handle "LPX64"\n", + lh->mlh_reg_lh.cookie); + lh->mlh_reg_lh.cookie = 0; + RETURN(-ESTALE); + } + + LASSERTF(new_lock != NULL, + "lockh "LPX64" flags "LPX64" rc %d\n", + lh->mlh_reg_lh.cookie, flags, result); /* * If we've already given this lock to a client once, then we should @@ -3081,7 +3330,7 @@ static int mdt_intent_getxattr(enum mdt_it_code opcode, grc = mdt_getxattr(info); - rc = mdt_intent_lock_replace(info, lockp, lhc, flags); + rc = mdt_intent_lock_replace(info, lockp, lhc, flags, 0); if (mdt_info_req(info)->rq_repmsg != NULL) ldlm_rep = req_capsule_server_get(info->mti_pill, &RMF_DLM_REP); @@ -3150,7 +3399,7 @@ static int mdt_intent_getattr(enum mdt_it_code opcode, GOTO(out_ucred, rc = ELDLM_LOCK_ABORTED); } - rc = mdt_intent_lock_replace(info, lockp, lhc, flags); + rc = mdt_intent_lock_replace(info, lockp, lhc, flags, rc); EXIT; out_ucred: mdt_exit_ucred(info); @@ -3171,6 +3420,7 @@ static int mdt_intent_layout(enum mdt_it_code opcode, struct layout_intent *layout; struct lu_fid *fid; struct mdt_object *obj = NULL; + int layout_size = 0; int rc = 0; ENTRY; @@ -3180,6 +3430,16 @@ static int mdt_intent_layout(enum mdt_it_code opcode, RETURN(-EINVAL); } + layout = req_capsule_client_get(info->mti_pill, &RMF_LAYOUT_INTENT); + if (layout == NULL) + RETURN(-EPROTO); + + if (layout->li_opc != LAYOUT_INTENT_ACCESS) { + CERROR("%s: Unsupported layout intent opc %d\n", + mdt_obd_name(info->mti_mdt), layout->li_opc); + RETURN(-EINVAL); + } + fid = &info->mti_tmp_fid2; fid_extract_from_res_name(fid, &(*lockp)->l_resource->lr_name); @@ -3188,40 +3448,33 @@ static int mdt_intent_layout(enum mdt_it_code opcode, obj = mdt_object_find(info->mti_env, info->mti_mdt, fid); if (IS_ERR(obj)) - RETURN(PTR_ERR(obj)); + GOTO(out, rc = PTR_ERR(obj)); if (mdt_object_exists(obj) && !mdt_object_remote(obj)) { - /* get the length of lsm */ - rc = mdt_attr_get_eabuf_size(info, obj); - if (rc < 0) { - mdt_object_put(info->mti_env, obj); - RETURN(rc); - } + layout_size = mdt_attr_get_eabuf_size(info, obj); + if (layout_size < 0) + GOTO(out_obj, rc = layout_size); - if (rc > info->mti_mdt->mdt_max_mdsize) - info->mti_mdt->mdt_max_mdsize = rc; + if (layout_size > info->mti_mdt->mdt_max_mdsize) + info->mti_mdt->mdt_max_mdsize = layout_size; } - mdt_object_put(info->mti_env, obj); - (*lockp)->l_lvb_type = LVB_T_LAYOUT; - req_capsule_set_size(info->mti_pill, &RMF_DLM_LVB, RCL_SERVER, rc); + req_capsule_set_size(info->mti_pill, &RMF_DLM_LVB, RCL_SERVER, + layout_size); rc = req_capsule_server_pack(info->mti_pill); - if (rc != 0) - RETURN(-EINVAL); + GOTO(out_obj, rc); - if (lustre_handle_is_used(&lhc->mlh_reg_lh)) - rc = mdt_intent_lock_replace(info, lockp, lhc, flags); +out_obj: + mdt_object_put(info->mti_env, obj); - layout = req_capsule_client_get(info->mti_pill, &RMF_LAYOUT_INTENT); - LASSERT(layout != NULL); - if (layout->li_opc == LAYOUT_INTENT_ACCESS) - /* return to normal/resent ldlm handling */ - RETURN(rc); + if (rc == 0 && lustre_handle_is_used(&lhc->mlh_reg_lh)) + rc = mdt_intent_lock_replace(info, lockp, lhc, flags, rc); + +out: + lhc->mlh_reg_lh.cookie = 0; - CERROR("%s: Unsupported layout intent (%d)\n", - mdt_obd_name(info->mti_mdt), layout->li_opc); - RETURN(-EINVAL); + return rc; } static int mdt_intent_reint(enum mdt_it_code opcode, @@ -3271,15 +3524,16 @@ static int mdt_intent_reint(enum mdt_it_code opcode, if (lustre_handle_is_used(&lhc->mlh_reg_lh) && (rc == 0 || rc == -MDT_EREMOTE_OPEN)) { rep->lock_policy_res2 = 0; - rc = mdt_intent_lock_replace(info, lockp, lhc, flags); + rc = mdt_intent_lock_replace(info, lockp, lhc, flags, rc); RETURN(rc); } rep->lock_policy_res2 = clear_serious(rc); if (rep->lock_policy_res2 == -ENOENT && - mdt_get_disposition(rep, DISP_LOOKUP_NEG)) - rep->lock_policy_res2 = 0; + mdt_get_disposition(rep, DISP_LOOKUP_NEG) && + !mdt_get_disposition(rep, DISP_OPEN_CREATE)) + rep->lock_policy_res2 = 0; lhc->mlh_reg_lh.cookie = 0ull; if (rc == -ENOTCONN || rc == -ENODEV || @@ -3390,6 +3644,8 @@ static int mdt_intent_opc(enum ldlm_intent_flags itopc, if (flv->it_act != NULL) { struct ldlm_reply *rep; + OBD_FAIL_TIMEOUT(OBD_FAIL_MDS_INTENT_DELAY, 10); + /* execute policy */ rc = flv->it_act(opc, info, lockp, flags); @@ -4141,7 +4397,7 @@ TGT_RPC_HANDLER(MDS_FIRST_OPC, HABEO_REFERO, MDS_SET_INFO, mdt_set_info, &RQF_OBD_SET_INFO, LUSTRE_MDS_VERSION), TGT_MDT_HDL(0, MDS_GET_INFO, mdt_get_info), -TGT_MDT_HDL(0 | HABEO_REFERO, MDS_GETSTATUS, mdt_getstatus), +TGT_MDT_HDL(0 | HABEO_REFERO, MDS_GET_ROOT, mdt_get_root), TGT_MDT_HDL(HABEO_CORPUS, MDS_GETATTR, mdt_getattr), TGT_MDT_HDL(HABEO_CORPUS| HABEO_REFERO, MDS_GETATTR_NAME, mdt_getattr_name), @@ -4356,6 +4612,9 @@ static int mdt_init0(const struct lu_env *env, struct mdt_device *m, LASSERT(num); node_id = simple_strtol(num, NULL, 10); obd->u.obt.obt_magic = OBT_MAGIC; + if (lsi->lsi_lmd != NULL && + lsi->lsi_lmd->lmd_flags & LMD_FLG_SKIP_LFSCK) + m->mdt_skip_lfsck = 1; } m->mdt_squash.rsi_uid = 0; @@ -4367,6 +4626,9 @@ static int mdt_init0(const struct lu_env *env, struct mdt_device *m, m->mdt_enable_remote_dir = 0; m->mdt_enable_remote_dir_gid = 0; + atomic_set(&m->mdt_mds_mds_conns, 0); + atomic_set(&m->mdt_async_commit_count, 0); + m->mdt_lu_dev.ld_ops = &mdt_lu_ops; m->mdt_lu_dev.ld_obd = obd; /* Set this lu_device to obd for error handling purposes. */ @@ -4542,6 +4804,7 @@ static struct cfg_interop_param mdt_interop_param[] = { { "mdt.group_upcall", NULL }, { "mdt.quota_type", NULL }, { "mdd.quota_type", NULL }, + { "mdt.som", NULL }, { "mdt.rootsquash", "mdt.root_squash" }, { "mdt.nosquash_nid", "mdt.nosquash_nids" }, { NULL } @@ -4808,6 +5071,7 @@ static int mdt_connect_internal(struct obd_export *exp, LASSERT(data != NULL); data->ocd_connect_flags &= MDT_CONNECT_SUPPORTED; + data->ocd_connect_flags2 &= MDT_CONNECT_SUPPORTED2; data->ocd_ibits_known &= MDS_INODELOCK_FULL; if (!(data->ocd_connect_flags & OBD_CONNECT_MDS_MDS) && @@ -4996,6 +5260,18 @@ static int mdt_export_cleanup(struct obd_export *exp) RETURN(rc); } +static inline void mdt_enable_slc(struct mdt_device *mdt) +{ + if (mdt->mdt_lut.lut_sync_lock_cancel == NEVER_SYNC_ON_CANCEL) + mdt->mdt_lut.lut_sync_lock_cancel = BLOCKING_SYNC_ON_CANCEL; +} + +static inline void mdt_disable_slc(struct mdt_device *mdt) +{ + if (mdt->mdt_lut.lut_sync_lock_cancel == BLOCKING_SYNC_ON_CANCEL) + mdt->mdt_lut.lut_sync_lock_cancel = NEVER_SYNC_ON_CANCEL; +} + static int mdt_obd_disconnect(struct obd_export *exp) { int rc; @@ -5004,12 +5280,20 @@ static int mdt_obd_disconnect(struct obd_export *exp) LASSERT(exp); class_export_get(exp); - nodemap_del_member(exp); + if ((exp_connect_flags(exp) & OBD_CONNECT_MDS_MDS) && + !(exp_connect_flags(exp) & OBD_CONNECT_LIGHTWEIGHT)) { + struct mdt_device *mdt = mdt_dev(exp->exp_obd->obd_lu_dev); + + if (atomic_dec_and_test(&mdt->mdt_mds_mds_conns)) + mdt_disable_slc(mdt); + } + rc = server_disconnect_export(exp); if (rc != 0) CDEBUG(D_IOCTL, "server disconnect error: rc = %d\n", rc); rc = mdt_export_cleanup(exp); + nodemap_del_member(exp); class_export_put(exp); RETURN(rc); } @@ -5029,17 +5313,25 @@ static int mdt_obd_connect(const struct lu_env *env, ENTRY; LASSERT(env != NULL); + LASSERT(data != NULL); + if (!exp || !obd || !cluuid) RETURN(-EINVAL); mdt = mdt_dev(obd->obd_lu_dev); + if ((data->ocd_connect_flags & OBD_CONNECT_MDS_MDS) && + !(data->ocd_connect_flags & OBD_CONNECT_LIGHTWEIGHT)) { + atomic_inc(&mdt->mdt_mds_mds_conns); + mdt_enable_slc(mdt); + } + /* * first, check whether the stack is ready to handle requests * XXX: probably not very appropriate method is used now * at some point we should find a better one */ - if (!test_bit(MDT_FL_SYNCED, &mdt->mdt_state) && data != NULL && + if (!test_bit(MDT_FL_SYNCED, &mdt->mdt_state) && !(data->ocd_connect_flags & OBD_CONNECT_LIGHTWEIGHT) && !(data->ocd_connect_flags & OBD_CONNECT_MDS_MDS)) { rc = obd_get_info(env, mdt->mdt_child_exp, @@ -5057,6 +5349,10 @@ static int mdt_obd_connect(const struct lu_env *env, lexp = class_conn2export(&conn); LASSERT(lexp != NULL); + rc = nodemap_add_member(*client_nid, lexp); + if (rc != 0 && rc != -EEXIST) + GOTO(out, rc); + rc = mdt_connect_internal(lexp, mdt, data); if (rc == 0) { struct lsd_client_data *lcd = lexp->exp_target_data.ted_lcd; @@ -5064,20 +5360,23 @@ static int mdt_obd_connect(const struct lu_env *env, LASSERT(lcd); memcpy(lcd->lcd_uuid, cluuid, sizeof lcd->lcd_uuid); rc = tgt_client_new(env, lexp); - if (rc == 0) { - rc = nodemap_add_member(*client_nid, lexp); - if (rc != 0 && rc != -EEXIST) - goto out; - + if (rc == 0) mdt_export_stats_init(obd, lexp, localdata); - } } out: if (rc != 0) { class_disconnect(lexp); + nodemap_del_member(lexp); *exp = NULL; } else { *exp = lexp; + /* Because we do not want this export to be evicted by pinger, + * let's not add this export to the timed chain list. */ + if (data->ocd_connect_flags & OBD_CONNECT_MDS_MDS) { + spin_lock(&lexp->exp_obd->obd_dev_lock); + list_del_init(&lexp->exp_obd_chain_timed); + spin_unlock(&lexp->exp_obd->obd_dev_lock); + } } RETURN(rc); @@ -5096,12 +5395,15 @@ static int mdt_obd_reconnect(const struct lu_env *env, if (exp == NULL || obd == NULL || cluuid == NULL) RETURN(-EINVAL); + rc = nodemap_add_member(*client_nid, exp); + if (rc != 0 && rc != -EEXIST) + RETURN(rc); + rc = mdt_connect_internal(exp, mdt_dev(obd->obd_lu_dev), data); - if (rc == 0) { - rc = nodemap_add_member(*client_nid, exp); - if (rc == 0 || rc == -EEXIST) - mdt_export_stats_init(obd, exp, localdata); - } + if (rc == 0) + mdt_export_stats_init(obd, exp, localdata); + else + nodemap_del_member(exp); RETURN(rc); } @@ -5205,6 +5507,7 @@ int mdt_links_read(struct mdt_thread_info *info, struct mdt_object *mdt_obj, * \param[in] info Per-thread common data shared by MDT level handlers. * \param[in] obj Object to do path lookup of * \param[in,out] fp User-provided struct to store path information + * \param[in] root_fid Root FID of current path should reach * * \retval 0 Lookup successful, path information stored in fp * \retval -EAGAIN Lookup failed, usually because object is being moved @@ -5212,7 +5515,8 @@ int mdt_links_read(struct mdt_thread_info *info, struct mdt_object *mdt_obj, */ static int mdt_path_current(struct mdt_thread_info *info, struct mdt_object *obj, - struct getinfo_fid2path *fp) + struct getinfo_fid2path *fp, + struct lu_fid *root_fid) { struct mdt_device *mdt = info->mti_mdt; struct mdt_object *mdt_obj; @@ -5235,16 +5539,18 @@ static int mdt_path_current(struct mdt_thread_info *info, RETURN(-ENOMEM); ldata.ld_buf = buf; - ptr = fp->gf_path + fp->gf_pathlen - 1; + ptr = fp->gf_u.gf_path + fp->gf_pathlen - 1; *ptr = 0; --ptr; *tmpfid = fp->gf_fid = *mdt_object_fid(obj); - /* root FID only exists on MDT0, and fid2path should also ends at MDT0, - * so checking root_fid can only happen on MDT0. */ - while (!lu_fid_eq(&mdt->mdt_md_root_fid, &fp->gf_fid)) { + while (!lu_fid_eq(root_fid, &fp->gf_fid)) { struct lu_buf lmv_buf; + if (!lu_fid_eq(root_fid, &mdt->mdt_md_root_fid) && + lu_fid_eq(&mdt->mdt_md_root_fid, &fp->gf_fid)) + GOTO(out, rc = -ENOENT); + mdt_obj = mdt_object_find(info->mti_env, mdt, tmpfid); if (IS_ERR(mdt_obj)) GOTO(out, rc = PTR_ERR(mdt_obj)); @@ -5305,7 +5611,7 @@ static int mdt_path_current(struct mdt_thread_info *info, /* Pack the name in the end of the buffer */ ptr -= tmpname->ln_namelen; - if (ptr - 1 <= fp->gf_path) + if (ptr - 1 <= fp->gf_u.gf_path) GOTO(out, rc = -EOVERFLOW); strncpy(ptr, tmpname->ln_name, tmpname->ln_namelen); *(--ptr) = '/'; @@ -5320,7 +5626,8 @@ static int mdt_path_current(struct mdt_thread_info *info, remote_out: ptr++; /* skip leading / */ - memmove(fp->gf_path, ptr, fp->gf_path + fp->gf_pathlen - ptr); + memmove(fp->gf_u.gf_path, ptr, + fp->gf_u.gf_path + fp->gf_pathlen - ptr); out: RETURN(rc); @@ -5342,7 +5649,7 @@ out: * \retval negative errno if there was a problem */ static int mdt_path(struct mdt_thread_info *info, struct mdt_object *obj, - struct getinfo_fid2path *fp) + struct getinfo_fid2path *fp, struct lu_fid *root_fid) { struct mdt_device *mdt = info->mti_mdt; int tries = 3; @@ -5352,14 +5659,17 @@ static int mdt_path(struct mdt_thread_info *info, struct mdt_object *obj, if (fp->gf_pathlen < 3) RETURN(-EOVERFLOW); - if (lu_fid_eq(&mdt->mdt_md_root_fid, mdt_object_fid(obj))) { - fp->gf_path[0] = '\0'; + if (root_fid == NULL) + root_fid = &mdt->mdt_md_root_fid; + + if (lu_fid_eq(root_fid, mdt_object_fid(obj))) { + fp->gf_u.gf_path[0] = '\0'; RETURN(0); } /* Retry multiple times in case file is being moved */ while (tries-- && rc == -EAGAIN) - rc = mdt_path_current(info, obj, fp); + rc = mdt_path_current(info, obj, fp, root_fid); RETURN(rc); } @@ -5381,6 +5691,7 @@ static int mdt_path(struct mdt_thread_info *info, struct mdt_object *obj, * \retval negative errno if there was a problem */ static int mdt_fid2path(struct mdt_thread_info *info, + struct lu_fid *root_fid, struct getinfo_fid2path *fp) { struct mdt_device *mdt = info->mti_mdt; @@ -5423,20 +5734,22 @@ static int mdt_fid2path(struct mdt_thread_info *info, RETURN(rc); } - rc = mdt_path(info, obj, fp); + rc = mdt_path(info, obj, fp, root_fid); CDEBUG(D_INFO, "fid "DFID", path %s recno "LPX64" linkno %u\n", - PFID(&fp->gf_fid), fp->gf_path, fp->gf_recno, fp->gf_linkno); + PFID(&fp->gf_fid), fp->gf_u.gf_path, + fp->gf_recno, fp->gf_linkno); mdt_object_put(info->mti_env, obj); RETURN(rc); } -static int mdt_rpc_fid2path(struct mdt_thread_info *info, void *key, +static int mdt_rpc_fid2path(struct mdt_thread_info *info, void *key, int keylen, void *val, int vallen) { struct getinfo_fid2path *fpout, *fpin; + struct lu_fid *root_fid = NULL; int rc = 0; fpin = key + cfs_size_round(sizeof(KEY_FID2PATH)); @@ -5449,7 +5762,18 @@ static int mdt_rpc_fid2path(struct mdt_thread_info *info, void *key, if (fpout->gf_pathlen != vallen - sizeof(*fpin)) RETURN(-EINVAL); - rc = mdt_fid2path(info, fpout); + if (keylen >= cfs_size_round(sizeof(KEY_FID2PATH)) + sizeof(*fpin) + + sizeof(struct lu_fid)) { + /* client sent its root FID, which is normally fileset FID */ + root_fid = fpin->gf_u.gf_root_fid; + if (ptlrpc_req_need_swab(info->mti_pill->rc_req)) + lustre_swab_lu_fid(root_fid); + + if (root_fid != NULL && !fid_is_sane(root_fid)) + RETURN(-EINVAL); + } + + rc = mdt_fid2path(info, root_fid, fpout); RETURN(rc); } @@ -5494,7 +5818,7 @@ int mdt_get_info(struct tgt_session_info *tsi) if (KEY_IS(KEY_FID2PATH)) { struct mdt_thread_info *info = tsi2mdt_info(tsi); - rc = mdt_rpc_fid2path(info, key, valout, *vallen); + rc = mdt_rpc_fid2path(info, key, keylen, valout, *vallen); mdt_thread_info_fini(info); } else { rc = -EINVAL; @@ -5575,7 +5899,7 @@ static int mdt_ioc_version_get(struct mdt_thread_info *mti, void *karg) /* ioctls on obd dev */ static int mdt_iocontrol(unsigned int cmd, struct obd_export *exp, int len, - void *karg, void *uarg) + void *karg, void __user *uarg) { struct lu_env env; struct obd_device *obd = exp->exp_obd; @@ -5589,16 +5913,18 @@ static int mdt_iocontrol(unsigned int cmd, struct obd_export *exp, int len, if (rc) RETURN(rc); - switch (cmd) { - case OBD_IOC_SYNC: - rc = mdt_device_sync(&env, mdt); - break; - case OBD_IOC_SET_READONLY: - rc = dt->dd_ops->dt_ro(&env, dt); - break; + switch (cmd) { + case OBD_IOC_SYNC: + rc = mdt_device_sync(&env, mdt); + break; + case OBD_IOC_SET_READONLY: + rc = dt_sync(&env, dt); + if (rc == 0) + rc = dt_ro(&env, dt); + break; case OBD_IOC_ABORT_RECOVERY: CERROR("%s: Aborting recovery for device\n", mdt_obd_name(mdt)); - obd->obd_force_abort_recovery = 1; + obd->obd_abort_recovery = 1; target_stop_recovery_thread(obd); rc = 0; break; @@ -5638,6 +5964,14 @@ static int mdt_iocontrol(unsigned int cmd, struct obd_export *exp, int len, rc = next->md_ops->mdo_iocontrol(&env, next, cmd, 0, &stop); break; } + case OBD_IOC_QUERY_LFSCK: { + struct md_device *next = mdt->mdt_child; + struct obd_ioctl_data *data = karg; + + rc = next->md_ops->mdo_iocontrol(&env, next, cmd, 0, + data->ioc_inlbuf1); + break; + } case OBD_IOC_GET_OBJ_VERSION: { struct mdt_thread_info *mti; mti = lu_context_key_get(&env.le_ctx, &mdt_thread_key); @@ -5671,18 +6005,21 @@ static int mdt_iocontrol(unsigned int cmd, struct obd_export *exp, int len, static int mdt_postrecov(const struct lu_env *env, struct mdt_device *mdt) { struct lu_device *ld = md2lu_dev(mdt->mdt_child); - struct lfsck_start_param lsp; int rc; ENTRY; - lsp.lsp_start = NULL; - lsp.lsp_index_valid = 0; - rc = mdt->mdt_child->md_ops->mdo_iocontrol(env, mdt->mdt_child, - OBD_IOC_START_LFSCK, - 0, &lsp); - if (rc != 0 && rc != -EALREADY) - CWARN("%s: auto trigger paused LFSCK failed: rc = %d\n", - mdt_obd_name(mdt), rc); + if (!mdt->mdt_skip_lfsck) { + struct lfsck_start_param lsp; + + lsp.lsp_start = NULL; + lsp.lsp_index_valid = 0; + rc = mdt->mdt_child->md_ops->mdo_iocontrol(env, mdt->mdt_child, + OBD_IOC_START_LFSCK, + 0, &lsp); + if (rc != 0 && rc != -EALREADY) + CWARN("%s: auto trigger paused LFSCK failed: rc = %d\n", + mdt_obd_name(mdt), rc); + } rc = ld->ld_ops->ldo_recovery_complete(env, ld); RETURN(rc); @@ -5836,7 +6173,7 @@ static struct lu_device_type mdt_device_type = { .ldt_ctx_tags = LCT_MD_THREAD }; -static int __init mdt_mod_init(void) +static int __init mdt_init(void) { int rc; @@ -5865,7 +6202,7 @@ mds_fini: return rc; } -static void __exit mdt_mod_exit(void) +static void __exit mdt_exit(void) { class_unregister_type(LUSTRE_MDT_NAME); mds_mod_exit(); @@ -5877,5 +6214,5 @@ MODULE_DESCRIPTION("Lustre Metadata Target ("LUSTRE_MDT_NAME")"); MODULE_VERSION(LUSTRE_VERSION_STRING); MODULE_LICENSE("GPL"); -module_init(mdt_mod_init); -module_exit(mdt_mod_exit); +module_init(mdt_init); +module_exit(mdt_exit);