X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Flmv%2Flmv_intent.c;h=08a5a609e3fdbdc8248407285703c763d2102ee3;hb=5a6aa0e6d1583cc0d4c82ae8c95fb7b9856d6284;hp=7e35465c1df12ee445e4c6bf1935ce443d31f8b1;hpb=54e6e2442374d11ba55518b552f7230a989c9b1c;p=fs%2Flustre-release.git diff --git a/lustre/lmv/lmv_intent.c b/lustre/lmv/lmv_intent.c index 7e35465..08a5a60 100644 --- a/lustre/lmv/lmv_intent.c +++ b/lustre/lmv/lmv_intent.c @@ -1,6 +1,4 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * +/* * GPL HEADER START * * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. @@ -17,758 +15,502 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved + * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. * Use is subject to license terms. + * + * Copyright (c) 2011, 2016, Intel Corporation. */ /* * This file is part of Lustre, http://www.lustre.org/ * Lustre is a trademark of Sun Microsystems, Inc. */ -#ifndef EXPORT_SYMTAB -# define EXPORT_SYMTAB -#endif #define DEBUG_SUBSYSTEM S_LMV -#ifdef __KERNEL__ #include #include #include #include #include -#include +#include #include #include -# ifndef HAVE_VFS_INTENT_PATCHES -# include -# endif -#else -#include -#endif +#include -#include #include +#include #include #include #include +#include #include #include #include "lmv_internal.h" -int lmv_intent_remote(struct obd_export *exp, void *lmm, - int lmmsize, struct lookup_intent *it, - int flags, struct ptlrpc_request **reqp, - ldlm_blocking_callback cb_blocking, - int extra_lock_flags) +static int lmv_intent_remote(struct obd_export *exp, struct lookup_intent *it, + const struct lu_fid *parent_fid, + struct ptlrpc_request **reqp, + ldlm_blocking_callback cb_blocking, + __u64 extra_lock_flags) { - struct obd_device *obd = exp->exp_obd; - struct lmv_obd *lmv = &obd->u.lmv; - struct ptlrpc_request *req = NULL; - struct lustre_handle plock; - struct md_op_data *op_data; - struct lmv_tgt_desc *tgt; - struct mdt_body *body; - int pmode; - int rc = 0; - ENTRY; - - body = req_capsule_server_get(&(*reqp)->rq_pill, &RMF_MDT_BODY); - if (body == NULL) - RETURN(-EPROTO); - - /* - * Not cross-ref case, just get out of here. - */ - if (!(body->valid & OBD_MD_MDS)) - RETURN(0); - - /* - * Unfortunately, we have to lie to MDC/MDS to retrieve - * attributes llite needs and provideproper locking. - */ - if (it->it_op & IT_LOOKUP) - it->it_op = IT_GETATTR; - - /* - * We got LOOKUP lock, but we really need attrs. - */ - pmode = it->d.lustre.it_lock_mode; - if (pmode) { - plock.cookie = it->d.lustre.it_lock_handle; - it->d.lustre.it_lock_mode = 0; - it->d.lustre.it_data = NULL; - } - - LASSERT(fid_is_sane(&body->fid1)); - - tgt = lmv_find_target(lmv, &body->fid1); - if (IS_ERR(tgt)) - GOTO(out, rc = PTR_ERR(tgt)); - - OBD_ALLOC_PTR(op_data); - if (op_data == NULL) - GOTO(out, rc = -ENOMEM); - - op_data->op_fid1 = body->fid1; - op_data->op_bias = MDS_CROSS_REF; - - CDEBUG(D_INODE, - "REMOTE_INTENT with fid="DFID" -> mds #%d\n", - PFID(&body->fid1), tgt->ltd_idx); - - it->d.lustre.it_disposition &= ~DISP_ENQ_COMPLETE; - rc = md_intent_lock(tgt->ltd_exp, op_data, lmm, lmmsize, it, - flags, &req, cb_blocking, extra_lock_flags); + struct obd_device *obd = exp->exp_obd; + struct lmv_obd *lmv = &obd->u.lmv; + struct ptlrpc_request *req = NULL; + struct lustre_handle plock; + struct md_op_data *op_data; + struct lmv_tgt_desc *tgt; + struct mdt_body *body; + int pmode; + int rc = 0; + ENTRY; + + body = req_capsule_server_get(&(*reqp)->rq_pill, &RMF_MDT_BODY); + if (body == NULL) + RETURN(-EPROTO); + + LASSERT((body->mbo_valid & OBD_MD_MDS)); + + /* + * Unfortunately, we have to lie to MDC/MDS to retrieve + * attributes llite needs and provideproper locking. + */ + if (it->it_op & IT_LOOKUP) + it->it_op = IT_GETATTR; + + /* + * We got LOOKUP lock, but we really need attrs. + */ + pmode = it->it_lock_mode; + if (pmode) { + plock.cookie = it->it_lock_handle; + it->it_lock_mode = 0; + it->it_request = NULL; + } + + LASSERT(fid_is_sane(&body->mbo_fid1)); + + tgt = lmv_find_target(lmv, &body->mbo_fid1); + if (IS_ERR(tgt)) + GOTO(out, rc = PTR_ERR(tgt)); + + OBD_ALLOC_PTR(op_data); + if (op_data == NULL) + GOTO(out, rc = -ENOMEM); + + op_data->op_fid1 = body->mbo_fid1; + /* Sent the parent FID to the remote MDT */ + if (parent_fid != NULL) { + /* The parent fid is only for remote open to + * check whether the open is from OBF, + * see mdt_cross_open */ + LASSERT(it->it_op & IT_OPEN); + op_data->op_fid2 = *parent_fid; + } + + op_data->op_bias = MDS_CROSS_REF; + CDEBUG(D_INODE, "REMOTE_INTENT with fid="DFID" -> mds #%u\n", + PFID(&body->mbo_fid1), tgt->ltd_idx); + + rc = md_intent_lock(tgt->ltd_exp, op_data, it, &req, cb_blocking, + extra_lock_flags); if (rc) GOTO(out_free_op_data, rc); - /* - * LLite needs LOOKUP lock to track dentry revocation in order to - * maintain dcache consistency. Thus drop UPDATE lock here and put - * LOOKUP in request. - */ - if (it->d.lustre.it_lock_mode != 0) { - ldlm_lock_decref((void *)&it->d.lustre.it_lock_handle, - it->d.lustre.it_lock_mode); - it->d.lustre.it_lock_mode = 0; - } - it->d.lustre.it_lock_handle = plock.cookie; - it->d.lustre.it_lock_mode = pmode; - - EXIT; + /* + * LLite needs LOOKUP lock to track dentry revocation in order to + * maintain dcache consistency. Thus drop UPDATE|PERM lock here + * and put LOOKUP in request. + */ + if (it->it_lock_mode != 0) { + it->it_remote_lock_handle = + it->it_lock_handle; + it->it_remote_lock_mode = it->it_lock_mode; + } + + if (pmode) { + it->it_lock_handle = plock.cookie; + it->it_lock_mode = pmode; + } + + EXIT; out_free_op_data: - OBD_FREE_PTR(op_data); + OBD_FREE_PTR(op_data); out: - if (rc && pmode) - ldlm_lock_decref(&plock, pmode); + if (rc && pmode) + ldlm_lock_decref(&plock, pmode); + + ptlrpc_req_finished(*reqp); + *reqp = req; + return rc; +} + +int lmv_revalidate_slaves(struct obd_export *exp, + const struct lmv_stripe_md *lsm, + ldlm_blocking_callback cb_blocking, + int extra_lock_flags) +{ + struct obd_device *obd = exp->exp_obd; + struct lmv_obd *lmv = &obd->u.lmv; + struct ptlrpc_request *req = NULL; + struct mdt_body *body; + struct md_op_data *op_data; + int i; + int rc = 0; + + ENTRY; + + /** + * revalidate slaves has some problems, temporarily return, + * we may not need that + */ + OBD_ALLOC_PTR(op_data); + if (op_data == NULL) + RETURN(-ENOMEM); + + /** + * Loop over the stripe information, check validity and update them + * from MDS if needed. + */ + for (i = 0; i < lsm->lsm_md_stripe_count; i++) { + struct lu_fid fid; + struct lookup_intent it = { .it_op = IT_GETATTR }; + struct lustre_handle *lockh = NULL; + struct lmv_tgt_desc *tgt = NULL; + struct inode *inode; + + fid = lsm->lsm_md_oinfo[i].lmo_fid; + inode = lsm->lsm_md_oinfo[i].lmo_root; + + /* + * Prepare op_data for revalidating. Note that @fid2 shluld be + * defined otherwise it will go to server and take new lock + * which is not needed here. + */ + memset(op_data, 0, sizeof(*op_data)); + op_data->op_fid1 = fid; + op_data->op_fid2 = fid; + + tgt = lmv_locate_mds(lmv, op_data, &fid); + if (IS_ERR(tgt)) + GOTO(cleanup, rc = PTR_ERR(tgt)); + + CDEBUG(D_INODE, "Revalidate slave "DFID" -> mds #%u\n", + PFID(&fid), tgt->ltd_idx); + + if (req != NULL) { + ptlrpc_req_finished(req); + req = NULL; + } + + rc = md_intent_lock(tgt->ltd_exp, op_data, &it, &req, + cb_blocking, extra_lock_flags); + if (rc < 0) + GOTO(cleanup, rc); + + lockh = (struct lustre_handle *)&it.it_lock_handle; + if (rc > 0 && req == NULL) { + /* slave inode is still valid */ + CDEBUG(D_INODE, "slave "DFID" is still valid.\n", + PFID(&fid)); + rc = 0; + } else { + /* refresh slave from server */ + body = req_capsule_server_get(&req->rq_pill, + &RMF_MDT_BODY); + if (body == NULL) { + if (it.it_lock_mode && lockh) { + ldlm_lock_decref(lockh, + it.it_lock_mode); + it.it_lock_mode = 0; + } + GOTO(cleanup, rc = -ENOENT); + } + + i_size_write(inode, body->mbo_size); + inode->i_blocks = body->mbo_blocks; + set_nlink(inode, body->mbo_nlink); + LTIME_S(inode->i_atime) = body->mbo_atime; + LTIME_S(inode->i_ctime) = body->mbo_ctime; + LTIME_S(inode->i_mtime) = body->mbo_mtime; + } + + md_set_lock_data(tgt->ltd_exp, lockh, inode, NULL); + if (it.it_lock_mode != 0 && lockh != NULL) { + ldlm_lock_decref(lockh, it.it_lock_mode); + it.it_lock_mode = 0; + } + } + +cleanup: + if (req != NULL) + ptlrpc_req_finished(req); - ptlrpc_req_finished(*reqp); - *reqp = req; - return rc; + OBD_FREE_PTR(op_data); + RETURN(rc); } + /* * IT_OPEN is intended to open (and create, possible) an object. Parent (pid) * may be split dir. */ -int lmv_intent_open(struct obd_export *exp, struct md_op_data *op_data, - void *lmm, int lmmsize, struct lookup_intent *it, - int flags, struct ptlrpc_request **reqp, - ldlm_blocking_callback cb_blocking, - int extra_lock_flags) +static int lmv_intent_open(struct obd_export *exp, struct md_op_data *op_data, + struct lookup_intent *it, + struct ptlrpc_request **reqp, + ldlm_blocking_callback cb_blocking, + __u64 extra_lock_flags) { - struct obd_device *obd = exp->exp_obd; - struct lu_fid rpid = op_data->op_fid1; - struct lmv_obd *lmv = &obd->u.lmv; - struct md_op_data *sop_data; - struct lmv_stripe_md *mea; - struct lmv_tgt_desc *tgt; - struct mdt_body *body; - struct lmv_object *obj; - int rc; - int loop = 0; - int sidx; - ENTRY; - - OBD_ALLOC_PTR(sop_data); - if (sop_data == NULL) - RETURN(-ENOMEM); - - /* save op_data fro repeat case */ - *sop_data = *op_data; - -repeat: - - ++loop; - LASSERT(loop <= 2); - obj = lmv_object_find(obd, &rpid); - if (obj) { - /* - * Directory is already split, so we have to forward request to - * the right MDS. - */ - sidx = raw_name2idx(obj->lo_hashtype, obj->lo_objcount, - (char *)op_data->op_name, - op_data->op_namelen); - - rpid = obj->lo_stripes[sidx].ls_fid; - - sop_data->op_mds = obj->lo_stripes[sidx].ls_mds; - tgt = lmv_get_target(lmv, sop_data->op_mds); - sop_data->op_bias &= ~MDS_CHECK_SPLIT; - lmv_object_put(obj); - - CDEBUG(D_INODE, - "Choose slave dir ("DFID") -> mds #%d\n", - PFID(&rpid), tgt->ltd_idx); - } else { - sop_data->op_bias |= MDS_CHECK_SPLIT; - tgt = lmv_find_target(lmv, &rpid); - sop_data->op_mds = tgt->ltd_idx; - } - if (IS_ERR(tgt)) - GOTO(out_free_sop_data, rc = PTR_ERR(tgt)); - - sop_data->op_fid1 = rpid; - - if (it->it_op & IT_CREAT) { - /* - * For open with IT_CREATE and for IT_CREATE cases allocate new - * fid and setup FLD for it. - */ - sop_data->op_fid3 = sop_data->op_fid2; - rc = lmv_fid_alloc(exp, &sop_data->op_fid2, sop_data); - if (rc) - GOTO(out_free_sop_data, rc); - - if (rc == -ERESTART) - goto repeat; - else if (rc) - GOTO(out_free_sop_data, rc); - } - - CDEBUG(D_INODE, - "OPEN_INTENT with fid1="DFID", fid2="DFID", name='%s' -> mds #%d\n", - PFID(&sop_data->op_fid1), PFID(&sop_data->op_fid2), - sop_data->op_name, tgt->ltd_idx); - - rc = md_intent_lock(tgt->ltd_exp, sop_data, lmm, lmmsize, it, flags, - reqp, cb_blocking, extra_lock_flags); - - if (rc == -ERESTART) { - LASSERT(*reqp != NULL); - DEBUG_REQ(D_WARNING|D_RPCTRACE, *reqp, - "Got -ERESTART during open!\n"); - ptlrpc_req_finished(*reqp); - *reqp = NULL; - it->d.lustre.it_data = NULL; - - /* - * Directory got split. Time to update local object and repeat - * the request with proper MDS. - */ - LASSERT(lu_fid_eq(&op_data->op_fid1, &rpid)); - rc = lmv_handle_split(exp, &rpid); - if (rc == 0) { - /* We should reallocate child FID. */ - rc = lmv_allocate_slaves(obd, &rpid, op_data, - &sop_data->op_fid2); - if (rc == 0) - goto repeat; - } - } - - if (rc != 0) - GOTO(out_free_sop_data, rc); - - /* - * Nothing is found, do not access body->fid1 as it is zero and thus - * pointless. - */ - if ((it->d.lustre.it_disposition & DISP_LOOKUP_NEG) && - !(it->d.lustre.it_disposition & DISP_OPEN_CREATE) && - !(it->d.lustre.it_disposition & DISP_OPEN_OPEN)) - GOTO(out_free_sop_data, rc = 0); - - /* - * Okay, MDS has returned success. Probably name has been resolved in - * remote inode. - */ - rc = lmv_intent_remote(exp, lmm, lmmsize, it, flags, reqp, - cb_blocking, extra_lock_flags); - if (rc != 0) { - LASSERT(rc < 0); - /* - * This is possible, that some userspace application will try to - * open file as directory and we will have -ENOTDIR here. As - * this is normal situation, we should not print error here, - * only debug info. - */ - CDEBUG(D_INODE, "Can't handle remote %s: dir "DFID"("DFID"):" - "%*s: %d\n", LL_IT2STR(it), PFID(&op_data->op_fid2), - PFID(&rpid), op_data->op_namelen, op_data->op_name, rc); - GOTO(out_free_sop_data, rc); - } - - /* - * Caller may use attrs MDS returns on IT_OPEN lock request so, we have - * to update them for split dir. - */ - body = req_capsule_server_get(&(*reqp)->rq_pill, &RMF_MDT_BODY); - LASSERT(body != NULL); - - /* - * Could not find object, FID is not present in response. - */ - if (!(body->valid & OBD_MD_FLID)) - GOTO(out_free_sop_data, rc = 0); - - obj = lmv_object_find(obd, &body->fid1); - if (obj == NULL) { - /* - * XXX: Capability for remote call! - */ - mea = lmv_get_mea(*reqp); - if (mea != NULL) { - obj = lmv_object_create(exp, &body->fid1, mea); - if (IS_ERR(obj)) - GOTO(out_free_sop_data, rc = (int)PTR_ERR(obj)); - } - } - - if (obj) { - /* - * This is split dir and we'd want to get attrs. - */ - CDEBUG(D_INODE, "Slave attributes for "DFID"\n", - PFID(&body->fid1)); - - rc = lmv_revalidate_slaves(exp, reqp, &body->fid1, it, 1, - cb_blocking, extra_lock_flags); - lmv_object_put(obj); - } - EXIT; -out_free_sop_data: - OBD_FREE_PTR(sop_data); - return rc; + struct obd_device *obd = exp->exp_obd; + struct lmv_obd *lmv = &obd->u.lmv; + struct lmv_tgt_desc *tgt; + struct mdt_body *body; + int rc; + ENTRY; + + if (it->it_flags & MDS_OPEN_BY_FID) { + LASSERT(fid_is_sane(&op_data->op_fid2)); + + /* for striped directory, we can't know parent stripe fid + * without name, but we can set it to child fid, and MDT + * will obtain it from linkea in open in such case. */ + if (op_data->op_mea1 != NULL) + op_data->op_fid1 = op_data->op_fid2; + + tgt = lmv_find_target(lmv, &op_data->op_fid2); + if (IS_ERR(tgt)) + RETURN(PTR_ERR(tgt)); + + op_data->op_mds = tgt->ltd_idx; + } else { + LASSERT(fid_is_sane(&op_data->op_fid1)); + LASSERT(fid_is_zero(&op_data->op_fid2)); + LASSERT(op_data->op_name != NULL); + + tgt = lmv_locate_mds(lmv, op_data, &op_data->op_fid1); + if (IS_ERR(tgt)) + RETURN(PTR_ERR(tgt)); + } + + /* If it is ready to open the file by FID, do not need + * allocate FID at all, otherwise it will confuse MDT */ + if ((it->it_op & IT_CREAT) && !(it->it_flags & MDS_OPEN_BY_FID)) { + /* + * For lookup(IT_CREATE) cases allocate new fid and setup FLD + * for it. + */ + rc = lmv_fid_alloc(NULL, exp, &op_data->op_fid2, op_data); + if (rc != 0) + RETURN(rc); + } + + CDEBUG(D_INODE, "OPEN_INTENT with fid1="DFID", fid2="DFID"," + " name='%s' -> mds #%u\n", PFID(&op_data->op_fid1), + PFID(&op_data->op_fid2), op_data->op_name, tgt->ltd_idx); + + rc = md_intent_lock(tgt->ltd_exp, op_data, it, reqp, cb_blocking, + extra_lock_flags); + if (rc != 0) + RETURN(rc); + /* + * Nothing is found, do not access body->fid1 as it is zero and thus + * pointless. + */ + if ((it->it_disposition & DISP_LOOKUP_NEG) && + !(it->it_disposition & DISP_OPEN_CREATE) && + !(it->it_disposition & DISP_OPEN_OPEN)) + RETURN(rc); + + body = req_capsule_server_get(&(*reqp)->rq_pill, &RMF_MDT_BODY); + if (body == NULL) + RETURN(-EPROTO); + + /* Not cross-ref case, just get out of here. */ + if (unlikely((body->mbo_valid & OBD_MD_MDS))) { + rc = lmv_intent_remote(exp, it, &op_data->op_fid1, reqp, + cb_blocking, extra_lock_flags); + if (rc != 0) + RETURN(rc); + + body = req_capsule_server_get(&(*reqp)->rq_pill, &RMF_MDT_BODY); + if (body == NULL) + RETURN(-EPROTO); + } + + RETURN(rc); } /* * Handler for: getattr, lookup and revalidate cases. */ -int lmv_intent_lookup(struct obd_export *exp, struct md_op_data *op_data, - void *lmm, int lmmsize, struct lookup_intent *it, - int flags, struct ptlrpc_request **reqp, - ldlm_blocking_callback cb_blocking, - int extra_lock_flags) +static int +lmv_intent_lookup(struct obd_export *exp, struct md_op_data *op_data, + struct lookup_intent *it, struct ptlrpc_request **reqp, + ldlm_blocking_callback cb_blocking, + __u64 extra_lock_flags) { - struct obd_device *obd = exp->exp_obd; - struct lu_fid rpid = op_data->op_fid1; - struct lmv_obd *lmv = &obd->u.lmv; - struct lmv_object *obj = NULL; - struct md_op_data *sop_data; - struct lmv_stripe_md *mea; - struct lmv_tgt_desc *tgt = NULL; - struct mdt_body *body; - int sidx; - int loop = 0; - int rc = 0; - ENTRY; - - OBD_ALLOC_PTR(sop_data); - if (sop_data == NULL) - RETURN(-ENOMEM); - - *sop_data = *op_data; - -repeat: - ++loop; - LASSERT(loop <= 2); - - obj = lmv_object_find(obd, &op_data->op_fid1); - if (obj && op_data->op_namelen) { - sidx = raw_name2idx(obj->lo_hashtype, - obj->lo_objcount, - (char *)op_data->op_name, - op_data->op_namelen); - rpid = obj->lo_stripes[sidx].ls_fid; - tgt = lmv_get_target(lmv, - obj->lo_stripes[sidx].ls_mds); - CDEBUG(D_INODE, - "Choose slave dir ("DFID") -> mds #%d\n", - PFID(&rpid), tgt->ltd_idx); - sop_data->op_bias &= ~MDS_CHECK_SPLIT; - } else { - tgt = lmv_find_target(lmv, &op_data->op_fid1); - sop_data->op_bias |= MDS_CHECK_SPLIT; - } - if (obj) - lmv_object_put(obj); - - if (IS_ERR(tgt)) - GOTO(out_free_sop_data, rc = PTR_ERR(tgt)); - - if (!fid_is_sane(&sop_data->op_fid2)) - fid_zero(&sop_data->op_fid2); - - CDEBUG(D_INODE, - "LOOKUP_INTENT with fid1="DFID", fid2="DFID - ", name='%s' -> mds #%d\n", - PFID(&sop_data->op_fid1), PFID(&sop_data->op_fid2), - sop_data->op_name ? sop_data->op_name : "", - tgt->ltd_idx); - - sop_data->op_bias &= ~MDS_CROSS_REF; - sop_data->op_fid1 = rpid; - - rc = md_intent_lock(tgt->ltd_exp, sop_data, lmm, lmmsize, it, - flags, reqp, cb_blocking, extra_lock_flags); - - if (rc == -ERESTART) { - LASSERT(*reqp != NULL); - DEBUG_REQ(D_WARNING|D_RPCTRACE, *reqp, - "Got -ERESTART during lookup!\n"); - ptlrpc_req_finished(*reqp); - *reqp = NULL; - it->d.lustre.it_data = 0; - - /* - * Directory got split since last update. This shouldn't be - * because splitting causes lock revocation, so revalidate had - * to fail and lookup on dir had to return mea. - */ - LASSERT(obj == NULL); - - obj = lmv_object_create(exp, &rpid, NULL); - if (IS_ERR(obj)) - GOTO(out_free_sop_data, rc = PTR_ERR(obj)); - lmv_object_put(obj); - goto repeat; - } - - if (rc < 0) - GOTO(out_free_sop_data, rc); - - if (obj && rc > 0) { - /* - * This is split dir. In order to optimize things a bit, we - * consider obj valid updating missing parts. - */ - CDEBUG(D_INODE, - "Revalidate slaves for "DFID", rc %d\n", - PFID(&op_data->op_fid1), rc); - - LASSERT(fid_is_sane(&op_data->op_fid2)); - rc = lmv_revalidate_slaves(exp, reqp, &op_data->op_fid1, it, rc, - cb_blocking, extra_lock_flags); - GOTO(out_free_sop_data, rc); - } - - if (*reqp == NULL) - GOTO(out_free_sop_data, rc); - - /* - * MDS has returned success. Probably name has been resolved in - * remote inode. Let's check this. - */ - rc = lmv_intent_remote(exp, lmm, lmmsize, it, flags, - reqp, cb_blocking, extra_lock_flags); - if (rc < 0) - GOTO(out_free_sop_data, rc); - - /* - * Nothing is found, do not access body->fid1 as it is zero and thus - * pointless. - */ - if (it->d.lustre.it_disposition & DISP_LOOKUP_NEG) - GOTO(out_free_sop_data, rc = 0); - - LASSERT(*reqp != NULL); - LASSERT((*reqp)->rq_repmsg != NULL); - body = req_capsule_server_get(&(*reqp)->rq_pill, &RMF_MDT_BODY); - LASSERT(body != NULL); - - /* - * Could not find object, FID is not present in response. - */ - if (!(body->valid & OBD_MD_FLID)) - GOTO(out_free_sop_data, rc = 0); - - obj = lmv_object_find(obd, &body->fid1); - if (obj == NULL) { - /* - * XXX: Remote capability is not handled. - */ - mea = lmv_get_mea(*reqp); - if (mea != NULL) { - obj = lmv_object_create(exp, &body->fid1, mea); - if (IS_ERR(obj)) - GOTO(out_free_sop_data, rc = (int)PTR_ERR(obj)); - } - } else { - CDEBUG(D_INODE, "Slave attributes for "DFID", rc %d\n", - PFID(&body->fid1), rc); - - rc = lmv_revalidate_slaves(exp, reqp, &body->fid1, it, 1, - cb_blocking, extra_lock_flags); - lmv_object_put(obj); - } - - EXIT; -out_free_sop_data: - OBD_FREE_PTR(sop_data); - return rc; + struct obd_device *obd = exp->exp_obd; + struct lmv_obd *lmv = &obd->u.lmv; + struct lmv_tgt_desc *tgt = NULL; + struct mdt_body *body; + struct lmv_stripe_md *lsm = op_data->op_mea1; + int rc = 0; + ENTRY; + + /* If it returns ERR_PTR(-EBADFD) then it is an unknown hash type + * it will try all stripes to locate the object */ + tgt = lmv_locate_mds(lmv, op_data, &op_data->op_fid1); + if (IS_ERR(tgt) && (PTR_ERR(tgt) != -EBADFD)) + RETURN(PTR_ERR(tgt)); + + /* Both migrating dir and unknown hash dir need to try + * all of sub-stripes */ + if (lsm != NULL && !lmv_is_known_hash_type(lsm->lsm_md_hash_type)) { + struct lmv_oinfo *oinfo; + + oinfo = &lsm->lsm_md_oinfo[0]; + + op_data->op_fid1 = oinfo->lmo_fid; + op_data->op_mds = oinfo->lmo_mds; + tgt = lmv_get_target(lmv, oinfo->lmo_mds, NULL); + if (IS_ERR(tgt)) + RETURN(PTR_ERR(tgt)); + } + + if (!fid_is_sane(&op_data->op_fid2)) + fid_zero(&op_data->op_fid2); + + CDEBUG(D_INODE, "LOOKUP_INTENT with fid1="DFID", fid2="DFID + ", name='%s' -> mds #%u lsm=%p lsm_magic=%x\n", + PFID(&op_data->op_fid1), PFID(&op_data->op_fid2), + op_data->op_name ? op_data->op_name : "", + tgt->ltd_idx, lsm, lsm == NULL ? -1 : lsm->lsm_md_magic); + + op_data->op_bias &= ~MDS_CROSS_REF; + + rc = md_intent_lock(tgt->ltd_exp, op_data, it, reqp, cb_blocking, + extra_lock_flags); + if (rc < 0) + RETURN(rc); + + if (*reqp == NULL) { + /* If RPC happens, lsm information will be revalidated + * during update_inode process (see ll_update_lsm_md) */ + if (op_data->op_mea2 != NULL) { + rc = lmv_revalidate_slaves(exp, op_data->op_mea2, + cb_blocking, + extra_lock_flags); + if (rc != 0) + RETURN(rc); + } + RETURN(rc); + } else if (it_disposition(it, DISP_LOOKUP_NEG) && lsm != NULL && + lmv_need_try_all_stripes(lsm)) { + /* For migrating and unknown hash type directory, it will + * try to target the entry on other stripes */ + int stripe_index; + + for (stripe_index = 1; + stripe_index < lsm->lsm_md_stripe_count && + it_disposition(it, DISP_LOOKUP_NEG); stripe_index++) { + struct lmv_oinfo *oinfo; + + /* release the previous request */ + ptlrpc_req_finished(*reqp); + it->it_request = NULL; + *reqp = NULL; + + oinfo = &lsm->lsm_md_oinfo[stripe_index]; + tgt = lmv_find_target(lmv, &oinfo->lmo_fid); + if (IS_ERR(tgt)) + RETURN(PTR_ERR(tgt)); + + CDEBUG(D_INODE, "Try other stripes " DFID"\n", + PFID(&oinfo->lmo_fid)); + + op_data->op_fid1 = oinfo->lmo_fid; + it->it_disposition &= ~DISP_ENQ_COMPLETE; + rc = md_intent_lock(tgt->ltd_exp, op_data, it, reqp, + cb_blocking, extra_lock_flags); + if (rc != 0) + RETURN(rc); + } + } + + if (!it_has_reply_body(it)) + RETURN(0); + + /* + * MDS has returned success. Probably name has been resolved in + * remote inode. Let's check this. + */ + body = req_capsule_server_get(&(*reqp)->rq_pill, &RMF_MDT_BODY); + if (body == NULL) + RETURN(-EPROTO); + + /* Not cross-ref case, just get out of here. */ + if (unlikely((body->mbo_valid & OBD_MD_MDS))) { + rc = lmv_intent_remote(exp, it, NULL, reqp, cb_blocking, + extra_lock_flags); + if (rc != 0) + RETURN(rc); + body = req_capsule_server_get(&(*reqp)->rq_pill, &RMF_MDT_BODY); + if (body == NULL) + RETURN(-EPROTO); + } + + RETURN(rc); } int lmv_intent_lock(struct obd_export *exp, struct md_op_data *op_data, - void *lmm, int lmmsize, struct lookup_intent *it, - int flags, struct ptlrpc_request **reqp, - ldlm_blocking_callback cb_blocking, - int extra_lock_flags) -{ - struct obd_device *obd = exp->exp_obd; - int rc; - ENTRY; - - LASSERT(it != NULL); - LASSERT(fid_is_sane(&op_data->op_fid1)); - - CDEBUG(D_INODE, "INTENT LOCK '%s' for '%*s' on "DFID"\n", - LL_IT2STR(it), op_data->op_namelen, op_data->op_name, - PFID(&op_data->op_fid1)); - - rc = lmv_check_connect(obd); - if (rc) - RETURN(rc); - - if (it->it_op & (IT_LOOKUP | IT_GETATTR)) - rc = lmv_intent_lookup(exp, op_data, lmm, lmmsize, it, - flags, reqp, cb_blocking, - extra_lock_flags); - else if (it->it_op & IT_OPEN) - rc = lmv_intent_open(exp, op_data, lmm, lmmsize, it, - flags, reqp, cb_blocking, - extra_lock_flags); - else - LBUG(); - RETURN(rc); -} - -int lmv_revalidate_slaves(struct obd_export *exp, struct ptlrpc_request **reqp, - const struct lu_fid *mid, struct lookup_intent *oit, - int master_valid, ldlm_blocking_callback cb_blocking, - int extra_lock_flags) -{ - struct obd_device *obd = exp->exp_obd; - struct lmv_obd *lmv = &obd->u.lmv; - int master_lockm = 0; - struct lustre_handle *lockh = NULL; - struct ptlrpc_request *mreq = *reqp; - struct lustre_handle master_lockh; - struct md_op_data *op_data; - struct ldlm_lock *lock; - unsigned long size = 0; - struct mdt_body *body; - struct lmv_object *obj; - int i; - int rc = 0; - struct lu_fid fid; - struct ptlrpc_request *req; - ldlm_blocking_callback cb; - struct lookup_intent it; - struct lmv_tgt_desc *tgt; - int master; - ENTRY; - - CDEBUG(D_INODE, "Revalidate master obj "DFID"\n", PFID(mid)); - - OBD_ALLOC_PTR(op_data); - if (op_data == NULL) - RETURN(-ENOMEM); - - /* - * We have to loop over the subobjects, check validity and update them - * from MDS if needed. It's very useful that we need not to update all - * the fields. Say, common fields (that are equal on all the subojects - * need not to be update, another fields (i_size, for example) are - * cached all the time. - */ - obj = lmv_object_find_lock(obd, mid); - if (obj == NULL) - RETURN(-EALREADY); - - for (i = 0; i < obj->lo_objcount; i++) { - fid = obj->lo_stripes[i].ls_fid; - master = lu_fid_eq(&fid, &obj->lo_fid); - cb = master ? cb_blocking : lmv_blocking_ast; - - /* - * We need i_size and we would like to check possible cached locks, - * so this is is IT_GETATTR intent. - */ - memset(&it, 0, sizeof(it)); - it.it_op = IT_GETATTR; - - if (master && master_valid) { - /* - * lmv_intent_lookup() already checked - * validness and took the lock. - */ - if (mreq != NULL) { - body = req_capsule_server_get(&mreq->rq_pill, - &RMF_MDT_BODY); - LASSERT(body != NULL); - goto update; - } - /* - * Take already cached attrs into account. - */ - CDEBUG(D_INODE, - "Master "DFID"is locked and cached\n", - PFID(mid)); - goto release_lock; - } - - /* - * Prepare op_data for revalidating. Note that @fid2 shuld be - * defined otherwise it will go to server and take new lock - * which is what we reall not need here. - */ - memset(op_data, 0, sizeof(*op_data)); - op_data->op_bias = MDS_CROSS_REF; - op_data->op_fid1 = fid; - op_data->op_fid2 = fid; - req = NULL; - - tgt = lmv_get_target(lmv, obj->lo_stripes[i].ls_mds); - if (IS_ERR(tgt)) - GOTO(cleanup, rc = PTR_ERR(tgt)); - - CDEBUG(D_INODE, "Revalidate slave obj "DFID" -> mds #%d\n", - PFID(&fid), tgt->ltd_idx); - - rc = md_intent_lock(tgt->ltd_exp, op_data, NULL, 0, &it, 0, - &req, cb, extra_lock_flags); - - lockh = (struct lustre_handle *)&it.d.lustre.it_lock_handle; - if (rc > 0 && req == NULL) { - /* - * Nice, this slave is valid. - */ - CDEBUG(D_INODE, "Cached slave "DFID"\n", PFID(&fid)); - goto release_lock; - } - - if (rc < 0) - GOTO(cleanup, rc); - - if (master) { - /* - * Save lock on master to be returned to the caller. - */ - CDEBUG(D_INODE, "No lock on master "DFID" yet\n", - PFID(mid)); - memcpy(&master_lockh, lockh, sizeof(master_lockh)); - master_lockm = it.d.lustre.it_lock_mode; - it.d.lustre.it_lock_mode = 0; - } else { - /* - * This is slave. We want to control it. - */ - lock = ldlm_handle2lock(lockh); - LASSERT(lock != NULL); - lock->l_ast_data = lmv_object_get(obj); - LDLM_LOCK_PUT(lock); - } - - if (*reqp == NULL) { - /* - * This is first reply, we'll use it to return updated - * data back to the caller. - */ - LASSERT(req != NULL); - ptlrpc_request_addref(req); - *reqp = req; - } - - body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY); - LASSERT(body != NULL); - -update: - obj->lo_stripes[i].ls_size = body->size; - - CDEBUG(D_INODE, "Fresh size %lu from "DFID"\n", - (unsigned long)obj->lo_stripes[i].ls_size, PFID(&fid)); - - if (req) - ptlrpc_req_finished(req); -release_lock: - size += obj->lo_stripes[i].ls_size; - - if (it.d.lustre.it_lock_mode && lockh) { - ldlm_lock_decref(lockh, it.d.lustre.it_lock_mode); - it.d.lustre.it_lock_mode = 0; - } - } - - if (*reqp) { - /* - * Some attrs got refreshed, we have reply and it's time to put - * fresh attrs to it. - */ - CDEBUG(D_INODE, "Return refreshed attrs: size = %lu for "DFID"\n", - (unsigned long)size, PFID(mid)); - - body = req_capsule_server_get(&(*reqp)->rq_pill, &RMF_MDT_BODY); - LASSERT(body != NULL); - body->size = size; - - if (mreq == NULL) { - /* - * Very important to maintain mds num the same because - * of revalidation. mreq == NULL means that caller has - * no reply and the only attr we can return is size. - */ - body->valid = OBD_MD_FLSIZE; - } - if (master_valid == 0) { - oit->d.lustre.it_lock_handle = master_lockh.cookie; - oit->d.lustre.it_lock_mode = master_lockm; - } - rc = 0; - } else { - /* - * It seems all the attrs are fresh and we did no request. - */ - CDEBUG(D_INODE, "All the attrs were fresh on "DFID"\n", - PFID(mid)); - if (master_valid == 0) - oit->d.lustre.it_lock_mode = master_lockm; - rc = 1; - } - - EXIT; -cleanup: - OBD_FREE_PTR(op_data); - lmv_object_put_unlock(obj); - return rc; -} - -int lmv_allocate_slaves(struct obd_device *obd, struct lu_fid *pid, - struct md_op_data *op, struct lu_fid *fid) + struct lookup_intent *it, struct ptlrpc_request **reqp, + ldlm_blocking_callback cb_blocking, + __u64 extra_lock_flags) { - struct lmv_obd *lmv = &obd->u.lmv; - struct lmv_object *obj; - mdsno_t mds; - int sidx; - int rc; - ENTRY; - - obj = lmv_object_find(obd, pid); - if (obj == NULL) - RETURN(-EALREADY); - - sidx = raw_name2idx(obj->lo_hashtype, obj->lo_objcount, - (char *)op->op_name, op->op_namelen); - mds = obj->lo_stripes[sidx].ls_mds; - lmv_object_put(obj); - - rc = __lmv_fid_alloc(lmv, fid, mds); - if (rc) { - CERROR("Can't allocate fid, rc %d\n", rc); - RETURN(rc); - } - - CDEBUG(D_INODE, "Allocate new fid "DFID" for slave " - "obj -> mds #"LPU64"\n", PFID(fid), mds); - - RETURN(rc); + int rc; + ENTRY; + + LASSERT(it != NULL); + LASSERT(fid_is_sane(&op_data->op_fid1)); + + CDEBUG(D_INODE, "INTENT LOCK '%s' for "DFID" '%.*s' on "DFID"\n", + LL_IT2STR(it), PFID(&op_data->op_fid2), + (int)op_data->op_namelen, op_data->op_name, + PFID(&op_data->op_fid1)); + + if (it->it_op & (IT_LOOKUP | IT_GETATTR | IT_LAYOUT | IT_GETXATTR)) + rc = lmv_intent_lookup(exp, op_data, it, reqp, cb_blocking, + extra_lock_flags); + else if (it->it_op & IT_OPEN) + rc = lmv_intent_open(exp, op_data, it, reqp, cb_blocking, + extra_lock_flags); + else + LBUG(); + + if (rc < 0) { + struct lustre_handle lock_handle; + + if (it->it_lock_mode != 0) { + lock_handle.cookie = it->it_lock_handle; + ldlm_lock_decref_and_cancel(&lock_handle, + it->it_lock_mode); + } + + it->it_lock_handle = 0; + it->it_lock_mode = 0; + + if (it->it_remote_lock_mode != 0) { + lock_handle.cookie = it->it_remote_lock_handle; + ldlm_lock_decref_and_cancel(&lock_handle, + it->it_remote_lock_mode); + } + + it->it_remote_lock_handle = 0; + it->it_remote_lock_mode = 0; + } + + RETURN(rc); }