X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Flmv%2Flmv_intent.c;h=960a7bd1027c2d3066825adfcf20c045431c0074;hb=4bbc97c3522199891e8522cd88c8301adceafde9;hp=eb1f40b3e8f23041f82a0f7c6ddd82f071042c63;hpb=2dc9c16e770415d56839e1996015fec5fab93f29;p=fs%2Flustre-release.git diff --git a/lustre/lmv/lmv_intent.c b/lustre/lmv/lmv_intent.c index eb1f40b..960a7bd 100644 --- a/lustre/lmv/lmv_intent.c +++ b/lustre/lmv/lmv_intent.c @@ -1,22 +1,37 @@ /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- * vim:expandtab:shiftwidth=8:tabstop=8: * - * Copyright (C) 2002, 2003 Cluster File Systems, Inc. + * GPL HEADER START * - * This file is part of Lustre, http://www.lustre.org. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved + * Use is subject to license terms. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. */ #ifndef EXPORT_SYMTAB @@ -31,722 +46,729 @@ #include #include #include +#include +# ifndef HAVE_VFS_INTENT_PATCHES +# include +# endif #else #include #endif -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include #include "lmv_internal.h" - -int lmv_handle_remote_inode(struct obd_export *exp, struct ll_uctxt *uctxt, - void *lmm, int lmmsize, - struct lookup_intent *it, int flags, - struct ptlrpc_request **reqp, - ldlm_blocking_callback cb_blocking) +int lmv_intent_remote(struct obd_export *exp, void *lmm, + int lmmsize, struct lookup_intent *it, + int flags, struct ptlrpc_request **reqp, + ldlm_blocking_callback cb_blocking, + int extra_lock_flags) { - struct obd_device *obd = exp->exp_obd; - struct lmv_obd *lmv = &obd->u.lmv; - struct mds_body *body = NULL; - int rc = 0; + struct obd_device *obd = exp->exp_obd; + struct lmv_obd *lmv = &obd->u.lmv; + struct ptlrpc_request *req = NULL; + struct lustre_handle plock; + struct md_op_data *op_data; + struct lmv_tgt_desc *tgt; + struct mdt_body *body; + int pmode; + int rc = 0; ENTRY; - body = lustre_msg_buf((*reqp)->rq_repmsg, 1, sizeof(*body)); - LASSERT(body != NULL); - - if (body->valid & OBD_MD_MDS) { - /* oh, MDS reports that this is remote inode case - * i.e. we have to ask for real attrs on another MDS */ - struct ptlrpc_request *req; - struct ll_fid nfid; - struct lustre_handle plock; - int pmode; - - if (it->it_op == IT_LOOKUP) { - /* unfortunately, we have to lie to MDC/MDS to - * retrieve attributes llite needs */ - it->it_op = IT_GETATTR; - } - - /* we got LOOKUP lock, but we really need attrs */ - pmode = it->d.lustre.it_lock_mode; - if (pmode) { - memcpy(&plock, &it->d.lustre.it_lock_handle, - sizeof(plock)); - it->d.lustre.it_lock_mode = 0; - } - - nfid = body->fid1; - it->d.lustre.it_disposition &= ~DISP_ENQ_COMPLETE; - rc = md_intent_lock(lmv->tgts[nfid.mds].exp, uctxt, &nfid, - NULL, 0, lmm, lmmsize, NULL, it, flags, - &req, cb_blocking); - - /* llite needs LOOKUP lock to track dentry revocation in - * order to maintain dcache consistency. thus drop UPDATE - * lock here and put LOOKUP in request */ - if (rc == 0) { - LASSERT(it->d.lustre.it_lock_mode != 0); - ldlm_lock_decref((void *)&it->d.lustre.it_lock_handle, - it->d.lustre.it_lock_mode); - memcpy(&it->d.lustre.it_lock_handle, &plock, - sizeof(plock)); - it->d.lustre.it_lock_mode = pmode; - - } else if (pmode) - ldlm_lock_decref(&plock, pmode); + body = req_capsule_server_get(&(*reqp)->rq_pill, &RMF_MDT_BODY); + if (body == NULL) + RETURN(-EPROTO); + + /* + * Not cross-ref case, just get out of here. + */ + if (!(body->valid & OBD_MD_MDS)) + RETURN(0); + + /* + * Unfortunately, we have to lie to MDC/MDS to retrieve + * attributes llite needs and provideproper locking. + */ + if (it->it_op & IT_LOOKUP) + it->it_op = IT_GETATTR; + + /* + * We got LOOKUP lock, but we really need attrs. + */ + pmode = it->d.lustre.it_lock_mode; + if (pmode) { + plock.cookie = it->d.lustre.it_lock_handle; + it->d.lustre.it_lock_mode = 0; + it->d.lustre.it_data = NULL; + } - ptlrpc_req_finished(*reqp); - *reqp = req; + LASSERT(fid_is_sane(&body->fid1)); + + tgt = lmv_find_target(lmv, &body->fid1); + if (IS_ERR(tgt)) + GOTO(out, rc = PTR_ERR(tgt)); + + OBD_ALLOC_PTR(op_data); + if (op_data == NULL) + GOTO(out, rc = -ENOMEM); + + op_data->op_fid1 = body->fid1; + op_data->op_bias = MDS_CROSS_REF; + + CDEBUG(D_INODE, + "REMOTE_INTENT with fid="DFID" -> mds #%d\n", + PFID(&body->fid1), tgt->ltd_idx); + + it->d.lustre.it_disposition &= ~DISP_ENQ_COMPLETE; + rc = md_intent_lock(tgt->ltd_exp, op_data, lmm, lmmsize, it, + flags, &req, cb_blocking, extra_lock_flags); + if (rc) + GOTO(out_free_op_data, rc); + + /* + * LLite needs LOOKUP lock to track dentry revocation in order to + * maintain dcache consistency. Thus drop UPDATE lock here and put + * LOOKUP in request. + */ + if (it->d.lustre.it_lock_mode != 0) { + ldlm_lock_decref((void *)&it->d.lustre.it_lock_handle, + it->d.lustre.it_lock_mode); + it->d.lustre.it_lock_mode = 0; } - RETURN(rc); + it->d.lustre.it_lock_handle = plock.cookie; + it->d.lustre.it_lock_mode = pmode; + + EXIT; +out_free_op_data: + OBD_FREE_PTR(op_data); +out: + if (rc && pmode) + ldlm_lock_decref(&plock, pmode); + + ptlrpc_req_finished(*reqp); + *reqp = req; + return rc; } -int lmv_intent_open(struct obd_export *exp, struct ll_uctxt *uctxt, - struct ll_fid *pfid, const char *name, int len, - void *lmm, int lmmsize, struct ll_fid *cfid, - struct lookup_intent *it, int flags, - struct ptlrpc_request **reqp, - ldlm_blocking_callback cb_blocking) +/* + * IT_OPEN is intended to open (and create, possible) an object. Parent (pid) + * may be split dir. + */ +int lmv_intent_open(struct obd_export *exp, struct md_op_data *op_data, + void *lmm, int lmmsize, struct lookup_intent *it, + int flags, struct ptlrpc_request **reqp, + ldlm_blocking_callback cb_blocking, + int extra_lock_flags) { - struct obd_device *obd = exp->exp_obd; - struct lmv_obd *lmv = &obd->u.lmv; - struct mds_body *body = NULL; - struct ll_fid rpfid = *pfid; - struct lmv_obj *obj; - struct mea *mea; - int rc, mds; + struct obd_device *obd = exp->exp_obd; + struct lu_fid rpid = op_data->op_fid1; + struct lmv_obd *lmv = &obd->u.lmv; + struct md_op_data *sop_data; + struct lmv_stripe_md *mea; + struct lmv_tgt_desc *tgt; + struct mdt_body *body; + struct lmv_object *obj; + int rc; + int loop = 0; + int sidx; ENTRY; - /* IT_OPEN is intended to open (and create, possible) an object. - * parent (pfid) may be splitted dir */ + OBD_ALLOC_PTR(sop_data); + if (sop_data == NULL) + RETURN(-ENOMEM); + + /* save op_data fro repeat case */ + *sop_data = *op_data; repeat: - mds = rpfid.mds; - obj = lmv_grab_obj(obd, &rpfid, 0); + + ++loop; + LASSERT(loop <= 2); + obj = lmv_object_find(obd, &rpid); if (obj) { - /* directory is already splitted, so we have to forward - * request to the right MDS */ - mds = raw_name2idx(obj->objcount, (char *)name, len); - rpfid = obj->objs[mds].fid; - CDEBUG(D_OTHER, "forward to MDS #%u\n", mds); + /* + * Directory is already split, so we have to forward request to + * the right MDS. + */ + sidx = raw_name2idx(obj->lo_hashtype, obj->lo_objcount, + (char *)op_data->op_name, + op_data->op_namelen); + + rpid = obj->lo_stripes[sidx].ls_fid; + + sop_data->op_mds = obj->lo_stripes[sidx].ls_mds; + tgt = lmv_get_target(lmv, sop_data->op_mds); + sop_data->op_bias &= ~MDS_CHECK_SPLIT; + lmv_object_put(obj); + + CDEBUG(D_INODE, + "Choose slave dir ("DFID") -> mds #%d\n", + PFID(&rpid), tgt->ltd_idx); + } else { + sop_data->op_bias |= MDS_CHECK_SPLIT; + tgt = lmv_find_target(lmv, &rpid); + sop_data->op_mds = tgt->ltd_idx; } + if (IS_ERR(tgt)) + GOTO(out_free_sop_data, rc = PTR_ERR(tgt)); + + sop_data->op_fid1 = rpid; + + if (it->it_op & IT_CREAT) { + /* + * For open with IT_CREATE and for IT_CREATE cases allocate new + * fid and setup FLD for it. + */ + sop_data->op_fid3 = sop_data->op_fid2; + rc = lmv_fid_alloc(exp, &sop_data->op_fid2, sop_data); + if (rc) + GOTO(out_free_sop_data, rc); - rc = md_intent_lock(lmv->tgts[mds].exp, uctxt, &rpfid, name, - len, lmm, lmmsize, cfid, it, flags, reqp, - cb_blocking); - lmv_put_obj(obj); - if (rc == -ERESTART) { - /* directory got splitted. time to update local object - * and repeat the request with proper MDS */ - LASSERT(fid_equal(pfid, &rpfid)); - rc = lmv_get_mea_and_update_object(exp, &rpfid); - if (rc == 0) { - ptlrpc_req_finished(*reqp); + if (rc == -ERESTART) goto repeat; - } - } - if (rc != 0) - RETURN(rc); - - /* okay, MDS has returned success. probably name has been - * resolved in remote inode */ - rc = lmv_handle_remote_inode(exp, uctxt, lmm, lmmsize, it, flags, - reqp, cb_blocking); - if (rc != 0) { - LASSERT(rc < 0); - RETURN(rc); + else if (rc) + GOTO(out_free_sop_data, rc); } - /* caller may use attrs MDS returns on IT_OPEN lock request - * so, we have to update them for splitted dir */ - body = lustre_msg_buf((*reqp)->rq_repmsg, 1, sizeof(*body)); - LASSERT(body != NULL); - cfid = &body->fid1; - obj = lmv_grab_obj(obd, cfid, 0); - if (rc == 0 && !obj && (mea = is_body_of_splitted_dir(*reqp, 1))) { - /* wow! this is splitted dir, we'd like to handle it */ - rc = lmv_create_obj_from_attrs(exp, &body->fid1, mea); - } - obj = lmv_grab_obj(obd, cfid, 0); - if (obj) { - /* this is splitted dir and we'd want to get attrs */ - CDEBUG(D_OTHER, "attrs from slaves for %lu/%lu/%lu\n", - (unsigned long) cfid->mds, - (unsigned long) cfid->id, - (unsigned long) cfid->generation); - rc = lmv_revalidate_slaves(exp, reqp, cfid, - it, 1, cb_blocking); - } else if (S_ISDIR(body->mode)) { - /*CWARN("hmmm, %lu/%lu/%lu has not lmv obj?!\n", - (unsigned long) cfid->mds, - (unsigned long) cfid->id, - (unsigned long) cfid->generation);*/ - } - lmv_put_obj(obj); - RETURN(rc); -} + CDEBUG(D_INODE, + "OPEN_INTENT with fid1="DFID", fid2="DFID", name='%s' -> mds #%d\n", + PFID(&sop_data->op_fid1), PFID(&sop_data->op_fid2), + sop_data->op_name, tgt->ltd_idx); -int lmv_intent_getattr(struct obd_export *exp, struct ll_uctxt *uctxt, - struct ll_fid *pfid, const char *name, int len, - void *lmm, int lmmsize, struct ll_fid *cfid, - struct lookup_intent *it, int flags, - struct ptlrpc_request **reqp, - ldlm_blocking_callback cb_blocking) -{ - struct obd_device *obd = exp->exp_obd; - struct lmv_obd *lmv = &obd->u.lmv; - struct mds_body *body = NULL; - struct ll_fid rpfid = *pfid; - struct lmv_obj *obj, *obj2; - struct mea *mea; - int rc = 0, mds; - ENTRY; + rc = md_intent_lock(tgt->ltd_exp, sop_data, lmm, lmmsize, it, flags, + reqp, cb_blocking, extra_lock_flags); - if (cfid) { - /* caller wants to revalidate attrs of obj - * we have to revalidate slaves if requested - * object is splitted directory */ - CDEBUG(D_OTHER, "revalidate attrs for %lu/%lu/%lu\n", - (unsigned long) cfid->mds, - (unsigned long) cfid->id, - (unsigned long) cfid->generation); - mds = cfid->mds; - obj = lmv_grab_obj(obd, cfid, 0); - if (obj) { - /* in fact, we need not this with current - * _intent_lock(), but it may change some day */ - rpfid = obj->objs[mds].fid; - } - rc = md_intent_lock(lmv->tgts[mds].exp, uctxt, &rpfid, name, - len, lmm, lmmsize, cfid, it, flags, reqp, - cb_blocking); - if (obj && rc >= 0) { - /* this is splitted dir. in order to optimize things - * a bit, we consider obj valid updating missing - * parts. FIXME: do we need to return any lock here? - * it would be fine if we don't. this means that - * nobody should use UPDATE lock to notify about - * object removal */ - CDEBUG(D_OTHER, - "revalidate slaves for %lu/%lu/%lu, rc %d\n", - (unsigned long) cfid->mds, - (unsigned long) cfid->id, - (unsigned long) cfid->generation, rc); - rc = lmv_revalidate_slaves(exp, reqp, cfid, it, rc, - cb_blocking); + if (rc == -ERESTART) { + LASSERT(*reqp != NULL); + DEBUG_REQ(D_WARNING|D_RPCTRACE, *reqp, + "Got -ERESTART during open!\n"); + ptlrpc_req_finished(*reqp); + *reqp = NULL; + it->d.lustre.it_data = NULL; + + /* + * Directory got split. Time to update local object and repeat + * the request with proper MDS. + */ + LASSERT(lu_fid_eq(&op_data->op_fid1, &rpid)); + rc = lmv_handle_split(exp, &rpid); + if (rc == 0) { + /* We should reallocate child FID. */ + rc = lmv_allocate_slaves(obd, &rpid, op_data, + &sop_data->op_fid2); + if (rc == 0) + goto repeat; } - RETURN(rc); } - CDEBUG(D_OTHER, "INTENT getattr for %*s on %lu/%lu/%lu\n", - len, name, (unsigned long) pfid->mds, - (unsigned long) pfid->id, - (unsigned long) pfid->generation); - - mds = pfid->mds; - obj = lmv_grab_obj(obd, pfid, 0); - if (obj && len) { - /* directory is already splitted. calculate mds */ - mds = raw_name2idx(obj->objcount, (char *) name, len); - rpfid = obj->objs[mds].fid; - CDEBUG(D_OTHER, "forward to MDS #%u (slave %lu/%lu/%lu)\n", - mds, (unsigned long) rpfid.mds, - (unsigned long) rpfid.id, - (unsigned long) rpfid.generation); + if (rc != 0) + GOTO(out_free_sop_data, rc); + + /* + * Nothing is found, do not access body->fid1 as it is zero and thus + * pointless. + */ + if ((it->d.lustre.it_disposition & DISP_LOOKUP_NEG) && + !(it->d.lustre.it_disposition & DISP_OPEN_CREATE) && + !(it->d.lustre.it_disposition & DISP_OPEN_OPEN)) + GOTO(out_free_sop_data, rc = 0); + + /* + * Okay, MDS has returned success. Probably name has been resolved in + * remote inode. + */ + rc = lmv_intent_remote(exp, lmm, lmmsize, it, flags, reqp, + cb_blocking, extra_lock_flags); + if (rc != 0) { + LASSERT(rc < 0); + /* + * This is possible, that some userspace application will try to + * open file as directory and we will have -ENOTDIR here. As + * this is normal situation, we should not print error here, + * only debug info. + */ + CDEBUG(D_INODE, "Can't handle remote %s: dir "DFID"("DFID"):" + "%*s: %d\n", LL_IT2STR(it), PFID(&op_data->op_fid2), + PFID(&rpid), op_data->op_namelen, op_data->op_name, rc); + GOTO(out_free_sop_data, rc); } - rc = md_intent_lock(lmv->tgts[mds].exp, uctxt, &rpfid, name, - len, lmm, lmmsize, NULL, it, flags, reqp, - cb_blocking); - if (rc < 0) - RETURN(rc); - LASSERT(rc == 0); - /* okay, MDS has returned success. probably name has been - * resolved in remote inode */ - rc = lmv_handle_remote_inode(exp, uctxt, lmm, lmmsize, it, flags, - reqp, cb_blocking); - if (rc < 0) - RETURN(rc); - - body = lustre_msg_buf((*reqp)->rq_repmsg, 1, sizeof(*body)); + /* + * Caller may use attrs MDS returns on IT_OPEN lock request so, we have + * to update them for split dir. + */ + body = req_capsule_server_get(&(*reqp)->rq_pill, &RMF_MDT_BODY); LASSERT(body != NULL); - cfid = &body->fid1; - obj2 = lmv_grab_obj(obd, cfid, 0); - - if (rc == 0 && !obj2 && (mea = is_body_of_splitted_dir(*reqp, 1))) { - /* wow! this is splitted dir, we'd like to handle it */ - body = lustre_msg_buf((*reqp)->rq_repmsg, 1, sizeof(*body)); - LASSERT(body != NULL); - rc = lmv_create_obj_from_attrs(exp, &body->fid1, mea); - obj2 = lmv_grab_obj(obd, cfid, 0); + + /* + * Could not find object, FID is not present in response. + */ + if (!(body->valid & OBD_MD_FLID)) + GOTO(out_free_sop_data, rc = 0); + + obj = lmv_object_find(obd, &body->fid1); + if (obj == NULL) { + /* + * XXX: Capability for remote call! + */ + mea = lmv_get_mea(*reqp); + if (mea != NULL) { + obj = lmv_object_create(exp, &body->fid1, mea); + if (IS_ERR(obj)) + GOTO(out_free_sop_data, rc = (int)PTR_ERR(obj)); + } } - if (obj2) { - /* this is splitted dir and we'd want to get attrs */ - CDEBUG(D_OTHER, - "attrs from slaves for %lu/%lu/%lu, rc %d\n", - (unsigned long) cfid->mds, - (unsigned long) cfid->id, - (unsigned long) cfid->generation, rc); - rc = lmv_revalidate_slaves(exp, reqp, cfid, - it, 1, cb_blocking); + if (obj) { + /* + * This is split dir and we'd want to get attrs. + */ + CDEBUG(D_INODE, "Slave attributes for "DFID"\n", + PFID(&body->fid1)); + + rc = lmv_revalidate_slaves(exp, reqp, &body->fid1, it, 1, + cb_blocking, extra_lock_flags); + lmv_object_put(obj); } - RETURN(rc); + EXIT; +out_free_sop_data: + OBD_FREE_PTR(sop_data); + return rc; } -void lmv_update_body_from_obj(struct mds_body *body, struct lmv_inode *obj) -{ - /* update size */ - body->size += obj->size; - - /* update atime */ - /* update ctime */ - /* update mtime */ - /* update nlink */ -} - -int lmv_lookup_slaves(struct obd_export *exp, struct ptlrpc_request **reqp) +/* + * Handler for: getattr, lookup and revalidate cases. + */ +int lmv_intent_lookup(struct obd_export *exp, struct md_op_data *op_data, + void *lmm, int lmmsize, struct lookup_intent *it, + int flags, struct ptlrpc_request **reqp, + ldlm_blocking_callback cb_blocking, + int extra_lock_flags) { - struct obd_device *obd = exp->exp_obd; - struct lmv_obd *lmv = &obd->u.lmv; - struct mds_body *body = NULL; - struct lustre_handle *lockh; - struct ldlm_lock *lock; - struct mds_body *body2; - struct ll_uctxt uctxt; - struct lmv_obj *obj; - int i, rc = 0; + struct obd_device *obd = exp->exp_obd; + struct lu_fid rpid = op_data->op_fid1; + struct lmv_obd *lmv = &obd->u.lmv; + struct lmv_object *obj = NULL; + struct md_op_data *sop_data; + struct lmv_stripe_md *mea; + struct lmv_tgt_desc *tgt = NULL; + struct mdt_body *body; + int sidx; + int loop = 0; + int rc = 0; ENTRY; - LASSERT(reqp); - LASSERT(*reqp); - - /* master is locked. we'd like to take locks on slaves - * and update attributes to be returned from the slaves - * it's important that lookup is called in two cases: - * - for first time (dcache has no such a resolving yet - * - ->d_revalidate() returned false - * last case possible only if all the objs (master and - * all slaves aren't valid */ - - body = lustre_msg_buf((*reqp)->rq_repmsg, 1, sizeof(*body)); - LASSERT(body != NULL); - - obj = lmv_grab_obj(obd, &body->fid1, 0); - LASSERT(obj); - - CDEBUG(D_OTHER, "lookup slaves for %lu/%lu/%lu\n", - (unsigned long) body->fid1.mds, - (unsigned long) body->fid1.id, - (unsigned long) body->fid1.generation); - - uctxt.gid1 = 0; - uctxt.gid2 = 0; - for (i = 0; i < obj->objcount; i++) { - struct ll_fid fid = obj->objs[i].fid; - struct ptlrpc_request *req = NULL; - struct lookup_intent it; - - if (fid_equal(&fid, &obj->fid)) { - /* skip master obj */ - continue; - } - - CDEBUG(D_OTHER, "lookup slave %lu/%lu/%lu\n", - (unsigned long) fid.mds, - (unsigned long) fid.id, - (unsigned long) fid.generation); - - /* is obj valid? */ - memset(&it, 0, sizeof(it)); - it.it_op = IT_GETATTR; - rc = md_intent_lock(lmv->tgts[fid.mds].exp, &uctxt, &fid, - NULL, 0, NULL, 0, &fid, &it, 0, &req, - lmv_dirobj_blocking_ast); - lockh = (struct lustre_handle *) &it.d.lustre.it_lock_handle; - if (rc > 0) { - /* nice, this slave is valid */ - LASSERT(req == NULL); - CDEBUG(D_OTHER, "cached\n"); - goto release_lock; - } - - if (rc < 0) { - /* error during revalidation */ - GOTO(cleanup, rc); - } - - /* rc == 0, this means we have no such a lock and can't - * think obj is still valid. lookup it again */ - LASSERT(req == NULL); - req = NULL; - memset(&it, 0, sizeof(it)); - it.it_op = IT_GETATTR; - rc = md_intent_lock(lmv->tgts[fid.mds].exp, &uctxt, &fid, - NULL, 0, NULL, 0, NULL, &it, 0, &req, - lmv_dirobj_blocking_ast); - lockh = (struct lustre_handle *) &it.d.lustre.it_lock_handle; - LASSERT(rc <= 0); - if (rc < 0) { - /* error during lookup */ - GOTO(cleanup, rc); - } - - lock = ldlm_handle2lock(lockh); - LASSERT(lock); - lock->l_ast_data = obj; - atomic_inc(&obj->count); + OBD_ALLOC_PTR(sop_data); + if (sop_data == NULL) + RETURN(-ENOMEM); - body2 = lustre_msg_buf(req->rq_repmsg, 1, sizeof(*body2)); - LASSERT(body2); - - obj->objs[i].size = body2->size; - CDEBUG(D_OTHER, "fresh: %lu\n", - (unsigned long) obj->objs[i].size); - - LDLM_LOCK_PUT(lock); - - if (req) - ptlrpc_req_finished(req); -release_lock: - lmv_update_body_from_obj(body, obj->objs + i); - if (it.d.lustre.it_lock_mode) - ldlm_lock_decref(lockh, it.d.lustre.it_lock_mode); - } -cleanup: - RETURN(rc); -} - -int lmv_intent_lookup(struct obd_export *exp, struct ll_uctxt *uctxt, - struct ll_fid *pfid, const char *name, int len, - void *lmm, int lmmsize, struct ll_fid *cfid, - struct lookup_intent *it, int flags, - struct ptlrpc_request **reqp, - ldlm_blocking_callback cb_blocking) -{ - struct obd_device *obd = exp->exp_obd; - struct lmv_obd *lmv = &obd->u.lmv; - struct mds_body *body = NULL; - struct ll_fid rpfid = *pfid; - struct lmv_obj *obj; - struct mea *mea; - int rc, mds; - ENTRY; + *sop_data = *op_data; - /* IT_LOOKUP is intended to produce name -> fid resolving - * (let's call this lookup below) or to confirm requested - * resolving is still valid (let's call this revalidation) - * cfid != NULL specifies revalidation */ - - if (cfid) { - /* this is revalidation: we have to check is LOOKUP - * lock still valid for given fid. very important - * part is that we have to choose right mds because - * namespace is per mds */ - rpfid = *pfid; - obj = lmv_grab_obj(obd, pfid, 0); - if (obj) { - mds = raw_name2idx(obj->objcount, (char *) name, len); - rpfid = obj->objs[mds].fid; - lmv_put_obj(obj); - } - mds = rpfid.mds; - CDEBUG(D_OTHER, "revalidate lookup for %lu/%lu/%lu to %d MDS\n", - (unsigned long) cfid->mds, - (unsigned long) cfid->id, - (unsigned long) cfid->generation, mds); - rc = md_intent_lock(lmv->tgts[mds].exp, uctxt, pfid, name, - len, lmm, lmmsize, cfid, it, flags, - reqp, cb_blocking); - RETURN(rc); - } - - mds = pfid->mds; repeat: - /* this is lookup. during lookup we have to update all the - * attributes, because returned values will be put in struct - * inode */ - - obj = lmv_grab_obj(obd, pfid, 0); - if (obj && len) { - /* directory is already splitted. calculate mds */ - mds = raw_name2idx(obj->objcount, (char *) name, len); - rpfid = obj->objs[mds].fid; - lmv_put_obj(obj); - } - - rc = md_intent_lock(lmv->tgts[mds].exp, uctxt, &rpfid, name, - len, lmm, lmmsize, NULL, it, flags, reqp, - cb_blocking); - if (rc > 0) { - /* very interesting. it seems object is still valid - * but for some reason llite calls lookup, not revalidate */ - CWARN("lookup for %lu/%lu/%lu and data should be uptodate\n", - (unsigned long) rpfid.mds, - (unsigned long) rpfid.id, - (unsigned long) rpfid.generation); - LASSERT(*reqp == NULL); - RETURN(rc); + ++loop; + LASSERT(loop <= 2); + + obj = lmv_object_find(obd, &op_data->op_fid1); + if (obj && op_data->op_namelen) { + sidx = raw_name2idx(obj->lo_hashtype, + obj->lo_objcount, + (char *)op_data->op_name, + op_data->op_namelen); + rpid = obj->lo_stripes[sidx].ls_fid; + tgt = lmv_get_target(lmv, + obj->lo_stripes[sidx].ls_mds); + CDEBUG(D_INODE, + "Choose slave dir ("DFID") -> mds #%d\n", + PFID(&rpid), tgt->ltd_idx); + sop_data->op_bias &= ~MDS_CHECK_SPLIT; + } else { + tgt = lmv_find_target(lmv, &op_data->op_fid1); + sop_data->op_bias |= MDS_CHECK_SPLIT; } + if (obj) + lmv_object_put(obj); + + if (IS_ERR(tgt)) + GOTO(out_free_sop_data, rc = PTR_ERR(tgt)); + + if (!fid_is_sane(&sop_data->op_fid2)) + fid_zero(&sop_data->op_fid2); + + CDEBUG(D_INODE, + "LOOKUP_INTENT with fid1="DFID", fid2="DFID + ", name='%s' -> mds #%d\n", + PFID(&sop_data->op_fid1), PFID(&sop_data->op_fid2), + sop_data->op_name ? sop_data->op_name : "", + tgt->ltd_idx); + + sop_data->op_bias &= ~MDS_CROSS_REF; + sop_data->op_fid1 = rpid; + + rc = md_intent_lock(tgt->ltd_exp, sop_data, lmm, lmmsize, it, + flags, reqp, cb_blocking, extra_lock_flags); - if (rc == 0 && *reqp == NULL) { - /* once again, we're asked for lookup, not revalidate */ - CWARN("lookup for %lu/%lu/%lu and data should be uptodate\n", - (unsigned long) rpfid.mds, - (unsigned long) rpfid.id, - (unsigned long) rpfid.generation); - RETURN(rc); - } - if (rc == -ERESTART) { - /* directory got splitted since last update. this shouldn't - * be becasue splitting causes lock revocation, so revalidate - * had to fail and lookup on dir had to return mea */ - CWARN("we haven't knew about directory splitting!\n"); + LASSERT(*reqp != NULL); + DEBUG_REQ(D_WARNING|D_RPCTRACE, *reqp, + "Got -ERESTART during lookup!\n"); + ptlrpc_req_finished(*reqp); + *reqp = NULL; + it->d.lustre.it_data = 0; + + /* + * Directory got split since last update. This shouldn't be + * because splitting causes lock revocation, so revalidate had + * to fail and lookup on dir had to return mea. + */ LASSERT(obj == NULL); - rc = lmv_create_obj_from_attrs(exp, &rpfid, NULL); - if (rc) - RETURN(rc); + + obj = lmv_object_create(exp, &rpid, NULL); + if (IS_ERR(obj)) + GOTO(out_free_sop_data, rc = PTR_ERR(obj)); + lmv_object_put(obj); goto repeat; } + + if (rc < 0) + GOTO(out_free_sop_data, rc); + + if (obj && rc > 0) { + /* + * This is split dir. In order to optimize things a bit, we + * consider obj valid updating missing parts. + */ + CDEBUG(D_INODE, + "Revalidate slaves for "DFID", rc %d\n", + PFID(&op_data->op_fid1), rc); + + LASSERT(fid_is_sane(&op_data->op_fid2)); + rc = lmv_revalidate_slaves(exp, reqp, &op_data->op_fid1, it, rc, + cb_blocking, extra_lock_flags); + GOTO(out_free_sop_data, rc); + } + if (*reqp == NULL) + GOTO(out_free_sop_data, rc); + + /* + * MDS has returned success. Probably name has been resolved in + * remote inode. Let's check this. + */ + rc = lmv_intent_remote(exp, lmm, lmmsize, it, flags, + reqp, cb_blocking, extra_lock_flags); if (rc < 0) - RETURN(rc); + GOTO(out_free_sop_data, rc); + + /* + * Nothing is found, do not access body->fid1 as it is zero and thus + * pointless. + */ + if (it->d.lustre.it_disposition & DISP_LOOKUP_NEG) + GOTO(out_free_sop_data, rc = 0); + + LASSERT(*reqp != NULL); + LASSERT((*reqp)->rq_repmsg != NULL); + body = req_capsule_server_get(&(*reqp)->rq_pill, &RMF_MDT_BODY); + LASSERT(body != NULL); - /* okay, MDS has returned success. probably name has been - * resolved in remote inode */ - rc = lmv_handle_remote_inode(exp, uctxt, lmm, lmmsize, it, flags, - reqp, cb_blocking); + /* + * Could not find object, FID is not present in response. + */ + if (!(body->valid & OBD_MD_FLID)) + GOTO(out_free_sop_data, rc = 0); + + obj = lmv_object_find(obd, &body->fid1); + if (obj == NULL) { + /* + * XXX: Remote capability is not handled. + */ + mea = lmv_get_mea(*reqp); + if (mea != NULL) { + obj = lmv_object_create(exp, &body->fid1, mea); + if (IS_ERR(obj)) + GOTO(out_free_sop_data, rc = (int)PTR_ERR(obj)); + } + } else { + CDEBUG(D_INODE, "Slave attributes for "DFID", rc %d\n", + PFID(&body->fid1), rc); - if (rc == 0 && (mea = is_body_of_splitted_dir(*reqp, 1))) { - /* wow! this is splitted dir, we'd like to handle it */ - body = lustre_msg_buf((*reqp)->rq_repmsg, 1, sizeof(*body)); - LASSERT(body != NULL); - obj = lmv_grab_obj(obd, &body->fid1, 0); - if (!obj) - rc = lmv_create_obj_from_attrs(exp, &body->fid1, mea); - lmv_put_obj(obj); + rc = lmv_revalidate_slaves(exp, reqp, &body->fid1, it, 1, + cb_blocking, extra_lock_flags); + lmv_object_put(obj); } - RETURN(rc); + EXIT; +out_free_sop_data: + OBD_FREE_PTR(sop_data); + return rc; } -int lmv_intent_lock(struct obd_export *exp, struct ll_uctxt *uctxt, - struct ll_fid *pfid, const char *name, int len, - void *lmm, int lmmsize, struct ll_fid *cfid, - struct lookup_intent *it, int flags, - struct ptlrpc_request **reqp, - ldlm_blocking_callback cb_blocking) +int lmv_intent_lock(struct obd_export *exp, struct md_op_data *op_data, + void *lmm, int lmmsize, struct lookup_intent *it, + int flags, struct ptlrpc_request **reqp, + ldlm_blocking_callback cb_blocking, + int extra_lock_flags) { struct obd_device *obd = exp->exp_obd; - int rc = 0; + int rc; ENTRY; - LASSERT(it); - LASSERT(pfid); + LASSERT(it != NULL); + LASSERT(fid_is_sane(&op_data->op_fid1)); + + CDEBUG(D_INODE, "INTENT LOCK '%s' for '%*s' on "DFID"\n", + LL_IT2STR(it), op_data->op_namelen, op_data->op_name, + PFID(&op_data->op_fid1)); - CDEBUG(D_OTHER, "INTENT LOCK '%s' for '%*s' on %lu/%lu -> %u\n", - LL_IT2STR(it), len, name, (unsigned long) pfid->id, - (unsigned long) pfid->generation, pfid->mds); + rc = lmv_check_connect(obd); + if (rc) + RETURN(rc); - lmv_connect(obd); - if (it->it_op == IT_LOOKUP) - rc = lmv_intent_lookup(exp, uctxt, pfid, name, len, lmm, - lmmsize, cfid, it, flags, reqp, - cb_blocking); + if (it->it_op & (IT_LOOKUP | IT_GETATTR)) + rc = lmv_intent_lookup(exp, op_data, lmm, lmmsize, it, + flags, reqp, cb_blocking, + extra_lock_flags); else if (it->it_op & IT_OPEN) - rc = lmv_intent_open(exp, uctxt, pfid, name, len, lmm, - lmmsize, cfid, it, flags, reqp, - cb_blocking); - else if (it->it_op == IT_GETATTR) - rc = lmv_intent_getattr(exp, uctxt, pfid, name, len, lmm, - lmmsize, cfid, it, flags, reqp, - cb_blocking); + rc = lmv_intent_open(exp, op_data, lmm, lmmsize, it, + flags, reqp, cb_blocking, + extra_lock_flags); else LBUG(); RETURN(rc); } int lmv_revalidate_slaves(struct obd_export *exp, struct ptlrpc_request **reqp, - struct ll_fid *mfid, struct lookup_intent *oit, - int master_valid, ldlm_blocking_callback cb_blocking) + const struct lu_fid *mid, struct lookup_intent *oit, + int master_valid, ldlm_blocking_callback cb_blocking, + int extra_lock_flags) { - struct obd_device *obd = exp->exp_obd; - struct ptlrpc_request *mreq = *reqp; - struct lmv_obd *lmv = &obd->u.lmv; - struct lustre_handle master_lockh; - unsigned long size = 0; - struct ldlm_lock *lock; - struct mds_body *body; - struct ll_uctxt uctxt; - struct lmv_obj *obj; - int master_lock_mode; - int i, rc = 0; + struct obd_device *obd = exp->exp_obd; + struct lmv_obd *lmv = &obd->u.lmv; + int master_lockm = 0; + struct lustre_handle *lockh = NULL; + struct ptlrpc_request *mreq = *reqp; + struct lustre_handle master_lockh = { 0 }; + struct md_op_data *op_data; + struct ldlm_lock *lock; + unsigned long size = 0; + struct mdt_body *body; + struct lmv_object *obj; + int i; + int rc = 0; + struct lu_fid fid; + struct ptlrpc_request *req; + ldlm_blocking_callback cb; + struct lookup_intent it; + struct lmv_tgt_desc *tgt; + int master; ENTRY; - /* we have to loop over the subobjects, check validity and update - * them from MDSs if needed. it's very useful that we need not to - * update all the fields. say, common fields (that are equal on - * all the subojects need not to be update, another fields (i_size, - * for example) are cached all the time */ - obj = lmv_grab_obj(obd, mfid, 0); - LASSERT(obj); - - master_lock_mode = 0; - uctxt.gid1 = 0; - uctxt.gid2 = 0; - for (i = 0; i < obj->objcount; i++) { - struct ll_fid fid = obj->objs[i].fid; - struct lustre_handle *lockh = NULL; - struct ptlrpc_request *req = NULL; - ldlm_blocking_callback cb; - struct lookup_intent it; - int master = 0; - - CDEBUG(D_OTHER, "revalidate subobj %lu/%lu/%lu\n", - (unsigned long) fid.mds, - (unsigned long) fid.id, - (unsigned long) fid.generation); - + CDEBUG(D_INODE, "Revalidate master obj "DFID"\n", PFID(mid)); + + OBD_ALLOC_PTR(op_data); + if (op_data == NULL) + RETURN(-ENOMEM); + + /* + * We have to loop over the subobjects, check validity and update them + * from MDS if needed. It's very useful that we need not to update all + * the fields. Say, common fields (that are equal on all the subojects + * need not to be update, another fields (i_size, for example) are + * cached all the time. + */ + obj = lmv_object_find_lock(obd, mid); + if (obj == NULL) + RETURN(-EALREADY); + + for (i = 0; i < obj->lo_objcount; i++) { + fid = obj->lo_stripes[i].ls_fid; + master = lu_fid_eq(&fid, &obj->lo_fid); + cb = master ? cb_blocking : lmv_blocking_ast; + + /* + * We need i_size and we would like to check possible cached locks, + * so this is is IT_GETATTR intent. + */ memset(&it, 0, sizeof(it)); it.it_op = IT_GETATTR; - cb = lmv_dirobj_blocking_ast; - - if (fid_equal(&fid, &obj->fid)) { - if (master_valid) { - /* lmv_intent_getattr() already checked - * validness and took the lock */ - if (mreq) { - /* it even got the reply - * refresh attrs from that reply */ - body = lustre_msg_buf(mreq->rq_repmsg, - 1,sizeof(*body)); - LASSERT(body != NULL); - goto update; - } - /* take already cached attrs into account */ - CDEBUG(D_OTHER, - "master is locked and cached\n"); - goto release_lock; + + if (master && master_valid) { + /* + * lmv_intent_lookup() already checked + * validness and took the lock. + */ + if (mreq != NULL) { + body = req_capsule_server_get(&mreq->rq_pill, + &RMF_MDT_BODY); + LASSERT(body != NULL); + goto update; } - master = 1; - cb = cb_blocking; + /* + * Take already cached attrs into account. + */ + CDEBUG(D_INODE, + "Master "DFID"is locked and cached\n", + PFID(mid)); + goto release_lock; } - /* is obj valid? */ - rc = md_intent_lock(lmv->tgts[fid.mds].exp, &uctxt, &fid, - NULL, 0, NULL, 0, &fid, &it, 0, &req, cb); - lockh = (struct lustre_handle *) &it.d.lustre.it_lock_handle; - if (rc > 0) { - /* nice, this slave is valid */ - LASSERT(req == NULL); - CDEBUG(D_OTHER, "cached\n"); + /* + * Prepare op_data for revalidating. Note that @fid2 shuld be + * defined otherwise it will go to server and take new lock + * which is what we reall not need here. + */ + memset(op_data, 0, sizeof(*op_data)); + op_data->op_bias = MDS_CROSS_REF; + op_data->op_fid1 = fid; + op_data->op_fid2 = fid; + req = NULL; + + tgt = lmv_get_target(lmv, obj->lo_stripes[i].ls_mds); + if (IS_ERR(tgt)) + GOTO(cleanup, rc = PTR_ERR(tgt)); + + CDEBUG(D_INODE, "Revalidate slave obj "DFID" -> mds #%d\n", + PFID(&fid), tgt->ltd_idx); + + rc = md_intent_lock(tgt->ltd_exp, op_data, NULL, 0, &it, 0, + &req, cb, extra_lock_flags); + + lockh = (struct lustre_handle *)&it.d.lustre.it_lock_handle; + if (rc > 0 && req == NULL) { + /* + * Nice, this slave is valid. + */ + CDEBUG(D_INODE, "Cached slave "DFID"\n", PFID(&fid)); goto release_lock; } - if (rc < 0) { - /* error during revalidation */ + if (rc < 0) GOTO(cleanup, rc); - } - /* rc == 0, this means we have no such a lock and can't - * think obj is still valid. lookup it again */ - LASSERT(req == NULL); - req = NULL; - memset(&it, 0, sizeof(it)); - it.it_op = IT_GETATTR; - rc = md_intent_lock(lmv->tgts[fid.mds].exp, &uctxt, &fid, - NULL, 0, NULL, 0, NULL, &it, 0, &req, cb); - lockh = (struct lustre_handle *) &it.d.lustre.it_lock_handle; - LASSERT(rc <= 0); - if (rc < 0) { - /* error during lookup */ - GOTO(cleanup, rc); - } - if (master) { - LASSERT(master_valid == 0); - /* save lock on master to be returned to the caller */ - CDEBUG(D_OTHER, "no lock on master yet\n"); + /* + * Save lock on master to be returned to the caller. + */ + CDEBUG(D_INODE, "No lock on master "DFID" yet\n", + PFID(mid)); memcpy(&master_lockh, lockh, sizeof(master_lockh)); - master_lock_mode = it.d.lustre.it_lock_mode; + master_lockm = it.d.lustre.it_lock_mode; it.d.lustre.it_lock_mode = 0; } else { - /* this is slave. we want to control it */ + /* + * This is slave. We want to control it. + */ lock = ldlm_handle2lock(lockh); - LASSERT(lock); - lock->l_ast_data = obj; - atomic_inc(&obj->count); + LASSERT(lock != NULL); + lock->l_ast_data = lmv_object_get(obj); LDLM_LOCK_PUT(lock); } if (*reqp == NULL) { - /* this is first reply, we'll use it to return - * updated data back to the caller */ - LASSERT(req); + /* + * This is first reply, we'll use it to return updated + * data back to the caller. + */ + LASSERT(req != NULL); ptlrpc_request_addref(req); *reqp = req; - } - body = lustre_msg_buf(req->rq_repmsg, 1, sizeof(*body)); - LASSERT(body); - + body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY); + LASSERT(body != NULL); + update: - obj->objs[i].size = body->size; - CDEBUG(D_OTHER, "fresh: %lu\n", - (unsigned long) obj->objs[i].size); + obj->lo_stripes[i].ls_size = body->size; + + CDEBUG(D_INODE, "Fresh size %lu from "DFID"\n", + (unsigned long)obj->lo_stripes[i].ls_size, PFID(&fid)); if (req) ptlrpc_req_finished(req); release_lock: - size += obj->objs[i].size; - if (it.d.lustre.it_lock_mode) + size += obj->lo_stripes[i].ls_size; + + if (it.d.lustre.it_lock_mode && lockh) { ldlm_lock_decref(lockh, it.d.lustre.it_lock_mode); + it.d.lustre.it_lock_mode = 0; + } } if (*reqp) { - /* some attrs got refreshed, we have reply and it's time - * to put fresh attrs to it */ - CDEBUG(D_OTHER, "return refreshed attrs: size = %lu\n", - (unsigned long) size); - body = lustre_msg_buf((*reqp)->rq_repmsg, 1, sizeof(*body)); - LASSERT(body); - /* FIXME: what about another attributes? */ + /* + * Some attrs got refreshed, we have reply and it's time to put + * fresh attrs to it. + */ + CDEBUG(D_INODE, "Return refreshed attrs: size = %lu for "DFID"\n", + (unsigned long)size, PFID(mid)); + + body = req_capsule_server_get(&(*reqp)->rq_pill, &RMF_MDT_BODY); + LASSERT(body != NULL); body->size = size; + if (mreq == NULL) { - /* very important to maintain lli->mds the same - * because of revalidation. mreq == NULL means - * that caller has no reply and the only attr - * we can return is size */ + /* + * Very important to maintain mds num the same because + * of revalidation. mreq == NULL means that caller has + * no reply and the only attr we can return is size. + */ body->valid = OBD_MD_FLSIZE; - body->mds = obj->fid.mds; } if (master_valid == 0) { - memcpy(&oit->d.lustre.it_lock_handle, - &master_lockh, sizeof(master_lockh)); - oit->d.lustre.it_lock_mode = master_lock_mode; + oit->d.lustre.it_lock_handle = master_lockh.cookie; + oit->d.lustre.it_lock_mode = master_lockm; } rc = 0; } else { - /* it seems all the attrs are fresh and we did no request */ - CDEBUG(D_OTHER, "all the attrs were fresh\n"); + /* + * It seems all the attrs are fresh and we did no request. + */ + CDEBUG(D_INODE, "All the attrs were fresh on "DFID"\n", + PFID(mid)); if (master_valid == 0) - oit->d.lustre.it_lock_mode = master_lock_mode; + oit->d.lustre.it_lock_mode = master_lockm; rc = 1; } + + EXIT; cleanup: - RETURN(rc); + OBD_FREE_PTR(op_data); + lmv_object_put_unlock(obj); + return rc; } +int lmv_allocate_slaves(struct obd_device *obd, struct lu_fid *pid, + struct md_op_data *op, struct lu_fid *fid) +{ + struct lmv_obd *lmv = &obd->u.lmv; + struct lmv_object *obj; + mdsno_t mds; + int sidx; + int rc; + ENTRY; + + obj = lmv_object_find(obd, pid); + if (obj == NULL) + RETURN(-EALREADY); + + sidx = raw_name2idx(obj->lo_hashtype, obj->lo_objcount, + (char *)op->op_name, op->op_namelen); + mds = obj->lo_stripes[sidx].ls_mds; + lmv_object_put(obj); + + rc = __lmv_fid_alloc(lmv, fid, mds); + if (rc) { + CERROR("Can't allocate fid, rc %d\n", rc); + RETURN(rc); + } + + CDEBUG(D_INODE, "Allocate new fid "DFID" for slave " + "obj -> mds #%x\n", PFID(fid), mds); + + RETURN(rc); +}