1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
6 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 only,
10 * as published by the Free Software Foundation.
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License version 2 for more details (a copy is included
16 * in the LICENSE file that accompanied this code).
18 * You should have received a copy of the GNU General Public License
19 * version 2 along with this program; If not, see
20 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
22 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23 * CA 95054 USA or visit www.sun.com if you need additional information or
29 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
30 * Use is subject to license terms.
32 * Copyright (c) 2011, 2012, Whamcloud, Inc.
35 * This file is part of Lustre, http://www.lustre.org/
36 * Lustre is a trademark of Sun Microsystems, Inc.
40 # define EXPORT_SYMTAB
42 #define DEBUG_SUBSYSTEM S_LMV
44 #include <linux/slab.h>
45 #include <linux/module.h>
46 #include <linux/init.h>
47 #include <linux/slab.h>
48 #include <linux/pagemap.h>
49 #include <asm/div64.h>
50 #include <linux/seq_file.h>
51 #include <linux/namei.h>
52 #include <linux/lustre_intent.h>
54 #include <liblustre.h>
57 #include <obd_support.h>
58 #include <lustre/lustre_idl.h>
59 #include <lustre_lib.h>
60 #include <lustre_net.h>
61 #include <lustre_dlm.h>
62 #include <obd_class.h>
63 #include <lprocfs_status.h>
64 #include "lmv_internal.h"
/*
 * Follow up on a reply (held in *reqp) that may point at an object living on
 * another MDS.
 *
 * What the visible code does:
 *  - reads the mdt_body from the reply in *reqp and tests body->valid for
 *    OBD_MD_MDS (the note below calls the other case "not cross-ref");
 *  - turns an IT_LOOKUP intent into IT_GETATTR;
 *  - stashes the lock mode/handle currently recorded in @it (pmode/plock)
 *    and clears them together with it_data;
 *  - resolves the target for body->fid1 with lmv_find_target(), allocates a
 *    fresh op_data (op_fid1 = body->fid1, op_bias = MDS_CROSS_REF) and issues
 *    a second md_intent_lock() whose reply lands in the local req;
 *  - drops the lock obtained by that second call, if any, and writes the
 *    stashed handle/mode back into @it;
 *  - frees op_data; ldlm_lock_decref(&plock, pmode) and
 *    ptlrpc_req_finished(*reqp) appear in the cleanup tail.
 *
 * The visible error codes are PTR_ERR(tgt) and -ENOMEM; other values come
 * from md_intent_lock().
 *
 * NOTE(review): this view does not contain every line of the function (the
 * last parameter, the opening brace, the declaration of pmode/rc, several
 * branch conditions, the goto labels and the final return are missing), so
 * the exact conditions guarding the GOTO()s and the cleanup calls must be
 * confirmed against the complete file.
 */
66 int lmv_intent_remote(struct obd_export *exp, void *lmm,
67 int lmmsize, struct lookup_intent *it,
68 int flags, struct ptlrpc_request **reqp,
69 ldlm_blocking_callback cb_blocking,
72 struct obd_device *obd = exp->exp_obd;
73 struct lmv_obd *lmv = &obd->u.lmv;
74 struct ptlrpc_request *req = NULL;
75 struct lustre_handle plock;
76 struct md_op_data *op_data;
77 struct lmv_tgt_desc *tgt;
78 struct mdt_body *body;
/* The reply of the request the caller already performed. */
83 body = req_capsule_server_get(&(*reqp)->rq_pill, &RMF_MDT_BODY);
88 * Not cross-ref case, just get out of here.
90 if (!(body->valid & OBD_MD_MDS))
94 * Unfortunately, we have to lie to MDC/MDS to retrieve
95 * attributes llite needs and provideproper locking.
97 if (it->it_op & IT_LOOKUP)
98 it->it_op = IT_GETATTR;
101 * We got LOOKUP lock, but we really need attrs.
/* Stash the lock recorded in @it; it is written back further down. */
103 pmode = it->d.lustre.it_lock_mode;
105 plock.cookie = it->d.lustre.it_lock_handle;
106 it->d.lustre.it_lock_mode = 0;
107 it->d.lustre.it_data = NULL;
110 LASSERT(fid_is_sane(&body->fid1));
/* Target that serves the fid returned in the first reply. */
112 tgt = lmv_find_target(lmv, &body->fid1);
114 GOTO(out, rc = PTR_ERR(tgt));
116 OBD_ALLOC_PTR(op_data);
118 GOTO(out, rc = -ENOMEM);
120 op_data->op_fid1 = body->fid1;
121 op_data->op_bias = MDS_CROSS_REF;
124 "REMOTE_INTENT with fid="DFID" -> mds #%d\n",
125 PFID(&body->fid1), tgt->ltd_idx);
/* Clear the completion flag before enqueueing again with the same intent. */
127 it->d.lustre.it_disposition &= ~DISP_ENQ_COMPLETE;
128 rc = md_intent_lock(tgt->ltd_exp, op_data, lmm, lmmsize, it,
129 flags, &req, cb_blocking, extra_lock_flags);
131 GOTO(out_free_op_data, rc);
134 * LLite needs LOOKUP lock to track dentry revocation in order to
135 * maintain dcache consistency. Thus drop UPDATE lock here and put
138 if (it->d.lustre.it_lock_mode != 0) {
139 ldlm_lock_decref((void *)&it->d.lustre.it_lock_handle,
140 it->d.lustre.it_lock_mode);
141 it->d.lustre.it_lock_mode = 0;
/* Hand the stashed lock back to the caller through @it. */
143 it->d.lustre.it_lock_handle = plock.cookie;
144 it->d.lustre.it_lock_mode = pmode;
/* Release the op_data allocated above. */
148 OBD_FREE_PTR(op_data);
151 ldlm_lock_decref(&plock, pmode);
153 ptlrpc_req_finished(*reqp);
159 * IT_OPEN is intended to open (and possibly create) an object. Parent (pid)
/*
 * Intent handler for IT_OPEN (dispatched from lmv_intent_lock()).
 *
 * The function works on a private copy of @op_data (sop_data), which is
 * allocated first and released with OBD_FREE_PTR() at the end.
 *
 * Visible steps:
 *  - look up the lmv object of the parent fid (rpid, initialised from
 *    op_data->op_fid1); in the "already split" path the stripe is chosen with
 *    raw_name2idx() over op_name/op_namelen, rpid and op_mds are taken from
 *    that stripe and MDS_CHECK_SPLIT is cleared; in the other path
 *    MDS_CHECK_SPLIT is set and the target comes from lmv_find_target();
 *  - for IT_CREAT the current op_fid2 is saved in op_fid3 and a new fid is
 *    allocated into op_fid2 with lmv_fid_alloc();
 *  - md_intent_lock() is sent to the chosen target; on -ERESTART the request
 *    is finished, lmv_handle_split() is called and lmv_allocate_slaves()
 *    reallocates the child fid;
 *  - a negative lookup that neither created nor opened anything ends with
 *    rc = 0;
 *  - lmv_intent_remote() is called to deal with a possible cross-ref reply;
 *  - if the reply body carries OBD_MD_FLID, the lmv object for body->fid1 is
 *    looked up (lmv_get_mea()/lmv_object_create() are used on the way) and
 *    lmv_revalidate_slaves() is called with master_valid = 1.
 *
 * NOTE(review): a number of lines (braces, branch conditions, the retry
 * path after -ERESTART, labels and the return statement) are not visible in
 * this view; confirm the precise control flow against the complete file.
 */
162 int lmv_intent_open(struct obd_export *exp, struct md_op_data *op_data,
163 void *lmm, int lmmsize, struct lookup_intent *it,
164 int flags, struct ptlrpc_request **reqp,
165 ldlm_blocking_callback cb_blocking,
166 int extra_lock_flags)
168 struct obd_device *obd = exp->exp_obd;
169 struct lu_fid rpid = op_data->op_fid1;
170 struct lmv_obd *lmv = &obd->u.lmv;
171 struct md_op_data *sop_data;
172 struct lmv_stripe_md *mea;
173 struct lmv_tgt_desc *tgt;
174 struct mdt_body *body;
175 struct lmv_object *obj;
181 OBD_ALLOC_PTR(sop_data);
182 if (sop_data == NULL)
185 /* save op_data fro repeat case */
186 *sop_data = *op_data;
/* Is there a cached lmv object for the parent fid? */
192 obj = lmv_object_find(obd, &rpid);
195 * Directory is already split, so we have to forward request to
/* Stripe index is derived from the entry name. */
198 sidx = raw_name2idx(obj->lo_hashtype, obj->lo_objcount,
199 (char *)op_data->op_name,
200 op_data->op_namelen);
202 rpid = obj->lo_stripes[sidx].ls_fid;
204 sop_data->op_mds = obj->lo_stripes[sidx].ls_mds;
205 tgt = lmv_get_target(lmv, sop_data->op_mds);
206 sop_data->op_bias &= ~MDS_CHECK_SPLIT;
210 "Choose slave dir ("DFID") -> mds #%d\n",
211 PFID(&rpid), tgt->ltd_idx);
213 sop_data->op_bias |= MDS_CHECK_SPLIT;
214 tgt = lmv_find_target(lmv, &rpid);
215 sop_data->op_mds = tgt->ltd_idx;
218 GOTO(out_free_sop_data, rc = PTR_ERR(tgt));
220 sop_data->op_fid1 = rpid;
222 if (it->it_op & IT_CREAT) {
224 * For open with IT_CREATE and for IT_CREATE cases allocate new
225 * fid and setup FLD for it.
/* Keep the previous op_fid2 in op_fid3 before it is overwritten. */
227 sop_data->op_fid3 = sop_data->op_fid2;
228 rc = lmv_fid_alloc(exp, &sop_data->op_fid2, sop_data);
230 GOTO(out_free_sop_data, rc);
235 GOTO(out_free_sop_data, rc);
239 "OPEN_INTENT with fid1="DFID", fid2="DFID", name='%s' -> mds #%d\n",
240 PFID(&sop_data->op_fid1), PFID(&sop_data->op_fid2),
241 sop_data->op_name, tgt->ltd_idx);
243 rc = md_intent_lock(tgt->ltd_exp, sop_data, lmm, lmmsize, it, flags,
244 reqp, cb_blocking, extra_lock_flags);
246 if (rc == -ERESTART) {
247 LASSERT(*reqp != NULL);
248 DEBUG_REQ(D_WARNING|D_RPCTRACE, *reqp,
249 "Got -ERESTART during open!\n");
250 ptlrpc_req_finished(*reqp);
252 it->d.lustre.it_data = NULL;
255 * Directory got split. Time to update local object and repeat
256 * the request with proper MDS.
258 LASSERT(lu_fid_eq(&op_data->op_fid1, &rpid));
259 rc = lmv_handle_split(exp, &rpid);
261 /* We should reallocate child FID. */
262 rc = lmv_allocate_slaves(obd, &rpid, op_data,
270 GOTO(out_free_sop_data, rc);
273 * Nothing is found, do not access body->fid1 as it is zero and thus
276 if ((it->d.lustre.it_disposition & DISP_LOOKUP_NEG) &&
277 !(it->d.lustre.it_disposition & DISP_OPEN_CREATE) &&
278 !(it->d.lustre.it_disposition & DISP_OPEN_OPEN))
279 GOTO(out_free_sop_data, rc = 0);
282 * Okay, MDS has returned success. Probably name has been resolved in
285 rc = lmv_intent_remote(exp, lmm, lmmsize, it, flags, reqp,
286 cb_blocking, extra_lock_flags);
290 * This is possible, that some userspace application will try to
291 * open file as directory and we will have -ENOTDIR here. As
292 * this is normal situation, we should not print error here,
295 CDEBUG(D_INODE, "Can't handle remote %s: dir "DFID"("DFID"):"
296 "%*s: %d\n", LL_IT2STR(it), PFID(&op_data->op_fid2),
297 PFID(&rpid), op_data->op_namelen, op_data->op_name, rc);
298 GOTO(out_free_sop_data, rc);
302 * Caller may use attrs MDS returns on IT_OPEN lock request so, we have
303 * to update them for split dir.
305 body = req_capsule_server_get(&(*reqp)->rq_pill, &RMF_MDT_BODY);
306 LASSERT(body != NULL);
309 * Could not find object, FID is not present in response.
311 if (!(body->valid & OBD_MD_FLID))
312 GOTO(out_free_sop_data, rc = 0);
/* From here on obj refers to the object named in the reply. */
314 obj = lmv_object_find(obd, &body->fid1);
317 * XXX: Capability for remote call!
319 mea = lmv_get_mea(*reqp);
321 obj = lmv_object_create(exp, &body->fid1, mea);
323 GOTO(out_free_sop_data, rc = (int)PTR_ERR(obj));
329 * This is split dir and we'd want to get attrs.
331 CDEBUG(D_INODE, "Slave attributes for "DFID"\n",
/* master_valid is passed as 1 here. */
334 rc = lmv_revalidate_slaves(exp, reqp, &body->fid1, it, 1,
335 cb_blocking, extra_lock_flags);
/* Release the private copy of op_data. */
340 OBD_FREE_PTR(sop_data);
345 * Handler for: getattr, lookup and revalidate cases.
/*
 * Intent handler for the lookup/getattr/layout intents (dispatched from
 * lmv_intent_lock()).
 *
 * Like lmv_intent_open(), it operates on a private copy of @op_data
 * (sop_data) that is freed with OBD_FREE_PTR() at the end.
 *
 * Visible steps:
 *  - look up the lmv object for op_data->op_fid1; when an object exists and
 *    a name is given, the stripe is selected with raw_name2idx(), rpid and
 *    the target come from that stripe and MDS_CHECK_SPLIT is cleared;
 *    another path uses lmv_find_target() on op_fid1 and sets MDS_CHECK_SPLIT;
 *  - an op_fid2 that is not sane is zeroed with fid_zero();
 *  - MDS_CROSS_REF is cleared, op_fid1 is set to rpid and md_intent_lock()
 *    is sent to the chosen target;
 *  - on -ERESTART the request is finished, it_data is reset and an lmv
 *    object is created for rpid with lmv_object_create();
 *  - one path revalidates the slaves of op_fid1 via lmv_revalidate_slaves(),
 *    passing rc as the master_valid argument;
 *  - lmv_intent_remote() handles a possible cross-ref reply;
 *  - a negative lookup (DISP_LOOKUP_NEG) or a reply without OBD_MD_FLID ends
 *    with rc = 0; otherwise the object for body->fid1 is found or created
 *    and lmv_revalidate_slaves() is called with master_valid = 1.
 *
 * NOTE(review): braces, several branch conditions, the retry path, labels
 * and the return statement are not visible in this view; confirm the exact
 * control flow against the complete file.
 */
347 int lmv_intent_lookup(struct obd_export *exp, struct md_op_data *op_data,
348 void *lmm, int lmmsize, struct lookup_intent *it,
349 int flags, struct ptlrpc_request **reqp,
350 ldlm_blocking_callback cb_blocking,
351 int extra_lock_flags)
353 struct obd_device *obd = exp->exp_obd;
354 struct lu_fid rpid = op_data->op_fid1;
355 struct lmv_obd *lmv = &obd->u.lmv;
356 struct lmv_object *obj = NULL;
357 struct md_op_data *sop_data;
358 struct lmv_stripe_md *mea;
359 struct lmv_tgt_desc *tgt = NULL;
360 struct mdt_body *body;
366 OBD_ALLOC_PTR(sop_data);
367 if (sop_data == NULL)
/* Work on a copy so the caller's op_data is not modified below. */
370 *sop_data = *op_data;
376 obj = lmv_object_find(obd, &op_data->op_fid1);
/* A stripe can only be picked when there is a name to hash. */
377 if (obj && op_data->op_namelen) {
378 sidx = raw_name2idx(obj->lo_hashtype,
380 (char *)op_data->op_name,
381 op_data->op_namelen);
382 rpid = obj->lo_stripes[sidx].ls_fid;
383 tgt = lmv_get_target(lmv,
384 obj->lo_stripes[sidx].ls_mds);
386 "Choose slave dir ("DFID") -> mds #%d\n",
387 PFID(&rpid), tgt->ltd_idx);
388 sop_data->op_bias &= ~MDS_CHECK_SPLIT;
390 tgt = lmv_find_target(lmv, &op_data->op_fid1);
391 sop_data->op_bias |= MDS_CHECK_SPLIT;
397 GOTO(out_free_sop_data, rc = PTR_ERR(tgt));
399 if (!fid_is_sane(&sop_data->op_fid2))
400 fid_zero(&sop_data->op_fid2);
403 "LOOKUP_INTENT with fid1="DFID", fid2="DFID
404 ", name='%s' -> mds #%d\n",
405 PFID(&sop_data->op_fid1), PFID(&sop_data->op_fid2),
406 sop_data->op_name ? sop_data->op_name : "<NULL>",
409 sop_data->op_bias &= ~MDS_CROSS_REF;
410 sop_data->op_fid1 = rpid;
412 rc = md_intent_lock(tgt->ltd_exp, sop_data, lmm, lmmsize, it,
413 flags, reqp, cb_blocking, extra_lock_flags);
415 if (rc == -ERESTART) {
416 LASSERT(*reqp != NULL);
417 DEBUG_REQ(D_WARNING|D_RPCTRACE, *reqp,
418 "Got -ERESTART during lookup!\n");
419 ptlrpc_req_finished(*reqp);
421 it->d.lustre.it_data = 0;
424 * Directory got split since last update. This shouldn't be
425 * because splitting causes lock revocation, so revalidate had
426 * to fail and lookup on dir had to return mea.
428 LASSERT(obj == NULL);
430 obj = lmv_object_create(exp, &rpid, NULL);
432 GOTO(out_free_sop_data, rc = PTR_ERR(obj));
438 GOTO(out_free_sop_data, rc);
442 * This is split dir. In order to optimize things a bit, we
443 * consider obj valid updating missing parts.
446 "Revalidate slaves for "DFID", rc %d\n",
447 PFID(&op_data->op_fid1), rc);
449 LASSERT(fid_is_sane(&op_data->op_fid2));
/* rc from md_intent_lock() is forwarded as the master_valid argument. */
450 rc = lmv_revalidate_slaves(exp, reqp, &op_data->op_fid1, it, rc,
451 cb_blocking, extra_lock_flags);
452 GOTO(out_free_sop_data, rc);
456 GOTO(out_free_sop_data, rc);
459 * MDS has returned success. Probably name has been resolved in
460 * remote inode. Let's check this.
462 rc = lmv_intent_remote(exp, lmm, lmmsize, it, flags,
463 reqp, cb_blocking, extra_lock_flags);
465 GOTO(out_free_sop_data, rc);
468 * Nothing is found, do not access body->fid1 as it is zero and thus
471 if (it->d.lustre.it_disposition & DISP_LOOKUP_NEG)
472 GOTO(out_free_sop_data, rc = 0);
474 LASSERT(*reqp != NULL);
475 LASSERT((*reqp)->rq_repmsg != NULL);
476 body = req_capsule_server_get(&(*reqp)->rq_pill, &RMF_MDT_BODY);
477 LASSERT(body != NULL);
480 * Could not find object, FID is not present in response.
482 if (!(body->valid & OBD_MD_FLID))
483 GOTO(out_free_sop_data, rc = 0);
/* From here on obj refers to the object named in the reply. */
485 obj = lmv_object_find(obd, &body->fid1);
488 * XXX: Remote capability is not handled.
490 mea = lmv_get_mea(*reqp);
492 obj = lmv_object_create(exp, &body->fid1, mea);
494 GOTO(out_free_sop_data, rc = (int)PTR_ERR(obj));
497 CDEBUG(D_INODE, "Slave attributes for "DFID", rc %d\n",
498 PFID(&body->fid1), rc);
500 rc = lmv_revalidate_slaves(exp, reqp, &body->fid1, it, 1,
501 cb_blocking, extra_lock_flags);
/* Release the private copy of op_data. */
507 OBD_FREE_PTR(sop_data);
/*
 * Entry point for intent locking in LMV: dispatches on it->it_op.
 *
 * Asserts that op_data->op_fid1 is sane, logs the request, calls
 * lmv_check_connect() on the obd device and then forwards all arguments to
 *  - lmv_intent_lookup() when it_op contains IT_LOOKUP, IT_GETATTR or
 *    IT_LAYOUT, or
 *  - lmv_intent_open() when it_op contains IT_OPEN.
 *
 * NOTE(review): the handling of a failing lmv_check_connect(), the last
 * argument line of both calls, any further else-branch and the return
 * statement are not visible in this view; confirm against the complete
 * file.
 */
511 int lmv_intent_lock(struct obd_export *exp, struct md_op_data *op_data,
512 void *lmm, int lmmsize, struct lookup_intent *it,
513 int flags, struct ptlrpc_request **reqp,
514 ldlm_blocking_callback cb_blocking,
515 int extra_lock_flags)
517 struct obd_device *obd = exp->exp_obd;
522 LASSERT(fid_is_sane(&op_data->op_fid1));
524 CDEBUG(D_INODE, "INTENT LOCK '%s' for '%*s' on "DFID"\n",
525 LL_IT2STR(it), op_data->op_namelen, op_data->op_name,
526 PFID(&op_data->op_fid1));
528 rc = lmv_check_connect(obd);
/* The lookup-type intents are tested first, IT_OPEN second. */
532 if (it->it_op & (IT_LOOKUP | IT_GETATTR | IT_LAYOUT))
533 rc = lmv_intent_lookup(exp, op_data, lmm, lmmsize, it,
534 flags, reqp, cb_blocking,
536 else if (it->it_op & IT_OPEN)
537 rc = lmv_intent_open(exp, op_data, lmm, lmmsize, it,
538 flags, reqp, cb_blocking,
/*
 * Walk all stripes of the object identified by @mid, refresh their cached
 * size where needed and report the accumulated size through the reply in
 * *reqp.
 *
 * Visible steps:
 *  - allocate a scratch op_data and obtain the object with
 *    lmv_object_find_lock(); both are released at the end
 *    (OBD_FREE_PTR() / lmv_object_put_unlock());
 *  - for each stripe i < obj->lo_objcount: the stripe is the "master" when
 *    its fid equals obj->lo_fid; the master uses @cb_blocking, every other
 *    stripe uses lmv_blocking_ast; a local IT_GETATTR intent is built;
 *  - for the master with @master_valid set, the body of the caller's reply
 *    (mreq) is read instead of sending a request;
 *  - otherwise op_data is reset (op_bias = MDS_CROSS_REF, op_fid1 = op_fid2 =
 *    stripe fid) and md_intent_lock() is sent to the stripe's target;
 *    rc > 0 with req == NULL is treated as "slave is cached and valid";
 *  - a lock obtained on the master is remembered in master_lockh and
 *    master_lockm; for a slave the lock's l_ast_data is set to a new
 *    reference on obj (lmv_object_get());
 *  - the stripe's ls_size is updated from body->size and added to the
 *    running total; the per-stripe request is finished and any lock still
 *    recorded in the local intent is dropped;
 *  - afterwards the body of *reqp is fetched, body->valid is set to
 *    OBD_MD_FLSIZE in at least one path, and when @master_valid is 0 the
 *    remembered master lock is written into @oit.
 *
 * NOTE(review): many lines are not visible in this view (declarations of
 * i, rc, fid and master, braces, most branch conditions, the cleanup label
 * and the return statement). In particular it cannot be seen here which
 * condition separates the master and slave lock handling or where the
 * accumulated size is stored in the reply; confirm against the complete
 * file before relying on the description above.
 */
545 int lmv_revalidate_slaves(struct obd_export *exp, struct ptlrpc_request **reqp,
546 const struct lu_fid *mid, struct lookup_intent *oit,
547 int master_valid, ldlm_blocking_callback cb_blocking,
548 int extra_lock_flags)
550 struct obd_device *obd = exp->exp_obd;
551 struct lmv_obd *lmv = &obd->u.lmv;
552 int master_lockm = 0;
553 struct lustre_handle *lockh = NULL;
554 struct ptlrpc_request *mreq = *reqp;
555 struct lustre_handle master_lockh = { 0 };
556 struct md_op_data *op_data;
557 struct ldlm_lock *lock;
558 unsigned long size = 0;
559 struct mdt_body *body;
560 struct lmv_object *obj;
564 struct ptlrpc_request *req;
565 ldlm_blocking_callback cb;
566 struct lookup_intent it;
567 struct lmv_tgt_desc *tgt;
571 CDEBUG(D_INODE, "Revalidate master obj "DFID"\n", PFID(mid));
573 OBD_ALLOC_PTR(op_data);
578 * We have to loop over the subobjects, check validity and update them
579 * from MDS if needed. It's very useful that we need not to update all
580 * the fields. Say, common fields (that are equal on all the subojects
581 * need not to be update, another fields (i_size, for example) are
582 * cached all the time.
584 obj = lmv_object_find_lock(obd, mid);
588 for (i = 0; i < obj->lo_objcount; i++) {
589 fid = obj->lo_stripes[i].ls_fid;
/* The stripe whose fid equals the object's own fid is the master. */
590 master = lu_fid_eq(&fid, &obj->lo_fid);
591 cb = master ? cb_blocking : lmv_blocking_ast;
594 * We need i_size and we would like to check possible cached locks,
595 * so this is is IT_GETATTR intent.
597 memset(&it, 0, sizeof(it));
598 it.it_op = IT_GETATTR;
600 if (master && master_valid) {
602 * lmv_intent_lookup() already checked
603 * validness and took the lock.
606 body = req_capsule_server_get(&mreq->rq_pill,
608 LASSERT(body != NULL);
612 * Take already cached attrs into account.
615 "Master "DFID"is locked and cached\n",
621 * Prepare op_data for revalidating. Note that @fid2 shuld be
622 * defined otherwise it will go to server and take new lock
623 * which is what we reall not need here.
625 memset(op_data, 0, sizeof(*op_data));
626 op_data->op_bias = MDS_CROSS_REF;
627 op_data->op_fid1 = fid;
628 op_data->op_fid2 = fid;
631 tgt = lmv_get_target(lmv, obj->lo_stripes[i].ls_mds);
633 GOTO(cleanup, rc = PTR_ERR(tgt));
635 CDEBUG(D_INODE, "Revalidate slave obj "DFID" -> mds #%d\n",
636 PFID(&fid), tgt->ltd_idx);
638 rc = md_intent_lock(tgt->ltd_exp, op_data, NULL, 0, &it, 0,
639 &req, cb, extra_lock_flags);
/* Handle of whatever lock the call above recorded in the local intent. */
641 lockh = (struct lustre_handle *)&it.d.lustre.it_lock_handle;
642 if (rc > 0 && req == NULL) {
644 * Nice, this slave is valid.
646 CDEBUG(D_INODE, "Cached slave "DFID"\n", PFID(&fid));
655 * Save lock on master to be returned to the caller.
657 CDEBUG(D_INODE, "No lock on master "DFID" yet\n",
659 memcpy(&master_lockh, lockh, sizeof(master_lockh));
660 master_lockm = it.d.lustre.it_lock_mode;
/* Mode is zeroed so the decref at the bottom of the loop skips this lock. */
661 it.d.lustre.it_lock_mode = 0;
664 * This is slave. We want to control it.
666 lock = ldlm_handle2lock(lockh);
667 LASSERT(lock != NULL);
668 lock->l_ast_data = lmv_object_get(obj);
674 * This is first reply, we'll use it to return updated
675 * data back to the caller.
677 LASSERT(req != NULL);
678 ptlrpc_request_addref(req);
682 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
683 LASSERT(body != NULL);
686 obj->lo_stripes[i].ls_size = body->size;
688 CDEBUG(D_INODE, "Fresh size %lu from "DFID"\n",
689 (unsigned long)obj->lo_stripes[i].ls_size, PFID(&fid));
692 ptlrpc_req_finished(req);
/* Running total over all stripes. */
694 size += obj->lo_stripes[i].ls_size;
696 if (it.d.lustre.it_lock_mode && lockh) {
697 ldlm_lock_decref(lockh, it.d.lustre.it_lock_mode);
698 it.d.lustre.it_lock_mode = 0;
704 * Some attrs got refreshed, we have reply and it's time to put
707 CDEBUG(D_INODE, "Return refreshed attrs: size = %lu for "DFID"\n",
708 (unsigned long)size, PFID(mid));
710 body = req_capsule_server_get(&(*reqp)->rq_pill, &RMF_MDT_BODY);
711 LASSERT(body != NULL);
716 * Very important to maintain mds num the same because
717 * of revalidation. mreq == NULL means that caller has
718 * no reply and the only attr we can return is size.
720 body->valid = OBD_MD_FLSIZE;
/* Caller had no valid master lock: hand over the one remembered above. */
722 if (master_valid == 0) {
723 oit->d.lustre.it_lock_handle = master_lockh.cookie;
724 oit->d.lustre.it_lock_mode = master_lockm;
729 * It seems all the attrs are fresh and we did no request.
731 CDEBUG(D_INODE, "All the attrs were fresh on "DFID"\n",
733 if (master_valid == 0)
734 oit->d.lustre.it_lock_mode = master_lockm;
/* Release the scratch op_data and the object obtained at the top. */
740 OBD_FREE_PTR(op_data);
741 lmv_object_put_unlock(obj);
745 int lmv_allocate_slaves(struct obd_device *obd, struct lu_fid *pid,
746 struct md_op_data *op, struct lu_fid *fid)
748 struct lmv_obd *lmv = &obd->u.lmv;
749 struct lmv_object *obj;
755 obj = lmv_object_find(obd, pid);
759 sidx = raw_name2idx(obj->lo_hashtype, obj->lo_objcount,
760 (char *)op->op_name, op->op_namelen);
761 mds = obj->lo_stripes[sidx].ls_mds;
764 rc = __lmv_fid_alloc(lmv, fid, mds);
766 CERROR("Can't allocate fid, rc %d\n", rc);
770 CDEBUG(D_INODE, "Allocate new fid "DFID" for slave "
771 "obj -> mds #%x\n", PFID(fid), mds);