4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
27 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
30 * Copyright (c) 2011, 2012, Whamcloud, Inc.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
37 #define DEBUG_SUBSYSTEM S_LMV
39 #include <linux/slab.h>
40 #include <linux/module.h>
41 #include <linux/init.h>
42 #include <linux/slab.h>
43 #include <linux/pagemap.h>
44 #include <asm/div64.h>
45 #include <linux/seq_file.h>
46 #include <linux/namei.h>
47 #include <linux/lustre_intent.h>
49 #include <liblustre.h>
52 #include <obd_support.h>
53 #include <lustre/lustre_idl.h>
54 #include <lustre_lib.h>
55 #include <lustre_net.h>
56 #include <lustre_dlm.h>
57 #include <obd_class.h>
58 #include <lprocfs_status.h>
59 #include "lmv_internal.h"
/*
 * Follow up an intent reply whose object lives on another MDS (the
 * "cross-ref" case).
 *
 * The MDT body is read from the reply in *reqp. If body->valid does not
 * carry OBD_MD_MDS the reply is not a cross-ref and the function gets out
 * early. Otherwise:
 *   - an IT_LOOKUP intent is turned into IT_GETATTR;
 *   - the lock currently recorded in @it is saved in pmode/plock and
 *     cleared from @it;
 *   - the target for body->fid1 is looked up with lmv_find_target();
 *   - a second md_intent_lock() is issued on that target with
 *     op_fid1 = body->fid1 and op_bias = MDS_CROSS_REF, filling the local
 *     request pointer "req";
 *   - a lock left in @it by that second request is released with
 *     ldlm_lock_decref() and the saved handle/mode are written back to @it.
 *
 * @exp               LMV export; its obd device provides the lmv_obd.
 * @lmm, @lmmsize     passed through unchanged to md_intent_lock().
 * @it                intent; its it_op, lock mode/handle, it_data and
 *                    disposition fields are modified here.
 * @flags             passed through unchanged to md_intent_lock().
 * @reqp              in: reply of the first request. The visible code calls
 *                    ptlrpc_req_finished(*reqp) at the end.
 * @cb_blocking       blocking callback passed to md_intent_lock().
 * extra_lock_flags   passed to md_intent_lock() (its declaration line is
 *                    not visible in this listing).
 *
 * rc values visible here: PTR_ERR(tgt), -ENOMEM, and the result of
 * md_intent_lock().
 */
61 int lmv_intent_remote(struct obd_export *exp, void *lmm,
62 int lmmsize, struct lookup_intent *it,
63 int flags, struct ptlrpc_request **reqp,
64 ldlm_blocking_callback cb_blocking,
67 struct obd_device *obd = exp->exp_obd;
68 struct lmv_obd *lmv = &obd->u.lmv;
69 struct ptlrpc_request *req = NULL;
70 struct lustre_handle plock;
71 struct md_op_data *op_data;
72 struct lmv_tgt_desc *tgt;
73 struct mdt_body *body;
78 body = req_capsule_server_get(&(*reqp)->rq_pill, &RMF_MDT_BODY);
83 * Not cross-ref case, just get out of here.
85 if (!(body->valid & OBD_MD_MDS))
89 * Unfortunately, we have to lie to MDC/MDS to retrieve
90 * attributes llite needs and provideproper locking.
92 if (it->it_op & IT_LOOKUP)
93 it->it_op = IT_GETATTR;
96 * We got LOOKUP lock, but we really need attrs.
/* Park the lock currently held in @it so it can be written back later. */
98 pmode = it->d.lustre.it_lock_mode;
100 plock.cookie = it->d.lustre.it_lock_handle;
101 it->d.lustre.it_lock_mode = 0;
102 it->d.lustre.it_data = NULL;
105 LASSERT(fid_is_sane(&body->fid1));
107 tgt = lmv_find_target(lmv, &body->fid1);
109 GOTO(out, rc = PTR_ERR(tgt));
111 OBD_ALLOC_PTR(op_data);
113 GOTO(out, rc = -ENOMEM);
115 op_data->op_fid1 = body->fid1;
116 op_data->op_bias = MDS_CROSS_REF;
119 "REMOTE_INTENT with fid="DFID" -> mds #%d\n",
120 PFID(&body->fid1), tgt->ltd_idx);
/* DISP_ENQ_COMPLETE is cleared before @it is reused for the second enqueue. */
122 it->d.lustre.it_disposition &= ~DISP_ENQ_COMPLETE;
123 rc = md_intent_lock(tgt->ltd_exp, op_data, lmm, lmmsize, it,
124 flags, &req, cb_blocking, extra_lock_flags);
126 GOTO(out_free_op_data, rc);
129 * LLite needs LOOKUP lock to track dentry revocation in order to
130 * maintain dcache consistency. Thus drop UPDATE lock here and put
133 if (it->d.lustre.it_lock_mode != 0) {
134 ldlm_lock_decref((void *)&it->d.lustre.it_lock_handle,
135 it->d.lustre.it_lock_mode);
136 it->d.lustre.it_lock_mode = 0;
138 it->d.lustre.it_lock_handle = plock.cookie;
139 it->d.lustre.it_lock_mode = pmode;
143 OBD_FREE_PTR(op_data);
/*
 * NOTE(review): the condition guarding this decref of the saved lock is not
 * visible in this listing; presumably it runs only on failure -- confirm.
 */
146 ldlm_lock_decref(&plock, pmode);
148 ptlrpc_req_finished(*reqp);
154 * IT_OPEN is intended to open (and possibly create) an object. Parent (pid)
/*
 * Handle an IT_OPEN intent (optionally with IT_CREAT) for a name under the
 * parent op_data->op_fid1.
 *
 * Visible steps:
 *   - @op_data is copied into a heap-allocated sop_data so the original is
 *     preserved for the repeat case;
 *   - the parent is looked up with lmv_object_find(). For a split directory
 *     the stripe is chosen by raw_name2idx() on the name, the stripe FID and
 *     MDS are used and MDS_CHECK_SPLIT is cleared; otherwise MDS_CHECK_SPLIT
 *     is set and the target comes from lmv_find_target();
 *   - with IT_CREAT, op_fid2 is kept in op_fid3 and a new FID is allocated
 *     into op_fid2 via lmv_fid_alloc();
 *   - md_intent_lock() is sent to the chosen target. On -ERESTART the
 *     request is finished, it_data is cleared, lmv_handle_split() refreshes
 *     the local object and lmv_allocate_slaves() is called to reallocate the
 *     child FID (the retry jump itself is not visible in this listing);
 *   - a negative lookup without DISP_OPEN_CREATE/DISP_OPEN_OPEN ends with
 *     rc = 0;
 *   - lmv_intent_remote() is called to deal with a possible cross-ref reply;
 *   - if the reply body carries OBD_MD_FLID, the object for body->fid1 is
 *     found or created (lmv_get_mea() + lmv_object_create()) and
 *     lmv_revalidate_slaves() is called with master_valid = 1;
 *   - sop_data is freed on the way out.
 *
 * @exp               LMV export.
 * @op_data           operation data; op_fid1 is the parent FID, op_name /
 *                    op_namelen the name being opened.
 * @lmm, @lmmsize     passed through to md_intent_lock()/lmv_intent_remote().
 * @it                open intent.
 * @flags             passed through to md_intent_lock()/lmv_intent_remote().
 * @reqp              receives the request from md_intent_lock().
 * @cb_blocking       blocking callback passed down.
 * @extra_lock_flags  extra lock flags passed down.
 *
 * rc values visible here: PTR_ERR(tgt), PTR_ERR(obj), 0, and results of the
 * called helpers.
 */
157 int lmv_intent_open(struct obd_export *exp, struct md_op_data *op_data,
158 void *lmm, int lmmsize, struct lookup_intent *it,
159 int flags, struct ptlrpc_request **reqp,
160 ldlm_blocking_callback cb_blocking,
161 int extra_lock_flags)
163 struct obd_device *obd = exp->exp_obd;
164 struct lu_fid rpid = op_data->op_fid1;
165 struct lmv_obd *lmv = &obd->u.lmv;
166 struct md_op_data *sop_data;
167 struct lmv_stripe_md *mea;
168 struct lmv_tgt_desc *tgt;
169 struct mdt_body *body;
170 struct lmv_object *obj;
176 OBD_ALLOC_PTR(sop_data);
177 if (sop_data == NULL)
180 /* save op_data for repeat case */
181 *sop_data = *op_data;
187 obj = lmv_object_find(obd, &rpid);
190 * Directory is already split, so we have to forward request to
193 sidx = raw_name2idx(obj->lo_hashtype, obj->lo_objcount,
194 (char *)op_data->op_name,
195 op_data->op_namelen);
197 rpid = obj->lo_stripes[sidx].ls_fid;
199 sop_data->op_mds = obj->lo_stripes[sidx].ls_mds;
200 tgt = lmv_get_target(lmv, sop_data->op_mds);
201 sop_data->op_bias &= ~MDS_CHECK_SPLIT;
205 "Choose slave dir ("DFID") -> mds #%d\n",
206 PFID(&rpid), tgt->ltd_idx);
208 sop_data->op_bias |= MDS_CHECK_SPLIT;
209 tgt = lmv_find_target(lmv, &rpid);
/*
 * NOTE(review): tgt is dereferenced on the next line (and in the debug
 * message above for the split case), but the error exit using PTR_ERR(tgt)
 * only follows afterwards. If the target lookup can return an error pointer
 * this is an invalid dereference -- confirm and move the check before the
 * first use of tgt.
 */
210 sop_data->op_mds = tgt->ltd_idx;
213 GOTO(out_free_sop_data, rc = PTR_ERR(tgt));
215 sop_data->op_fid1 = rpid;
217 if (it->it_op & IT_CREAT) {
219 * For open with IT_CREATE and for IT_CREATE cases allocate new
220 * fid and setup FLD for it.
/* Keep the caller-supplied fid2 in fid3 before fid2 is overwritten. */
222 sop_data->op_fid3 = sop_data->op_fid2;
223 rc = lmv_fid_alloc(exp, &sop_data->op_fid2, sop_data);
225 GOTO(out_free_sop_data, rc);
230 GOTO(out_free_sop_data, rc);
234 "OPEN_INTENT with fid1="DFID", fid2="DFID", name='%s' -> mds #%d\n",
235 PFID(&sop_data->op_fid1), PFID(&sop_data->op_fid2),
236 sop_data->op_name, tgt->ltd_idx);
238 rc = md_intent_lock(tgt->ltd_exp, sop_data, lmm, lmmsize, it, flags,
239 reqp, cb_blocking, extra_lock_flags);
241 if (rc == -ERESTART) {
242 LASSERT(*reqp != NULL);
243 DEBUG_REQ(D_WARNING|D_RPCTRACE, *reqp,
244 "Got -ERESTART during open!\n");
245 ptlrpc_req_finished(*reqp);
247 it->d.lustre.it_data = NULL;
250 * Directory got split. Time to update local object and repeat
251 * the request with proper MDS.
253 LASSERT(lu_fid_eq(&op_data->op_fid1, &rpid));
254 rc = lmv_handle_split(exp, &rpid);
256 /* We should reallocate child FID. */
257 rc = lmv_allocate_slaves(obd, &rpid, op_data,
265 GOTO(out_free_sop_data, rc);
268 * Nothing is found, do not access body->fid1 as it is zero and thus
271 if ((it->d.lustre.it_disposition & DISP_LOOKUP_NEG) &&
272 !(it->d.lustre.it_disposition & DISP_OPEN_CREATE) &&
273 !(it->d.lustre.it_disposition & DISP_OPEN_OPEN))
274 GOTO(out_free_sop_data, rc = 0);
277 * Okay, MDS has returned success. Probably name has been resolved in
280 rc = lmv_intent_remote(exp, lmm, lmmsize, it, flags, reqp,
281 cb_blocking, extra_lock_flags);
285 * This is possible, that some userspace application will try to
286 * open file as directory and we will have -ENOTDIR here. As
287 * this is normal situation, we should not print error here,
290 CDEBUG(D_INODE, "Can't handle remote %s: dir "DFID"("DFID"):"
291 "%*s: %d\n", LL_IT2STR(it), PFID(&op_data->op_fid2),
292 PFID(&rpid), op_data->op_namelen, op_data->op_name, rc);
293 GOTO(out_free_sop_data, rc);
297 * Caller may use attrs MDS returns on IT_OPEN lock request so, we have
298 * to update them for split dir.
300 body = req_capsule_server_get(&(*reqp)->rq_pill, &RMF_MDT_BODY);
301 LASSERT(body != NULL);
304 * Could not find object, FID is not present in response.
306 if (!(body->valid & OBD_MD_FLID))
307 GOTO(out_free_sop_data, rc = 0);
309 obj = lmv_object_find(obd, &body->fid1);
312 * XXX: Capability for remote call!
314 mea = lmv_get_mea(*reqp);
316 obj = lmv_object_create(exp, &body->fid1, mea);
318 GOTO(out_free_sop_data, rc = (int)PTR_ERR(obj));
324 * This is split dir and we'd want to get attrs.
326 CDEBUG(D_INODE, "Slave attributes for "DFID"\n",
329 rc = lmv_revalidate_slaves(exp, reqp, &body->fid1, it, 1,
330 cb_blocking, extra_lock_flags);
335 OBD_FREE_PTR(sop_data);
340 * Handler for: getattr, lookup and revalidate cases.
/*
 * Handle getattr, lookup and revalidate intents for op_data->op_fid1 and,
 * when a name is given, the name under it.
 *
 * Visible steps:
 *   - @op_data is copied into a heap-allocated sop_data;
 *   - lmv_object_find() is called for op_fid1. If an object exists and a
 *     name is supplied, the stripe is chosen by raw_name2idx(), its FID and
 *     MDS are used and MDS_CHECK_SPLIT is cleared; otherwise the target comes
 *     from lmv_find_target() and MDS_CHECK_SPLIT is set;
 *   - an op_fid2 that is not sane is reset with fid_zero();
 *   - MDS_CROSS_REF is cleared, op_fid1 is set to the chosen FID and
 *     md_intent_lock() is sent to the chosen target;
 *   - on -ERESTART the request is finished, it_data is cleared and an LMV
 *     object is created for the FID with lmv_object_create(exp, &rpid, NULL);
 *   - for a split directory lmv_revalidate_slaves() is called on op_fid1
 *     with the current rc as its master_valid argument;
 *   - lmv_intent_remote() is called to deal with a possible cross-ref reply;
 *   - DISP_LOOKUP_NEG, or a reply body without OBD_MD_FLID, ends with rc = 0;
 *   - otherwise the object for body->fid1 is found or created
 *     (lmv_get_mea() + lmv_object_create()) and lmv_revalidate_slaves() is
 *     called with master_valid = 1;
 *   - sop_data is freed on the way out.
 *
 * @exp               LMV export.
 * @op_data           operation data (op_fid1, op_fid2, op_name, op_namelen).
 * @lmm, @lmmsize     passed through to md_intent_lock()/lmv_intent_remote().
 * @it                lookup/getattr intent.
 * @flags             passed through to md_intent_lock()/lmv_intent_remote().
 * @reqp              receives the request from md_intent_lock().
 * @cb_blocking       blocking callback passed down.
 * @extra_lock_flags  extra lock flags passed down.
 *
 * rc values visible here: PTR_ERR(tgt), PTR_ERR(obj), 0, and results of the
 * called helpers.
 */
342 int lmv_intent_lookup(struct obd_export *exp, struct md_op_data *op_data,
343 void *lmm, int lmmsize, struct lookup_intent *it,
344 int flags, struct ptlrpc_request **reqp,
345 ldlm_blocking_callback cb_blocking,
346 int extra_lock_flags)
348 struct obd_device *obd = exp->exp_obd;
349 struct lu_fid rpid = op_data->op_fid1;
350 struct lmv_obd *lmv = &obd->u.lmv;
351 struct lmv_object *obj = NULL;
352 struct md_op_data *sop_data;
353 struct lmv_stripe_md *mea;
354 struct lmv_tgt_desc *tgt = NULL;
355 struct mdt_body *body;
361 OBD_ALLOC_PTR(sop_data);
362 if (sop_data == NULL)
365 *sop_data = *op_data;
371 obj = lmv_object_find(obd, &op_data->op_fid1);
372 if (obj && op_data->op_namelen) {
373 sidx = raw_name2idx(obj->lo_hashtype,
375 (char *)op_data->op_name,
376 op_data->op_namelen);
377 rpid = obj->lo_stripes[sidx].ls_fid;
/*
 * NOTE(review): tgt->ltd_idx is read in the debug message right after this
 * lookup, while the error exit using PTR_ERR(tgt) only comes later. If the
 * lookup can return an error pointer this is an invalid dereference --
 * confirm.
 */
378 tgt = lmv_get_target(lmv,
379 obj->lo_stripes[sidx].ls_mds);
381 "Choose slave dir ("DFID") -> mds #%d\n",
382 PFID(&rpid), tgt->ltd_idx);
383 sop_data->op_bias &= ~MDS_CHECK_SPLIT;
385 tgt = lmv_find_target(lmv, &op_data->op_fid1);
386 sop_data->op_bias |= MDS_CHECK_SPLIT;
392 GOTO(out_free_sop_data, rc = PTR_ERR(tgt));
/* An op_fid2 that is not sane is normalised to the zero FID. */
394 if (!fid_is_sane(&sop_data->op_fid2))
395 fid_zero(&sop_data->op_fid2);
398 "LOOKUP_INTENT with fid1="DFID", fid2="DFID
399 ", name='%s' -> mds #%d\n",
400 PFID(&sop_data->op_fid1), PFID(&sop_data->op_fid2),
401 sop_data->op_name ? sop_data->op_name : "<NULL>",
404 sop_data->op_bias &= ~MDS_CROSS_REF;
405 sop_data->op_fid1 = rpid;
407 rc = md_intent_lock(tgt->ltd_exp, sop_data, lmm, lmmsize, it,
408 flags, reqp, cb_blocking, extra_lock_flags);
410 if (rc == -ERESTART) {
411 LASSERT(*reqp != NULL);
412 DEBUG_REQ(D_WARNING|D_RPCTRACE, *reqp,
413 "Got -ERESTART during lookup!\n");
414 ptlrpc_req_finished(*reqp);
416 it->d.lustre.it_data = 0;
419 * Directory got split since last update. This shouldn't be
420 * because splitting causes lock revocation, so revalidate had
421 * to fail and lookup on dir had to return mea.
423 LASSERT(obj == NULL);
425 obj = lmv_object_create(exp, &rpid, NULL);
427 GOTO(out_free_sop_data, rc = PTR_ERR(obj));
433 GOTO(out_free_sop_data, rc);
437 * This is split dir. In order to optimize things a bit, we
438 * consider obj valid updating missing parts.
441 "Revalidate slaves for "DFID", rc %d\n",
442 PFID(&op_data->op_fid1), rc);
444 LASSERT(fid_is_sane(&op_data->op_fid2));
445 rc = lmv_revalidate_slaves(exp, reqp, &op_data->op_fid1, it, rc,
446 cb_blocking, extra_lock_flags);
447 GOTO(out_free_sop_data, rc);
451 GOTO(out_free_sop_data, rc);
454 * MDS has returned success. Probably name has been resolved in
455 * remote inode. Let's check this.
457 rc = lmv_intent_remote(exp, lmm, lmmsize, it, flags,
458 reqp, cb_blocking, extra_lock_flags);
460 GOTO(out_free_sop_data, rc);
463 * Nothing is found, do not access body->fid1 as it is zero and thus
466 if (it->d.lustre.it_disposition & DISP_LOOKUP_NEG)
467 GOTO(out_free_sop_data, rc = 0);
469 LASSERT(*reqp != NULL);
470 LASSERT((*reqp)->rq_repmsg != NULL);
471 body = req_capsule_server_get(&(*reqp)->rq_pill, &RMF_MDT_BODY);
472 LASSERT(body != NULL);
475 * Could not find object, FID is not present in response.
477 if (!(body->valid & OBD_MD_FLID))
478 GOTO(out_free_sop_data, rc = 0);
480 obj = lmv_object_find(obd, &body->fid1);
483 * XXX: Remote capability is not handled.
485 mea = lmv_get_mea(*reqp);
487 obj = lmv_object_create(exp, &body->fid1, mea);
489 GOTO(out_free_sop_data, rc = (int)PTR_ERR(obj));
492 CDEBUG(D_INODE, "Slave attributes for "DFID", rc %d\n",
493 PFID(&body->fid1), rc);
495 rc = lmv_revalidate_slaves(exp, reqp, &body->fid1, it, 1,
496 cb_blocking, extra_lock_flags);
502 OBD_FREE_PTR(sop_data);
/*
 * Entry point for intent locking through LMV.
 *
 * Asserts that op_data->op_fid1 is sane, logs the request, calls
 * lmv_check_connect() on the obd device, then dispatches on it->it_op:
 *   - IT_LOOKUP, IT_GETATTR or IT_LAYOUT -> lmv_intent_lookup();
 *   - IT_OPEN                            -> lmv_intent_open().
 * All arguments are forwarded unchanged to the chosen handler.
 *
 * NOTE(review): what happens for any other it_op value, and how a failure
 * of lmv_check_connect() is handled, is not visible in this listing.
 *
 * @exp               LMV export.
 * @op_data           operation data; op_fid1 must be a sane FID.
 * @lmm, @lmmsize     forwarded to the handler.
 * @it                intent; it_op selects the handler.
 * @flags             forwarded to the handler.
 * @reqp              forwarded to the handler.
 * @cb_blocking       forwarded to the handler.
 * @extra_lock_flags  forwarded to the handler.
 */
506 int lmv_intent_lock(struct obd_export *exp, struct md_op_data *op_data,
507 void *lmm, int lmmsize, struct lookup_intent *it,
508 int flags, struct ptlrpc_request **reqp,
509 ldlm_blocking_callback cb_blocking,
510 int extra_lock_flags)
512 struct obd_device *obd = exp->exp_obd;
517 LASSERT(fid_is_sane(&op_data->op_fid1));
519 CDEBUG(D_INODE, "INTENT LOCK '%s' for '%*s' on "DFID"\n",
520 LL_IT2STR(it), op_data->op_namelen, op_data->op_name,
521 PFID(&op_data->op_fid1));
523 rc = lmv_check_connect(obd);
527 if (it->it_op & (IT_LOOKUP | IT_GETATTR | IT_LAYOUT))
528 rc = lmv_intent_lookup(exp, op_data, lmm, lmmsize, it,
529 flags, reqp, cb_blocking,
531 else if (it->it_op & IT_OPEN)
532 rc = lmv_intent_open(exp, op_data, lmm, lmmsize, it,
533 flags, reqp, cb_blocking,
/*
 * Revalidate every stripe of the split object identified by @mid and
 * accumulate the stripe sizes.
 *
 * The object is taken with lmv_object_find_lock() and released with
 * lmv_object_put_unlock(); a scratch md_op_data is allocated and freed here.
 * For each of the obj->lo_objcount stripes:
 *   - "master" is true when the stripe FID equals obj->lo_fid; the master
 *     uses @cb_blocking, other stripes use lmv_blocking_ast;
 *   - a fresh IT_GETATTR intent is prepared;
 *   - for the master with @master_valid set, the body is read from the
 *     caller's reply (mreq) instead of sending a request;
 *   - otherwise op_data is zeroed and set up with MDS_CROSS_REF and
 *     fid1 = fid2 = stripe FID, and md_intent_lock() is sent to the target
 *     returned by lmv_get_target() for the stripe's MDS;
 *   - rc > 0 with no request means the stripe was already cached;
 *   - a lock taken on the master is copied to master_lockh/master_lockm;
 *     for a slave the lock's l_ast_data is set to lmv_object_get(obj);
 *   - the stripe's ls_size is refreshed from body->size, the request is
 *     finished and the size is added to the running total;
 *   - a lock still recorded in the local intent is released.
 * Afterwards the reply body in *reqp is fetched for returning the refreshed
 * attributes, and when @master_valid is 0 the saved master lock is stored
 * into @oit.
 *
 * NOTE(review): the exact conditions around several of these steps (and the
 * statements that store the size into the reply body) are not visible in
 * this listing -- confirm against the full source before relying on details.
 *
 * @exp               LMV export.
 * @reqp              in: caller's reply (may be used for the master body).
 * @mid               FID of the master object.
 * @oit               caller's intent; receives the master lock handle/mode
 *                    when @master_valid is 0.
 * @master_valid      non-zero when the caller already validated and locked
 *                    the master.
 * @cb_blocking       blocking callback used for the master stripe.
 * @extra_lock_flags  passed to md_intent_lock().
 *
 * rc values visible here: PTR_ERR(tgt) and the result of md_intent_lock().
 */
540 int lmv_revalidate_slaves(struct obd_export *exp, struct ptlrpc_request **reqp,
541 const struct lu_fid *mid, struct lookup_intent *oit,
542 int master_valid, ldlm_blocking_callback cb_blocking,
543 int extra_lock_flags)
545 struct obd_device *obd = exp->exp_obd;
546 struct lmv_obd *lmv = &obd->u.lmv;
547 int master_lockm = 0;
548 struct lustre_handle *lockh = NULL;
549 struct ptlrpc_request *mreq = *reqp;
550 struct lustre_handle master_lockh = { 0 };
551 struct md_op_data *op_data;
552 struct ldlm_lock *lock;
553 unsigned long size = 0;
554 struct mdt_body *body;
555 struct lmv_object *obj;
559 struct ptlrpc_request *req;
560 ldlm_blocking_callback cb;
561 struct lookup_intent it;
562 struct lmv_tgt_desc *tgt;
566 CDEBUG(D_INODE, "Revalidate master obj "DFID"\n", PFID(mid));
568 OBD_ALLOC_PTR(op_data);
573 * We have to loop over the subobjects, check validity and update them
574 * from MDS if needed. It's very useful that we need not to update all
575 * the fields. Say, common fields (that are equal on all the subojects
576 * need not to be update, another fields (i_size, for example) are
577 * cached all the time.
579 obj = lmv_object_find_lock(obd, mid);
583 for (i = 0; i < obj->lo_objcount; i++) {
584 fid = obj->lo_stripes[i].ls_fid;
585 master = lu_fid_eq(&fid, &obj->lo_fid);
586 cb = master ? cb_blocking : lmv_blocking_ast;
589 * We need i_size and we would like to check possible cached locks,
590 * so this is is IT_GETATTR intent.
592 memset(&it, 0, sizeof(it));
593 it.it_op = IT_GETATTR;
595 if (master && master_valid) {
597 * lmv_intent_lookup() already checked
598 * validness and took the lock.
601 body = req_capsule_server_get(&mreq->rq_pill,
603 LASSERT(body != NULL);
607 * Take already cached attrs into account.
610 "Master "DFID"is locked and cached\n",
616 * Prepare op_data for revalidating. Note that @fid2 shuld be
617 * defined otherwise it will go to server and take new lock
618 * which is what we reall not need here.
620 memset(op_data, 0, sizeof(*op_data));
621 op_data->op_bias = MDS_CROSS_REF;
622 op_data->op_fid1 = fid;
623 op_data->op_fid2 = fid;
626 tgt = lmv_get_target(lmv, obj->lo_stripes[i].ls_mds);
628 GOTO(cleanup, rc = PTR_ERR(tgt));
630 CDEBUG(D_INODE, "Revalidate slave obj "DFID" -> mds #%d\n",
631 PFID(&fid), tgt->ltd_idx);
633 rc = md_intent_lock(tgt->ltd_exp, op_data, NULL, 0, &it, 0,
634 &req, cb, extra_lock_flags);
636 lockh = (struct lustre_handle *)&it.d.lustre.it_lock_handle;
637 if (rc > 0 && req == NULL) {
639 * Nice, this slave is valid.
641 CDEBUG(D_INODE, "Cached slave "DFID"\n", PFID(&fid));
650 * Save lock on master to be returned to the caller.
652 CDEBUG(D_INODE, "No lock on master "DFID" yet\n",
654 memcpy(&master_lockh, lockh, sizeof(master_lockh));
655 master_lockm = it.d.lustre.it_lock_mode;
656 it.d.lustre.it_lock_mode = 0;
659 * This is slave. We want to control it.
661 lock = ldlm_handle2lock(lockh);
662 LASSERT(lock != NULL);
663 lock->l_ast_data = lmv_object_get(obj);
669 * This is first reply, we'll use it to return updated
670 * data back to the caller.
672 LASSERT(req != NULL);
673 ptlrpc_request_addref(req);
677 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
678 LASSERT(body != NULL);
681 obj->lo_stripes[i].ls_size = body->size;
683 CDEBUG(D_INODE, "Fresh size %lu from "DFID"\n",
684 (unsigned long)obj->lo_stripes[i].ls_size, PFID(&fid));
687 ptlrpc_req_finished(req);
689 size += obj->lo_stripes[i].ls_size;
691 if (it.d.lustre.it_lock_mode && lockh) {
692 ldlm_lock_decref(lockh, it.d.lustre.it_lock_mode);
693 it.d.lustre.it_lock_mode = 0;
699 * Some attrs got refreshed, we have reply and it's time to put
702 CDEBUG(D_INODE, "Return refreshed attrs: size = %lu for "DFID"\n",
703 (unsigned long)size, PFID(mid));
705 body = req_capsule_server_get(&(*reqp)->rq_pill, &RMF_MDT_BODY);
706 LASSERT(body != NULL);
711 * Very important to maintain mds num the same because
712 * of revalidation. mreq == NULL means that caller has
713 * no reply and the only attr we can return is size.
715 body->valid = OBD_MD_FLSIZE;
717 if (master_valid == 0) {
718 oit->d.lustre.it_lock_handle = master_lockh.cookie;
719 oit->d.lustre.it_lock_mode = master_lockm;
724 * It seems all the attrs are fresh and we did no request.
726 CDEBUG(D_INODE, "All the attrs were fresh on "DFID"\n",
728 if (master_valid == 0)
729 oit->d.lustre.it_lock_mode = master_lockm;
735 OBD_FREE_PTR(op_data);
736 lmv_object_put_unlock(obj);
/*
 * Allocate a new FID for the name in @op on the MDS that holds the matching
 * stripe of the split directory @pid.
 *
 * The LMV object for @pid is looked up with lmv_object_find(); the stripe
 * index is derived from op->op_name / op->op_namelen with raw_name2idx(),
 * the MDS number is taken from that stripe, and __lmv_fid_alloc() stores the
 * new FID in @fid. A failed allocation is reported with CERROR.
 *
 * NOTE(review): the handling of a missing object and the release of the
 * object reference are not visible in this listing.
 *
 * @obd   LMV obd device.
 * @pid   FID of the parent (split) directory.
 * @op    operation data supplying the name.
 * @fid   out: newly allocated FID.
 */
740 int lmv_allocate_slaves(struct obd_device *obd, struct lu_fid *pid,
741 struct md_op_data *op, struct lu_fid *fid)
743 struct lmv_obd *lmv = &obd->u.lmv;
744 struct lmv_object *obj;
750 obj = lmv_object_find(obd, pid);
754 sidx = raw_name2idx(obj->lo_hashtype, obj->lo_objcount,
755 (char *)op->op_name, op->op_namelen);
756 mds = obj->lo_stripes[sidx].ls_mds;
/* The FID is allocated on the MDS that owns the selected stripe. */
759 rc = __lmv_fid_alloc(lmv, fid, mds);
761 CERROR("Can't allocate fid, rc %d\n", rc);
765 CDEBUG(D_INODE, "Allocate new fid "DFID" for slave "
766 "obj -> mds #%x\n", PFID(fid), mds);