1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Copyright (C) 2002, 2003, 2004, 2005, 2006 Cluster File Systems, Inc.
6 * This file is part of Lustre, http://www.lustre.org.
8 * Lustre is free software; you can redistribute it and/or
9 * modify it under the terms of version 2 of the GNU General Public
10 * License as published by the Free Software Foundation.
12 * Lustre is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Lustre; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 # define EXPORT_SYMTAB
25 #define DEBUG_SUBSYSTEM S_LMV
27 #include <linux/slab.h>
28 #include <linux/module.h>
29 #include <linux/init.h>
30 #include <linux/slab.h>
31 #include <linux/pagemap.h>
32 #include <asm/div64.h>
33 #include <linux/seq_file.h>
34 #include <linux/namei.h>
36 #include <liblustre.h>
39 #include <lustre/lustre_idl.h>
40 #include <obd_support.h>
41 #include <lustre_lib.h>
42 #include <lustre_net.h>
43 #include <lustre_dlm.h>
44 #include <obd_class.h>
45 #include <lprocfs_status.h>
46 #include "lmv_internal.h"
/*
 * Release the lock reference recorded in the intent, if there is one: when
 * it_lock_mode is non-zero, decref the lock behind it_lock_handle with that
 * mode and then reset it_lock_mode to 0.
 */
48 static inline void lmv_drop_intent_lock(struct lookup_intent *it)
50 if (it->d.lustre.it_lock_mode != 0) {
51 ldlm_lock_decref((void *)&it->d.lustre.it_lock_handle,
52 it->d.lustre.it_lock_mode);
/* A zero mode is what the guard above treats as no lock being held. */
53 it->d.lustre.it_lock_mode = 0;
/*
 * Follow up an intent reply whose body has OBD_MD_MDS set, i.e. the MDS
 * reported that the inode is remote and its attributes must be fetched from
 * another MDS.
 *
 * Reads the mdt_body at DLM_REPLY_REC_OFF of *reqp, resolves the export for
 * body->fid1 with lmv_find_export() and issues md_intent_lock() on it with
 * op_fid1 = body->fid1 and op_bias = MDS_CROSS_REF.  An IT_LOOKUP intent is
 * rewritten to IT_GETATTR first.  The lock mode and handle already stored in
 * the intent are saved in pmode/plock before the second request.
 *
 * Error codes visible here: PTR_ERR() of the export lookup, -ENOMEM when
 * op_data cannot be allocated, otherwise the md_intent_lock() result.
 * NOTE(review): several guarding conditions, the out label and the return
 * statements are not visible in this listing -- confirm the exact control
 * flow against the full file.
 */
57 int lmv_intent_remote(struct obd_export *exp, void *lmm,
58 int lmmsize, struct lookup_intent *it,
59 int flags, struct ptlrpc_request **reqp,
60 ldlm_blocking_callback cb_blocking,
63 struct obd_device *obd = exp->exp_obd;
64 struct lmv_obd *lmv = &obd->u.lmv;
65 struct ptlrpc_request *req = NULL;
66 struct lustre_handle plock;
67 struct md_op_data *op_data;
68 struct obd_export *tgt_exp;
69 struct mdt_body *body;
73 body = lustre_msg_buf((*reqp)->rq_repmsg,
74 DLM_REPLY_REC_OFF, sizeof(*body));
75 LASSERT(body != NULL);
76 LASSERT(lustre_rep_swabbed(*reqp, DLM_REPLY_REC_OFF));
78 if (!(body->valid & OBD_MD_MDS))
82 * oh, MDS reports that this is remote inode case i.e. we have to ask
83 * for real attrs on another MDS.
85 if (it->it_op & IT_LOOKUP) {
87 * unfortunately, we have to lie to MDC/MDS to retrieve
88 * attributes llite needs.
90 it->it_op = IT_GETATTR;
93 /* we got LOOKUP lock, but we really need attrs */
/*
 * Remember the mode and handle currently held by the intent, then clear
 * the mode and it_data in the intent before the second request is sent.
 */
94 pmode = it->d.lustre.it_lock_mode;
96 plock.cookie = it->d.lustre.it_lock_handle;
97 it->d.lustre.it_lock_mode = 0;
98 it->d.lustre.it_data = 0;
101 LASSERT(fid_is_sane(&body->fid1));
103 it->d.lustre.it_disposition &= ~DISP_ENQ_COMPLETE;
105 tgt_exp = lmv_find_export(lmv, &body->fid1);
107 GOTO(out, rc = PTR_ERR(tgt_exp));
109 OBD_ALLOC_PTR(op_data);
111 GOTO(out, rc = -ENOMEM);
113 op_data->op_fid1 = body->fid1;
114 op_data->op_bias = MDS_CROSS_REF;
/* The new reply is collected in the local req, not in *reqp. */
116 rc = md_intent_lock(tgt_exp, op_data, lmm, lmmsize, it, flags,
117 &req, cb_blocking, extra_lock_flags);
120 * llite needs LOOKUP lock to track dentry revocation in order to
121 * maintain dcache consistency. Thus drop UPDATE lock here and put
/*
 * Drop whatever lock the second request left in the intent and put back
 * the handle and mode saved above.
 * NOTE(review): the condition guarding these three statements is not
 * visible here -- confirm.
 */
125 lmv_drop_intent_lock(it);
126 it->d.lustre.it_lock_handle = plock.cookie;
127 it->d.lustre.it_lock_mode = pmode;
130 OBD_FREE_PTR(op_data);
/*
 * NOTE(review): the guards for the decref of the saved lock and for
 * finishing the original request are not visible here -- confirm when each
 * of the two statements below runs.
 */
134 ldlm_lock_decref(&plock, pmode);
136 ptlrpc_req_finished(*reqp);
/*
 * Allocate a fid for an entry named op->op_name under directory pid.
 *
 * Grabs the lmv object for pid, maps the name to a stripe index with
 * raw_name2idx() using the object hash type and stripe count, takes the MDS
 * number (li_mds) of that stripe and calls __lmv_fid_alloc() for it with
 * fid as the output argument.
 * NOTE(review): the CERROR about the object needing to be split suggests a
 * failure path when no lmv object is found; the guarding test and the
 * return statements are not visible here -- confirm.
 */
141 int lmv_alloc_slave_fids(struct obd_device *obd, struct lu_fid *pid,
142 struct md_op_data *op, struct lu_fid *fid)
144 struct lmv_obd *lmv = &obd->u.lmv;
151 obj = lmv_obj_grab(obd, pid);
153 CERROR("Object "DFID" should be split\n",
/* Hash the entry name to pick the stripe, then use the MDS of that stripe. */
158 mea_idx = raw_name2idx(obj->lo_hashtype, obj->lo_objcount,
159 (char *)op->op_name, op->op_namelen);
160 mds = obj->lo_inodes[mea_idx].li_mds;
163 rc = __lmv_fid_alloc(lmv, fid, mds);
165 CERROR("Can't allocate new fid, rc %d\n",
170 CDEBUG(D_INFO, "Allocate new fid "DFID" for split "
177 * IT_OPEN is intended to open (and possibly create) an object. Parent (pid)
/*
 * Handle an IT_OPEN intent.
 *
 * Works on a private copy of op_data (sop_data), freed at the end.  The
 * target export is chosen in one of two ways visible below: either the
 * name is hashed with raw_name2idx() against the lmv object grabbed for the
 * parent fid and the request goes to the MDS of that stripe with
 * MDS_CHECK_SPLIT cleared, or lmv_find_target() is used on the parent fid
 * and MDS_CHECK_SPLIT is set.  For IT_CREAT a child fid is allocated into
 * sop_data->op_fid2 with lmv_fid_alloc().
 *
 * The request is sent with md_intent_lock().  On -ERESTART the request is
 * finished, lmv_handle_split() and lmv_alloc_slave_fids() are called (the
 * orphaned comment there says the request is then repeated).  Otherwise
 * lmv_intent_remote() handles a possible remote inode, and when the reply
 * body carries a fid (OBD_MD_FLID) for which an lmv object exists or can be
 * created from the reply, lmv_revalidate_slaves() is called for that fid.
 *
 * NOTE(review): declarations of obj, rc and mea_idx, several guarding
 * tests, the retry jump and the return statements are not visible in this
 * listing -- confirm against the full file.
 */
180 int lmv_intent_open(struct obd_export *exp, struct md_op_data *op_data,
181 void *lmm, int lmmsize, struct lookup_intent *it,
182 int flags, struct ptlrpc_request **reqp,
183 ldlm_blocking_callback cb_blocking,
184 int extra_lock_flags)
186 struct obd_device *obd = exp->exp_obd;
187 struct lu_fid rpid = op_data->op_fid1;
188 struct lmv_obd *lmv = &obd->u.lmv;
189 struct md_op_data *sop_data;
190 struct obd_export *tgt_exp;
191 struct lmv_stripe_md *mea;
192 struct mdt_body *body;
197 OBD_ALLOC_PTR(sop_data);
198 if (sop_data == NULL)
201 /* save op_data for the repeat case */
202 *sop_data = *op_data;
208 obj = lmv_obj_grab(obd, &rpid);
213 * Directory is already split, so we have to forward request to
216 mea_idx = raw_name2idx(obj->lo_hashtype, obj->lo_objcount,
217 (char *)op_data->op_name,
218 op_data->op_namelen);
220 rpid = obj->lo_inodes[mea_idx].li_fid;
222 sop_data->op_mds = obj->lo_inodes[mea_idx].li_mds;
223 tgt_exp = lmv_get_export(lmv, sop_data->op_mds);
224 sop_data->op_bias &= ~MDS_CHECK_SPLIT;
226 CDEBUG(D_OTHER, "Choose slave dir ("DFID")\n", PFID(&rpid));
/*
 * Alternative path: find the target for the parent fid directly and set
 * MDS_CHECK_SPLIT in the request bias.
 * NOTE(review): presumably the else-branch of the lmv object test above;
 * the branch keyword is not visible here -- confirm.
 */
228 struct lmv_tgt_desc *tgt;
230 sop_data->op_bias |= MDS_CHECK_SPLIT;
231 tgt = lmv_find_target(lmv, &rpid);
232 sop_data->op_mds = tgt->ltd_idx;
233 tgt_exp = tgt->ltd_exp;
236 GOTO(out_free_sop_data, rc = PTR_ERR(tgt_exp));
238 sop_data->op_fid1 = rpid;
240 if (it->it_op & IT_CREAT) {
242 * For open with IT_CREATE and for IT_CREATE cases allocate new
243 * fid and setup FLD for it.
245 rc = lmv_fid_alloc(exp, &sop_data->op_fid2, sop_data);
247 GOTO(out_free_sop_data, rc);
252 GOTO(out_free_sop_data, rc);
255 rc = md_intent_lock(tgt_exp, sop_data, lmm, lmmsize, it, flags,
256 reqp, cb_blocking, extra_lock_flags);
258 if (rc == -ERESTART) {
259 LASSERT(*reqp != NULL);
260 DEBUG_REQ(D_WARNING|D_RPCTRACE, *reqp,
261 "Got -ERESTART during open!\n");
262 ptlrpc_req_finished(*reqp);
264 it->d.lustre.it_data = 0;
267 * Directory got split. Time to update local object and repeat
268 * the request with proper MDS.
270 LASSERT(lu_fid_eq(&op_data->op_fid1, &rpid));
271 rc = lmv_handle_split(exp, &rpid);
273 /* We should reallocate child FID. */
274 rc = lmv_alloc_slave_fids(obd, &rpid, op_data,
282 GOTO(out_free_sop_data, rc);
285 * Okay, MDS has returned success. Probably name has been resolved in
288 rc = lmv_intent_remote(exp, lmm, lmmsize, it, flags, reqp,
289 cb_blocking, extra_lock_flags);
293 * This is possible, that some userspace application will try to
294 * open file as directory and we will have -ENOTDIR here. As
295 * this is normal situation, we should not print error here,
298 CDEBUG(D_OTHER, "can't handle remote %s: dir "DFID"("DFID"):"
299 "%*s: %d\n", LL_IT2STR(it), PFID(&op_data->op_fid2),
300 PFID(&rpid), op_data->op_namelen, op_data->op_name, rc);
301 GOTO(out_free_sop_data, rc);
305 * Nothing is found, do not access body->fid1 as it is zero and thus
/* Negative lookup with neither create nor open performed: return 0. */
308 if ((it->d.lustre.it_disposition & DISP_LOOKUP_NEG) &&
309 !(it->d.lustre.it_disposition & DISP_OPEN_CREATE) &&
310 !(it->d.lustre.it_disposition & DISP_OPEN_OPEN))
311 GOTO(out_free_sop_data, rc = 0);
313 /* caller may use attrs MDS returns on IT_OPEN lock request so, we have
314 * to update them for split dir */
315 body = lustre_msg_buf((*reqp)->rq_repmsg,
316 DLM_REPLY_REC_OFF, sizeof(*body));
317 LASSERT(body != NULL);
318 LASSERT(lustre_rep_swabbed(*reqp, DLM_REPLY_REC_OFF));
320 /* could not find object, FID is not present in response. */
321 if (!(body->valid & OBD_MD_FLID))
322 GOTO(out_free_sop_data, rc = 0);
324 obj = lmv_obj_grab(obd, &body->fid1);
325 if (!obj && (mea = lmv_get_mea(*reqp, DLM_REPLY_REC_OFF))) {
326 /* FIXME: capability for remote! */
327 /* wow! this is split dir, we'd like to handle it */
328 obj = lmv_obj_create(exp, &body->fid1, mea);
330 GOTO(out_free_sop_data, rc = (int)PTR_ERR(obj));
334 /* This is split dir and we'd want to get attrs. */
335 CDEBUG(D_OTHER, "attrs from slaves for "DFID"\n",
338 rc = lmv_revalidate_slaves(exp, reqp, &body->fid1, it, 1,
339 cb_blocking, extra_lock_flags);
340 } else if (S_ISDIR(body->mode)) {
341 CDEBUG(D_OTHER, "object "DFID" has not lmv obj?\n",
350 OBD_FREE_PTR(sop_data);
/*
 * Handle an IT_GETATTR intent.
 *
 * Works on a private copy of op_data (sop_data), freed at the end.  Two
 * cases are distinguished by fid_is_sane(&op_data->op_fid2):
 *  - op_fid2 is sane: attributes of that fid are revalidated; the MDS is
 *    looked up with lmv_fld_lookup() on op_fid2.
 *  - otherwise: getattr by name under op_fid1; when an lmv object exists
 *    for op_fid1 and a name is given, the name is hashed with
 *    raw_name2idx() to select the stripe fid and MDS and MDS_CHECK_SPLIT is
 *    cleared; the other visible path looks the MDS up for op_fid1 and sets
 *    MDS_CHECK_SPLIT.
 *
 * The request is sent with md_intent_lock() to lmv->tgts[mds].ltd_exp and
 * -ERESTART is asserted never to come back.  Later steps visible below:
 * lmv_revalidate_slaves() on op_fid2, lmv_intent_remote(), and, when the
 * reply body carries a fid for which an lmv object exists or can be created
 * from the reply, lmv_revalidate_slaves() on body->fid1.
 *
 * NOTE(review): declarations of rc, mds and mea_idx, several guarding
 * tests and the return statements are not visible in this listing --
 * confirm against the full file.
 */
354 int lmv_intent_getattr(struct obd_export *exp, struct md_op_data *op_data,
355 void *lmm, int lmmsize, struct lookup_intent *it,
356 int flags, struct ptlrpc_request **reqp,
357 ldlm_blocking_callback cb_blocking,
358 int extra_lock_flags)
360 struct lmv_obj *obj = NULL, *obj2 = NULL;
361 struct obd_device *obd = exp->exp_obd;
362 struct lu_fid rpid = op_data->op_fid1;
363 struct lmv_obd *lmv = &obd->u.lmv;
364 struct md_op_data *sop_data;
365 struct lmv_stripe_md *mea;
366 struct mdt_body *body;
371 OBD_ALLOC_PTR(sop_data);
372 if (sop_data == NULL)
375 /* save op_data for the repeat case */
376 *sop_data = *op_data;
378 if (fid_is_sane(&op_data->op_fid2)) {
380 * Caller wants to revalidate attrs of obj we have to revalidate
381 * slaves if requested object is split directory.
383 CDEBUG(D_OTHER, "revalidate attrs for "DFID"\n",
384 PFID(&op_data->op_fid2));
386 rc = lmv_fld_lookup(lmv, &op_data->op_fid2, &mds);
388 GOTO(out_free_sop_data, rc);
391 * In fact, we do not need this with current intent_lock(), but
392 * it may change some day.
394 obj = lmv_obj_grab(obd, &op_data->op_fid2);
396 if (!lu_fid_eq(&op_data->op_fid1, &op_data->op_fid2)){
/*
 * NOTE(review): lo_inodes[] is indexed here with the MDS number returned
 * by lmv_fld_lookup() rather than with a raw_name2idx() result as in the
 * name-based branch below -- confirm this is intended.
 */
397 rpid = obj->lo_inodes[mds].li_fid;
398 mds = obj->lo_inodes[mds].li_mds;
404 CDEBUG(D_OTHER, "INTENT getattr for %*s on "DFID"\n",
405 op_data->op_namelen, op_data->op_name,
406 PFID(&op_data->op_fid1));
408 rc = lmv_fld_lookup(lmv, &op_data->op_fid1, &mds);
410 GOTO(out_free_sop_data, rc);
411 obj = lmv_obj_grab(obd, &op_data->op_fid1);
412 if (obj && op_data->op_namelen) {
415 /* directory is already split. calculate mds */
416 mea_idx = raw_name2idx(obj->lo_hashtype, obj->lo_objcount,
417 (char *)op_data->op_name,
418 op_data->op_namelen);
419 rpid = obj->lo_inodes[mea_idx].li_fid;
420 mds = obj->lo_inodes[mea_idx].li_mds;
421 sop_data->op_bias &= ~MDS_CHECK_SPLIT;
424 CDEBUG(D_OTHER, "forward to MDS #"LPU64" (slave "DFID")\n",
427 rc = lmv_fld_lookup(lmv, &op_data->op_fid1, &mds);
429 GOTO(out_free_sop_data, rc);
430 sop_data->op_bias |= MDS_CHECK_SPLIT;
434 sop_data->op_fid1 = rpid;
436 rc = md_intent_lock(lmv->tgts[mds].ltd_exp, sop_data, lmm,
437 lmmsize, it, flags, reqp, cb_blocking,
440 LASSERTF(rc != -ERESTART, "GETATTR: Got unhandled -ERESTART!\n");
442 GOTO(out_free_sop_data, rc);
446 * This is split dir. In order to optimize things a bit, we
447 * consider obj valid updating missing parts.
449 * FIXME: do we need to return any lock here? It would be fine
450 * if we don't. This means that nobody should use UPDATE lock to
451 * notify about object * removal.
454 "revalidate slaves for "DFID", rc %d\n",
455 PFID(&op_data->op_fid2), rc);
457 LASSERT(fid_is_sane(&op_data->op_fid2));
458 rc = lmv_revalidate_slaves(exp, reqp, &op_data->op_fid2, it, rc,
459 cb_blocking, extra_lock_flags);
460 GOTO(out_free_sop_data, rc);
464 GOTO(out_free_sop_data, rc);
467 * okay, MDS has returned success. Probably name has been resolved in
470 rc = lmv_intent_remote(exp, lmm, lmmsize, it, flags,
471 reqp, cb_blocking, extra_lock_flags);
473 GOTO(out_free_sop_data, rc);
476 * Nothing is found, do not access body->fid1 as it is zero and thus
479 if (it->d.lustre.it_disposition & DISP_LOOKUP_NEG)
480 GOTO(out_free_sop_data, rc = 0);
483 LASSERT((*reqp)->rq_repmsg);
484 body = lustre_msg_buf((*reqp)->rq_repmsg,
485 DLM_REPLY_REC_OFF, sizeof(*body));
486 LASSERT(body != NULL);
487 LASSERT(lustre_rep_swabbed(*reqp, DLM_REPLY_REC_OFF));
489 /* could not find object, FID is not present in response. */
490 if (!(body->valid & OBD_MD_FLID))
491 GOTO(out_free_sop_data, rc = 0);
493 obj2 = lmv_obj_grab(obd, &body->fid1);
495 if (!obj2 && (mea = lmv_get_mea(*reqp, DLM_REPLY_REC_OFF))) {
497 /* FIXME remote capability! */
498 /* wow! this is split dir, we'd like to handle it. */
499 obj2 = lmv_obj_create(exp, &body->fid1, mea);
501 GOTO(out_free_sop_data, rc = (int)PTR_ERR(obj2));
505 /* this is split dir and we'd want to get attrs */
506 CDEBUG(D_OTHER, "attrs from slaves for "DFID", rc %d\n",
507 PFID(&body->fid1), rc);
509 rc = lmv_revalidate_slaves(exp, reqp, &body->fid1, it, 1,
510 cb_blocking, extra_lock_flags);
516 OBD_FREE_PTR(sop_data);
520 /* this is not used currently */
/*
 * Refresh the cached sizes of the slave stripes of a split directory.
 *
 * The directory is identified by body->fid1 of the reply in *reqp; the
 * reply must carry a fid (OBD_MD_FLID) and an lmv object must already exist
 * for it (both are asserted).  For each stripe whose fid differs from the
 * master fid (lo_fid) the loop sends an IT_GETATTR intent with
 * MDS_CROSS_REF to the export of the stripe MDS, using lmv_blocking_ast as
 * the blocking callback.  A positive rc with no request is treated as the
 * cached case; otherwise body2->size from the new reply is stored in
 * li_size, the lock ast data is pointed at the lmv object, and the request
 * is finished.  lmv_update_body() is called on the caller reply body for
 * the stripe and any lock reference held by the per-stripe intent is
 * dropped.
 *
 * NOTE(review): declarations of obj, rc and i, the loop-control statements
 * after each branch, the cleanup label and the return statements are not
 * visible in this listing -- confirm against the full file.
 */
521 int lmv_lookup_slaves(struct obd_export *exp, struct ptlrpc_request **reqp)
523 struct obd_device *obd = exp->exp_obd;
524 struct lmv_obd *lmv = &obd->u.lmv;
525 struct lustre_handle *lockh;
526 struct md_op_data *op_data;
527 struct ldlm_lock *lock;
528 struct mdt_body *body2;
529 struct mdt_body *body;
538 * Master is locked. we'd like to take locks on slaves and update
539 * attributes to be returned from the slaves it's important that lookup
540 * is called in two cases:
542 * - for first time (dcache has no such a resolving yet). -
543 * ->d_revalidate() returned false.
545 * Last case possible only if all the objs (master and all slaves aren't
549 OBD_ALLOC_PTR(op_data);
553 body = lustre_msg_buf((*reqp)->rq_repmsg,
554 DLM_REPLY_REC_OFF, sizeof(*body));
555 LASSERT(body != NULL);
556 LASSERT(lustre_rep_swabbed(*reqp, DLM_REPLY_REC_OFF));
558 LASSERT((body->valid & OBD_MD_FLID) != 0);
559 obj = lmv_obj_grab(obd, &body->fid1);
560 LASSERT(obj != NULL);
562 CDEBUG(D_OTHER, "lookup slaves for "DFID"\n",
567 for (i = 0; i < obj->lo_objcount; i++) {
568 struct lu_fid fid = obj->lo_inodes[i].li_fid;
569 struct ptlrpc_request *req = NULL;
570 struct obd_export *tgt_exp;
571 struct lookup_intent it;
573 if (lu_fid_eq(&fid, &obj->lo_fid))
574 /* skip master obj */
577 CDEBUG(D_OTHER, "lookup slave "DFID"\n", PFID(&fid));
580 memset(&it, 0, sizeof(it));
581 it.it_op = IT_GETATTR;
583 memset(op_data, 0, sizeof(*op_data));
584 op_data->op_fid1 = fid;
585 op_data->op_fid2 = fid;
586 op_data->op_bias = MDS_CROSS_REF;
588 tgt_exp = lmv_get_export(lmv, obj->lo_inodes[i].li_mds);
590 GOTO(cleanup, rc = PTR_ERR(tgt_exp));
592 rc = md_intent_lock(tgt_exp, op_data, NULL, 0, &it, 0,
593 &req, lmv_blocking_ast, 0);
595 lockh = (struct lustre_handle *)&it.d.lustre.it_lock_handle;
596 if (rc > 0 && req == NULL) {
597 /* nice, this slave is valid */
598 LASSERT(req == NULL);
599 CDEBUG(D_OTHER, "cached\n");
604 /* error during lookup */
607 lock = ldlm_handle2lock(lockh);
610 lock->l_ast_data = lmv_obj_get(obj);
612 body2 = lustre_msg_buf(req->rq_repmsg,
613 DLM_REPLY_REC_OFF, sizeof(*body2));
614 LASSERT(body2 != NULL);
615 LASSERT(lustre_rep_swabbed(req, DLM_REPLY_REC_OFF));
617 obj->lo_inodes[i].li_size = body2->size;
619 CDEBUG(D_OTHER, "fresh: %lu\n",
620 (unsigned long)obj->lo_inodes[i].li_size);
625 ptlrpc_req_finished(req);
627 lmv_update_body(body, obj->lo_inodes + i);
629 if (it.d.lustre.it_lock_mode) {
630 ldlm_lock_decref(lockh, it.d.lustre.it_lock_mode);
631 it.d.lustre.it_lock_mode = 0;
639 OBD_FREE_PTR(op_data);
/*
 * Handle an IT_LOOKUP intent.
 *
 * Works on a private copy of op_data (sop_data), freed at the end.  Two
 * cases are distinguished by fid_is_sane(&op_data->op_fid2):
 *  - revalidate (op_fid2 sane): the lmv object for op_fid1 is grabbed; the
 *    visible paths either hash the name with raw_name2idx() to pick the
 *    stripe fid and MDS (MDS_CHECK_SPLIT cleared) or look the MDS up with
 *    lmv_fld_lookup() (MDS_CHECK_SPLIT set).
 *  - lookup: same choice of stripe or fld lookup based on op_fid1, and
 *    sop_data->op_fid2 is zeroed.
 *
 * MDS_CROSS_REF is cleared from the bias, op_fid1 is replaced by the
 * selected fid and md_intent_lock() is issued on lmv->tgts[mds].ltd_exp.
 * On -ERESTART the request is finished and an lmv object is created for
 * the parent with lmv_obj_create(exp, &rpid, NULL).  After success
 * lmv_intent_remote() is called, and if the reply carries a mea an lmv
 * object for body->fid1 is grabbed or created.
 *
 * NOTE(review): declarations of obj, rc, mds and mea_idx, several guarding
 * tests, the retry jump and the return statements are not visible in this
 * listing -- confirm against the full file.
 */
643 int lmv_intent_lookup(struct obd_export *exp, struct md_op_data *op_data,
644 void *lmm, int lmmsize, struct lookup_intent *it,
645 int flags, struct ptlrpc_request **reqp,
646 ldlm_blocking_callback cb_blocking,
647 int extra_lock_flags)
649 struct obd_device *obd = exp->exp_obd;
650 struct lu_fid rpid = op_data->op_fid1;
651 struct lmv_obd *lmv = &obd->u.lmv;
652 struct md_op_data *sop_data;
653 struct lmv_stripe_md *mea;
654 struct mdt_body *body;
661 OBD_ALLOC_PTR(sop_data);
662 if (sop_data == NULL)
665 /* save op_data for the repeat case */
666 *sop_data = *op_data;
669 * IT_LOOKUP is intended to produce name -> fid resolving (let's call
670 * this lookup below) or to confirm requested resolving is still valid
671 * (let's call this revalidation) fid_is_sane(&sop_data->op_fid2) specifies
674 if (fid_is_sane(&op_data->op_fid2)) {
676 * This is revalidate: we have to check is LOOKUP lock still
677 * valid for given fid. Very important part is that we have to
678 * choose right mds because namespace is per mds.
680 rpid = op_data->op_fid1;
681 obj = lmv_obj_grab(obd, &rpid);
683 mea_idx = raw_name2idx(obj->lo_hashtype,
685 (char *)op_data->op_name,
686 op_data->op_namelen);
687 rpid = obj->lo_inodes[mea_idx].li_fid;
688 mds = obj->lo_inodes[mea_idx].li_mds;
689 sop_data->op_bias &= ~MDS_CHECK_SPLIT;
692 rc = lmv_fld_lookup(lmv, &rpid, &mds);
694 GOTO(out_free_sop_data, rc);
695 sop_data->op_bias |= MDS_CHECK_SPLIT;
698 CDEBUG(D_OTHER, "revalidate lookup for "DFID" to #"LPU64" MDS\n",
699 PFID(&op_data->op_fid2), mds);
706 * This is lookup. During lookup we have to update all the
707 * attributes, because returned values will be put in struct
710 obj = lmv_obj_grab(obd, &op_data->op_fid1);
712 if (op_data->op_namelen) {
713 /* directory is already split. calculate mds */
714 mea_idx = raw_name2idx(obj->lo_hashtype,
716 (char *)op_data->op_name,
717 op_data->op_namelen);
718 rpid = obj->lo_inodes[mea_idx].li_fid;
719 mds = obj->lo_inodes[mea_idx].li_mds;
721 sop_data->op_bias &= ~MDS_CHECK_SPLIT;
724 rc = lmv_fld_lookup(lmv, &op_data->op_fid1, &mds);
726 GOTO(out_free_sop_data, rc);
727 sop_data->op_bias |= MDS_CHECK_SPLIT;
729 fid_zero(&sop_data->op_fid2);
732 sop_data->op_bias &= ~MDS_CROSS_REF;
733 sop_data->op_fid1 = rpid;
735 rc = md_intent_lock(lmv->tgts[mds].ltd_exp, sop_data, lmm, lmmsize,
736 it, flags, reqp, cb_blocking, extra_lock_flags);
738 LASSERT(fid_is_sane(&op_data->op_fid2));
740 * Very interesting. it seems object is still valid but for some
741 * reason llite calls lookup, not revalidate.
743 CDEBUG(D_OTHER, "lookup for "DFID" and data should be uptodate\n",
745 LASSERT(*reqp == NULL);
746 GOTO(out_free_sop_data, rc);
749 if (rc == 0 && *reqp == NULL) {
750 /* once again, we're asked for lookup, not revalidate */
751 CDEBUG(D_OTHER, "lookup for "DFID" and data should be uptodate\n",
753 GOTO(out_free_sop_data, rc);
756 if (rc == -ERESTART) {
757 LASSERT(*reqp != NULL);
758 DEBUG_REQ(D_WARNING|D_RPCTRACE, *reqp,
759 "Got -ERESTART during lookup!\n");
760 ptlrpc_req_finished(*reqp);
762 it->d.lustre.it_data = 0;
764 * Directory got split since last update. This shouldn't be
765 * because splitting causes lock revocation, so revalidate had
766 * to fail and lookup on dir had to return mea.
768 CWARN("we haven't knew about directory splitting!\n");
769 LASSERT(obj == NULL);
771 obj = lmv_obj_create(exp, &rpid, NULL);
773 GOTO(out_free_sop_data, rc = PTR_ERR(obj));
779 GOTO(out_free_sop_data, rc);
782 * Okay, MDS has returned success. Probably name has been resolved in
785 rc = lmv_intent_remote(exp, lmm, lmmsize, it, flags, reqp,
786 cb_blocking, extra_lock_flags);
788 if (rc == 0 && (mea = lmv_get_mea(*reqp, DLM_REPLY_REC_OFF))) {
789 /* Wow! This is split dir, we'd like to handle it. */
790 body = lustre_msg_buf((*reqp)->rq_repmsg,
791 DLM_REPLY_REC_OFF, sizeof(*body));
792 LASSERT(body != NULL);
793 LASSERT(lustre_rep_swabbed(*reqp, DLM_REPLY_REC_OFF));
794 LASSERT((body->valid & OBD_MD_FLID) != 0);
796 obj = lmv_obj_grab(obd, &body->fid1);
798 obj = lmv_obj_create(exp, &body->fid1, mea);
800 GOTO(out_free_sop_data, rc = (int)PTR_ERR(obj));
807 OBD_FREE_PTR(sop_data);
/*
 * Entry point for intent locking through LMV.
 *
 * Asserts that op_data->op_fid1 is sane, logs the request, calls
 * lmv_check_connect() on the obd device and then dispatches on it->it_op to
 * lmv_intent_lookup(), lmv_intent_open() or lmv_intent_getattr(), passing
 * all arguments through.  The tests form an if / else-if chain in that
 * order, so an intent with more than one of these bits set is handled by
 * the first matching branch.
 *
 * NOTE(review): the declaration of rc, the handling of the
 * lmv_check_connect() result, any final else branch and the return
 * statement are not visible in this listing -- confirm against the full
 * file.
 */
811 int lmv_intent_lock(struct obd_export *exp, struct md_op_data *op_data,
812 void *lmm, int lmmsize, struct lookup_intent *it,
813 int flags, struct ptlrpc_request **reqp,
814 ldlm_blocking_callback cb_blocking,
815 int extra_lock_flags)
817 struct obd_device *obd = exp->exp_obd;
822 LASSERT(fid_is_sane(&op_data->op_fid1));
824 CDEBUG(D_OTHER, "INTENT LOCK '%s' for '%*s' on "DFID"\n",
825 LL_IT2STR(it), op_data->op_namelen, op_data->op_name,
826 PFID(&op_data->op_fid1));
828 rc = lmv_check_connect(obd);
/* Dispatch on the intent operation bits; first match wins. */
832 if (it->it_op & IT_LOOKUP)
833 rc = lmv_intent_lookup(exp, op_data, lmm, lmmsize, it,
834 flags, reqp, cb_blocking,
836 else if (it->it_op & IT_OPEN)
837 rc = lmv_intent_open(exp, op_data, lmm, lmmsize, it,
838 flags, reqp, cb_blocking,
840 else if (it->it_op & IT_GETATTR)
841 rc = lmv_intent_getattr(exp, op_data,lmm, lmmsize, it,
842 flags, reqp, cb_blocking,
/*
 * Revalidate the stripes of the split directory mid and accumulate their
 * sizes.
 *
 * An lmv object for mid must exist (asserted).  For every stripe the loop
 * zeroes op_data and a local IT_GETATTR intent, selects lmv_blocking_ast as
 * callback and, on the visible request path, sends md_intent_lock() with
 * MDS_CROSS_REF to the export of the stripe MDS.  A positive rc with no
 * request is treated as the cached case.  When a reply is received its
 * body->size is stored in li_size of the stripe.  Every stripe adds its
 * li_size to the running total in size, and any lock reference left in the
 * per-stripe intent is dropped.
 *
 * The stripe whose fid equals lo_fid is the master: the visible code for it
 * reads the body of the caller reply (mreq) and, on another path, asserts
 * master_valid == 0 and saves the lock handle and mode in master_lockh /
 * master_lock_mode.  When master_valid is 0 these are written into oit
 * after the loop.  After the loop body->valid of the reply in *reqp is set
 * to OBD_MD_FLSIZE on one visible path.
 *
 * NOTE(review): declarations of obj, rc and i, most branch conditions, the
 * cleanup label and everything after lmv_obj_unlock() are not visible in
 * this listing -- confirm against the full file.
 */
849 int lmv_revalidate_slaves(struct obd_export *exp, struct ptlrpc_request **reqp,
850 const struct lu_fid *mid, struct lookup_intent *oit,
851 int master_valid, ldlm_blocking_callback cb_blocking,
852 int extra_lock_flags)
854 struct obd_device *obd = exp->exp_obd;
855 struct ptlrpc_request *mreq = *reqp;
856 struct lmv_obd *lmv = &obd->u.lmv;
857 struct lustre_handle master_lockh;
858 struct obd_export *tgt_exp;
859 struct md_op_data *op_data;
860 struct ldlm_lock *lock;
861 unsigned long size = 0;
862 struct mdt_body *body;
864 int master_lock_mode;
868 OBD_ALLOC_PTR(op_data);
873 * We have to loop over the subobjects, check validity and update them
874 * from MDSs if needed. it's very useful that we need not to update all
875 * the fields. say, common fields (that are equal on all the subojects
876 * need not to be update, another fields (i_size, for example) are
877 * cached all the time.
879 obj = lmv_obj_grab(obd, mid);
880 LASSERT(obj != NULL);
882 master_lock_mode = 0;
886 for (i = 0; i < obj->lo_objcount; i++) {
887 struct lu_fid fid = obj->lo_inodes[i].li_fid;
888 struct lustre_handle *lockh = NULL;
889 struct ptlrpc_request *req = NULL;
890 ldlm_blocking_callback cb;
891 struct lookup_intent it;
894 CDEBUG(D_OTHER, "revalidate subobj "DFID"\n",
897 memset(op_data, 0, sizeof(*op_data));
898 memset(&it, 0, sizeof(it));
899 it.it_op = IT_GETATTR;
901 cb = lmv_blocking_ast;
903 if (lu_fid_eq(&fid, &obj->lo_fid)) {
906 * lmv_intent_getattr() already checked
907 * validness and took the lock.
911 * It even got the reply refresh attrs
914 body = lustre_msg_buf(mreq->rq_repmsg,
917 LASSERT(body != NULL);
918 LASSERT(lustre_rep_swabbed(
919 mreq, DLM_REPLY_REC_OFF));
922 /* take already cached attrs into account */
924 "master is locked and cached\n");
931 op_data->op_fid1 = fid;
932 op_data->op_fid2 = fid;
933 op_data->op_bias = MDS_CROSS_REF;
936 tgt_exp = lmv_get_export(lmv, obj->lo_inodes[i].li_mds);
938 GOTO(cleanup, rc = PTR_ERR(tgt_exp));
940 rc = md_intent_lock(tgt_exp, op_data, NULL, 0, &it, 0, &req, cb,
943 lockh = (struct lustre_handle *)&it.d.lustre.it_lock_handle;
944 if (rc > 0 && req == NULL) {
945 /* Nice, this slave is valid */
946 LASSERT(req == NULL);
947 CDEBUG(D_OTHER, "cached\n");
955 LASSERT(master_valid == 0);
956 /* Save lock on master to be returned to the caller. */
957 CDEBUG(D_OTHER, "no lock on master yet\n");
958 memcpy(&master_lockh, lockh, sizeof(master_lockh));
959 master_lock_mode = it.d.lustre.it_lock_mode;
960 it.d.lustre.it_lock_mode = 0;
962 /* This is slave. We want to control it. */
963 lock = ldlm_handle2lock(lockh);
964 LASSERT(lock != NULL);
965 lock->l_ast_data = lmv_obj_get(obj);
971 * This is first reply, we'll use it to return updated
972 * data back to the caller.
975 ptlrpc_request_addref(req);
979 body = lustre_msg_buf(req->rq_repmsg,
980 DLM_REPLY_REC_OFF, sizeof(*body));
981 LASSERT(body != NULL);
982 LASSERT(lustre_rep_swabbed(req, DLM_REPLY_REC_OFF));
985 obj->lo_inodes[i].li_size = body->size;
987 CDEBUG(D_OTHER, "fresh: %lu\n",
988 (unsigned long)obj->lo_inodes[i].li_size);
991 ptlrpc_req_finished(req);
993 size += obj->lo_inodes[i].li_size;
995 if (it.d.lustre.it_lock_mode) {
996 ldlm_lock_decref(lockh, it.d.lustre.it_lock_mode);
997 it.d.lustre.it_lock_mode = 0;
1003 * Some attrs got refreshed, we have reply and it's time to put
1004 * fresh attrs to it.
1006 CDEBUG(D_OTHER, "return refreshed attrs: size = %lu\n",
1007 (unsigned long)size);
1009 body = lustre_msg_buf((*reqp)->rq_repmsg,
1010 DLM_REPLY_REC_OFF, sizeof(*body));
1011 LASSERT(body != NULL);
1012 LASSERT(lustre_rep_swabbed(*reqp, DLM_REPLY_REC_OFF));
1018 * Very important to maintain mds num the same because
1019 * of revalidation. mreq == NULL means that caller has
1020 * no reply and the only attr we can return is size.
1022 body->valid = OBD_MD_FLSIZE;
1024 if (master_valid == 0) {
1025 memcpy(&oit->d.lustre.it_lock_handle,
1026 &master_lockh, sizeof(master_lockh));
1027 oit->d.lustre.it_lock_mode = master_lock_mode;
1031 /* It seems all the attrs are fresh and we did no request */
1032 CDEBUG(D_OTHER, "all the attrs were fresh\n");
1033 if (master_valid == 0)
1034 oit->d.lustre.it_lock_mode = master_lock_mode;
1040 OBD_FREE_PTR(op_data);
1041 lmv_obj_unlock(obj);