1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Copyright (C) 2002, 2003 Cluster File Systems, Inc.
6 * This file is part of Lustre, http://www.lustre.org.
8 * Lustre is free software; you can redistribute it and/or
9 * modify it under the terms of version 2 of the GNU General Public
10 * License as published by the Free Software Foundation.
12 * Lustre is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Lustre; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 # define EXPORT_SYMTAB
25 #define DEBUG_SUBSYSTEM S_LMV
27 #include <linux/slab.h>
28 #include <linux/module.h>
29 #include <linux/init.h>
30 #include <linux/slab.h>
31 #include <linux/pagemap.h>
32 #include <asm/div64.h>
33 #include <linux/seq_file.h>
35 #include <liblustre.h>
38 #include <linux/obd_support.h>
39 #include <linux/lustre_lib.h>
40 #include <linux/lustre_net.h>
41 #include <linux/lustre_idl.h>
42 #include <linux/lustre_dlm.h>
43 #include <linux/lustre_mds.h>
44 #include <linux/obd_class.h>
45 #include <linux/obd_ost.h>
46 #include <linux/lprocfs_status.h>
47 #include <linux/lustre_fsfilt.h>
48 #include <linux/obd_lmv.h>
49 #include "lmv_internal.h"
// Release the lock reference recorded in an intent, if there is one.
// When it->d.lustre.it_lock_mode is non-zero, ldlm_lock_decref() is called
// on it->d.lustre.it_lock_handle with that mode. The visible code does not
// reset it_lock_mode afterwards, so callers that reuse the intent have to
// overwrite the handle/mode themselves.
52 static inline void lmv_drop_intent_lock(struct lookup_intent *it)
54 if (it->d.lustre.it_lock_mode != 0)
55 ldlm_lock_decref((void *)&it->d.lustre.it_lock_handle,
56 it->d.lustre.it_lock_mode);
// Handle a reply that may describe an inode located on another MDS.
// The mds_body is taken from buffer 1 of the reply in *reqp. When
// body->valid has OBD_MD_MDS set, a second intent request is sent through
// md_intent_lock() to the target selected by id_group(&nid) in order to
// fetch the real attributes, and the lock obtained by the first request is
// put back into the intent afterwards.
// NOTE(review): the declarations of rc, pmode and nid, the error checks
// and the final return are not visible in this view, so the return value
// and the exact error handling cannot be documented from here.
59 int lmv_handle_remote_inode(struct obd_export *exp, void *lmm,
60 int lmmsize, struct lookup_intent *it,
61 int flags, struct ptlrpc_request **reqp,
62 ldlm_blocking_callback cb_blocking)
64 struct obd_device *obd = exp->exp_obd;
65 struct lmv_obd *lmv = &obd->u.lmv;
66 struct mds_body *body = NULL;
// the reply body is mandatory: a missing buffer trips the assertion
70 body = lustre_msg_buf((*reqp)->rq_repmsg, 1, sizeof(*body));
71 LASSERT(body != NULL);
73 if (body->valid & OBD_MD_MDS) {
75 * oh, MDS reports that this is remote inode case i.e. we have
76 * to ask for real attrs on another MDS.
78 struct ptlrpc_request *req = NULL;
79 struct lustre_handle plock;
83 if (it->it_op == IT_LOOKUP) {
85 * unfortunately, we have to lie to MDC/MDS to retrieve
86 * attributes llite needs.
88 it->it_op = IT_GETATTR;
91 /* we got LOOKUP lock, but we really need attrs */
// save the mode and handle of the lock already held, then zero the mode in
// the intent (presumably so the second request can record its own lock)
92 pmode = it->d.lustre.it_lock_mode;
94 memcpy(&plock, &it->d.lustre.it_lock_handle,
96 it->d.lustre.it_lock_mode = 0;
// DISP_ENQ_COMPLETE is cleared before the intent is issued a second time
100 it->d.lustre.it_disposition &= ~DISP_ENQ_COMPLETE;
101 rc = md_intent_lock(lmv->tgts[id_group(&nid)].ltd_exp, &nid,
102 NULL, 0, lmm, lmmsize, NULL, it, flags,
106 * llite needs LOOKUP lock to track dentry revocation in order
107 * to maintain dcache consistency. Thus drop UPDATE lock here
108 * and put LOOKUP in request.
// drop whatever lock the second request left in the intent and restore the
// saved handle and mode
111 lmv_drop_intent_lock(it);
112 memcpy(&it->d.lustre.it_lock_handle, &plock,
114 it->d.lustre.it_lock_mode = pmode;
// NOTE(review): the condition guarding this decref of the saved lock is not
// visible here; presumably it belongs to a failure path - confirm
117 ldlm_lock_decref(&plock, pmode);
// the request passed in by the caller is released; what *reqp is set to
// afterwards is not visible in this view
119 ptlrpc_req_finished(*reqp);
// IT_OPEN handler: send an open intent for name under parent pid.
// Flow that can be read from the visible lines:
//   1. lmv_grab_obj() looks for a cached lmv object of the parent; when the
//      directory is split, raw_name2idx() picks the sub-object and rpid is
//      set to that sub-object's id.
//   2. md_intent_lock() is issued to tgts[id_group(&rpid)].
//   3. On -ERESTART lmv_get_mea_and_update_object() refreshes the local
//      object and the reply is released before (presumably) retrying.
//   4. lmv_handle_remote_inode() deals with a possibly remote inode.
//   5. If the opened object is a split directory, lmv_revalidate_slaves()
//      collects attributes from the slaves.
// NOTE(review): declarations of obj and mea, the retry label, most branch
// conditions and the final return are not visible in this view.
125 int lmv_intent_open(struct obd_export *exp, struct lustre_id *pid,
126 const char *name, int len, void *lmm, int lmmsize,
127 struct lustre_id *cid, struct lookup_intent *it,
128 int flags, struct ptlrpc_request **reqp,
129 ldlm_blocking_callback cb_blocking)
131 struct obd_device *obd = exp->exp_obd;
132 struct lmv_obd *lmv = &obd->u.lmv;
133 struct mds_body *body = NULL;
134 struct lustre_id rpid = *pid;
137 int rc, mds, loop = 0;
140 /* IT_OPEN is intended to open (and create, possibly) an object. Parent
141 * (pid) may be splitted dir */
// this point may be reached at most twice (one initial pass, one retry).
// NOTE(review): the increment lives inside LASSERT(); if LASSERT can be
// compiled out the counter never advances, which only matters if loop is
// read elsewhere - confirm
144 LASSERT(++loop <= 2);
145 mds = id_group(&rpid);
146 obj = lmv_grab_obj(obd, &rpid);
148 /* directory is already splitted, so we have to forward
149 * request to the right MDS */
150 mds = raw_name2idx(obj->hashtype, obj->objcount,
153 CDEBUG(D_OTHER, "forward to MDS #%u ("DLID4")\n",
155 rpid = obj->objs[mds].id;
// the target export is chosen from the group encoded in rpid
159 rc = md_intent_lock(lmv->tgts[id_group(&rpid)].ltd_exp, &rpid, name,
160 len, lmm, lmmsize, cid, it, flags, reqp,
162 if (rc == -ERESTART) {
163 /* directory got splitted. time to update local object and
164 * repeat the request with proper MDS */
// -ERESTART is only expected while still talking to the unsplit parent
165 LASSERT(lmv_id_equal(pid, &rpid));
166 rc = lmv_get_mea_and_update_object(exp, &rpid);
168 ptlrpc_req_finished(*reqp);
175 /* okay, MDS has returned success. Probably name has been resolved in
177 rc = lmv_handle_remote_inode(exp, lmm, lmmsize, it,
178 flags, reqp, cb_blocking);
181 CERROR("can't handle remote %s: dir "DLID4"("DLID4"):"
182 "%*s: %d\n", LL_IT2STR(it), OLID4(pid), OLID4(&rpid),
187 /* caller may use attrs MDS returns on IT_OPEN lock request so, we have
188 * to update them for splitted dir */
189 body = lustre_msg_buf((*reqp)->rq_repmsg, 1, sizeof(*body));
190 LASSERT(body != NULL);
// look for a cached object of the child; if none exists and the reply
// carries split-directory data (mea), create the object from body->id1
193 obj = lmv_grab_obj(obd, cid);
194 if (!obj && (mea = lmv_splitted_dir_body(*reqp, 1))) {
195 /* wow! this is splitted dir, we'd like to handle it */
196 obj = lmv_create_obj(exp, &body->id1, mea);
198 RETURN(PTR_ERR(obj));
202 /* this is splitted dir and we'd want to get attrs */
203 CDEBUG(D_OTHER, "attrs from slaves for "DLID4"\n",
// master_valid is passed as 1: the master was already handled by the open
206 rc = lmv_revalidate_slaves(exp, reqp, cid, it, 1,
208 } else if (S_ISDIR(body->mode)) {
209 CDEBUG(D_OTHER, "object "DLID4" has not lmv obj?\n",
// IT_GETATTR (and, per the dispatcher, IT_CHDIR) handler.
// Two cases are visible:
//   - revalidation of a known child (cid): the cached lmv object of cid is
//     looked up and, when pid and cid differ, rpid/mds are redirected to a
//     sub-object of that object;
//   - lookup by name under pid: when the parent is a split directory,
//     raw_name2idx() selects the sub-object that should hold the name.
// In both cases md_intent_lock() is sent to tgts[mds]; afterwards the code
// may call lmv_revalidate_slaves() for split directories and
// lmv_handle_remote_inode() for inodes living on another MDS.
// NOTE(review): declarations of mea, rc and mds, most branch conditions and
// the final return are not visible in this view.
219 int lmv_intent_getattr(struct obd_export *exp, struct lustre_id *pid,
220 const char *name, int len, void *lmm, int lmmsize,
221 struct lustre_id *cid, struct lookup_intent *it,
222 int flags, struct ptlrpc_request **reqp,
223 ldlm_blocking_callback cb_blocking)
225 struct obd_device *obd = exp->exp_obd;
226 struct lmv_obd *lmv = &obd->u.lmv;
227 struct mds_body *body = NULL;
228 struct lustre_id rpid = *pid;
229 struct lmv_obj *obj, *obj2;
235 /* caller wants to revalidate attrs of obj we have to revalidate
236 * slaves if requested object is splitted directory */
237 CDEBUG(D_OTHER, "revalidate attrs for "DLID4"\n", OLID4(cid));
239 obj = lmv_grab_obj(obd, cid);
241 /* in fact, we need not this with current intent_lock(),
242 * but it may change some day */
243 if (!lmv_id_equal(pid, cid)){
// NOTE(review): mds is used as an index here before the assignment below;
// its earlier initialisation is not visible in this view - confirm
244 rpid = obj->objs[mds].id;
245 mds = id_group(&rpid);
250 CDEBUG(D_OTHER, "INTENT getattr for %*s on "DLID4"\n",
251 len, name, OLID4(pid));
253 obj = lmv_grab_obj(obd, pid);
255 /* directory is already splitted. calculate mds */
// first the name hash picks the sub-object index, then mds is overwritten
// with the group of that sub-object's id
256 mds = raw_name2idx(obj->hashtype, obj->objcount,
258 rpid = obj->objs[mds].id;
259 mds = id_group(&rpid);
262 CDEBUG(D_OTHER, "forward to MDS #%u (slave "DLID4")\n",
266 rc = md_intent_lock(lmv->tgts[mds].ltd_exp, &rpid, name,
267 len, lmm, lmmsize, cid, it, flags, reqp,
273 /* this is splitted dir. In order to optimize things a
274 * bit, we consider obj valid updating missing parts.
276 * FIXME: do we need to return any lock here? It would
277 * be fine if we don't. this means that nobody should
278 * use UPDATE lock to notify about object * removal */
280 "revalidate slaves for "DLID4", rc %d\n",
// here the result of md_intent_lock() is forwarded as master_valid
284 rc = lmv_revalidate_slaves(exp, reqp, cid, it, rc,
292 /* okay, MDS has returned success. probably name has been
293 * resolved in remote inode */
294 rc = lmv_handle_remote_inode(exp, lmm, lmmsize, it,
295 flags, reqp, cb_blocking);
299 body = lustre_msg_buf((*reqp)->rq_repmsg, 1, sizeof(*body));
300 LASSERT(body != NULL);
// obj2 refers to the child (cid), as opposed to obj used above
303 obj2 = lmv_grab_obj(obd, cid);
305 if (!obj2 && (mea = lmv_splitted_dir_body(*reqp, 1))) {
306 /* wow! this is splitted dir, we'd like to handle it. */
307 body = lustre_msg_buf((*reqp)->rq_repmsg, 1, sizeof(*body));
308 LASSERT(body != NULL);
310 obj2 = lmv_create_obj(exp, &body->id1, mea);
312 RETURN(PTR_ERR(obj2));
316 /* this is splitted dir and we'd want to get attrs */
317 CDEBUG(D_OTHER, "attrs from slaves for "DLID4", rc %d\n",
320 rc = lmv_revalidate_slaves(exp, reqp, cid, it, 1, cb_blocking);
// Fold one sub-object into the reply body: obj->size is added to
// body->size. No other field is touched by the visible code.
326 void lmv_update_body_from_obj(struct mds_body *body, struct lmv_inode *obj)
329 body->size += obj->size;
// Refresh the attributes of every slave of a split directory after a
// lookup on the master.
// The master id is read from body->id1 of the reply in *reqp and its lmv
// object must already exist (asserted). For each sub-object other than the
// master an IT_GETATTR intent is sent via md_intent_lock() with
// lmv_dirobj_blocking_ast as blocking callback; the size from a fresh reply
// is stored in obj->objs[i].size and every slave's size is accumulated into
// the master's reply body through lmv_update_body_from_obj().
// NOTE(review): declarations of obj, i and rc, the error branches and the
// final return are not visible in this view.
332 int lmv_lookup_slaves(struct obd_export *exp, struct ptlrpc_request **reqp)
334 struct obd_device *obd = exp->exp_obd;
335 struct lmv_obd *lmv = &obd->u.lmv;
336 struct mds_body *body = NULL;
337 struct lustre_handle *lockh;
338 struct ldlm_lock *lock;
339 struct mds_body *body2;
347 /* master is locked. we'd like to take locks on slaves and update
348 * attributes to be returned from the slaves it's important that lookup
349 * is called in two cases:
351 * - for first time (dcache has no such a resolving yet).
352 * - ->d_revalidate() returned false.
354 * last case possible only if all the objs (master and all slaves aren't
357 body = lustre_msg_buf((*reqp)->rq_repmsg, 1, sizeof(*body));
358 LASSERT(body != NULL);
360 obj = lmv_grab_obj(obd, &body->id1);
361 LASSERT(obj != NULL);
363 CDEBUG(D_OTHER, "lookup slaves for "DLID4"\n",
368 for (i = 0; i < obj->objcount; i++) {
369 struct lustre_id id = obj->objs[i].id;
370 struct ptlrpc_request *req = NULL;
371 struct lookup_intent it;
373 if (lmv_id_equal(&id, &obj->id))
374 /* skip master obj */
377 CDEBUG(D_OTHER, "lookup slave "DLID4"\n", OLID4(&id));
// a zeroed, function-local intent is used for every slave
380 memset(&it, 0, sizeof(it));
381 it.it_op = IT_GETATTR;
382 rc = md_intent_lock(lmv->tgts[id_group(&id)].ltd_exp, &id,
383 NULL, 0, NULL, 0, &id, &it, 0, &req,
384 lmv_dirobj_blocking_ast);
386 lockh = (struct lustre_handle *)&it.d.lustre.it_lock_handle;
// positive rc without a reply is treated as "already cached".
// NOTE(review): the LASSERT below repeats the condition just tested
387 if (rc > 0 && req == NULL) {
388 /* nice, this slave is valid */
389 LASSERT(req == NULL);
390 CDEBUG(D_OTHER, "cached\n");
395 /* error during lookup */
// attach a reference to the lmv object to the lock's l_ast_data
// (presumably for use by the blocking callback - confirm)
398 lock = ldlm_handle2lock(lockh);
401 lock->l_ast_data = lmv_get_obj(obj);
403 body2 = lustre_msg_buf(req->rq_repmsg, 1, sizeof(*body2));
406 obj->objs[i].size = body2->size;
408 CDEBUG(D_OTHER, "fresh: %lu\n",
409 (unsigned long)obj->objs[i].size);
414 ptlrpc_req_finished(req);
// add this slave's (cached or fresh) size to the master's reply body
416 lmv_update_body_from_obj(body, obj->objs + i);
// the slave lock reference is released again once its data has been used
418 if (it.d.lustre.it_lock_mode)
419 ldlm_lock_decref(lockh, it.d.lustre.it_lock_mode);
// IT_LOOKUP handler: resolve name under pid, or revalidate an existing
// resolution when cid is supplied (per the comment inside the function).
// Visible flow: choose the MDS (via raw_name2idx() when the parent is a
// split directory), issue md_intent_lock(), react to -ERESTART by creating
// the lmv object for the parent, then call lmv_handle_remote_inode() and,
// if the reply describes a split directory, make sure an lmv object exists
// for body->id1.
// NOTE(review): declarations of obj and mea, the retry label, most branch
// conditions and the final return are not visible in this view.
427 int lmv_intent_lookup(struct obd_export *exp, struct lustre_id *pid,
428 const char *name, int len, void *lmm, int lmmsize,
429 struct lustre_id *cid, struct lookup_intent *it,
430 int flags, struct ptlrpc_request **reqp,
431 ldlm_blocking_callback cb_blocking)
433 struct obd_device *obd = exp->exp_obd;
434 struct lmv_obd *lmv = &obd->u.lmv;
435 struct mds_body *body = NULL;
436 struct lustre_id rpid = *pid;
439 int rc, mds, loop = 0;
443 * IT_LOOKUP is intended to produce name -> id resolving (let's call
444 * this lookup below) or to confirm requested resolving is still valid
445 * (let's call this revalidation) cid != NULL specifies revalidation.
450 * this is revalidation: we have to check is LOOKUP lock still
451 * valid for given id. Very important part is that we have to
452 * choose right mds because namespace is per mds.
455 obj = lmv_grab_obj(obd, pid);
457 mds = raw_name2idx(obj->hashtype, obj->objcount,
459 rpid = obj->objs[mds].id;
462 mds = id_group(&rpid);
464 CDEBUG(D_OTHER, "revalidate lookup for "DLID4" to %d MDS\n",
// this point may be reached at most twice (one initial pass, one retry);
// the increment is a side effect inside LASSERT()
470 LASSERT(++loop <= 2);
471 /* this is lookup. during lookup we have to update all the
472 * attributes, because returned values will be put in struct
475 obj = lmv_grab_obj(obd, pid);
478 /* directory is already splitted. calculate mds */
479 mds = raw_name2idx(obj->hashtype, obj->objcount,
481 rpid = obj->objs[mds].id;
482 mds = id_group(&rpid);
// NOTE(review): the request goes to tgts[mds], derived from rpid, but the
// id passed is pid rather than &rpid (compare lmv_intent_open() and
// lmv_intent_getattr(), which pass &rpid) - confirm this is intended
487 rc = md_intent_lock(lmv->tgts[mds].ltd_exp, pid, name,
488 len, lmm, lmmsize, cid, it, flags, reqp,
495 /* very interesting. it seems object is still valid but for some
496 * reason llite calls lookup, not revalidate */
497 CDEBUG(D_OTHER, "lookup for "DLID4" and data should be uptodate\n",
499 LASSERT(*reqp == NULL);
503 if (rc == 0 && *reqp == NULL) {
504 /* once again, we're asked for lookup, not revalidate */
505 CDEBUG(D_OTHER, "lookup for "DLID4" and data should be uptodate\n",
510 if (rc == -ERESTART) {
511 /* directory got splitted since last update. this shouldn't be
512 * because splitting causes lock revocation, so revalidate had
513 * to fail and lookup on dir had to return mea */
514 CWARN("we haven't knew about directory splitting!\n");
515 LASSERT(obj == NULL);
// a NULL mea is passed here; how lmv_create_obj() obtains the layout in
// that case is not visible in this file view
517 obj = lmv_create_obj(exp, &rpid, NULL);
519 RETURN(PTR_ERR(obj));
527 /* okay, MDS has returned success. probably name has been resolved in
529 rc = lmv_handle_remote_inode(exp, lmm, lmmsize, it, flags,
532 if (rc == 0 && (mea = lmv_splitted_dir_body(*reqp, 1))) {
533 /* wow! this is splitted dir, we'd like to handle it */
534 body = lustre_msg_buf((*reqp)->rq_repmsg, 1, sizeof(*body));
535 LASSERT(body != NULL);
// reuse a cached object for body->id1 when there is one, otherwise create
// it from the mea found in the reply
537 obj = lmv_grab_obj(obd, &body->id1);
539 obj = lmv_create_obj(exp, &body->id1, mea);
541 RETURN(PTR_ERR(obj));
// Entry point for intent locks on the LMV layer: logs the request, calls
// lmv_check_connect() and dispatches on it->it_op:
//   IT_LOOKUP              -> lmv_intent_lookup()
//   any op with IT_OPEN set -> lmv_intent_open()
//   IT_GETATTR or IT_CHDIR -> lmv_intent_getattr()
// All arguments are forwarded unchanged to the selected handler.
// NOTE(review): the declaration of rc, the handling of a failing
// lmv_check_connect(), the fallback for other intent types and the final
// return are not visible in this view.
549 int lmv_intent_lock(struct obd_export *exp, struct lustre_id *pid,
550 const char *name, int len, void *lmm, int lmmsize,
551 struct lustre_id *cid, struct lookup_intent *it,
552 int flags, struct ptlrpc_request **reqp,
553 ldlm_blocking_callback cb_blocking)
555 struct obd_device *obd = exp->exp_obd;
// the debug line prints the intent name, the looked-up name and the
// ino/gen -> group triple of the parent id
562 CDEBUG(D_OTHER, "INTENT LOCK '%s' for '%*s' on %lu/%lu -> %lu\n",
563 LL_IT2STR(it), len, name, (unsigned long)id_ino(pid),
564 (unsigned long)id_gen(pid), (unsigned long)id_group(pid));
566 rc = lmv_check_connect(obd);
// IT_LOOKUP is matched exactly, IT_OPEN as a bit test, so an intent that
// combines IT_OPEN with other bits is still routed to lmv_intent_open()
570 if (it->it_op == IT_LOOKUP)
571 rc = lmv_intent_lookup(exp, pid, name, len, lmm,
572 lmmsize, cid, it, flags, reqp,
574 else if (it->it_op & IT_OPEN)
575 rc = lmv_intent_open(exp, pid, name, len, lmm,
576 lmmsize, cid, it, flags, reqp,
578 else if (it->it_op == IT_GETATTR || it->it_op == IT_CHDIR)
579 rc = lmv_intent_getattr(exp, pid, name, len, lmm,
580 lmmsize, cid, it, flags, reqp,
// Revalidate all sub-objects of the split directory identified by mid and
// report the summed size back to the caller.
// For every sub-object an IT_GETATTR intent is prepared; the master entry
// is special-cased (its attributes may already be in the caller's reply
// mreq), the others are queried through md_intent_lock(). Sizes from fresh
// replies are cached in obj->objs[i].size and accumulated in the local
// variable size. When master_valid is 0 the lock obtained on the master is
// handed back to the caller through oit.
// NOTE(review): declarations of obj, i and rc, several branch conditions
// and everything after the last visible line (including the return) are
// not visible in this view.
587 int lmv_revalidate_slaves(struct obd_export *exp, struct ptlrpc_request **reqp,
588 struct lustre_id *mid, struct lookup_intent *oit,
589 int master_valid, ldlm_blocking_callback cb_blocking)
591 struct obd_device *obd = exp->exp_obd;
592 struct ptlrpc_request *mreq = *reqp;
593 struct lmv_obd *lmv = &obd->u.lmv;
594 struct lustre_handle master_lockh;
595 struct ldlm_lock *lock;
596 unsigned long size = 0;
597 struct mds_body *body;
599 int master_lock_mode;
603 /* we have to loop over the subobjects, check validity and update them
604 * from MDSs if needed. it's very useful that we need not to update all
605 * the fields. say, common fields (that are equal on all the subojects
606 * need not to be update, another fields (i_size, for example) are
607 * cached all the time */
608 obj = lmv_grab_obj(obd, mid);
609 LASSERT(obj != NULL);
// 0 means "no master lock captured yet"; master_lockh itself stays
// uninitialised until the memcpy further down
611 master_lock_mode = 0;
615 for (i = 0; i < obj->objcount; i++) {
616 struct lustre_id id = obj->objs[i].id;
617 struct lustre_handle *lockh = NULL;
618 struct ptlrpc_request *req = NULL;
619 ldlm_blocking_callback cb;
620 struct lookup_intent it;
623 CDEBUG(D_OTHER, "revalidate subobj "DLID4"\n",
626 memset(&it, 0, sizeof(it));
627 it.it_op = IT_GETATTR;
// default blocking callback for sub-objects; whether it is replaced for
// the master entry is not visible in this view
628 cb = lmv_dirobj_blocking_ast;
630 if (lmv_id_equal(&id, &obj->id)) {
632 /* lmv_intent_getattr() already checked
633 * validness and took the lock */
635 /* it even got the reply refresh attrs
637 body = lustre_msg_buf(mreq->rq_repmsg,
639 LASSERT(body != NULL);
642 /* take already cached attrs into account */
644 "master is locked and cached\n");
652 rc = md_intent_lock(lmv->tgts[id_group(&id)].ltd_exp,
653 &id, NULL, 0, NULL, 0, &id, &it, 0,
655 lockh = (struct lustre_handle *) &it.d.lustre.it_lock_handle;
// positive rc without a reply is treated as "already cached".
// NOTE(review): the LASSERT below repeats the condition just tested
656 if (rc > 0 && req == NULL) {
657 /* nice, this slave is valid */
658 LASSERT(req == NULL);
659 CDEBUG(D_OTHER, "cached\n");
664 /* error during revalidation */
668 LASSERT(master_valid == 0);
669 /* save lock on master to be returned to the caller */
670 CDEBUG(D_OTHER, "no lock on master yet\n");
// the mode is moved out of the local intent so the generic decref at the
// bottom of the loop does not drop the master lock
671 memcpy(&master_lockh, lockh, sizeof(master_lockh));
672 master_lock_mode = it.d.lustre.it_lock_mode;
673 it.d.lustre.it_lock_mode = 0;
675 /* this is slave. we want to control it */
676 lock = ldlm_handle2lock(lockh);
678 lock->l_ast_data = lmv_get_obj(obj);
683 /* this is first reply, we'll use it to return updated
684 * data back to the caller */
// an extra reference is taken because req is released via
// ptlrpc_req_finished() later in the same iteration
686 ptlrpc_request_addref(req);
691 body = lustre_msg_buf(req->rq_repmsg, 1, sizeof(*body));
695 obj->objs[i].size = body->size;
697 CDEBUG(D_OTHER, "fresh: %lu\n",
698 (unsigned long)obj->objs[i].size);
701 ptlrpc_req_finished(req);
// accumulate the cached or freshly fetched size of this sub-object
703 size += obj->objs[i].size;
705 if (it.d.lustre.it_lock_mode)
706 ldlm_lock_decref(lockh, it.d.lustre.it_lock_mode);
710 /* some attrs got refreshed, we have reply and it's time to put
711 * fresh attrs to it */
712 CDEBUG(D_OTHER, "return refreshed attrs: size = %lu\n",
713 (unsigned long)size);
715 body = lustre_msg_buf((*reqp)->rq_repmsg, 1, sizeof(*body));
718 /* FIXME: what about other attributes? */
722 /* very important to maintain lli->mds the same because
723 * of revalidation. mreq == NULL means that caller has
724 * no reply and the only attr we can return is size */
725 body->valid = OBD_MD_FLSIZE;
726 // body->mds = id_group(&obj->id);
// hand the lock captured on the master back to the caller's intent
728 if (master_valid == 0) {
729 memcpy(&oit->d.lustre.it_lock_handle,
730 &master_lockh, sizeof(master_lockh));
731 oit->d.lustre.it_lock_mode = master_lock_mode;
735 /* it seems all the attrs are fresh and we did no request */
736 CDEBUG(D_OTHER, "all the attrs were fresh\n");
// NOTE(review): in this branch only the mode is copied to oit, not the
// handle, unlike the branch above - confirm against the unseen lines
737 if (master_valid == 0)
738 oit->d.lustre.it_lock_mode = master_lock_mode;