1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Copyright (C) 2001-2004 Cluster File Systems, Inc.
6 * This file is part of Lustre, http://www.sf.net/projects/lustre/
8 * Lustre is free software; you can redistribute it and/or
9 * modify it under the terms of version 2 of the GNU General Public
10 * License as published by the Free Software Foundation.
12 * Lustre is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Lustre; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 # define EXPORT_SYMTAB
25 #define DEBUG_SUBSYSTEM S_MDC
28 # include <linux/module.h>
29 # include <linux/pagemap.h>
30 # include <linux/miscdevice.h>
31 # include <linux/init.h>
33 # include <liblustre.h>
36 #include <linux/obd_class.h>
37 #include <linux/lustre_mds.h>
38 #include <linux/lustre_dlm.h>
39 #include <linux/lprocfs_status.h>
40 #include "mdc_internal.h"
/* NOTE(review): REQUEST_MINOR is not referenced in the visible part of this file. */
42 #define REQUEST_MINOR 244
/* Forward declaration: mdc_setup() calls mdc_cleanup() before its definition. */
44 static int mdc_cleanup(struct obd_device *obd, int flags);
/*
 * Return the number of bytes a security descriptor needs: one
 * struct mds_req_sec_desc followed by one __u32 per group.  The group
 * count starts from current_ngroups and is capped at LUSTRE_MAX_GROUPS.
 */
46 int mdc_get_secdesc_size(void)
49 int ngroups = current_ngroups;
51 if (ngroups > LUSTRE_MAX_GROUPS)
52 ngroups = LUSTRE_MAX_GROUPS;
54 return sizeof(struct mds_req_sec_desc) +
55 sizeof(__u32) * ngroups;
62 * because group info might have changed since the last time we called
63 * get_secdesc_size(), so here we do extra sanity checks to prevent garbage gids
/*
 * Fill the security descriptor held in request buffer MDS_REQ_SECDESC_OFF
 * of @req with the calling task's uid, gid, fsuid, fsgid, effective
 * capabilities and group list.
 *
 * @req:  request whose rq_reqmsg holds the descriptor buffer
 * @size: byte size of that buffer; the group count is derived from it as
 *        (size - sizeof(*rsd)) / sizeof(__u32) and then lowered to the
 *        number of groups the task has at packing time.
 *
 * Two group-copy variants are visible, selected by kernel version; the
 * #else/#endif lines separating them are not visible in this listing.
 */
65 void mdc_pack_secdesc(struct ptlrpc_request *req, int size)
68 struct mds_req_sec_desc *rsd;
70 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,4)
71 struct group_info *ginfo;
74 rsd = lustre_msg_buf(req->rq_reqmsg,
75 MDS_REQ_SECDESC_OFF, size);
77 rsd->rsd_uid = current->uid;
78 rsd->rsd_gid = current->gid;
79 rsd->rsd_fsuid = current->fsuid;
80 rsd->rsd_fsgid = current->fsgid;
81 rsd->rsd_cap = current->cap_effective;
82 rsd->rsd_ngroups = (size - sizeof(*rsd)) / sizeof(__u32);
83 LASSERT(rsd->rsd_ngroups <= LUSTRE_MAX_GROUPS);
85 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,4)
/* NOTE(review): no matching put_group_info() is visible in this listing;
 * confirm the reference taken here is dropped. */
87 get_group_info(current->group_info);
88 ginfo = current->group_info;
90 if (rsd->rsd_ngroups > ginfo->ngroups)
91 rsd->rsd_ngroups = ginfo->ngroups;
/* NOTE(review): only ginfo->blocks[0] is copied; presumably
 * LUSTRE_MAX_GROUPS entries always fit in the first block. Verify. */
92 memcpy(rsd->rsd_groups, ginfo->blocks[0],
93 rsd->rsd_ngroups * sizeof(__u32));
95 LASSERT(rsd->rsd_ngroups <= NGROUPS);
96 if (rsd->rsd_ngroups > current->ngroups)
97 rsd->rsd_ngroups = current->ngroups;
98 memcpy(rsd->rsd_groups, current->groups,
99 rsd->rsd_ngroups * sizeof(__u32));
/* NOTE(review): mds_queue_req() is declared here but not called in the
 * visible part of this file. */
104 extern int mds_queue_req(struct ptlrpc_request *);
105 /* Helper that implements most of mdc_getstatus and signal_completed_replay. */
106 /* XXX this should become mdc_get_info("key"), sending MDS_GET_INFO RPC */
/*
 * @imp:       import the MDS_GETSTATUS request is prepared on
 * @rootid:    out; receives body->id1 from the reply
 * @level:     stored in req->rq_send_state
 * @msg_flags: OR-ed into the request message flags
 *
 * The request is sent synchronously with ptlrpc_queue_wait() and released
 * with ptlrpc_req_finished().  -ENOMEM and -EPROTO are set on the visible
 * failure paths; their guarding conditions are not visible in this listing.
 */
107 static int send_getstatus(struct obd_import *imp, struct lustre_id *rootid,
108 int level, int msg_flags)
110 struct ptlrpc_request *req;
111 struct mds_body *body;
112 int rc, size[2] = {0, sizeof(*body)};
115 //size[0] = mdc_get_secdesc_size();
117 req = ptlrpc_prep_req(imp, LUSTRE_MDS_VERSION, MDS_GETSTATUS,
120 GOTO(out, rc = -ENOMEM);
122 //mdc_pack_secdesc(req, size[0]);
124 body = lustre_msg_buf(req->rq_reqmsg, MDS_REQ_REC_OFF, sizeof (*body));
125 req->rq_send_state = level;
/* The reply carries a single mds_body buffer. */
126 req->rq_replen = lustre_msg_size(1, &size[1]);
128 req->rq_reqmsg->flags |= msg_flags;
129 rc = ptlrpc_queue_wait(req);
132 body = lustre_swab_repbuf (req, 0, sizeof (*body),
133 lustre_swab_mds_body);
135 CERROR ("Can't extract mds_body\n");
136 GOTO (out, rc = -EPROTO);
139 memcpy(rootid, &body->id1, sizeof(*rootid));
141 CDEBUG(D_NET, "root ino="LPU64", last_committed="LPU64
142 ", last_xid="LPU64"\n", rootid->li_stc.u.e3s.l3s_ino,
143 req->rq_repmsg->last_committed, req->rq_repmsg->last_xid);
148 ptlrpc_req_finished(req);
152 /* This should be mdc_get_info("rootid") */
/*
 * Fetch the root id into @rootid by calling send_getstatus() on the client
 * import of @exp.  The remaining send_getstatus() arguments are not visible
 * in this listing.
 */
153 int mdc_getstatus(struct obd_export *exp, struct lustre_id *rootid)
155 return send_getstatus(class_exp2cliimp(exp), rootid,
/*
 * Shared tail of the getattr RPCs: sizes the reply, sends the already-built
 * request under the client's cl_rpc_lock, and checks the reply.
 *
 * @exp:     export; its obd's cl_rpc_lock is held around the RPC
 * @ea_size: bytes reserved in an additional reply buffer for MD/symlink
 *           data (the condition guarding the reservation is not visible)
 * @req:     fully packed request
 *
 * The reply's mds_body is swabbed from buffer 0.  When body->eadatasize is
 * non-zero, reply buffer 1 must hold at least that many bytes.
 */
159 int mdc_getattr_common(struct obd_export *exp, unsigned int ea_size,
160 struct ptlrpc_request *req)
162 struct mds_body *body;
165 int repsize[2] = {sizeof(*body), 0};
169 /* request message already built */
172 repsize[bufcount++] = ea_size;
173 CDEBUG(D_INODE, "reserved %u bytes for MD/symlink in packet\n",
176 req->rq_replen = lustre_msg_size(bufcount, repsize);
178 mdc_get_rpc_lock(exp->exp_obd->u.cli.cl_rpc_lock, NULL);
179 rc = ptlrpc_queue_wait(req);
180 mdc_put_rpc_lock(exp->exp_obd->u.cli.cl_rpc_lock, NULL);
184 body = lustre_swab_repbuf (req, 0, sizeof (*body),
185 lustre_swab_mds_body);
187 CERROR ("Can't unpack mds_body\n");
191 CDEBUG(D_NET, "mode: %o\n", body->mode);
193 LASSERT_REPSWAB (req, 1);
194 if (body->eadatasize != 0) {
195 /* reply indicates presence of eadata; check it's there... */
196 eadata = lustre_msg_buf (req->rq_repmsg, 1, body->eadatasize);
197 if (eadata == NULL) {
198 CERROR ("Missing/short eadata\n");
/*
 * Issue an MDS_GETATTR request for @id.
 *
 * @exp:     export used to reach the MDS
 * @id:      object id, copied into body->id1
 * @valid:   NOTE(review): its use is not visible in this listing
 * @ea_size: copied into body->eadatasize and passed to mdc_getattr_common()
 * @request: NOTE(review): presumably receives the request on return; the
 *           assignment is not visible here. Confirm.
 *
 * The size array reserves request buffer 0 for the security descriptor
 * (sized by mdc_get_secdesc_size()) and buffer 1 for the mds_body.
 * -ENOMEM is set on the visible request-preparation failure path.
 */
206 int mdc_getattr(struct obd_export *exp, struct lustre_id *id,
207 __u64 valid, unsigned int ea_size,
208 struct ptlrpc_request **request)
210 struct ptlrpc_request *req;
211 struct mds_body *body;
212 int size[2] = {0, sizeof(*body)};
216 /* XXX do we need to make another request here? We just did a getattr
217 * to do the lookup in the first place.
219 size[0] = mdc_get_secdesc_size();
221 req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_MDS_VERSION,
222 MDS_GETATTR, 2, size, NULL);
224 GOTO(out, rc = -ENOMEM);
226 mdc_pack_secdesc(req, size[0]);
228 body = lustre_msg_buf(req->rq_reqmsg, MDS_REQ_REC_OFF, sizeof (*body));
229 memcpy(&body->id1, id, sizeof(*id));
231 body->eadatasize = ea_size;
233 rc = mdc_getattr_common(exp, ea_size, req);
235 ptlrpc_req_finished (req);
/*
 * Issue an MDS_GETATTR_LOCK request for @id, optionally with a name.
 *
 * @exp:      export used to reach the MDS
 * @id:       object id, copied into body->id1
 * @filename: name copied into request buffer 2, or NULL
 * @namelen:  size of request buffer 2; with a name it must equal the
 *            string length plus one, and an assertion visible below
 *            requires namelen == 1 (presumably on the NULL-name path)
 * @valid:    NOTE(review): its use is not visible in this listing
 * @ea_size:  copied into body->eadatasize and passed to mdc_getattr_common()
 * @request:  NOTE(review): presumably receives the request on return; the
 *            assignment is not visible here. Confirm.
 *
 * Request buffers: 0 security descriptor, 1 mds_body, 2 name.
 */
243 int mdc_getattr_lock(struct obd_export *exp, struct lustre_id *id,
244 char *filename, int namelen, __u64 valid,
245 unsigned int ea_size, struct ptlrpc_request **request)
247 struct ptlrpc_request *req;
248 struct mds_body *body;
249 int rc, size[3] = {0, sizeof(*body), namelen};
252 size[0] = mdc_get_secdesc_size();
254 req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_MDS_VERSION,
255 MDS_GETATTR_LOCK, 3, size, NULL);
257 GOTO(out, rc = -ENOMEM);
259 mdc_pack_secdesc(req, size[0]);
261 body = lustre_msg_buf(req->rq_reqmsg, MDS_REQ_REC_OFF, sizeof (*body));
262 memcpy(&body->id1, id, sizeof(*id));
264 body->eadatasize = ea_size;
266 if (filename != NULL) {
267 LASSERT (strnlen (filename, namelen) == namelen - 1);
268 memcpy(lustre_msg_buf(req->rq_reqmsg, 2, namelen),
271 LASSERT(namelen == 1);
274 rc = mdc_getattr_common(exp, ea_size, req);
276 ptlrpc_req_finished (req);
284 /* This should be called with both the request and the reply still packed. */
/*
 * Copy the id returned by the server (body->id1 in reply buffer @repoff)
 * into the create record's cr_replayid (request buffer @reqoff).
 * Both buffers are asserted to be present.
 *
 * @exp: not used in the visible lines
 */
285 int mdc_store_inode_generation(struct obd_export *exp,
286 struct ptlrpc_request *req,
287 int reqoff, int repoff)
289 struct mds_rec_create *rec =
290 lustre_msg_buf(req->rq_reqmsg, reqoff, sizeof(*rec));
291 struct mds_body *body =
292 lustre_msg_buf(req->rq_repmsg, repoff, sizeof(*body));
294 LASSERT (rec != NULL);
295 LASSERT (body != NULL);
297 memcpy(&rec->cr_replayid, &body->id1, sizeof(rec->cr_replayid));
298 DEBUG_REQ(D_HA, req, "storing generation for ino "DLID4,
299 OLID4(&rec->cr_replayid));
/*
 * Extract metadata from a reply into @md.
 *
 * @exp_mdc: export used to unpack a directory's mea; asserted non-NULL on
 *           the directory path
 * @req:     request whose reply is parsed
 * @offset:  reply buffer index of the mds_body; the EA is read from
 *           buffer @offset + 1
 * @exp_osc: export used to unpack a regular file's striping EA
 * @md:      out; zeroed first, then body and lsm or mea are filled in
 *
 * The EA is parsed only when body->valid has OBD_MD_FLEASIZE or
 * OBD_MD_FLDIREA set.  Regular files unpack into md->lsm and directories
 * into md->mea.  A directory EA failing the magic or count checks yields
 * -EINVAL.
 */
303 int mdc_req2lustre_md(struct obd_export *exp_mdc, struct ptlrpc_request *req,
304 unsigned int offset, struct obd_export *exp_osc,
305 struct lustre_md *md)
311 memset(md, 0, sizeof(*md));
313 md->body = lustre_msg_buf(req->rq_repmsg, offset, sizeof (*md->body));
314 LASSERT (md->body != NULL);
315 LASSERT_REPSWABBED (req, offset);
317 if (!(md->body->valid & OBD_MD_FLEASIZE) &&
318 !(md->body->valid & OBD_MD_FLDIREA))
321 /* ea is presented in reply, parse it */
322 if (S_ISREG(md->body->mode)) {
324 struct lov_mds_md *lmm;
326 if (md->body->eadatasize == 0) {
327 CERROR ("OBD_MD_FLEASIZE set, but eadatasize 0\n");
330 lmmsize = md->body->eadatasize;
331 lmm = lustre_msg_buf(req->rq_repmsg, offset + 1, lmmsize);
332 LASSERT (lmm != NULL);
333 LASSERT_REPSWABBED (req, offset + 1);
335 rc = obd_unpackmd(exp_osc, &md->lsm, lmm, lmmsize);
337 LASSERT (rc >= sizeof (*md->lsm));
340 } else if (S_ISDIR(md->body->mode)) {
343 LASSERT(exp_mdc != NULL);
345 /* dir can be non-splitted */
346 if (md->body->eadatasize == 0)
349 mdsize = md->body->eadatasize;
350 mea = lustre_msg_buf(req->rq_repmsg, offset + 1, mdsize);
351 LASSERT(mea != NULL);
354 * check mea for validness, as there is possible that old tests
355 * will try to set lov EA to dir object and thus confuse this
358 if (mea->mea_magic != MEA_MAGIC_LAST_CHAR &&
359 mea->mea_magic != MEA_MAGIC_ALL_CHARS)
360 GOTO(out_invalid_mea, rc = -EINVAL);
362 if (mea->mea_count > 256 || mea->mea_master > 256 ||
363 mea->mea_master > mea->mea_count)
364 GOTO(out_invalid_mea, rc = -EINVAL);
366 LASSERT(id_fid(&mea->mea_ids[0]));
368 rc = obd_unpackmd(exp_mdc, (void *)&md->mea,
369 (void *)mea, mdsize);
371 LASSERT (rc >= sizeof (*md->mea));
378 CERROR("Detected invalid mea, which does not "
379 "support neither old either new format.\n");
/*
 * Commit callback for an open request (installed as rq_commit_cb by
 * mdc_set_open_replay_data()).  Detaches the mdc_open_data from the
 * associated close request and client handle, frees it, and clears
 * req->rq_cb_data.  Handling of a NULL rq_cb_data is not visible here.
 */
386 static void mdc_commit_open(struct ptlrpc_request *req)
388 struct mdc_open_data *mod = req->rq_cb_data;
392 if (mod->mod_close_req != NULL)
393 mod->mod_close_req->rq_cb_data = NULL;
395 if (mod->mod_och != NULL)
396 mod->mod_och->och_mod = NULL;
398 OBD_FREE(mod, sizeof(*mod));
399 req->rq_cb_data = NULL;
/*
 * Replay callback for an open request (installed as rq_replay_cb by
 * mdc_set_open_replay_data()).  After a replayed open, the file handle
 * from the new reply body replaces the one stored in the client handle and
 * the one in the pending close request, if any.
 *
 * The old handle is saved in 'old' so the close request's current handle
 * can be asserted equal to it before being overwritten.
 */
402 static void mdc_replay_open(struct ptlrpc_request *req)
404 struct mdc_open_data *mod = req->rq_cb_data;
405 struct obd_client_handle *och;
406 struct ptlrpc_request *close_req;
407 struct lustre_handle old;
408 struct mds_body *body;
411 body = lustre_swab_repbuf(req, 1, sizeof(*body), lustre_swab_mds_body);
412 LASSERT (body != NULL);
415 DEBUG_REQ(D_ERROR, req,
416 "can't properly replay without open data");
423 struct lustre_handle *file_fh;
424 LASSERT(och->och_magic == OBD_CLIENT_HANDLE_MAGIC);
425 file_fh = &och->och_fh;
426 CDEBUG(D_HA, "updating handle from "LPX64" to "LPX64"\n",
427 file_fh->cookie, body->handle.cookie);
428 memcpy(&old, file_fh, sizeof(old));
429 memcpy(file_fh, &body->handle, sizeof(*file_fh));
432 close_req = mod->mod_close_req;
433 if (close_req != NULL) {
434 struct mds_body *close_body;
435 LASSERT(close_req->rq_reqmsg->opc == MDS_CLOSE);
436 close_body = lustre_msg_buf(close_req->rq_reqmsg,
438 sizeof(*close_body));
440 LASSERT(!memcmp(&old, &close_body->handle, sizeof old));
441 DEBUG_REQ(D_HA, close_req, "updating close body with new fh");
442 memcpy(&close_body->handle, &body->handle,
443 sizeof(close_body->handle));
/*
 * Attach replay bookkeeping to a completed open request.
 *
 * Allocates an mdc_open_data, records @open_req in it, copies the id from
 * the reply body into the request's cr_replayid, and installs
 * mdc_replay_open() / mdc_commit_open() as the request's replay and commit
 * callbacks with the new structure as callback data.
 *
 * @exp:      not used in the visible lines
 * @och:      client open handle; its linkage to the new structure is not
 *            visible in this listing
 * @open_req: open request whose reply buffer 1 holds a swabbed mds_body
 */
449 int mdc_set_open_replay_data(struct obd_export *exp,
450 struct obd_client_handle *och,
451 struct ptlrpc_request *open_req)
453 struct mdc_open_data *mod;
454 struct mds_rec_create *rec;
455 struct mds_body *body;
457 rec = lustre_msg_buf(open_req->rq_reqmsg, MDS_REQ_INTENT_REC_OFF,
459 body = lustre_msg_buf(open_req->rq_repmsg, 1, sizeof(*body));
461 LASSERT(rec != NULL);
462 /* outgoing messages always in my byte order */
463 LASSERT(body != NULL);
464 /* incoming message in my byte order (it's been swabbed) */
465 LASSERT_REPSWABBED(open_req, 1);
467 OBD_ALLOC(mod, sizeof(*mod));
469 DEBUG_REQ(D_ERROR, open_req, "can't allocate mdc_open_data");
475 mod->mod_open_req = open_req;
477 memcpy(&rec->cr_replayid, &body->id1, sizeof rec->cr_replayid);
478 open_req->rq_replay_cb = mdc_replay_open;
479 open_req->rq_commit_cb = mdc_commit_open;
480 open_req->rq_cb_data = mod;
481 DEBUG_REQ(D_HA, open_req, "set up replay data");
/*
 * Detach the replay bookkeeping from a client handle that is going away.
 * Reads och->och_mod and asserts it is not LP_POISON; the statements that
 * follow the assertion are not visible in this listing.
 *
 * @exp: not used in the visible lines
 * @och: client open handle being released
 */
485 int mdc_clear_open_replay_data(struct obd_export *exp,
486 struct obd_client_handle *och)
488 struct mdc_open_data *mod = och->och_mod;
490 /* Don't free the structure now (it happens in mdc_commit_open, after
491 * we're sure we won't need to fix up the close request in the future),
492 * but make sure that replay doesn't poke at the och, which is about to
494 LASSERT(mod != LP_POISON);
/*
 * Commit callback for a close request (installed by mdc_close()).
 * Unlinks the close request from its mdc_open_data, sanity-checks the
 * matching open request (non-NULL, not poisoned, non-zero transno, same
 * import), and finally clears the open request's rq_replay flag under its
 * rq_lock.  Handling of a NULL rq_cb_data is not visible in this listing.
 */
501 static void mdc_commit_close(struct ptlrpc_request *req)
503 struct mdc_open_data *mod = req->rq_cb_data;
504 struct ptlrpc_request *open_req;
505 struct obd_import *imp = req->rq_import;
507 DEBUG_REQ(D_HA, req, "close req committed");
511 mod->mod_close_req = NULL;
512 req->rq_cb_data = NULL;
513 req->rq_commit_cb = NULL;
515 open_req = mod->mod_open_req;
516 LASSERT(open_req != NULL);
517 LASSERT(open_req != LP_POISON);
518 LASSERT(open_req->rq_type != LI_POISON);
520 DEBUG_REQ(D_HA, open_req, "open req balanced");
521 if (open_req->rq_transno == 0) {
522 DEBUG_REQ(D_ERROR, open_req, "BUG 3892 open");
523 DEBUG_REQ(D_ERROR, req, "BUG 3892 close");
524 LASSERTF(open_req->rq_transno != 0, "BUG 3892");
526 LASSERT(open_req->rq_import == imp);
528 /* We no longer want to preserve this for transno-unconditional
530 spin_lock(&open_req->rq_lock);
531 open_req->rq_replay = 0;
532 spin_unlock(&open_req->rq_lock);
/*
 * Interpret callback for close requests (installed by mdc_close()).
 *
 * @req:  the close request
 * @data: the request's async args; pointer_arg[0] is the rpc lock taken
 *        by mdc_close(), pointer_arg[1] the obd device
 * @rc:   not used in the visible lines
 *
 * pointer_arg[0] is fetched and cleared under rq_lock so the lock is
 * released at most once.  A NULL value is reported as an error.  A
 * non-NULL lock is released and asserted to be the obd's cl_rpc_lock.
 * The waiter on rq_reply_waitq is then woken.
 */
535 static int mdc_close_interpret(struct ptlrpc_request *req, void *data, int rc)
537 union ptlrpc_async_args *aa = data;
538 struct mdc_rpc_lock *rpc_lock;
539 struct obd_device *obd = aa->pointer_arg[1];
542 spin_lock_irqsave(&req->rq_lock, flags);
543 rpc_lock = aa->pointer_arg[0];
544 aa->pointer_arg[0] = NULL;
545 spin_unlock_irqrestore (&req->rq_lock, flags);
547 if (rpc_lock == NULL) {
548 CERROR("called with NULL rpc_lock\n");
550 mdc_put_rpc_lock(rpc_lock, NULL);
551 LASSERTF(rpc_lock == obd->u.cli.cl_rpc_lock, "%p != %p\n",
552 rpc_lock, obd->u.cli.cl_rpc_lock);
554 wake_up(&req->rq_reply_waitq);
558 /* We can't use ptlrpc_check_reply, because we don't want to wake up for
559 * anything but a reply or an error. */
/*
 * Wait condition used by mdc_close(): tests, under rq_lock, whether
 * pointer_arg[0] has been cleared (mdc_close_interpret() clears it).
 * The returned value is not visible in this listing.
 */
560 static int mdc_close_check_reply(struct ptlrpc_request *req)
565 spin_lock_irqsave(&req->rq_lock, flags);
566 if (req->rq_async_args.pointer_arg[0] == NULL)
568 spin_unlock_irqrestore (&req->rq_lock, flags);
/*
 * Timeout callback handed to LWI_TIMEOUT_INTR() by mdc_close().
 * Its body is not visible in this listing.
 */
572 static int go_back_to_sleep(void *unused)
/*
 * Send an MDS_CLOSE request for an open handle and wait for its completion.
 *
 * @exp:     export used to reach the MDS
 * @oa:      attributes packed into the request by mdc_close_pack()
 * @och:     client open handle; asserted non-NULL
 * @request: NOTE(review): presumably receives the request on return; the
 *           assignment is not visible in this listing. Confirm.
 *
 * The request is handed to ptlrpcd with ptlrpcd_add_req() after this thread
 * has taken cl_rpc_lock.  mdc_close_interpret() releases that lock and
 * wakes this thread, which sleeps in l_wait_event() until
 * mdc_close_check_reply() is true.  If pointer_arg[0] is still set
 * afterwards, mdc_close_interpret() is invoked directly so the rpc lock is
 * not left held.
 */
577 int mdc_close(struct obd_export *exp, struct obdo *oa,
578 struct obd_client_handle *och, struct ptlrpc_request **request)
580 struct obd_device *obd = class_exp2obd(exp);
581 int reqsize[3] = {0, sizeof(struct mds_body),
582 obd->u.cli.cl_max_mds_cookiesize};
583 int rc, repsize[3] = {sizeof(struct mds_body),
584 obd->u.cli.cl_max_mds_easize,
585 obd->u.cli.cl_max_mds_cookiesize};
586 struct ptlrpc_request *req;
587 struct mdc_open_data *mod;
588 struct l_wait_info lwi;
591 //reqsize[0] = mdc_get_secdesc_size();
593 req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_MDS_VERSION,
594 MDS_CLOSE, 3, reqsize, NULL);
596 GOTO(out, rc = -ENOMEM);
598 //mdc_pack_secdesc(req, reqsize[0]);
600 /* Ensure that this close's handle is fixed up during replay. */
601 LASSERT(och != NULL);
/* NOTE(review): the assignment of 'mod' is not visible in this listing;
 * presumably it comes from och->och_mod. */
603 if (likely(mod != NULL)) {
604 mod->mod_close_req = req;
605 LASSERT(mod->mod_open_req->rq_type != LI_POISON);
606 DEBUG_REQ(D_HA, mod->mod_open_req, "matched open req %p",
609 CDEBUG(D_HA, "couldn't find open req; expecting close error\n");
612 mdc_close_pack(req, 1, oa, oa->o_valid, och);
614 req->rq_replen = lustre_msg_size(3, repsize);
615 req->rq_commit_cb = mdc_commit_close;
616 LASSERT(req->rq_cb_data == NULL);
617 req->rq_cb_data = mod;
619 /* We hand a ref to the rpcd here, so we need another one of our own. */
620 ptlrpc_request_addref(req);
622 mdc_get_rpc_lock(obd->u.cli.cl_rpc_lock, NULL);
623 req->rq_interpret_reply = mdc_close_interpret;
624 req->rq_async_args.pointer_arg[0] = obd->u.cli.cl_rpc_lock;
625 req->rq_async_args.pointer_arg[1] = obd;
626 ptlrpcd_add_req(req);
627 lwi = LWI_TIMEOUT_INTR(MAX(req->rq_timeout * HZ, 1), go_back_to_sleep,
629 rc = l_wait_event(req->rq_reply_waitq, mdc_close_check_reply(req),
/* No reply message at all: report rq_status, or -EIO if that is zero. */
631 if (req->rq_repmsg == NULL) {
632 CDEBUG(D_HA, "request failed to send: %p, %d\n", req,
635 rc = req->rq_status ? req->rq_status : -EIO;
636 } else if (rc == 0) {
637 rc = req->rq_repmsg->status;
638 if (req->rq_repmsg->type == PTL_RPC_MSG_ERR) {
639 DEBUG_REQ(D_ERROR, req, "type == PTL_RPC_MSG_ERR, err "
643 } else if (mod == NULL) {
644 CERROR("Unexpected: can't find mdc_open_data, but the "
645 "close succeeded. Please tell CFS.\n");
647 if (!lustre_swab_repbuf(req, 0, sizeof(struct mds_body),
648 lustre_swab_mds_body)) {
649 CERROR("Error unpacking mds_body\n");
653 if (req->rq_async_args.pointer_arg[0] != NULL) {
654 CERROR("returned without dropping rpc_lock: rc %d\n", rc);
655 mdc_close_interpret(req, &req->rq_async_args, rc);
/*
 * Send an MDS_DONE_WRITING request describing the object in @obdo.
 *
 * @exp:  export used to reach the MDS
 * @obdo: source of the id (o_id, o_mode, o_mds, o_fid) and of the size,
 *        blocks, flags and valid fields copied into the request body
 *
 * The request carries a security descriptor in buffer 0 and the mds_body
 * at MDS_REQ_REC_OFF.  It is sent synchronously and released immediately.
 */
664 int mdc_done_writing(struct obd_export *exp, struct obdo *obdo)
666 struct ptlrpc_request *req;
667 struct mds_body *body;
668 int rc, size[2] = {0, sizeof(*body)};
671 size[0] = mdc_get_secdesc_size();
673 req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_MDS_VERSION,
674 MDS_DONE_WRITING, 2, size, NULL);
678 mdc_pack_secdesc(req, size[0]);
680 body = lustre_msg_buf(req->rq_reqmsg, MDS_REQ_REC_OFF,
683 mdc_pack_id(&body->id1, obdo->o_id, 0, obdo->o_mode,
684 obdo->o_mds, obdo->o_fid);
686 body->size = obdo->o_size;
687 body->blocks = obdo->o_blocks;
688 body->flags = obdo->o_flags;
689 body->valid = obdo->o_valid;
691 req->rq_replen = lustre_msg_size(1, &size[1]);
693 rc = ptlrpc_queue_wait(req);
694 ptlrpc_req_finished(req);
/*
 * Read one page of directory data for @id via an MDS_READPAGE bulk RPC.
 *
 * @exp:     export used to reach the MDS
 * @id:      directory id packed by mdc_readdir_pack()
 * @offset:  offset packed into the request
 * @page:    destination page, registered as the single bulk page
 * @request: NOTE(review): presumably receives the request on return; the
 *           assignment is not visible in this listing. Confirm.
 *
 * The reply must contain an mds_body and exactly PAGE_CACHE_SIZE bytes must
 * have been transferred; -EPROTO is set on the visible paths that report a
 * violation of either.
 */
698 int mdc_readpage(struct obd_export *exp,
699 struct lustre_id *id,
700 __u64 offset, struct page *page,
701 struct ptlrpc_request **request)
703 struct obd_import *imp = class_exp2cliimp(exp);
704 struct ptlrpc_request *req = NULL;
705 struct ptlrpc_bulk_desc *desc = NULL;
706 struct mds_body *body;
707 int rc, size[2] = {0, sizeof(*body)};
710 CDEBUG(D_INODE, "inode: %ld\n", (long)id->li_stc.u.e3s.l3s_ino);
712 size[0] = mdc_get_secdesc_size();
714 req = ptlrpc_prep_req(imp, LUSTRE_MDS_VERSION, MDS_READPAGE,
717 GOTO(out, rc = -ENOMEM);
718 /* XXX FIXME bug 249 */
719 req->rq_request_portal = MDS_READPAGE_PORTAL;
721 mdc_pack_secdesc(req, size[0]);
723 desc = ptlrpc_prep_bulk_imp(req, 1, BULK_PUT_SINK, MDS_BULK_PORTAL);
725 GOTO(out, rc = -ENOMEM);
726 /* NB req now owns desc and will free it when it gets freed */
728 ptlrpc_prep_bulk_page(desc, page, 0, PAGE_CACHE_SIZE);
729 mdc_readdir_pack(req, 1, offset, PAGE_CACHE_SIZE, id);
731 req->rq_replen = lustre_msg_size(1, &size[1]);
732 rc = ptlrpc_queue_wait(req);
735 body = lustre_swab_repbuf(req, 0, sizeof (*body),
736 lustre_swab_mds_body);
738 CERROR("Can't unpack mds_body\n");
739 GOTO(out, rc = -EPROTO);
742 if (req->rq_bulk->bd_nob_transferred != PAGE_CACHE_SIZE) {
743 CERROR ("Unexpected # bytes transferred: %d"
745 req->rq_bulk->bd_nob_transferred,
747 GOTO (out, rc = -EPROTO);
/*
 * ioctl dispatcher for the MDC device.
 *
 * @cmd:  ioctl command
 * @exp:  export the ioctl was issued on
 * @len:  not used in the visible lines
 * @karg: kernel copy of the argument, treated as struct obd_ioctl_data
 * @uarg: not used in the visible lines
 *
 * Visible commands: OBD_IOC_CLIENT_RECOVER, IOC_OSC_SET_ACTIVE,
 * OBD_IOC_PARSE, OBD_IOC_LLOG_INFO and OBD_IOC_LLOG_PRINT.  An unrecognised
 * command logs an error and yields -ENOTTY.  A module reference is taken
 * with try_module_get() and dropped with module_put(); the preprocessor
 * branches around them are only partly visible in this listing.
 */
757 static int mdc_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
758 void *karg, void *uarg)
760 struct obd_device *obd = exp->exp_obd;
761 struct obd_ioctl_data *data = karg;
762 struct obd_import *imp = obd->u.cli.cl_import;
763 struct llog_ctxt *ctxt;
767 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
770 if (!try_module_get(THIS_MODULE)) {
/* NOTE(review): this message lacks the trailing "\n" the other CERROR
 * calls in this file use. */
771 CERROR("Can't get module. Is it alive?");
776 case OBD_IOC_CLIENT_RECOVER:
777 rc = ptlrpc_recover_import(imp, data->ioc_inlbuf1);
781 case IOC_OSC_SET_ACTIVE:
782 rc = ptlrpc_set_import_active(imp, data->ioc_offset);
784 case OBD_IOC_PARSE: {
785 ctxt = llog_get_context(&exp->exp_obd->obd_llogs,
786 LLOG_CONFIG_REPL_CTXT);
787 rc = class_config_process_llog(ctxt, data->ioc_inlbuf1, NULL);
791 case OBD_IOC_LLOG_INFO:
792 case OBD_IOC_LLOG_PRINT: {
793 ctxt = llog_get_context(&obd->obd_llogs, LLOG_CONFIG_REPL_CTXT);
794 rc = llog_ioctl(ctxt, cmd, data);
800 CERROR("mdc_ioctl(): unrecognised ioctl %#x\n", cmd);
801 GOTO(out, rc = -ENOTTY);
804 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
807 module_put(THIS_MODULE);
/*
 * Set a named parameter on the MDC.
 *
 * @exp:    export the parameter applies to
 * @keylen: length of @key
 * @key:    parameter name
 * @vallen: length of @val
 * @val:    parameter value
 *
 * Visible keys:
 *   "initial_recov": @val is an int stored in imp_initial_recov
 *   "mds_type":      key and value are forwarded in an OST_SET_INFO RPC
 *   "inter_mds":     sets imp_server_timeout on the import
 * Return values are not visible in this listing.
 */
813 int mdc_set_info(struct obd_export *exp, obd_count keylen,
814 void *key, obd_count vallen, void *val)
818 if (keylen == strlen("initial_recov") &&
819 memcmp(key, "initial_recov", strlen("initial_recov")) == 0) {
820 struct obd_import *imp = exp->exp_obd->u.cli.cl_import;
821 if (vallen != sizeof(int))
823 imp->imp_initial_recov = *(int *)val;
/* NOTE(review): the message names imp_no_init_recov while the field
 * written above is imp_initial_recov. */
824 CDEBUG(D_HA, "%s: set imp_no_init_recov = %d\n",
825 exp->exp_obd->obd_name,
826 imp->imp_initial_recov);
/* NOTE(review): unlike the memcmp() above, strcmp() here relies on key
 * being NUL-terminated; confirm that callers guarantee it. */
828 } else if (keylen >= strlen("mds_type") && strcmp(key, "mds_type") == 0) {
829 struct ptlrpc_request *req;
830 char *bufs[2] = {key, val};
831 int rc, size[2] = {keylen, vallen};
833 req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_OBD_VERSION,
834 OST_SET_INFO, 2, size, bufs);
838 req->rq_replen = lustre_msg_size(0, NULL);
839 rc = ptlrpc_queue_wait(req);
840 ptlrpc_req_finished(req);
842 } else if (keylen >= strlen("inter_mds") && strcmp(key, "inter_mds") == 0) {
843 struct obd_import *imp = class_exp2cliimp(exp);
844 imp->imp_server_timeout = 1;
845 CDEBUG(D_OTHER, "%s: timeout / 2\n", exp->exp_obd->obd_name);
/*
 * Fetch filesystem statistics from the MDS with an MDS_STATFS RPC.
 *
 * @obd:     MDC device; the RPC runs under its cl_rpc_lock
 * @osfs:    out; receives the swabbed obd_statfs from reply buffer 0, or
 *           is zeroed when the import has no connection
 * @max_age: not used in the visible lines (see the comment below)
 *
 * The request has no buffers; the reply has one obd_statfs buffer.
 * -EPROTO is set on the visible path that reports an unpack failure.
 */
851 static int mdc_statfs(struct obd_device *obd, struct obd_statfs *osfs,
852 unsigned long max_age)
854 struct obd_statfs *msfs;
855 struct ptlrpc_request *req;
856 int rc, size = sizeof(*msfs);
859 /* We could possibly pass max_age in the request (as an absolute
860 * timestamp or a "seconds.usec ago") so the target can avoid doing
861 * extra calls into the filesystem if that isn't necessary (e.g.
862 * during mount that would help a bit). Having relative timestamps
863 * is not so great if request processing is slow, while absolute
864 * timestamps are not ideal because they need time synchronization. */
865 req = ptlrpc_prep_req(obd->u.cli.cl_import, LUSTRE_MDS_VERSION,
866 MDS_STATFS, 0, NULL, NULL);
870 req->rq_replen = lustre_msg_size(1, &size);
872 mdc_get_rpc_lock(obd->u.cli.cl_rpc_lock, NULL);
873 rc = ptlrpc_queue_wait(req);
874 mdc_put_rpc_lock(obd->u.cli.cl_rpc_lock, NULL);
877 /* this can be LMV fake import, which is not connected. */
878 if (!req->rq_import->imp_connection)
879 memset(osfs, 0, sizeof(*osfs));
883 msfs = lustre_swab_repbuf(req, 0, sizeof(*msfs),
884 lustre_swab_obd_statfs);
886 CERROR("Can't unpack obd_statfs\n");
887 GOTO(out, rc = -EPROTO);
890 memcpy(osfs, msfs, sizeof (*msfs));
893 ptlrpc_req_finished(req);
/*
 * Pin an object on the MDS with an MDS_PIN RPC.
 *
 * @exp:    export used to reach the MDS
 * @ino:    inode number packed into the request id
 * @gen:    generation packed into the request id
 * @type:   type packed into the request id
 * @handle: out; receives the server handle and OBD_CLIENT_HANDLE_MAGIC,
 *          plus a freshly allocated och_mod that keeps the request
 * @flag:   not used in the visible lines
 *
 * On success the request is not released here: it is stored in
 * handle->och_mod->mod_open_req and dropped later by mdc_unpin().
 * The conditions guarding the two early ptlrpc_req_finished() calls are
 * not visible in this listing.
 */
897 static int mdc_pin(struct obd_export *exp, obd_id ino, __u32 gen, int type,
898 struct obd_client_handle *handle, int flag)
900 struct ptlrpc_request *req;
901 struct mds_body *body;
902 int rc, size[2] = {0, sizeof(*body)};
905 //size[0] = mdc_get_secdesc_size();
907 req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_MDS_VERSION,
908 MDS_PIN, 2, size, NULL);
912 //mdc_pack_secdesc(req, size[0]);
914 body = lustre_msg_buf(req->rq_reqmsg,
915 MDS_REQ_REC_OFF, sizeof(*body));
917 /* FIXME-UMKA: here should be also mdsnum and fid. */
918 mdc_pack_id(&body->id1, ino, gen, type, 0, 0);
921 req->rq_replen = lustre_msg_size(1, &size[1]);
923 mdc_get_rpc_lock(exp->exp_obd->u.cli.cl_rpc_lock, NULL);
924 rc = ptlrpc_queue_wait(req);
925 mdc_put_rpc_lock(exp->exp_obd->u.cli.cl_rpc_lock, NULL);
927 CERROR("pin failed: %d\n", rc);
928 ptlrpc_req_finished(req);
932 body = lustre_swab_repbuf(req, 0, sizeof(*body), lustre_swab_mds_body);
934 ptlrpc_req_finished(req);
938 memcpy(&handle->och_fh, &body->handle, sizeof(body->handle));
939 handle->och_magic = OBD_CLIENT_HANDLE_MAGIC;
941 OBD_ALLOC(handle->och_mod, sizeof(*handle->och_mod));
942 if (handle->och_mod == NULL) {
943 DEBUG_REQ(D_ERROR, req, "can't allocate mdc_open_data");
946 handle->och_mod->mod_open_req = req; /* will be dropped by unpin */
/*
 * Release a pin taken by mdc_pin().
 *
 * @exp:    export used to reach the MDS
 * @handle: handle filled in by mdc_pin(); its magic is checked first
 * @flag:   not used in the visible lines
 *
 * Sends an MDS_CLOSE request carrying the stored handle, then drops both
 * that request and the pin request kept in och_mod->mod_open_req, and
 * frees och_mod.  A failed RPC is logged; whether the cleanup still runs in
 * that case cannot be determined from the visible lines.
 */
951 static int mdc_unpin(struct obd_export *exp,
952 struct obd_client_handle *handle, int flag)
954 struct ptlrpc_request *req;
955 struct mds_body *body;
956 int rc, size[2] = {0, sizeof(*body)};
959 if (handle->och_magic != OBD_CLIENT_HANDLE_MAGIC)
962 //size[0] = mdc_get_secdesc_size();
964 req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_MDS_VERSION,
965 MDS_CLOSE, 2, size, NULL);
969 //mdc_pack_secdesc(req, size[0]);
971 body = lustre_msg_buf(req->rq_reqmsg, MDS_REQ_REC_OFF, sizeof(*body));
972 memcpy(&body->handle, &handle->och_fh, sizeof(body->handle));
975 req->rq_replen = lustre_msg_size(0, NULL);
976 mdc_get_rpc_lock(exp->exp_obd->u.cli.cl_rpc_lock, NULL);
977 rc = ptlrpc_queue_wait(req);
978 mdc_put_rpc_lock(exp->exp_obd->u.cli.cl_rpc_lock, NULL);
981 CERROR("unpin failed: %d\n", rc);
983 ptlrpc_req_finished(req);
984 ptlrpc_req_finished(handle->och_mod->mod_open_req);
985 OBD_FREE(handle->och_mod, sizeof(*handle->och_mod));
/*
 * Send an MDS_SYNC request for @id.
 *
 * @exp:     export used to reach the MDS
 * @id:      object id copied into body->id1
 * @request: may be NULL; when the RPC fails or @request is NULL the
 *           request is released here.  NOTE(review): otherwise it is
 *           presumably handed back through @request; that assignment is
 *           not visible in this listing.
 *
 * -ENOMEM is returned on the visible request-preparation failure path.
 */
989 int mdc_sync(struct obd_export *exp, struct lustre_id *id,
990 struct ptlrpc_request **request)
992 struct ptlrpc_request *req;
993 struct mds_body *body;
994 int size[2] = {0, sizeof(*body)};
998 //size[0] = mdc_get_secdesc_size();
1000 req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_MDS_VERSION,
1001 MDS_SYNC, 2, size, NULL);
1003 RETURN(rc = -ENOMEM);
1005 //mdc_pack_secdesc(req, size[0]);
1008 body = lustre_msg_buf(req->rq_reqmsg, MDS_REQ_REC_OFF,
1010 memcpy(&body->id1, id, sizeof(*id));
1013 req->rq_replen = lustre_msg_size(1, &size[1]);
1015 rc = ptlrpc_queue_wait(req);
1016 if (rc || request == NULL)
1017 ptlrpc_req_finished(req);
/*
 * React to a state change of the device's import.
 *
 * @obd:   MDC device; asserted to own @imp
 * @imp:   import that changed state
 * @event: the import event
 *
 * Visible handling:
 *   IMP_EVENT_DISCON      nothing visible
 *   IMP_EVENT_INACTIVE    notify the observer, if any, with argument 0
 *   IMP_EVENT_INVALIDATE  clean up the local lock namespace
 *   IMP_EVENT_ACTIVE      notify the observer, if any, with argument 1
 * An unknown event is logged as an error.
 */
1024 static int mdc_import_event(struct obd_device *obd, struct obd_import *imp,
1025 enum obd_import_event event)
1029 LASSERT(imp->imp_obd == obd);
1032 case IMP_EVENT_DISCON: {
1035 case IMP_EVENT_INACTIVE: {
1036 if (obd->obd_observer)
1037 rc = obd_notify(obd->obd_observer, obd, 0, 0);
1040 case IMP_EVENT_INVALIDATE: {
1041 struct ldlm_namespace *ns = obd->obd_namespace;
1043 ldlm_namespace_cleanup(ns, LDLM_FL_LOCAL_ONLY);
1047 case IMP_EVENT_ACTIVE: {
1048 if (obd->obd_observer)
1049 rc = obd_notify(obd->obd_observer, obd, 1, 0);
1053 CERROR("Unknown import event %d\n", event);
/*
 * Attach hook: registers the device's lprocfs variables.
 * @len and @data are not used in the visible lines.
 */
1059 static int mdc_attach(struct obd_device *dev, obd_count len, void *data)
1061 struct lprocfs_static_vars lvars;
1063 lprocfs_init_vars(mdc, &lvars);
1064 return lprocfs_obd_attach(dev, lvars.obd_vars);
/* Detach hook: forwards to lprocfs_obd_detach() and returns its result. */
1067 static int mdc_detach(struct obd_device *dev)
1069 return lprocfs_obd_detach(dev);
/*
 * Setup hook for an MDC device.
 *
 * Allocates and initialises cl_rpc_lock and cl_setattr_lock, runs
 * client_obd_setup() with @len and @buf, then obd_llog_init().  If llog
 * initialisation fails, mdc_cleanup() is called.  The two OBD_FREE() calls
 * at the end belong to the error-unwind labels (err_setattr_lock,
 * err_rpc_lock); the label lines are not visible in this listing.
 */
1072 static int mdc_setup(struct obd_device *obd, obd_count len, void *buf)
1074 struct client_obd *cli = &obd->u.cli;
1078 OBD_ALLOC(cli->cl_rpc_lock, sizeof (*cli->cl_rpc_lock));
1079 if (!cli->cl_rpc_lock)
1081 mdc_init_rpc_lock(cli->cl_rpc_lock);
1085 OBD_ALLOC(cli->cl_setattr_lock, sizeof (*cli->cl_setattr_lock));
1086 if (!cli->cl_setattr_lock)
1087 GOTO(err_rpc_lock, rc = -ENOMEM);
1088 mdc_init_rpc_lock(cli->cl_setattr_lock);
1090 rc = client_obd_setup(obd, len, buf);
1092 GOTO(err_setattr_lock, rc);
1094 rc = obd_llog_init(obd, &obd->obd_llogs, obd, 0, NULL);
1096 mdc_cleanup(obd, 0);
1097 CERROR("failed to setup llogging subsystems\n");
1103 OBD_FREE(cli->cl_setattr_lock, sizeof (*cli->cl_setattr_lock));
1105 OBD_FREE(cli->cl_rpc_lock, sizeof (*cli->cl_rpc_lock));
/*
 * Raise the client's maximum EA and cookie sizes.
 * cl_max_mds_easize and cl_max_mds_cookiesize are only ever increased:
 * each is replaced when the supplied value is larger.
 */
1110 static int mdc_init_ea_size(struct obd_export *exp, int easize, int cookiesize)
1112 struct obd_device *obd = exp->exp_obd;
1113 struct client_obd *cli = &obd->u.cli;
1116 if (cli->cl_max_mds_easize < easize)
1117 cli->cl_max_mds_easize = easize;
1118 if (cli->cl_max_mds_cookiesize < cookiesize)
1119 cli->cl_max_mds_cookiesize = cookiesize;
/*
 * Pre-cleanup hook: shuts down the device's llog state with
 * obd_llog_finish() and logs an error message (the guarding condition is
 * not visible in this listing).  @flags is not used in the visible lines.
 */
1123 static int mdc_precleanup(struct obd_device *obd, int flags)
1127 rc = obd_llog_finish(obd, &obd->obd_llogs, 0);
1129 CERROR("failed to cleanup llogging subsystems\n");
/*
 * Cleanup hook: frees the two rpc locks allocated by mdc_setup(), then
 * hands off to client_obd_cleanup() and returns its result.
 */
1134 static int mdc_cleanup(struct obd_device *obd, int flags)
1136 struct client_obd *cli = &obd->u.cli;
1138 OBD_FREE(cli->cl_rpc_lock, sizeof (*cli->cl_rpc_lock));
1139 OBD_FREE(cli->cl_setattr_lock, sizeof (*cli->cl_setattr_lock));
1143 return client_obd_cleanup(obd, flags);
/*
 * llog-init hook: sets up the LLOG_CONFIG_REPL_CTXT context on @llogs and
 * points the context's loc_imp at the device's client import.
 * @count and @logid are not used in the visible lines.
 *
 * NOTE(review): whether the context lookup is guarded by a check of rc is
 * not visible in this listing. Confirm ctxt cannot be NULL here.
 */
1147 static int mdc_llog_init(struct obd_device *obd, struct obd_llogs *llogs,
1148 struct obd_device *tgt, int count,
1149 struct llog_catid *logid)
1151 struct llog_ctxt *ctxt;
1155 rc = obd_llog_setup(obd, llogs, LLOG_CONFIG_REPL_CTXT, tgt, 0, NULL,
1158 ctxt = llog_get_context(llogs, LLOG_CONFIG_REPL_CTXT);
1159 ctxt->loc_imp = obd->u.cli.cl_import;
/*
 * llog-finish hook: cleans up the LLOG_CONFIG_REPL_CTXT context of @llogs.
 * @obd and @count are not used in the visible lines.
 */
1165 static int mdc_llog_finish(struct obd_device *obd,
1166 struct obd_llogs *llogs, int count)
1171 rc = obd_llog_cleanup(llog_get_context(llogs, LLOG_CONFIG_REPL_CTXT));
/*
 * Return the obd device behind @exp.  @name and @len are not used in the
 * visible lines.
 */
1174 static struct obd_device *mdc_get_real_obd(struct obd_export *exp,
1175 char *name, int len)
1178 RETURN(exp->exp_obd);
/*
 * Query a named parameter from the server with an OST_GET_INFO RPC.
 *
 * @exp:     export used to reach the server
 * @keylen:  length of @key; also the size of the single request buffer
 * @key:     parameter name; only "mdsize", "mdsnum" and "rootid" pass the
 *           key check below
 * @valsize: in; size of the reply buffer to reserve.  Must not be NULL.
 * @val:     out; receives a struct lustre_id for "rootid", otherwise a
 *           __u32.  Must not be NULL.
 *
 * -EPROTO is set when the reply cannot be unpacked.  Other return values
 * are not visible in this listing.
 *
 * NOTE(review): strcmp() relies on key being NUL-terminated; confirm that
 * callers guarantee it.
 */
1181 static int mdc_get_info(struct obd_export *exp, obd_count keylen,
1182 void *key, __u32 *valsize, void *val)
1184 struct ptlrpc_request *req;
1185 char *bufs[1] = {key};
1189 if (!valsize || !val)
1192 if ((keylen < strlen("mdsize") || strcmp(key, "mdsize") != 0) &&
1193 (keylen < strlen("mdsnum") || strcmp(key, "mdsnum") != 0) &&
1194 (keylen < strlen("rootid") || strcmp(key, "rootid") != 0))
1197 req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_OBD_VERSION,
1198 OST_GET_INFO, 1, &keylen, bufs);
1202 req->rq_replen = lustre_msg_size(1, valsize);
1203 rc = ptlrpc_queue_wait(req);
1207 if (keylen >= strlen("rootid") && !strcmp(key, "rootid")) {
1208 struct lustre_id *reply;
1210 reply = lustre_swab_repbuf(req, 0, sizeof(*reply),
1211 lustre_swab_lustre_id);
1212 if (reply == NULL) {
1213 CERROR("Can't unpack %s\n", (char *)key);
1214 GOTO(out_req, rc = -EPROTO);
1217 *(struct lustre_id *)val = *reply;
1221 reply = lustre_swab_repbuf(req, 0, sizeof(*reply),
1222 lustre_swab_generic_32s);
1223 if (reply == NULL) {
1224 CERROR("Can't unpack %s\n", (char *)key);
1225 GOTO(out_req, rc = -EPROTO);
1227 *((__u32 *)val) = *reply;
1230 ptlrpc_req_finished(req);
/*
 * Create an object with an OST_CREATE RPC.
 *
 * @exp: export used to reach the server
 * @oa:  in/out; sent in the request body and overwritten with the
 *       attributes from the reply
 * @ea:  not used in the visible lines
 * @oti: not used in the visible lines
 *
 * After the reply is copied back, the returned o_id and o_generation are
 * also written into the request body so a replay carries them.
 * -ENOMEM and -EPROTO are set on the visible failure paths.
 */
1234 int mdc_obj_create(struct obd_export *exp, struct obdo *oa,
1235 struct lov_stripe_md **ea, struct obd_trans_info *oti)
1237 struct ptlrpc_request *request;
1238 struct ost_body *body;
1239 int rc, size = sizeof(*body);
1244 request = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_OBD_VERSION,
1245 OST_CREATE, 1, &size, NULL);
1247 GOTO(out_req, rc = -ENOMEM);
1249 body = lustre_msg_buf(request->rq_reqmsg, 0, sizeof (*body));
1250 memcpy(&body->oa, oa, sizeof(body->oa));
1252 request->rq_replen = lustre_msg_size(1, &size);
1253 rc = ptlrpc_queue_wait(request);
1257 body = lustre_swab_repbuf(request, 0, sizeof(*body),
1258 lustre_swab_ost_body);
1260 CERROR ("can't unpack ost_body\n");
1261 GOTO (out_req, rc = -EPROTO);
1264 memcpy(oa, &body->oa, sizeof(*oa));
1266 /* store ino/generation for recovery */
1267 body = lustre_msg_buf(request->rq_reqmsg, 0, sizeof (*body));
1268 body->oa.o_id = oa->o_id;
1269 body->oa.o_generation = oa->o_generation;
1271 CDEBUG(D_HA, "transno: "LPD64"\n", request->rq_repmsg->transno);
1274 ptlrpc_req_finished(request);
/*
 * Perform a bulk read or write of @oa_bufs pages.
 *
 * @rw:      OBD_BRW_WRITE selects OST_WRITE, anything else OST_READ
 * @exp:     export used to reach the server
 * @oa:      object attributes copied into the request body and ioobj
 * @ea:      not used in the visible lines
 * @oa_bufs: number of entries in @pgarr
 * @pgarr:   pages to transfer; each must have count > 0 and must not cross
 *           a page boundary
 * @oti:     not used in the visible lines
 *
 * Request buffers: 0 ost_body, 1 obd_ioobj, 2 one niobuf_remote per page.
 * A write reserves one __u32 return code per niobuf in the reply; a read
 * reserves only the body.
 *
 * NOTE(review): failure of ptlrpc_prep_req() or ptlrpc_prep_bulk_imp() is
 * asserted rather than returned as an error.
 */
1278 int mdc_brw(int rw, struct obd_export *exp, struct obdo *oa,
1279 struct lov_stripe_md *ea, obd_count oa_bufs,
1280 struct brw_page *pgarr, struct obd_trans_info *oti)
1282 struct ptlrpc_bulk_desc *desc;
1283 struct niobuf_remote *niobuf;
1284 struct ptlrpc_request *req;
1285 struct obd_ioobj *ioobj;
1286 struct ost_body *body;
1290 opc = ((rw & OBD_BRW_WRITE) != 0) ? OST_WRITE : OST_READ;
1292 size[0] = sizeof(*body);
1293 size[1] = sizeof(*ioobj);
1294 size[2] = oa_bufs * sizeof(*niobuf);
1296 req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_OBD_VERSION, opc,
1298 LASSERT(req != NULL);
1300 if (opc == OST_WRITE)
1301 desc = ptlrpc_prep_bulk_imp(req, oa_bufs, BULK_GET_SOURCE,
1304 desc = ptlrpc_prep_bulk_imp(req, oa_bufs, BULK_PUT_SINK,
1306 LASSERT(desc != NULL);
1308 body = lustre_msg_buf(req->rq_reqmsg, 0, sizeof(*body));
1309 ioobj = lustre_msg_buf(req->rq_reqmsg, 1, sizeof(*ioobj));
1310 niobuf = lustre_msg_buf(req->rq_reqmsg, 2, oa_bufs * sizeof(*niobuf));
1312 memcpy(&body->oa, oa, sizeof(*oa));
1313 obdo_to_ioobj(oa, ioobj);
1314 ioobj->ioo_bufcnt = oa_bufs;
/* Register each page with the bulk descriptor and describe it remotely. */
1316 for (i = 0; i < oa_bufs; i++, niobuf++) {
1317 struct brw_page *pg = &pgarr[i];
1319 LASSERT(pg->count > 0);
1320 LASSERT((pg->disk_offset & ~PAGE_MASK) + pg->count <= PAGE_SIZE);
1322 ptlrpc_prep_bulk_page(desc, pg->pg, pg->disk_offset & ~PAGE_MASK,
1325 niobuf->offset = pg->disk_offset;
1326 niobuf->len = pg->count;
1327 niobuf->flags = pg->flag;
1330 /* size[0] still sizeof (*body) */
1331 if (opc == OST_WRITE) {
1332 /* 1 RC per niobuf */
1333 size[1] = sizeof(__u32) * oa_bufs;
1334 req->rq_replen = lustre_msg_size(2, size);
1336 /* 1 RC for the whole I/O */
1337 req->rq_replen = lustre_msg_size(1, size);
1339 err = ptlrpc_queue_wait(req);
1342 ptlrpc_req_finished(req);
/*
 * Check whether the client holds a lock covering the attributes of @id.
 *
 * Builds a resource id from id_fid() and id_group() and looks for a
 * matching LDLM_IBITS lock with the MDS_INODELOCK_UPDATE bit, first in
 * LCK_PR mode and then in LCK_PW mode.  The reference taken by a
 * successful match is dropped immediately.  The returned values are not
 * visible in this listing.
 */
1346 static int mdc_valid_attrs(struct obd_export *exp,
1347 struct lustre_id *id)
1349 struct ldlm_res_id res_id = { .name = {0} };
1350 struct obd_device *obd = exp->exp_obd;
1351 struct lustre_handle lockh;
1352 ldlm_policy_data_t policy;
1356 res_id.name[0] = id_fid(id);
1357 res_id.name[1] = id_group(id);
1358 policy.l_inodebits.bits = MDS_INODELOCK_UPDATE;
1360 CDEBUG(D_INFO, "trying to match res "LPU64"\n",
1363 /* FIXME use LDLM_FL_TEST_LOCK instead */
1364 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING;
1365 if (ldlm_lock_match(obd->obd_namespace, flags, &res_id,
1366 LDLM_IBITS, &policy, LCK_PR, &lockh)) {
1367 ldlm_lock_decref(&lockh, LCK_PR);
1371 if (ldlm_lock_match(obd->obd_namespace, flags, &res_id,
1372 LDLM_IBITS, &policy, LCK_PW, &lockh)) {
1373 ldlm_lock_decref(&lockh, LCK_PW);
/*
 * Name-based variant of mdc_change_cbdata(): forwards @cid, @it and @data
 * to mdc_change_cbdata().  @pid, @name and @len are not used in the
 * visible lines.
 */
1379 static int mdc_change_cbdata_name(struct obd_export *exp,
1380 struct lustre_id *pid,
1381 char *name, int len,
1382 struct lustre_id *cid,
1383 ldlm_iterator_t it, void *data)
1386 rc = mdc_change_cbdata(exp, cid, it, data);
/*
 * Generic obd method table for the MDC device type, registered by
 * mdc_init().  Connection management uses the shared client_* helpers;
 * the other visible entries point at functions defined in this file.
 * Some entries of the original table are not visible in this listing.
 */
1390 struct obd_ops mdc_obd_ops = {
1391 .o_owner = THIS_MODULE,
1392 .o_attach = mdc_attach,
1393 .o_detach = mdc_detach,
1394 .o_setup = mdc_setup,
1395 .o_precleanup = mdc_precleanup,
1396 .o_cleanup = mdc_cleanup,
1397 .o_add_conn = client_import_add_conn,
1398 .o_del_conn = client_import_del_conn,
1399 .o_connect = client_connect_import,
1400 .o_disconnect = client_disconnect_export,
1401 .o_iocontrol = mdc_iocontrol,
1402 .o_statfs = mdc_statfs,
1404 .o_unpin = mdc_unpin,
1405 .o_import_event = mdc_import_event,
1406 .o_llog_init = mdc_llog_init,
1407 .o_llog_finish = mdc_llog_finish,
1408 .o_create = mdc_obj_create,
1409 .o_set_info = mdc_set_info,
1410 .o_get_info = mdc_get_info,
1412 .o_init_ea_size = mdc_init_ea_size,
/*
 * Metadata method table for the MDC device type, registered by mdc_init().
 * Several entries (for example mdc_create, mdc_enqueue, mdc_intent_lock,
 * mdc_rename, mdc_setattr, mdc_unlink, mdc_set_lock_data and
 * mdc_change_cbdata) refer to functions that are not defined in the
 * visible part of this file.
 */
1415 struct md_ops mdc_md_ops = {
1416 .m_getstatus = mdc_getstatus,
1417 .m_getattr = mdc_getattr,
1418 .m_close = mdc_close,
1419 .m_create = mdc_create,
1420 .m_done_writing = mdc_done_writing,
1421 .m_enqueue = mdc_enqueue,
1422 .m_getattr_lock = mdc_getattr_lock,
1423 .m_intent_lock = mdc_intent_lock,
1425 .m_rename = mdc_rename,
1426 .m_setattr = mdc_setattr,
1428 .m_readpage = mdc_readpage,
1429 .m_unlink = mdc_unlink,
1430 .m_valid_attrs = mdc_valid_attrs,
1431 .m_req2lustre_md = mdc_req2lustre_md,
1432 .m_set_open_replay_data = mdc_set_open_replay_data,
1433 .m_clear_open_replay_data = mdc_clear_open_replay_data,
1434 .m_store_inode_generation = mdc_store_inode_generation,
1435 .m_set_lock_data = mdc_set_lock_data,
1436 .m_get_real_obd = mdc_get_real_obd,
1437 .m_change_cbdata_name = mdc_change_cbdata_name,
1438 .m_change_cbdata = mdc_change_cbdata,
/*
 * Module entry point: registers the MDC device type with its obd and md
 * method tables and its lprocfs module variables.  The final argument of
 * class_register_type() is not visible in this listing.
 */
1441 int __init mdc_init(void)
1443 struct lprocfs_static_vars lvars;
1444 lprocfs_init_vars(mdc, &lvars);
1445 return class_register_type(&mdc_obd_ops, &mdc_md_ops, lvars.module_vars,
/* Module exit point: unregisters the LUSTRE_MDC_NAME device type. */
1450 static void /*__exit*/ mdc_exit(void)
1452 class_unregister_type(LUSTRE_MDC_NAME);
/* Module metadata. */
1455 MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
1456 MODULE_DESCRIPTION("Lustre Metadata Client");
1457 MODULE_LICENSE("GPL");
/*
 * Symbols exported to other modules.  Some of them (for example
 * mdc_change_cbdata, mdc_create, mdc_unlink, mdc_rename, mdc_link and
 * mdc_setattr) are not defined in the visible part of this file.
 */
1459 EXPORT_SYMBOL(mdc_req2lustre_md);
1460 EXPORT_SYMBOL(mdc_change_cbdata);
1461 EXPORT_SYMBOL(mdc_getstatus);
1462 EXPORT_SYMBOL(mdc_getattr);
1463 EXPORT_SYMBOL(mdc_getattr_lock);
1464 EXPORT_SYMBOL(mdc_create);
1465 EXPORT_SYMBOL(mdc_unlink);
1466 EXPORT_SYMBOL(mdc_rename);
1467 EXPORT_SYMBOL(mdc_link);
1468 EXPORT_SYMBOL(mdc_readpage);
1469 EXPORT_SYMBOL(mdc_setattr);
1470 EXPORT_SYMBOL(mdc_close);
1471 EXPORT_SYMBOL(mdc_done_writing);
1472 EXPORT_SYMBOL(mdc_sync);
1473 EXPORT_SYMBOL(mdc_set_open_replay_data);
1474 EXPORT_SYMBOL(mdc_clear_open_replay_data);
1475 EXPORT_SYMBOL(mdc_store_inode_generation);
/* Register the module's load and unload entry points. */
1477 module_init(mdc_init);
1478 module_exit(mdc_exit);