1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Copyright (C) 2001-2003 Cluster File Systems, Inc.
6 * This file is part of Lustre, http://www.sf.net/projects/lustre/
8 * Lustre is free software; you can redistribute it and/or
9 * modify it under the terms of version 2 of the GNU General Public
10 * License as published by the Free Software Foundation.
12 * Lustre is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Lustre; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 #define DEBUG_SUBSYSTEM S_MDC
26 # include <linux/module.h>
27 # include <linux/pagemap.h>
28 # include <linux/miscdevice.h>
29 # include <linux/init.h>
31 # include <liblustre.h>
32 # include <linux/obd_class.h>
35 #include <linux/lustre_mds.h>
36 #include <linux/lustre_lite.h>
37 #include <linux/lustre_dlm.h>
38 #include <linux/lprocfs_status.h>
39 #include "mdc_internal.h"
// NOTE(review): presumably a misc-device minor number; no user of this
// constant is visible in this view.
41 #define REQUEST_MINOR 244
// Declared here, defined elsewhere; not called by any code visible in this view.
43 extern int mds_queue_req(struct ptlrpc_request *);
// Lock taken around the RPCs issued in this file through the
// mdc_get_rpc_lock()/mdc_put_rpc_lock() pairs; exported to other modules.
44 struct mdc_rpc_lock mdc_rpc_lock;
// Second lock instance, initialised in mdc_init(). NOTE(review): presumably
// serialises setattr requests; that code is not visible here.
45 struct mdc_rpc_lock mdc_setattr_lock;
46 EXPORT_SYMBOL(mdc_rpc_lock);
48 /* Helper that implements most of mdc_getstatus and signal_completed_replay. */
// Sends an MDS_GETSTATUS request on imp and copies fid1 from the reply body
// into rootfid.
//   imp       - import the request is prepared on
//   rootfid   - output; receives body->fid1 from the reply
//   level     - stored in req->rq_level before the request is queued
//   msg_flags - OR-ed into the request message flags
// rc is set to -ENOMEM or -EPROTO on the error exits visible below.
// NOTE(review): the conditions guarding the GOTO statements, the "out" label
// and the return statement are not visible in this view.
49 static int send_getstatus(struct obd_import *imp, struct ll_fid *rootfid,
50 int level, int msg_flags)
52 struct ptlrpc_request *req;
53 struct mds_body *body;
54 int rc, size = sizeof(*body);
// One request buffer, sized for a struct mds_body.
57 req = ptlrpc_prep_req(imp, MDS_GETSTATUS, 1, &size, NULL);
59 GOTO(out, rc = -ENOMEM);
61 body = lustre_msg_buf(req->rq_reqmsg, 0, sizeof (*body));
62 req->rq_level = level;
// The reply is expected to carry a single mds_body as well.
63 req->rq_replen = lustre_msg_size(1, &size);
65 mds_pack_req_body(req);
66 req->rq_reqmsg->flags |= msg_flags;
67 rc = ptlrpc_queue_wait(req);
// Fetch reply buffer 0 as an mds_body, passing lustre_swab_mds_body as the
// byte-swapping routine.
70 body = lustre_swab_repbuf (req, 0, sizeof (*body),
71 lustre_swab_mds_body);
73 CERROR ("Can't extract mds_body\n");
74 GOTO (out, rc = -EPROTO);
77 memcpy(rootfid, &body->fid1, sizeof(*rootfid));
79 CDEBUG(D_NET, "root ino="LPU64", last_committed="LPU64
80 ", last_xid="LPU64"\n",
81 rootfid->id, req->rq_repmsg->last_committed,
82 req->rq_repmsg->last_xid);
// Drop this function's reference on the request.
87 ptlrpc_req_finished(req);
91 /* should become mdc_getinfo() */
// Exported entry point: resolves conn to its client import and delegates to
// send_getstatus() with level LUSTRE_CONN_CON.
// NOTE(review): the last argument of the call (the message flags) is not
// visible in this view.
92 int mdc_getstatus(struct lustre_handle *conn, struct ll_fid *rootfid)
94 return send_getstatus(class_conn2cliimp(conn), rootfid, LUSTRE_CONN_CON,
// Issues an MDS_GETLOVINFO request and validates the reply: a lov_desc in
// reply buffer 0 followed by an array of desc->ld_tgt_count obd_uuid entries
// in reply buffer 1.
//   obd       - not referenced by any line visible in this view
//   mdc_connh - connection handle; resolved to the client import
//   request   - NOTE(review): presumably receives the request on success;
//               the assignment is not visible here
// rc is set to -EPROTO on the malformed-reply exits visible below.
// NOTE(review): the guard conditions, the declaration of i and the "failed"
// label are not visible in this view.
98 int mdc_getlovinfo(struct obd_device *obd, struct lustre_handle *mdc_connh,
99 struct ptlrpc_request **request)
101 struct ptlrpc_request *req;
102 struct mds_status_req *streq;
103 struct lov_desc *desc;
104 struct obd_uuid *uuids;
// Only size[0] is initialised explicitly; the request uses one buffer.
105 int rc, size[2] = {sizeof(*streq)};
109 req = ptlrpc_prep_req(class_conn2cliimp(mdc_connh), MDS_GETLOVINFO, 1,
115 streq = lustre_msg_buf(req->rq_reqmsg, 0, sizeof (*streq));
116 streq->flags = MDS_STATUS_LOV;
// Tell the server how much room is reserved for the uuid array.
117 streq->repbuf = LOV_MAX_UUID_BUFFER_SIZE;
119 /* prepare for reply */
120 req->rq_level = LUSTRE_CONN_CON;
// size[] is reused to describe the two expected reply buffers.
121 size[0] = sizeof (*desc);
122 size[1] = LOV_MAX_UUID_BUFFER_SIZE;
123 req->rq_replen = lustre_msg_size(2, size);
// The RPC is issued while holding the shared mdc_rpc_lock.
125 mdc_get_rpc_lock(&mdc_rpc_lock, NULL);
126 rc = ptlrpc_queue_wait(req);
127 mdc_put_rpc_lock(&mdc_rpc_lock, NULL);
// NOTE(review): "rcp" in the message below looks like a typo for "rpc".
130 CERROR ("rcp failed\n");
134 desc = lustre_swab_repbuf (req, 0, sizeof (*desc),
135 lustre_swab_lov_desc);
137 CERROR ("Can't unpack lov_desc\n");
138 GOTO (failed, rc = -EPROTO);
141 LASSERT_REPSWAB (req, 1);
142 /* array of uuids byte-sex insensitive; just verify they are all
143 * there and terminated */
// Requesting ld_tgt_count entries makes the lookup depend on the reply
// buffer being large enough for the advertised target count.
144 uuids = lustre_msg_buf (req->rq_repmsg, 1,
145 desc->ld_tgt_count * sizeof (*uuids));
147 CERROR ("Can't unpack %d uuids\n", desc->ld_tgt_count);
148 GOTO (failed, rc = -EPROTO);
151 for (i = 0; i < desc->ld_tgt_count; i++) {
// strnlen() returning the full field width means no NUL was found.
152 int uid_len = strnlen (uuids[i].uuid, sizeof (uuids[i].uuid));
154 if (uid_len == sizeof (uuids[i].uuid)) {
155 CERROR ("Unterminated uuid %d:%*s\n",
156 i, (int)sizeof (uuids[i].uuid), uuids[i].uuid);
157 GOTO (failed, rc = -EPROTO);
163 ptlrpc_req_finished (req);
// Shared tail of mdc_getattr() and mdc_getattr_name(): sizes the reply for an
// already-built request, sends it under mdc_rpc_lock and validates the reply.
//   conn    - not referenced by any line visible in this view
//   ea_size - number of bytes reserved in the reply for extended-attribute
//             (MD/symlink) data
//   req     - request whose message has already been packed by the caller
// NOTE(review): the declarations of rc, bufcount and eadata, the guard
// conditions and the return statements are not visible in this view.
167 int mdc_getattr_common (struct lustre_handle *conn,
168 unsigned int ea_size, struct ptlrpc_request *req)
170 struct mds_body *body;
// Reply layout: buffer 0 is the mds_body; a second buffer is appended below.
173 int size[2] = {sizeof(*body), 0};
177 /* request message already built */
180 size[bufcount++] = ea_size;
181 CDEBUG(D_INODE, "reserved %u bytes for MD/symlink in packet\n",
184 req->rq_replen = lustre_msg_size(bufcount, size);
186 mdc_get_rpc_lock(&mdc_rpc_lock, NULL);
187 rc = ptlrpc_queue_wait(req);
188 mdc_put_rpc_lock(&mdc_rpc_lock, NULL);
192 body = lustre_swab_repbuf (req, 0, sizeof (*body),
193 lustre_swab_mds_body);
195 CERROR ("Can't unpack mds_body\n");
199 CDEBUG(D_NET, "mode: %o\n", body->mode);
201 LASSERT_REPSWAB (req, 1);
// The size used for the check comes from the reply (body->eadatasize), not
// from the ea_size that was reserved.
202 if (body->eadatasize != 0) {
203 /* reply indicates presence of eadata; check it's there... */
204 eadata = lustre_msg_buf (req->rq_repmsg, 1, body->eadatasize);
205 if (eadata == NULL) {
206 CERROR ("Missing/short eadata\n");
// Builds an MDS_GETATTR request for the object identified by fid and hands it
// to mdc_getattr_common().
//   conn    - connection handle; resolved to the client import
//   fid     - copied into body->fid1 of the request
//   valid   - NOTE(review): presumably stored in the request body; that line
//             is not visible in this view
//   ea_size - stored in body->eadatasize and passed on as the reply
//             reservation
//   request - NOTE(review): presumably receives the request on success; the
//             assignment is not visible here
// rc is set to -ENOMEM on the allocation-failure exit visible below.
214 int mdc_getattr(struct lustre_handle *conn, struct ll_fid *fid,
215 unsigned long valid, unsigned int ea_size,
216 struct ptlrpc_request **request)
218 struct ptlrpc_request *req;
219 struct mds_body *body;
220 int size = sizeof(*body);
224 /* XXX do we need to make another request here? We just did a getattr
225 * to do the lookup in the first place.
227 req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_GETATTR, 1, &size,
230 GOTO(out, rc = -ENOMEM);
232 body = lustre_msg_buf(req->rq_reqmsg, 0, sizeof (*body));
233 memcpy(&body->fid1, fid, sizeof(*fid));
235 body->eadatasize = ea_size;
236 mds_pack_req_body(req);
// Reply sizing, the RPC itself and reply validation happen in the helper.
238 rc = mdc_getattr_common (conn, ea_size, req);
240 ptlrpc_req_finished (req);
// Builds an MDS_GETATTR_NAME request: like mdc_getattr(), but the request
// carries a second buffer holding filename.
//   conn     - connection handle; resolved to the client import
//   fid      - copied into body->fid1 of the request
//   filename - name copied into request buffer 1
//   namelen  - size of request buffer 1; the assertion below requires it to
//              be the string length plus one, i.e. to include the NUL
//   valid    - NOTE(review): presumably stored in the request body; that line
//              is not visible in this view
//   ea_size  - stored in body->eadatasize and passed on as the reply
//              reservation
//   request  - NOTE(review): presumably receives the request on success; the
//              assignment is not visible here
248 int mdc_getattr_name(struct lustre_handle *conn, struct ll_fid *fid,
249 char *filename, int namelen, unsigned long valid,
250 unsigned int ea_size, struct ptlrpc_request **request)
252 struct ptlrpc_request *req;
253 struct mds_body *body;
// Two request buffers: the mds_body and the name.
254 int rc, size[2] = {sizeof(*body), namelen};
257 req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_GETATTR_NAME, 2,
260 GOTO(out, rc = -ENOMEM);
262 body = lustre_msg_buf(req->rq_reqmsg, 0, sizeof (*body));
263 memcpy(&body->fid1, fid, sizeof(*fid));
265 body->eadatasize = ea_size;
266 mds_pack_req_body(req);
// Holds only when the first NUL in filename is at index namelen - 1.
268 LASSERT (strnlen (filename, namelen) == namelen - 1);
269 memcpy(lustre_msg_buf(req->rq_reqmsg, 1, namelen), filename, namelen);
271 rc = mdc_getattr_common (conn, ea_size, req);
273 ptlrpc_req_finished (req);
281 /* This should be called with both the request and the reply still packed. */
// Copies fid1 from the mds_body at reply buffer repoff into cr_replayfid of
// the mds_rec_create at request buffer reqoff, then logs the stored
// generation and inode number.
// Both buffers are asserted to be present.
// NOTE(review): the parameter line declaring repoff is not visible in this
// view.
282 void mdc_store_inode_generation(struct ptlrpc_request *req, int reqoff,
285 struct mds_rec_create *rec =
286 lustre_msg_buf(req->rq_reqmsg, reqoff, sizeof (*rec));
287 struct mds_body *body =
288 lustre_msg_buf(req->rq_repmsg, repoff, sizeof (*body));
290 LASSERT (rec != NULL);
291 LASSERT (body != NULL);
293 memcpy(&rec->cr_replayfid, &body->fid1, sizeof rec->cr_replayfid);
294 DEBUG_REQ(D_HA, req, "storing generation %x for ino "LPD64,
295 rec->cr_replayfid.generation, rec->cr_replayfid.id);
298 /* We always reserve enough space in the reply packet for a stripe MD, because
299 * we don't know in advance the file type.
301 * XXX we could get that from ext2_dir_entry_2 file_type
// Builds an LDLM_ENQUEUE request carrying an intent (open, unlink,
// getattr/lookup or readdir), enqueues it through ldlm_cli_enqueue() under
// mdc_rpc_lock, and copies the intent results from the reply into it.
// Parameters visible in this view:
//   conn          - connection handle; resolved to the obd device and import
//   it            - intent; it_op selects the request layout, and
//                   it_disposition, it_status and it_lock_mode are filled in
//                   from the reply
//   data          - operation data; ino1/gen1 name the lock resource and
//                   namelen sizes the name buffer
//   lockh         - lock handle passed to ldlm_cli_enqueue(); zeroed when the
//                   enqueue returns ELDLM_LOCK_ABORTED
//   cb_completion, cb_blocking - callbacks forwarded to ldlm_cli_enqueue()
// NOTE(review): several parameter lines (lock_type, lock_mode, tgt, tgtlen,
// cb_data are used below), the assignments to reply_buffers, the allocation
// checks and the return statements are not visible in this view.
303 int mdc_enqueue(struct lustre_handle *conn,
305 struct lookup_intent *it,
307 struct mdc_op_data *data,
308 struct lustre_handle *lockh,
311 ldlm_completion_callback cb_completion,
312 ldlm_blocking_callback cb_blocking,
315 struct ptlrpc_request *req;
316 struct obd_device *obddev = class_conn2obd(conn);
// The lock resource is named by the inode number and generation in data.
317 struct ldlm_res_id res_id =
318 { .name = {data->ino1, data->gen1} };
// Request buffers 0 and 1 are always the lock request and the intent;
// buffers 2 and 3 are filled in per intent type below.
319 int size[6] = {sizeof(struct ldlm_request), sizeof(struct ldlm_intent)};
320 int rc, flags = LDLM_FL_HAS_INTENT;
// Largest reply layout: lock reply, mds_body, then up to cl_max_mds_easize
// bytes of EA data.
321 int repsize[3] = {sizeof(struct ldlm_reply),
322 sizeof(struct mds_body),
323 obddev->u.cli.cl_max_mds_easize};
324 struct ldlm_reply *dlm_rep;
325 struct ldlm_intent *lit;
326 struct ldlm_request *lockreq;
328 unsigned long irqflags;
// Asserted further down to be 3 or 1 by the time the reply is examined.
329 int reply_buffers = 0;
332 // LDLM_DEBUG_NOLOCK("mdsintent=%s,name=%s,dir=%lu",
333 // ldlm_it2str(it->it_op), it_name, it_inode->i_ino);
// Open intent: regular-file mode with the caller's umask applied; request
// buffer 2 is an mds_rec_create and buffer 3 the NUL-terminated name.
335 if (it->it_op & IT_OPEN) {
336 it->it_mode |= S_IFREG;
337 it->it_mode &= ~current->fs->umask;
339 size[2] = sizeof(struct mds_rec_create);
340 size[3] = data->namelen + 1;
341 req = ptlrpc_prep_req(class_conn2cliimp(conn), LDLM_ENQUEUE, 4,
// NOTE(review): the statement executed under rq_lock is not visible here.
346 spin_lock_irqsave (&req->rq_lock, irqflags);
348 spin_unlock_irqrestore (&req->rq_lock, irqflags);
350 /* pack the intent */
351 lit = lustre_msg_buf(req->rq_reqmsg, 1, sizeof (*lit));
352 lit->opc = (__u64)it->it_op;
354 /* pack the intended request */
355 mds_open_pack(req, 2, data, it->it_mode, 0, current->fsuid,
356 current->fsgid, LTIME_S(CURRENT_TIME),
357 it->it_flags, tgt, tgtlen);
358 /* get ready for the reply */
360 req->rq_replen = lustre_msg_size(3, repsize);
// Unlink intent: request buffer 2 is an mds_rec_unlink, buffer 3 the name.
361 } else if (it->it_op & IT_UNLINK) {
362 size[2] = sizeof(struct mds_rec_unlink);
363 size[3] = data->namelen + 1;
364 req = ptlrpc_prep_req(class_conn2cliimp(conn), LDLM_ENQUEUE, 4,
369 /* pack the intent */
370 lit = lustre_msg_buf(req->rq_reqmsg, 1, sizeof (*lit));
371 lit->opc = (__u64)it->it_op;
373 /* pack the intended request */
374 mds_unlink_pack(req, 2, data);
375 /* get ready for the reply */
377 req->rq_replen = lustre_msg_size(3, repsize);
// Getattr/lookup intent: request buffer 2 is an mds_body, buffer 3 the name.
378 } else if (it->it_op & (IT_GETATTR | IT_LOOKUP)) {
379 int valid = OBD_MD_FLNOTOBD | OBD_MD_FLEASIZE;
380 size[2] = sizeof(struct mds_body);
381 size[3] = data->namelen + 1;
383 req = ptlrpc_prep_req(class_conn2cliimp(conn), LDLM_ENQUEUE, 4,
388 /* pack the intent */
389 lit = lustre_msg_buf(req->rq_reqmsg, 1, sizeof (*lit));
390 lit->opc = (__u64)it->it_op;
392 /* pack the intended request */
393 mds_getattr_pack(req, valid, 2, it->it_flags, data);
394 /* get ready for the reply */
396 req->rq_replen = lustre_msg_size(3, repsize);
// Readdir: a single request buffer and a single reply buffer (the lock
// reply); no intent record is packed in the lines visible here.
397 } else if (it->it_op == IT_READDIR) {
398 req = ptlrpc_prep_req(class_conn2cliimp(conn), LDLM_ENQUEUE, 1,
403 /* get ready for the reply */
405 req->rq_replen = lustre_msg_size(1, repsize);
// The enqueue is performed while holding mdc_rpc_lock; the intent is passed
// to the lock helpers.
411 mdc_get_rpc_lock(&mdc_rpc_lock, it);
412 rc = ldlm_cli_enqueue(conn, req, obddev->obd_namespace, NULL, res_id,
413 lock_type, NULL, 0, lock_mode, &flags,
414 cb_completion, cb_blocking, cb_data, lockh);
415 mdc_put_rpc_lock(&mdc_rpc_lock, it);
417 /* Similarly, if we're going to replay this request, we don't want to
418 * actually get a lock, just perform the intent. */
// The flag is written into the request message that is kept, so that it is
// already set if the request is sent again.
419 if (req->rq_transno || req->rq_replay) {
420 lockreq = lustre_msg_buf(req->rq_reqmsg, 0, sizeof (*lockreq));
421 lockreq->lock_flags |= LDLM_FL_INTENT_ONLY;
424 /* This can go when we're sure that this can never happen */
425 LASSERT(rc != -ENOENT);
// Three outcomes: enqueue aborted (clear the handle), any other error (log
// and release the request), or success (rc == 0).
426 if (rc == ELDLM_LOCK_ABORTED) {
428 memset(lockh, 0, sizeof(*lockh));
429 } else if (rc != 0) {
430 CERROR("ldlm_cli_enqueue: %d\n", rc);
432 ptlrpc_req_finished(req);
434 } else { /* rc = 0 */
435 struct ldlm_lock *lock = ldlm_handle2lock(lockh);
438 /* If the server gave us back a different lock mode, we should
439 * fix up our variables. */
// Take a reference in the mode recorded on the lock before dropping the
// one in the mode that was requested.
440 if (lock->l_req_mode != lock_mode) {
441 ldlm_lock_addref(lockh, lock->l_req_mode);
442 ldlm_lock_decref(lockh, lock_mode);
443 lock_mode = lock->l_req_mode;
449 dlm_rep = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*dlm_rep));
450 LASSERT (dlm_rep != NULL); /* checked by ldlm_cli_enqueue() */
451 LASSERT_REPSWABBED (req, 0); /* swabbed by ldlm_cli_enqueue() */
// The two lock_policy_res fields of the reply are truncated to int and
// stored as the intent's disposition and status.
453 it->it_disposition = (int) dlm_rep->lock_policy_res1;
454 it->it_status = (int) dlm_rep->lock_policy_res2;
455 it->it_lock_mode = lock_mode;
458 /* We know what to expect, so we do any byte flipping required here */
459 LASSERT (reply_buffers == 3 || reply_buffers == 1);
// Only the three-buffer layouts carry an mds_body (reply buffer 1) and
// optional EA data (reply buffer 2).
460 if (reply_buffers == 3) {
461 struct mds_body *body;
463 body = lustre_swab_repbuf (req, 1, sizeof (*body),
464 lustre_swab_mds_body);
466 CERROR ("Can't swab mds_body\n");
470 if ((body->valid & OBD_MD_FLEASIZE) != 0) {
471 /* The eadata is opaque; just check that it is
472 * there. Eventually, obd_unpackmd() will check
474 eadata = lustre_swab_repbuf (req, 2, body->eadatasize,
476 if (eadata == NULL) {
477 CERROR ("Missing/short eadata\n");
// Replay callback installed by mdc_set_open_replay_data(). Takes the file
// handle from the mds_body in reply buffer 1, stores it in the client open
// handle found in req->rq_replay_data, and rewrites the handle in queued
// MDS_CLOSE requests that still carry the previous value.
// NOTE(review): the statements controlled by the two if tests inside the loop
// are not visible in this view.
486 static void mdc_replay_open(struct ptlrpc_request *req)
488 struct obd_client_handle *och = req->rq_replay_data;
489 struct lustre_handle old, *file_fh = &och->och_fh;
490 struct list_head *tmp;
491 struct mds_body *body;
493 body = lustre_swab_repbuf (req, 1, sizeof (*body),
494 lustre_swab_mds_body);
495 LASSERT (body != NULL);
// Keep a copy of the previous handle so that pending closes can be matched.
497 memcpy(&old, file_fh, sizeof(old));
498 CDEBUG(D_HA, "updating handle from "LPD64" to "LPD64"\n",
499 file_fh->cookie, body->handle.cookie);
500 memcpy(file_fh, &body->handle, sizeof(body->handle));
502 /* A few frames up, ptlrpc_replay holds the lock, so this is safe. */
// From here on req and body are reused for each request on the import's
// sending list; they no longer refer to the replayed open.
503 list_for_each(tmp, &req->rq_import->imp_sending_list) {
504 req = list_entry(tmp, struct ptlrpc_request, rq_list);
505 if (req->rq_reqmsg->opc != MDS_CLOSE)
507 body = lustre_msg_buf(req->rq_reqmsg, 0, sizeof (*body));
// memcmp() is non-zero when this request's handle differs from the old one.
508 if (memcmp(&body->handle, &old, sizeof(old)))
511 DEBUG_REQ(D_HA, req, "updating close body with new fh");
512 memcpy(&body->handle, file_fh, sizeof(*file_fh));
// Prepares the open request referenced by och->och_req for replay: copies
// fid1 from the mds_body in reply buffer 1 into cr_replayfid of the
// mds_rec_create in request buffer 2, and registers mdc_replay_open() with
// och as its replay data.
516 void mdc_set_open_replay_data(struct obd_client_handle *och)
518 struct ptlrpc_request *req = och->och_req;
519 struct mds_rec_create *rec =
520 lustre_msg_buf(req->rq_reqmsg, 2, sizeof (*rec));
521 struct mds_body *body =
522 lustre_msg_buf(req->rq_repmsg, 1, sizeof (*body));
524 LASSERT (rec != NULL);
525 /* outgoing messages always in my byte order */
526 LASSERT (body != NULL);
527 /* incoming message in my byte order (it's been swabbed) */
528 LASSERT_REPSWABBED (req, 1);
530 memcpy(&rec->cr_replayfid, &body->fid1, sizeof rec->cr_replayfid);
// The callback reads och back through req->rq_replay_data.
531 req->rq_replay_cb = mdc_replay_open;
532 req->rq_replay_data = och;
// Sends an MDS_CLOSE request for the open file handle fh.
//   conn    - connection handle; resolved to the client import
//   ino     - inode number packed into body->fid1
//   type    - file type packed into body->fid1
//   fh      - open handle copied into body->handle
//   request - NOTE(review): presumably receives the request; the assignment
//             is not visible in this view
// rc is set to -ENOMEM on the allocation-failure exit visible below; otherwise
// it holds the result of ptlrpc_queue_wait().
535 int mdc_close(struct lustre_handle *conn, obd_id ino, int type,
536 struct lustre_handle *fh, struct ptlrpc_request **request)
538 struct mds_body *body;
539 int rc, size = sizeof(*body);
540 struct ptlrpc_request *req;
543 req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_CLOSE, 1, &size,
546 GOTO(out, rc = -ENOMEM);
548 body = lustre_msg_buf(req->rq_reqmsg, 0, sizeof (*body));
// The generation argument is passed as 0.
549 ll_ino2fid(&body->fid1, ino, 0, type);
550 memcpy(&body->handle, fh, sizeof(body->handle));
// No reply buffers are expected.
552 req->rq_replen = lustre_msg_size(0, NULL);
554 mdc_get_rpc_lock(&mdc_rpc_lock, NULL);
555 rc = ptlrpc_queue_wait(req);
556 mdc_put_rpc_lock(&mdc_rpc_lock, NULL);
// Issues an MDS_READPAGE request that receives one page of directory data
// into page through a bulk transfer.
//   conn    - connection handle; resolved to the client import
//   ino     - inode number of the directory
//   type    - file type passed to mds_readdir_pack()
//   offset  - offset passed to mds_readdir_pack()
//   page    - destination page registered with the bulk descriptor
//   request - NOTE(review): presumably receives the request; the assignment
//             is not visible in this view
// rc is set to -ENOMEM or -EPROTO on the error exits visible below.
// NOTE(review): unlike the other RPCs in this file, no mdc_rpc_lock calls
// appear around ptlrpc_queue_wait() in the lines visible here.
564 int mdc_readpage(struct lustre_handle *conn, obd_id ino, int type, __u64 offset,
565 struct page *page, struct ptlrpc_request **request)
567 struct obd_import *imp = class_conn2cliimp(conn);
568 struct ptlrpc_request *req = NULL;
569 struct ptlrpc_bulk_desc *desc = NULL;
570 struct mds_body *body;
571 int rc, size = sizeof(*body);
574 CDEBUG(D_INODE, "inode: %ld\n", (long)ino);
576 req = ptlrpc_prep_req(imp, MDS_READPAGE, 1, &size, NULL);
578 GOTO(out, rc = -ENOMEM);
579 /* XXX FIXME bug 249 */
// Readpage requests are directed to their own request portal.
580 req->rq_request_portal = MDS_READPAGE_PORTAL;
582 desc = ptlrpc_prep_bulk_imp (req, BULK_PUT_SINK, MDS_BULK_PORTAL);
584 GOTO(out, rc = -ENOMEM);
586 /* NB req now owns desc and will free it when it gets freed */
// Register the whole page (offset 0, PAGE_CACHE_SIZE bytes) as the sink.
588 rc = ptlrpc_prep_bulk_page(desc, page, 0, PAGE_CACHE_SIZE);
592 mds_readdir_pack(req, offset, PAGE_CACHE_SIZE, ino, type);
594 req->rq_replen = lustre_msg_size(1, &size);
595 rc = ptlrpc_queue_wait(req);
598 LASSERT (desc->bd_page_count == 1);
599 body = lustre_swab_repbuf (req, 0, sizeof (*body),
600 lustre_swab_mds_body);
602 CERROR ("Can't unpack mds_body\n");
603 GOTO (out, rc = -EPROTO);
// ioctl handler installed as o_iocontrol in mdc_obd_ops.
//   cmd  - ioctl command; two cases are visible below
//   conn - connection handle; resolved to the obd device and its import
//   len  - not referenced by any line visible in this view
//   karg - kernel copy of the ioctl argument, treated as obd_ioctl_data
//   uarg - not referenced by any line visible in this view
// NOTE(review): the switch header, the default label and the error returns
// are not visible in this view.
613 static int mdc_iocontrol(unsigned int cmd, struct lustre_handle *conn, int len,
614 void *karg, void *uarg)
616 struct obd_device *obddev = class_conn2obd(conn);
617 struct obd_ioctl_data *data = karg;
618 struct obd_import *imp = obddev->u.cli.cl_import;
// Start recovery of the import, passing the first inline buffer through.
622 case OBD_IOC_CLIENT_RECOVER:
623 RETURN(ptlrpc_recover_import(imp, data->ioc_inlbuf1));
// A non-zero ioc_offset is a request to activate, which is rejected with
// the message below; otherwise the import is marked inactive.
624 case IOC_OSC_SET_ACTIVE:
625 if (data->ioc_offset) {
626 CERROR("%s: can't reactivate MDC\n",
627 obddev->obd_uuid.uuid);
630 RETURN(ptlrpc_set_import_active(imp, 0));
// NOTE(review): the message names osc_ioctl() although this is the MDC
// handler.
632 CERROR("osc_ioctl(): unrecognised ioctl %#x\n", cmd);
// Sends an MDS_STATFS request on the export's client import and copies the
// obd_statfs found in reply buffer 0 into osfs.
//   exp  - export whose obd device supplies the import
//   osfs - output; overwritten with the statfs data from the reply
// rc is set to -EPROTO when the reply cannot be unpacked.
// NOTE(review): the guard conditions, the "out" label and the return
// statement are not visible in this view.
637 static int mdc_statfs(struct obd_export *exp, struct obd_statfs *osfs)
639 struct ptlrpc_request *req;
640 struct obd_statfs *msfs;
641 int rc, size = sizeof(*msfs);
// The request itself carries no buffers.
644 req = ptlrpc_prep_req(exp->exp_obd->u.cli.cl_import, MDS_STATFS, 0,
649 req->rq_replen = lustre_msg_size(1, &size);
651 mdc_get_rpc_lock(&mdc_rpc_lock, NULL);
652 rc = ptlrpc_queue_wait(req);
653 mdc_put_rpc_lock(&mdc_rpc_lock, NULL);
658 msfs = lustre_swab_repbuf (req, 0, sizeof (*msfs),
659 lustre_swab_obd_statfs);
661 CERROR ("Can't unpack obd_statfs\n");
662 GOTO (out, rc = -EPROTO);
665 memcpy (osfs, msfs, sizeof (*msfs));
668 ptlrpc_req_finished(req);
// Attach method (o_attach): initialises the lprocfs variable tables and
// attaches dev using the per-device table. len and data are not used by the
// lines visible in this view.
673 static int mdc_attach(struct obd_device *dev, obd_count len, void *data)
675 struct lprocfs_static_vars lvars;
677 lprocfs_init_vars(&lvars);
678 return lprocfs_obd_attach(dev, lvars.obd_vars);
// Detach method (o_detach): returns the result of lprocfs_obd_detach() for dev.
681 static int mdc_detach(struct obd_device *dev)
683 return lprocfs_obd_detach(dev);
// Method table registered for the MDC device type in mdc_init(). Attach,
// detach and iocontrol are implemented in this file; setup, cleanup, connect
// and disconnect use the client_* functions defined elsewhere.
// The "field: value" form is the older GNU spelling of designated
// initialisers.
// NOTE(review): the end of the initialiser is not visible in this view.
686 struct obd_ops mdc_obd_ops = {
687 o_owner: THIS_MODULE,
688 o_attach: mdc_attach,
689 o_detach: mdc_detach,
690 o_setup: client_obd_setup,
691 o_cleanup: client_obd_cleanup,
692 o_connect: client_import_connect,
693 o_disconnect: client_import_disconnect,
694 o_iocontrol: mdc_iocontrol,
// Module init: initialises both RPC locks, then registers the MDC obd type
// with its method table and module-level lprocfs variables. Returns the
// result of class_register_type().
// NOTE(review): the final argument of the registration call is not visible
// in this view.
698 int __init mdc_init(void)
700 struct lprocfs_static_vars lvars;
701 mdc_init_rpc_lock(&mdc_rpc_lock);
702 mdc_init_rpc_lock(&mdc_setattr_lock);
703 lprocfs_init_vars(&lvars);
704 return class_register_type(&mdc_obd_ops, lvars.module_vars,
// Module exit: unregisters the obd type named LUSTRE_MDC_NAME.
708 static void __exit mdc_exit(void)
710 class_unregister_type(LUSTRE_MDC_NAME);
// Module metadata.
714 MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
715 MODULE_DESCRIPTION("Lustre Metadata Client");
716 MODULE_LICENSE("GPL");
// Symbols made available to other kernel modules.
// NOTE(review): mdc_create, mdc_unlink, mdc_rename, mdc_link and mdc_setattr
// are not defined in the part of the file visible here; presumably they live
// in another source file of this module.
718 EXPORT_SYMBOL(mdc_getstatus);
719 EXPORT_SYMBOL(mdc_getlovinfo);
720 EXPORT_SYMBOL(mdc_enqueue);
721 EXPORT_SYMBOL(mdc_getattr);
722 EXPORT_SYMBOL(mdc_getattr_name);
723 EXPORT_SYMBOL(mdc_create);
724 EXPORT_SYMBOL(mdc_unlink);
725 EXPORT_SYMBOL(mdc_rename);
726 EXPORT_SYMBOL(mdc_link);
727 EXPORT_SYMBOL(mdc_readpage);
728 EXPORT_SYMBOL(mdc_setattr);
729 EXPORT_SYMBOL(mdc_close);
730 EXPORT_SYMBOL(mdc_set_open_replay_data);
732 EXPORT_SYMBOL(mdc_store_inode_generation);
// Entry and exit points run when the module is loaded and unloaded.
734 module_init(mdc_init);
735 module_exit(mdc_exit);