1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
6 * This file is part of Lustre, http://www.sf.net/projects/lustre/
8 * Lustre is free software; you can redistribute it and/or
9 * modify it under the terms of version 2 of the GNU General Public
10 * License as published by the Free Software Foundation.
12 * Lustre is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Lustre; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24 #define DEBUG_SUBSYSTEM S_MDC
26 #include <linux/module.h>
27 #include <linux/miscdevice.h>
28 #include <linux/lustre_mds.h>
29 #include <linux/lustre_lite.h>
30 #include <linux/lustre_dlm.h>
31 #include <linux/init.h>
32 #include <linux/lprocfs_status.h>
// NOTE(review): no use of REQUEST_MINOR is visible in this part of the file.
34 #define REQUEST_MINOR 244
// NOTE(review): declared but not called in the visible part of this file.
36 extern int mds_queue_req(struct ptlrpc_request *);
// Variable table that mdc_attach() passes to lprocfs_reg_obd().
37 extern struct lprocfs_vars status_var_nm_1[];
// Variable table that module init passes to class_register_type().
38 extern struct lprocfs_vars status_class_var[];
40 /* should become mdc_getinfo() */
/*
 * mdc_getstatus - issue an MDS_GETSTATUS request and hand back the root fid.
 *
 * conn:    connection handle; class_conn2cliimp(conn) supplies the import
 *          the request is prepared on.
 * rootfid: output; filled with a copy of fid1 from the unpacked reply body.
 *
 * rc is set to -ENOMEM on the GOTO(out, ...) path and otherwise to the
 * result of ptlrpc_queue_wait().  Unlike the calls in this file that take a
 * request out-parameter, this function calls ptlrpc_req_finished() on its
 * own request.
 */
41 int mdc_getstatus(struct lustre_handle *conn, struct ll_fid *rootfid)
43 struct ptlrpc_request *req;
44 struct mds_body *body;
/* size is both the single request buffer length and the reply buffer length */
45 int rc, size = sizeof(*body);
48 req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_GETSTATUS, 1, &size,
51 GOTO(out, rc = -ENOMEM);
53 body = lustre_msg_buf(req->rq_reqmsg, 0);
54 req->rq_level = LUSTRE_CONN_CON;
/* reply length: one buffer of sizeof(struct mds_body) bytes */
55 req->rq_replen = lustre_msg_size(1, &size);
57 mds_pack_req_body(req);
58 rc = ptlrpc_queue_wait(req);
/* body is re-pointed at the reply message from here on */
61 body = lustre_msg_buf(req->rq_repmsg, 0);
62 mds_unpack_body(body);
63 memcpy(rootfid, &body->fid1, sizeof(*rootfid));
65 CDEBUG(D_NET, "root ino="LPU64", last_committed="LPU64
66 ", last_xid="LPU64"\n",
67 rootfid->id, req->rq_repmsg->last_committed,
68 req->rq_repmsg->last_xid);
73 ptlrpc_req_finished(req);
/*
 * mdc_getlovinfo - send an MDS_GETLOVINFO request asking for LOV status.
 *
 * obd:       NOTE(review): not referenced in the statements shown for this
 *            function - confirm whether it is still needed.
 * mdc_connh: connection handle; class_conn2cliimp(mdc_connh) supplies the
 *            import the request is prepared on.
 * request:   presumably receives the prepared request so the caller can read
 *            the reply and release it - TODO confirm against callers.
 *
 * rc is set to -ENOMEM on the GOTO(out, ...) path and otherwise to the
 * result of ptlrpc_queue_wait().
 */
77 int mdc_getlovinfo(struct obd_device *obd, struct lustre_handle *mdc_connh,
78 struct ptlrpc_request **request)
80 struct ptlrpc_request *req;
81 struct mds_status_req *streq;
/* only size[0] is given explicitly; the initialiser leaves size[1] at zero */
82 int rc, size[2] = {sizeof(*streq)};
85 req = ptlrpc_prep_req(class_conn2cliimp(mdc_connh), MDS_GETLOVINFO, 1,
88 GOTO(out, rc = -ENOMEM);
91 streq = lustre_msg_buf(req->rq_reqmsg, 0);
/* both fields are stored through HTON__u32() */
92 streq->flags = HTON__u32(MDS_STATUS_LOV);
/* NOTE(review): 8192 is a bare constant; presumably the reply buffer size
 * requested from the MDS - confirm and consider naming it. */
93 streq->repbuf = HTON__u32(8192);
95 /* prepare for reply */
96 req->rq_level = LUSTRE_CONN_CON;
/* the reply is sized for two buffers described by size[] */
99 req->rq_replen = lustre_msg_size(2, size);
101 rc = ptlrpc_queue_wait(req);
/*
 * mdc_getattr - send an MDS_GETATTR request for the object named by ino.
 *
 * conn:    connection handle; class_conn2cliimp(conn) supplies the import.
 * ino:     object id; combined with type by ll_ino2fid() into body->fid1.
 * type:    passed to ll_ino2fid() together with ino.
 * valid:   presumably the attribute mask requested from the MDS - TODO
 *          confirm where it is packed.
 * ea_size: number of bytes to reserve for MD/symlink data in the reply; it
 *          is stored in size[bufcount] and in body->size.
 * request: presumably receives the prepared request for the caller - TODO
 *          confirm against callers.
 *
 * The request carries one buffer of sizeof(struct mds_body).  The reply
 * length is computed from bufcount entries of size[].  After
 * ptlrpc_queue_wait() the reply body is unpacked and its mode is logged.
 * rc is set to -ENOMEM on the GOTO(out, ...) path and otherwise to the
 * result of ptlrpc_queue_wait().
 */
108 int mdc_getattr(struct lustre_handle *conn,
109 obd_id ino, int type, unsigned long valid, size_t ea_size,
110 struct ptlrpc_request **request)
112 struct ptlrpc_request *req;
113 struct mds_body *body;
114 int rc, size[2] = {sizeof(*body), 0}, bufcount = 1;
117 /* XXX do we need to make another request here? We just did a getattr
118 * to do the lookup in the first place.
120 req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_GETATTR, 1, size,
123 GOTO(out, rc = -ENOMEM);
125 body = lustre_msg_buf(req->rq_reqmsg, 0);
126 ll_ino2fid(&body->fid1, ino, 0, type);
130 size[bufcount] = ea_size;
132 body->size = ea_size;
133 CDEBUG(D_INODE, "reserving %d bytes for MD/symlink in packet\n",
136 req->rq_replen = lustre_msg_size(bufcount, size);
137 mds_pack_req_body(req);
139 rc = ptlrpc_queue_wait(req);
142 body = lustre_msg_buf(req->rq_repmsg, 0);
143 mds_unpack_body(body);
144 CDEBUG(D_NET, "mode: %o\n", body->mode);
// d_delete_aliases - move every dentry aliased to inode onto the orphan list.
//
// With dcache_lock held, walks inode->i_dentry (entries are linked through
// d_alias), takes each dentry off whatever list its d_hash is on, and adds it
// to the ll_orphan_dentry_list of the ll_sb_info returned by ll_i2sbi(inode).
153 void d_delete_aliases(struct inode *inode)
155 struct dentry *dentry = NULL;
156 struct list_head *tmp;
157 struct ll_sb_info *sbi = ll_i2sbi(inode);
160 spin_lock(&dcache_lock);
// The walk follows d_alias while only d_hash is relinked, so the list being
// iterated is not modified inside the loop.
161 list_for_each(tmp, &inode->i_dentry) {
162 dentry = list_entry(tmp, struct dentry, d_alias);
164 list_del_init(&dentry->d_hash);
165 list_add(&dentry->d_hash, &sbi->ll_orphan_dentry_list);
168 spin_unlock(&dcache_lock);
// mdc_blocking_ast - lock callback handed to ldlm_cli_enqueue() by
// mdc_enqueue().
//
// lock:     the lock the callback is about.
// desc:     not referenced in the statements shown for this function.
// data:     treated as a struct inode pointer in the LDLM_CB_CANCELING case.
// data_len: asserted to equal sizeof(struct inode) in that case.
// flag:     presumably selects between the LDLM_CB_BLOCKING and
//           LDLM_CB_CANCELING cases - TODO confirm against the switch.
172 static int mdc_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
173 void *data, __u32 data_len, int flag)
176 struct lustre_handle lockh;
// Blocking case: turn the lock into a handle and cancel it; the result of
// ldlm_cli_cancel() is logged.
180 case LDLM_CB_BLOCKING:
181 ldlm_lock2handle(lock, &lockh);
182 rc = ldlm_cli_cancel(&lockh);
184 CDEBUG(D_INODE, "ldlm_cli_cancel: %d\n", rc);
188 case LDLM_CB_CANCELING: {
189 /* Invalidate all dentries associated with this inode */
190 struct inode *inode = data;
192 #warning "FIXME: what tells us that 'inode' is valid at all?"
193 if (inode->i_state & I_FREEING)
// NOTE(review): inode->i_state is read above, before this NULL assertion, so
// the assertion cannot catch a NULL data pointer - confirm intended order.
196 LASSERT(inode != NULL);
197 LASSERT(data_len == sizeof(*inode));
// Directories additionally get their pages invalidated.
199 if (S_ISDIR(inode->i_mode)) {
200 CDEBUG(D_INODE, "invalidating inode %lu\n",
203 ll_invalidate_inode_pages(inode);
// The root inode of the superblock keeps its dentries.
206 if (inode != inode->i_sb->s_root->d_inode) {
207 /* XXX should this igrab move up 12 lines? */
// NOTE(review): igrab() is called inside LASSERT; if LASSERT can be compiled
// out, the inode reference would not be taken - confirm.
208 LASSERT(igrab(inode) == inode);
209 d_delete_aliases(inode);
/*
 * mdc_store_inode_generation - copy the fid returned by the server into the
 * create record of the request.
 *
 * req:    request whose rq_reqmsg holds a struct mds_rec_create at buffer
 *         reqoff and whose rq_repmsg holds a struct mds_body at buffer
 *         repoff.
 * reqoff: buffer index of the create record in the request message.
 *
 * The whole of body->fid1 is copied into rec->cr_replayfid; the generation
 * and id are logged first.
 */
221 /* This should be called with both the request and the reply still packed. */
222 void mdc_store_inode_generation(struct ptlrpc_request *req, int reqoff,
225 struct mds_rec_create *rec = lustre_msg_buf(req->rq_reqmsg, reqoff);
226 struct mds_body *body = lustre_msg_buf(req->rq_repmsg, repoff);
228 DEBUG_REQ(D_HA, req, "storing generation %x for ino "LPD64,
229 body->fid1.generation, body->fid1.id);
/* the copy length is taken from the destination field */
230 memcpy(&rec->cr_replayfid, &body->fid1, sizeof rec->cr_replayfid);
/*
 * mdc_enqueue - build an intent LDLM_ENQUEUE request for it->it_op and
 * enqueue it on the resource named by dir.
 *
 * conn:       connection handle; used with class_conn2obd() and
 *             class_conn2cliimp().
 * lock_type:  passed through to ldlm_cli_enqueue().
 * it:         lookup intent.  it_op selects the request layout, it_mode is
 *             adjusted for the create-type intents, and it_disposition,
 *             it_status and it_lock_mode are filled in at the end.
 * lock_mode:  passed to ldlm_cli_enqueue() and ldlm_lock_decref(), and
 *             stored in it->it_lock_mode.
 * dir:        parent directory inode; its i_ino and i_generation form the
 *             resource id.
 * de:         dentry whose d_name is packed into the request.
 * lockh:      lock handle; zeroed on ELDLM_LOCK_ABORTED and replaced by the
 *             handle of an existing lock when ldlm_lock_match() finds one.
 * tgt/tgtlen: handed to mds_create_pack() for the create-type intents.
 * data:       handed to ldlm_cli_enqueue() after the blocking callback.
 * datalen:    presumably the length of data - TODO confirm.
 *
 * Request layout for every intent except IT_READDIR: buffer 0 is the
 * struct ldlm_request, buffer 1 the struct ldlm_intent, buffer 2 the
 * operation-specific record, and the following buffers carry names.
 */
233 int mdc_enqueue(struct lustre_handle *conn, int lock_type,
234 struct lookup_intent *it, int lock_mode, struct inode *dir,
235 struct dentry *de, struct lustre_handle *lockh,
236 char *tgt, int tgtlen, void *data, int datalen)
238 struct ptlrpc_request *req;
239 struct obd_device *obddev = class_conn2obd(conn);
/* resource id: inode number and generation of the parent directory */
240 __u64 res_id[RES_NAME_SIZE] = {dir->i_ino, (__u64)dir->i_generation};
/* entries 2..5 start at zero and are filled in per intent below */
241 int size[6] = {sizeof(struct ldlm_request), sizeof(struct ldlm_intent)};
242 int rc, flags = LDLM_FL_HAS_INTENT;
/* reply buffers: lock reply, mds_body, and room for the largest MDS EA */
243 int repsize[3] = {sizeof(struct ldlm_reply),
244 sizeof(struct mds_body),
245 obddev->u.cli.cl_max_mds_easize};
246 struct ldlm_reply *dlm_rep;
247 struct ldlm_intent *lit;
248 struct ldlm_request *lockreq;
251 LDLM_DEBUG_NOLOCK("mdsintent %s parent dir %lu",
252 ldlm_it2str(it->it_op), dir->i_ino);
/* create-type intents: the file type bit is ORed into it_mode, then the
 * umask of the current process is applied */
254 if (it->it_op & (IT_MKDIR | IT_CREAT | IT_SYMLINK | IT_MKNOD)) {
257 it->it_mode |= S_IFDIR;
259 case (IT_CREAT|IT_OPEN):
261 it->it_mode |= S_IFREG;
264 it->it_mode |= S_IFLNK;
267 it->it_mode &= ~current->fs->umask;
/* buffers: create record, new name plus NUL, target plus NUL */
269 size[2] = sizeof(struct mds_rec_create);
270 size[3] = de->d_name.len + 1;
271 size[4] = tgtlen + 1;
272 req = ptlrpc_prep_req(class_conn2cliimp(conn), LDLM_ENQUEUE, 5,
277 /* pack the intent */
278 lit = lustre_msg_buf(req->rq_reqmsg, 1);
/* NOTE(review): the opcode is packed with NTOH__u64 here and in every
 * branch below, while other request fields in this file are packed with
 * HTON__u32 - confirm this is intended. */
279 lit->opc = NTOH__u64((__u64)it->it_op);
281 /* pack the intended request */
282 mds_create_pack(req, 2, dir, it->it_mode, 0, current->fsuid,
283 current->fsgid, CURRENT_TIME, de->d_name.name,
284 de->d_name.len, tgt, tgtlen);
285 req->rq_replen = lustre_msg_size(3, repsize);
/* rename: it_data carries the source dentry */
286 } else if (it->it_op == IT_RENAME2) {
287 struct dentry *old_de = it->it_data;
289 size[2] = sizeof(struct mds_rec_rename);
290 size[3] = old_de->d_name.len + 1;
291 size[4] = de->d_name.len + 1;
292 req = ptlrpc_prep_req(class_conn2cliimp(conn), LDLM_ENQUEUE, 5,
297 /* pack the intent */
298 lit = lustre_msg_buf(req->rq_reqmsg, 1);
299 lit->opc = NTOH__u64((__u64)it->it_op);
301 /* pack the intended request */
302 mds_rename_pack(req, 2, old_de->d_parent->d_inode, dir,
303 old_de->d_name.name, old_de->d_name.len,
304 de->d_name.name, de->d_name.len);
305 req->rq_replen = lustre_msg_size(3, repsize);
/* link: it_data carries the dentry of the existing object */
306 } else if (it->it_op == IT_LINK2) {
307 struct dentry *old_de = it->it_data;
309 size[2] = sizeof(struct mds_rec_link);
310 size[3] = de->d_name.len + 1;
311 req = ptlrpc_prep_req(class_conn2cliimp(conn), LDLM_ENQUEUE, 4,
316 /* pack the intent */
317 lit = lustre_msg_buf(req->rq_reqmsg, 1);
318 lit->opc = NTOH__u64((__u64)it->it_op);
320 /* pack the intended request */
321 mds_link_pack(req, 2, old_de->d_inode, dir,
322 de->d_name.name, de->d_name.len);
323 req->rq_replen = lustre_msg_size(3, repsize);
/* unlink and rmdir share one layout; only the mode passed to the packer
 * differs */
324 } else if (it->it_op == IT_UNLINK || it->it_op == IT_RMDIR) {
325 size[2] = sizeof(struct mds_rec_unlink);
326 size[3] = de->d_name.len + 1;
327 req = ptlrpc_prep_req(class_conn2cliimp(conn), LDLM_ENQUEUE, 4,
332 /* pack the intent */
333 lit = lustre_msg_buf(req->rq_reqmsg, 1);
334 lit->opc = NTOH__u64((__u64)it->it_op);
336 /* pack the intended request */
337 mds_unlink_pack(req, 2, dir, NULL,
338 it->it_op == IT_UNLINK ? S_IFREG : S_IFDIR,
339 de->d_name.name, de->d_name.len);
341 req->rq_replen = lustre_msg_size(3, repsize);
/* these intents all pack a getattr-by-name on dir */
342 } else if (it->it_op & (IT_GETATTR | IT_RENAME | IT_LINK |
343 IT_OPEN | IT_SETATTR | IT_LOOKUP | IT_READLINK)) {
344 size[2] = sizeof(struct mds_body);
345 size[3] = de->d_name.len + 1;
347 req = ptlrpc_prep_req(class_conn2cliimp(conn), LDLM_ENQUEUE, 4,
352 /* pack the intent */
353 lit = lustre_msg_buf(req->rq_reqmsg, 1);
354 lit->opc = NTOH__u64((__u64)it->it_op);
356 /* pack the intended request */
357 mds_getattr_pack(req, 2, dir, de->d_name.name, de->d_name.len);
359 /* get ready for the reply */
360 req->rq_replen = lustre_msg_size(3, repsize);
/* readdir: a single request buffer and a single reply buffer, no intent */
361 } else if (it->it_op == IT_READDIR) {
362 req = ptlrpc_prep_req(class_conn2cliimp(conn), LDLM_ENQUEUE, 1,
367 /* get ready for the reply */
368 req->rq_replen = lustre_msg_size(1, repsize);
/* completion is handled by ldlm_completion_ast, blocking and cancel
 * callbacks by mdc_blocking_ast with data as its argument */
374 rc = ldlm_cli_enqueue(conn, req, obddev->obd_namespace, NULL, res_id,
375 lock_type, NULL, 0, lock_mode, &flags,
376 ldlm_completion_ast, mdc_blocking_ast, data,
/* the flag is ORed into the first __u32 of request buffer 2 */
379 if (it->it_op != IT_READDIR) {
380 /* XXX This should become a lustre_msg flag, but for now... */
381 __u32 *opp = lustre_msg_buf(req->rq_reqmsg, 2);
382 *opp |= REINT_REPLAYING;
386 /* This can go when we're sure that this can never happen */
/* an aborted lock leaves the caller with an all-zero handle */
389 if (rc == ELDLM_LOCK_ABORTED) {
391 memset(lockh, 0, sizeof(*lockh));
393 } else if (rc != 0) {
394 CERROR("ldlm_cli_enqueue: %d\n", rc);
397 /* The server almost certainly gave us a lock other than the one
398 * that we asked for. If we already have a matching lock, then
399 * cancel this one--we don't need two. */
400 struct ldlm_lock *lock = ldlm_handle2lock(lockh);
401 struct lustre_handle lockh2;
404 LDLM_DEBUG(lock, "matching against this");
/* lockh2 starts as a copy of the new handle and is what gets copied back
 * into lockh after the new lock is cancelled */
406 memcpy(&lockh2, lockh, sizeof(lockh2));
407 if (ldlm_lock_match(NULL, NULL, LDLM_PLAIN, NULL, 0, LCK_NL,
409 /* We already have a lock; cancel the old one */
410 ldlm_lock_decref(lockh, lock_mode);
411 ldlm_cli_cancel(lockh);
412 memcpy(lockh, &lockh2, sizeof(lockh2));
417 /* On replay, we don't want the lock granted. */
418 lockreq = lustre_msg_buf(req->rq_reqmsg, 0);
419 lockreq->lock_flags |= LDLM_FL_INTENT_ONLY;
/* report the server policy results and the lock mode through the intent */
421 dlm_rep = lustre_msg_buf(req->rq_repmsg, 0);
422 it->it_disposition = (int) dlm_rep->lock_policy_res1;
423 it->it_status = (int) dlm_rep->lock_policy_res2;
424 it->it_lock_mode = lock_mode;
/*
 * mdc_cancel_unused - call ldlm_cli_cancel_unused() for the resource of an
 * inode.
 *
 * conn:  connection handle; class_conn2obd(conn) yields the obd whose
 *        obd_namespace is passed down.
 * inode: its i_ino and i_generation form the resource id.
 *
 * The result of ldlm_cli_cancel_unused() is returned through RETURN().
 */
430 int mdc_cancel_unused(struct lustre_handle *conn, struct inode *inode,
/* same resource id layout as mdc_enqueue(): inode number, then generation */
433 __u64 res_id[RES_NAME_SIZE] = {inode->i_ino, inode->i_generation};
434 struct obd_device *obddev = class_conn2obd(conn);
436 RETURN(ldlm_cli_cancel_unused(obddev->obd_namespace, res_id, flags));
/*
 * Data stored in a spare buffer of an MDS_OPEN request by mdc_open() and
 * read back by mdc_replay_open().
 */
439 struct replay_open_data {
/* file handle that mdc_replay_open() overwrites with the handle from the
 * reply; mdc_open() sets it to the fh pointer supplied by its caller */
440 struct lustre_handle *fh;
/*
 * mdc_replay_open - replay callback installed by mdc_open() in
 * req->rq_replay_cb.
 *
 * Locates the struct replay_open_data stored in the request, unpacks the
 * reply body, logs the old and new handle values, and copies the handle from
 * the reply over the handle that saved->fh points to.
 */
443 static void mdc_replay_open(struct ptlrpc_request *req)
446 struct replay_open_data *saved;
447 struct mds_body *body = lustre_msg_buf(req->rq_repmsg, 0);
/* the op flags record whether mdc_open() packed an EA; presumably this
 * selects which request buffer holds the saved data - TODO confirm */
449 if (lustre_msg_get_op_flags(req->rq_reqmsg) & MDS_OPEN_HAS_EA)
454 saved = lustre_msg_buf(req->rq_reqmsg, offset);
455 mds_unpack_body(body);
456 CDEBUG(D_HA, "updating from "LPD64"/"LPD64" to "LPD64"/"LPD64"\n",
457 saved->fh->addr, saved->fh->cookie,
458 body->handle.addr, body->handle.cookie);
459 memcpy(saved->fh, &body->handle, sizeof(body->handle));
/*
 * mdc_open - send an MDS_OPEN request, optionally carrying striping MD.
 *
 * conn:     connection handle; class_conn2cliimp(conn) supplies the import.
 * ino/type: combined by ll_ino2fid() into body->fid1.
 * flags:    open flags, stored in body->flags through HTON__u32().
 * lmm:      optional MD blob; packed into request buffer 1 when both lmm
 *           and lmm_size are non-zero.
 * lmm_size: length of lmm in bytes.
 * fh:       in/out file handle.  Its current value is copied into the
 *           request, it is overwritten with the handle from the reply, and
 *           the pointer itself is saved in the request for
 *           mdc_replay_open().
 * request:  presumably receives the prepared request for the caller - TODO
 *           confirm against callers.
 *
 * The request is marked PTL_RPC_FL_REPLAY.  rc is set to -ENOMEM on the
 * GOTO(out, ...) path and otherwise to the result of ptlrpc_queue_wait().
 */
462 int mdc_open(struct lustre_handle *conn, obd_id ino, int type, int flags,
463 struct lov_mds_md *lmm, int lmm_size, struct lustre_handle *fh,
464 struct ptlrpc_request **request)
466 struct mds_body *body;
467 struct replay_open_data *replay_data;
/* default layout: buffer 0 is the body, buffer 1 the replay data */
468 int rc, size[3] = {sizeof(*body), sizeof(*replay_data)}, bufcount = 2;
469 struct ptlrpc_request *req;
/* with an EA present the replay data length moves to slot 2 */
472 if (lmm && lmm_size) {
474 size[2] = size[1]; /* shuffle the spare data along */
478 req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_OPEN, bufcount, size,
481 GOTO(out, rc = -ENOMEM);
483 req->rq_flags |= PTL_RPC_FL_REPLAY;
484 body = lustre_msg_buf(req->rq_reqmsg, 0);
486 ll_ino2fid(&body->fid1, ino, 0, type);
487 body->flags = HTON__u32(flags);
488 memcpy(&body->handle, fh, sizeof(body->handle));
490 if (lmm && lmm_size) {
491 CDEBUG(D_INODE, "sending %u bytes MD for ino "LPU64"\n",
/* the op flag is what mdc_replay_open() tests later */
493 lustre_msg_set_op_flags(req->rq_reqmsg, MDS_OPEN_HAS_EA);
494 memcpy(lustre_msg_buf(req->rq_reqmsg, 1), lmm, lmm_size);
495 body->flags |= HTON__u32(OBD_MD_FLEASIZE);
/* the reply is sized for one buffer of size[0], the mds_body */
498 req->rq_replen = lustre_msg_size(1, size);
500 rc = ptlrpc_queue_wait(req);
502 body = lustre_msg_buf(req->rq_repmsg, 0);
503 mds_unpack_body(body);
504 memcpy(fh, &body->handle, sizeof(*fh));
507 /* If open is replayed, we need to fix up the fh. */
508 req->rq_replay_cb = mdc_replay_open;
/* NOTE(review): the index is chosen on lmm alone, while the EA buffer is
 * only set up when lmm && lmm_size; a non-NULL lmm with lmm_size == 0 would
 * select buffer 2 - confirm callers never do that. */
509 replay_data = lustre_msg_buf(req->rq_reqmsg, lmm ? 2 : 1);
/* NOTE(review): only the pointer is stored, so the caller-owned fh
 * presumably has to stay valid while the request can be replayed. */
510 replay_data->fh = fh;
/*
 * mdc_close - send an MDS_CLOSE request for an open file handle.
 *
 * conn:     connection handle; class_conn2cliimp(conn) supplies the import.
 * ino/type: combined by ll_ino2fid() into body->fid1.
 * fh:       file handle; copied into body->handle of the request.
 * request:  presumably receives the prepared request for the caller - TODO
 *           confirm against callers.
 *
 * rc is set to -ENOMEM on the GOTO(out, ...) path and otherwise to the
 * result of ptlrpc_queue_wait().
 */
518 int mdc_close(struct lustre_handle *conn, obd_id ino, int type,
519 struct lustre_handle *fh, struct ptlrpc_request **request)
521 struct mds_body *body;
522 int rc, size = sizeof(*body);
523 struct ptlrpc_request *req;
526 req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_CLOSE, 1, &size,
529 GOTO(out, rc = -ENOMEM);
531 body = lustre_msg_buf(req->rq_reqmsg, 0);
532 ll_ino2fid(&body->fid1, ino, 0, type);
533 memcpy(&body->handle, fh, sizeof(body->handle));
/* the reply is sized for zero buffers */
535 req->rq_replen = lustre_msg_size(0, NULL);
537 rc = ptlrpc_queue_wait(req);
/*
 * mdc_readpage - send an MDS_READPAGE request with a one-page bulk
 * descriptor registered on MDS_BULK_PORTAL.
 *
 * conn:     connection handle; used for the import and for the ptlrpc
 *           connection taken from the client import.
 * ino/type: passed to mds_readdir_pack() together with offset.
 * offset:   passed to mds_readdir_pack().
 * addr:     presumably the page buffer that receives the bulk data - TODO
 *           confirm where it is attached to the bulk page.
 * request:  presumably receives the prepared request for the caller - TODO
 *           confirm against callers.
 *
 * rc is set to -ENOMEM when either the bulk descriptor or the request cannot
 * be prepared (labels out and out2 respectively), to the result of
 * ptlrpc_register_bulk(), and later to the result of ptlrpc_queue_wait().
 */
545 int mdc_readpage(struct lustre_handle *conn, obd_id ino, int type, __u64 offset,
546 char *addr, struct ptlrpc_request **request)
548 struct ptlrpc_connection *connection =
549 client_conn2cli(conn)->cl_import.imp_connection;
550 struct ptlrpc_request *req = NULL;
551 struct ptlrpc_bulk_desc *desc = NULL;
552 struct ptlrpc_bulk_page *bulk = NULL;
553 struct mds_body *body;
554 int rc, size = sizeof(*body);
557 CDEBUG(D_INODE, "inode: %ld\n", (long)ino);
/* the bulk descriptor is prepared before the request */
559 desc = ptlrpc_prep_bulk(connection);
561 GOTO(out, rc = -ENOMEM);
563 req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_READPAGE, 1, &size,
566 GOTO(out2, rc = -ENOMEM);
/* one bulk page of PAGE_SIZE bytes, tagged with the xid of the request */
568 bulk = ptlrpc_prep_bulk_page(desc);
569 bulk->bp_buflen = PAGE_SIZE;
571 bulk->bp_xid = req->rq_xid;
572 desc->bd_ptl_ev_hdlr = NULL;
573 desc->bd_portal = MDS_BULK_PORTAL;
575 rc = ptlrpc_register_bulk(desc);
577 CERROR("couldn't setup bulk sink: error %d.\n", rc);
581 mds_readdir_pack(req, offset, ino, type);
583 req->rq_replen = lustre_msg_size(1, &size);
584 rc = ptlrpc_queue_wait(req);
586 ptlrpc_abort_bulk(desc);
589 body = lustre_msg_buf(req->rq_repmsg, 0);
590 mds_unpack_body(body);
595 ptlrpc_bulk_decref(desc);
/*
 * mdc_statfs - o_statfs method of mdc_obd_ops; sends MDS_STATFS and unpacks
 * the reply into osfs.
 *
 * conn: connection handle; class_conn2cliimp(conn) supplies the import.
 * osfs: output; filled by obd_statfs_unpack() from reply buffer 0.
 *
 * The request has no buffers of its own; the reply is sized for one buffer
 * of sizeof(struct obd_statfs).  The function calls ptlrpc_req_finished()
 * on its own request.
 */
601 static int mdc_statfs(struct lustre_handle *conn, struct obd_statfs *osfs)
603 struct ptlrpc_request *req;
604 int rc, size = sizeof(*osfs);
607 req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_STATFS, 0, NULL,
612 req->rq_replen = lustre_msg_size(1, &size);
614 rc = ptlrpc_queue_wait(req);
619 obd_statfs_unpack(osfs, lustre_msg_buf(req->rq_repmsg, 0));
623 ptlrpc_req_finished(req);
/*
 * mdc_attach - o_attach method of mdc_obd_ops.
 *
 * Registers dev with lprocfs using the status_var_nm_1 table, passing dev
 * itself as the last argument, and returns the result of lprocfs_reg_obd().
 * len and data are not used.
 */
628 static int mdc_attach(struct obd_device *dev, obd_count len, void *data)
630 return lprocfs_reg_obd(dev, status_var_nm_1, dev);
/*
 * mdc_detach - o_detach method of mdc_obd_ops; returns the result of
 * lprocfs_dereg_obd(dev), the counterpart of the call made in mdc_attach().
 */
633 static int mdc_detach(struct obd_device *dev)
635 return lprocfs_dereg_obd(dev);
/*
 * mdc_recover - recovery callback that mdc_connect() stores in
 * imp->imp_recover.
 *
 * imp:   the import being recovered.
 * phase: presumably selects one of the PTLRPC_RECOVD_PHASE_* cases below -
 *        TODO confirm against the switch.
 */
638 static int mdc_recover(struct obd_import *imp, int phase)
/* prepare: cancel unused locks in the namespace of the import obd with
 * LDLM_FL_LOCAL_ONLY */
644 case PTLRPC_RECOVD_PHASE_PREPARE:
645 ldlm_cli_cancel_unused(imp->imp_obd->obd_namespace,
646 NULL, LDLM_FL_LOCAL_ONLY);
/* recover: reconnect, replay requests, replay locks, raise the import to
 * LUSTRE_CONN_FULL, wake delayed requests, then resend */
648 case PTLRPC_RECOVD_PHASE_RECOVER:
649 rc = ptlrpc_reconnect_import(imp, MDS_CONNECT);
651 RETURN(ptlrpc_replay(imp, 0));
655 rc = ptlrpc_replay(imp, 0 /* no last flag*/);
659 rc = ldlm_replay_locks(imp);
/* imp_level is changed under imp_lock */
663 spin_lock(&imp->imp_lock);
664 imp->imp_level = LUSTRE_CONN_FULL;
665 spin_unlock(&imp->imp_lock);
667 ptlrpc_wake_delayed(imp);
669 rc = ptlrpc_resend(imp);
/*
 * mdc_connect - o_connect method of mdc_obd_ops.
 *
 * Installs mdc_recover as imp_recover on the client import embedded in obd,
 * then delegates to client_obd_connect() with all arguments unchanged and
 * returns its result.
 */
679 static int mdc_connect(struct lustre_handle *conn, struct obd_device *obd,
680 obd_uuid_t cluuid, struct recovd_obd *recovd,
681 ptlrpc_recovery_cb_t recover)
683 struct obd_import *imp = &obd->u.cli.cl_import;
/* set before connecting; the recover argument is still passed through */
684 imp->imp_recover = mdc_recover;
685 return client_obd_connect(conn, obd, cluuid, recovd, recover);
/*
 * Method table registered for the MDC type at module init.  attach, detach,
 * connect and statfs are implemented in this file; setup, cleanup and
 * disconnect use the client_obd_* functions.
 */
688 struct obd_ops mdc_obd_ops = {
689 o_attach: mdc_attach,
690 o_detach: mdc_detach,
691 o_setup: client_obd_setup,
692 o_cleanup: client_obd_cleanup,
/* wraps client_obd_connect() to install the recovery callback first */
693 o_connect: mdc_connect,
694 o_disconnect: client_obd_disconnect,
695 o_statfs: mdc_statfs,
/*
 * Module init: registers mdc_obd_ops together with the status_class_var
 * table through class_register_type() and returns its result.
 * NOTE(review): the function carries a ptlrpc_ prefix although it registers
 * the MDC method table - consider renaming.
 */
698 static int __init ptlrpc_request_init(void)
700 return class_register_type(&mdc_obd_ops, status_class_var,
/*
 * Module exit: unregisters the type named LUSTRE_MDC_NAME through
 * class_unregister_type().
 */
704 static void __exit ptlrpc_request_exit(void)
706 class_unregister_type(LUSTRE_MDC_NAME);
/* Module metadata. */
709 MODULE_AUTHOR("Cluster File Systems <info@clusterfs.com>");
710 MODULE_DESCRIPTION("Lustre Metadata Client v1.0");
711 MODULE_LICENSE("GPL");
/*
 * Symbols exported to other modules.
 * NOTE(review): mdc_create, mdc_unlink, mdc_rename, mdc_link and mdc_setattr
 * are exported here but not defined in the visible part of this file;
 * presumably they live in another source file of this module - confirm.
 */
713 EXPORT_SYMBOL(d_delete_aliases);
714 EXPORT_SYMBOL(mdc_getstatus);
715 EXPORT_SYMBOL(mdc_getlovinfo);
716 EXPORT_SYMBOL(mdc_enqueue);
717 EXPORT_SYMBOL(mdc_cancel_unused);
718 EXPORT_SYMBOL(mdc_getattr);
719 EXPORT_SYMBOL(mdc_create);
720 EXPORT_SYMBOL(mdc_unlink);
721 EXPORT_SYMBOL(mdc_rename);
722 EXPORT_SYMBOL(mdc_link);
723 EXPORT_SYMBOL(mdc_readpage);
724 EXPORT_SYMBOL(mdc_setattr);
725 EXPORT_SYMBOL(mdc_close);
726 EXPORT_SYMBOL(mdc_open);
728 EXPORT_SYMBOL(mdc_store_inode_generation);
/* entry and exit points of the module */
730 module_init(ptlrpc_request_init);
731 module_exit(ptlrpc_request_exit);