1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
6 * This file is part of Lustre, http://www.sf.net/projects/lustre/
8 * Lustre is free software; you can redistribute it and/or
9 * modify it under the terms of version 2 of the GNU General Public
10 * License as published by the Free Software Foundation.
12 * Lustre is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Lustre; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24 #define DEBUG_SUBSYSTEM S_MDC
26 #include <linux/module.h>
27 #include <linux/miscdevice.h>
28 #include <linux/lustre_mds.h>
29 #include <linux/lustre_lite.h>
30 #include <linux/lustre_dlm.h>
31 #include <linux/init.h>
32 #include <linux/obd_lov.h>
33 #include <linux/lprocfs_status.h>
/* Presumably a misc-device minor number; it is not referenced in the visible
 * part of this file -- TODO confirm it is still needed. */
35 #define REQUEST_MINOR 244
/* Declared here but not called anywhere in the visible part of this file. */
37 extern int mds_queue_req(struct ptlrpc_request *);
/* lprocfs variable table handed to lprocfs_reg_obd() by mdc_attach(). */
38 extern struct lprocfs_vars status_var_nm_1[];
/* lprocfs variable table handed to class_register_type() at module init. */
39 extern struct lprocfs_vars status_class_var[];
/*
 * mdc_getstatus - send an MDS_GETSTATUS request on `conn` and copy the fid1
 * field of the reply body into *rootfid (logged below as the "root ino").
 *
 * Visible flow: build a one-buffer request sized for struct mds_body, queue
 * it and wait, fold the reply status into rc, unpack the reply body, memcpy
 * its fid1 into the caller's rootfid, log, then release the request.
 *
 * NOTE(review): the braces, the test guarding the GOTO, the `out` label and
 * the return statement are not present in this view, so the exact error and
 * return paths cannot be confirmed from here.
 */
41 /* should become mdc_getinfo() */
42 int mdc_getstatus(struct lustre_handle *conn, struct ll_fid *rootfid)
44 struct ptlrpc_request *req;
45 struct mds_body *body;
46 int rc, size = sizeof(*body);
/* One request buffer, sized for a struct mds_body. */
49 req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_GETSTATUS, 1, &size,
52 GOTO(out, rc = -ENOMEM);
54 body = lustre_msg_buf(req->rq_reqmsg, 0);
55 req->rq_level = LUSTRE_CONN_CON;
/* The reply is expected to carry one buffer of the same size. */
56 req->rq_replen = lustre_msg_size(1, &size);
58 mds_pack_req_body(req);
59 rc = ptlrpc_queue_wait(req);
60 rc = ptlrpc_check_status(req, rc);
/* From here on `body` points into the reply message, not the request. */
63 body = lustre_msg_buf(req->rq_repmsg, 0);
64 mds_unpack_body(body);
65 memcpy(rootfid, &body->fid1, sizeof(*rootfid));
67 CDEBUG(D_NET, "root ino="LPU64", last_committed="LPU64
68 ", last_xid="LPU64"\n",
69 rootfid->id, req->rq_repmsg->last_committed,
70 req->rq_repmsg->last_xid);
75 ptlrpc_req_finished(req);
/*
 * mdc_getlovinfo - send an MDS_GETLOVINFO request on `mdc_connh`.
 *
 * The request carries a single struct mds_status_req whose flags are set to
 * MDS_STATUS_LOV and whose repbuf is set to 8192 (both via HTON__u32). The
 * reply length is computed for two buffers from size[].
 *
 * NOTE(review): size[] is initialised with only its first element, so size[1]
 * is zero unless one of the lines missing from this view assigns it -- confirm
 * against the full source. How `obd` and `*request` are used is also not
 * visible here.
 */
79 int mdc_getlovinfo(struct obd_device *obd, struct lustre_handle *mdc_connh,
80 struct ptlrpc_request **request)
82 struct ptlrpc_request *req;
83 struct mds_status_req *streq;
84 int rc, size[2] = {sizeof(*streq)};
87 req = ptlrpc_prep_req(class_conn2cliimp(mdc_connh), MDS_GETLOVINFO, 1,
90 GOTO(out, rc = -ENOMEM);
93 streq = lustre_msg_buf(req->rq_reqmsg, 0);
94 streq->flags = HTON__u32(MDS_STATUS_LOV);
95 streq->repbuf = HTON__u32(8192);
97 /* prepare for reply */
98 req->rq_level = LUSTRE_CONN_CON;
101 req->rq_replen = lustre_msg_size(2, size);
103 rc = ptlrpc_queue_wait(req);
104 rc = ptlrpc_check_status(req, rc);
/*
 * mdc_getattr - send an MDS_GETATTR request for inode number `ino` of file
 * type `type`.
 *
 * The request holds one struct mds_body whose fid1 is filled from ino/type by
 * ll_ino2fid(). Depending on `valid`, the reply is sized for an optional
 * second buffer: one branch uses the client's cl_max_mds_easize, the
 * OBD_MD_LINKNAME branch records `ea_size` in body->size for a symlink
 * target. After the RPC the reply body is unpacked and its mode is logged.
 *
 * NOTE(review): the condition of the first branch, the updates to `bufcount`
 * and size[1] in the symlink branch, and the handling of `*request` are on
 * lines missing from this view -- confirm against the full source.
 */
111 int mdc_getattr(struct lustre_handle *conn,
112 obd_id ino, int type, unsigned long valid, size_t ea_size,
113 struct ptlrpc_request **request)
115 struct ptlrpc_request *req;
116 struct mds_body *body;
117 int rc, size[2] = {sizeof(*body), 0}, bufcount = 1;
120 req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_GETATTR, 1, size,
123 GOTO(out, rc = -ENOMEM);
125 body = lustre_msg_buf(req->rq_reqmsg, 0);
126 ll_ino2fid(&body->fid1, ino, 0, type);
130 struct client_obd *mdc = &class_conn2obd(conn)->u.cli;
132 size[1] = mdc->cl_max_mds_easize;
133 } else if (valid & OBD_MD_LINKNAME) {
136 body->size = ea_size;
137 CDEBUG(D_INODE, "allocating %d bytes for symlink in packet\n",
/* Reply length covers `bufcount` buffers taken from size[]. */
140 req->rq_replen = lustre_msg_size(bufcount, size);
141 mds_pack_req_body(req);
143 rc = ptlrpc_queue_wait(req);
144 rc = ptlrpc_check_status(req, rc);
/* `body` now refers to the reply message. */
147 body = lustre_msg_buf(req->rq_repmsg, 0);
148 mds_unpack_body(body);
149 CDEBUG(D_NET, "mode: %o\n", body->mode);
/*
 * d_delete_aliases - walk the dentries aliased to `inode` (inode->i_dentry)
 * while holding dcache_lock, unhash them (list_del_init on d_hash) and put
 * them on the superblock-info list ll_orphan_dentry_list.
 *
 * NOTE(review): the two commented-out conditions below and the missing lines
 * around them suggest some dentries may once have been skipped; which
 * statements are inside the loop body cannot be confirmed from this view.
 */
158 void d_delete_aliases(struct inode *inode)
160 struct dentry *dentry = NULL;
161 struct list_head *tmp;
162 struct ll_sb_info *sbi = ll_i2sbi(inode);
165 spin_lock(&dcache_lock);
166 list_for_each(tmp, &inode->i_dentry) {
167 dentry = list_entry(tmp, struct dentry, d_alias);
169 // if (atomic_read(&dentry->d_count))
171 //if (!list_empty(&dentry->d_lru))
/* Re-use the d_hash linkage to chain the dentry onto the orphan list. */
174 list_del_init(&dentry->d_hash);
175 list_add(&dentry->d_hash, &sbi->ll_orphan_dentry_list);
178 spin_unlock(&dcache_lock);
/*
 * mdc_blocking_ast - lock callback passed to ldlm_cli_enqueue() by
 * mdc_enqueue(). Behaviour depends on `flag`:
 *
 *   LDLM_CB_BLOCKING:  turn the lock into a handle and cancel it with
 *                      ldlm_cli_cancel(), logging the result.
 *   LDLM_CB_CANCELING: treat `data` as the struct inode attached to the lock,
 *                      invalidate cached pages for directories and, unless
 *                      the inode is the filesystem root, take a reference
 *                      with igrab() and unhash its dentries through
 *                      d_delete_aliases().
 *
 * NOTE(review): inode->i_state is read before LASSERT(inode != NULL), so the
 * assertion cannot catch a NULL `data`; the #warning below already questions
 * whether the pointer is valid at all.
 * NOTE(review): igrab() is called inside LASSERT(); if LASSERT can be compiled
 * out in some configuration the reference would never be taken -- confirm.
 * The switch statement, break/return lines and the body of the I_FREEING
 * test are not present in this view.
 */
182 static int mdc_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
183 void *data, __u32 data_len, int flag)
186 struct lustre_handle lockh;
190 case LDLM_CB_BLOCKING:
191 ldlm_lock2handle(lock, &lockh);
192 rc = ldlm_cli_cancel(&lockh);
194 CDEBUG(D_INODE, "ldlm_cli_cancel: %d\n", rc);
198 case LDLM_CB_CANCELING: {
199 /* Invalidate all dentries associated with this inode */
200 struct inode *inode = data;
202 #warning "FIXME: what tells us that 'inode' is valid at all?"
203 if (inode->i_state & I_FREEING)
206 LASSERT(inode != NULL);
207 LASSERT(data_len == sizeof(*inode));
209 if (S_ISDIR(inode->i_mode)) {
210 CDEBUG(D_INODE, "invalidating inode %ld\n",
213 ll_invalidate_inode_pages(inode);
/* The root inode's dentries are left alone. */
216 if ( inode != inode->i_sb->s_root->d_inode ) {
217 /* XXX should this igrab move up 12 lines? */
218 LASSERT(igrab(inode) == inode);
219 d_delete_aliases(inode);
/*
 * mdc_store_inode_generation - copy fid1 from the reply's struct mds_body
 * (reply buffer `repoff`) into cr_replayfid of the struct mds_rec_create held
 * in request buffer `reqoff`.
 *
 * Both messages are read through lustre_msg_buf() without any unpacking here.
 * The second line of the parameter list is missing from this view.
 */
231 /* This should be called with both the request and the reply still packed. */
232 void mdc_store_inode_generation(struct ptlrpc_request *req, int reqoff,
235 struct mds_rec_create *rec = lustre_msg_buf(req->rq_reqmsg, reqoff);
236 struct mds_body *body = lustre_msg_buf(req->rq_repmsg, repoff);
238 memcpy(&rec->cr_replayfid, &body->fid1, sizeof rec->cr_replayfid);
/*
 * mdc_enqueue - build an LDLM_ENQUEUE request that carries a lookup intent
 * and enqueue it through ldlm_cli_enqueue().
 *
 * The lock resource name is built from the parent directory: {dir->i_ino,
 * dir->i_generation}. Request buffer 0 is a struct ldlm_request and buffer 1
 * a struct ldlm_intent; what follows depends on it->it_op:
 *
 *   MKDIR/CREAT/SYMLINK/MKNOD  mds_rec_create + name + target (5 buffers)
 *   RENAME2                    mds_rec_rename + old name + new name (5)
 *   LINK2                      mds_rec_link + name (4)
 *   UNLINK/RMDIR               mds_rec_unlink + name (4)
 *   GETATTR/RENAME/LINK/OPEN/
 *   SETATTR/LOOKUP/READLINK    mds_body + name (4)
 *   READDIR                    one buffer only, no intent record packed in
 *                              the visible lines
 *
 * Every non-READDIR branch sizes the reply for three buffers (ldlm_reply,
 * mds_body, and cl_max_mds_easize bytes); READDIR sizes it for one.
 *
 * After the enqueue, the visible code: ORs REINT_REPLAYING into the first
 * word of request buffer 2 for non-READDIR intents; clears *lockh when the
 * server answered ELDLM_LOCK_ABORTED; logs other errors; otherwise looks for
 * an already-held matching lock and, if one is found, drops and cancels the
 * new lock and returns the existing handle in *lockh. Finally it sets
 * LDLM_FL_INTENT_ONLY on the stored request and copies lock_policy_res1/2
 * from the reply into it->it_disposition / it->it_status.
 *
 * NOTE(review): the intent opcode is packed with NTOH__u64 although the data
 * is outbound; presumably NTOH and HTON are the same swap -- confirm.
 * NOTE(review): many lines (prep_req continuations, failure checks, the
 * switch header and some case labels, the final else branch, return paths)
 * are missing from this view; the summary above covers only what is visible.
 */
241 int mdc_enqueue(struct lustre_handle *conn, int lock_type,
242 struct lookup_intent *it, int lock_mode, struct inode *dir,
243 struct dentry *de, struct lustre_handle *lockh,
244 char *tgt, int tgtlen, void *data, int datalen)
246 struct ptlrpc_request *req;
247 struct obd_device *obddev = class_conn2obd(conn);
248 __u64 res_id[RES_NAME_SIZE] = {dir->i_ino, (__u64)dir->i_generation};
249 int size[6] = {sizeof(struct ldlm_request), sizeof(struct ldlm_intent)};
250 int rc, flags = LDLM_FL_HAS_INTENT;
251 int repsize[3] = {sizeof(struct ldlm_reply),
252 sizeof(struct mds_body),
253 obddev->u.cli.cl_max_mds_easize};
254 struct ldlm_reply *dlm_rep;
255 struct ldlm_intent *lit;
256 struct ldlm_request *lockreq;
259 LDLM_DEBUG_NOLOCK("mdsintent %s dir %ld", ldlm_it2str(it->it_op),
262 if (it->it_op & (IT_MKDIR | IT_CREAT | IT_SYMLINK | IT_MKNOD)) {
/* Creation intents: add the file-type bits matching the operation to
 * it_mode, then apply the caller's umask. */
265 it->it_mode |= S_IFDIR;
267 case (IT_CREAT|IT_OPEN):
269 it->it_mode |= S_IFREG;
272 it->it_mode |= S_IFLNK;
275 it->it_mode &= ~current->fs->umask;
/* The "+ 1" on the name lengths presumably leaves room for a terminating
 * NUL -- TODO confirm against the pack helpers. */
277 size[2] = sizeof(struct mds_rec_create);
278 size[3] = de->d_name.len + 1;
279 size[4] = tgtlen + 1;
280 req = ptlrpc_prep_req(class_conn2cliimp(conn), LDLM_ENQUEUE, 5,
285 /* pack the intent */
286 lit = lustre_msg_buf(req->rq_reqmsg, 1);
287 lit->opc = NTOH__u64((__u64)it->it_op);
289 /* pack the intended request */
290 mds_create_pack(req, 2, dir, it->it_mode, 0, current->fsuid,
291 current->fsgid, CURRENT_TIME, de->d_name.name,
292 de->d_name.len, tgt, tgtlen);
293 req->rq_replen = lustre_msg_size(3, repsize);
294 } else if (it->it_op == IT_RENAME2) {
/* Rename: it_data carries the source dentry; `de` names the target. */
295 struct dentry *old_de = it->it_data;
297 size[2] = sizeof(struct mds_rec_rename);
298 size[3] = old_de->d_name.len + 1;
299 size[4] = de->d_name.len + 1;
300 req = ptlrpc_prep_req(class_conn2cliimp(conn), LDLM_ENQUEUE, 5,
305 /* pack the intent */
306 lit = lustre_msg_buf(req->rq_reqmsg, 1);
307 lit->opc = NTOH__u64((__u64)it->it_op);
309 /* pack the intended request */
310 mds_rename_pack(req, 2, old_de->d_parent->d_inode, dir,
311 old_de->d_name.name, old_de->d_name.len,
312 de->d_name.name, de->d_name.len);
313 req->rq_replen = lustre_msg_size(3, repsize);
314 } else if (it->it_op == IT_LINK2) {
/* Hard link: it_data carries the dentry whose inode is being linked. */
315 struct dentry *old_de = it->it_data;
317 size[2] = sizeof(struct mds_rec_link);
318 size[3] = de->d_name.len + 1;
319 req = ptlrpc_prep_req(class_conn2cliimp(conn), LDLM_ENQUEUE, 4,
324 /* pack the intent */
325 lit = lustre_msg_buf(req->rq_reqmsg, 1);
326 lit->opc = NTOH__u64((__u64)it->it_op);
328 /* pack the intended request */
329 mds_link_pack(req, 2, old_de->d_inode, dir,
330 de->d_name.name, de->d_name.len);
331 req->rq_replen = lustre_msg_size(3, repsize);
332 } else if (it->it_op == IT_UNLINK || it->it_op == IT_RMDIR) {
333 size[2] = sizeof(struct mds_rec_unlink);
334 size[3] = de->d_name.len + 1;
335 req = ptlrpc_prep_req(class_conn2cliimp(conn), LDLM_ENQUEUE, 4,
340 /* pack the intent */
341 lit = lustre_msg_buf(req->rq_reqmsg, 1);
342 lit->opc = NTOH__u64((__u64)it->it_op);
344 /* pack the intended request */
/* The mode argument tells the packer whether a regular file (UNLINK) or a
 * directory (RMDIR) is being removed. */
345 mds_unlink_pack(req, 2, dir, NULL,
346 it->it_op == IT_UNLINK ? S_IFREG : S_IFDIR,
347 de->d_name.name, de->d_name.len);
349 req->rq_replen = lustre_msg_size(3, repsize);
350 } else if (it->it_op & (IT_GETATTR | IT_RENAME | IT_LINK |
351 IT_OPEN | IT_SETATTR | IT_LOOKUP | IT_READLINK)) {
352 size[2] = sizeof(struct mds_body);
353 size[3] = de->d_name.len + 1;
355 req = ptlrpc_prep_req(class_conn2cliimp(conn), LDLM_ENQUEUE, 4,
360 /* pack the intent */
361 lit = lustre_msg_buf(req->rq_reqmsg, 1);
362 lit->opc = NTOH__u64((__u64)it->it_op);
364 /* pack the intended request */
365 mds_getattr_pack(req, 2, dir, de->d_name.name, de->d_name.len);
367 /* get ready for the reply */
368 req->rq_replen = lustre_msg_size(3, repsize);
369 } else if (it->it_op == IT_READDIR) {
370 req = ptlrpc_prep_req(class_conn2cliimp(conn), LDLM_ENQUEUE, 1,
375 /* get ready for the reply */
376 req->rq_replen = lustre_msg_size(1, repsize);
/* mdc_blocking_ast is installed as the blocking callback and `data` is the
 * pointer it later receives. */
382 rc = ldlm_cli_enqueue(conn, req, obddev->obd_namespace, NULL, res_id,
383 lock_type, NULL, 0, lock_mode, &flags,
384 ldlm_completion_ast, mdc_blocking_ast, data,
387 if (it->it_op != IT_READDIR) {
388 /* XXX This should become a lustre_msg flag, but for now... */
389 __u32 *opp = lustre_msg_buf(req->rq_reqmsg, 2);
390 *opp |= REINT_REPLAYING;
394 /* This can go when we're sure that this can never happen */
397 if (rc == ELDLM_LOCK_ABORTED) {
399 memset(lockh, 0, sizeof(*lockh));
401 } else if (rc != 0) {
402 CERROR("ldlm_cli_enqueue: %d\n", rc);
405 /* The server almost certainly gave us a lock other than the one
406 * that we asked for. If we already have a matching lock, then
407 * cancel this one--we don't need two. */
408 struct ldlm_lock *lock = ldlm_handle2lock(lockh);
409 struct lustre_handle lockh2;
412 LDLM_DEBUG(lock, "matching against this");
/* lockh2 starts as a copy of the new handle; on a successful match it is
 * copied back over *lockh below. */
414 memcpy(&lockh2, lockh, sizeof(lockh2));
415 if (ldlm_lock_match(NULL, NULL, LDLM_PLAIN, NULL, 0, LCK_NL,
417 /* We already have a lock; cancel the old one */
418 ldlm_lock_decref(lockh, lock_mode);
419 ldlm_cli_cancel(lockh);
420 memcpy(lockh, &lockh2, sizeof(lockh2));
425 /* On replay, we don't want the lock granted. */
426 lockreq = lustre_msg_buf(req->rq_reqmsg, 0);
427 lockreq->lock_flags |= LDLM_FL_INTENT_ONLY;
/* Hand the server's disposition and status for the intent back to the
 * caller through `it`. */
429 dlm_rep = lustre_msg_buf(req->rq_repmsg, 0);
430 it->it_disposition = (int) dlm_rep->lock_policy_res1;
431 it->it_status = (int) dlm_rep->lock_policy_res2;
432 it->it_lock_mode = lock_mode;
/*
 * mdc_cancel_unused - call ldlm_cli_cancel_unused() on the resource named
 * {inode->i_ino, inode->i_generation} in the namespace of the obd device
 * behind `conn`, and return its result.
 *
 * `flags` is passed straight through; its declaration is on a parameter line
 * that is missing from this view.
 */
438 int mdc_cancel_unused(struct lustre_handle *conn, struct inode *inode,
441 __u64 res_id[RES_NAME_SIZE] = {inode->i_ino, inode->i_generation};
442 struct obd_device *obddev = class_conn2obd(conn);
444 RETURN(ldlm_cli_cancel_unused(obddev->obd_namespace, res_id, flags));
/*
 * Data stored in the last buffer of an MDS_OPEN request by mdc_open() and
 * read back by mdc_replay_open().
 */
447 struct replay_open_data {
/* Pointer to the caller's file handle; overwritten with the handle from the
 * reply when the open is replayed. */
448 struct lustre_handle *fh;
/*
 * mdc_replay_open - replay callback installed by mdc_open() as
 * req->rq_replay_cb.
 *
 * Locates the struct replay_open_data stored in the request at buffer
 * `offset`, unpacks the reply body, logs the old and new handle values, and
 * copies the reply's handle over the file handle that saved->fh points to.
 *
 * NOTE(review): `offset` is declared and assigned on lines missing from this
 * view; presumably the MDS_OPEN_HAS_EA test selects buffer 2 over buffer 1,
 * mirroring the `lsm ? 2 : 1` choice in mdc_open() -- confirm.
 */
451 static void mdc_replay_open(struct ptlrpc_request *req)
454 struct replay_open_data *saved;
455 struct mds_body *body = lustre_msg_buf(req->rq_repmsg, 0);
457 if (lustre_msg_get_op_flags(req->rq_reqmsg) & MDS_OPEN_HAS_EA)
462 saved = lustre_msg_buf(req->rq_reqmsg, offset);
463 mds_unpack_body(body);
464 CDEBUG(D_HA, "updating from "LPD64"/"LPD64" to "LPD64"/"LPD64"\n",
465 saved->fh->addr, saved->fh->cookie,
466 body->handle.addr, body->handle.cookie);
467 memcpy(saved->fh, &body->handle, sizeof(body->handle));
/*
 * mdc_open - send an MDS_OPEN request for inode `ino` and return the server's
 * file handle in *fh.
 *
 * Request layout: buffer 0 is a struct mds_body (fid from ino/type, open
 * flags, and the caller's current handle). The last buffer is a struct
 * replay_open_data. When striping metadata is packed, it occupies buffer 1
 * (lsm->lsm_mds_easize bytes), the replay data moves to buffer 2 and
 * MDS_OPEN_HAS_EA is set in the message op flags.
 *
 * The request is flagged PTL_RPC_FL_REPLAY and mdc_replay_open() is installed
 * as its replay callback. replay_data->fh stores the caller's `fh` pointer
 * itself, not a copy.
 *
 * NOTE(review): because the raw `fh` pointer is kept inside the request, the
 * handle presumably has to stay valid for as long as the request can be
 * replayed -- confirm with callers.
 * NOTE(review): the `if (lsm)` style guards around the EA handling, the
 * bufcount update and the treatment of `*request` are on lines missing from
 * this view.
 */
470 int mdc_open(struct lustre_handle *conn, obd_id ino, int type, int flags,
471 struct lov_stripe_md *lsm, struct lustre_handle *fh,
472 struct ptlrpc_request **request)
474 struct mds_body *body;
475 struct replay_open_data *replay_data;
476 int rc, size[3] = {sizeof(*body), sizeof(*replay_data)}, bufcount = 2;
477 struct ptlrpc_request *req;
482 size[2] = size[1]; /* shuffle the spare data along */
484 size[1] = lsm->lsm_mds_easize;
487 req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_OPEN, bufcount, size,
490 GOTO(out, rc = -ENOMEM);
493 lustre_msg_set_op_flags(req->rq_reqmsg, MDS_OPEN_HAS_EA);
496 req->rq_flags |= PTL_RPC_FL_REPLAY;
497 body = lustre_msg_buf(req->rq_reqmsg, 0);
499 ll_ino2fid(&body->fid1, ino, 0, type);
500 body->flags = HTON__u32(flags);
501 memcpy(&body->handle, fh, sizeof(body->handle));
504 lov_packmd(lustre_msg_buf(req->rq_reqmsg, 1), lsm);
/* The reply is sized for a single buffer of size[0] bytes (a struct
 * mds_body). */
506 req->rq_replen = lustre_msg_size(1, size);
508 rc = ptlrpc_queue_wait(req);
509 rc = ptlrpc_check_status(req, rc);
511 body = lustre_msg_buf(req->rq_repmsg, 0);
512 mds_unpack_body(body);
513 memcpy(fh, &body->handle, sizeof(*fh));
516 /* If open is replayed, we need to fix up the fh. */
517 req->rq_replay_cb = mdc_replay_open;
518 replay_data = lustre_msg_buf(req->rq_reqmsg, lsm ? 2 : 1);
519 replay_data->fh = fh;
/*
 * mdc_close - send an MDS_CLOSE request for inode `ino` carrying the file
 * handle *fh.
 *
 * The request has one struct mds_body buffer (fid from ino/type, handle
 * copied from fh). The reply length is computed for zero buffers.
 *
 * NOTE(review): what happens to `*request` and how rc is returned is on
 * lines missing from this view.
 */
527 int mdc_close(struct lustre_handle *conn, obd_id ino, int type,
528 struct lustre_handle *fh, struct ptlrpc_request **request)
530 struct mds_body *body;
531 int rc, size = sizeof(*body);
532 struct ptlrpc_request *req;
534 req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_CLOSE, 1, &size,
537 GOTO(out, rc = -ENOMEM);
539 body = lustre_msg_buf(req->rq_reqmsg, 0);
540 ll_ino2fid(&body->fid1, ino, 0, type);
541 memcpy(&body->handle, fh, sizeof(body->handle));
543 req->rq_replen = lustre_msg_size(0, NULL);
545 rc = ptlrpc_queue_wait(req);
546 rc = ptlrpc_check_status(req, rc);
/*
 * mdc_readpage - send an MDS_READPAGE request for inode `ino` together with
 * a bulk descriptor on MDS_BULK_PORTAL.
 *
 * Visible steps: allocate a bulk descriptor on the import's connection,
 * build the one-buffer request, add one bulk page of PAGE_SIZE bytes tagged
 * with the request's xid, register the bulk, fill in the request body, queue
 * the request and wait, unpack the reply body, and free the bulk descriptor.
 *
 * NOTE(review): the pointer returned by ptlrpc_prep_bulk_page() is
 * dereferenced on the very next source line with no NULL check; if that call
 * can fail this would fault -- confirm and add a check.
 * NOTE(review): the use of `addr` and `offset`, the `out`/`out2` labels, the
 * condition under which ptlrpc_abort_bulk() runs, and the handling of
 * `*request` are on lines missing from this view.
 */
554 int mdc_readpage(struct lustre_handle *conn, obd_id ino, int type, __u64 offset,
555 char *addr, struct ptlrpc_request **request)
557 struct ptlrpc_connection *connection =
558 client_conn2cli(conn)->cl_import.imp_connection;
559 struct ptlrpc_request *req = NULL;
560 struct ptlrpc_bulk_desc *desc = NULL;
561 struct ptlrpc_bulk_page *bulk = NULL;
562 struct mds_body *body;
563 int rc, size = sizeof(*body);
566 CDEBUG(D_INODE, "inode: %ld\n", (long)ino);
568 desc = ptlrpc_prep_bulk(connection);
570 GOTO(out, rc = -ENOMEM);
572 req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_READPAGE, 1, &size,
575 GOTO(out2, rc = -ENOMEM);
577 bulk = ptlrpc_prep_bulk_page(desc);
578 bulk->bp_buflen = PAGE_SIZE;
/* Tag the bulk page with the request's xid. */
580 bulk->bp_xid = req->rq_xid;
581 desc->bd_portal = MDS_BULK_PORTAL;
583 rc = ptlrpc_register_bulk(desc);
585 CERROR("couldn't setup bulk sink: error %d.\n", rc);
589 body = lustre_msg_buf(req->rq_reqmsg, 0);
591 body->fid1.f_type = type;
594 req->rq_replen = lustre_msg_size(1, &size);
595 rc = ptlrpc_queue_wait(req);
596 rc = ptlrpc_check_status(req, rc);
598 ptlrpc_abort_bulk(desc);
601 body = lustre_msg_buf(req->rq_repmsg, 0);
602 mds_unpack_body(body);
607 ptlrpc_free_bulk(desc);
/*
 * mdc_statfs - o_statfs method of mdc_obd_ops: send an MDS_STATFS request
 * (no request buffers) and unpack the single reply buffer into *osfs with
 * obd_statfs_unpack(). The request is released before returning.
 *
 * NOTE(review): the failure checks and return statement are on lines missing
 * from this view.
 */
613 static int mdc_statfs(struct lustre_handle *conn, struct obd_statfs *osfs)
615 struct ptlrpc_request *req;
616 int rc, size = sizeof(*osfs);
619 req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_STATFS, 0, NULL,
/* The reply is expected to hold one buffer of sizeof(*osfs) bytes. */
624 req->rq_replen = lustre_msg_size(1, &size);
626 rc = ptlrpc_queue_wait(req);
627 rc = ptlrpc_check_status(req, rc);
632 obd_statfs_unpack(osfs, lustre_msg_buf(req->rq_repmsg, 0));
636 ptlrpc_req_finished(req);
/*
 * mdc_attach - o_attach method: register the device's lprocfs entries using
 * the status_var_nm_1 table, passing `dev` itself as the last argument.
 * `len` and `data` are not used in the visible line. Returns the result of
 * lprocfs_reg_obd().
 */
640 int mdc_attach(struct obd_device *dev, obd_count len, void *data)
642 return lprocfs_reg_obd(dev, status_var_nm_1, dev);
/*
 * mdc_detach - o_detach method: remove the lprocfs entries registered for
 * `dev`. Returns the result of lprocfs_dereg_obd().
 */
645 int mdc_detach(struct obd_device *dev)
647 return lprocfs_dereg_obd(dev);
/*
 * Method table registered for the MDC device type by ptlrpc_request_init().
 * Attach, detach and statfs are implemented in this file; setup, cleanup,
 * connect and disconnect use the client_obd_* functions defined elsewhere.
 * The initialisers use the old GCC "field: value" syntax.
 */
649 struct obd_ops mdc_obd_ops = {
650 o_attach: mdc_attach,
651 o_detach: mdc_detach,
652 o_setup: client_obd_setup,
653 o_cleanup: client_obd_cleanup,
654 o_connect: client_obd_connect,
655 o_disconnect: client_obd_disconnect,
656 o_statfs: mdc_statfs,
/*
 * Module entry point: register the MDC obd type with its method table
 * (mdc_obd_ops) and class-level lprocfs variables (status_class_var).
 * Returns the result of class_register_type(). The remaining arguments of
 * that call are on a line missing from this view.
 */
659 static int __init ptlrpc_request_init(void)
661 return class_register_type(&mdc_obd_ops, status_class_var,
/* Module exit point: unregister the obd type named LUSTRE_MDC_NAME. */
665 static void __exit ptlrpc_request_exit(void)
667 class_unregister_type(LUSTRE_MDC_NAME);
/* Module metadata. */
670 MODULE_AUTHOR("Cluster File Systems <info@clusterfs.com>");
671 MODULE_DESCRIPTION("Lustre Metadata Client v1.0");
672 MODULE_LICENSE("GPL");
/*
 * Symbols exported to other kernel modules.
 * NOTE(review): mdc_create, mdc_unlink, mdc_rename, mdc_link and mdc_setattr
 * are not defined in the visible part of this file; presumably they live in
 * another source file of the same module -- confirm.
 */
674 EXPORT_SYMBOL(d_delete_aliases);
675 EXPORT_SYMBOL(mdc_getstatus);
676 EXPORT_SYMBOL(mdc_getlovinfo);
677 EXPORT_SYMBOL(mdc_enqueue);
678 EXPORT_SYMBOL(mdc_cancel_unused);
679 EXPORT_SYMBOL(mdc_getattr);
680 EXPORT_SYMBOL(mdc_create);
681 EXPORT_SYMBOL(mdc_unlink);
682 EXPORT_SYMBOL(mdc_rename);
683 EXPORT_SYMBOL(mdc_link);
684 EXPORT_SYMBOL(mdc_readpage);
685 EXPORT_SYMBOL(mdc_setattr);
686 EXPORT_SYMBOL(mdc_close);
687 EXPORT_SYMBOL(mdc_open);
689 EXPORT_SYMBOL(mdc_store_inode_generation);
/* Hook the init/exit functions defined above into module load and unload. */
691 module_init(ptlrpc_request_init);
692 module_exit(ptlrpc_request_exit);