3 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
4 * vim:expandtab:shiftwidth=8:tabstop=8:
6 * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
8 * This file is part of Lustre, http://www.sf.net/projects/lustre/
10 * Lustre is free software; you can redistribute it and/or
11 * modify it under the terms of version 2 of the GNU General Public
12 * License as published by the Free Software Foundation.
14 * Lustre is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with Lustre; if not, write to the Free Software
21 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
26 #define DEBUG_SUBSYSTEM S_MDC
28 #include <linux/module.h>
29 #include <linux/miscdevice.h>
30 #include <linux/lustre_mds.h>
31 #include <linux/lustre_lite.h>
32 #include <linux/lustre_dlm.h>
33 #include <linux/init.h>
34 #include <linux/obd_lov.h>
35 #include <linux/lprocfs_status.h>
37 #define REQUEST_MINOR 244
39 extern int mds_queue_req(struct ptlrpc_request *);
40 extern struct lprocfs_vars status_var_nm_1[];
41 extern struct lprocfs_vars status_class_var[];
43 /* should become mdc_getinfo() */
/*
 * mdc_getstatus - send an MDS_GETSTATUS request and copy the fid1 field of
 * the reply body into @rootfid.
 *
 * @conn:    handle from which the client import is looked up with
 *           class_conn2cliimp().
 * @rootfid: output; overwritten with body->fid1 taken from reply buffer 0.
 *
 * NOTE(review): the braces, the tail of the ptlrpc_prep_req() call, the
 * condition guarding the -ENOMEM exit, the "out" label and the return
 * statement are not visible here.  rc as produced by ptlrpc_check_status()
 * is presumably the return value -- confirm.
 */
44 int mdc_getstatus(struct lustre_handle *conn, struct ll_fid *rootfid)
46 struct ptlrpc_request *req;
47 struct mds_body *body;
48 int rc, size = sizeof(*body);
/* One request buffer, sized for a struct mds_body. */
51 req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_GETSTATUS, 1, &size,
54 GOTO(out, rc = -ENOMEM);
56 body = lustre_msg_buf(req->rq_reqmsg, 0);
57 req->rq_level = LUSTRE_CONN_CON;
/* The expected reply also has one buffer of sizeof(struct mds_body). */
58 req->rq_replen = lustre_msg_size(1, &size);
60 mds_pack_req_body(req);
/* The result of ptlrpc_queue_wait() is passed through ptlrpc_check_status(). */
61 rc = ptlrpc_queue_wait(req);
62 rc = ptlrpc_check_status(req, rc);
/* body is re-pointed from the request to buffer 0 of the reply. */
65 body = lustre_msg_buf(req->rq_repmsg, 0);
66 mds_unpack_body(body);
67 memcpy(rootfid, &body->fid1, sizeof(*rootfid));
69 CDEBUG(D_NET, "root ino="LPU64", last_committed="LPU64
70 ", last_xid="LPU64"\n",
71 rootfid->id, req->rq_repmsg->last_committed,
72 req->rq_repmsg->last_xid);
/* req is handed to ptlrpc_req_finished(); no later use of req is visible. */
77 ptlrpc_req_finished(req);
/*
 * mdc_getlovinfo - send an MDS_GETLOVINFO request asking for LOV status.
 *
 * @obd:       not referenced in the visible lines.
 * @mdc_connh: handle from which the client import is looked up.
 * @request:   NOTE(review): presumably receives req for the caller to
 *             inspect and release; the assignment is not visible -- confirm.
 *
 * NOTE(review): the code that fills size[] for the two-buffer reply, the
 * -ENOMEM guard, the "out" label and the return are not visible here.
 */
81 int mdc_getlovinfo(struct obd_device *obd, struct lustre_handle *mdc_connh,
82 struct ptlrpc_request **request)
84 struct ptlrpc_request *req;
85 struct mds_status_req *streq;
/* Only size[0] is initialised explicitly; size[1] starts out as 0. */
86 int rc, size[2] = {sizeof(*streq)};
89 req = ptlrpc_prep_req(class_conn2cliimp(mdc_connh), MDS_GETLOVINFO, 1,
92 GOTO(out, rc = -ENOMEM);
/* Fill the status request; both fields go through HTON__u32(). */
95 streq = lustre_msg_buf(req->rq_reqmsg, 0);
96 streq->flags = HTON__u32(MDS_STATUS_LOV);
/* NOTE(review): 8192 looks like a reply buffer size limit -- confirm. */
97 streq->repbuf = HTON__u32(8192);
99 /* prepare for reply */
100 req->rq_level = LUSTRE_CONN_CON;
/* The reply length is computed for two buffers described by size[]. */
103 req->rq_replen = lustre_msg_size(2, size);
105 rc = ptlrpc_queue_wait(req);
106 rc = ptlrpc_check_status(req, rc);
/*
 * mdc_getattr - send an MDS_GETATTR request for inode @ino.
 *
 * @conn:    handle from which the client import (and, in one branch, the
 *           client_obd) is looked up.
 * @ino:     inode number packed into body->fid1 via ll_ino2fid().
 * @type:    passed to ll_ino2fid() together with @ino.
 * @valid:   attribute mask; OBD_MD_LINKNAME selects the symlink branch.
 * @ea_size: stored in body->size in the OBD_MD_LINKNAME branch.
 * @request: NOTE(review): presumably receives req; the assignment is not
 *           visible -- confirm.
 *
 * NOTE(review): the condition of the first branch, the updates of bufcount,
 * the -ENOMEM guard, the "out" label and the return are not visible here.
 */
113 int mdc_getattr(struct lustre_handle *conn,
114 obd_id ino, int type, unsigned long valid, size_t ea_size,
115 struct ptlrpc_request **request)
117 struct ptlrpc_request *req;
118 struct mds_body *body;
/* size[] describes the reply: always an mds_body, optionally a second buffer. */
119 int rc, size[2] = {sizeof(*body), 0}, bufcount = 1;
122 req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_GETATTR, 1, size,
125 GOTO(out, rc = -ENOMEM);
127 body = lustre_msg_buf(req->rq_reqmsg, 0);
128 ll_ino2fid(&body->fid1, ino, 0, type);
/* First branch: second reply buffer sized by the client's cl_max_mds_easize. */
132 struct client_obd *mdc = &class_conn2obd(conn)->u.cli;
134 size[1] = mdc->cl_max_mds_easize;
135 } else if (valid & OBD_MD_LINKNAME) {
/* Symlink branch: the caller-supplied ea_size is sent in body->size. */
138 body->size = ea_size;
139 CDEBUG(D_INODE, "allocating %d bytes for symlink in packet\n",
142 req->rq_replen = lustre_msg_size(bufcount, size);
143 mds_pack_req_body(req);
145 rc = ptlrpc_queue_wait(req);
146 rc = ptlrpc_check_status(req, rc);
/* body is re-pointed to buffer 0 of the reply before unpacking. */
149 body = lustre_msg_buf(req->rq_repmsg, 0);
150 mds_unpack_body(body);
151 CDEBUG(D_NET, "mode: %o\n", body->mode);
/*
 * d_delete_aliases - walk the dentries linked on inode->i_dentry and move
 * each one's d_hash link onto the superblock's ll_orphan_dentry_list.
 *
 * The walk is bracketed by spin_lock()/spin_unlock() on dcache_lock.
 *
 * NOTE(review): lines between the list_entry() call and list_del_init() are
 * not visible, so any per-dentry condition cannot be described here.
 */
160 void d_delete_aliases(struct inode *inode)
162 struct dentry *dentry = NULL;
163 struct list_head *tmp;
164 struct ll_sb_info *sbi = ll_i2sbi(inode);
167 spin_lock(&dcache_lock);
168 list_for_each(tmp, &inode->i_dentry) {
/* tmp is the d_alias link embedded in a struct dentry. */
169 dentry = list_entry(tmp, struct dentry, d_alias);
171 // if (atomic_read(&dentry->d_count))
173 //if (!list_empty(&dentry->d_lru))
/* Take the dentry off its current d_hash list, then re-link it as an orphan. */
176 list_del_init(&dentry->d_hash);
177 list_add(&dentry->d_hash, &sbi->ll_orphan_dentry_list);
180 spin_unlock(&dcache_lock);
/*
 * mdc_blocking_ast - lock callback passed to ldlm_cli_enqueue() by
 * mdc_enqueue().
 *
 * Two cases are visible:
 *   LDLM_CB_BLOCKING:  the lock is converted to a handle and cancelled with
 *                      ldlm_cli_cancel().
 *   LDLM_CB_CANCELING: @data is treated as a struct inode; for directories
 *                      the inode pages are invalidated, then the inode's
 *                      dentry aliases are moved aside by d_delete_aliases().
 *
 * NOTE(review): the switch statement itself, the declaration of rc, the
 * break/default handling, any iput() matching the igrab() below, and the
 * return are not visible here.
 */
184 static int mdc_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
185 void *data, __u32 data_len, int flag)
188 struct lustre_handle lockh;
192 case LDLM_CB_BLOCKING:
193 ldlm_lock2handle(lock, &lockh);
194 rc = ldlm_cli_cancel(&lockh);
196 CDEBUG(D_INODE, "ldlm_cli_cancel: %d\n", rc);
200 case LDLM_CB_CANCELING: {
201 /* Invalidate all dentries associated with this inode */
202 struct inode *inode = data;
/* The callback data must be an inode and data_len must match its size. */
204 LASSERT(inode != NULL);
205 LASSERT(data_len == sizeof(*inode));
207 if (S_ISDIR(inode->i_mode)) {
208 CDEBUG(D_INODE, "invalidating inode %ld\n",
211 ll_invalidate_inode_pages(inode);
/*
 * NOTE(review): igrab() is invoked inside the assertion.  If LASSERT() can
 * be compiled out, the call would disappear with it -- confirm, and
 * consider calling igrab() outside the assertion.
 */
214 LASSERT(igrab(inode) == inode);
215 d_delete_aliases(inode);
226 /* This should be called with both the request and the reply still packed. */
/*
 * mdc_store_inode_generation - copy fid1 from a reply body into the
 * cr_replayfid field of a create record in the request.
 *
 * @req:    request whose rq_reqmsg and rq_repmsg buffers are accessed.
 * @reqoff: index of the request buffer holding the struct mds_rec_create.
 * repoff (declared on a signature line not visible here) is the index of
 * the reply buffer holding the struct mds_body.
 */
227 void mdc_store_inode_generation(struct ptlrpc_request *req, int reqoff,
230 struct mds_rec_create *rec = lustre_msg_buf(req->rq_reqmsg, reqoff);
231 struct mds_body *body = lustre_msg_buf(req->rq_repmsg, repoff);
/* The number of bytes copied is the size of the destination field. */
233 memcpy(&rec->cr_replayfid, &body->fid1, sizeof rec->cr_replayfid);
/*
 * mdc_enqueue - build an LDLM_ENQUEUE request that carries a lookup intent
 * and pass it to ldlm_cli_enqueue().
 *
 * Request layout: size[0] is a struct ldlm_request and size[1] a struct
 * ldlm_intent; the remaining buffers depend on it->it_op:
 *   IT_MKDIR | IT_CREAT | IT_SYMLINK | IT_MKNOD
 *       5 buffers; buffer 2 is a struct mds_rec_create (mds_create_pack()).
 *   IT_RENAME2
 *       5 buffers; buffer 2 is a struct mds_rec_rename (mds_rename_pack()).
 *   IT_LINK2
 *       4 buffers; buffer 2 is a struct mds_rec_link (mds_link_pack()).
 *   IT_UNLINK, IT_RMDIR
 *       4 buffers; buffer 2 is a struct mds_rec_unlink (mds_unlink_pack()).
 *   IT_GETATTR | IT_RENAME | IT_LINK | IT_OPEN | IT_SETATTR | IT_LOOKUP |
 *   IT_READLINK
 *       4 buffers; buffer 2 is a struct mds_body (mds_getattr_pack()).
 *   IT_READDIR
 *       1 buffer; no intent record is packed in the visible lines.
 *
 * The reply is sized from repsize[]: an ldlm_reply, an mds_body and an EA
 * buffer of cl_max_mds_easize bytes (only the first for IT_READDIR).
 *
 * The lock resource name is {dir->i_ino, dir->i_generation}.  After the
 * enqueue, it->it_disposition and it->it_status are taken from the reply's
 * lock_policy_res1/lock_policy_res2, and it->it_lock_mode from lock_mode.
 *
 * NOTE(review): the tails of the ptlrpc_prep_req() calls, their NULL
 * checks, the switch that selects the file type bits, the final else
 * branch, the end of the ldlm_cli_enqueue() argument list and the return
 * statements are not visible here.
 */
236 int mdc_enqueue(struct lustre_handle *conn, int lock_type,
237 struct lookup_intent *it, int lock_mode, struct inode *dir,
238 struct dentry *de, struct lustre_handle *lockh,
239 char *tgt, int tgtlen, void *data, int datalen)
241 struct ptlrpc_request *req;
242 struct obd_device *obddev = class_conn2obd(conn);
243 __u64 res_id[RES_NAME_SIZE] = {dir->i_ino, (__u64)dir->i_generation};
244 int size[6] = {sizeof(struct ldlm_request), sizeof(struct ldlm_intent)};
245 int rc, flags = LDLM_FL_HAS_INTENT;
246 int repsize[3] = {sizeof(struct ldlm_reply),
247 sizeof(struct mds_body),
248 obddev->u.cli.cl_max_mds_easize};
249 struct ldlm_reply *dlm_rep;
250 struct ldlm_intent *lit;
251 struct ldlm_request *lockreq;
254 LDLM_DEBUG_NOLOCK("mdsintent %s dir %ld", ldlm_it2str(it->it_op),
/* Creation intents: choose the file type bits, apply the umask, pack a create record. */
257 if (it->it_op & (IT_MKDIR | IT_CREAT | IT_SYMLINK | IT_MKNOD)) {
260 it->it_mode |= S_IFDIR;
262 case (IT_CREAT|IT_OPEN):
264 it->it_mode |= S_IFREG;
267 it->it_mode |= S_IFLNK;
270 it->it_mode &= ~current->fs->umask;
/* Buffers 3 and 4 hold the new name and the target, each with one extra byte. */
272 size[2] = sizeof(struct mds_rec_create);
273 size[3] = de->d_name.len + 1;
/* NOTE(review): tgtlen + 1 is used with no visible check of tgt -- confirm callers. */
274 size[4] = tgtlen + 1;
275 req = ptlrpc_prep_req(class_conn2cliimp(conn), LDLM_ENQUEUE, 5,
280 /* pack the intent */
281 lit = lustre_msg_buf(req->rq_reqmsg, 1);
/*
 * NOTE(review): NTOH__u64 is applied while filling an outgoing field; the
 * other packing code visible in this file uses HTON__u32 -- confirm the two
 * directions are equivalent here.  The same applies to every branch below.
 */
282 lit->opc = NTOH__u64((__u64)it->it_op);
284 /* pack the intended request */
285 mds_create_pack(req, 2, dir, it->it_mode, 0, current->fsuid,
286 current->fsgid, CURRENT_TIME, de->d_name.name,
287 de->d_name.len, tgt, tgtlen);
288 req->rq_replen = lustre_msg_size(3, repsize);
/* Rename: it->it_data carries the source dentry; de is the destination. */
289 } else if (it->it_op == IT_RENAME2) {
290 struct dentry *old_de = it->it_data;
292 size[2] = sizeof(struct mds_rec_rename);
293 size[3] = old_de->d_name.len + 1;
294 size[4] = de->d_name.len + 1;
295 req = ptlrpc_prep_req(class_conn2cliimp(conn), LDLM_ENQUEUE, 5,
300 /* pack the intent */
301 lit = lustre_msg_buf(req->rq_reqmsg, 1);
302 lit->opc = NTOH__u64((__u64)it->it_op);
304 /* pack the intended request */
305 mds_rename_pack(req, 2, old_de->d_parent->d_inode, dir,
306 old_de->d_name.name, old_de->d_name.len,
307 de->d_name.name, de->d_name.len);
308 req->rq_replen = lustre_msg_size(3, repsize);
/* Hard link: it->it_data carries the existing dentry; de names the new link. */
309 } else if (it->it_op == IT_LINK2) {
310 struct dentry *old_de = it->it_data;
312 size[2] = sizeof(struct mds_rec_link);
313 size[3] = de->d_name.len + 1;
314 req = ptlrpc_prep_req(class_conn2cliimp(conn), LDLM_ENQUEUE, 4,
319 /* pack the intent */
320 lit = lustre_msg_buf(req->rq_reqmsg, 1);
321 lit->opc = NTOH__u64((__u64)it->it_op);
323 /* pack the intended request */
324 mds_link_pack(req, 2, old_de->d_inode, dir,
325 de->d_name.name, de->d_name.len);
326 req->rq_replen = lustre_msg_size(3, repsize);
/* Unlink and rmdir share one record; the mode argument tells them apart. */
327 } else if (it->it_op == IT_UNLINK || it->it_op == IT_RMDIR) {
328 size[2] = sizeof(struct mds_rec_unlink);
329 size[3] = de->d_name.len + 1;
330 req = ptlrpc_prep_req(class_conn2cliimp(conn), LDLM_ENQUEUE, 4,
335 /* pack the intent */
336 lit = lustre_msg_buf(req->rq_reqmsg, 1);
337 lit->opc = NTOH__u64((__u64)it->it_op);
339 /* pack the intended request */
340 mds_unlink_pack(req, 2, dir, NULL,
341 it->it_op == IT_UNLINK ? S_IFREG : S_IFDIR,
342 de->d_name.name, de->d_name.len);
344 req->rq_replen = lustre_msg_size(3, repsize);
/* Lookup-style intents: a getattr-by-name body is packed in buffer 2. */
345 } else if (it->it_op & (IT_GETATTR | IT_RENAME | IT_LINK |
346 IT_OPEN | IT_SETATTR | IT_LOOKUP | IT_READLINK)) {
347 size[2] = sizeof(struct mds_body);
348 size[3] = de->d_name.len + 1;
350 req = ptlrpc_prep_req(class_conn2cliimp(conn), LDLM_ENQUEUE, 4,
355 /* pack the intent */
356 lit = lustre_msg_buf(req->rq_reqmsg, 1);
357 lit->opc = NTOH__u64((__u64)it->it_op);
359 /* pack the intended request */
360 mds_getattr_pack(req, 2, dir, de->d_name.name, de->d_name.len);
362 /* get ready for the reply */
363 req->rq_replen = lustre_msg_size(3, repsize);
/* Readdir: a one-buffer request and a reply sized for the ldlm_reply only. */
364 } else if (it->it_op == IT_READDIR) {
365 req = ptlrpc_prep_req(class_conn2cliimp(conn), LDLM_ENQUEUE, 1,
370 /* get ready for the reply */
371 req->rq_replen = lustre_msg_size(1, repsize);
376 #warning FIXME: the data here needs to be different if a lock was granted for a different inode
/* The caller's data pointer and mdc_blocking_ast are passed to the enqueue. */
377 rc = ldlm_cli_enqueue(conn, req, obddev->obd_namespace, NULL, res_id,
378 lock_type, NULL, 0, lock_mode, &flags,
379 ldlm_completion_ast, mdc_blocking_ast, data,
/* Every non-readdir request has REINT_REPLAYING OR-ed into the first __u32 of buffer 2. */
382 if (it->it_op != IT_READDIR) {
383 /* XXX This should become a lustre_msg flag, but for now... */
384 __u32 *opp = lustre_msg_buf(req->rq_reqmsg, 2);
385 *opp |= REINT_REPLAYING;
389 /* This can go when we're sure that this can never happen */
/* An aborted lock zeroes *lockh; any other non-zero rc is logged. */
392 if (rc == ELDLM_LOCK_ABORTED) {
394 memset(lockh, 0, sizeof(*lockh));
396 } else if (rc != 0) {
397 CERROR("ldlm_cli_enqueue: %d\n", rc);
401 /* On replay, we don't want the lock granted. */
402 lockreq = lustre_msg_buf(req->rq_reqmsg, 0);
403 lockreq->lock_flags |= LDLM_FL_INTENT_ONLY;
/* Report the intent outcome to the caller through *it. */
405 dlm_rep = lustre_msg_buf(req->rq_repmsg, 0);
406 it->it_disposition = (int) dlm_rep->lock_policy_res1;
407 it->it_status = (int) dlm_rep->lock_policy_res2;
408 it->it_lock_mode = lock_mode;
/*
 * mdc_cancel_unused - call ldlm_cli_cancel_unused() on the resource named
 * {inode->i_ino, inode->i_generation} in the namespace of the obd device
 * behind @conn, and return its result through RETURN().
 *
 * flags is passed through unchanged; its declaration sits on a signature
 * line that is not visible here.
 */
414 int mdc_cancel_unused(struct lustre_handle *conn, struct inode *inode,
417 __u64 res_id[RES_NAME_SIZE] = {inode->i_ino, inode->i_generation};
418 struct obd_device *obddev = class_conn2obd(conn);
420 RETURN(ldlm_cli_cancel_unused(obddev->obd_namespace, res_id, flags));
/*
 * Per-request data stored in an MDS_OPEN request buffer by mdc_open() and
 * read back by mdc_replay_open().
 */
423 struct replay_open_data {
/* Caller-owned file handle that mdc_replay_open() overwrites from the reply. */
424 struct lustre_handle *fh;
/*
 * mdc_replay_open - replay callback installed by mdc_open() in
 * req->rq_replay_cb.
 *
 * Copies the handle from the reply body into the lustre_handle that the
 * request's replay_open_data points at.
 *
 * NOTE(review): the declaration of offset and the two assignments selected
 * by the MDS_OPEN_HAS_EA test are not visible here; mdc_open() reads the
 * same buffer at index 2 when an EA is packed and at index 1 otherwise.
 */
427 static void mdc_replay_open(struct ptlrpc_request *req)
430 struct replay_open_data *saved;
431 struct mds_body *body = lustre_msg_buf(req->rq_repmsg, 0);
/* The op flag set by mdc_open() tells whether an EA buffer precedes the saved data. */
433 if (lustre_msg_get_op_flags(req->rq_reqmsg) & MDS_OPEN_HAS_EA)
438 saved = lustre_msg_buf(req->rq_reqmsg, offset);
439 mds_unpack_body(body);
440 CDEBUG(D_HA, "updating from "LPD64"/"LPD64" to "LPD64"/"LPD64"\n",
441 saved->fh->addr, saved->fh->cookie,
442 body->handle.addr, body->handle.cookie);
/* Overwrite the caller's handle with the one from the reply. */
443 memcpy(saved->fh, &body->handle, sizeof(body->handle));
/*
 * mdc_open - send an MDS_OPEN request for inode @ino and copy the handle
 * from the reply into @fh.
 *
 * @conn:    handle from which the client import is looked up.
 * @ino:     inode number packed into body->fid1 via ll_ino2fid().
 * @type:    passed to ll_ino2fid() together with @ino.
 * @flags:   open flags, stored in body->flags through HTON__u32().
 * @lsm:     striping descriptor; when packed it occupies request buffer 1
 *           and the replay data moves to buffer 2.
 * @fh:      in/out file handle: copied into the request body, overwritten
 *           from the reply, and remembered for replay.
 * @request: NOTE(review): presumably receives req; the assignment is not
 *           visible -- confirm.
 *
 * NOTE(review): the conditions around the lsm handling, the bufcount
 * update, the -ENOMEM guard, the "out" label and the return are not
 * visible here.
 */
446 int mdc_open(struct lustre_handle *conn, obd_id ino, int type, int flags,
447 struct lov_stripe_md *lsm, struct lustre_handle *fh,
448 struct ptlrpc_request **request)
450 struct mds_body *body;
451 struct replay_open_data *replay_data;
452 int rc, size[3] = {sizeof(*body), sizeof(*replay_data)}, bufcount = 2;
453 struct ptlrpc_request *req;
458 size[2] = size[1]; /* shuffle the spare data along */
460 size[1] = lsm->lsm_mds_easize;
463 req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_OPEN, bufcount, size,
466 GOTO(out, rc = -ENOMEM);
/* mdc_replay_open() tests this flag to locate the replay data buffer. */
469 lustre_msg_set_op_flags(req->rq_reqmsg, MDS_OPEN_HAS_EA);
/* Flag the request with PTL_RPC_FL_REPLAY. */
472 req->rq_flags |= PTL_RPC_FL_REPLAY;
473 body = lustre_msg_buf(req->rq_reqmsg, 0);
475 ll_ino2fid(&body->fid1, ino, 0, type);
476 body->flags = HTON__u32(flags);
477 memcpy(&body->handle, fh, sizeof(body->handle));
480 lov_packmd(lustre_msg_buf(req->rq_reqmsg, 1), lsm);
/* Only size[0] (the mds_body) is counted for the reply. */
482 req->rq_replen = lustre_msg_size(1, size);
484 rc = ptlrpc_queue_wait(req);
485 rc = ptlrpc_check_status(req, rc);
487 body = lustre_msg_buf(req->rq_repmsg, 0);
488 mds_unpack_body(body);
489 memcpy(fh, &body->handle, sizeof(*fh));
492 /* If open is replayed, we need to fix up the fh. */
493 req->rq_replay_cb = mdc_replay_open;
494 replay_data = lustre_msg_buf(req->rq_reqmsg, lsm ? 2 : 1);
/*
 * NOTE(review): the caller's fh pointer is stored in the request, and
 * mdc_replay_open() writes through it.  The handle must therefore remain
 * valid for as long as the request can be replayed -- confirm callers.
 */
495 replay_data->fh = fh;
/*
 * mdc_close - send an MDS_CLOSE request for inode @ino with file handle @fh.
 *
 * @conn:    handle from which the client import is looked up.
 * @ino:     inode number packed into body->fid1 via ll_ino2fid().
 * @type:    passed to ll_ino2fid() together with @ino.
 * @fh:      handle copied into body->handle of the request.
 * @request: NOTE(review): presumably receives req; the assignment is not
 *           visible -- confirm.
 *
 * NOTE(review): the -ENOMEM guard, the "out" label and the return are not
 * visible here.
 */
503 int mdc_close(struct lustre_handle *conn, obd_id ino, int type,
504 struct lustre_handle *fh, struct ptlrpc_request **request)
506 struct mds_body *body;
507 int rc, size = sizeof(*body);
508 struct ptlrpc_request *req;
510 req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_CLOSE, 1, &size,
513 GOTO(out, rc = -ENOMEM);
515 body = lustre_msg_buf(req->rq_reqmsg, 0);
516 ll_ino2fid(&body->fid1, ino, 0, type);
517 memcpy(&body->handle, fh, sizeof(body->handle));
/* The reply length is computed for zero buffers. */
519 req->rq_replen = lustre_msg_size(0, NULL);
521 rc = ptlrpc_queue_wait(req);
522 rc = ptlrpc_check_status(req, rc);
/*
 * mdc_readpage - send an MDS_READPAGE request and register a one-page bulk
 * descriptor on MDS_BULK_PORTAL for it.
 *
 * @conn:    handle from which the import and its connection are looked up.
 * @ino:     inode number; only logged in the visible lines.
 * @type:    stored in body->fid1.f_type.
 * @offset:  not referenced in the visible lines.
 * @addr:    not referenced in the visible lines; presumably the page
 *           buffer attached to the bulk page -- confirm.
 * @request: NOTE(review): presumably receives req; the assignment is not
 *           visible -- confirm.
 *
 * NOTE(review): the NULL guards in front of the GOTOs, the error exits
 * after ptlrpc_register_bulk() and ptlrpc_check_status(), the out/out2
 * labels and the return are not visible here.
 */
530 int mdc_readpage(struct lustre_handle *conn, obd_id ino, int type, __u64 offset,
531 char *addr, struct ptlrpc_request **request)
533 struct ptlrpc_connection *connection =
534 client_conn2cli(conn)->cl_import.imp_connection;
535 struct ptlrpc_request *req = NULL;
536 struct ptlrpc_bulk_desc *desc = NULL;
537 struct ptlrpc_bulk_page *bulk = NULL;
538 struct mds_body *body;
539 int rc, size = sizeof(*body);
542 CDEBUG(D_INODE, "inode: %ld\n", (long)ino);
/* The bulk descriptor is prepared before the request. */
544 desc = ptlrpc_prep_bulk(connection);
546 GOTO(out, rc = -ENOMEM);
548 req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_READPAGE, 1, &size,
551 GOTO(out2, rc = -ENOMEM);
/*
 * NOTE(review): bulk is dereferenced on the next visible line and no NULL
 * check is visible in between -- confirm that ptlrpc_prep_bulk_page()
 * cannot fail here, or add a check.
 */
553 bulk = ptlrpc_prep_bulk_page(desc);
554 bulk->bp_buflen = PAGE_SIZE;
/* The bulk page carries the request's xid. */
556 bulk->bp_xid = req->rq_xid;
557 desc->bd_portal = MDS_BULK_PORTAL;
559 rc = ptlrpc_register_bulk(desc);
561 CERROR("couldn't setup bulk sink: error %d.\n", rc);
565 body = lustre_msg_buf(req->rq_reqmsg, 0);
567 body->fid1.f_type = type;
570 req->rq_replen = lustre_msg_size(1, &size);
571 rc = ptlrpc_queue_wait(req);
572 rc = ptlrpc_check_status(req, rc);
/* NOTE(review): presumably reached only when rc reports a failure -- confirm. */
574 ptlrpc_abort_bulk(desc);
577 body = lustre_msg_buf(req->rq_repmsg, 0);
578 mds_unpack_body(body);
/* The bulk descriptor is freed here; req is not released in the visible lines. */
583 ptlrpc_free_bulk(desc);
/*
 * mdc_statfs - send an MDS_STATFS request with no request buffers and
 * unpack the single reply buffer into @osfs.
 *
 * @conn: handle from which the client import is looked up.
 * @osfs: output; filled by obd_statfs_unpack() from reply buffer 0.
 *
 * NOTE(review): the check of req after ptlrpc_prep_req(), the error exit
 * after ptlrpc_check_status(), the "out" label and the return are not
 * visible here.
 */
589 static int mdc_statfs(struct lustre_handle *conn, struct obd_statfs *osfs)
591 struct ptlrpc_request *req;
592 int rc, size = sizeof(*osfs);
595 req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_STATFS, 0, NULL,
/* The reply is expected to hold one struct obd_statfs. */
600 req->rq_replen = lustre_msg_size(1, &size);
602 rc = ptlrpc_queue_wait(req);
603 rc = ptlrpc_check_status(req, rc);
608 obd_statfs_unpack(osfs, lustre_msg_buf(req->rq_repmsg, 0));
/* req is handed to ptlrpc_req_finished(); no later use of req is visible. */
612 ptlrpc_req_finished(req);
/*
 * mdc_attach - o_attach method: register the device's lprocfs entries from
 * status_var_nm_1 via lprocfs_reg_obd().
 *
 * @len and @data are not referenced in the visible lines.
 * NOTE(review): the declaration of rc, the rest of the call and the return
 * are not visible here.
 */
616 int mdc_attach(struct obd_device *dev,
617 obd_count len, void *data)
620 rc = lprocfs_reg_obd(dev, (struct lprocfs_vars*)status_var_nm_1,
/*
 * mdc_detach - o_detach method: undo mdc_attach() by calling
 * lprocfs_dereg_obd() on the device.
 *
 * NOTE(review): the declaration of rc and the return are not visible here.
 */
625 int mdc_detach(struct obd_device *dev)
628 rc = lprocfs_dereg_obd(dev);
/*
 * Method table registered for the MDC obd type by ptlrpc_request_init().
 * Attach, detach and statfs are implemented in this file; the remaining
 * entries point at client_obd_* functions defined elsewhere.
 */
632 struct obd_ops mdc_obd_ops = {
633 o_attach: mdc_attach,
634 o_detach: mdc_detach,
635 o_setup: client_obd_setup,
636 o_cleanup: client_obd_cleanup,
637 o_connect: client_obd_connect,
638 o_disconnect: client_obd_disconnect,
639 o_statfs: mdc_statfs,
/*
 * Module init hook (see module_init() at the end of the file): register
 * mdc_obd_ops and the class-level lprocfs variables with
 * class_register_type().
 *
 * NOTE(review): the declaration of rc, the last argument of the call and
 * the return are not visible here.
 */
642 static int __init ptlrpc_request_init(void)
645 rc = class_register_type(&mdc_obd_ops,
646 (struct lprocfs_vars*)status_class_var,
/*
 * Module exit hook (see module_exit() at the end of the file): unregister
 * the obd type named LUSTRE_MDC_NAME.
 */
654 static void __exit ptlrpc_request_exit(void)
657 class_unregister_type(LUSTRE_MDC_NAME);
/* Module metadata. */
661 MODULE_AUTHOR("Cluster File Systems <info@clusterfs.com>");
662 MODULE_DESCRIPTION("Lustre Metadata Client v1.0");
663 MODULE_LICENSE("GPL");
/* Symbols exported to other modules; these are defined in the code above. */
665 EXPORT_SYMBOL(d_delete_aliases);
666 EXPORT_SYMBOL(mdc_getstatus);
667 EXPORT_SYMBOL(mdc_getlovinfo);
668 EXPORT_SYMBOL(mdc_enqueue);
669 EXPORT_SYMBOL(mdc_cancel_unused);
670 EXPORT_SYMBOL(mdc_getattr);
/*
 * NOTE(review): mdc_create, mdc_unlink, mdc_rename, mdc_link and
 * mdc_setattr have no definition in the visible code; presumably they live
 * in another source file of this module -- confirm.
 */
671 EXPORT_SYMBOL(mdc_create);
672 EXPORT_SYMBOL(mdc_unlink);
673 EXPORT_SYMBOL(mdc_rename);
674 EXPORT_SYMBOL(mdc_link);
675 EXPORT_SYMBOL(mdc_readpage);
676 EXPORT_SYMBOL(mdc_setattr);
677 EXPORT_SYMBOL(mdc_close);
678 EXPORT_SYMBOL(mdc_open);
680 EXPORT_SYMBOL(mdc_store_inode_generation);
/* Entry and exit points of the module. */
682 module_init(ptlrpc_request_init);
683 module_exit(ptlrpc_request_exit);