4 * Lustre Metadata Server (mds) request handler
6 * Copyright (C) 2001 Cluster File Systems, Inc.
8 * This code is issued under the GNU General Public License.
9 * See the file COPYING in this distribution
11 * by Peter Braam <braam@clusterfs.com>
13 * This server is single-threaded at present (but can easily be made multithreaded).
20 #include <linux/version.h>
21 #include <linux/module.h>
23 #include <linux/stat.h>
24 #include <linux/locks.h>
25 #include <linux/ext2_fs.h>
26 #include <linux/quotaops.h>
27 #include <asm/unistd.h>
28 #include <linux/obd_support.h>
29 #include <linux/obd.h>
30 #include <linux/lustre_lib.h>
31 #include <linux/lustre_idl.h>
32 #include <linux/lustre_mds.h>
33 #include <linux/obd_class.h>
/*
 * Single, file-scope MDS server instance. mds_queue_req() delivers all
 * client requests to this one server; mds_setup()/mds_main() are expected
 * to initialize it before use.
 */
36 static struct mds_obd *MDS;
38 // XXX make this networked!
/*
 * mds_queue_req - hand a client request to the (currently local) MDS server.
 *
 * Allocates a server-side copy of the request descriptor, transfers
 * ownership of the raw request buffer to it (the client's rq_reqbuf is
 * NULLed so it is freed exactly once, by mds_reply()), remembers the
 * originating request in rq_reply_handle, queues the copy on
 * MDS->mds_reqs under mds_lock, wakes the server thread, and then puts
 * the caller to sleep on rq_wait_for_rep until the reply arrives.
 *
 * NOTE(review): the kmalloc() failure check and the function's return
 * statement fall in lines elided from this extract — confirm against the
 * full file before assuming they are missing.
 */
39 static int mds_queue_req(struct mds_request *req)
41 struct mds_request *srv_request;
48 srv_request = kmalloc(sizeof(*srv_request), GFP_KERNEL);
54 /* move the request buffer */
55 srv_request->rq_reqlen = req->rq_reqlen;
56 srv_request->rq_reqbuf = req->rq_reqbuf;
57 srv_request->rq_obd = MDS;
/* ownership of rq_reqbuf has moved to srv_request; prevent double free */
59 req->rq_reqbuf = NULL;
62 /* remember where it came from */
63 srv_request->rq_reply_handle = req;
65 /* get the server working on this request */
66 spin_lock(&MDS->mds_lock);
67 list_add(&srv_request->rq_list, &MDS->mds_reqs);
68 spin_unlock(&MDS->mds_lock);
69 wake_up(&MDS->mds_waitq);
71 /* put client asleep */
72 printk("-- sleeping\n");
73 interruptible_sleep_on(&req->rq_wait_for_rep);
/*
 * mds_fid2dentry - map a lustre_fid onto a dentry in the MDS backing
 * filesystem: iget() the inode named by fid->id from mds->mds_sb, then
 * wrap it in an anonymous root dentry with d_alloc_root().
 *
 * NOTE(review): the error paths (failed iget/d_alloc_root) and the return
 * statement are in lines elided from this extract.
 */
78 static struct dentry *mds_fid2dentry(struct mds_obd *mds, struct lustre_fid *fid)
83 inode = iget(mds->mds_sb, fid->id);
88 de = d_alloc_root(inode);
/*
 * mds_getattr - service a GETATTR request.
 *
 * Resolves fid1 from the request to a dentry/inode via mds_fid2dentry(),
 * allocates a reply (mds_pack_rep), echoes the request sequence number
 * into the reply header, and copies the inode attributes (times, uid,
 * gid, size, mode) into the reply body.  rq_status is set to -ENOMEM if
 * the reply cannot be allocated; the reply header status is -ENOENT when
 * the fid does not resolve (the surrounding branch is elided here).
 */
99 int mds_getattr(struct mds_request *req)
101 struct dentry *de = mds_fid2dentry(req->rq_obd, &req->rq_req->fid1);
106 rc = mds_pack_rep(NULL, 0, NULL, 0, &req->rq_rephdr, &req->rq_rep,
107 &req->rq_replen, &req->rq_repbuf);
110 printk("mds: out of memory\n");
111 req->rq_status = -ENOMEM;
/* match reply to request so the client can pair them up */
115 req->rq_rephdr->seqno = req->rq_reqhdr->seqno;
120 req->rq_rephdr->status = -ENOENT;
125 rep->atime = inode->i_atime;
126 rep->ctime = inode->i_ctime;
127 rep->mtime = inode->i_mtime;
128 rep->uid = inode->i_uid;
129 rep->gid = inode->i_gid;
130 rep->size = inode->i_size;
131 rep->mode = inode->i_mode;
137 /* XXX replace with networking code */
/*
 * mds_reply - deliver a finished server-side request back to the waiting
 * client.
 *
 * Frees the request buffer whose ownership was taken in mds_queue_req(),
 * hands the reply buffer over to the client request (rq_repbuf is NULLed
 * on the server copy so it is freed exactly once by the client side), and
 * wakes the client sleeping on rq_wait_for_rep.
 */
138 int mds_reply(struct mds_request *req)
140 struct mds_request *clnt_req = req->rq_reply_handle;
144 /* free the request buffer */
145 kfree(req->rq_reqbuf);
146 req->rq_reqbuf = NULL;
148 /* move the reply to the client */
149 clnt_req->rq_replen = req->rq_replen;
150 clnt_req->rq_repbuf = req->rq_repbuf;
/* ownership of rq_repbuf has moved to the client request */
151 req->rq_repbuf = NULL;
154 /* wake up the client */
155 wake_up_interruptible(&clnt_req->rq_wait_for_rep);
/*
 * mds_error - build and send a minimal error reply.
 *
 * Allocates a bare, zeroed reply header carrying the request's sequence
 * number, the saved rq_status, and type MDS_TYPE_ERR, then ships it to
 * the client via mds_reply().  The kmalloc() failure branch is elided
 * from this extract.
 */
160 int mds_error(struct mds_request *req)
162 struct mds_rep_hdr *hdr;
165 hdr = kmalloc(sizeof(*hdr), GFP_KERNEL);
171 memset(hdr, 0, sizeof(*hdr));
173 hdr->seqno = req->rq_reqhdr->seqno;
174 hdr->status = req->rq_status;
175 hdr->type = MDS_TYPE_ERR;
/* the header doubles as the entire reply buffer */
177 req->rq_repbuf = (char *)hdr;
178 req->rq_replen = sizeof(*hdr);
181 return mds_reply(req);
184 //int mds_handle(struct mds_conn *conn, int len, char *buf)
/*
 * mds_handle - top-level dispatcher for one incoming MDS request.
 *
 * Validates that the wire packet is of type MDS_TYPE_REQ (byte-swapping
 * the on-wire type with NTOH__u32), unpacks it into rq_reqhdr/rq_req via
 * mds_unpack_req(), then dispatches on the request opcode.
 *
 * NOTE(review): the switch's case labels are elided from this extract;
 * every visible arm currently falls through to mds_getattr() (the other
 * operations appear to be unimplemented stubs at this stage — confirm
 * against the full file), with one arm routing to mds_error().
 */
185 int mds_handle(struct mds_request *req)
188 struct mds_req_hdr *hdr;
192 hdr = (struct mds_req_hdr *)req->rq_reqbuf;
194 if (NTOH__u32(hdr->type) != MDS_TYPE_REQ) {
195 printk("lustre_mds: wrong packet type sent %d\n",
196 NTOH__u32(hdr->type));
201 rc = mds_unpack_req(req->rq_reqbuf, req->rq_reqlen,
202 &req->rq_reqhdr, &req->rq_req);
204 printk("lustre_mds: Invalid request\n");
209 switch (req->rq_reqhdr->opc) {
212 CDEBUG(D_INODE, "getattr\n");
213 rc = mds_getattr(req);
217 return mds_getattr(req);
220 return mds_getattr(req);
223 return mds_getattr(req);
226 return mds_getattr(req);
229 return mds_getattr(req);
232 return mds_getattr(req);
235 return mds_getattr(req);
238 return mds_getattr(req);
241 return mds_getattr(req);
244 return mds_getattr(req);
247 return mds_error(req);
252 printk("mds: processing error %d\n", rc);
255 CDEBUG(D_INODE, "sending reply\n");
/*
 * mds_timer_run - interval-timer callback for the MDS server thread.
 * __data carries the server thread's task_struct (set up in mds_main()).
 * The body that acts on 'p' — presumably a wakeup of the thread — is
 * elided from this extract; confirm against the full file.
 */
263 static void mds_timer_run(unsigned long __data)
265 struct task_struct * p = (struct task_struct *) __data;
/*
 * mds_main - body of the MDS server kernel thread.
 *
 * Blocks all signals, names itself "lustre_mds", arms an interval timer
 * (mds_timer_run, with this thread's task_struct as payload), publishes
 * itself in mds->mds_thread, and then loops: sleep on mds_waitq, and on
 * each wakeup either exit (MDS_UNMOUNT flag), note a timer-only wakeup
 * (empty request list), or pop the next mds_request off mds_reqs and run
 * it through mds_handle().  On exit it stops the timer, clears
 * mds_thread, and signals mds_done_waitq so the stopper can proceed.
 *
 * NOTE(review): the three occurrences of "¤t" below are a character-
 * encoding mangling of "&current" ("&curren" is the HTML entity for the
 * currency sign) — restore "&current" before this will compile.
 *
 * NOTE(review): requests are popped from mds_reqs here WITHOUT taking
 * mds_lock, while mds_queue_req() adds under the lock — looks racy;
 * confirm whether the elided lines take the lock.
 */
270 int mds_main(void *arg)
272 struct mds_obd *mds = (struct mds_obd *) arg;
273 struct timer_list timer;
277 spin_lock_irq(¤t->sigmask_lock);
278 sigfillset(¤t->blocked);
279 recalc_sigpending(current);
280 spin_unlock_irq(¤t->sigmask_lock);
282 sprintf(current->comm, "lustre_mds");
284 /* Set up an interval timer which can be used to trigger a
285 wakeup after the interval expires */
287 timer.data = (unsigned long) current;
288 timer.function = mds_timer_run;
289 mds->mds_timer = &timer;
291 /* Record that the thread is running */
292 mds->mds_thread = current;
293 wake_up(&mds->mds_done_waitq);
295 printk(KERN_INFO "lustre_mds starting. Commit interval %d seconds\n",
296 mds->mds_interval / HZ);
298 /* XXX maintain a list of all managed devices: insert here */
300 /* And now, wait forever for commit wakeup events. */
302 struct mds_request *request;
305 if (mds->mds_flags & MDS_UNMOUNT)
309 wake_up(&mds->mds_done_waitq);
310 interruptible_sleep_on(&mds->mds_waitq);
312 CDEBUG(D_INODE, "lustre_mds wakes\n");
313 CDEBUG(D_INODE, "pick up req here and continue\n");
315 if (list_empty(&mds->mds_reqs)) {
316 CDEBUG(D_INODE, "woke because of timer\n");
318 request = list_entry(mds->mds_reqs.next,
319 struct mds_request, rq_list);
320 list_del(&request->rq_list);
321 rc = mds_handle(request);
325 del_timer_sync(mds->mds_timer);
327 /* XXX maintain a list of all managed devices: cleanup here */
/* clear the thread pointer last: mds_stop_srv_thread() polls it */
329 mds->mds_thread = NULL;
330 wake_up(&mds->mds_done_waitq);
331 printk("lustre_mds: exiting\n");
/*
 * mds_stop_srv_thread - ask the server thread to exit and wait for it.
 * Sets MDS_UNMOUNT, then repeatedly wakes the thread and sleeps on
 * mds_done_waitq until mds_main() clears mds->mds_thread.
 *
 * NOTE(review): the wake_up/sleep_on pair is the classic 2.4-era
 * lost-wakeup pattern — if the thread signals mds_done_waitq between our
 * wake_up() and our sleep_on(), we sleep forever; consider confirming the
 * elided lines guard against this.
 */
335 static void mds_stop_srv_thread(struct mds_obd *mds)
337 mds->mds_flags |= MDS_UNMOUNT;
339 while (mds->mds_thread) {
340 wake_up(&mds->mds_waitq);
341 sleep_on(&mds->mds_done_waitq);
/*
 * mds_start_srv_thread - spawn the mds_main() kernel thread.
 * Initializes both wait queues, forks the thread sharing VM/FS/files
 * with the caller, and spins on mds_done_waitq until mds_main() has
 * published itself in mds->mds_thread (i.e. until startup completes).
 */
345 static void mds_start_srv_thread(struct mds_obd *mds)
347 init_waitqueue_head(&mds->mds_waitq);
348 init_waitqueue_head(&mds->mds_done_waitq);
349 kernel_thread(mds_main, (void *)mds,
350 CLONE_VM | CLONE_FS | CLONE_FILES);
351 while (!mds->mds_thread)
352 sleep_on(&mds->mds_done_waitq);
355 /* mount the file system (secretly) */
/*
 * mds_setup - obd setup entry point for the MDS device.
 *
 * Kern-mounts the backing filesystem named by the ioctl data
 * (ioc_inlbuf2 = fstype, ioc_inlbuf1 = device), records its superblock
 * and vfsmount, initializes the request list and run context
 * (pwd/pwdmnt/fs segment), and starts the server thread.
 *
 * NOTE(review): the !mds->mds_sb check comes AFTER the
 * mnt->mnt_root->d_inode->i_sb dereference that produced it — if the
 * mount failed, the dereference would already have oopsed; the elided
 * lines presumably check IS_ERR(mnt) first — confirm.
 *
 * NOTE(review): this initializes 'fo_lock', but mds_queue_req() locks
 * 'mds_lock' — verify these are the same spinlock (e.g. a union/alias in
 * the obd union) or one of them is a typo.
 */
356 static int mds_setup(struct obd_device *obddev, obd_count len,
360 struct obd_ioctl_data* data = buf;
361 struct mds_obd *mds = &obddev->u.mds;
362 struct vfsmount *mnt;
366 mnt = do_kern_mount(data->ioc_inlbuf2, 0,
367 data->ioc_inlbuf1, NULL);
374 mds->mds_sb = mnt->mnt_root->d_inode->i_sb;
375 if (!obddev->u.mds.mds_sb) {
380 INIT_LIST_HEAD(&mds->mds_reqs);
381 mds->mds_thread = NULL;
383 mds->mds_interval = 3 * HZ;
384 mds->mds_vfsmnt = mnt;
385 obddev->u.mds.mds_fstype = strdup(data->ioc_inlbuf2);
387 mds->mds_ctxt.pwdmnt = mnt;
388 mds->mds_ctxt.pwd = mnt->mnt_root;
389 mds->mds_ctxt.fs = KERNEL_DS;
392 spin_lock_init(&obddev->u.mds.fo_lock);
394 mds_start_srv_thread(mds);
/*
 * mds_cleanup - obd cleanup entry point for the MDS device.
 *
 * Refuses to tear down a device that was never set up (OBD_SET_UP) or
 * that still has generic clients attached; otherwise stops the server
 * thread, warns if requests are still queued (they are not drained —
 * see the XXX), and releases the backing vfsmount and the strdup'd
 * fstype string.
 */
401 static int mds_cleanup(struct obd_device * obddev)
403 struct super_block *sb;
404 struct mds_obd *mds = &obddev->u.mds;
408 if ( !(obddev->obd_flags & OBD_SET_UP) ) {
413 if ( !list_empty(&obddev->obd_gen_clients) ) {
414 printk(KERN_WARNING __FUNCTION__ ": still has clients!\n");
420 mds_stop_srv_thread(mds);
427 if (!list_empty(&mds->mds_reqs)) {
428 // XXX reply with errors and clean up
429 CDEBUG(D_INODE, "Request list not empty!\n");
/* drop the kern_mount reference taken in mds_setup() */
433 mntput(mds->mds_vfsmnt);
435 kfree(mds->mds_fstype);
444 /* use obd ops to offer management infrastructure */
/*
 * Method table registered with the obd class layer; only o_cleanup is
 * visible in this extract (the o_setup entry is presumably in an elided
 * line — confirm against the full file).
 */
445 static struct obd_ops mds_obd_ops = {
447 o_cleanup: mds_cleanup,
/* Module init: register the MDS device type with the obd class layer. */
450 static int __init mds_init(void)
452 obd_register_type(&mds_obd_ops, LUSTRE_MDS_NAME);
/* Module exit: unregister the MDS device type. */
456 static void __exit mds_exit(void)
458 obd_unregister_type(LUSTRE_MDS_NAME);
/* Standard kernel module metadata and entry-point registration. */
461 MODULE_AUTHOR("Peter J. Braam <braam@clusterfs.com>");
462 MODULE_DESCRIPTION("Lustre Metadata Server (MDS) v0.01");
463 MODULE_LICENSE("GPL");
466 // for testing (maybe this stays)
467 EXPORT_SYMBOL(mds_queue_req);
469 module_init(mds_init);
470 module_exit(mds_exit);