1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
5 * Author: Peter J. Braam <braam@clusterfs.com>
6 * Author: Phil Schwan <phil@clusterfs.com>
8 * This file is part of Lustre, http://www.lustre.org.
10 * Lustre is free software; you can redistribute it and/or
11 * modify it under the terms of version 2 of the GNU General Public
12 * License as published by the Free Software Foundation.
14 * Lustre is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with Lustre; if not, write to the Free Software
21 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 * Storage Target Handling functions
24 * Lustre Object Server Module (OST)
26 * This server is single threaded at present (but can easily be multi
27 * threaded). For testing and management it is treated as an
28 * obd_device, although it does not export a full OBD method table
29 * (the requests are coming in over the wire, so object target
30 * modules do not have a full method table.)
34 #define DEBUG_SUBSYSTEM S_OST
36 #include <linux/module.h>
37 #include <linux/obd_ost.h>
38 #include <linux/lustre_net.h>
39 #include <linux/lustre_dlm.h>
40 #include <linux/init.h>
/*
 * ost_destroy - handle an object-destroy request.
 *
 * Takes the ost_body from buffer 0 of the request message, asks
 * lustre_pack_msg() for a one-buffer reply of sizeof(*body) bytes, and
 * stores the result of obd_destroy() on body->oa in req->rq_status.
 *
 * NOTE(review): conn is simply the request-message pointer cast to a
 * lustre_handle, i.e. the handle is assumed to sit at the start of the
 * message -- confirm against the message layout.
 * NOTE(review): the check of rc after lustre_pack_msg() and the return
 * statements are not visible in this excerpt.
 */
42 static int ost_destroy(struct ptlrpc_request *req)
44 struct lustre_handle *conn = (struct lustre_handle *)req->rq_reqmsg;
45 struct ost_body *body;
46 int rc, size = sizeof(*body);
49 body = lustre_msg_buf(req->rq_reqmsg, 0);
51 rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
/* The operation's outcome travels in rq_status; no reply body is filled in. */
55 req->rq_status = obd_destroy(conn, &body->oa, NULL);
/*
 * ost_getattr - handle an attribute query for one object.
 *
 * The oa field of the request body (buffer 0) is copied into buffer 0
 * of the reply set up by lustre_pack_msg(); obd_getattr() is then run
 * on the reply's copy and its return value is stored in req->rq_status.
 *
 * NOTE(review): the rc check after lustre_pack_msg() and the return
 * statements are not visible in this excerpt.
 */
59 static int ost_getattr(struct ptlrpc_request *req)
61 struct lustre_handle *conn = (struct lustre_handle *)req->rq_reqmsg;
62 struct ost_body *body, *repbody;
63 int rc, size = sizeof(*body);
66 body = lustre_msg_buf(req->rq_reqmsg, 0);
68 rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
/* Seed the reply with the caller's oa so obd_getattr() works on it in place. */
72 repbody = lustre_msg_buf(req->rq_repmsg, 0);
73 memcpy(&repbody->oa, &body->oa, sizeof(body->oa));
74 req->rq_status = obd_getattr(conn, &repbody->oa, NULL);
/*
 * ost_statfs - handle a statfs request.
 *
 * Calls obd_statfs() into a local sfs, logs a CERROR on the failure
 * path, then packs a one-buffer reply of sizeof(struct obd_statfs),
 * zeroes it and fills it with obd_statfs_pack().
 *
 * NOTE(review): the declaration of sfs, the conditions guarding the
 * CERROR, and the return statements are not visible in this excerpt.
 */
78 static int ost_statfs(struct ptlrpc_request *req)
80 struct lustre_handle *conn = (struct lustre_handle *)req->rq_reqmsg;
81 struct obd_statfs *osfs;
83 int rc, size = sizeof(*osfs);
/* Query first, so nothing is packed if the backend call fails. */
86 rc = obd_statfs(conn, &sfs);
88 CERROR("ost: statfs failed: rc %d\n", rc);
93 rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
/* Clear the whole reply buffer before packing the statfs data into it. */
97 osfs = lustre_msg_buf(req->rq_repmsg, 0);
98 memset(osfs, 0, size);
99 obd_statfs_pack(osfs, &sfs);
/*
 * ost_open - handle an object-open request.
 *
 * Copies the request's oa (buffer 0) into buffer 0 of the reply set up
 * by lustre_pack_msg(), runs obd_open() on the reply's copy and stores
 * its return value in req->rq_status.
 *
 * NOTE(review): the rc check after lustre_pack_msg() and the return
 * statements are not visible in this excerpt.
 */
103 static int ost_open(struct ptlrpc_request *req)
105 struct lustre_handle *conn = (struct lustre_handle *)req->rq_reqmsg;
106 struct ost_body *body, *repbody;
107 int rc, size = sizeof(*body);
110 body = lustre_msg_buf(req->rq_reqmsg, 0);
112 rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
/* obd_open() operates on the reply's copy of the oa. */
116 repbody = lustre_msg_buf(req->rq_repmsg, 0);
117 memcpy(&repbody->oa, &body->oa, sizeof(body->oa));
118 req->rq_status = obd_open(conn, &repbody->oa, NULL);
/*
 * ost_close - handle an object-close request.
 *
 * Copies the request's oa (buffer 0) into buffer 0 of the reply set up
 * by lustre_pack_msg(), runs obd_close() on the reply's copy and stores
 * its return value in req->rq_status.
 *
 * NOTE(review): the rc check after lustre_pack_msg() and the return
 * statements are not visible in this excerpt.
 */
122 static int ost_close(struct ptlrpc_request *req)
124 struct lustre_handle *conn = (struct lustre_handle *)req->rq_reqmsg;
125 struct ost_body *body, *repbody;
126 int rc, size = sizeof(*body);
129 body = lustre_msg_buf(req->rq_reqmsg, 0);
131 rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
/* obd_close() operates on the reply's copy of the oa. */
135 repbody = lustre_msg_buf(req->rq_repmsg, 0);
136 memcpy(&repbody->oa, &body->oa, sizeof(body->oa));
137 req->rq_status = obd_close(conn, &repbody->oa, NULL);
/*
 * ost_create - handle an object-create request.
 *
 * Copies the request's oa (buffer 0) into buffer 0 of the reply set up
 * by lustre_pack_msg(), runs obd_create() on the reply's copy and
 * stores its return value in req->rq_status.  Because obd_create() is
 * handed the reply's oa, whatever it writes there goes back to the
 * client.
 *
 * NOTE(review): the rc check after lustre_pack_msg() and the return
 * statements are not visible in this excerpt.
 */
141 static int ost_create(struct ptlrpc_request *req)
143 struct lustre_handle *conn = (struct lustre_handle *)req->rq_reqmsg;
144 struct ost_body *body, *repbody;
145 int rc, size = sizeof(*body);
148 body = lustre_msg_buf(req->rq_reqmsg, 0);
150 rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
/* Start from the client's oa, then let obd_create() update it in the reply. */
154 repbody = lustre_msg_buf(req->rq_repmsg, 0);
155 memcpy(&repbody->oa, &body->oa, sizeof(body->oa));
156 req->rq_status = obd_create(conn, &repbody->oa, NULL);
/*
 * ost_punch - handle a punch request.
 *
 * Copies the request's oa (buffer 0) into buffer 0 of the reply set up
 * by lustre_pack_msg(), then calls obd_punch() on the reply's copy and
 * stores its return value in req->rq_status.
 *
 * NOTE(review): oa.o_blocks and oa.o_size are passed as the two
 * trailing arguments of obd_punch(); presumably these fields are reused
 * to carry the range to punch -- confirm against obd_punch()'s
 * prototype and the client side.
 * NOTE(review): the rc check after lustre_pack_msg() and the return
 * statements are not visible in this excerpt.
 */
160 static int ost_punch(struct ptlrpc_request *req)
162 struct lustre_handle *conn = (struct lustre_handle *)req->rq_reqmsg;
163 struct ost_body *body, *repbody;
164 int rc, size = sizeof(*body);
167 body = lustre_msg_buf(req->rq_reqmsg, 0);
169 rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
/* obd_punch() operates on the reply's copy of the oa. */
173 repbody = lustre_msg_buf(req->rq_repmsg, 0);
174 memcpy(&repbody->oa, &body->oa, sizeof(body->oa));
175 req->rq_status = obd_punch(conn, &repbody->oa, NULL,
176 repbody->oa.o_blocks, repbody->oa.o_size);
/*
 * ost_setattr - handle an attribute update for one object.
 *
 * Copies the request's oa (buffer 0) into buffer 0 of the reply set up
 * by lustre_pack_msg(), runs obd_setattr() on the reply's copy and
 * stores its return value in req->rq_status.
 *
 * NOTE(review): the rc check after lustre_pack_msg() and the return
 * statements are not visible in this excerpt.
 */
180 static int ost_setattr(struct ptlrpc_request *req)
182 struct lustre_handle *conn = (struct lustre_handle *)req->rq_reqmsg;
183 struct ost_body *body, *repbody;
184 int rc, size = sizeof(*body);
187 body = lustre_msg_buf(req->rq_reqmsg, 0);
189 rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
/* obd_setattr() operates on the reply's copy of the oa. */
193 repbody = lustre_msg_buf(req->rq_repmsg, 0);
194 memcpy(&repbody->oa, &body->oa, sizeof(body->oa));
195 req->rq_status = obd_setattr(conn, &repbody->oa, NULL);
/*
 * ost_bulk_timeout - timeout callback handed to LWI_TIMEOUT() by the
 * bulk read and write handlers in this file.
 *
 * @data is the ptlrpc_bulk_desc whose transfer was being waited on.
 * The visible body only logs the affected client; as the message says,
 * recovery is not started yet.
 *
 * NOTE(review): the return value is not visible in this excerpt.
 */
199 static int ost_bulk_timeout(void *data)
201 struct ptlrpc_bulk_desc *desc = data;
204 CERROR("(not yet) starting recovery of client %p\n", desc->b_client);
/*
 * ost_brw_read - serve a bulk read and send the reply itself.
 *
 * Request buffers as used here: buffer 0 is the ost_body, buffer 1 is
 * treated as an array of obd_ioobj and buffer 2 as an array of
 * niobuf_remote; objcount and niocount are derived from the byte
 * lengths of buffers 1 and 2.
 *
 * Visible sequence:
 *   1. walk every ioobj and its niobufs with the unpackers, checking
 *      that the niobuf cursor stays inside buffer 2;
 *   2. allocate niocount local niobufs and call obd_preprw();
 *   3. build a bulk descriptor with one PAGE_SIZE page per niobuf and
 *      push it with ptlrpc_send_bulk();
 *   4. wait for PTL_BULK_FL_SENT, with ost_bulk_timeout() as the
 *      timeout callback after obd_timeout * HZ;
 *   5. call obd_commitrw(), pack a one-buffer reply, release the bulk
 *      descriptor and the local niobufs, then answer with
 *      ptlrpc_error() or ptlrpc_reply().
 *
 * NOTE(review): cmd is passed to obd_preprw()/obd_commitrw() but its
 * assignment is not visible in this excerpt; the same goes for the
 * cleanup labels, most rc checks and the return statements.
 */
208 static int ost_brw_read(struct ptlrpc_request *req)
210 struct lustre_handle *conn = (struct lustre_handle *)req->rq_reqmsg;
211 struct ptlrpc_bulk_desc *desc;
212 void *tmp1, *tmp2, *end2;
213 struct niobuf_remote *remote_nb;
214 struct niobuf_local *local_nb = NULL;
215 struct obd_ioobj *ioo;
216 struct ost_body *body;
217 struct l_wait_info lwi;
218 int rc, cmd, i, j, objcount, niocount, size = sizeof(*body);
221 body = lustre_msg_buf(req->rq_reqmsg, 0);
222 tmp1 = lustre_msg_buf(req->rq_reqmsg, 1);
223 tmp2 = lustre_msg_buf(req->rq_reqmsg, 2);
/* end2 is one byte past the niobuf buffer; the counts come from byte lengths. */
224 end2 = (char *)tmp2 + req->rq_reqmsg->buflens[2];
225 objcount = req->rq_reqmsg->buflens[1] / sizeof(*ioo);
226 niocount = req->rq_reqmsg->buflens[2] / sizeof(*remote_nb);
229 if (OBD_FAIL_CHECK(OBD_FAIL_OST_BRW_READ_BULK))
/*
 * Validation pass over the request.
 * NOTE(review): tmp2 is a void pointer, so "tmp2 + ioo->ioo_bufcnt"
 * advances by ioo_bufcnt BYTES (GNU void-pointer arithmetic), while the
 * inner loop consumes ioo_bufcnt niobuf entries.  The bound looks
 * under-scaled by sizeof(*remote_nb) -- confirm.
 */
232 for (i = 0; i < objcount; i++) {
233 ost_unpack_ioo(&tmp1, &ioo);
234 if (tmp2 + ioo->ioo_bufcnt > end2) {
236 GOTO(out, rc = -EFAULT);
238 for (j = 0; j < ioo->ioo_bufcnt; j++)
239 ost_unpack_niobuf(&tmp2, &remote_nb);
/* One local niobuf per remote niobuf. */
242 OBD_ALLOC(local_nb, sizeof(*local_nb) * niocount);
243 if (local_nb == NULL)
244 GOTO(out, rc = -ENOMEM);
246 /* The unpackers move tmp1 and tmp2, so reset them before using */
247 tmp1 = lustre_msg_buf(req->rq_reqmsg, 1);
248 tmp2 = lustre_msg_buf(req->rq_reqmsg, 2);
249 req->rq_status = obd_preprw(cmd, conn, objcount,
250 tmp1, niocount, tmp2, local_nb, NULL);
255 desc = ptlrpc_prep_bulk(req->rq_connection);
257 GOTO(out_local, rc = -ENOMEM);
258 desc->b_portal = OST_BULK_PORTAL;
/*
 * Pair each local buffer with the xid carried in the matching remote
 * niobuf; tmp2 still points at the start of buffer 2 here.
 */
260 for (i = 0; i < niocount; i++) {
261 struct ptlrpc_bulk_page *bulk;
262 bulk = ptlrpc_prep_bulk_page(desc);
264 GOTO(out_bulk, rc = -ENOMEM);
265 remote_nb = &(((struct niobuf_remote *)tmp2)[i]);
266 bulk->b_xid = remote_nb->xid;
267 bulk->b_buf = (void *)(unsigned long)local_nb[i].addr;
268 bulk->b_buflen = PAGE_SIZE;
271 rc = ptlrpc_send_bulk(desc);
/* Block until the descriptor is flagged SENT or the timeout fires. */
275 lwi = LWI_TIMEOUT(obd_timeout * HZ, ost_bulk_timeout, desc);
276 rc = l_wait_event(desc->b_waitq, desc->b_flags & PTL_BULK_FL_SENT, &lwi);
/* NOTE(review): the condition guarding this assertion is not visible here. */
278 LASSERT(rc == -ETIMEDOUT);
282 /* The unpackers move tmp1 and tmp2, so reset them before using */
283 tmp1 = lustre_msg_buf(req->rq_reqmsg, 1);
284 tmp2 = lustre_msg_buf(req->rq_reqmsg, 2);
285 req->rq_status = obd_commitrw(cmd, conn, objcount,
286 tmp1, niocount, local_nb, NULL);
288 rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
/* Cleanup and reply; the labels separating these steps are not visible here. */
291 ptlrpc_free_bulk(desc);
293 OBD_FREE(local_nb, sizeof(*local_nb) * niocount);
296 ptlrpc_error(req->rq_svc, req);
298 ptlrpc_reply(req->rq_svc, req);
/*
 * ost_brw_write - serve a bulk write and send the reply itself.
 *
 * Request buffers as used here: buffer 0 is the ost_body, buffer 1 is
 * treated as an array of obd_ioobj and buffer 2 as an array of
 * niobuf_remote.  The reply has two buffers: an ost_body and
 * niocount niobuf_remote slots that are filled by ost_pack_niobuf().
 *
 * Visible sequence:
 *   1. walk the ioobjs/niobufs with the unpackers, bounds-checking the
 *      niobuf cursor;
 *   2. pack the two-buffer reply and allocate niocount local niobufs;
 *   3. obd_preprw() to obtain local buffers (and desc_priv);
 *   4. build a bulk descriptor on OSC_BULK_PORTAL, tag every page with
 *      one xid taken from the service under srv_lock, and describe each
 *      page to the client through the reply's niobuf array;
 *   5. ptlrpc_register_bulk(), then ptlrpc_reply(), then wait for
 *      PTL_BULK_FL_RCVD with ost_bulk_timeout() as timeout callback;
 *   6. obd_commitrw(), free the bulk descriptor and the local niobufs.
 *
 * NOTE(review): the assignment of cmd, the declaration of xid, the
 * cleanup labels, most rc checks and the return statements are not
 * visible in this excerpt.
 */
302 static int ost_brw_write(struct ptlrpc_request *req)
304 struct lustre_handle *conn = (struct lustre_handle *)req->rq_reqmsg;
305 struct ptlrpc_bulk_desc *desc;
306 struct niobuf_remote *remote_nb;
307 struct niobuf_local *local_nb, *lnb;
308 struct obd_ioobj *ioo;
309 struct ost_body *body;
/* size[1] starts at 0 and is set once niocount has been validated below. */
310 int cmd, rc, i, j, objcount, niocount, size[2] = {sizeof(*body)};
311 void *tmp1, *tmp2, *end2;
312 void *desc_priv = NULL;
314 struct ptlrpc_service *srv;
315 struct l_wait_info lwi;
319 body = lustre_msg_buf(req->rq_reqmsg, 0);
320 tmp1 = lustre_msg_buf(req->rq_reqmsg, 1);
321 tmp2 = lustre_msg_buf(req->rq_reqmsg, 2);
322 end2 = (char *)tmp2 + req->rq_reqmsg->buflens[2];
323 objcount = req->rq_reqmsg->buflens[1] / sizeof(*ioo);
324 niocount = req->rq_reqmsg->buflens[2] / sizeof(*remote_nb);
/*
 * Validation pass over the request.
 * NOTE(review): tmp2 is a void pointer, so "tmp2 + ioo->ioo_bufcnt"
 * advances by ioo_bufcnt BYTES, while the inner loop consumes
 * ioo_bufcnt niobuf entries.  The bound looks under-scaled by
 * sizeof(*remote_nb) -- confirm.
 */
327 for (i = 0; i < objcount; i++) {
328 ost_unpack_ioo((void *)&tmp1, &ioo);
329 if (tmp2 + ioo->ioo_bufcnt > end2) {
333 for (j = 0; j < ioo->ioo_bufcnt; j++)
334 ost_unpack_niobuf((void *)&tmp2, &remote_nb);
/* Second reply buffer: one niobuf_remote per page the client should send. */
337 size[1] = niocount * sizeof(*remote_nb);
338 rc = lustre_pack_msg(2, size, NULL, &req->rq_replen, &req->rq_repmsg);
/* From here on remote_nb is the write cursor into the REPLY's niobuf array. */
341 remote_nb = lustre_msg_buf(req->rq_repmsg, 1);
343 OBD_ALLOC(local_nb, niocount * sizeof(*local_nb));
344 if (local_nb == NULL)
345 GOTO(out, rc = -ENOMEM);
347 /* The unpackers move tmp1 and tmp2, so reset them before using */
348 tmp1 = lustre_msg_buf(req->rq_reqmsg, 1);
349 tmp2 = lustre_msg_buf(req->rq_reqmsg, 2);
350 req->rq_status = obd_preprw(cmd, conn, objcount,
351 tmp1, niocount, tmp2, local_nb, &desc_priv);
353 GOTO(out_free, rc = 0); /* XXX is this correct? */
355 if (OBD_FAIL_CHECK(OBD_FAIL_OST_BRW_WRITE_BULK))
356 GOTO(fail_preprw, rc = 0);
358 desc = ptlrpc_prep_bulk(req->rq_connection);
360 GOTO(fail_preprw, rc = -ENOMEM);
362 desc->b_portal = OSC_BULK_PORTAL;
363 desc->b_desc_private = desc_priv;
/*
 * NOTE(review): conn is a pointer, so &conn / sizeof(conn) copy the
 * pointer value (pointer-sized), not the lustre_handle it points to.
 * If b_conn is meant to hold the handle itself this should probably be
 * memcpy(&desc->b_conn, conn, sizeof(*conn)) -- confirm b_conn's type.
 */
364 memcpy(&(desc->b_conn), &conn, sizeof(conn));
/* Take the next service xid under the service lock. */
366 srv = req->rq_obd->u.ost.ost_service;
367 spin_lock(&srv->srv_lock);
368 xid = srv->srv_xid++; /* single xid for all pages */
369 spin_unlock(&srv->srv_lock);
/* One bulk page per local niobuf; lnb walks local_nb in step with i. */
371 for (i = 0, lnb = local_nb; i < niocount; i++, lnb++) {
372 struct ptlrpc_bulk_page *bulk;
374 bulk = ptlrpc_prep_bulk_page(desc);
376 GOTO(fail_bulk, rc = -ENOMEM);
378 bulk->b_xid = xid; /* single xid for all pages */
380 bulk->b_buf = lnb->addr;
381 bulk->b_page = lnb->page;
382 bulk->b_flags = lnb->flags;
383 bulk->b_dentry = lnb->dentry;
384 bulk->b_buflen = PAGE_SIZE;
387 /* this advances remote_nb */
388 ost_pack_niobuf((void **)&remote_nb, lnb->offset, lnb->len, 0,
392 rc = ptlrpc_register_bulk(desc);
/*
 * In the visible order the reply goes out after the bulk descriptor is
 * registered and before the wait for the incoming data.
 */
397 ptlrpc_reply(req->rq_svc, req);
399 lwi = LWI_TIMEOUT(obd_timeout * HZ, ost_bulk_timeout, desc);
400 rc = l_wait_event(desc->b_waitq, desc->b_flags & PTL_BULK_FL_RCVD, &lwi);
/* NOTE(review): the statement guarded by this test is not visible here. */
402 if (rc != -ETIMEDOUT)
/* tmp1 was reset to the start of buffer 1 above and is still valid here. */
407 rc = obd_commitrw(cmd, conn, objcount, tmp1, niocount, local_nb,
408 desc->b_desc_private);
409 ptlrpc_free_bulk(desc);
412 OBD_FREE(local_nb, niocount * sizeof(*local_nb));
/* Failure/exit paths; the labels separating these steps are not visible here. */
416 ptlrpc_error(req->rq_svc, req);
418 ptlrpc_reply(req->rq_svc, req);
423 ptlrpc_free_bulk(desc);
425 /* FIXME: how do we undo the preprw? */
/*
 * ost_handle - per-request entry point of the OST service; ost_setup()
 * passes it to ptlrpc_init_svc() as the handler.
 *
 * Visible sequence:
 *   1. lustre_unpack_msg() on the request; a non-zero rc or the
 *      OBD_FAIL_OST_HANDLE_UNPACK fail check logs "Invalid request";
 *   2. reject messages whose type is not PTL_RPC_MSG_REQUEST (-EINVAL);
 *   3. reject every opcode except OST_CONNECT when the request has no
 *      export (-ENOTCONN);
 *   4. reject requests whose obd type name is not "ost" (-EINVAL);
 *   5. dispatch on the opcode to the ost_*, target_handle_* and
 *      ldlm_handle_* handlers;
 *   6. finish with ptlrpc_error() on failure or ptlrpc_reply() on
 *      success.  The bulk read/write handlers send their own replies.
 *
 * Each case runs an OBD_FAIL_RETURN() hook before its handler;
 * presumably this is fault injection that returns 0 without handling
 * the request -- confirm against the macro definition.
 *
 * NOTE(review): the case labels, break/return statements, the
 * declaration of rc and the out label are not visible in this excerpt.
 */
429 static int ost_handle(struct ptlrpc_request *req)
434 rc = lustre_unpack_msg(req->rq_reqmsg, req->rq_reqlen);
435 if (rc || OBD_FAIL_CHECK(OBD_FAIL_OST_HANDLE_UNPACK)) {
436 CERROR("lustre_ost: Invalid request\n");
440 if (req->rq_reqmsg->type != PTL_RPC_MSG_REQUEST) {
441 CERROR("lustre_ost: wrong packet type sent %d\n",
442 req->rq_reqmsg->type);
443 GOTO(out, rc = -EINVAL);
/* Only a connect request may arrive without an export. */
446 if (req->rq_reqmsg->opc != OST_CONNECT &&
447 req->rq_export == NULL) {
448 CERROR("lustre_ost: operation %d on unconnected OST\n",
449 req->rq_reqmsg->opc);
450 GOTO(out, rc = -ENOTCONN);
/* Refuse requests that were routed to a device of another type. */
453 if (strcmp(req->rq_obd->obd_type->typ_name, "ost") != 0)
454 GOTO(out, rc = -EINVAL);
456 switch (req->rq_reqmsg->opc) {
458 CDEBUG(D_INODE, "connect\n");
459 OBD_FAIL_RETURN(OBD_FAIL_OST_CONNECT_NET, 0);
460 rc = target_handle_connect(req);
463 CDEBUG(D_INODE, "disconnect\n");
464 OBD_FAIL_RETURN(OBD_FAIL_OST_DISCONNECT_NET, 0);
465 rc = target_handle_disconnect(req);
468 CDEBUG(D_INODE, "create\n");
469 OBD_FAIL_RETURN(OBD_FAIL_OST_CREATE_NET, 0);
470 rc = ost_create(req);
473 CDEBUG(D_INODE, "destroy\n");
474 OBD_FAIL_RETURN(OBD_FAIL_OST_DESTROY_NET, 0);
475 rc = ost_destroy(req);
478 CDEBUG(D_INODE, "getattr\n");
479 OBD_FAIL_RETURN(OBD_FAIL_OST_GETATTR_NET, 0);
480 rc = ost_getattr(req);
483 CDEBUG(D_INODE, "setattr\n");
484 OBD_FAIL_RETURN(OBD_FAIL_OST_SETATTR_NET, 0);
485 rc = ost_setattr(req);
/*
 * NOTE(review): the next two debug messages say "setattr" although the
 * fail points are OBD_FAIL_OST_OPEN_NET and OBD_FAIL_OST_CLOSE_NET.
 * They look like copy-paste leftovers and presumably should read "open"
 * and "close" -- confirm against the (not visible) case labels.
 */
488 CDEBUG(D_INODE, "setattr\n");
489 OBD_FAIL_RETURN(OBD_FAIL_OST_OPEN_NET, 0);
493 CDEBUG(D_INODE, "setattr\n");
494 OBD_FAIL_RETURN(OBD_FAIL_OST_CLOSE_NET, 0);
498 CDEBUG(D_INODE, "write\n");
499 OBD_FAIL_RETURN(OBD_FAIL_OST_BRW_NET, 0);
500 rc = ost_brw_write(req);
501 /* ost_brw sends its own replies */
504 CDEBUG(D_INODE, "read\n");
505 OBD_FAIL_RETURN(OBD_FAIL_OST_BRW_NET, 0);
506 rc = ost_brw_read(req);
507 /* ost_brw sends its own replies */
510 CDEBUG(D_INODE, "punch\n");
511 OBD_FAIL_RETURN(OBD_FAIL_OST_PUNCH_NET, 0);
515 CDEBUG(D_INODE, "statfs\n");
516 OBD_FAIL_RETURN(OBD_FAIL_OST_STATFS_NET, 0);
517 rc = ost_statfs(req);
/* Lock-manager requests are forwarded to the ldlm handlers. */
520 CDEBUG(D_INODE, "enqueue\n");
521 OBD_FAIL_RETURN(OBD_FAIL_LDLM_ENQUEUE, 0);
522 rc = ldlm_handle_enqueue(req);
527 CDEBUG(D_INODE, "convert\n");
528 OBD_FAIL_RETURN(OBD_FAIL_LDLM_CONVERT, 0);
529 rc = ldlm_handle_convert(req);
534 CDEBUG(D_INODE, "cancel\n");
535 OBD_FAIL_RETURN(OBD_FAIL_LDLM_CANCEL, 0);
536 rc = ldlm_handle_cancel(req);
/* Lock callbacks are not expected on this side; they are logged as errors. */
540 case LDLM_BL_CALLBACK:
541 case LDLM_CP_CALLBACK:
542 CDEBUG(D_INODE, "callback\n");
543 CERROR("callbacks should not happen on OST\n");
545 OBD_FAIL_RETURN(OBD_FAIL_LDLM_BL_CALLBACK, 0);
/* Presumably the default case: answer with -ENOTSUPP -- label not visible. */
548 req->rq_status = -ENOTSUPP;
549 rc = ptlrpc_error(req->rq_svc, req);
555 //req->rq_status = rc;
/*
 * Common exit: report a processing error, or send the reply the handler
 * prepared.  A handler that returned 0 without creating rq_repmsg is
 * flagged loudly before ptlrpc_reply() is called.
 */
557 CERROR("ost: processing error (opcode=%d): %d\n",
558 req->rq_reqmsg->opc, rc);
559 ptlrpc_error(req->rq_svc, req);
561 CDEBUG(D_INODE, "sending reply\n");
562 if (req->rq_repmsg == NULL)
563 CERROR("handler for opcode %d returned rc=0 without "
564 "creating rq_repmsg; needs to return rc != "
565 "0!\n", req->rq_reqmsg->opc);
566 ptlrpc_reply(req->rq_svc, req);
/* Number of service threads ost_setup() starts, named lustre_ost_00 onward. */
572 #define OST_NUM_THREADS 6
574 /* mount the file system (secretly) */
/*
 * ost_setup - bring up the OST device on top of a target OBD.
 *
 * @obddev: the OST obd_device being set up.
 * @len:    not used in the visible code.
 * @buf:    an obd_ioctl_data whose ioc_inlbuf1 names the target OBD by
 *          UUID (ioc_inllen1 must be between 1 and 37).
 *
 * Visible sequence: validate the UUID length, look the target up with
 * class_uuid2obd() and require it to be attached and set up,
 * obd_connect() to it through ost->ost_conn, create the request service
 * with ost_handle() as its handler, then start OST_NUM_THREADS service
 * threads.  The visible error handling disconnects from the target.
 *
 * NOTE(review): the declarations of i, err and name, the error labels
 * and the return statements are not visible in this excerpt.
 */
575 static int ost_setup(struct obd_device *obddev, obd_count len, void *buf)
577 struct obd_ioctl_data* data = buf;
578 struct ost_obd *ost = &obddev->u.ost;
579 struct obd_device *tgt;
584 if (data->ioc_inllen1 < 1) {
585 CERROR("requires a TARGET OBD UUID\n");
588 if (data->ioc_inllen1 > 37) {
589 CERROR("OBD UUID must be less than 38 characters\n");
/* The target must exist and be both attached and set up. */
594 tgt = class_uuid2obd(data->ioc_inlbuf1);
595 if (!tgt || !(tgt->obd_flags & OBD_ATTACHED) ||
596 !(tgt->obd_flags & OBD_SET_UP)) {
597 CERROR("device not attached or not set up (%d)\n",
599 GOTO(error_dec, err = -EINVAL);
/* Any connect failure is reported to the caller as -EINVAL. */
602 err = obd_connect(&ost->ost_conn, tgt, NULL);
604 CERROR("fail to connect to device %d\n", data->ioc_dev);
605 GOTO(error_dec, err = -EINVAL);
/* Requests arrive on OST_REQUEST_PORTAL; replies go to OSC_REPLY_PORTAL. */
608 ost->ost_service = ptlrpc_init_svc(64 * 1024, OST_REQUEST_PORTAL,
609 OSC_REPLY_PORTAL, "self",ost_handle);
610 if (!ost->ost_service) {
611 CERROR("failed to start service\n");
612 GOTO(error_disc, err = -EINVAL);
/*
 * NOTE(review): sprintf() into name is unbounded; its declaration is
 * not visible here, so confirm the buffer holds "lustre_ost_NN" plus
 * the terminator.
 * NOTE(review): when a thread fails to start, the visible error path
 * only disconnects from the target; no teardown of ost_service or of
 * the threads already started is visible -- confirm.
 */
615 for (i = 0; i < OST_NUM_THREADS; i++) {
617 sprintf(name, "lustre_ost_%02d", i);
618 err = ptlrpc_start_thread(obddev, ost->ost_service, name);
620 CERROR("error starting thread #%d: rc %d\n", i, err);
621 GOTO(error_disc, err = -EINVAL);
/* Error unwinding: drop the connection to the target. */
628 obd_disconnect(&ost->ost_conn);
/*
 * ost_cleanup - tear down the OST device.
 *
 * Complains if the device still has exports ("still has clients!"),
 * then stops all service threads, unregisters the service and
 * disconnects from the target OBD, logging a CERROR on the disconnect
 * failure path.
 *
 * NOTE(review): what happens after the "still has clients!" message,
 * the test guarding the disconnect error, the declaration of err and
 * the return statements are not visible in this excerpt.
 */
634 static int ost_cleanup(struct obd_device * obddev)
636 struct ost_obd *ost = &obddev->u.ost;
641 if ( !list_empty(&obddev->obd_exports) ) {
642 CERROR("still has clients!\n");
/* Threads are stopped before the service they run on is unregistered. */
646 ptlrpc_stop_all_threads(ost->ost_service);
647 ptlrpc_unregister_service(ost->ost_service);
649 err = obd_disconnect(&ost->ost_conn);
651 CERROR("lustre ost: fail to disconnect device\n");
659 /* use obd ops to offer management infrastructure */
/*
 * Method table registered for the OST type by ost_init().  Entries use
 * the old GNU "field: value" initializer syntax.
 *
 * NOTE(review): only the o_cleanup entry is visible in this excerpt;
 * any other entries and the closing brace are not.
 */
660 static struct obd_ops ost_obd_ops = {
662 o_cleanup: ost_cleanup,
/*
 * ost_init - module entry point: registers the OST obd type, with
 * ost_obd_ops as its method table, under LUSTRE_OST_NAME.
 *
 * NOTE(review): how the result of class_register_type() is used and
 * what is returned are not visible in this excerpt.
 */
665 static int __init ost_init(void)
667 class_register_type(&ost_obd_ops, LUSTRE_OST_NAME);
/* ost_exit - module exit point: unregisters the type added by ost_init(). */
671 static void __exit ost_exit(void)
673 class_unregister_type(LUSTRE_OST_NAME);
/* Module metadata. */
676 MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
677 MODULE_DESCRIPTION("Lustre Object Storage Target (OST) v0.01");
678 MODULE_LICENSE("GPL");
/* Register ost_init()/ost_exit() as the module's init and exit functions. */
680 module_init(ost_init);
681 module_exit(ost_exit);