1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
5 * Author: Peter J. Braam <braam@clusterfs.com>
6 * Author: Phil Schwan <phil@clusterfs.com>
8 * This file is part of Lustre, http://www.lustre.org.
10 * Lustre is free software; you can redistribute it and/or
11 * modify it under the terms of version 2 of the GNU General Public
12 * License as published by the Free Software Foundation.
14 * Lustre is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with Lustre; if not, write to the Free Software
21 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 * Storage Target Handling functions
24 * Lustre Object Server Module (OST)
26 * This server is single threaded at present (but can easily be multi
27 * threaded). For testing and management it is treated as an
28 * obd_device, although it does not export a full OBD method table
29 * (the requests are coming in over the wire, so object target
30 * modules do not have a full method table.)
34 #define DEBUG_SUBSYSTEM S_OST
36 #include <linux/module.h>
37 #include <linux/obd_ost.h>
38 #include <linux/lustre_net.h>
39 #include <linux/lustre_dlm.h>
/*
 * ost_destroy - service a destroy request.
 *
 * Takes the ost_body from buffer 0 of the request, sets up a one-buffer
 * reply of sizeof(struct ost_body) bytes, and records the result of
 * obd_destroy() on the request's object attributes in req->rq_status.
 *
 * NOTE(review): this listing omits several original lines (see the gaps
 * in the embedded line numbers); the braces, whatever is done with rc
 * after lustre_pack_msg(), and the return statement are not visible here.
 */
41 static int ost_destroy(struct ptlrpc_request *req)
/* The request message pointer is reinterpreted as the connection handle. */
43 struct lustre_handle *conn = (struct lustre_handle *)req->rq_reqmsg;
44 struct ost_body *body;
45 int rc, size = sizeof(*body);
/* Buffer 0 of the request message holds the ost_body. */
48 body = lustre_msg_buf(req->rq_reqmsg, 0);
/* One reply buffer of `size` bytes; fills in rq_replen and rq_repmsg. */
50 rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
/* The status of the destroy itself is stored in rq_status. */
54 req->rq_status = obd_destroy(conn, &body->oa, NULL);
/*
 * ost_getattr - service a getattr request.
 *
 * Copies the object attributes (oa) from the request body into the reply
 * body, then calls obd_getattr() on the reply copy and stores its result
 * in req->rq_status.
 *
 * NOTE(review): lines are elided from this listing (gaps in the embedded
 * numbering); the check of rc and the return statement are not visible.
 */
58 static int ost_getattr(struct ptlrpc_request *req)
/* The request message pointer is reinterpreted as the connection handle. */
60 struct lustre_handle *conn = (struct lustre_handle *)req->rq_reqmsg;
61 struct ost_body *body, *repbody;
62 int rc, size = sizeof(*body);
/* Buffer 0 of the request message holds the ost_body. */
65 body = lustre_msg_buf(req->rq_reqmsg, 0);
/* One reply buffer of `size` bytes; fills in rq_replen and rq_repmsg. */
67 rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
/* Seed the reply with the caller's oa so obd_getattr() works on the reply copy. */
71 repbody = lustre_msg_buf(req->rq_repmsg, 0);
72 memcpy(&repbody->oa, &body->oa, sizeof(body->oa));
73 req->rq_status = obd_getattr(conn, &repbody->oa);
/*
 * ost_statfs - service a statfs request.
 *
 * Calls obd_statfs() into a local `sfs`, logs a failure with CERROR, then
 * sets up a one-buffer reply of sizeof(struct obd_statfs) bytes, zeroes it
 * and fills it from `sfs` with obd_statfs_pack().
 *
 * NOTE(review): the declaration of `sfs` (original line 81), the
 * conditions guarding the CERROR, and the return paths are on lines
 * elided from this listing.
 */
77 static int ost_statfs(struct ptlrpc_request *req)
/* The request message pointer is reinterpreted as the connection handle. */
79 struct lustre_handle *conn = (struct lustre_handle *)req->rq_reqmsg;
80 struct obd_statfs *osfs;
82 int rc, size = sizeof(*osfs);
/* Query the filesystem statistics before building the reply. */
85 rc = obd_statfs(conn, &sfs);
87 CERROR("ost: statfs failed: rc %d\n", rc);
/* One reply buffer of `size` bytes; fills in rq_replen and rq_repmsg. */
92 rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
/* Clear the reply buffer, then pack the statistics into it. */
96 osfs = lustre_msg_buf(req->rq_repmsg, 0);
97 memset(osfs, 0, size);
98 obd_statfs_pack(osfs, &sfs);
/*
 * ost_open - service an open request.
 *
 * Copies the object attributes (oa) from the request body into the reply
 * body, then calls obd_open() on the reply copy and stores its result in
 * req->rq_status.
 *
 * NOTE(review): lines are elided from this listing (gaps in the embedded
 * numbering); the check of rc and the return statement are not visible.
 */
102 static int ost_open(struct ptlrpc_request *req)
/* The request message pointer is reinterpreted as the connection handle. */
104 struct lustre_handle *conn = (struct lustre_handle *)req->rq_reqmsg;
105 struct ost_body *body, *repbody;
106 int rc, size = sizeof(*body);
/* Buffer 0 of the request message holds the ost_body. */
109 body = lustre_msg_buf(req->rq_reqmsg, 0);
/* One reply buffer of `size` bytes; fills in rq_replen and rq_repmsg. */
111 rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
/* Operate on a copy of the oa that lives in the reply. */
115 repbody = lustre_msg_buf(req->rq_repmsg, 0);
116 memcpy(&repbody->oa, &body->oa, sizeof(body->oa));
117 req->rq_status = obd_open(conn, &repbody->oa, NULL);
/*
 * ost_close - service a close request.
 *
 * Copies the object attributes (oa) from the request body into the reply
 * body, then calls obd_close() on the reply copy and stores its result in
 * req->rq_status.
 *
 * NOTE(review): lines are elided from this listing (gaps in the embedded
 * numbering); the check of rc and the return statement are not visible.
 */
121 static int ost_close(struct ptlrpc_request *req)
/* The request message pointer is reinterpreted as the connection handle. */
123 struct lustre_handle *conn = (struct lustre_handle *)req->rq_reqmsg;
124 struct ost_body *body, *repbody;
125 int rc, size = sizeof(*body);
/* Buffer 0 of the request message holds the ost_body. */
128 body = lustre_msg_buf(req->rq_reqmsg, 0);
/* One reply buffer of `size` bytes; fills in rq_replen and rq_repmsg. */
130 rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
/* Operate on a copy of the oa that lives in the reply. */
134 repbody = lustre_msg_buf(req->rq_repmsg, 0);
135 memcpy(&repbody->oa, &body->oa, sizeof(body->oa));
136 req->rq_status = obd_close(conn, &repbody->oa, NULL);
/*
 * ost_create - service a create request.
 *
 * Copies the object attributes (oa) from the request body into the reply
 * body, then calls obd_create() on the reply copy and stores its result
 * in req->rq_status.
 *
 * NOTE(review): lines are elided from this listing (gaps in the embedded
 * numbering); the check of rc and the return statement are not visible.
 */
140 static int ost_create(struct ptlrpc_request *req)
/* The request message pointer is reinterpreted as the connection handle. */
142 struct lustre_handle *conn = (struct lustre_handle *)req->rq_reqmsg;
143 struct ost_body *body, *repbody;
144 int rc, size = sizeof(*body);
/* Buffer 0 of the request message holds the ost_body. */
147 body = lustre_msg_buf(req->rq_reqmsg, 0);
/* One reply buffer of `size` bytes; fills in rq_replen and rq_repmsg. */
149 rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
/* Operate on a copy of the oa that lives in the reply. */
153 repbody = lustre_msg_buf(req->rq_repmsg, 0);
154 memcpy(&repbody->oa, &body->oa, sizeof(body->oa));
155 req->rq_status = obd_create(conn, &repbody->oa, NULL);
/*
 * ost_punch - service a punch request.
 *
 * Copies the object attributes (oa) from the request body into the reply
 * body, then calls obd_punch() on the reply copy, passing the copy's
 * o_blocks and o_size fields as the last two arguments, and stores the
 * result in req->rq_status.
 *
 * NOTE(review): lines are elided from this listing (gaps in the embedded
 * numbering); the check of rc and the return statement are not visible.
 */
159 static int ost_punch(struct ptlrpc_request *req)
/* The request message pointer is reinterpreted as the connection handle. */
161 struct lustre_handle *conn = (struct lustre_handle *)req->rq_reqmsg;
162 struct ost_body *body, *repbody;
163 int rc, size = sizeof(*body);
/* Buffer 0 of the request message holds the ost_body. */
166 body = lustre_msg_buf(req->rq_reqmsg, 0);
/* One reply buffer of `size` bytes; fills in rq_replen and rq_repmsg. */
168 rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
/* Operate on a copy of the oa that lives in the reply. */
172 repbody = lustre_msg_buf(req->rq_repmsg, 0);
173 memcpy(&repbody->oa, &body->oa, sizeof(body->oa));
/*
 * NOTE(review): o_blocks and o_size presumably carry the bounds of the
 * range to punch -- confirm the argument order against obd_punch().
 */
174 req->rq_status = obd_punch(conn, &repbody->oa, NULL,
175 repbody->oa.o_blocks, repbody->oa.o_size);
/*
 * ost_setattr - service a setattr request.
 *
 * Copies the object attributes (oa) from the request body into the reply
 * body, then calls obd_setattr() on the reply copy and stores its result
 * in req->rq_status.
 *
 * NOTE(review): lines are elided from this listing (gaps in the embedded
 * numbering); the check of rc and the return statement are not visible.
 */
179 static int ost_setattr(struct ptlrpc_request *req)
/* The request message pointer is reinterpreted as the connection handle. */
181 struct lustre_handle *conn = (struct lustre_handle *)req->rq_reqmsg;
182 struct ost_body *body, *repbody;
183 int rc, size = sizeof(*body);
/* Buffer 0 of the request message holds the ost_body. */
186 body = lustre_msg_buf(req->rq_reqmsg, 0);
/* One reply buffer of `size` bytes; fills in rq_replen and rq_repmsg. */
188 rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
/* Operate on a copy of the oa that lives in the reply. */
192 repbody = lustre_msg_buf(req->rq_repmsg, 0);
193 memcpy(&repbody->oa, &body->oa, sizeof(body->oa));
194 req->rq_status = obd_setattr(conn, &repbody->oa);
/*
 * ost_brw_read - service the read half of a bulk read/write request.
 *
 * Visible flow:
 *   1. Walk the io-object descriptors (request buffer 1) and the remote
 *      niobufs (request buffer 2) through the unpackers.
 *   2. Set up a one-buffer reply and allocate one niobuf_local per
 *      remote niobuf.
 *   3. Call obd_preprw(), then build a bulk descriptor with one bulk page
 *      per niobuf and send it with ptlrpc_send_bulk().
 *   4. Wait until ptlrpc_check_bulk_sent() holds, call obd_commitrw(),
 *      free the descriptor and local niobufs, and send either an error
 *      or a reply.
 *
 * NOTE(review): this listing omits many original lines (gaps in the
 * embedded numbering): the assignment of `cmd`, the rc/NULL checks in
 * front of several GOTOs, the cleanup labels (out, out_local, out_bulk)
 * and the return statement are not visible here.
 */
198 static int ost_brw_read(struct ptlrpc_request *req)
/* The request message pointer is reinterpreted as the connection handle. */
200 struct lustre_handle *conn = (struct lustre_handle *)req->rq_reqmsg;
201 struct ptlrpc_bulk_desc *desc;
202 void *tmp1, *tmp2, *end2;
203 struct niobuf_remote *remote_nb;
204 struct niobuf_local *local_nb = NULL;
205 struct obd_ioobj *ioo;
206 struct ost_body *body;
207 int rc, cmd, i, j, objcount, niocount, size = sizeof(*body);
/*
 * Request layout: buffer 0 = ost_body, buffer 1 = obd_ioobj array,
 * buffer 2 = niobuf_remote array. end2 is one byte past buffer 2; the
 * element counts are derived from the buffer lengths.
 */
210 body = lustre_msg_buf(req->rq_reqmsg, 0);
211 tmp1 = lustre_msg_buf(req->rq_reqmsg, 1);
212 tmp2 = lustre_msg_buf(req->rq_reqmsg, 2);
213 end2 = (char *)tmp2 + req->rq_reqmsg->buflens[2];
214 objcount = req->rq_reqmsg->buflens[1] / sizeof(*ioo);
215 niocount = req->rq_reqmsg->buflens[2] / sizeof(*remote_nb);
/* Walk every io object and each of its niobufs; the unpackers advance tmp1/tmp2. */
218 for (i = 0; i < objcount; i++) {
219 ost_unpack_ioo(&tmp1, &ioo);
/*
 * NOTE(review): tmp2 is a void *, so (with GCC's void-pointer arithmetic)
 * this adds ioo_bufcnt *bytes*, not ioo_bufcnt niobufs. The check looks
 * like it should scale by sizeof(*remote_nb) -- confirm.
 */
220 if (tmp2 + ioo->ioo_bufcnt > end2) {
222 GOTO(out, rc = -EFAULT);
224 for (j = 0; j < ioo->ioo_bufcnt; j++)
225 ost_unpack_niobuf(&tmp2, &remote_nb);
/* One reply buffer of `size` bytes; fills in rq_replen and rq_repmsg. */
228 rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
/* One local niobuf per remote niobuf in the request. */
231 OBD_ALLOC(local_nb, sizeof(*local_nb) * niocount);
232 if (local_nb == NULL)
235 /* The unpackers move tmp1 and tmp2, so reset them before using */
236 tmp1 = lustre_msg_buf(req->rq_reqmsg, 1);
237 tmp2 = lustre_msg_buf(req->rq_reqmsg, 2);
238 req->rq_status = obd_preprw(cmd, conn, objcount,
239 tmp1, niocount, tmp2, local_nb, NULL);
/* Build the bulk descriptor used to send the pages. */
244 desc = ptlrpc_prep_bulk(req->rq_connection);
246 GOTO(out_local, rc = -ENOMEM);
247 desc->b_portal = OST_BULK_PORTAL;
/*
 * One bulk page per niobuf: the xid comes from the i-th remote niobuf,
 * the buffer address from the matching local niobuf, and the length is
 * always PAGE_SIZE.
 */
249 for (i = 0; i < niocount; i++) {
250 struct ptlrpc_bulk_page *bulk;
251 bulk = ptlrpc_prep_bulk_page(desc);
253 GOTO(out_bulk, rc = -ENOMEM);
254 remote_nb = &(((struct niobuf_remote *)tmp2)[i]);
255 bulk->b_xid = remote_nb->xid;
256 bulk->b_buf = (void *)(unsigned long)local_nb[i].addr;
257 bulk->b_buflen = PAGE_SIZE;
260 rc = ptlrpc_send_bulk(desc);
/* The wait below has no timeout, as the #warning points out. */
264 #warning OST must time out here.
265 wait_event(desc->b_waitq, ptlrpc_check_bulk_sent(desc));
266 if (desc->b_flags & PTL_RPC_FL_INTR)
269 /* The unpackers move tmp1 and tmp2, so reset them before using */
270 tmp1 = lustre_msg_buf(req->rq_reqmsg, 1);
271 tmp2 = lustre_msg_buf(req->rq_reqmsg, 2);
272 req->rq_status = obd_commitrw(cmd, conn, objcount,
273 tmp1, niocount, local_nb, NULL);
/* Cleanup: release the bulk descriptor, then the local niobuf array. */
276 ptlrpc_free_bulk(desc);
278 OBD_FREE(local_nb, sizeof(*local_nb) * niocount);
/* Either an error or a normal reply is sent; the selecting condition is on an elided line. */
281 ptlrpc_error(req->rq_svc, req);
283 ptlrpc_reply(req->rq_svc, req);
/*
 * ost_brw_write - service the write half of a bulk read/write request.
 *
 * Visible flow:
 *   1. Walk the io-object descriptors (request buffer 1) and the remote
 *      niobufs (request buffer 2) through the unpackers.
 *   2. Set up a two-buffer reply: an ost_body plus one niobuf_remote per
 *      incoming niobuf.
 *   3. Allocate the local niobufs, call obd_preprw(), and build a bulk
 *      descriptor with one bulk page per local niobuf, assigning each a
 *      fresh xid from the service and packing a matching niobuf into the
 *      reply.
 *   4. Register the bulk descriptor, send the reply, wait for the
 *      PTL_BULK_FL_RCVD flag, then call obd_commitrw() and release
 *      resources.
 *
 * NOTE(review): this listing omits many original lines (gaps in the
 * embedded numbering): the assignment of `cmd`, the rc/NULL checks in
 * front of several GOTOs, the labels (out, out_free, fail_preprw,
 * fail_bulk) and the return statements are not visible here.
 */
287 static int ost_brw_write(struct ptlrpc_request *req)
/* The request message pointer is reinterpreted as the connection handle. */
289 struct lustre_handle *conn = (struct lustre_handle *)req->rq_reqmsg;
290 struct ptlrpc_bulk_desc *desc;
291 struct niobuf_remote *remote_nb;
292 struct niobuf_local *local_nb, *lnb;
293 struct obd_ioobj *ioo;
294 struct ost_body *body;
/* size[0] is the ost_body length; size[1] starts at 0 and is set below. */
295 int cmd, rc, i, j, objcount, niocount, size[2] = {sizeof(*body)};
296 void *tmp1, *tmp2, *end2;
297 void *desc_priv = NULL;
/*
 * Request layout: buffer 0 = ost_body, buffer 1 = obd_ioobj array,
 * buffer 2 = niobuf_remote array. end2 is one byte past buffer 2; the
 * element counts are derived from the buffer lengths.
 */
301 body = lustre_msg_buf(req->rq_reqmsg, 0);
302 tmp1 = lustre_msg_buf(req->rq_reqmsg, 1);
303 tmp2 = lustre_msg_buf(req->rq_reqmsg, 2);
304 end2 = (char *)tmp2 + req->rq_reqmsg->buflens[2];
305 objcount = req->rq_reqmsg->buflens[1] / sizeof(*ioo);
306 niocount = req->rq_reqmsg->buflens[2] / sizeof(*remote_nb);
/* Walk every io object and each of its niobufs; the unpackers advance tmp1/tmp2. */
309 for (i = 0; i < objcount; i++) {
310 ost_unpack_ioo((void *)&tmp1, &ioo);
/*
 * NOTE(review): tmp2 is a void *, so (with GCC's void-pointer arithmetic)
 * this adds ioo_bufcnt *bytes*, not ioo_bufcnt niobufs. The check looks
 * like it should scale by sizeof(*remote_nb) -- confirm.
 */
311 if (tmp2 + ioo->ioo_bufcnt > end2) {
315 for (j = 0; j < ioo->ioo_bufcnt; j++)
316 ost_unpack_niobuf((void *)&tmp2, &remote_nb);
/* Second reply buffer: room for one niobuf_remote per incoming niobuf. */
319 size[1] = niocount * sizeof(*remote_nb);
320 rc = lustre_pack_msg(2, size, NULL, &req->rq_replen, &req->rq_repmsg);
/* From here on remote_nb points into reply buffer 1. */
323 remote_nb = lustre_msg_buf(req->rq_repmsg, 1);
325 OBD_ALLOC(local_nb, niocount * sizeof(*local_nb));
326 if (local_nb == NULL)
327 GOTO(out, rc = -ENOMEM);
329 /* The unpackers move tmp1 and tmp2, so reset them before using */
330 tmp1 = lustre_msg_buf(req->rq_reqmsg, 1);
331 tmp2 = lustre_msg_buf(req->rq_reqmsg, 2);
/* obd_preprw() is given &desc_priv; the value is attached to the bulk descriptor below. */
332 req->rq_status = obd_preprw(cmd, conn, objcount,
333 tmp1, niocount, tmp2, local_nb, &desc_priv);
335 GOTO(out_free, rc = 0); /* XXX is this correct? */
337 desc = ptlrpc_prep_bulk(req->rq_connection);
339 GOTO(fail_preprw, rc = -ENOMEM);
341 desc->b_portal = OSC_BULK_PORTAL;
342 desc->b_desc_private = desc_priv;
/*
 * NOTE(review): this copies sizeof(conn) bytes starting at &conn, i.e.
 * the pointer value itself rather than the handle it points to. If
 * b_conn is a struct lustre_handle this probably wants
 * (conn, sizeof(*conn)) -- confirm against the b_conn declaration.
 */
343 memcpy(&(desc->b_conn), &conn, sizeof(conn));
/* One bulk page per local niobuf. */
345 for (i = 0, lnb = local_nb; i < niocount; i++, lnb++) {
346 struct ptlrpc_service *srv = req->rq_obd->u.ost.ost_service;
347 struct ptlrpc_bulk_page *bulk;
349 bulk = ptlrpc_prep_bulk_page(desc);
351 GOTO(fail_bulk, rc = -ENOMEM);
/* Take the next xid from the service counter under srv_lock. */
353 spin_lock(&srv->srv_lock);
354 bulk->b_xid = srv->srv_xid++;
355 spin_unlock(&srv->srv_lock);
/* Describe the target page using the fields of the local niobuf. */
357 bulk->b_buf = lnb->addr;
358 bulk->b_page = lnb->page;
359 bulk->b_flags = lnb->flags;
360 bulk->b_dentry = lnb->dentry;
361 bulk->b_buflen = PAGE_SIZE;
364 /* this advances remote_nb */
/* NOTE(review): the remaining arguments of this call are on an elided line. */
365 ost_pack_niobuf((void **)&remote_nb, lnb->offset, lnb->len, 0,
369 rc = ptlrpc_register_bulk(desc);
/* The reply is sent before waiting for the bulk data to arrive. */
374 ptlrpc_reply(req->rq_svc, req);
/* The wait below has no timeout, as the #warning points out. */
376 #warning OST must time out here.
377 wait_event(desc->b_waitq, desc->b_flags & PTL_BULK_FL_RCVD);
/* Commit using the private data kept on the descriptor, then release the descriptor. */
379 rc = obd_commitrw(cmd, conn, objcount, tmp1, niocount, local_nb,
380 desc->b_desc_private);
381 ptlrpc_free_bulk(desc);
384 OBD_FREE(local_nb, niocount * sizeof(*local_nb));
/* Either an error or a normal reply is sent; the selecting condition is on an elided line. */
388 ptlrpc_error(req->rq_svc, req);
390 ptlrpc_reply(req->rq_svc, req);
/* Presumably the fail_bulk path (label elided) -- confirm against the full source. */
395 ptlrpc_free_bulk(desc);
397 /* FIXME: how do we undo the preprw? */
/*
 * ost_brw - dispatch a bulk read/write request.
 *
 * Looks at the `data` field of the request's ost_body: when the
 * OBD_BRW_WRITE bit is set the request is handled by ost_brw_write(),
 * otherwise by ost_brw_read(). The chosen handler's return value is
 * returned unchanged.
 */
401 static int ost_brw(struct ptlrpc_request *req)
/* Buffer 0 of the request message holds the ost_body. */
403 struct ost_body *body = lustre_msg_buf(req->rq_reqmsg, 0);
405 if (body->data & OBD_BRW_WRITE)
406 return ost_brw_write(req);
408 return ost_brw_read(req);
/*
 * ost_handle - request handler registered with the OST service.
 *
 * Unpacks and validates the incoming request message, then dispatches on
 * the opcode (rq_reqmsg->opc) to the per-operation handlers in this file
 * or to target_handle_connect()/target_handle_disconnect(). Afterwards
 * it sends either an error (ptlrpc_error) or a reply (ptlrpc_reply).
 *
 * NOTE(review): this listing omits many original lines (gaps in the
 * embedded numbering): the declaration of rc, every `case` label and
 * `break`, the calls made for the open/close/brw/punch opcodes, the
 * `out` label and the return statement are not visible here.
 */
412 static int ost_handle(struct ptlrpc_request *req)
/* Reject requests that fail to unpack (or when the unpack fail-injection check fires). */
417 rc = lustre_unpack_msg(req->rq_reqmsg, req->rq_reqlen);
418 if (rc || OBD_FAIL_CHECK(OBD_FAIL_OST_HANDLE_UNPACK)) {
419 CERROR("lustre_ost: Invalid request\n");
/* Only request-type messages are accepted. */
423 if (req->rq_reqmsg->type != PTL_RPC_MSG_REQUEST) {
424 CERROR("lustre_ost: wrong packet type sent %d\n",
425 req->rq_reqmsg->type);
426 GOTO(out, rc = -EINVAL);
/* Every opcode except OST_CONNECT requires an existing export. */
429 if (req->rq_reqmsg->opc != OST_CONNECT &&
430 req->rq_export == NULL) {
431 CERROR("lustre_ost: operation %d on unconnected OST\n",
432 req->rq_reqmsg->opc);
433 GOTO(out, rc = -ENOTCONN);
/* The request must be addressed to an obd device whose type name is "ost". */
436 if (strcmp(req->rq_obd->obd_type->typ_name, "ost") != 0)
437 GOTO(out, rc = -EINVAL);
/*
 * Opcode dispatch. Each visible arm logs the operation and invokes
 * OBD_FAIL_RETURN with an operation-specific fail id before calling its
 * handler (presumably an early return of 0 when that fail-injection
 * point is active -- confirm against the macro definition).
 */
439 switch (req->rq_reqmsg->opc) {
441 CDEBUG(D_INODE, "connect\n");
442 OBD_FAIL_RETURN(OBD_FAIL_OST_CONNECT_NET, 0);
443 rc = target_handle_connect(req);
446 CDEBUG(D_INODE, "disconnect\n");
447 OBD_FAIL_RETURN(OBD_FAIL_OST_DISCONNECT_NET, 0);
448 rc = target_handle_disconnect(req);
451 CDEBUG(D_INODE, "create\n");
452 OBD_FAIL_RETURN(OBD_FAIL_OST_CREATE_NET, 0);
453 rc = ost_create(req);
456 CDEBUG(D_INODE, "destroy\n");
457 OBD_FAIL_RETURN(OBD_FAIL_OST_DESTROY_NET, 0);
458 rc = ost_destroy(req);
461 CDEBUG(D_INODE, "getattr\n");
462 OBD_FAIL_RETURN(OBD_FAIL_OST_GETATTR_NET, 0);
463 rc = ost_getattr(req);
466 CDEBUG(D_INODE, "setattr\n");
467 OBD_FAIL_RETURN(OBD_FAIL_OST_SETATTR_NET, 0);
468 rc = ost_setattr(req);
/*
 * NOTE(review): the next two arms use the OPEN and CLOSE fail ids but
 * still log "setattr"; the debug strings look copy-pasted.
 */
471 CDEBUG(D_INODE, "setattr\n");
472 OBD_FAIL_RETURN(OBD_FAIL_OST_OPEN_NET, 0);
476 CDEBUG(D_INODE, "setattr\n");
477 OBD_FAIL_RETURN(OBD_FAIL_OST_CLOSE_NET, 0);
481 CDEBUG(D_INODE, "brw\n");
482 OBD_FAIL_RETURN(OBD_FAIL_OST_BRW_NET, 0);
484 /* ost_brw sends its own replies */
487 CDEBUG(D_INODE, "punch\n");
488 OBD_FAIL_RETURN(OBD_FAIL_OST_PUNCH_NET, 0);
492 CDEBUG(D_INODE, "statfs\n");
493 OBD_FAIL_RETURN(OBD_FAIL_OST_STATFS_NET, 0);
494 rc = ost_statfs(req);
/* Presumably the default arm (label elided): reply with -ENOTSUPP. */
497 req->rq_status = -ENOTSUPP;
498 rc = ptlrpc_error(req->rq_svc, req);
504 //req->rq_status = rc;
/* Error path: log the failing opcode with rc and send an error reply. */
506 CERROR("ost: processing error (opcode=%d): %d\n",
507 req->rq_reqmsg->opc, rc);
508 ptlrpc_error(req->rq_svc, req);
/*
 * Success path: complain if the handler left rq_repmsg unset, and send
 * the reply. NOTE(review): the braces are elided, so whether the reply
 * is still sent when rq_repmsg is NULL cannot be determined from here.
 */
510 CDEBUG(D_INODE, "sending reply\n");
511 if (req->rq_repmsg == NULL)
512 CERROR("handler for opcode %d returned rc=0 without "
513 "creating rq_repmsg; needs to return rc != "
514 "0!\n", req->rq_reqmsg->opc);
515 ptlrpc_reply(req->rq_svc, req);
/* Number of service threads ost_setup() starts for the OST service. */
521 #define OST_NUM_THREADS 6
/*
 * ost_setup - set up an OST obd device.
 *
 * `buf` is treated as a struct obd_ioctl_data whose ioc_dev field selects
 * the target obd device. The visible code checks that the target is
 * attached and set up, connects to it (ost_conn), creates the request
 * service with ost_handle as its handler, and starts OST_NUM_THREADS
 * service threads. Nothing in the visible code mounts a filesystem,
 * despite the original one-line comment kept below.
 *
 * NOTE(review): this listing omits original lines (gaps in the embedded
 * numbering): the declarations of `i` and `err`, the action taken when
 * the ioc_dev range check fails, the checks in front of two CERROR
 * calls, the error labels (error_dec, error_disc) and the return
 * statements are not visible here.
 */
523 /* mount the file system (secretly) */
524 static int ost_setup(struct obd_device *obddev, obd_count len, void *buf)
526 struct obd_ioctl_data* data = buf;
527 struct ost_obd *ost = &obddev->u.ost;
528 struct obd_device *tgt;
/*
 * NOTE(review): ioc_dev == MAX_OBD_DEVICES passes this check and is then
 * used as an index below; if obd_dev[] has MAX_OBD_DEVICES entries this
 * should be `>=` -- confirm against the obd_dev declaration.
 */
533 if (data->ioc_dev < 0 || data->ioc_dev > MAX_OBD_DEVICES)
/* The target must be both attached and set up. */
537 tgt = &obd_dev[data->ioc_dev];
538 if (!(tgt->obd_flags & OBD_ATTACHED) ||
539 !(tgt->obd_flags & OBD_SET_UP)) {
540 CERROR("device not attached or not set up (%d)\n",
542 GOTO(error_dec, err = -EINVAL);
/* Connect to the target; a failure is reported as -EINVAL whatever obd_connect() returned. */
545 err = obd_connect(&ost->ost_conn, tgt);
547 CERROR("fail to connect to device %d\n", data->ioc_dev);
548 GOTO(error_dec, err = -EINVAL);
/* Create the request service; incoming requests are handled by ost_handle(). */
551 ost->ost_service = ptlrpc_init_svc(64 * 1024, OST_REQUEST_PORTAL,
552 OSC_REPLY_PORTAL, "self",ost_handle);
553 if (!ost->ost_service) {
554 CERROR("failed to start service\n");
555 GOTO(error_disc, err = -EINVAL);
/* Start the service threads (the trailing arguments of the call are on an elided line). */
558 for (i = 0; i < OST_NUM_THREADS; i++) {
559 err = ptlrpc_start_thread(obddev, ost->ost_service,
562 CERROR("error starting thread #%d: rc %d\n", i, err);
563 GOTO(error_disc, err = -EINVAL);
/* Presumably the error_disc path (label elided): undo the obd_connect() above. */
570 obd_disconnect(&ost->ost_conn);
/*
 * ost_cleanup - tear down an OST obd device.
 *
 * Complains if the device still has exports, then stops the service
 * threads, unregisters the service, and disconnects from the target
 * device (ost_conn), logging a failure to disconnect.
 *
 * NOTE(review): this listing omits original lines (gaps in the embedded
 * numbering): the declaration of `err`, what happens after the
 * "still has clients!" message, the check guarding the final CERROR and
 * the return statement are not visible here.
 */
576 static int ost_cleanup(struct obd_device * obddev)
578 struct ost_obd *ost = &obddev->u.ost;
/* A non-empty export list means clients are still connected. */
583 if ( !list_empty(&obddev->obd_exports) ) {
584 CERROR("still has clients!\n");
/* Shut the service down before dropping the connection to the target. */
588 ptlrpc_stop_all_threads(ost->ost_service);
589 ptlrpc_unregister_service(ost->ost_service);
591 err = obd_disconnect(&ost->ost_conn);
593 CERROR("lustre ost: fail to disconnect device\n");
/*
 * Method table registered for the OST obd type (see ost_init()). Uses the
 * GNU `field: value` initializer syntax. Only the o_cleanup entry is
 * visible in this listing; other entries and the closing brace are on
 * elided lines.
 */
601 /* use obd ops to offer management infrastructure */
602 static struct obd_ops ost_obd_ops = {
604 o_cleanup: ost_cleanup,
/*
 * ost_init - module entry point.
 *
 * Registers ost_obd_ops under the name LUSTRE_OST_NAME with the obd class
 * layer. NOTE(review): the return statement is on an elided line, so it
 * is not visible whether class_register_type()'s result is propagated.
 */
607 static int __init ost_init(void)
609 class_register_type(&ost_obd_ops, LUSTRE_OST_NAME);
/*
 * ost_exit - module exit point.
 *
 * Unregisters the LUSTRE_OST_NAME obd type that ost_init() registered.
 */
613 static void __exit ost_exit(void)
615 class_unregister_type(LUSTRE_OST_NAME);
/* Module metadata: author, description and license. */
618 MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
619 MODULE_DESCRIPTION("Lustre Object Storage Target (OST) v0.01");
620 MODULE_LICENSE("GPL");
/* Hook ost_init()/ost_exit() up as the module's load and unload handlers. */
622 module_init(ost_init);
623 module_exit(ost_exit);