1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
5 * Author: Peter J. Braam <braam@clusterfs.com>
6 * Author: Phil Schwan <phil@clusterfs.com>
8 * This file is part of Lustre, http://www.lustre.org.
10 * Lustre is free software; you can redistribute it and/or
11 * modify it under the terms of version 2 of the GNU General Public
12 * License as published by the Free Software Foundation.
14 * Lustre is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with Lustre; if not, write to the Free Software
21 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 * Storage Target Handling functions
24 * Lustre Object Server Module (OST)
26 * This server is single threaded at present (but can easily be multi
27 * threaded). For testing and management it is treated as an
28 * obd_device, although it does not export a full OBD method table
29 * (the requests are coming in over the wire, so object target
30 * modules do not have a full method table.)
34 #define DEBUG_SUBSYSTEM S_OST
36 #include <linux/module.h>
37 #include <linux/obd_ost.h>
38 #include <linux/lustre_net.h>
// ost_destroy: service a destroy request for one object.
// Takes the ost_body from buffer 0 of the request, identifies the connection
// by body->connid on the target device ost->ost_tgt, packs a one-buffer reply
// sized for an ost_body, and records the obd_destroy() result in
// req->rq_status.
// NOTE(review): the embedded line numbers skip (40 -> 43, 51 -> 55), so the
// declaration of conn, the check of rc and the return are not shown here.
40 static int ost_destroy(struct ost_obd *ost, struct ptlrpc_request *req)
43 struct ost_body *body;
44 int rc, size = sizeof(*body);
// Request payload: buffer 0 holds the ost_body sent by the client.
47 body = lustre_msg_buf(req->rq_reqmsg, 0);
48 conn.oc_id = body->connid;
49 conn.oc_dev = ost->ost_tgt;
// Build the reply first; its length and message pointer are stored in req.
51 rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
// The outcome of the operation travels in rq_status, separately from rc.
55 req->rq_status = obd_destroy(&conn, &body->oa);
// ost_getattr: service a getattr request.
// Reads the ost_body from request buffer 0, packs a one-buffer reply, copies
// the request oa into the reply body and hands that reply copy to
// obd_getattr(); the result code is stored in req->rq_status.
// NOTE(review): the embedded line numbers skip, so the declaration of conn,
// the check of rc and the return are not shown in this listing.
59 static int ost_getattr(struct ost_obd *ost, struct ptlrpc_request *req)
62 struct ost_body *body, *repbody;
63 int rc, size = sizeof(*body);
66 body = lustre_msg_buf(req->rq_reqmsg, 0);
67 conn.oc_id = body->connid;
68 conn.oc_dev = ost->ost_tgt;
70 rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
// Seed the reply oa from the request; obd_getattr() then works on the reply
// copy, so anything it writes to the oa is already in the reply buffer.
74 repbody = lustre_msg_buf(req->rq_repmsg, 0);
75 memcpy(&repbody->oa, &body->oa, sizeof(body->oa));
76 req->rq_status = obd_getattr(&conn, &repbody->oa);
// ost_open: service an open request.
// Same shape as the other single-object handlers in this file: request body
// from buffer 0, one-buffer reply, request oa copied into the reply, and the
// obd_open() result stored in req->rq_status.
// NOTE(review): the embedded line numbers skip, so the declaration of conn,
// the check of rc and the return are not shown in this listing.
80 static int ost_open(struct ost_obd *ost, struct ptlrpc_request *req)
83 struct ost_body *body, *repbody;
84 int rc, size = sizeof(*body);
87 body = lustre_msg_buf(req->rq_reqmsg, 0);
88 conn.oc_id = body->connid;
89 conn.oc_dev = ost->ost_tgt;
91 rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
// obd_open() is given the oa that lives in the reply buffer.
95 repbody = lustre_msg_buf(req->rq_repmsg, 0);
96 memcpy(&repbody->oa, &body->oa, sizeof(body->oa));
97 req->rq_status = obd_open(&conn, &repbody->oa);
// ost_close: service a close request.
// Builds the connection from body->connid and ost->ost_tgt, packs a
// one-buffer reply, copies the request oa into it and stores the
// obd_close() result in req->rq_status.
// NOTE(review): the embedded line numbers skip, so the check of rc and the
// return are not shown in this listing.
101 static int ost_close(struct ost_obd *ost, struct ptlrpc_request *req)
103 struct obd_conn conn;
104 struct ost_body *body, *repbody;
105 int rc, size = sizeof(*body);
108 body = lustre_msg_buf(req->rq_reqmsg, 0);
109 conn.oc_id = body->connid;
110 conn.oc_dev = ost->ost_tgt;
112 rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
// obd_close() operates on the reply copy of the oa.
116 repbody = lustre_msg_buf(req->rq_repmsg, 0);
117 memcpy(&repbody->oa, &body->oa, sizeof(body->oa));
118 req->rq_status = obd_close(&conn, &repbody->oa);
// ost_create: service a create request.
// The request oa is copied into the reply body and that copy is passed to
// obd_create(); the result code is stored in req->rq_status.
// NOTE(review): presumably obd_create() fills the new object id into the oa
// it is given, which is why the reply copy is used; confirm against the
// obd_create contract. The check of rc and the return are elided here.
122 static int ost_create(struct ost_obd *ost, struct ptlrpc_request *req)
124 struct obd_conn conn;
125 struct ost_body *body, *repbody;
126 int rc, size = sizeof(*body);
129 body = lustre_msg_buf(req->rq_reqmsg, 0);
130 conn.oc_id = body->connid;
131 conn.oc_dev = ost->ost_tgt;
133 rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
137 repbody = lustre_msg_buf(req->rq_repmsg, 0);
138 memcpy(&repbody->oa, &body->oa, sizeof(body->oa));
139 req->rq_status = obd_create(&conn, &repbody->oa);
// ost_punch: service a punch request.
// Copies the request oa into the reply body, then calls obd_punch() with that
// oa plus its o_size and o_blocks fields as the two trailing arguments. The
// result code is stored in req->rq_status.
// NOTE(review): o_size and o_blocks presumably carry the range to punch;
// confirm against the obd_punch contract. The check of rc and the return are
// elided from this listing.
143 static int ost_punch(struct ost_obd *ost, struct ptlrpc_request *req)
145 struct obd_conn conn;
146 struct ost_body *body, *repbody;
147 int rc, size = sizeof(*body);
150 body = lustre_msg_buf(req->rq_reqmsg, 0);
151 conn.oc_id = body->connid;
152 conn.oc_dev = ost->ost_tgt;
154 rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
158 repbody = lustre_msg_buf(req->rq_repmsg, 0);
159 memcpy(&repbody->oa, &body->oa, sizeof(body->oa));
// The two extra arguments are read from the reply copy made just above.
160 req->rq_status = obd_punch(&conn, &repbody->oa,
161 repbody->oa.o_size, repbody->oa.o_blocks);
// ost_setattr: service a setattr request.
// Request body from buffer 0, one-buffer reply, request oa copied into the
// reply, obd_setattr() called on the reply copy, result in req->rq_status.
// NOTE(review): the check of rc and the return are elided from this listing.
165 static int ost_setattr(struct ost_obd *ost, struct ptlrpc_request *req)
167 struct obd_conn conn;
168 struct ost_body *body, *repbody;
169 int rc, size = sizeof(*body);
172 body = lustre_msg_buf(req->rq_reqmsg, 0);
173 conn.oc_id = body->connid;
174 conn.oc_dev = ost->ost_tgt;
176 rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
180 repbody = lustre_msg_buf(req->rq_repmsg, 0);
181 memcpy(&repbody->oa, &body->oa, sizeof(body->oa));
182 req->rq_status = obd_setattr(&conn, &repbody->oa);
// ost_connect: service a connect request.
// Unlike the other handlers, no connection id is read from the request:
// only conn.oc_dev is set before obd_connect() is called. The id found in
// conn.oc_id afterwards is written into the reply body as connid.
// NOTE(review): the check of rc and the return are elided from this listing.
186 static int ost_connect(struct ost_obd *ost, struct ptlrpc_request *req)
188 struct obd_conn conn;
189 struct ost_body *body;
190 int rc, size = sizeof(*body);
193 conn.oc_dev = ost->ost_tgt;
195 rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
199 req->rq_status = obd_connect(&conn);
// Here body points into the reply message, not the request.
201 CDEBUG(D_IOCTL, "rep buffer %p, id %d\n", req->rq_repmsg, conn.oc_id);
202 body = lustre_msg_buf(req->rq_repmsg, 0);
203 body->connid = conn.oc_id;
// ost_disconnect: service a disconnect request.
// Rebuilds the connection from the connid in the request body, packs a
// one-buffer reply and stores the obd_disconnect() result in req->rq_status.
// NOTE(review): the check of rc and the return are elided from this listing.
207 static int ost_disconnect(struct ost_obd *ost, struct ptlrpc_request *req)
209 struct obd_conn conn;
210 struct ost_body *body;
211 int rc, size = sizeof(*body);
214 body = lustre_msg_buf(req->rq_reqmsg, 0);
215 conn.oc_id = body->connid;
216 conn.oc_dev = ost->ost_tgt;
218 rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
222 CDEBUG(D_IOCTL, "Disconnecting %d\n", conn.oc_id);
223 req->rq_status = obd_disconnect(&conn);
// ost_get_info: service a get_info request.
// Request buffer 0 holds the ost_body and request buffer 1 is passed to
// obd_get_info() together with its length. obd_get_info() reports its result
// through size[1] and bufs[1], and those become the second buffer of a
// two-buffer reply.
// NOTE(review): the embedded line numbers skip, so the condition guarding the
// CERROR and the return are not shown in this listing.
227 static int ost_get_info(struct ost_obd *ost, struct ptlrpc_request *req)
229 struct obd_conn conn;
230 struct ost_body *body;
// Only size[0] is given a value; size[1] is zero-initialised by the
// aggregate initialiser until obd_get_info() fills it in.
231 int rc, size[2] = {sizeof(*body)};
232 char *bufs[2] = {NULL, NULL}, *ptr;
235 body = lustre_msg_buf(req->rq_reqmsg, 0);
236 conn.oc_id = body->connid;
237 conn.oc_dev = ost->ost_tgt;
239 ptr = lustre_msg_buf(req->rq_reqmsg, 1);
243 req->rq_status = obd_get_info(&conn, req->rq_reqmsg->buflens[1], ptr,
244 &(size[1]), (void **)&(bufs[1]));
// The reply is packed after the lookup because its second buffer depends on
// what obd_get_info() returned.
246 rc = lustre_pack_msg(2, size, bufs, &req->rq_replen, &req->rq_repmsg);
248 CERROR("cannot pack reply\n");
// ost_brw_read: service a bulk read request.
// Outline, as far as this listing shows:
//   1. Locate the ost_body (buffer 0), the ioobj array (buffer 1) and the
//      remote niobuf array (buffer 2) in the request.
//   2. Derive objcount and niocount from the buffer lengths and walk both
//      arrays with the unpackers.
//   3. Pack a one-buffer reply and allocate one niobuf_local per remote niobuf.
//   4. Call obd_preprw(), describe every local buffer as a bulk page, send the
//      bulk descriptor, then call obd_commitrw().
//   5. Release the bulk descriptor and the local niobuf array.
// NOTE(review): the embedded line numbers skip, so the assignment of cmd, the
// rc and rq_status checks, the goto labels and the return are not visible.
253 static int ost_brw_read(struct ost_obd *obddev, struct ptlrpc_request *req)
255 struct ptlrpc_bulk_desc *desc;
256 struct obd_conn conn;
257 void *tmp1, *tmp2, *end2;
258 struct niobuf_remote *remote_nb;
259 struct niobuf_local *local_nb = NULL;
260 struct obd_ioobj *ioo;
261 struct ost_body *body;
262 int rc, cmd, i, j, objcount, niocount, size = sizeof(*body);
265 body = lustre_msg_buf(req->rq_reqmsg, 0);
266 tmp1 = lustre_msg_buf(req->rq_reqmsg, 1);
267 tmp2 = lustre_msg_buf(req->rq_reqmsg, 2);
// end2 is one byte past the niobuf buffer and bounds the walk below.
268 end2 = (char *)tmp2 + req->rq_reqmsg->buflens[2];
// Element counts are implied by buffer length divided by element size.
269 objcount = req->rq_reqmsg->buflens[1] / sizeof(*ioo);
270 niocount = req->rq_reqmsg->buflens[2] / sizeof(*remote_nb);
// The target is taken from req->rq_obd; the obddev parameter is not used in
// the lines visible here.
273 conn.oc_id = body->connid;
274 conn.oc_dev = req->rq_obd->u.ost.ost_tgt;
276 for (i = 0; i < objcount; i++) {
277 ost_unpack_ioo(&tmp1, &ioo);
// NOTE(review): tmp2 is a void pointer, so tmp2 + ioo_bufcnt advances by
// bytes (GNU void-pointer arithmetic) rather than by niobuf_remote elements.
// Confirm that this bound is what was intended.
278 if (tmp2 + ioo->ioo_bufcnt > end2) {
280 GOTO(out, rc = -EFAULT);
// Consume the niobufs belonging to this object; the unpacker moves tmp2.
282 for (j = 0; j < ioo->ioo_bufcnt; j++)
283 ost_unpack_niobuf(&tmp2, &remote_nb);
286 rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
289 OBD_ALLOC(local_nb, sizeof(*local_nb) * niocount);
290 if (local_nb == NULL)
293 /* The unpackers move tmp1 and tmp2, so reset them before using */
294 tmp1 = lustre_msg_buf(req->rq_reqmsg, 1);
295 tmp2 = lustre_msg_buf(req->rq_reqmsg, 2);
296 req->rq_status = obd_preprw(cmd, &conn, objcount,
297 tmp1, niocount, tmp2, local_nb);
// One bulk descriptor collects a page entry for each of the niocount buffers.
302 desc = ptlrpc_prep_bulk(req->rq_connection);
304 GOTO(out_local, rc = -ENOMEM);
305 desc->b_portal = OST_BULK_PORTAL;
307 for (i = 0; i < niocount; i++) {
308 struct ptlrpc_bulk_page *bulk;
309 bulk = ptlrpc_prep_bulk_page(desc);
311 GOTO(out_bulk, rc = -ENOMEM);
// Pair the i-th remote niobuf (for its xid) with the i-th local buffer.
// Every bulk page is described with a fixed length of PAGE_SIZE.
312 remote_nb = &(((struct niobuf_remote *)tmp2)[i]);
313 bulk->b_xid = remote_nb->xid;
314 bulk->b_buf = (void *)(unsigned long)local_nb[i].addr;
315 bulk->b_buflen = PAGE_SIZE;
318 rc = ptlrpc_send_bulk(desc);
// NOTE(review): ptlrpc_free_bulk(desc) appears twice (original lines 322 and
// 333). Presumably one is the normal path and the other sits under an elided
// error label; confirm against the full source.
322 ptlrpc_free_bulk(desc);
324 /* The unpackers move tmp1 and tmp2, so reset them before using */
325 tmp1 = lustre_msg_buf(req->rq_reqmsg, 1);
326 tmp2 = lustre_msg_buf(req->rq_reqmsg, 2);
327 req->rq_status = obd_commitrw(cmd, &conn, objcount,
328 tmp1, niocount, local_nb);
333 ptlrpc_free_bulk(desc);
// The free size must match the OBD_ALLOC size used above.
335 if (local_nb != NULL)
336 OBD_FREE(local_nb, sizeof(*local_nb) * niocount);
// ost_commit_page: commit a single page through obd_commitrw().
// Zeroes a local obd_ioobj and niobuf_local and passes them to obd_commitrw()
// as one object with one buffer, using the OBD_BRW_WRITE command.
// NOTE(review): original lines 350-353 are elided, so how obj and buf are
// filled in (and how the page parameter is used) is not visible here; the
// declaration of rc and the return are elided as well.
341 static int ost_commit_page(struct obd_conn *conn, struct page *page)
343 struct obd_ioobj obj;
344 struct niobuf_local buf;
348 memset(&buf, 0, sizeof(buf));
349 memset(&obj, 0, sizeof(obj));
354 rc = obd_commitrw(OBD_BRW_WRITE, conn, 1, &obj, 1, &buf);
// ost_brw_write_cb: per-page callback for bulk writes.
// Swaps current->journal_info for the value stored in the bulk descriptor,
// commits the page with ost_commit_page(), then puts the previous
// journal_info back. ost_brw_write() installs this function as b_cb on each
// bulk page and stores b_journal_info on the descriptor.
// NOTE(review): the embedded line numbers skip, so the local declarations,
// the end of the comment block below, the condition guarding the CERROR and
// the return are not visible in this listing.
358 static int ost_brw_write_cb(struct ptlrpc_bulk_page *bulk)
364 /* Restore the filesystem journal context when we do the commit.
365 * This is needed for ext3 and reiserfs, but can't really hurt
// Remember the journal context of the current task so it can be put back.
368 journal_save = current->journal_info;
369 current->journal_info = bulk->b_desc->b_journal_info;
370 CDEBUG(D_BUFFS, "journal_info: saved %p->%p, restored %p\n", current,
371 journal_save, bulk->b_desc->b_journal_info);
// The commit runs under the journal context saved on the descriptor.
372 rc = ost_commit_page(&bulk->b_desc->b_conn, bulk->b_page);
373 current->journal_info = journal_save;
374 CDEBUG(D_BUFFS, "journal_info: restored %p->%p\n", current,
377 CERROR("ost_commit_page failed: %d\n", rc);
// ost_brw_write_finished_cb: descriptor-level callback for bulk writes.
// Releases the bulk descriptor. ost_brw_write() installs this function as
// desc->b_cb.
// NOTE(review): the return statement is elided from this listing.
382 static int ost_brw_write_finished_cb(struct ptlrpc_bulk_desc *desc)
384 ptlrpc_free_bulk(desc);
// ost_brw_write: service a bulk write request.
// Outline, as far as this listing shows:
//   1. Locate the ost_body (buffer 0), the ioobj array (buffer 1) and the
//      remote niobuf array (buffer 2) in the request, and walk them with the
//      unpackers.
//   2. Pack a two-buffer reply whose second buffer has room for niocount
//      remote niobufs, and allocate one niobuf_local per niobuf.
//   3. Call obd_preprw(), then prepare a bulk descriptor carrying the
//      connection, the current journal context and the completion callback.
//   4. For each local buffer, prepare a bulk page with a fresh xid and the
//      per-page commit callback, and pack a matching niobuf into the reply.
//   5. Register the bulk descriptor and clear current->journal_info.
// NOTE(review): the embedded line numbers skip, so the assignment of cmd, the
// rc and rq_status checks, the goto labels and the return are not visible.
389 static int ost_brw_write(struct ost_obd *obddev, struct ptlrpc_request *req)
391 struct ptlrpc_bulk_desc *desc;
392 struct obd_conn conn;
393 struct niobuf_remote *remote_nb;
394 struct niobuf_local *local_nb, *lnb;
395 struct obd_ioobj *ioo;
396 struct ost_body *body;
397 int cmd, rc, i, j, objcount, niocount, size[2] = {sizeof(*body)};
398 void *tmp1, *tmp2, *end2;
401 body = lustre_msg_buf(req->rq_reqmsg, 0);
402 tmp1 = lustre_msg_buf(req->rq_reqmsg, 1);
403 tmp2 = lustre_msg_buf(req->rq_reqmsg, 2);
// end2 is one byte past the niobuf buffer and bounds the walk below.
404 end2 = (char *)tmp2 + req->rq_reqmsg->buflens[2];
405 objcount = req->rq_reqmsg->buflens[1] / sizeof(*ioo);
406 niocount = req->rq_reqmsg->buflens[2] / sizeof(*remote_nb);
// The target is taken from req->rq_obd; the obddev parameter is not used in
// the lines visible here.
409 conn.oc_id = body->connid;
410 conn.oc_dev = req->rq_obd->u.ost.ost_tgt;
412 for (i = 0; i < objcount; i++) {
413 ost_unpack_ioo((void *)&tmp1, &ioo);
// NOTE(review): tmp2 is a void pointer, so tmp2 + ioo_bufcnt advances by
// bytes (GNU void-pointer arithmetic) rather than by niobuf_remote elements.
// Confirm that this bound is what was intended.
414 if (tmp2 + ioo->ioo_bufcnt > end2) {
418 for (j = 0; j < ioo->ioo_bufcnt; j++)
419 ost_unpack_niobuf((void *)&tmp2, &remote_nb);
// The reply carries one remote niobuf per request niobuf in its buffer 1.
422 size[1] = niocount * sizeof(*remote_nb);
423 rc = lustre_pack_msg(2, size, NULL, &req->rq_replen, &req->rq_repmsg);
// From here on remote_nb is a cursor into the reply, not the request.
426 remote_nb = lustre_msg_buf(req->rq_repmsg, 1);
428 OBD_ALLOC(local_nb, niocount * sizeof(*local_nb));
429 if (local_nb == NULL)
430 GOTO(fail, rc = -ENOMEM);
432 /* The unpackers move tmp1 and tmp2, so reset them before using */
433 tmp1 = lustre_msg_buf(req->rq_reqmsg, 1);
434 tmp2 = lustre_msg_buf(req->rq_reqmsg, 2);
435 req->rq_status = obd_preprw(cmd, &conn, objcount,
436 tmp1, niocount, tmp2, local_nb);
440 desc = ptlrpc_prep_bulk(req->rq_connection);
442 GOTO(fail_preprw, rc = -ENOMEM);
// The descriptor keeps its own copy of conn for use by the page callbacks.
443 desc->b_cb = ost_brw_write_finished_cb;
444 desc->b_portal = OSC_BULK_PORTAL;
445 memcpy(&(desc->b_conn), &conn, sizeof(conn));
447 /* Save journal context for commit callbacks */
// Routine trace, so it is logged at debug level under D_BUFFS like the
// journal_info traces in ost_brw_write_cb rather than as an error.
448 CDEBUG(D_BUFFS, "journal_info: saved %p->%p\n", current, current->journal_info);
449 desc->b_journal_info = current->journal_info;
451 for (i = 0, lnb = local_nb; i < niocount; i++, lnb++) {
452 struct ptlrpc_service *srv = req->rq_obd->u.ost.ost_service;
453 struct ptlrpc_bulk_page *bulk;
455 bulk = ptlrpc_prep_bulk_page(desc);
457 GOTO(fail_bulk, rc = -ENOMEM);
// Each bulk page gets its own xid, taken from the service counter under
// srv_lock.
459 spin_lock(&srv->srv_lock);
460 bulk->b_xid = srv->srv_xid++;
461 spin_unlock(&srv->srv_lock);
463 bulk->b_buf = (void *)(unsigned long)lnb->addr;
464 bulk->b_page = lnb->page;
465 bulk->b_buflen = PAGE_SIZE;
466 bulk->b_cb = ost_brw_write_cb;
468 /* this advances remote_nb */
// NOTE(review): the continuation of this call (original line 470) is elided.
469 ost_pack_niobuf((void **)&remote_nb, lnb->offset, lnb->len, 0,
473 rc = ptlrpc_register_bulk(desc);
474 current->journal_info = NULL; /* kind of scary */
// NOTE(review): OBD_FREE(local_nb, ...) appears twice (original lines 480 and
// 486); presumably they belong to different exit paths whose labels are
// elided. Confirm against the full source.
480 OBD_FREE(local_nb, niocount * sizeof(*local_nb));
484 ptlrpc_free_bulk(desc);
486 OBD_FREE(local_nb, niocount * sizeof(*local_nb));
487 /* FIXME: how do we undo the preprw? */
// ost_brw: dispatch a bulk I/O request to the read or write handler.
// The direction is taken from the data field of the request body: a value of
// OBD_BRW_READ selects ost_brw_read(), and in the visible lines every other
// value ends up in ost_brw_write(). The return value is that of the handler.
492 static int ost_brw(struct ost_obd *obddev, struct ptlrpc_request *req)
494 struct ost_body *body = lustre_msg_buf(req->rq_reqmsg, 0);
496 if (body->data == OBD_BRW_READ)
497 return ost_brw_read(obddev, req);
499 return ost_brw_write(obddev, req);
// ost_handle: entry point for every request arriving at the OST service.
// Unpacks the request message, rejects anything that is not of type
// PTL_RPC_MSG_REQUEST, then switches on the opcode and calls the matching
// ost_* handler. Each case first runs an OBD_FAIL_RETURN fault-injection
// hook. Unknown opcodes get rq_status -ENOTSUPP and an error reply. After
// the switch either an error reply or a normal reply is sent.
// NOTE(review): the embedded line numbers skip, so the case labels, break
// statements, the out label, the condition choosing between ptlrpc_error()
// and ptlrpc_reply(), and the return are not visible in this listing.
502 static int ost_handle(struct obd_device *obddev, struct ptlrpc_service *svc,
503 struct ptlrpc_request *req)
506 struct ost_obd *ost = &obddev->u.ost;
509 rc = lustre_unpack_msg(req->rq_reqmsg, req->rq_reqlen);
// NOTE(review): this uses the MDS fault-injection id inside the OST handler;
// an OST-specific id is not visible in this file, so it is left unchanged.
510 if (rc || OBD_FAIL_CHECK(OBD_FAIL_MDS_HANDLE_UNPACK)) {
511 CERROR("lustre_ost: Invalid request\n");
515 if (req->rq_reqmsg->type != PTL_RPC_MSG_REQUEST) {
516 CERROR("lustre_ost: wrong packet type sent %d\n",
517 req->rq_reqmsg->type);
518 GOTO(out, rc = -EINVAL);
// Opcode dispatch; the debug string in each case names the handler it calls.
521 switch (req->rq_reqmsg->opc) {
523 CDEBUG(D_INODE, "connect\n");
524 OBD_FAIL_RETURN(OBD_FAIL_OST_CONNECT_NET, 0);
525 rc = ost_connect(ost, req);
528 CDEBUG(D_INODE, "disconnect\n");
529 OBD_FAIL_RETURN(OBD_FAIL_OST_DISCONNECT_NET, 0);
530 rc = ost_disconnect(ost, req);
533 CDEBUG(D_INODE, "get_info\n");
534 OBD_FAIL_RETURN(OBD_FAIL_OST_GET_INFO_NET, 0);
535 rc = ost_get_info(ost, req);
538 CDEBUG(D_INODE, "create\n");
539 OBD_FAIL_RETURN(OBD_FAIL_OST_CREATE_NET, 0);
540 rc = ost_create(ost, req);
543 CDEBUG(D_INODE, "destroy\n");
544 OBD_FAIL_RETURN(OBD_FAIL_OST_DESTROY_NET, 0);
545 rc = ost_destroy(ost, req);
548 CDEBUG(D_INODE, "getattr\n");
549 OBD_FAIL_RETURN(OBD_FAIL_OST_GETATTR_NET, 0);
550 rc = ost_getattr(ost, req);
553 CDEBUG(D_INODE, "setattr\n");
554 OBD_FAIL_RETURN(OBD_FAIL_OST_SETATTR_NET, 0);
555 rc = ost_setattr(ost, req);
558 CDEBUG(D_INODE, "open\n");
559 OBD_FAIL_RETURN(OBD_FAIL_OST_OPEN_NET, 0);
560 rc = ost_open(ost, req);
563 CDEBUG(D_INODE, "close\n");
564 OBD_FAIL_RETURN(OBD_FAIL_OST_CLOSE_NET, 0);
565 rc = ost_close(ost, req);
568 CDEBUG(D_INODE, "brw\n");
569 OBD_FAIL_RETURN(OBD_FAIL_OST_BRW_NET, 0);
570 rc = ost_brw(ost, req);
573 CDEBUG(D_INODE, "punch\n");
574 OBD_FAIL_RETURN(OBD_FAIL_OST_PUNCH_NET, 0);
575 rc = ost_punch(ost, req);
// Fallback for opcodes without a handler.
578 req->rq_status = -ENOTSUPP;
579 rc = ptlrpc_error(svc, req);
585 //req->rq_status = rc;
// Handler failures (rc) produce an error reply; otherwise the packed reply,
// including whatever rq_status the handler stored, is sent back.
587 CERROR("ost: processing error %d\n", rc);
588 ptlrpc_error(svc, req);
590 CDEBUG(D_INODE, "sending reply\n");
591 ptlrpc_reply(svc, req);
// ost_setup: attach this OST to its target device and start the service.
// buf is interpreted as an obd_ioctl_data whose ioc_dev selects the target in
// obd_dev[]. The target must be both attached and set up. The function then
// connects to it, creates the request service and starts two service threads
// named lustre_ost.
// NOTE(review): the embedded line numbers skip, so the declaration of err,
// the early-return statement, the tests on err, the goto labels and the
// return are not visible in this listing.
597 /* mount the file system (secretly) */
598 static int ost_setup(struct obd_device *obddev, obd_count len, void *buf)
600 struct obd_ioctl_data* data = buf;
601 struct ost_obd *ost = &obddev->u.ost;
602 struct obd_device *tgt;
// Valid indexes run from 0 to MAX_OBD_DEVICES - 1, so an index equal to
// MAX_OBD_DEVICES must be rejected before obd_dev[] is subscripted.
606 if (data->ioc_dev < 0 || data->ioc_dev >= MAX_OBD_DEVICES)
610 tgt = &obd_dev[data->ioc_dev];
612 if (!(tgt->obd_flags & OBD_ATTACHED) ||
613 !(tgt->obd_flags & OBD_SET_UP)) {
614 CERROR("device not attached or not set up (%d)\n",
616 GOTO(error_dec, err = -EINVAL);
619 ost->ost_conn.oc_dev = tgt;
620 err = obd_connect(&ost->ost_conn);
// Whatever obd_connect() returned, the caller sees -EINVAL.
622 CERROR("fail to connect to device %d\n", data->ioc_dev);
623 GOTO(error_dec, err = -EINVAL);
626 ost->ost_service = ptlrpc_init_svc(128 * 1024,
627 OST_REQUEST_PORTAL, OSC_REPLY_PORTAL,
629 if (!ost->ost_service) {
630 CERROR("failed to start service\n");
631 GOTO(error_disc, err = -EINVAL);
// Two service threads are started on the same service.
// NOTE(review): a thread start failure jumps to error_disc, the same label
// used when the service was never created. Whether the service is released
// on that path cannot be seen here; confirm against the full source.
634 err = ptlrpc_start_thread(obddev, ost->ost_service, "lustre_ost");
636 GOTO(error_disc, err = -EINVAL);
637 err = ptlrpc_start_thread(obddev, ost->ost_service, "lustre_ost");
639 GOTO(error_disc, err = -EINVAL);
// Error unwinding: drop the connection made above.
644 obd_disconnect(&ost->ost_conn);
// ost_cleanup: tear down what ost_setup() created.
// Complains if the device still has clients, stops all service threads,
// unregisters the service, complains if requests are still queued, frees the
// service structure and finally disconnects from the target device.
// NOTE(review): the embedded line numbers skip, so what happens after the
// still-has-clients message, the test on err and the return are not visible.
650 static int ost_cleanup(struct obd_device * obddev)
652 struct ost_obd *ost = &obddev->u.ost;
657 if ( !list_empty(&obddev->obd_gen_clients) ) {
658 CERROR("still has clients!\n");
// Threads are stopped before the service is unregistered and freed.
662 ptlrpc_stop_all_threads(ost->ost_service);
663 rpc_unregister_service(ost->ost_service);
// Leftover requests are only reported in the visible lines; the service is
// freed right after.
665 if (!list_empty(&ost->ost_service->srv_reqs)) {
666 // XXX reply with errors and clean up
667 CERROR("Request list not empty!\n");
669 OBD_FREE(ost->ost_service, sizeof(*ost->ost_service));
671 err = obd_disconnect(&ost->ost_conn);
673 CERROR("lustre ost: fail to disconnect device\n");
// Method table handed to obd_register_type() by ost_init(). The field: value
// form is the old GNU designated-initialiser syntax.
// NOTE(review): original lines 683 and 685 are elided, so any entries other
// than o_cleanup and the closing brace are not visible in this listing.
681 /* use obd ops to offer management infrastructure */
682 static struct obd_ops ost_obd_ops = {
684 o_cleanup: ost_cleanup,
// ost_init: module load hook; registers the OST method table under the name
// LUSTRE_OST_NAME.
// NOTE(review): the return statement is elided from this listing, and the
// result of obd_register_type() is not used in the visible line.
687 static int __init ost_init(void)
689 obd_register_type(&ost_obd_ops, LUSTRE_OST_NAME);
// ost_exit: module unload hook; removes the type registered by ost_init().
693 static void __exit ost_exit(void)
695 obd_unregister_type(LUSTRE_OST_NAME);
// Module metadata, followed by the registration of ost_init and ost_exit as
// the module init and exit functions.
698 MODULE_AUTHOR("Peter J. Braam <braam@clusterfs.com>");
699 MODULE_DESCRIPTION("Lustre Object Storage Target (OST) v0.01");
700 MODULE_LICENSE("GPL");
702 module_init(ost_init);
703 module_exit(ost_exit);