1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
5 * Author: Peter J. Braam <braam@clusterfs.com>
6 * Author: Phil Schwan <phil@clusterfs.com>
8 * This file is part of Lustre, http://www.lustre.org.
10 * Lustre is free software; you can redistribute it and/or
11 * modify it under the terms of version 2 of the GNU General Public
12 * License as published by the Free Software Foundation.
14 * Lustre is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with Lustre; if not, write to the Free Software
21 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 * Storage Target Handling functions
24 * Lustre Object Server Module (OST)
26 * This server is single threaded at present (but can easily be multi
27 * threaded). For testing and management it is treated as an
28 * obd_device, although it does not export a full OBD method table
29 * (the requests are coming in over the wire, so object target
30 * modules do not have a full method table.)
35 #include <linux/version.h>
36 #include <linux/module.h>
38 #include <linux/stat.h>
39 #include <linux/locks.h>
40 #include <linux/ext2_fs.h>
41 #include <linux/quotaops.h>
42 #include <asm/unistd.h>
44 #define DEBUG_SUBSYSTEM S_OST
46 #include <linux/obd_ost.h>
47 #include <linux/lustre_net.h>
/*
 * Handle a destroy request.
 *
 * Builds an obd_conn from the connid found in request buffer 0 and the
 * OST's target device, calls lustre_pack_msg() for a single reply buffer
 * of sizeof(struct ost_body), and records the result of obd_destroy() on
 * body->oa in req->rq_status.
 *
 * NOTE(review): the declaration of 'conn', the handling of 'rc' and the
 * return statements are not visible in this excerpt -- confirm against
 * the complete file.
 */
49 static int ost_destroy(struct ost_obd *ost, struct ptlrpc_request *req)
52 struct ost_body *body;
53 int rc, size = sizeof(*body);
/* Request buffer 0 holds the ost_body. */
56 body = lustre_msg_buf(req->rq_reqmsg, 0);
57 conn.oc_id = body->connid;
58 conn.oc_dev = ost->ost_tgt;
60 rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
/* The result of the OBD call is recorded in rq_status. */
64 req->rq_status = obd_destroy(&conn, &body->oa);
/*
 * Handle a getattr request.
 *
 * Builds an obd_conn from the request body's connid and the OST's target
 * device, calls lustre_pack_msg() for a single ost_body-sized reply
 * buffer, copies the request's 'oa' into the reply body and then calls
 * obd_getattr() on the reply copy.  The obd_getattr() result is stored in
 * req->rq_status.
 *
 * NOTE(review): the declaration of 'conn', the handling of 'rc' and the
 * return statements are not visible in this excerpt.
 */
68 static int ost_getattr(struct ost_obd *ost, struct ptlrpc_request *req)
71 struct ost_body *body, *repbody;
72 int rc, size = sizeof(*body);
75 body = lustre_msg_buf(req->rq_reqmsg, 0);
76 conn.oc_id = body->connid;
77 conn.oc_dev = ost->ost_tgt;
79 rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
/* Seed the reply with the request's oa; obd_getattr() operates on the reply copy. */
83 repbody = lustre_msg_buf(req->rq_repmsg, 0);
84 memcpy(&repbody->oa, &body->oa, sizeof(body->oa));
85 req->rq_status = obd_getattr(&conn, &repbody->oa);
/*
 * Handle an open request.
 *
 * Same shape as the other attribute-style handlers in this file: build an
 * obd_conn from the request body, call lustre_pack_msg() for one
 * ost_body-sized reply buffer, copy the request's 'oa' into the reply and
 * call obd_open() on the reply copy.  The obd_open() result is stored in
 * req->rq_status.
 *
 * NOTE(review): the declaration of 'conn', the handling of 'rc' and the
 * return statements are not visible in this excerpt.
 */
89 static int ost_open(struct ost_obd *ost, struct ptlrpc_request *req)
92 struct ost_body *body, *repbody;
93 int rc, size = sizeof(*body);
96 body = lustre_msg_buf(req->rq_reqmsg, 0);
97 conn.oc_id = body->connid;
98 conn.oc_dev = ost->ost_tgt;
100 rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
/* obd_open() is given the reply's copy of the oa, not the request's. */
104 repbody = lustre_msg_buf(req->rq_repmsg, 0);
105 memcpy(&repbody->oa, &body->oa, sizeof(body->oa));
106 req->rq_status = obd_open(&conn, &repbody->oa);
/*
 * Handle a close request.
 *
 * Builds an obd_conn from the request body's connid and the OST's target
 * device, calls lustre_pack_msg() for one ost_body-sized reply buffer,
 * copies the request's 'oa' into the reply and calls obd_close() on the
 * reply copy.  The obd_close() result is stored in req->rq_status.
 *
 * NOTE(review): the handling of 'rc' and the return statements are not
 * visible in this excerpt.
 */
110 static int ost_close(struct ost_obd *ost, struct ptlrpc_request *req)
112 struct obd_conn conn;
113 struct ost_body *body, *repbody;
114 int rc, size = sizeof(*body);
117 body = lustre_msg_buf(req->rq_reqmsg, 0);
118 conn.oc_id = body->connid;
119 conn.oc_dev = ost->ost_tgt;
121 rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
/* obd_close() is given the reply's copy of the oa. */
125 repbody = lustre_msg_buf(req->rq_repmsg, 0);
126 memcpy(&repbody->oa, &body->oa, sizeof(body->oa));
127 req->rq_status = obd_close(&conn, &repbody->oa);
/*
 * Handle a create request.
 *
 * Builds an obd_conn from the request body's connid and the OST's target
 * device, calls lustre_pack_msg() for one ost_body-sized reply buffer,
 * copies the request's 'oa' into the reply and calls obd_create() on the
 * reply copy, so whatever obd_create() writes into the oa ends up in the
 * reply buffer.  The obd_create() result is stored in req->rq_status.
 *
 * NOTE(review): the handling of 'rc' and the return statements are not
 * visible in this excerpt.
 */
131 static int ost_create(struct ost_obd *ost, struct ptlrpc_request *req)
133 struct obd_conn conn;
134 struct ost_body *body, *repbody;
135 int rc, size = sizeof(*body);
138 body = lustre_msg_buf(req->rq_reqmsg, 0);
139 conn.oc_id = body->connid;
140 conn.oc_dev = ost->ost_tgt;
142 rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
146 repbody = lustre_msg_buf(req->rq_repmsg, 0);
147 memcpy(&repbody->oa, &body->oa, sizeof(body->oa));
148 req->rq_status = obd_create(&conn, &repbody->oa);
/*
 * Handle a punch request.
 *
 * Builds an obd_conn from the request body's connid and the OST's target
 * device, calls lustre_pack_msg() for one ost_body-sized reply buffer,
 * copies the request's 'oa' into the reply and calls obd_punch() on the
 * reply copy.  The obd_punch() result is stored in req->rq_status.
 *
 * NOTE(review): the handling of 'rc' and the return statements are not
 * visible in this excerpt.
 */
152 static int ost_punch(struct ost_obd *ost, struct ptlrpc_request *req)
154 struct obd_conn conn;
155 struct ost_body *body, *repbody;
156 int rc, size = sizeof(*body);
159 body = lustre_msg_buf(req->rq_reqmsg, 0);
160 conn.oc_id = body->connid;
161 conn.oc_dev = ost->ost_tgt;
163 rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
167 repbody = lustre_msg_buf(req->rq_repmsg, 0);
168 memcpy(&repbody->oa, &body->oa, sizeof(body->oa));
/*
 * The oa's o_size and o_blocks fields are passed as the two extra
 * obd_punch() arguments -- presumably the bounds of the range to punch;
 * TODO confirm against obd_punch()'s prototype.
 */
169 req->rq_status = obd_punch(&conn, &repbody->oa,
170 repbody->oa.o_size, repbody->oa.o_blocks);
/*
 * Handle a setattr request.
 *
 * Builds an obd_conn from the request body's connid and the OST's target
 * device, calls lustre_pack_msg() for one ost_body-sized reply buffer,
 * copies the request's 'oa' into the reply and calls obd_setattr() on the
 * reply copy.  The obd_setattr() result is stored in req->rq_status.
 *
 * NOTE(review): the handling of 'rc' and the return statements are not
 * visible in this excerpt.
 */
174 static int ost_setattr(struct ost_obd *ost, struct ptlrpc_request *req)
176 struct obd_conn conn;
177 struct ost_body *body, *repbody;
178 int rc, size = sizeof(*body);
181 body = lustre_msg_buf(req->rq_reqmsg, 0);
182 conn.oc_id = body->connid;
183 conn.oc_dev = ost->ost_tgt;
185 rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
189 repbody = lustre_msg_buf(req->rq_repmsg, 0);
190 memcpy(&repbody->oa, &body->oa, sizeof(body->oa));
191 req->rq_status = obd_setattr(&conn, &repbody->oa);
/*
 * Handle a connect request.
 *
 * Unlike the other handlers, no connid is read from the request: only
 * conn.oc_dev is set before obd_connect() is called.  After the call,
 * conn.oc_id is logged and copied into the connid field of the reply
 * body.  The obd_connect() result is stored in req->rq_status.
 *
 * NOTE(review): in the visible lines the reply's connid is written
 * without looking at rq_status first; the handling of 'rc' and the
 * return statements are not visible in this excerpt.
 */
195 static int ost_connect(struct ost_obd *ost, struct ptlrpc_request *req)
197 struct obd_conn conn;
198 struct ost_body *body;
199 int rc, size = sizeof(*body);
202 conn.oc_dev = ost->ost_tgt;
204 rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
208 req->rq_status = obd_connect(&conn);
210 CDEBUG(D_IOCTL, "rep buffer %p, id %d\n", req->rq_repmsg, conn.oc_id);
/* 'body' points into the reply here, not the request. */
211 body = lustre_msg_buf(req->rq_repmsg, 0);
212 body->connid = conn.oc_id;
/*
 * Handle a disconnect request.
 *
 * Builds an obd_conn from the request body's connid and the OST's target
 * device, calls lustre_pack_msg() for one ost_body-sized reply buffer,
 * logs the connection id and calls obd_disconnect().  The
 * obd_disconnect() result is stored in req->rq_status; the visible lines
 * write nothing into the reply body.
 *
 * NOTE(review): the handling of 'rc' and the return statements are not
 * visible in this excerpt.
 */
216 static int ost_disconnect(struct ost_obd *ost, struct ptlrpc_request *req)
218 struct obd_conn conn;
219 struct ost_body *body;
220 int rc, size = sizeof(*body);
223 body = lustre_msg_buf(req->rq_reqmsg, 0);
224 conn.oc_id = body->connid;
225 conn.oc_dev = ost->ost_tgt;
227 rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
231 CDEBUG(D_IOCTL, "Disconnecting %d\n", conn.oc_id);
232 req->rq_status = obd_disconnect(&conn);
/*
 * Handle a get_info request.
 *
 * Request buffer 0 is the ost_body (for the connid); request buffer 1 and
 * its length are passed to obd_get_info() as the third and second
 * arguments.  obd_get_info() receives the addresses of size[1] and
 * bufs[1], which are then handed to lustre_pack_msg() as the second
 * buffer of a two-buffer reply.  The obd_get_info() result is stored in
 * req->rq_status.
 *
 * Unlike the other handlers, the reply is packed after the OBD call,
 * because the second reply buffer's size is only known afterwards.
 *
 * NOTE(review): the condition guarding the CERROR() and the return
 * statements are not visible in this excerpt.
 */
236 static int ost_get_info(struct ost_obd *ost, struct ptlrpc_request *req)
238 struct obd_conn conn;
239 struct ost_body *body;
/* Only size[0] is given an initialiser, so size[1] starts at 0. */
240 int rc, size[2] = {sizeof(*body)};
241 char *bufs[2] = {NULL, NULL}, *ptr;
244 body = lustre_msg_buf(req->rq_reqmsg, 0);
245 conn.oc_id = body->connid;
246 conn.oc_dev = ost->ost_tgt;
248 ptr = lustre_msg_buf(req->rq_reqmsg, 1);
252 req->rq_status = obd_get_info(&conn, req->rq_reqmsg->buflens[1], ptr,
253 &(size[1]), (void **)&(bufs[1]));
255 rc = lustre_pack_msg(2, size, bufs, &req->rq_replen, &req->rq_repmsg);
257 CERROR("cannot pack reply\n");
/*
 * Handle the read side of a bulk read/write (BRW) request.
 *
 * Request layout as used below: buffer 0 is the ost_body, buffer 1 is an
 * array of obd_ioobj and buffer 2 is an array of niobuf (the counts are
 * derived from the buffer lengths).  Visible steps:
 *   1. walk both arrays with the unpack helpers, checking that each
 *      object's niobufs still fit inside buffer 2;
 *   2. call lustre_pack_msg() for a one-buffer reply and allocate 'res'
 *      with niocount entries;
 *   3. pass the request arrays and 'res' to obd_preprw();
 *   4. for each entry, send res[i].addr as a PAGE_SIZE bulk on
 *      OST_BULK_PORTAL, tagged with the xid of the matching request
 *      niobuf, and wait until ptlrpc_check_bulk_sent() is true;
 *   5. call obd_commitrw(), then free 'res' and the bulk descriptors.
 *
 * Fix: the bounds check used to add ioo_bufcnt to a void pointer, i.e. it
 * compared in bytes while buffer 2 is consumed in sizeof(struct niobuf)
 * steps.  An oversized ioo_bufcnt therefore passed the check and let the
 * unpack loop run past the end of buffer 2.  The check now compares the
 * count against the number of whole niobufs remaining.
 *
 * NOTE(review): no assignment to 'cmd' is visible in this excerpt before
 * it is passed to obd_preprw()/obd_commitrw(); 'bulk_vec' is initialised
 * to NULL and never assigned in the visible lines, so the cleanup loop
 * over it looks unreachable; and 'bulk' is freed both in the send loop
 * and near the end of the function -- confirm against the complete file
 * that it is reset in between.  Error checks, braces and return
 * statements are likewise not visible here.
 */
262 static int ost_brw_read(struct ost_obd *obddev, struct ptlrpc_request *req)
264 struct ptlrpc_bulk_desc **bulk_vec = NULL, *bulk = NULL;
265 struct obd_conn conn;
266 void *tmp1, *tmp2, *end2;
267 struct niobuf *nb, *dst, *res = NULL;
268 struct obd_ioobj *ioo;
269 struct ost_body *body;
270 int rc, cmd, i, j, objcount, niocount, size = sizeof(*body);
273 body = lustre_msg_buf(req->rq_reqmsg, 0);
274 tmp1 = lustre_msg_buf(req->rq_reqmsg, 1);
275 tmp2 = lustre_msg_buf(req->rq_reqmsg, 2);
276 end2 = (char *)tmp2 + req->rq_reqmsg->buflens[2];
277 objcount = req->rq_reqmsg->buflens[1] / sizeof(*ioo);
278 niocount = req->rq_reqmsg->buflens[2] / sizeof(*nb);
281 conn.oc_id = body->connid;
282 conn.oc_dev = req->rq_obd->u.ost.ost_tgt;
284 for (i = 0; i < objcount; i++) {
285 ost_unpack_ioo(&tmp1, &ioo);
/* Reject an object that claims more niobufs than remain in buffer 2. */
286 if (ioo->ioo_bufcnt > ((char *)end2 - (char *)tmp2) / sizeof(*nb)) {
291 for (j = 0; j < ioo->ioo_bufcnt; j++)
292 ost_unpack_niobuf(&tmp2, &nb);
295 rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
298 OBD_ALLOC(res, sizeof(*res) * niocount);
302 /* The unpackers move tmp1 and tmp2, so reset them before using */
303 tmp1 = lustre_msg_buf(req->rq_reqmsg, 1);
304 tmp2 = lustre_msg_buf(req->rq_reqmsg, 2);
305 req->rq_status = obd_preprw(cmd, &conn, objcount,
306 tmp1, niocount, tmp2, res);
311 for (i = 0; i < niocount; i++) {
312 bulk = ptlrpc_prep_bulk(req->rq_connection);
314 CERROR("cannot alloc bulk desc\n");
315 GOTO(out, rc = -ENOMEM);
/* The xid comes from the request's niobuf; the data address from 'res'. */
318 dst = &(((struct niobuf *)tmp2)[i]);
319 bulk->b_xid = dst->xid;
320 bulk->b_buf = (void *)(unsigned long)res[i].addr;
321 bulk->b_buflen = PAGE_SIZE;
322 rc = ptlrpc_send_bulk(bulk, OST_BULK_PORTAL);
325 wait_event_interruptible(bulk->b_waitq,
326 ptlrpc_check_bulk_sent(bulk));
328 if (bulk->b_flags & PTL_RPC_FL_INTR)
331 ptlrpc_free_bulk(bulk);
335 /* The unpackers move tmp1 and tmp2, so reset them before using */
336 tmp1 = lustre_msg_buf(req->rq_reqmsg, 1);
337 tmp2 = lustre_msg_buf(req->rq_reqmsg, 2);
338 req->rq_status = obd_commitrw(cmd, &conn, objcount,
339 tmp1, niocount, res);
344 OBD_FREE(res, sizeof(*res) * niocount);
346 ptlrpc_free_bulk(bulk);
347 if (bulk_vec != NULL) {
348 for (i = 0; i < niocount; i++)
349 if (bulk_vec[i] != NULL)
350 ptlrpc_free_bulk(bulk_vec[i]);
351 OBD_FREE(bulk_vec, niocount * sizeof(*bulk_vec));
/*
 * Commit a single buffer through obd_commitrw().
 *
 * Zeroes a local obd_ioobj and a local 'buf', then calls obd_commitrw()
 * with OBD_BRW_WRITE for one object and one buffer on the given
 * connection.
 *
 * NOTE(review): the declarations of 'buf' and 'rc', any use of the 'page'
 * argument, and the return statement are not visible in this excerpt.
 */
357 static int ost_commit_page(struct obd_conn *conn, struct page *page)
359 struct obd_ioobj obj;
364 memset(&buf, 0, sizeof(buf));
365 memset(&obj, 0, sizeof(obj));
370 rc = obd_commitrw(OBD_BRW_WRITE, conn, 1, &obj, 1, &buf);
/*
 * Bulk callback installed by ost_brw_write() as bulk->b_cb.
 *
 * Commits the descriptor's page via ost_commit_page(), using the
 * connection stored in the descriptor, and logs an error if that fails.
 * The 'data' argument is not used in the visible lines.
 *
 * NOTE(review): the condition guarding the CERROR() and the return
 * statement are not visible in this excerpt.
 */
374 static int ost_brw_write_cb(struct ptlrpc_bulk_desc *bulk, void *data)
380 rc = ost_commit_page(&bulk->b_conn, bulk->b_page);
382 CERROR("ost_commit_page failed: %d\n", rc);
/*
 * Handle the write side of a bulk read/write (BRW) request.
 *
 * Request layout as used below: buffer 0 is the ost_body, buffer 1 is an
 * array of obd_ioobj and buffer 2 is an array of niobuf.  Visible steps:
 *   1. walk both arrays with the unpack helpers, checking that each
 *      object's niobufs still fit inside buffer 2;
 *   2. call lustre_pack_msg() for a two-buffer reply whose second buffer
 *      holds niocount niobufs, and point 'res' at it;
 *   3. pass the request arrays and 'res' to obd_preprw();
 *   4. for each entry in 'res', prepare a bulk descriptor, give it a
 *      fresh xid taken from the service under srv_lock, store that xid
 *      (converted with HTON__u32) in the reply niobuf, and register the
 *      descriptor on OSC_BULK_PORTAL with ost_brw_write_cb as callback.
 *
 * Fix: the bounds check used to add ioo_bufcnt to a void pointer, i.e. it
 * compared in bytes while buffer 2 is consumed in sizeof(struct niobuf)
 * steps.  An oversized ioo_bufcnt therefore passed the check and let the
 * unpack loop run past the end of buffer 2.  The check now compares the
 * count against the number of whole niobufs remaining.
 *
 * NOTE(review): no assignment to 'cmd' is visible in this excerpt before
 * it is passed to obd_preprw(); error checks, braces and return
 * statements are likewise not visible here.
 */
387 static int ost_brw_write(struct ost_obd *obddev, struct ptlrpc_request *req)
389 struct obd_conn conn;
390 struct niobuf *nb, *res;
391 struct obd_ioobj *ioo;
392 struct ost_body *body;
393 int cmd, rc, i, j, objcount, niocount, size[2] = {sizeof(*body)};
394 void *tmp1, *tmp2, *end2;
397 body = lustre_msg_buf(req->rq_reqmsg, 0);
398 tmp1 = lustre_msg_buf(req->rq_reqmsg, 1);
399 tmp2 = lustre_msg_buf(req->rq_reqmsg, 2);
400 end2 = (char *)tmp2 + req->rq_reqmsg->buflens[2];
401 objcount = req->rq_reqmsg->buflens[1] / sizeof(*ioo);
402 niocount = req->rq_reqmsg->buflens[2] / sizeof(*nb);
405 conn.oc_id = body->connid;
406 conn.oc_dev = req->rq_obd->u.ost.ost_tgt;
408 for (i = 0; i < objcount; i++) {
409 ost_unpack_ioo((void *)&tmp1, &ioo);
/* Reject an object that claims more niobufs than remain in buffer 2. */
410 if (ioo->ioo_bufcnt > ((char *)end2 - (char *)tmp2) / sizeof(*nb)) {
414 for (j = 0; j < ioo->ioo_bufcnt; j++)
415 ost_unpack_niobuf((void *)&tmp2, &nb);
/* The reply's second buffer carries one niobuf per request niobuf. */
418 size[1] = niocount * sizeof(*nb);
419 rc = lustre_pack_msg(2, size, NULL, &req->rq_replen, &req->rq_repmsg);
423 res = lustre_msg_buf(req->rq_repmsg, 1);
425 /* The unpackers move tmp1 and tmp2, so reset them before using */
426 tmp1 = lustre_msg_buf(req->rq_reqmsg, 1);
427 tmp2 = lustre_msg_buf(req->rq_reqmsg, 2);
428 req->rq_status = obd_preprw(cmd, &conn, objcount,
429 tmp1, niocount, tmp2, res);
434 for (i = 0; i < niocount; i++, res++) {
435 struct ptlrpc_bulk_desc *bulk;
436 struct ptlrpc_service *srv = req->rq_obd->u.ost.ost_service;
438 bulk = ptlrpc_prep_bulk(req->rq_connection);
440 GOTO(out, rc = -ENOMEM);
/* srv_xid is shared service state, so take the next xid under srv_lock. */
442 spin_lock(&srv->srv_lock);
443 bulk->b_xid = srv->srv_xid++;
444 spin_unlock(&srv->srv_lock);
/* Publish the xid in the reply niobuf. */
446 res->xid = HTON__u32(bulk->b_xid);
448 bulk->b_buf = (void *)(unsigned long)res->addr;
449 bulk->b_cb = ost_brw_write_cb;
450 bulk->b_page = res->page;
451 memcpy(&(bulk->b_conn), &conn, sizeof(conn));
452 bulk->b_buflen = PAGE_SIZE;
453 bulk->b_portal = OSC_BULK_PORTAL;
454 rc = ptlrpc_register_bulk(bulk);
461 /* FIXME: should we return 'rc' here? */
/*
 * Dispatch a BRW request to the read or write handler.
 *
 * The 'data' field of the ost_body in request buffer 0 selects the
 * direction: OBD_BRW_READ goes to ost_brw_read(), anything else to
 * ost_brw_write().  The chosen handler's return value is returned
 * unchanged.
 */
465 static int ost_brw(struct ost_obd *obddev, struct ptlrpc_request *req)
467 struct ost_body *body = lustre_msg_buf(req->rq_reqmsg, 0);
469 if (body->data == OBD_BRW_READ)
470 return ost_brw_read(obddev, req);
472 return ost_brw_write(obddev, req);
/*
 * Top-level request handler for the OST service.
 *
 * Unpacks the request message, rejects anything that is not of type
 * PTL_RPC_MSG_REQUEST, and dispatches on the message opcode to the
 * matching ost_*() handler.  Each branch logs its operation name and
 * runs an OBD_FAIL_RETURN() hook before calling the handler.  An unknown
 * opcode sets rq_status to -ENOTSUPP and answers through ptlrpc_error().
 * After dispatch the request is answered with ptlrpc_error() on the
 * error path or ptlrpc_reply() otherwise.
 *
 * Fixes: the open and close branches logged "setattr" (copy-paste from
 * the setattr branch), and the two error messages were tagged
 * "lustre_mds" although this is the OST handler.
 *
 * NOTE(review): the unpack failure hook still uses
 * OBD_FAIL_MDS_HANDLE_UNPACK; whether an OST-specific constant exists
 * cannot be seen from this file -- confirm.  The case labels, the
 * declaration of 'rc', the 'out' label and the return statements are not
 * visible in this excerpt.
 */
475 static int ost_handle(struct obd_device *obddev, struct ptlrpc_service *svc,
476 struct ptlrpc_request *req)
479 struct ost_obd *ost = &obddev->u.ost;
482 rc = lustre_unpack_msg(req->rq_reqmsg, req->rq_reqlen);
483 if (rc || OBD_FAIL_CHECK(OBD_FAIL_MDS_HANDLE_UNPACK)) {
484 CERROR("lustre_ost: Invalid request\n");
488 if (req->rq_reqmsg->type != PTL_RPC_MSG_REQUEST) {
489 CERROR("lustre_ost: wrong packet type sent %d\n",
490 req->rq_reqmsg->type);
491 GOTO(out, rc = -EINVAL);
494 switch (req->rq_reqmsg->opc) {
496 CDEBUG(D_INODE, "connect\n");
497 OBD_FAIL_RETURN(OBD_FAIL_OST_CONNECT_NET, 0);
498 rc = ost_connect(ost, req);
501 CDEBUG(D_INODE, "disconnect\n");
502 OBD_FAIL_RETURN(OBD_FAIL_OST_DISCONNECT_NET, 0);
503 rc = ost_disconnect(ost, req);
506 CDEBUG(D_INODE, "get_info\n");
507 OBD_FAIL_RETURN(OBD_FAIL_OST_GET_INFO_NET, 0);
508 rc = ost_get_info(ost, req);
511 CDEBUG(D_INODE, "create\n");
512 OBD_FAIL_RETURN(OBD_FAIL_OST_CREATE_NET, 0);
513 rc = ost_create(ost, req);
516 CDEBUG(D_INODE, "destroy\n");
517 OBD_FAIL_RETURN(OBD_FAIL_OST_DESTROY_NET, 0);
518 rc = ost_destroy(ost, req);
521 CDEBUG(D_INODE, "getattr\n");
522 OBD_FAIL_RETURN(OBD_FAIL_OST_GETATTR_NET, 0);
523 rc = ost_getattr(ost, req);
526 CDEBUG(D_INODE, "setattr\n");
527 OBD_FAIL_RETURN(OBD_FAIL_OST_SETATTR_NET, 0);
528 rc = ost_setattr(ost, req);
531 CDEBUG(D_INODE, "open\n");
532 OBD_FAIL_RETURN(OBD_FAIL_OST_OPEN_NET, 0);
533 rc = ost_open(ost, req);
536 CDEBUG(D_INODE, "close\n");
537 OBD_FAIL_RETURN(OBD_FAIL_OST_CLOSE_NET, 0);
538 rc = ost_close(ost, req);
541 CDEBUG(D_INODE, "brw\n");
542 OBD_FAIL_RETURN(OBD_FAIL_OST_BRW_NET, 0);
543 rc = ost_brw(ost, req);
546 CDEBUG(D_INODE, "punch\n");
547 OBD_FAIL_RETURN(OBD_FAIL_OST_PUNCH_NET, 0);
548 rc = ost_punch(ost, req);
/* Unknown opcode: report -ENOTSUPP to the sender. */
551 req->rq_status = -ENOTSUPP;
552 rc = ptlrpc_error(svc, req);
558 //req->rq_status = rc;
/* Handler failure is answered with an error, success with a normal reply. */
560 CERROR("ost: processing error %d\n", rc);
561 ptlrpc_error(svc, req);
563 CDEBUG(D_INODE, "sending reply\n");
564 ptlrpc_reply(svc, req);
570 /* mount the file system (secretly) */
/*
 * Set up the OST device.
 *
 * 'buf' is an obd_ioctl_data whose ioc_dev field selects an entry of the
 * global obd_dev[] table as the target.  The target must carry both the
 * OBD_ATTACHED and OBD_SET_UP flags.  The function then connects to the
 * target through ost->ost_conn, creates the request service with
 * ptlrpc_init_svc() on OST_REQUEST_PORTAL / OSC_REPLY_PORTAL, and starts
 * the "lustre_ost" service thread.  Failures after the connect go
 * through obd_disconnect().
 *
 * Fix: the index check used '> MAX_OBD_DEVICES', which let
 * ioc_dev == MAX_OBD_DEVICES through to obd_dev[]; assuming the table
 * has MAX_OBD_DEVICES entries, that is one past its end.  The check is
 * now '>='.
 *
 * NOTE(review): in the visible lines the service returned by
 * ptlrpc_init_svc() is not released when ptlrpc_start_thread() fails,
 * and the error code of obd_connect()/ptlrpc_start_thread() is replaced
 * by -EINVAL.  The 'len' argument is unused in the visible lines.  The
 * declaration of 'err', the error labels and the return statements are
 * not visible in this excerpt.
 */
571 static int ost_setup(struct obd_device *obddev, obd_count len, void *buf)
573 struct obd_ioctl_data* data = buf;
574 struct ost_obd *ost = &obddev->u.ost;
575 struct obd_device *tgt;
/* Valid indices are 0 .. MAX_OBD_DEVICES - 1. */
579 if (data->ioc_dev < 0 || data->ioc_dev >= MAX_OBD_DEVICES)
583 tgt = &obd_dev[data->ioc_dev];
585 if (!(tgt->obd_flags & OBD_ATTACHED) ||
586 !(tgt->obd_flags & OBD_SET_UP)) {
587 CERROR("device not attached or not set up (%d)\n",
589 GOTO(error_dec, err = -EINVAL);
592 ost->ost_conn.oc_dev = tgt;
593 err = obd_connect(&ost->ost_conn);
595 CERROR("fail to connect to device %d\n", data->ioc_dev);
596 GOTO(error_dec, err = -EINVAL);
599 ost->ost_service = ptlrpc_init_svc(128 * 1024,
600 OST_REQUEST_PORTAL, OSC_REPLY_PORTAL,
602 if (!ost->ost_service) {
603 CERROR("failed to start service\n");
604 GOTO(error_disc, err = -EINVAL);
607 err = ptlrpc_start_thread(obddev, ost->ost_service, "lustre_ost");
609 GOTO(error_disc, err = -EINVAL);
614 obd_disconnect(&ost->ost_conn);
/*
 * Tear down the OST device.
 *
 * Complains if the device's obd_gen_clients list is not empty.  It then
 * stops the service thread, unregisters the service, complains if the
 * service's request list is not empty, frees the service structure and
 * finally disconnects ost->ost_conn, logging an error if that fails.
 *
 * NOTE(review): what happens after the "still has clients!" message
 * (presumably an early return) is not visible in this excerpt, nor are
 * the declaration of 'err', the condition guarding the last CERROR() and
 * the return statements.  In the visible lines a non-empty request list
 * is only reported; the service is freed regardless.
 */
620 static int ost_cleanup(struct obd_device * obddev)
622 struct ost_obd *ost = &obddev->u.ost;
627 if ( !list_empty(&obddev->obd_gen_clients) ) {
628 CERROR("still has clients!\n");
632 ptlrpc_stop_thread(ost->ost_service);
633 rpc_unregister_service(ost->ost_service);
635 if (!list_empty(&ost->ost_service->srv_reqs)) {
636 // XXX reply with errors and clean up
637 CERROR("Request list not empty!\n");
639 OBD_FREE(ost->ost_service, sizeof(*ost->ost_service));
641 err = obd_disconnect(&ost->ost_conn);
643 CERROR("lustre ost: fail to disconnect device\n");
651 /* use obd ops to offer management infrastructure */
/*
 * Method table registered for the OST type in ost_init().  It uses the
 * old GNU 'field: value' initialiser syntax.
 *
 * NOTE(review): only the o_cleanup entry is visible in this excerpt; an
 * entry wiring up ost_setup is presumably present in the complete file.
 */
652 static struct obd_ops ost_obd_ops = {
654 o_cleanup: ost_cleanup,
/*
 * Module entry point: registers the OST method table with the OBD class
 * under the name LUSTRE_OST_NAME.
 *
 * NOTE(review): the return statement is not visible in this excerpt, and
 * the visible line does not use the result of obd_register_type().
 */
657 static int __init ost_init(void)
659 obd_register_type(&ost_obd_ops, LUSTRE_OST_NAME);
/* Module exit point: unregisters the LUSTRE_OST_NAME type registered by ost_init(). */
663 static void __exit ost_exit(void)
665 obd_unregister_type(LUSTRE_OST_NAME);
/* Module metadata, followed by registration of the init/exit entry points. */
668 MODULE_AUTHOR("Peter J. Braam <braam@clusterfs.com>");
669 MODULE_DESCRIPTION("Lustre Object Storage Target (OST) v0.01");
670 MODULE_LICENSE("GPL");
672 module_init(ost_init);
673 module_exit(ost_exit);