1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
5 * Author: Peter J. Braam <braam@clusterfs.com>
6 * Author: Phil Schwan <phil@clusterfs.com>
8 * This file is part of Lustre, http://www.lustre.org.
10 * Lustre is free software; you can redistribute it and/or
11 * modify it under the terms of version 2 of the GNU General Public
12 * License as published by the Free Software Foundation.
14 * Lustre is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with Lustre; if not, write to the Free Software
21 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 * Storage Target Handling functions
24 * Lustre Object Server Module (OST)
26 * This server is single threaded at present (but can easily be multi
27 * threaded). For testing and management it is treated as an
28 * obd_device, although it does not export a full OBD method table
29 * (the requests are coming in over the wire, so object target
30 * modules do not have a full method table.)
35 #include <linux/version.h>
36 #include <linux/module.h>
38 #include <linux/stat.h>
39 #include <linux/locks.h>
40 #include <linux/ext2_fs.h>
41 #include <linux/quotaops.h>
42 #include <asm/unistd.h>
44 #define DEBUG_SUBSYSTEM S_OST
46 #include <linux/obd_ost.h>
47 #include <linux/lustre_net.h>
/*
 * Handle a destroy request.
 *
 * Builds an obd_conn from the connid in the request body and the OST's
 * target device, packs a one-buffer reply sized for a struct ost_body, and
 * stores the result of obd_destroy() in req->rq_status.
 *
 * NOTE(review): the declaration of `conn`, any check of `rc` after
 * lustre_pack_msg(), and the return statement are not visible in this
 * listing -- confirm a pack failure is handled before rq_repmsg is used.
 */
49 static int ost_destroy(struct ost_obd *ost, struct ptlrpc_request *req)
52 struct ost_body *body;
53 int rc, size = sizeof(*body);
/* request buffer 0 is interpreted as the ost_body */
56 body = lustre_msg_buf(req->rq_reqmsg, 0);
57 conn.oc_id = body->connid;
58 conn.oc_dev = ost->ost_tgt;
/* one reply buffer of sizeof(struct ost_body); outputs go to rq_replen/rq_repbuf */
60 rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repbuf);
61 req->rq_repmsg = (struct lustre_msg *)req->rq_repbuf;
/* unlike the other attribute handlers, this operates on the request's oa, not a reply copy */
65 req->rq_status = obd_destroy(&conn, &body->oa);
/*
 * Handle a getattr request.
 *
 * Copies the object attributes (oa) from the request body into the reply
 * body and calls obd_getattr() on the reply copy; the call's result is
 * stored in req->rq_status.
 *
 * NOTE(review): the declaration of `conn` and the handling of `rc` after
 * lustre_pack_msg() are not visible in this listing -- confirm.
 */
69 static int ost_getattr(struct ost_obd *ost, struct ptlrpc_request *req)
72 struct ost_body *body, *repbody;
73 int rc, size = sizeof(*body);
/* request buffer 0 is interpreted as the ost_body */
76 body = lustre_msg_buf(req->rq_reqmsg, 0);
77 conn.oc_id = body->connid;
78 conn.oc_dev = ost->ost_tgt;
/* one reply buffer of sizeof(struct ost_body) */
80 rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repbuf);
81 req->rq_repmsg = (struct lustre_msg *)req->rq_repbuf;
/* seed the reply oa from the request so obd_getattr() works on the reply copy */
85 repbody = lustre_msg_buf(req->rq_repmsg, 0);
86 memcpy(&repbody->oa, &body->oa, sizeof(body->oa));
87 req->rq_status = obd_getattr(&conn, &repbody->oa);
/*
 * Handle an open request.
 *
 * Copies the request's object attributes into the reply body and calls
 * obd_open() on the reply copy; the call's result is stored in
 * req->rq_status.
 *
 * NOTE(review): the declaration of `conn` and the handling of `rc` after
 * lustre_pack_msg() are not visible in this listing -- confirm.
 */
91 static int ost_open(struct ost_obd *ost, struct ptlrpc_request *req)
94 struct ost_body *body, *repbody;
95 int rc, size = sizeof(*body);
/* request buffer 0 is interpreted as the ost_body */
98 body = lustre_msg_buf(req->rq_reqmsg, 0);
99 conn.oc_id = body->connid;
100 conn.oc_dev = ost->ost_tgt;
/* one reply buffer of sizeof(struct ost_body) */
102 rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repbuf);
103 req->rq_repmsg = (struct lustre_msg *)req->rq_repbuf;
/* obd_open() is given the reply's copy of the oa */
107 repbody = lustre_msg_buf(req->rq_repmsg, 0);
108 memcpy(&repbody->oa, &body->oa, sizeof(body->oa));
109 req->rq_status = obd_open(&conn, &repbody->oa);
/*
 * Handle a close request.
 *
 * Copies the request's object attributes into the reply body and calls
 * obd_close() on the reply copy; the call's result is stored in
 * req->rq_status.
 *
 * NOTE(review): the handling of `rc` after lustre_pack_msg() is not visible
 * in this listing -- confirm a pack failure is checked before rq_repmsg is
 * dereferenced.
 */
113 static int ost_close(struct ost_obd *ost, struct ptlrpc_request *req)
115 struct obd_conn conn;
116 struct ost_body *body, *repbody;
117 int rc, size = sizeof(*body);
/* request buffer 0 is interpreted as the ost_body */
120 body = lustre_msg_buf(req->rq_reqmsg, 0);
121 conn.oc_id = body->connid;
122 conn.oc_dev = ost->ost_tgt;
/* one reply buffer of sizeof(struct ost_body) */
124 rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repbuf);
125 req->rq_repmsg = (struct lustre_msg *)req->rq_repbuf;
/* obd_close() is given the reply's copy of the oa */
129 repbody = lustre_msg_buf(req->rq_repmsg, 0);
130 memcpy(&repbody->oa, &body->oa, sizeof(body->oa));
131 req->rq_status = obd_close(&conn, &repbody->oa);
/*
 * Handle a create request.
 *
 * Copies the request's object attributes into the reply body and calls
 * obd_create() on the reply copy, so whatever obd_create() writes into the
 * oa is what the reply buffer carries; the call's result is stored in
 * req->rq_status.
 *
 * NOTE(review): the handling of `rc` after lustre_pack_msg() is not visible
 * in this listing -- confirm.
 */
135 static int ost_create(struct ost_obd *ost, struct ptlrpc_request *req)
137 struct obd_conn conn;
138 struct ost_body *body, *repbody;
139 int rc, size = sizeof(*body);
/* request buffer 0 is interpreted as the ost_body */
142 body = lustre_msg_buf(req->rq_reqmsg, 0);
143 conn.oc_id = body->connid;
144 conn.oc_dev = ost->ost_tgt;
/* one reply buffer of sizeof(struct ost_body) */
146 rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repbuf);
147 req->rq_repmsg = (struct lustre_msg *)req->rq_repbuf;
/* obd_create() is given the reply's copy of the oa */
151 repbody = lustre_msg_buf(req->rq_repmsg, 0);
152 memcpy(&repbody->oa, &body->oa, sizeof(body->oa));
153 req->rq_status = obd_create(&conn, &repbody->oa);
/*
 * Handle a punch request.
 *
 * Copies the request's object attributes into the reply body and calls
 * obd_punch() with the reply oa plus its o_size and o_blocks fields; the
 * call's result is stored in req->rq_status.
 *
 * NOTE(review): presumably o_size/o_blocks are overloaded here to carry the
 * bounds of the range to punch -- verify against obd_punch()'s prototype.
 * The handling of `rc` after lustre_pack_msg() is not visible in this
 * listing.
 */
157 static int ost_punch(struct ost_obd *ost, struct ptlrpc_request *req)
159 struct obd_conn conn;
160 struct ost_body *body, *repbody;
161 int rc, size = sizeof(*body);
/* request buffer 0 is interpreted as the ost_body */
164 body = lustre_msg_buf(req->rq_reqmsg, 0);
165 conn.oc_id = body->connid;
166 conn.oc_dev = ost->ost_tgt;
/* one reply buffer of sizeof(struct ost_body) */
168 rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repbuf);
169 req->rq_repmsg = (struct lustre_msg *)req->rq_repbuf;
/* the two extra arguments are read from the reply copy, i.e. the values the client sent */
173 repbody = lustre_msg_buf(req->rq_repmsg, 0);
174 memcpy(&repbody->oa, &body->oa, sizeof(body->oa));
175 req->rq_status = obd_punch(&conn, &repbody->oa,
176 repbody->oa.o_size, repbody->oa.o_blocks);
/*
 * Handle a setattr request.
 *
 * Copies the request's object attributes into the reply body and calls
 * obd_setattr() on the reply copy; the call's result is stored in
 * req->rq_status.
 *
 * NOTE(review): the handling of `rc` after lustre_pack_msg() is not visible
 * in this listing -- confirm.
 */
180 static int ost_setattr(struct ost_obd *ost, struct ptlrpc_request *req)
182 struct obd_conn conn;
183 struct ost_body *body, *repbody;
184 int rc, size = sizeof(*body);
/* request buffer 0 is interpreted as the ost_body */
187 body = lustre_msg_buf(req->rq_reqmsg, 0);
188 conn.oc_id = body->connid;
189 conn.oc_dev = ost->ost_tgt;
/* one reply buffer of sizeof(struct ost_body) */
191 rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repbuf);
192 req->rq_repmsg = (struct lustre_msg *)req->rq_repbuf;
/* obd_setattr() is given the reply's copy of the oa */
196 repbody = lustre_msg_buf(req->rq_repmsg, 0);
197 memcpy(&repbody->oa, &body->oa, sizeof(body->oa));
198 req->rq_status = obd_setattr(&conn, &repbody->oa);
/*
 * Handle a connect request.
 *
 * Calls obd_connect() on a connection that has only oc_dev set (the OST's
 * target device), stores the result in req->rq_status, and writes the
 * resulting conn.oc_id into the reply body's connid field. The request body
 * is not read in the visible lines.
 *
 * NOTE(review): in the visible lines connid is written to the reply without
 * regard to rq_status; elided lines may differ -- confirm. The handling of
 * `rc` after lustre_pack_msg() is also not visible.
 */
202 static int ost_connect(struct ost_obd *ost, struct ptlrpc_request *req)
204 struct obd_conn conn;
205 struct ost_body *body;
206 int rc, size = sizeof(*body);
/* oc_id is left for obd_connect() to fill in; it is read back below */
209 conn.oc_dev = ost->ost_tgt;
/* one reply buffer of sizeof(struct ost_body) */
211 rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repbuf);
212 req->rq_repmsg = (struct lustre_msg *)req->rq_repbuf;
216 req->rq_status = obd_connect(&conn);
/* hand the connection id back to the client in the reply body */
218 CDEBUG(D_IOCTL, "rep buffer %p, id %d\n", req->rq_repbuf, conn.oc_id);
219 body = lustre_msg_buf(req->rq_repmsg, 0);
220 body->connid = conn.oc_id;
/*
 * Handle a disconnect request.
 *
 * Rebuilds the obd_conn from the request body's connid and the OST's target
 * device, packs a one-buffer reply, and stores the result of
 * obd_disconnect() in req->rq_status.
 *
 * NOTE(review): the handling of `rc` after lustre_pack_msg() is not visible
 * in this listing -- confirm.
 */
224 static int ost_disconnect(struct ost_obd *ost, struct ptlrpc_request *req)
226 struct obd_conn conn;
227 struct ost_body *body;
228 int rc, size = sizeof(*body);
/* request buffer 0 is interpreted as the ost_body */
231 body = lustre_msg_buf(req->rq_reqmsg, 0);
232 conn.oc_id = body->connid;
233 conn.oc_dev = ost->ost_tgt;
/* one reply buffer of sizeof(struct ost_body); the visible lines do not fill it */
235 rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repbuf);
236 req->rq_repmsg = (struct lustre_msg *)req->rq_repbuf;
240 CDEBUG(D_IOCTL, "Disconnecting %d\n", conn.oc_id);
241 req->rq_status = obd_disconnect(&conn);
/*
 * Handle a get_info request.
 *
 * Request buffer 1 (with its length from buflens[1]) is passed to
 * obd_get_info() together with output slots for a value length (size[1])
 * and a value pointer (bufs[1]). A two-buffer reply is then packed from
 * size[]/bufs[]: buffer 0 is sized for an ost_body with no initial data
 * (bufs[0] is NULL), buffer 1 is whatever obd_get_info() returned.
 *
 * NOTE(review): the reply is packed after obd_get_info() in the visible
 * lines; whether rq_status is checked first, and who owns/frees bufs[1],
 * is not visible here -- confirm.
 */
245 static int ost_get_info(struct ost_obd *ost, struct ptlrpc_request *req)
247 struct obd_conn conn;
248 struct ost_body *body;
/* size[1] is zero-initialized by the partial initializer */
249 int rc, size[2] = {sizeof(*body)};
250 char *bufs[2] = {NULL, NULL}, *ptr;
/* request buffer 0 is interpreted as the ost_body */
253 body = lustre_msg_buf(req->rq_reqmsg, 0);
254 conn.oc_id = body->connid;
255 conn.oc_dev = ost->ost_tgt;
/* request buffer 1 is handed to obd_get_info() as-is */
257 ptr = lustre_msg_buf(req->rq_reqmsg, 1);
261 req->rq_status = obd_get_info(&conn, req->rq_reqmsg->buflens[1], ptr,
262 &(size[1]), (void **)&(bufs[1]));
/* reply sizes and contents come from size[]/bufs[] as filled in above */
264 rc = lustre_pack_msg(2, size, bufs, &req->rq_replen, &req->rq_repbuf);
265 req->rq_repmsg = (struct lustre_msg *)req->rq_repbuf;
267 CERROR("cannot pack reply\n");
/*
 * Handle the read side of a bulk read/write (BRW) request.
 *
 * Visible flow:
 *   1. Request buffer 0 is the ost_body, buffer 1 an array of obd_ioobj,
 *      buffer 2 an array of niobuf; objcount/niocount are derived from the
 *      buffer lengths.
 *   2. Each ioobj and its niobufs are walked with the unpack helpers.
 *   3. A one-buffer reply (ost_body) is packed and a local niobuf array
 *      `res` of niocount entries is allocated.
 *   4. obd_preprw() is called with the request arrays and `res`.
 *   5. For each niobuf a bulk descriptor is prepared, sent on
 *      OST_BULK_PORTAL using the xid from the request niobuf and the
 *      address from res[i], waited on, and freed.
 *   6. obd_commitrw() is called and `res` is freed.
 *
 * NOTE(review), all to be confirmed against the elided lines:
 *   - `cmd` is passed to obd_preprw()/obd_commitrw() but no assignment is
 *     visible in this listing.
 *   - `tmp2` is a void *, so `tmp2 + ioo->ioo_bufcnt` advances by bytes
 *     (GNU extension), not by niobuf elements; the bounds check looks
 *     under-scaled by sizeof(struct niobuf).
 *   - `bulk` is freed inside the loop and again in the cleanup path; unless
 *     it is reset to NULL in between this is a double free.
 *   - `bulk_vec` is initialized to NULL and never assigned in the visible
 *     lines, so the bulk_vec cleanup branch appears to be dead code.
 *   - The `obddev` parameter is not used in the visible lines; the target
 *     device is taken from req->rq_obd instead.
 *   - Each transfer uses a fixed length of PAGE_SIZE regardless of the
 *     niobuf contents.
 */
272 static int ost_brw_read(struct ost_obd *obddev, struct ptlrpc_request *req)
274 struct ptlrpc_bulk_desc **bulk_vec = NULL, *bulk = NULL;
275 struct obd_conn conn;
276 void *tmp1, *tmp2, *end2;
277 struct niobuf *nb, *dst, *res = NULL;
278 struct obd_ioobj *ioo;
279 struct ost_body *body;
280 int rc, cmd, i, j, objcount, niocount, size = sizeof(*body);
/* tmp1 walks the ioobj array, tmp2 walks the niobuf array, end2 is one past the niobuf buffer */
283 body = lustre_msg_buf(req->rq_reqmsg, 0);
284 tmp1 = lustre_msg_buf(req->rq_reqmsg, 1);
285 tmp2 = lustre_msg_buf(req->rq_reqmsg, 2);
286 end2 = (char *)tmp2 + req->rq_reqmsg->buflens[2];
287 objcount = req->rq_reqmsg->buflens[1] / sizeof(*ioo);
288 niocount = req->rq_reqmsg->buflens[2] / sizeof(*nb);
291 conn.oc_id = body->connid;
292 conn.oc_dev = req->rq_obd->u.ost.ost_tgt;
/* walk every ioobj and its niobufs; the unpack helpers advance tmp1/tmp2 */
294 for (i = 0; i < objcount; i++) {
295 ost_unpack_ioo(&tmp1, &ioo);
/* NOTE(review): byte-granular void * arithmetic, see header comment */
296 if (tmp2 + ioo->ioo_bufcnt > end2) {
301 for (j = 0; j < ioo->ioo_bufcnt; j++)
302 ost_unpack_niobuf(&tmp2, &nb);
/* one reply buffer of sizeof(struct ost_body) */
305 rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repbuf);
306 req->rq_repmsg = (struct lustre_msg *)req->rq_repbuf;
/* local niobuf array handed to obd_preprw()/obd_commitrw(); its addr fields are read below */
309 OBD_ALLOC(res, sizeof(*res) * niocount);
313 /* The unpackers move tmp1 and tmp2, so reset them before using */
314 tmp1 = lustre_msg_buf(req->rq_reqmsg, 1);
315 tmp2 = lustre_msg_buf(req->rq_reqmsg, 2);
316 req->rq_status = obd_preprw(cmd, &conn, objcount,
317 tmp1, niocount, tmp2, res);
/* one bulk per niobuf: send, wait on b_waitq, then free the descriptor */
322 for (i = 0; i < niocount; i++) {
323 bulk = ptlrpc_prep_bulk(&req->rq_peer);
325 CERROR("cannot alloc bulk desc\n");
326 GOTO(out, rc = -ENOMEM);
/* xid comes from the client's niobuf, data address from the local res[] entry */
329 dst = &(((struct niobuf *)tmp2)[i]);
330 bulk->b_xid = dst->xid;
331 bulk->b_buf = (void *)(unsigned long)res[i].addr;
332 bulk->b_buflen = PAGE_SIZE;
333 rc = ptlrpc_send_bulk(bulk, OST_BULK_PORTAL);
/* interruptible wait; the PTL_RPC_INTR flag is tested afterwards */
336 wait_event_interruptible(bulk->b_waitq,
337 ptlrpc_check_bulk_sent(bulk));
339 if (bulk->b_flags == PTL_RPC_INTR)
342 OBD_FREE(bulk, sizeof(*bulk));
346 /* The unpackers move tmp1 and tmp2, so reset them before using */
347 tmp1 = lustre_msg_buf(req->rq_reqmsg, 1);
348 tmp2 = lustre_msg_buf(req->rq_reqmsg, 2);
349 req->rq_status = obd_commitrw(cmd, &conn, objcount,
350 tmp1, niocount, res);
/* cleanup: res, then any outstanding bulk descriptor(s) */
355 OBD_FREE(res, sizeof(*res) * niocount);
357 OBD_FREE(bulk, sizeof(*bulk));
358 if (bulk_vec != NULL) {
359 for (i = 0; i < niocount; i++)
360 if (bulk_vec[i] != NULL)
361 OBD_FREE(bulk_vec[i], sizeof(*bulk));
362 OBD_FREE(bulk_vec, niocount * sizeof(*bulk_vec));
/*
 * Commit a single written page through obd_commitrw().
 *
 * Zeroes one obd_ioobj and one `buf` descriptor and passes them, each with
 * a count of 1, to obd_commitrw() with OBD_BRW_WRITE.
 *
 * NOTE(review): the declaration of `buf`, any use of the `page` argument,
 * and the fields set between the memset() calls and obd_commitrw() are not
 * visible in this listing -- confirm how `page` reaches `buf`.
 */
368 static int ost_commit_page(struct obd_conn *conn, struct page *page)
370 struct obd_ioobj obj;
/* start from all-zero descriptors */
375 memset(&buf, 0, sizeof(buf));
376 memset(&obj, 0, sizeof(obj));
/* one object, one buffer */
381 rc = obd_commitrw(OBD_BRW_WRITE, conn, 1, &obj, 1, &buf);
/*
 * Bulk callback installed by ost_brw_write() as bulk->b_cb.
 *
 * Commits the page recorded in the bulk descriptor using the connection
 * stored alongside it, and logs an error when ost_commit_page() fails.
 *
 * NOTE(review): `data` is not used in the visible lines; what happens to
 * the bulk descriptor afterwards is not visible in this listing.
 */
385 static int ost_brw_write_cb(struct ptlrpc_bulk_desc *bulk, void *data)
391 rc = ost_commit_page(&bulk->b_conn, bulk->b_page);
393 CERROR("ost_commit_page failed: %d\n", rc);
/*
 * Handle the write side of a bulk read/write (BRW) request.
 *
 * Visible flow:
 *   1. Request buffer 0 is the ost_body, buffer 1 an array of obd_ioobj,
 *      buffer 2 an array of niobuf; objcount/niocount are derived from the
 *      buffer lengths.
 *   2. Each ioobj and its niobufs are walked with the unpack helpers.
 *   3. A two-buffer reply is packed: an ost_body plus room for niocount
 *      niobufs; `res` points at that second reply buffer.
 *   4. obd_preprw() is called with the request arrays and `res`.
 *   5. For each reply niobuf a bulk descriptor is prepared, given a fresh
 *      xid taken from the service under srv_lock, and registered on
 *      OSC_BULK_PORTAL with ost_brw_write_cb as its callback. The xid is
 *      also written into the reply niobuf via HTON__u32.
 *
 * NOTE(review), all to be confirmed against the elided lines:
 *   - `cmd` is passed to obd_preprw() but no assignment is visible here.
 *   - `tmp2` is a void *, so `tmp2 + ioo->ioo_bufcnt` advances by bytes
 *     (GNU extension), not by niobuf elements; the bounds check looks
 *     under-scaled by sizeof(struct niobuf).
 *   - On allocation failure mid-loop the code jumps to `out`; cleanup of
 *     bulks already registered in earlier iterations is not visible.
 *   - The `obddev` parameter is not used in the visible lines; the target
 *     device and service are taken from req->rq_obd instead.
 */
398 static int ost_brw_write(struct ost_obd *obddev, struct ptlrpc_request *req)
400 struct obd_conn conn;
401 struct niobuf *nb, *dst;
402 struct obd_ioobj *ioo;
403 struct ost_body *body;
/* size[1] is filled in below once niocount is known */
404 int cmd, rc, i, j, objcount, niocount, size[2] = {sizeof(*body)};
405 void *tmp1, *tmp2, *end2, *res;
/* tmp1 walks the ioobj array, tmp2 walks the niobuf array, end2 is one past the niobuf buffer */
408 body = lustre_msg_buf(req->rq_reqmsg, 0);
409 tmp1 = lustre_msg_buf(req->rq_reqmsg, 1);
410 tmp2 = lustre_msg_buf(req->rq_reqmsg, 2);
411 end2 = (char *)tmp2 + req->rq_reqmsg->buflens[2];
412 objcount = req->rq_reqmsg->buflens[1] / sizeof(*ioo);
413 niocount = req->rq_reqmsg->buflens[2] / sizeof(*nb);
416 conn.oc_id = body->connid;
417 conn.oc_dev = req->rq_obd->u.ost.ost_tgt;
/* walk every ioobj and its niobufs; the unpack helpers advance tmp1/tmp2 */
419 for (i = 0; i < objcount; i++) {
420 ost_unpack_ioo((void *)&tmp1, &ioo);
/* NOTE(review): byte-granular void * arithmetic, see header comment */
421 if (tmp2 + ioo->ioo_bufcnt > end2) {
425 for (j = 0; j < ioo->ioo_bufcnt; j++)
426 ost_unpack_niobuf((void *)&tmp2, &nb);
/* reply: buffer 0 = ost_body, buffer 1 = niocount niobufs */
429 size[1] = niocount * sizeof(*nb);
430 rc = lustre_pack_msg(2, size, NULL, &req->rq_replen, &req->rq_repbuf);
431 req->rq_repmsg = (struct lustre_msg *)req->rq_repbuf;
/* obd_preprw() writes its results straight into the reply's niobuf array */
435 res = lustre_msg_buf(req->rq_repmsg, 1);
437 /* The unpackers move tmp1 and tmp2, so reset them before using */
438 tmp1 = lustre_msg_buf(req->rq_reqmsg, 1);
439 tmp2 = lustre_msg_buf(req->rq_reqmsg, 2);
440 req->rq_status = obd_preprw(cmd, &conn, objcount,
441 tmp1, niocount, tmp2, res);
/* register one bulk descriptor per reply niobuf */
446 for (i = 0; i < niocount; i++) {
447 struct ptlrpc_bulk_desc *bulk;
448 struct ptlrpc_service *srv = req->rq_obd->u.ost.ost_service;
450 bulk = ptlrpc_prep_bulk(&req->rq_peer);
452 GOTO(out, rc = -ENOMEM);
/* xid allocation is serialized by the service lock */
454 spin_lock(&srv->srv_lock);
455 bulk->b_xid = srv->srv_xid++;
456 spin_unlock(&srv->srv_lock);
/* publish the xid to the client through the reply niobuf */
458 dst = &(((struct niobuf *)res)[i]);
459 dst->xid = HTON__u32(bulk->b_xid);
/* the callback later commits b_page using the copied connection */
461 bulk->b_buf = (void *)(unsigned long)dst->addr;
462 bulk->b_cb = ost_brw_write_cb;
463 bulk->b_page = dst->page;
464 memcpy(&(bulk->b_conn), &conn, sizeof(conn));
465 bulk->b_buflen = PAGE_SIZE;
466 bulk->b_portal = OSC_BULK_PORTAL;
467 rc = ptlrpc_register_bulk(bulk);
474 /* FIXME: should we return 'rc' here? */
/*
 * Dispatch a BRW request by direction.
 *
 * The `data` field of the request body selects the path: OBD_BRW_READ goes
 * to ost_brw_read(), anything else to ost_brw_write().
 *
 * NOTE(review): any value other than OBD_BRW_READ is treated as a write in
 * the visible lines -- confirm that no validation is expected here.
 */
478 static int ost_brw(struct ost_obd *obddev, struct ptlrpc_request *req)
480 struct ost_body *body = lustre_msg_buf(req->rq_reqmsg, 0);
482 if (body->data == OBD_BRW_READ)
483 return ost_brw_read(obddev, req);
485 return ost_brw_write(obddev, req);
/*
 * Top-level request handler for the OST service.
 *
 * Unpacks the incoming message, rejects anything whose type is not
 * PTL_RPC_REQUEST with -EINVAL, then switches on the opcode and calls the
 * matching ost_*() handler. Unknown opcodes set rq_status to -ENOTSUPP and
 * go through ptlrpc_error(). After the switch, a processing error is
 * reported with ptlrpc_error(); otherwise the reply is sent with
 * ptlrpc_reply().
 *
 * Each opcode path runs an OBD_FAIL_RETURN() fault-injection hook before
 * its handler. The case labels are not visible in this listing; the paths
 * below are identified by the handler each one calls.
 *
 * NOTE(review): several strings and one fail-check id look copied from the
 * MDS handler or from the setattr case:
 *   - both early CERROR messages say "lustre_mds",
 *   - the unpack check uses OBD_FAIL_MDS_HANDLE_UNPACK,
 *   - the open and close paths log "setattr".
 * These are runtime strings/identifiers and are left untouched here.
 */
488 static int ost_handle(struct obd_device *obddev, struct ptlrpc_service *svc,
489 struct ptlrpc_request *req)
492 struct ost_obd *ost = &obddev->u.ost;
/* rq_reqmsg is pointed at the raw request buffer before rc is tested */
495 rc = lustre_unpack_msg(req->rq_reqbuf, req->rq_reqlen);
496 req->rq_reqmsg = (struct lustre_msg *)req->rq_reqbuf;
497 if (rc || OBD_FAIL_CHECK(OBD_FAIL_MDS_HANDLE_UNPACK)) {
498 CERROR("lustre_mds: Invalid request\n");
/* only request packets are accepted */
502 if (req->rq_reqmsg->type != PTL_RPC_REQUEST) {
503 CERROR("lustre_mds: wrong packet type sent %d\n",
504 req->rq_reqmsg->type);
505 GOTO(out, rc = -EINVAL);
/* opcode dispatch */
508 switch (req->rq_reqmsg->opc) {
510 CDEBUG(D_INODE, "connect\n");
511 OBD_FAIL_RETURN(OBD_FAIL_OST_CONNECT_NET, 0);
512 rc = ost_connect(ost, req);
515 CDEBUG(D_INODE, "disconnect\n");
516 OBD_FAIL_RETURN(OBD_FAIL_OST_DISCONNECT_NET, 0);
517 rc = ost_disconnect(ost, req);
520 CDEBUG(D_INODE, "get_info\n");
521 OBD_FAIL_RETURN(OBD_FAIL_OST_GET_INFO_NET, 0);
522 rc = ost_get_info(ost, req);
525 CDEBUG(D_INODE, "create\n");
526 OBD_FAIL_RETURN(OBD_FAIL_OST_CREATE_NET, 0);
527 rc = ost_create(ost, req);
530 CDEBUG(D_INODE, "destroy\n");
531 OBD_FAIL_RETURN(OBD_FAIL_OST_DESTROY_NET, 0);
532 rc = ost_destroy(ost, req);
535 CDEBUG(D_INODE, "getattr\n");
536 OBD_FAIL_RETURN(OBD_FAIL_OST_GETATTR_NET, 0);
537 rc = ost_getattr(ost, req);
540 CDEBUG(D_INODE, "setattr\n");
541 OBD_FAIL_RETURN(OBD_FAIL_OST_SETATTR_NET, 0);
542 rc = ost_setattr(ost, req);
/* open path -- NOTE(review): debug text says "setattr" */
545 CDEBUG(D_INODE, "setattr\n");
546 OBD_FAIL_RETURN(OBD_FAIL_OST_OPEN_NET, 0);
547 rc = ost_open(ost, req);
/* close path -- NOTE(review): debug text says "setattr" */
550 CDEBUG(D_INODE, "setattr\n");
551 OBD_FAIL_RETURN(OBD_FAIL_OST_CLOSE_NET, 0);
552 rc = ost_close(ost, req);
555 CDEBUG(D_INODE, "brw\n");
556 OBD_FAIL_RETURN(OBD_FAIL_OST_BRW_NET, 0);
557 rc = ost_brw(ost, req);
560 CDEBUG(D_INODE, "punch\n");
561 OBD_FAIL_RETURN(OBD_FAIL_OST_PUNCH_NET, 0);
562 rc = ost_punch(ost, req);
/* unsupported opcode */
565 req->rq_status = -ENOTSUPP;
566 rc = ptlrpc_error(obddev, svc, req);
/* handlers set rq_status themselves; the override below is disabled */
572 //req->rq_status = rc;
574 CERROR("ost: processing error %d\n", rc);
575 ptlrpc_error(obddev, svc, req);
577 CDEBUG(D_INODE, "sending reply\n");
578 ptlrpc_reply(obddev, svc, req);
/*
 * Set up the OST device.
 *
 * `buf` is interpreted as a struct obd_ioctl_data whose ioc_dev field is an
 * index into obd_dev[]. The indexed device must be both attached and set
 * up. The OST then connects to it, creates a ptlrpc service on
 * OST_REQUEST_PORTAL / OSC_REPLY_PORTAL, and starts a service thread named
 * "lustre_ost". On failure after the connect, the connection is dropped
 * again with obd_disconnect().
 *
 * NOTE(review), to be confirmed against the elided lines:
 *   - `data->ioc_dev > MAX_OBD_DEVICES` lets ioc_dev == MAX_OBD_DEVICES
 *     through; if obd_dev[] has MAX_OBD_DEVICES entries this indexes one
 *     past the end and the test should be `>=`.
 *   - The request handlers read ost->ost_tgt, but only ost->ost_conn.oc_dev
 *     is assigned in the visible lines.
 *   - The error codes from obd_connect() and ptlrpc_start_thread() are
 *     replaced with -EINVAL.
 *   - Whether the service allocated by ptlrpc_init_svc() is released when
 *     the thread fails to start is not visible.
 */
585 /* attach the OST to its target OBD device and start the request service */
586 static int ost_setup(struct obd_device *obddev, obd_count len,
590 struct obd_ioctl_data* data = buf;
591 struct ost_obd *ost = &obddev->u.ost;
592 struct obd_device *tgt;
/* range-check the device index before using it -- see off-by-one note above */
596 if (data->ioc_dev < 0 || data->ioc_dev > MAX_OBD_DEVICES)
600 tgt = &obd_dev[data->ioc_dev];
/* the target must carry both OBD_ATTACHED and OBD_SET_UP */
602 if ( ! (tgt->obd_flags & OBD_ATTACHED) ||
603 ! (tgt->obd_flags & OBD_SET_UP) ){
604 CERROR("device not attached or not set up (%d)\n",
606 GOTO(error_dec, err = -EINVAL);
/* the OST's own connection to the target device */
609 ost->ost_conn.oc_dev = tgt;
610 err = obd_connect(&ost->ost_conn);
612 CERROR("fail to connect to device %d\n", data->ioc_dev);
613 GOTO(error_dec, err = -EINVAL);
/* first argument is 128 KiB; NOTE(review): presumably a buffer size -- confirm against ptlrpc_init_svc() */
616 ost->ost_service = ptlrpc_init_svc(128 * 1024,
617 OST_REQUEST_PORTAL, OSC_REPLY_PORTAL,
619 if (!ost->ost_service) {
620 CERROR("failed to start service\n");
621 GOTO(error_disc, err = -EINVAL);
624 err = ptlrpc_start_thread(obddev, ost->ost_service, "lustre_ost");
626 GOTO(error_disc, err = -EINVAL);
/* error path: undo the obd_connect() above */
632 obd_disconnect(&ost->ost_conn);
/*
 * Tear down the OST device.
 *
 * Logs an error if the device still has entries on obd_gen_clients, stops
 * the service thread, unregisters the service, logs an error if requests
 * are still queued on it, frees the service structure, and finally
 * disconnects from the target device.
 *
 * NOTE(review): what happens after the "still has clients!" message
 * (presumably an early error return) is not visible in this listing. When
 * the request list is non-empty the service is still freed in the visible
 * lines, so queued requests would be left without a service -- see the XXX
 * below.
 */
638 static int ost_cleanup(struct obd_device * obddev)
640 struct ost_obd *ost = &obddev->u.ost;
645 if ( !list_empty(&obddev->obd_gen_clients) ) {
646 CERROR("still has clients!\n");
/* stop the thread before unregistering the service */
650 ptlrpc_stop_thread(ost->ost_service);
651 rpc_unregister_service(ost->ost_service);
653 if (!list_empty(&ost->ost_service->srv_reqs)) {
654 // XXX reply with errors and clean up
655 CERROR("Request list not empty!\n");
657 OBD_FREE(ost->ost_service, sizeof(*ost->ost_service));
/* drop the connection made in ost_setup() */
659 err = obd_disconnect(&ost->ost_conn);
661 CERROR("lustre ost: fail to disconnect device\n");
/*
 * OBD method table for the OST device type, registered in ost_init().
 * Only the o_cleanup entry is visible in this listing; it uses the older
 * GNU `field: value` initializer syntax.
 */
669 /* use obd ops to offer management infrastructure */
670 static struct obd_ops ost_obd_ops = {
672 o_cleanup: ost_cleanup,
/*
 * Module entry point: registers the OST method table under
 * LUSTRE_OST_NAME.
 *
 * NOTE(review): the return value of obd_register_type() is not used on the
 * visible line, and the function's own return statement is not visible --
 * confirm that a registration failure is propagated.
 */
675 static int __init ost_init(void)
677 obd_register_type(&ost_obd_ops, LUSTRE_OST_NAME);
/* Module exit point: unregisters the OST device type by name. */
681 static void __exit ost_exit(void)
683 obd_unregister_type(LUSTRE_OST_NAME);
/* Module metadata, followed by registration of the init and exit hooks. */
686 MODULE_AUTHOR("Peter J. Braam <braam@clusterfs.com>");
687 MODULE_DESCRIPTION("Lustre Object Storage Target (OST) v0.01");
688 MODULE_LICENSE("GPL");
690 module_init(ost_init);
691 module_exit(ost_exit);