1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
5 * Author: Peter J. Braam <braam@clusterfs.com>
6 * Author: Phil Schwan <phil@clusterfs.com>
8 * This file is part of Lustre, http://www.lustre.org.
10 * Lustre is free software; you can redistribute it and/or
11 * modify it under the terms of version 2 of the GNU General Public
12 * License as published by the Free Software Foundation.
14 * Lustre is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with Lustre; if not, write to the Free Software
21 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 * Storage Target Handling functions
24 * Lustre Object Server Module (OST)
26 * This server is single threaded at present (but can easily be multi
27 * threaded). For testing and management it is treated as an
28 * obd_device, although it does not export a full OBD method table
29 * (the requests are coming in over the wire, so object target
30 * modules do not have a full method table.)
34 #define DEBUG_SUBSYSTEM S_OST
36 #include <linux/module.h>
37 #include <linux/obd_ost.h>
38 #include <linux/lustre_net.h>
39 #include <linux/lustre_dlm.h>
40 #include <linux/init.h>
41 #include <linux/lprocfs_status.h>
43 extern struct lprocfs_vars status_var_nm_1[];
44 extern struct lprocfs_vars status_class_var[];
/*
 * ost_destroy - service an object-destroy request.
 *
 * Takes the ost_body from request buffer 0, packs a one-buffer reply of
 * sizeof(struct ost_body), and stores the result of obd_destroy() on the
 * request's object attributes in req->rq_status.
 *
 * NOTE(review): the test of rc after lustre_pack_msg() and the return
 * statement are not visible here; confirm the error path.
 */
46 static int ost_destroy(struct ptlrpc_request *req)
/* The request message pointer is reinterpreted as the connection handle. */
48 struct lustre_handle *conn = (struct lustre_handle *)req->rq_reqmsg;
49 struct ost_body *body;
50 int rc, size = sizeof(*body);
53 body = lustre_msg_buf(req->rq_reqmsg, 0);
55 rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
/* The operation's outcome travels in rq_status, separately from rc. */
59 req->rq_status = obd_destroy(conn, &body->oa, NULL);
/*
 * ost_getattr - service a get-attributes request.
 *
 * Packs a one-buffer reply of sizeof(struct ost_body), copies the object
 * attributes from request buffer 0 into the reply body, and passes the
 * reply copy to obd_getattr(). The obd_getattr() result is stored in
 * req->rq_status.
 *
 * NOTE(review): the rc check after lustre_pack_msg() and the return are
 * not visible here; confirm the error path.
 */
63 static int ost_getattr(struct ptlrpc_request *req)
/* The request message pointer is reinterpreted as the connection handle. */
65 struct lustre_handle *conn = (struct lustre_handle *)req->rq_reqmsg;
66 struct ost_body *body, *repbody;
67 int rc, size = sizeof(*body);
70 body = lustre_msg_buf(req->rq_reqmsg, 0);
72 rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
76 repbody = lustre_msg_buf(req->rq_repmsg, 0);
77 /* FIXME: unpack only valid fields instead of memcpy, endianness */
78 memcpy(&repbody->oa, &body->oa, sizeof(body->oa));
/* obd_getattr() operates on the reply's copy of the attributes. */
79 req->rq_status = obd_getattr(conn, &repbody->oa, NULL);
/*
 * ost_statfs - service a statfs request.
 *
 * Packs a one-buffer reply of sizeof(struct obd_statfs), zeroes that
 * buffer, and passes it to obd_statfs() together with the connection
 * handle. The buffer is then converted in place with
 * obd_statfs_pack(osfs, osfs).
 *
 * NOTE(review): the conditions guarding the CERROR below and the handling
 * of a failed lustre_pack_msg() are not visible here; presumably both
 * test rc -- confirm.
 */
83 static int ost_statfs(struct ptlrpc_request *req)
/* The request message pointer is reinterpreted as the connection handle. */
85 struct lustre_handle *conn = (struct lustre_handle *)req->rq_reqmsg;
86 struct obd_statfs *osfs;
87 int rc, size = sizeof(*osfs);
90 rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
94 osfs = lustre_msg_buf(req->rq_repmsg, 0);
/* Clear the reply buffer before handing it to obd_statfs(). */
95 memset(osfs, 0, size);
97 rc = obd_statfs(conn, osfs);
99 CERROR("ost: statfs failed: rc %d\n", rc);
/* Source and destination are the same buffer: packed in place. */
103 obd_statfs_pack(osfs, osfs);
/*
 * ost_open - service an object-open request.
 *
 * Packs a one-buffer reply of sizeof(struct ost_body), copies the object
 * attributes from request buffer 0 into the reply body, and calls
 * obd_open() on the reply copy. The obd_open() result is stored in
 * req->rq_status.
 *
 * NOTE(review): the rc check after lustre_pack_msg() and the return are
 * not visible here; confirm the error path.
 */
108 static int ost_open(struct ptlrpc_request *req)
/* The request message pointer is reinterpreted as the connection handle. */
110 struct lustre_handle *conn = (struct lustre_handle *)req->rq_reqmsg;
111 struct ost_body *body, *repbody;
112 int rc, size = sizeof(*body);
115 body = lustre_msg_buf(req->rq_reqmsg, 0);
117 rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
121 repbody = lustre_msg_buf(req->rq_repmsg, 0);
122 /* FIXME: unpack only valid fields instead of memcpy, endianness */
123 memcpy(&repbody->oa, &body->oa, sizeof(body->oa));
124 req->rq_status = obd_open(conn, &repbody->oa, NULL);
/*
 * ost_close - service an object-close request.
 *
 * Packs a one-buffer reply of sizeof(struct ost_body), copies the object
 * attributes from request buffer 0 into the reply body, and calls
 * obd_close() on the reply copy. The obd_close() result is stored in
 * req->rq_status.
 *
 * NOTE(review): the rc check after lustre_pack_msg() and the return are
 * not visible here; confirm the error path.
 */
128 static int ost_close(struct ptlrpc_request *req)
/* The request message pointer is reinterpreted as the connection handle. */
130 struct lustre_handle *conn = (struct lustre_handle *)req->rq_reqmsg;
131 struct ost_body *body, *repbody;
132 int rc, size = sizeof(*body);
135 body = lustre_msg_buf(req->rq_reqmsg, 0);
137 rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
141 repbody = lustre_msg_buf(req->rq_repmsg, 0);
142 /* FIXME: unpack only valid fields instead of memcpy, endianness */
143 memcpy(&repbody->oa, &body->oa, sizeof(body->oa));
144 req->rq_status = obd_close(conn, &repbody->oa, NULL);
/*
 * ost_create - service an object-create request.
 *
 * Packs a one-buffer reply of sizeof(struct ost_body), copies the object
 * attributes from request buffer 0 into the reply body, and calls
 * obd_create() on the reply copy. The obd_create() result is stored in
 * req->rq_status.
 *
 * NOTE(review): the rc check after lustre_pack_msg() and the return are
 * not visible here; confirm the error path.
 */
148 static int ost_create(struct ptlrpc_request *req)
/* The request message pointer is reinterpreted as the connection handle. */
150 struct lustre_handle *conn = (struct lustre_handle *)req->rq_reqmsg;
151 struct ost_body *body, *repbody;
152 int rc, size = sizeof(*body);
155 body = lustre_msg_buf(req->rq_reqmsg, 0);
157 rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
161 repbody = lustre_msg_buf(req->rq_repmsg, 0);
162 /* FIXME: unpack only valid fields instead of memcpy, endianness */
163 memcpy(&repbody->oa, &body->oa, sizeof(body->oa));
164 req->rq_status = obd_create(conn, &repbody->oa, NULL);
/*
 * ost_punch - service a punch request.
 *
 * Tests that the request's o_valid mask (converted with NTOH__u32) has
 * both OBD_MD_FLSIZE and OBD_MD_FLBLOCKS set. It then packs a one-buffer
 * reply of sizeof(struct ost_body), copies the request attributes into
 * it, and calls obd_punch() with the copied o_size and o_blocks as its
 * last two arguments. The obd_punch() result is stored in req->rq_status.
 *
 * NOTE(review): what happens when the o_valid test fails, and the rc
 * check after lustre_pack_msg(), are not visible here; confirm.
 * NOTE(review): o_size/o_blocks presumably carry the range to punch;
 * verify against obd_punch()'s prototype.
 */
168 static int ost_punch(struct ptlrpc_request *req)
/* The request message pointer is reinterpreted as the connection handle. */
170 struct lustre_handle *conn = (struct lustre_handle *)req->rq_reqmsg;
171 struct ost_body *body, *repbody;
172 int rc, size = sizeof(*body);
175 body = lustre_msg_buf(req->rq_reqmsg, 0);
/* Both flag bits must be present for the comparison below to be equal. */
177 if ((NTOH__u32(body->oa.o_valid) & (OBD_MD_FLSIZE | OBD_MD_FLBLOCKS))!=
178 (OBD_MD_FLSIZE | OBD_MD_FLBLOCKS))
181 rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
185 repbody = lustre_msg_buf(req->rq_repmsg, 0);
186 /* FIXME: unpack only valid fields instead of memcpy, endianness */
187 memcpy(&repbody->oa, &body->oa, sizeof(body->oa));
188 req->rq_status = obd_punch(conn, &repbody->oa, NULL,
189 repbody->oa.o_size, repbody->oa.o_blocks);
/*
 * ost_setattr - service a set-attributes request.
 *
 * Packs a one-buffer reply of sizeof(struct ost_body), copies the object
 * attributes from request buffer 0 into the reply body, and calls
 * obd_setattr() on the reply copy. The obd_setattr() result is stored in
 * req->rq_status.
 *
 * NOTE(review): the rc check after lustre_pack_msg() and the return are
 * not visible here; confirm the error path.
 */
193 static int ost_setattr(struct ptlrpc_request *req)
/* The request message pointer is reinterpreted as the connection handle. */
195 struct lustre_handle *conn = (struct lustre_handle *)req->rq_reqmsg;
196 struct ost_body *body, *repbody;
197 int rc, size = sizeof(*body);
200 body = lustre_msg_buf(req->rq_reqmsg, 0);
202 rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
206 repbody = lustre_msg_buf(req->rq_repmsg, 0);
207 /* FIXME: unpack only valid fields instead of memcpy, endianness */
208 memcpy(&repbody->oa, &body->oa, sizeof(body->oa));
209 req->rq_status = obd_setattr(conn, &repbody->oa, NULL);
/*
 * ost_bulk_timeout - timeout callback for bulk transfers.
 *
 * @data is the bulk descriptor that ost_brw_read()/ost_brw_write() hand to
 * LWI_TIMEOUT(). The callback reports the descriptor's connection to
 * recovd_conn_fail().
 *
 * NOTE(review): the return value is not visible here.
 */
213 static int ost_bulk_timeout(void *data)
215 struct ptlrpc_bulk_desc *desc = data;
218 recovd_conn_fail(desc->bd_connection);
/*
 * ost_brw_read - service a bulk read request.
 *
 * Request layout as used below: buffer 0 holds the ost_body, buffer 1 an
 * array of obd_ioobj, and buffer 2 an array of niobuf_remote. The counts
 * are derived from the buffer lengths.
 *
 * Visible sequence:
 *   1. walk the ioobj/niobuf arrays with the unpackers, bounds-checking
 *      the niobuf cursor against the end of buffer 2;
 *   2. allocate one niobuf_local per remote niobuf and call obd_preprw();
 *   3. build a bulk descriptor with one bulk page per niobuf and send it
 *      with ptlrpc_send_bulk();
 *   4. wait for PTL_BULK_FL_SENT, with ost_bulk_timeout() as the timeout
 *      callback after obd_timeout * HZ;
 *   5. call obd_commitrw(), pack a one-buffer reply, then drop the
 *      descriptor reference and free local_nb;
 *   6. send the reply or an error itself, through ptlrpc_reply() or
 *      ptlrpc_error().
 *
 * NOTE(review): the assignment of cmd, the goto labels (out, out_local,
 * out_bulk), and the conditions guarding several statements below are not
 * visible here; confirm against the full file.
 */
222 static int ost_brw_read(struct ptlrpc_request *req)
/* The request message pointer is reinterpreted as the connection handle. */
224 struct lustre_handle *conn = (struct lustre_handle *)req->rq_reqmsg;
225 struct ptlrpc_bulk_desc *desc;
226 void *tmp1, *tmp2, *end2;
227 struct niobuf_remote *remote_nb;
228 struct niobuf_local *local_nb = NULL;
229 struct obd_ioobj *ioo;
230 struct ost_body *body;
231 struct l_wait_info lwi;
232 void *desc_priv = NULL;
233 int rc, cmd, i, j, objcount, niocount, size = sizeof(*body);
236 body = lustre_msg_buf(req->rq_reqmsg, 0);
237 tmp1 = lustre_msg_buf(req->rq_reqmsg, 1);
238 tmp2 = lustre_msg_buf(req->rq_reqmsg, 2);
/* end2 is one byte past the niobuf array; used for the bounds check. */
239 end2 = (char *)tmp2 + req->rq_reqmsg->buflens[2];
240 objcount = req->rq_reqmsg->buflens[1] / sizeof(*ioo);
241 niocount = req->rq_reqmsg->buflens[2] / sizeof(*remote_nb);
244 if (OBD_FAIL_CHECK(OBD_FAIL_OST_BRW_READ_BULK))
247 for (i = 0; i < objcount; i++) {
248 ost_unpack_ioo(&tmp1, &ioo);
/*
 * NOTE(review): tmp2 is a void pointer, so this adds ioo_bufcnt bytes
 * (GNU C byte-sized void arithmetic), not ioo_bufcnt niobuf_remote
 * entries. The check looks weaker than intended -- confirm.
 */
249 if (tmp2 + ioo->ioo_bufcnt > end2) {
251 GOTO(out, rc = -EFAULT);
253 for (j = 0; j < ioo->ioo_bufcnt; j++)
254 ost_unpack_niobuf(&tmp2, &remote_nb);
257 OBD_ALLOC(local_nb, sizeof(*local_nb) * niocount);
258 if (local_nb == NULL)
259 GOTO(out, rc = -ENOMEM);
261 /* The unpackers move tmp1 and tmp2, so reset them before using */
262 ioo = lustre_msg_buf(req->rq_reqmsg, 1);
263 remote_nb = lustre_msg_buf(req->rq_reqmsg, 2);
264 req->rq_status = obd_preprw(cmd, conn, objcount, ioo, niocount,
265 remote_nb, local_nb, &desc_priv);
270 desc = ptlrpc_prep_bulk(req->rq_connection);
272 GOTO(out_local, rc = -ENOMEM);
273 desc->bd_ptl_ev_hdlr = NULL;
274 desc->bd_portal = OST_BULK_PORTAL;
/* One bulk page per niobuf: xid and length from the remote, buffer from the local. */
276 for (i = 0; i < niocount; i++) {
277 struct ptlrpc_bulk_page *bulk = ptlrpc_prep_bulk_page(desc);
280 GOTO(out_bulk, rc = -ENOMEM);
281 bulk->bp_xid = remote_nb[i].xid;
282 bulk->bp_buf = local_nb[i].addr;
283 bulk->bp_buflen = remote_nb[i].len;
286 rc = ptlrpc_send_bulk(desc);
/* Wait for the SENT flag; ost_bulk_timeout(desc) is the timeout callback. */
290 lwi = LWI_TIMEOUT(obd_timeout * HZ, ost_bulk_timeout, desc);
291 rc = l_wait_event(desc->bd_waitq, desc->bd_flags & PTL_BULK_FL_SENT,
/* NOTE(review): the guard around this assertion is not visible; presumably it runs only when rc != 0. */
294 LASSERT(rc == -ETIMEDOUT);
298 req->rq_status = obd_commitrw(cmd, conn, objcount, ioo, niocount,
299 local_nb, desc_priv);
301 rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
304 ptlrpc_bulk_decref(desc);
/* Size expression matches the OBD_ALLOC above. */
306 OBD_FREE(local_nb, sizeof(*local_nb) * niocount);
/* This handler replies on its own: error or normal reply. */
309 ptlrpc_error(req->rq_svc, req);
311 ptlrpc_reply(req->rq_svc, req);
/*
 * ost_brw_write - service a bulk write request.
 *
 * Request layout as used below: buffer 0 holds the ost_body, buffer 1 an
 * array of obd_ioobj, and buffer 2 an array of niobuf_remote. The reply
 * has two buffers: an ost_body and niocount niobuf_remote entries, which
 * are filled by ost_pack_niobuf() in the page loop.
 *
 * Visible sequence:
 *   1. walk the ioobj/niobuf arrays with the unpackers, bounds-checking
 *      the niobuf cursor against the end of buffer 2;
 *   2. pack the two-buffer reply and allocate the niobuf_local array;
 *   3. call obd_preprw(), then build a bulk descriptor whose pages all
 *      share one xid taken from the service under srv_lock;
 *   4. register the bulk descriptor and send the reply;
 *   5. wait for PTL_BULK_FL_RCVD, with ost_bulk_timeout() as the timeout
 *      callback after obd_timeout * HZ;
 *   6. call obd_commitrw(), drop the descriptor reference, and free
 *      local_nb.
 * The trailing statements free the reply message and send an error, or
 * send a reply, or free the bulk descriptor; these are error paths reached
 * through goto labels.
 *
 * NOTE(review): the declaration of xid, the assignment of cmd, the goto
 * labels (out, out_free, fail_preprw, fail_bulk), and the conditions
 * guarding several statements are not visible here; confirm against the
 * full file.
 */
315 static int ost_brw_write(struct ptlrpc_request *req)
/* The request message pointer is reinterpreted as the connection handle. */
317 struct lustre_handle *conn = (struct lustre_handle *)req->rq_reqmsg;
318 struct ptlrpc_bulk_desc *desc;
319 struct niobuf_remote *remote_nb;
320 struct niobuf_local *local_nb, *lnb;
321 struct obd_ioobj *ioo;
322 struct ost_body *body;
/* size[0] is the ost_body; size[1] starts at zero and is set before packing. */
323 int cmd, rc, i, j, objcount, niocount, size[2] = {sizeof(*body)};
324 void *tmp1, *tmp2, *end2;
325 void *desc_priv = NULL;
327 struct ptlrpc_service *srv;
328 struct l_wait_info lwi;
332 body = lustre_msg_buf(req->rq_reqmsg, 0);
333 tmp1 = lustre_msg_buf(req->rq_reqmsg, 1);
334 tmp2 = lustre_msg_buf(req->rq_reqmsg, 2);
/* end2 is one byte past the niobuf array; used for the bounds check. */
335 end2 = (char *)tmp2 + req->rq_reqmsg->buflens[2];
336 objcount = req->rq_reqmsg->buflens[1] / sizeof(*ioo);
337 niocount = req->rq_reqmsg->buflens[2] / sizeof(*remote_nb);
340 for (i = 0; i < objcount; i++) {
341 ost_unpack_ioo((void *)&tmp1, &ioo);
/*
 * NOTE(review): tmp2 is a void pointer, so this adds ioo_bufcnt bytes
 * (GNU C byte-sized void arithmetic), not ioo_bufcnt niobuf_remote
 * entries. The check looks weaker than intended -- confirm.
 */
342 if (tmp2 + ioo->ioo_bufcnt > end2) {
346 for (j = 0; j < ioo->ioo_bufcnt; j++)
347 ost_unpack_niobuf((void *)&tmp2, &remote_nb);
350 size[1] = niocount * sizeof(*remote_nb);
351 rc = lustre_pack_msg(2, size, NULL, &req->rq_replen, &req->rq_repmsg);
/* From here on remote_nb is the write cursor into reply buffer 1. */
354 remote_nb = lustre_msg_buf(req->rq_repmsg, 1);
356 OBD_ALLOC(local_nb, niocount * sizeof(*local_nb));
357 if (local_nb == NULL)
358 GOTO(out, rc = -ENOMEM);
360 /* The unpackers move tmp1 and tmp2, so reset them before using */
361 tmp1 = lustre_msg_buf(req->rq_reqmsg, 1);
362 tmp2 = lustre_msg_buf(req->rq_reqmsg, 2);
363 req->rq_status = obd_preprw(cmd, conn, objcount, tmp1, niocount, tmp2,
364 local_nb, &desc_priv);
366 GOTO(out_free, rc = 0); /* XXX is this correct? */
368 if (OBD_FAIL_CHECK(OBD_FAIL_OST_BRW_WRITE_BULK))
369 GOTO(fail_preprw, rc = 0);
371 desc = ptlrpc_prep_bulk(req->rq_connection);
373 GOTO(fail_preprw, rc = -ENOMEM);
374 desc->bd_ptl_ev_hdlr = NULL;
375 desc->bd_portal = OSC_BULK_PORTAL;
/* The preprw private data is kept on the descriptor for obd_commitrw(). */
376 desc->bd_desc_private = desc_priv;
/*
 * NOTE(review): conn is a pointer, so this copies the pointer variable
 * itself (sizeof a pointer), not the lustre_handle it points to. If
 * bd_conn is a struct lustre_handle this looks wrong -- confirm.
 */
377 memcpy(&(desc->bd_conn), &conn, sizeof(conn));
379 srv = req->rq_obd->u.ost.ost_service;
/* srv_xid is read and incremented under the service lock. */
380 spin_lock(&srv->srv_lock);
381 xid = srv->srv_xid++; /* single xid for all pages */
382 spin_unlock(&srv->srv_lock);
384 for (i = 0, lnb = local_nb; i < niocount; i++, lnb++) {
385 struct ptlrpc_bulk_page *bulk;
387 bulk = ptlrpc_prep_bulk_page(desc);
389 GOTO(fail_bulk, rc = -ENOMEM);
391 bulk->bp_xid = xid; /* single xid for all pages */
393 bulk->bp_buf = lnb->addr;
394 bulk->bp_page = lnb->page;
395 bulk->bp_flags = lnb->flags;
396 bulk->bp_dentry = lnb->dentry;
397 bulk->bp_buflen = lnb->len;
400 /* this advances remote_nb */
401 ost_pack_niobuf((void **)&remote_nb, lnb->offset, lnb->len, 0,
405 rc = ptlrpc_register_bulk(desc);
/* The reply goes out before the wait for the bulk data below. */
410 ptlrpc_reply(req->rq_svc, req);
/* Wait for the RCVD flag; ost_bulk_timeout(desc) is the timeout callback. */
412 lwi = LWI_TIMEOUT(obd_timeout * HZ, ost_bulk_timeout, desc);
413 rc = l_wait_event(desc->bd_waitq, desc->bd_flags & PTL_BULK_FL_RCVD,
416 if (rc != -ETIMEDOUT)
/* Commit uses the private data stored on the descriptor above. */
421 rc = obd_commitrw(cmd, conn, objcount, tmp1, niocount, local_nb,
422 desc->bd_desc_private);
423 ptlrpc_bulk_decref(desc);
/* Size expression matches the OBD_ALLOC above. */
426 OBD_FREE(local_nb, niocount * sizeof(*local_nb));
/* Drop the packed reply so ptlrpc_error() is called with rq_repmsg == NULL. */
430 OBD_FREE(req->rq_repmsg, req->rq_replen);
431 req->rq_repmsg = NULL;
432 ptlrpc_error(req->rq_svc, req);
434 ptlrpc_reply(req->rq_svc, req);
439 ptlrpc_free_bulk(desc);
441 /* FIXME: how do we undo the preprw? */
/*
 * ost_handle - request handler for the OST service.
 *
 * ost_setup() passes this function to ptlrpc_init_svc() as the service's
 * request handler.
 *
 * Visible sequence:
 *   1. unpack the request message; an unpack failure (or the injected
 *      OBD_FAIL_OST_HANDLE_UNPACK) is logged as an invalid request;
 *   2. any opcode other than OST_CONNECT on a request with no export is
 *      rejected with -ENOTCONN;
 *   3. the request's obd device type must be named "ost", else -EINVAL;
 *   4. dispatch on the opcode to the matching ost_*(), target_handle_*()
 *      or ldlm_handle_*() routine, each preceded by an OBD_FAIL_RETURN()
 *      fault-injection hook;
 *   5. after the switch, either log the error and call ptlrpc_error(), or
 *      call ptlrpc_reply(), first complaining if no reply message was
 *      created.
 *
 * NOTE(review): the declaration of rc, most case labels, the break/return
 * statements, the calls for the open, close and punch opcodes, and the
 * condition selecting the error versus reply path are not visible here;
 * confirm against the full file.
 */
445 static int ost_handle(struct ptlrpc_request *req)
450 rc = lustre_unpack_msg(req->rq_reqmsg, req->rq_reqlen);
451 if (rc || OBD_FAIL_CHECK(OBD_FAIL_OST_HANDLE_UNPACK)) {
452 CERROR("lustre_ost: Invalid request\n");
/* Only OST_CONNECT is accepted without an existing export. */
456 if (req->rq_reqmsg->opc != OST_CONNECT &&
457 req->rq_export == NULL) {
458 CERROR("lustre_ost: operation %d on unconnected OST\n",
459 req->rq_reqmsg->opc);
460 GOTO(out, rc = -ENOTCONN);
/* Refuse requests routed to a device that is not of type "ost". */
463 if (strcmp(req->rq_obd->obd_type->typ_name, "ost") != 0)
464 GOTO(out, rc = -EINVAL);
466 switch (req->rq_reqmsg->opc) {
468 CDEBUG(D_INODE, "connect\n");
469 OBD_FAIL_RETURN(OBD_FAIL_OST_CONNECT_NET, 0);
470 rc = target_handle_connect(req);
473 CDEBUG(D_INODE, "disconnect\n");
474 OBD_FAIL_RETURN(OBD_FAIL_OST_DISCONNECT_NET, 0);
475 rc = target_handle_disconnect(req);
478 CDEBUG(D_INODE, "create\n");
479 OBD_FAIL_RETURN(OBD_FAIL_OST_CREATE_NET, 0);
480 rc = ost_create(req);
483 CDEBUG(D_INODE, "destroy\n");
484 OBD_FAIL_RETURN(OBD_FAIL_OST_DESTROY_NET, 0);
485 rc = ost_destroy(req);
488 CDEBUG(D_INODE, "getattr\n");
489 OBD_FAIL_RETURN(OBD_FAIL_OST_GETATTR_NET, 0);
490 rc = ost_getattr(req);
493 CDEBUG(D_INODE, "setattr\n");
494 OBD_FAIL_RETURN(OBD_FAIL_OST_SETATTR_NET, 0);
495 rc = ost_setattr(req);
498 CDEBUG(D_INODE, "open\n");
499 OBD_FAIL_RETURN(OBD_FAIL_OST_OPEN_NET, 0);
503 CDEBUG(D_INODE, "close\n");
504 OBD_FAIL_RETURN(OBD_FAIL_OST_CLOSE_NET, 0);
/* Read and write share the OBD_FAIL_OST_BRW_NET injection point. */
508 CDEBUG(D_INODE, "write\n");
509 OBD_FAIL_RETURN(OBD_FAIL_OST_BRW_NET, 0);
510 rc = ost_brw_write(req);
511 /* ost_brw sends its own replies */
514 CDEBUG(D_INODE, "read\n");
515 OBD_FAIL_RETURN(OBD_FAIL_OST_BRW_NET, 0);
516 rc = ost_brw_read(req);
517 /* ost_brw sends its own replies */
520 CDEBUG(D_INODE, "punch\n");
521 OBD_FAIL_RETURN(OBD_FAIL_OST_PUNCH_NET, 0);
525 CDEBUG(D_INODE, "statfs\n");
526 OBD_FAIL_RETURN(OBD_FAIL_OST_STATFS_NET, 0);
527 rc = ost_statfs(req);
/* Lock-manager opcodes are forwarded to the ldlm_handle_*() routines. */
530 CDEBUG(D_INODE, "enqueue\n");
531 OBD_FAIL_RETURN(OBD_FAIL_LDLM_ENQUEUE, 0);
532 rc = ldlm_handle_enqueue(req);
535 CDEBUG(D_INODE, "convert\n");
536 OBD_FAIL_RETURN(OBD_FAIL_LDLM_CONVERT, 0);
537 rc = ldlm_handle_convert(req);
540 CDEBUG(D_INODE, "cancel\n");
541 OBD_FAIL_RETURN(OBD_FAIL_LDLM_CANCEL, 0);
542 rc = ldlm_handle_cancel(req);
/* Lock callbacks are logged as an error: they are not expected on an OST. */
544 case LDLM_BL_CALLBACK:
545 case LDLM_CP_CALLBACK:
546 CDEBUG(D_INODE, "callback\n");
547 CERROR("callbacks should not happen on OST\n");
549 OBD_FAIL_RETURN(OBD_FAIL_LDLM_BL_CALLBACK, 0);
/* NOTE(review): presumably the default case for unknown opcodes -- confirm. */
552 req->rq_status = -ENOTSUPP;
553 rc = ptlrpc_error(req->rq_svc, req);
559 //req->rq_status = rc;
561 CERROR("ost: processing error (opcode=%d): %d\n",
562 req->rq_reqmsg->opc, rc);
563 ptlrpc_error(req->rq_svc, req);
565 CDEBUG(D_INODE, "sending reply\n");
/* A missing reply message is reported, but ptlrpc_reply() is still called. */
566 if (req->rq_repmsg == NULL)
567 CERROR("handler for opcode %d returned rc=0 without "
568 "creating rq_repmsg; needs to return rc != "
569 "0!\n", req->rq_reqmsg->opc);
570 ptlrpc_reply(req->rq_svc, req);
576 /* set up the OST: connect to the target OBD, then start the request service */
/*
 * ost_setup - obd setup method for the OST device.
 *
 * @buf is an obd_ioctl_data whose first inline buffer names the target
 * OBD by UUID; its length must be between 1 and 37.
 *
 * Visible sequence:
 *   1. look the target up with class_uuid2obd() and require both the
 *      OBD_ATTACHED and OBD_SET_UP flags;
 *   2. obd_connect() to it, keeping the handle in ost->ost_conn;
 *   3. create the request service with ost_handle as its handler;
 *   4. start OST_NUM_THREADS service threads named "ll_ost_NN".
 * The trailing obd_disconnect() is part of the error unwinding.
 *
 * NOTE(review): the declarations of i, err and name, the goto labels
 * (error_dec, error_disc), the return values on the length checks, and
 * the conditions guarding the CERRORs after obd_connect() and
 * ptlrpc_start_thread() are not visible here; confirm.
 * NOTE(review): sprintf() writes into name, whose size is not visible;
 * verify it fits "ll_ost_%02d" for the largest thread index.
 */
577 static int ost_setup(struct obd_device *obddev, obd_count len, void *buf)
579 struct obd_ioctl_data* data = buf;
580 struct ost_obd *ost = &obddev->u.ost;
581 struct obd_device *tgt;
586 if (data->ioc_inllen1 < 1) {
587 CERROR("requires a TARGET OBD UUID\n");
590 if (data->ioc_inllen1 > 37) {
591 CERROR("OBD UUID must be less than 38 characters\n");
596 tgt = class_uuid2obd(data->ioc_inlbuf1);
/* The target must exist and be both attached and set up. */
597 if (!tgt || !(tgt->obd_flags & OBD_ATTACHED) ||
598 !(tgt->obd_flags & OBD_SET_UP)) {
599 CERROR("device not attached or not set up (%d)\n",
601 GOTO(error_dec, err = -EINVAL);
604 err = obd_connect(&ost->ost_conn, tgt, NULL, NULL, NULL);
606 CERROR("fail to connect to device %d\n", data->ioc_dev);
/* The obd_connect() error code is replaced by -EINVAL here. */
607 GOTO(error_dec, err = -EINVAL);
610 ost->ost_service = ptlrpc_init_svc(OST_NEVENTS, OST_NBUFS,
611 OST_BUFSIZE, OST_MAXREQSIZE,
612 OST_REQUEST_PORTAL, OSC_REPLY_PORTAL,
613 "self", ost_handle, "ost");
614 if (!ost->ost_service) {
615 CERROR("failed to start service\n");
616 GOTO(error_disc, err = -EINVAL);
619 for (i = 0; i < OST_NUM_THREADS; i++) {
621 sprintf(name, "ll_ost_%02d", i);
622 err = ptlrpc_start_thread(obddev, ost->ost_service, name);
624 CERROR("error starting thread #%d: rc %d\n", i, err);
/* NOTE(review): no visible teardown of the service or of threads already started on this path -- confirm. */
625 GOTO(error_disc, err = -EINVAL);
632 obd_disconnect(&ost->ost_conn);
/*
 * ost_cleanup - obd cleanup method for the OST device.
 *
 * Complains if the device still has exports, stops all service threads,
 * unregisters the service, and disconnects from the target OBD through
 * ost->ost_conn.
 *
 * NOTE(review): the declaration of err, what is returned when exports
 * remain, and the condition guarding the final CERROR are not visible
 * here; presumably cleanup is refused while clients exist -- confirm.
 */
638 static int ost_cleanup(struct obd_device * obddev)
640 struct ost_obd *ost = &obddev->u.ost;
645 if ( !list_empty(&obddev->obd_exports) ) {
646 CERROR("still has clients!\n");
/* Threads are stopped before the service is unregistered. */
650 ptlrpc_stop_all_threads(ost->ost_service);
651 ptlrpc_unregister_service(ost->ost_service);
653 err = obd_disconnect(&ost->ost_conn);
655 CERROR("lustre ost: fail to disconnect device\n");
/*
 * ost_attach - obd attach method: register the device with lprocfs.
 *
 * Passes dev and the status_var_nm_1 variable table to lprocfs_reg_obd()
 * and returns its result. len and data are not used.
 */
662 int ost_attach(struct obd_device *dev, obd_count len, void *data)
664 return lprocfs_reg_obd(dev, status_var_nm_1, dev);
/*
 * ost_detach - obd detach method: remove the device's lprocfs
 * registration and return the result of lprocfs_dereg_obd().
 */
667 int ost_detach(struct obd_device *dev)
669 return lprocfs_dereg_obd(dev);
675 /* use obd ops to offer management infrastructure */
/*
 * Method table handed to class_register_type() in ost_init(). It uses the
 * old GNU "field: value" initializer syntax.
 * NOTE(review): the o_setup entry and the closing brace are not visible
 * here; presumably o_setup is ost_setup -- confirm.
 */
676 static struct obd_ops ost_obd_ops = {
677 o_attach: ost_attach,
678 o_detach: ost_detach,
680 o_cleanup: ost_cleanup,
/*
 * ost_init - module entry point: register the OST obd type with its
 * method table (ost_obd_ops) and the status_class_var lprocfs table.
 *
 * NOTE(review): the remaining argument(s) of class_register_type(), the
 * declaration of rc, and the return are not visible here; presumably the
 * type name is LUSTRE_OST_NAME, as in ost_exit() -- confirm.
 */
683 static int __init ost_init(void)
687 rc = class_register_type(&ost_obd_ops, status_class_var,
/* ost_exit - module exit point: unregister the obd type named LUSTRE_OST_NAME. */
693 static void __exit ost_exit(void)
696 class_unregister_type(LUSTRE_OST_NAME);
/* Kernel module metadata. */
699 MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
700 MODULE_DESCRIPTION("Lustre Object Storage Target (OST) v0.01");
701 MODULE_LICENSE("GPL");
/* ost_init() and ost_exit() are the module's init and exit hooks. */
703 module_init(ost_init);
704 module_exit(ost_exit);