1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
6 * Copyright (C) 2002 Cluster File Systems, Inc.
7 * Author: Phil Schwan <phil@off.net>
8 * Peter Braam <braam@clusterfs.com>
10 * This code is issued under the GNU General Public License.
11 * See the file COPYING in this distribution
/* Tag this file as belonging to the LOV subsystem.  It is defined ahead of
 * the includes, presumably so the debug macros they provide (e.g. CERROR)
 * pick it up -- TODO confirm against the debug header. */
15 #define DEBUG_SUBSYSTEM S_LOV
17 #include <linux/slab.h>
18 #include <linux/module.h>
19 #include <linux/obd_support.h>
20 #include <linux/lustre_lib.h>
21 #include <linux/lustre_net.h>
22 #include <linux/lustre_idl.h>
23 #include <linux/lustre_mds.h>
24 #include <linux/obd_class.h>
25 #include <linux/obd_lov.h>
26 #include <linux/init.h>
/* Device table defined elsewhere.  None of the lines visible in this excerpt
 * reference it directly. */
28 extern struct obd_device obd_dev[MAX_OBD_DEVICES];
/*
 * lov_connect: connect to the LOV device and to every target it stripes over.
 *
 * Visible flow:
 *   1. class_connect() on the LOV device itself;
 *   2. a short-lived connection to the MDC (lov->mdcobd) used only for
 *      mdc_getlovinfo(), which hands back a uuid array and a request/reply;
 *   3. sanity checks on the descriptor in lov->desc and on the reply buffers;
 *   4. allocation of lov->tgts, copy of the target uuids, and obd_connect()
 *      to each target;
 *   5. cleanup code that disconnects targets, frees lov->tgts, drops the
 *      class connection and frees the request.
 *
 * NOTE(review): parts of this function (remaining parameters -- `cluuid` is
 * used below so it is presumably one of them --, local declarations, the
 * `if` heads, the labels and the return) are not present in this excerpt.
 * The comments describe only what the visible lines establish.
 */
31 static int lov_connect(struct lustre_handle *conn, struct obd_device *obd,
34 struct ptlrpc_request *req;
35 struct lov_obd *lov = &obd->u.lov;
36 struct lustre_handle mdc_conn;
/* class_connect() is the first call visible in this function */
42 rc = class_connect(conn, obd, cluuid);
48 /* retrieve LOV metadata from MDS */
49 rc = obd_connect(&mdc_conn, lov->mdcobd, NULL);
51 CERROR("cannot connect to mdc: rc = %d\n", rc);
/* the real rc is only logged; the caller always sees -EINVAL here */
52 GOTO(out, rc = -EINVAL);
/* the MDC connection is dropped immediately after the query, whatever rc is */
55 rc = mdc_getlovinfo(obd, &mdc_conn, &uuidarray, &req);
56 rc2 = obd_disconnect(&mdc_conn);
58 CERROR("cannot get lov info or disconnect %d/%d\n", rc, rc2);
/* the getlovinfo error takes precedence over the disconnect error */
59 GOTO(out, (rc) ? rc : rc2 );
/* the descriptor's uuid must equal this device's own uuid */
63 if (strcmp(obd->obd_uuid, lov->desc.ld_uuid)) {
64 CERROR("lov uuid %s not on mds device (%s)\n",
65 obd->obd_uuid, lov->desc.ld_uuid);
66 GOTO(out, rc = -EINVAL);
/* hard upper bound of 1000 targets */
68 if (lov->desc.ld_tgt_count > 1000) {
69 CERROR("configuration error: target count > 1000 (%d)\n",
70 lov->desc.ld_tgt_count);
71 GOTO(out, rc = -EINVAL);
/* reply must carry a second buffer big enough for ld_tgt_count uuids */
73 if (req->rq_repmsg->bufcount < 2 || req->rq_repmsg->buflens[1] <
74 sizeof(uuid_t) * lov->desc.ld_tgt_count) {
75 CERROR("invalid uuid array returned\n");
76 GOTO(out, rc = -EINVAL);
/* bufsize is remembered so the matching OBD_FREE can pass the same size */
79 lov->bufsize = sizeof(struct lov_tgt_desc) * lov->desc.ld_tgt_count;
80 OBD_ALLOC(lov->tgts, lov->bufsize);
82 CERROR("Out of memory\n");
83 GOTO(out, rc = -ENOMEM);
/* uuidarray is (re)pointed at reply buffer 1 before it is read */
86 uuidarray = lustre_msg_buf(req->rq_repmsg, 1);
87 for (i = 0 ; i < lov->desc.ld_tgt_count; i++)
88 memcpy(lov->tgts[i].uuid, uuidarray[i], sizeof(uuid_t));
/* each target must be attached and set up before we connect to it */
90 for (i = 0 ; i < lov->desc.ld_tgt_count; i++) {
91 struct obd_device *tgt = class_uuid2obd(uuidarray[i]);
93 CERROR("Target %s not attached\n", uuidarray[i]);
94 GOTO(out_mem, rc = -EINVAL);
96 if (!(tgt->obd_flags & OBD_SET_UP)) {
97 CERROR("Target %s not set up\n", uuidarray[i]);
98 GOTO(out_mem, rc = -EINVAL);
100 rc = obd_connect(&lov->tgts[i].conn, tgt, NULL);
102 CERROR("Target %s connect error %d\n",
/*
 * Cleanup.  NOTE(review): this loop restarts at 0 and runs over all
 * ld_tgt_count entries, so as far as the visible lines show it also calls
 * obd_disconnect() on targets that were never connected -- confirm against
 * the lines not shown here.
 */
110 for (i = 0 ; i < lov->desc.ld_tgt_count; i++) {
111 rc2 = obd_disconnect(&lov->tgts[i].conn);
113 CERROR("BAD: Target %s disconnect error %d\n",
116 OBD_FREE(lov->tgts, lov->bufsize);
/* NOTE(review): `req` has no visible initializer; verify it cannot reach
 * ptlrpc_free_req() unset on the early GOTO(out, ...) paths. */
120 class_disconnect(conn);
121 ptlrpc_free_req(req);
/*
 * lov_disconnect: tear down the per-target connections, release lov->tgts
 * and finally drop the class-level connection.
 *
 * NOTE(review): lines of this function are missing from this excerpt, so any
 * guard around the loop / free (e.g. a check of `obd` or a "last user" test)
 * cannot be seen here.
 */
125 static int lov_disconnect(struct lustre_handle *conn)
127 struct obd_device *obd = class_conn2obd(conn);
128 struct lov_obd *lov = &obd->u.lov;
/* a failing target is logged; whether the loop then stops is not visible */
135 for (i = 0 ; i < lov->desc.ld_tgt_count; i++) {
136 rc = obd_disconnect(&lov->tgts[i].conn);
138 CERROR("Target %s disconnect error %d\n",
139 lov->tgts[i].uuid, rc);
/* same size that was stored in lov->bufsize when lov->tgts was allocated */
143 OBD_FREE(lov->tgts, lov->bufsize);
148 rc = class_disconnect(conn);
/*
 * lov_setup: bind the LOV device to its MDC.
 *
 * `buf` is interpreted as a struct obd_ioctl_data whose first inline buffer
 * (ioc_inlbuf1 / ioc_inllen1) carries the MDC uuid.  The uuid is looked up
 * with class_uuid2obd() and the result stored in lov->mdcobd.
 *
 * NOTE(review): the return statements and closing braces are not part of
 * this excerpt.
 */
154 static int lov_setup(struct obd_device *obd, obd_count len, void *buf)
156 struct obd_ioctl_data* data = buf;
157 struct lov_obd *lov = &obd->u.lov;
/* an empty first inline buffer means no MDC uuid was supplied */
161 if (data->ioc_inllen1 < 1) {
/* NOTE(review): message says "osc" although this is the LOV setup path */
162 CERROR("osc setup requires an MDC UUID\n");
/* accepted lengths are 1..37 */
166 if (data->ioc_inllen1 > 37) {
167 CERROR("mdc UUID must be less than 38 characters\n");
171 lov->mdcobd = class_uuid2obd(data->ioc_inlbuf1);
173 CERROR("LOV %s cannot locate MDC %s\n", obd->obd_uuid,
/*
 * lov_stripe_md_size: size of a struct lov_stripe_md followed by one
 * struct lov_oinfo per configured target (lov->desc.ld_tgt_count).
 * The declaration of `size` and the return are on lines not shown here.
 */
181 static inline int lov_stripe_md_size(struct obd_device *obd)
183 struct lov_obd *lov = &obd->u.lov;
186 size = sizeof(struct lov_stripe_md) +
187 lov->desc.ld_tgt_count * sizeof(struct lov_oinfo);
/*
 * lov_mds_md_size: size of a struct lov_mds_md followed by one
 * struct lov_object_id per configured target (lov->desc.ld_tgt_count).
 * The declaration of `size` and the return are on lines not shown here.
 */
191 static inline int lov_mds_md_size(struct obd_device *obd)
193 struct lov_obd *lov = &obd->u.lov;
196 size = sizeof(struct lov_mds_md) +
197 lov->desc.ld_tgt_count * sizeof(struct lov_object_id);
201 /* the LOV counts on oa->o_id to be set as the LOV object id */
/*
 * lov_create: allocate the striping EA for a new LOV object and create one
 * data object per stripe on the corresponding target.
 *
 * @conn: connection to the LOV device
 * @oa:   "parent" object attributes; o_id is the LOV object id and o_easize
 *        is overwritten with the striping EA size
 * @ea:   out; receives the newly allocated striping EA
 *
 * Stripe count and stripe size fall back to the defaults from lov->desc when
 * they are zero.  If creating a stripe object fails, the objects created so
 * far are destroyed again.
 *
 * Fix: the rollback loop iterates over i2 but used to index lmd_oinfo[] and
 * tgts[] with i (the stripe that failed), so none of the already created
 * objects was ever destroyed.  It now uses i2.
 *
 * NOTE(review): several lines of this function (local declarations, the
 * `if` heads, the assignment of `md`, labels and returns) are not present in
 * this excerpt and are left untouched.
 */
202 static int lov_create(struct lustre_handle *conn, struct obdo *oa, struct lov_stripe_md **ea)
206 struct obd_export *export = class_conn2export(conn);
208 struct lov_stripe_md *md;
212 CERROR("lov_create needs EA for striping information\n");
217 lov = &export->exp_obd->u.lov;
/* the EA is sized for the full target count, not just the stripe count */
219 oa->o_easize = lov_stripe_md_size(export->exp_obd);
221 OBD_ALLOC(*ea, oa->o_easize);
227 md->lmd_easize = lov_mds_md_size(export->exp_obd);
228 md->lmd_object_id = oa->o_id;
/* zero means "use the LOV default" for both count and size */
229 if (!md->lmd_stripe_count) {
230 md->lmd_stripe_count = lov->desc.ld_default_stripe_count;
233 if (!md->lmd_stripe_size)
234 md->lmd_stripe_size = lov->desc.ld_default_stripe_size;
/* NOTE(review): stripe i is always placed on target i; no visible check
 * that lmd_stripe_count <= ld_tgt_count */
238 for (i = 0; i < md->lmd_stripe_count; i++) {
239 struct lov_stripe_md obj_md;
240 struct lov_stripe_md *obj_mdp = &obj_md;
241 /* create data objects with "parent" OA */
242 memcpy(&tmp, oa, sizeof(tmp));
243 tmp.o_easize = sizeof(struct lov_stripe_md);
244 rc = obd_create(&lov->tgts[i].conn, &tmp, &obj_mdp);
246 GOTO(out_cleanup, rc);
/* remember the id and size the target assigned to this stripe object */
247 md->lmd_oinfo[i].loi_id = tmp.o_id;
248 md->lmd_oinfo[i].loi_size = tmp.o_size;
/* rollback: stripes [0, i) were created successfully, undo each of them */
254 for (i2 = 0 ; i2 < i ; i2++) {
255 /* destroy already created objects here */
256 tmp.o_id = md->lmd_oinfo[i2].loi_id;
257 rc2 = obd_destroy(&lov->tgts[i2].conn, &tmp, NULL);
/* NOTE(review): the argument of this CERROR is on a line not shown here;
 * it should report i2 as well */
259 CERROR("Failed to remove object from target %d\n",
/*
 * lov_destroy: destroy every stripe object of a LOV object.
 *
 * For each stripe a private copy of the caller's obdo is made, its o_id is
 * replaced by the stripe's object id and obd_destroy() is called on the
 * target with the same index.
 *
 * NOTE(review): local declarations, the `if` head guarding the first CERROR,
 * the error returns and the end of the loop are not part of this excerpt.
 */
267 static int lov_destroy(struct lustre_handle *conn, struct obdo *oa,
268 struct lov_stripe_md *md)
272 struct obd_export *export = class_conn2export(conn);
277 CERROR("LOV requires striping ea for desctruction\n");
281 if (!export || !export->exp_obd)
284 lov = &export->exp_obd->u.lov;
285 for (i = 0; i < md->lmd_stripe_count; i++) {
286 /* destroy the stripe object, using a copy of the "parent" OA */
287 memcpy(&tmp, oa, sizeof(tmp));
288 tmp.o_id = md->lmd_oinfo[i].loi_id;
289 rc = obd_destroy(&lov->tgts[i].conn, &tmp, NULL);
291 CERROR("Error destroying object %Ld on %d\n",
/*
 * lov_getattr: collect attributes from the stripe objects of a LOV object.
 *
 * @conn: connection to the LOV device
 * @oa:   in/out; receives the merged attributes
 * @md:   striping EA describing the stripe objects
 *
 * For each stripe a private copy of @oa gets the stripe's object id and is
 * passed to obd_getattr() on the target with the same index.  The visible
 * merge code copies one stripe's attributes over @oa (saving oa->o_id first)
 * and accumulates o_size from the stripes.
 *
 * Fix: the "missing EA" message was copy-pasted from the destroy path and
 * talked about destruction; it now names getattr.
 *
 * NOTE(review): local declarations, several `if` heads (including the ones
 * selecting between the memcpy and the `+=` branch), and the returns are not
 * part of this excerpt and are left untouched.
 */
298 static int lov_getattr(struct lustre_handle *conn, struct obdo *oa,
299 struct lov_stripe_md *md)
303 struct obd_export *export = class_conn2export(conn);
308 CERROR("LOV requires striping ea for getattr\n");
312 if (!export || !export->exp_obd)
315 lov = &export->exp_obd->u.lov;
317 for (i = 0; i < md->lmd_stripe_count; i++) {
/* an object id of 0 marks a stripe slot that gets special handling; the
 * statement controlled by this test is on a line not shown (presumably a
 * `continue` -- confirm) */
318 if (md->lmd_oinfo[i].loi_id == 0)
320 /* query the stripe object, using a copy of the "parent" OA */
321 memcpy(&tmp, oa, sizeof(tmp));
322 tmp.o_id = md->lmd_oinfo[i].loi_id;
324 rc = obd_getattr(&lov->tgts[i].conn, &tmp, NULL);
326 CERROR("Error getattr object %Ld on %d\n",
329 /* XXX can do something more sophisticated here... */
330 /* This is not quite right: we only need the size from
331 the individual slices. */
/* oa->o_id is saved because the memcpy below overwrites the whole obdo */
333 obd_id id = oa->o_id;
334 memcpy(oa, &tmp, sizeof(tmp));
337 oa->o_size += tmp.o_size;
/*
 * lov_setattr: apply the attributes in @oa to every stripe object.
 *
 * @conn: connection to the LOV device
 * @oa:   attributes to set; not modified
 * @md:   striping EA describing the stripe objects
 *
 * For each stripe a private copy of @oa gets the stripe's object id and is
 * passed to obd_setattr() on the target with the same index.
 *
 * Fixes:
 *  - the stripe object id was written to oa->o_id *after* @oa had been
 *    copied into tmp, so obd_setattr() was called with a stale id (the LOV
 *    id on the first pass, the previous stripe's id afterwards) and the
 *    caller's obdo was clobbered.  The id now goes into tmp.o_id, as in
 *    lov_destroy/lov_getattr/lov_open/lov_close.
 *  - the "missing EA" message talked about destruction; it now names
 *    setattr.
 *
 * NOTE(review): local declarations, the `if` heads, error returns and the
 * end of the loop are not part of this excerpt and are left untouched.
 */
343 static int lov_setattr(struct lustre_handle *conn, struct obdo *oa,
344 struct lov_stripe_md *md)
348 struct obd_export *export = class_conn2export(conn);
353 CERROR("LOV requires striping ea for setattr\n");
357 if (!export || !export->exp_obd)
360 lov = &export->exp_obd->u.lov;
361 for (i = 0; i < md->lmd_stripe_count; i++) {
362 /* update the stripe object, using a copy of the "parent" OA */
363 memcpy(&tmp, oa, sizeof(tmp));
364 tmp.o_id = md->lmd_oinfo[i].loi_id;
366 rc = obd_setattr(&lov->tgts[i].conn, &tmp, NULL);
368 CERROR("Error setattr object %Ld on %d\n",
/*
 * lov_open: open every stripe object of a LOV object.
 *
 * For each stripe a private copy of the caller's obdo is made, its o_id is
 * replaced by the stripe's object id and obd_open() is called on the target
 * with the same index.
 *
 * NOTE(review): the remaining local declarations, the `if` heads, whatever
 * uses rc2, and the returns are not part of this excerpt.
 */
375 static int lov_open(struct lustre_handle *conn, struct obdo *oa,
376 struct lov_stripe_md *md)
378 int rc = 0, rc2 = 0, i;
380 struct obd_export *export = class_conn2export(conn);
385 CERROR("LOV requires striping ea for opening\n");
389 if (!export || !export->exp_obd)
392 lov = &export->exp_obd->u.lov;
393 for (i = 0; i < md->lmd_stripe_count; i++) {
394 /* open the stripe object, using a copy of the "parent" OA */
395 memcpy(&tmp, oa, sizeof(tmp));
396 tmp.o_id = md->lmd_oinfo[i].loi_id;
398 rc = obd_open(&lov->tgts[i].conn, &tmp, NULL);
401 CERROR("Error open object %Ld on %d\n",
/*
 * lov_close: close every stripe object of a LOV object.
 *
 * @conn: connection to the LOV device
 * @oa:   "parent" object attributes; copied per stripe, not modified by the
 *        visible lines
 * @md:   striping EA describing the stripe objects
 *
 * For each stripe a private copy of @oa gets the stripe's object id and is
 * passed to obd_close() on the target with the same index.
 *
 * Fix: the "missing EA" message was copy-pasted from the destroy path and
 * talked about destruction; it now names the close operation (lov_open
 * already says "for opening").
 *
 * NOTE(review): local declarations, the `if` heads, error returns and the
 * end of the loop are not part of this excerpt and are left untouched.
 */
409 static int lov_close(struct lustre_handle *conn, struct obdo *oa,
410 struct lov_stripe_md *md)
414 struct obd_export *export = class_conn2export(conn);
419 CERROR("LOV requires striping ea for closing\n");
423 if (!export || !export->exp_obd)
426 lov = &export->exp_obd->u.lov;
427 for (i = 0; i < md->lmd_stripe_count; i++) {
428 /* close the stripe object, using a copy of the "parent" OA */
429 memcpy(&tmp, oa, sizeof(tmp));
430 tmp.o_id = md->lmd_oinfo[i].loi_id;
432 rc = obd_close(&lov->tgts[i].conn, &tmp, NULL);
434 CERROR("Error close object %Ld on %d\n",
/* log2(n) expands to ffz(~(n)).  Assuming ffz() returns the index of the
 * least-significant zero bit (TODO confirm), this is the index of the lowest
 * set bit of n, which equals log2(n) only when n is a power of two.  The
 * macro is not used by any line visible in this excerpt. */
442 #define log2(n) ffz(~(n))
445 /* compute offset in stripe i corresponding to offset "in" */
/*
 * @md: striping EA (stripe size and stripe count are read from it)
 * @in: offset within the LOV object; the all-ones value is passed through
 *      unchanged
 * @i:  stripe index
 *
 * `out` starts as (number of complete stripe rows before `in`) * stripe size
 * and `off` is the position of `in` inside its row.  The three-way test then
 * distinguishes stripe i lying entirely before `off`, entirely after it, or
 * containing it.
 *
 * NOTE(review): `in` is truncated to __u32 before the division and modulo,
 * so offsets >= 4GB appear to be mapped incorrectly -- confirm whether that
 * is an accepted limitation.  A stripe size or stripe count of 0 would
 * divide by zero; no guard is visible.  The bodies of the first two
 * branches and the return are on lines not shown here.
 */
446 __u64 lov_offset(struct lov_stripe_md *md, __u64 in, int i)
448 __u32 ssz = md->lmd_stripe_size;
449 /* full stripes across all * stripe size */
450 __u32 out = ( ((__u32)in) / (md->lmd_stripe_count * ssz)) * ssz;
451 __u32 off = (__u32)in % (md->lmd_stripe_count * ssz);
/* all-ones offset is returned as is, for every stripe */
453 if ( in == 0xffffffffffffffff ) {
454 return 0xffffffffffffffff;
/* stripe i ends at or before `off` within the current row */
457 if ( (i+1) * ssz <= off )
/* stripe i starts after `off` within the current row */
459 else if ( i * ssz > off )
/* `off` falls inside stripe i: add the offset within the stripe */
462 out += (off - (i * ssz)) % ssz;
467 /* find the stripe holding offset "in" (*j) and compute the offset in it */
/*
 * @md: striping EA (stripe size and stripe count are read from it)
 * @in: offset within the LOV object
 * @j:  out; index of the stripe that contains `in`
 *
 * NOTE(review): as in lov_offset, `in` is truncated to __u32 before the
 * arithmetic, and there is no visible guard against a zero stripe size or
 * stripe count.  The declarations of `off`/`out` and the return are on
 * lines not shown here.
 */
468 __u64 lov_stripe(struct lov_stripe_md *md, __u64 in, int *j)
470 __u32 ssz = md->lmd_stripe_size;
472 /* full stripes across all * stripe size */
473 *j = (((__u32) in)/ssz) % md->lmd_stripe_count;
/* position of `in` inside its row of stripes */
474 off = (__u32)in % (md->lmd_stripe_count * ssz);
/* complete rows before `in`, plus the offset inside stripe *j */
475 out = ( ((__u32)in) / (md->lmd_stripe_count * ssz)) * ssz +
476 (off - ((*j) * ssz)) % ssz;;
/*
 * lov_stripe_which: index of the stripe that contains offset `in`, computed
 * as (in / stripe size) modulo stripe count.
 *
 * NOTE(review): `in` is truncated to __u32 first; the declaration of `j` and
 * the return are on lines not shown here.
 */
481 int lov_stripe_which(struct lov_stripe_md *md, __u64 in)
483 __u32 ssz = md->lmd_stripe_size;
485 j = (((__u32) in)/ssz) % md->lmd_stripe_count;
490 /* FIXME: maybe we'll just make one node the authoritative attribute node, then
491 * we can send this 'punch' to just the authoritative node and the nodes
492 * that the punch will affect. */
/*
 * lov_punch: punch the range [start, end] of a LOV object by punching the
 * corresponding range of every stripe object.
 *
 * @conn:  connection to the LOV device
 * @oa:    "parent" object attributes; not modified
 * @md:    striping EA describing the stripe objects
 * @start: start offset within the LOV object
 * @end:   end offset within the LOV object
 *
 * The per-stripe range is obtained with lov_offset() for both ends.
 *
 * Fixes:
 *  - the stripe object id was written to oa->o_id *after* @oa had been
 *    copied into tmp, so obd_punch() was called with a stale id and the
 *    caller's obdo was clobbered.  The id now goes into tmp.o_id, as in
 *    lov_destroy/lov_getattr/lov_open/lov_close.
 *  - the "missing EA" message talked about destruction; it now names punch.
 *
 * NOTE(review): local declarations, the `if` heads, the remaining arguments
 * of obd_punch(), error returns and the end of the loop are not part of this
 * excerpt and are left untouched.
 */
493 static int lov_punch(struct lustre_handle *conn, struct obdo *oa,
494 struct lov_stripe_md *md,
495 obd_off start, obd_off end)
499 struct obd_export *export = class_conn2export(conn);
504 CERROR("LOV requires striping ea for punch\n");
508 if (!export || !export->exp_obd)
511 lov = &export->exp_obd->u.lov;
512 for (i = 0; i < md->lmd_stripe_count; i++) {
/* translate both ends of the range into offsets within stripe i */
513 __u64 starti = lov_offset(md, start, i);
514 __u64 endi = lov_offset(md, end, i);
518 /* punch the stripe object, using a copy of the "parent" OA */
519 memcpy(&tmp, oa, sizeof(tmp));
520 tmp.o_id = md->lmd_oinfo[i].loi_id;
522 rc = obd_punch(&lov->tgts[i].conn, &tmp, NULL,
525 CERROR("Error punch object %Ld on %d\n",
/*
 * lov_osc_brw_callback: completion callback handed to obd_brw() by lov_brw.
 *
 * In the FINISH phase it decrements cbd->refcount and, when the count
 * reaches zero, invokes the original callback stored in cbd->cb with
 * cbd->err.
 *
 * NOTE(review): what happens in the START phase, how `err` is folded into
 * cbd->err, and the return are on lines not shown here.
 */
532 int lov_osc_brw_callback(struct io_cb_data *cbd, int err, int phase)
537 if (phase == CB_PHASE_START)
540 if (phase == CB_PHASE_FINISH) {
/* only the caller that drops the last reference runs the real callback */
543 if (atomic_dec_and_test(&cbd->refcount))
544 ret = cbd->cb(cbd, cbd->err, phase);
/*
 * lov_brw: split a bulk read/write on a LOV object into one obd_brw() per
 * stripe.
 *
 * The pages in `pga` are bucketed by stripe in three passes:
 *   1. count the pages that fall into each stripe (bufct);
 *   2. turn the counts into start indices (index) and record each stripe's
 *      object id;
 *   3. copy every page into its stripe's slice of `ioarr`, rewriting its
 *      offset to the offset within the stripe object.
 * Then obd_brw() is issued for every stripe that received pages, with
 * lov_osc_brw_callback as completion callback, and finally the caller's
 * callback is invoked for the START phase.
 *
 * NOTE(review): one parameter line (the one declaring `oa_bufs`, which is
 * used below), the declaration of `stripeinfo` and other locals, the `if`
 * heads after the allocations, and the return are not part of this excerpt.
 */
552 static inline int lov_brw(int cmd, struct lustre_handle *conn,
553 struct lov_stripe_md *md,
555 struct brw_page *pga,
556 brw_callback_t callback, struct io_cb_data *cbd)
/* md is dereferenced here, before any check that could be visible below */
558 int stripe_count = md->lmd_stripe_count;
559 struct obd_export *export = class_conn2export(conn);
/* presumably a member of the per-stripe bookkeeping struct behind
 * `stripeinfo` (see the stripeinfo[i].md uses below) -- TODO confirm */
565 struct lov_stripe_md md;
567 struct brw_page *ioarr;
571 lov = &export->exp_obd->u.lov;
574 OBD_ALLOC(stripeinfo, stripe_count * sizeof(*stripeinfo));
578 OBD_ALLOC(ioarr, sizeof(*ioarr) * oa_bufs);
/* stripeinfo is released again here -- presumably on the path where the
 * ioarr allocation failed (the guarding `if` is not visible) */
580 OBD_FREE(stripeinfo, stripe_count * sizeof(*stripeinfo));
/* pass 1: count pages per stripe */
584 for (i=0 ; i < oa_bufs ; i++ ) {
586 which = lov_stripe_which(md, pga[i].pg->index * PAGE_SIZE);
587 stripeinfo[which].bufct++;
/* pass 2: start index of each stripe's slice = previous start + previous
 * count; the guard that skips the i-1 access for i == 0 is presumably on
 * the line not shown -- TODO confirm */
590 for (i=0 ; i < stripe_count ; i++) {
592 stripeinfo[i].index =
593 stripeinfo[i-1].index + stripeinfo[i-1].bufct;
594 stripeinfo[i].md.lmd_object_id =
595 md->lmd_oinfo[i].loi_id;
/* pass 3: place each page into its stripe's slice and fix up its offset */
598 for (i=0 ; i < oa_bufs ; i++ ) {
600 which = lov_stripe_which(md, pga[i].pg->index * PAGE_SIZE);
602 shift = stripeinfo[which].index;
603 ioarr[shift + stripeinfo[which].subcount] = pga[i];
604 ioarr[shift + stripeinfo[which].subcount].off =
605 lov_offset(md, pga[i].pg->index * PAGE_SIZE, which);
606 stripeinfo[which].subcount++;
/* NOTE(review): the refcount is set to the number of pages, while
 * obd_brw() below is called once per non-empty stripe; whether the callback
 * fires per page or per call decides if this count is right -- confirm */
610 atomic_set(&cbd->refcount, oa_bufs);
611 for (i=0 ; i < stripe_count ; i++) {
612 int shift = stripeinfo[i].index;
/* the return value of obd_brw() is not captured */
613 if (stripeinfo[i].bufct)
614 obd_brw(cmd, &lov->tgts[i].conn, &stripeinfo[i].md,
615 stripeinfo[i].bufct, &ioarr[shift],
616 lov_osc_brw_callback, cbd);
619 rc = callback(cbd, 0, CB_PHASE_START);
/*
 * lov_enqueue: take an extent lock on a LOV object by enqueueing one lock
 * per stripe object.
 *
 * @conn:        connection to the LOV device
 * @md:          striping EA describing the stripe objects
 * @parent_lock: passed through to obd_enqueue()
 * @type:        lock type, passed through
 * @cookie:      interpreted as a struct ldlm_extent covering the LOV object
 * @cookielen:   not used by the visible lines
 * @mode, @flags, @cb, @data, @datalen: passed through to obd_enqueue()
 * @lockhs:      out; one lock handle per stripe, indexed by stripe number
 *
 * The extent is translated into each stripe's offset space with
 * lov_offset(); the result is enqueued on the target with the same index.
 *
 * Fix: the "missing EA" message was copy-pasted from the destroy path and
 * talked about destruction; it now names the enqueue operation (lov_cancel
 * already says "for lock cancellation").
 *
 * NOTE(review): local declarations, the `if` heads, the statement controlled
 * by the start == end test, error returns and the end of the loop are not
 * part of this excerpt and are left untouched.
 */
624 static int lov_enqueue(struct lustre_handle *conn, struct lov_stripe_md *md,
625 struct lustre_handle *parent_lock,
626 __u32 type, void *cookie, int cookielen, __u32 mode,
627 int *flags, void *cb, void *data, int datalen,
628 struct lustre_handle *lockhs)
631 struct obd_export *export = class_conn2export(conn);
633 struct lov_stripe_md submd;
637 CERROR("LOV requires striping ea for lock enqueue\n");
641 if (!export || !export->exp_obd)
644 lov = &export->exp_obd->u.lov;
645 for (i = 0; i < md->lmd_stripe_count; i++) {
/* NOTE(review): cookie is cast without a visible check of type/cookielen */
646 struct ldlm_extent *extent = (struct ldlm_extent *)cookie;
647 struct ldlm_extent sub_ext;
649 sub_ext.start = lov_offset(md, extent->start, i);
650 sub_ext.end = lov_offset(md, extent->end, i);
/* an empty per-stripe extent gets special handling on a line not shown
 * (presumably the stripe is skipped -- confirm) */
651 if ( sub_ext.start == sub_ext.end )
654 submd.lmd_object_id = md->lmd_oinfo[i].loi_id;
655 submd.lmd_easize = sizeof(struct lov_mds_md);
656 submd.lmd_stripe_count = md->lmd_stripe_count;
657 /* XXX submd is not fully initialized here */
658 rc = obd_enqueue(&(lov->tgts[i].conn), &submd, parent_lock,
659 type, &sub_ext, sizeof(sub_ext), mode,
660 flags, cb, data, datalen, &(lockhs[i]));
661 // XXX add a lock debug statement here
663 CERROR("Error obd_enqueue object %Ld subobj %Ld\n",
664 md->lmd_object_id, md->lmd_oinfo[i].loi_id);
/*
 * lov_cancel: cancel the per-stripe locks of a LOV object.
 *
 * `lockhs` holds one lock handle per stripe, indexed by stripe number.  For
 * each stripe a minimal submd (object id and easize only) is built and
 * obd_cancel() is called on the target with the same index.
 *
 * NOTE(review): local declarations, the `if` heads, the statement controlled
 * by the addr == 0 test, error returns and the end of the loop are not part
 * of this excerpt.
 */
670 static int lov_cancel(struct lustre_handle *conn, struct lov_stripe_md *md, __u32 mode,
671 struct lustre_handle *lockhs)
674 struct obd_export *export = class_conn2export(conn);
679 CERROR("LOV requires striping ea for lock cancellation\n");
683 if (!export || !export->exp_obd)
686 lov = &export->exp_obd->u.lov;
687 for (i = 0; i < md->lmd_stripe_count; i++) {
688 struct lov_stripe_md submd;
/* a zero handle address marks a stripe with no lock to cancel; the
 * controlled statement is on a line not shown (presumably `continue`) */
690 if ( lockhs[i].addr == 0 )
/* only these two submd fields are set before the call */
693 submd.lmd_object_id = md->lmd_oinfo[i].loi_id;
694 submd.lmd_easize = sizeof(struct lov_mds_md);
695 rc = obd_cancel(&lov->tgts[i].conn, &submd, mode, &lockhs[i]);
697 CERROR("Error cancel object %Ld subobj %Ld\n",
698 md->lmd_object_id, md->lmd_oinfo[i].loi_id);
/*
 * Method table registered for the LOV device type by lov_init().
 *
 * NOTE(review): only part of the initializer is present in this excerpt;
 * entries on the lines not shown (and the closing brace) cannot be seen
 * here.
 */
707 struct obd_ops lov_obd_ops = {
709 o_connect: lov_connect,
710 o_disconnect: lov_disconnect,
711 o_create: lov_create,
712 o_destroy: lov_destroy,
713 o_getattr: lov_getattr,
714 o_setattr: lov_setattr,
719 o_enqueue: lov_enqueue,
/* version string pasted into the load-time banner below */
724 #define LOV_VERSION "v0.1"
/*
 * lov_init: module entry point.  Prints a banner and registers lov_obd_ops
 * under the name OBD_LOV_DEVICENAME; the result of class_register_type() is
 * returned to the module loader.
 */
726 static int __init lov_init(void)
728 printk(KERN_INFO "Lustre Logical Object Volume driver " LOV_VERSION
729 ", info@clusterfs.com\n");
730 return class_register_type(&lov_obd_ops, OBD_LOV_DEVICENAME);
/* lov_exit: module exit point; unregisters the type registered by
 * lov_init() under OBD_LOV_DEVICENAME. */
733 static void __exit lov_exit(void)
735 class_unregister_type(OBD_LOV_DEVICENAME);
/* Module metadata and registration of the init/exit entry points.
 * NOTE(review): the description string repeats "v0.1" literally instead of
 * using LOV_VERSION, so the two must be kept in sync by hand. */
738 MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
739 MODULE_DESCRIPTION("Lustre Logical Object Volume OBD driver v0.1");
740 MODULE_LICENSE("GPL");
742 module_init(lov_init);
743 module_exit(lov_exit);