1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
6 * Copyright (C) 2002 Cluster File Systems, Inc.
7 * Author: Phil Schwan <phil@off.net>
8 * Peter Braam <braam@clusterfs.com>
10 * This code is issued under the GNU General Public License.
11 * See the file COPYING in this distribution
15 #define DEBUG_SUBSYSTEM S_LOV
17 #include <linux/slab.h>
18 #include <linux/module.h>
19 #include <linux/obd_support.h>
20 #include <linux/lustre_lib.h>
21 #include <linux/lustre_net.h>
22 #include <linux/lustre_idl.h>
23 #include <linux/lustre_mds.h>
24 #include <linux/obd_class.h>
25 #include <linux/obd_lov.h>
26 #include <linux/init.h>
27 #include <linux/random.h>
28 #include <linux/slab.h>
29 #include <asm/div64.h>
30 #include <linux/lprocfs_status.h>
/* File-scope declarations used throughout this driver.
 *
 * status_var_nm_1, status_class_var: lprocfs variable tables defined in
 *     another translation unit; they are handed to lprocfs_reg_obd() and
 *     class_register_type() further down in this file.
 * lov_file_cache: slab cache holding struct lov_file_handles records; it is
 *     created in lov_init() and destroyed in lov_exit().
 * struct lov_file_handles: one record per open LOV object.  lfh_list links
 *     the record onto the led_open_head list of the export, and lfh_handles
 *     is an array indexed by stripe number.
 *     NOTE(review): other code in this file reads lfh_cookie and lfh_count,
 *     which are not visible in this copy of the declaration - confirm
 *     against the complete source.
 * lov_packmd, lov_unpackmd: implemented elsewhere and installed in
 *     lov_obd_ops.
 */
32 extern struct lprocfs_vars status_var_nm_1[];
33 extern struct lprocfs_vars status_class_var[];
35 static kmem_cache_t *lov_file_cache;
37 struct lov_file_handles {
38 struct list_head lfh_list;
41 struct lustre_handle *lfh_handles;
44 extern int lov_packmd(struct lustre_handle *conn, struct lov_mds_md **lmm,
45 struct lov_stripe_md *lsm);
46 extern int lov_unpackmd(struct lustre_handle *conn, struct lov_stripe_md **lsm,
47 struct lov_mds_md *lmm);
/* Attach hook for the LOV device type.
 * Calls lprocfs_reg_obd() with dev, the status_var_nm_1 table and dev again,
 * and returns whatever that call returns.  The len and data arguments are
 * not used by the visible body.
 */
50 int lov_attach(struct obd_device *dev, obd_count len, void *data)
52 return lprocfs_reg_obd(dev, status_var_nm_1, dev);
/* Detach hook for the LOV device type.
 * Returns the result of lprocfs_dereg_obd(dev), the counterpart of the
 * lprocfs_reg_obd() call made in lov_attach().
 */
55 int lov_detach(struct obd_device *dev)
57 return lprocfs_dereg_obd(dev);
/* Connect a client to the LOV device and, when needed, bring up the
 * connections to the underlying targets.
 *
 * Steps visible in this copy, in order:
 *  - class_connect() creates the connection for conn;
 *  - a test on lov->refcount guards against repeating the underlying
 *    connections more than once;
 *  - the led_open_head list of the export is initialised;
 *  - a temporary connection to the MDC device (lov->mdcobd) is opened,
 *    mdc_getlovinfo() fetches the LOV description, and the temporary
 *    connection is dropped again;
 *  - reply buffer 0 must be at least sizeof(*desc); it is copied into
 *    lov->desc and passed through lov_unpackdesc();
 *  - reply buffer 1 must be large enough for ld_tgt_count UUIDs;
 *  - the descriptor is rejected with -EINVAL when its UUID differs from
 *    obd->obd_uuid, when ld_tgt_count exceeds 1000, or when the default
 *    stripe width (count times size) exceeds ~0UL;
 *  - lov->tgts is allocated (lov->bufsize bytes) and the UUIDs are copied in;
 *  - each target is looked up by UUID, must have OBD_SET_UP set, is connected
 *    with obd_connect(), registered through IOC_OSC_REGISTER_LOV, counted in
 *    ld_active_tgt_count and marked active;
 *  - mdc->cl_max_mds_easize is set from obd_size_wiremd(conn, NULL);
 *  - the request is released with ptlrpc_req_finished();
 *  - the trailing statements undo the work: targets are marked inactive and
 *    disconnected, lov->tgts is freed and class_disconnect() is called.  The
 *    GOTOs name the labels out_disc and out_conn for these paths.
 *
 * NOTE(review): this copy is missing lines (the declarations of rc, rc2 and
 * i, several if conditions, braces, labels and return statements), so the
 * exact control flow must be confirmed against the complete source.
 */
60 static int lov_connect(struct lustre_handle *conn, struct obd_device *obd,
61 obd_uuid_t cluuid, struct recovd_obd *recovd,
62 ptlrpc_recovery_cb_t recover)
64 struct ptlrpc_request *req = NULL;
65 struct lov_obd *lov = &obd->u.lov;
66 struct client_obd *mdc = &lov->mdcobd->u.cli;
67 struct lov_desc *desc = &lov->desc;
68 struct obd_export *exp;
69 struct lustre_handle mdc_conn;
70 obd_uuid_t *uuidarray;
75 rc = class_connect(conn, obd, cluuid);
79 /* We don't want to actually do the underlying connections more than
80 * once, so keep track. */
82 if (lov->refcount > 1)
85 exp = class_conn2export(conn);
86 INIT_LIST_HEAD(&exp->exp_lov_data.led_open_head);
88 /* retrieve LOV metadata from MDS */
89 rc = obd_connect(&mdc_conn, lov->mdcobd, NULL, recovd, recover);
91 CERROR("cannot connect to mdc: rc = %d\n", rc);
95 rc = mdc_getlovinfo(obd, &mdc_conn, &req);
96 rc2 = obd_disconnect(&mdc_conn);
98 CERROR("cannot get lov info %d\n", rc);
103 CERROR("error disconnecting from MDS %d\n", rc2);
104 GOTO(out_conn, rc = rc2);
108 if (req->rq_repmsg->bufcount < 2 ||
109 req->rq_repmsg->buflens[0] < sizeof(*desc)) {
110 CERROR("LOV desc: invalid descriptor returned\n");
111 GOTO(out_conn, rc = -EINVAL);
114 memcpy(desc, lustre_msg_buf(req->rq_repmsg, 0), sizeof(*desc));
115 lov_unpackdesc(desc);
117 if (req->rq_repmsg->buflens[1] < sizeof(*uuidarray)*desc->ld_tgt_count){
118 CERROR("LOV desc: invalid uuid array returned\n");
119 GOTO(out_conn, rc = -EINVAL);
122 if (memcmp(obd->obd_uuid, desc->ld_uuid, sizeof(desc->ld_uuid))) {
123 CERROR("LOV desc: uuid %s not on mds device (%s)\n",
124 obd->obd_uuid, desc->ld_uuid);
125 GOTO(out_conn, rc = -EINVAL);
128 if (desc->ld_tgt_count > 1000) {
129 CERROR("LOV desc: target count > 1000 (%d)\n",
131 GOTO(out_conn, rc = -EINVAL);
134 /* Because of 64-bit divide/mod operations only work with a 32-bit
135 * divisor in a 32-bit kernel, we cannot support a stripe width
136 * of 4GB or larger on 32-bit CPUs.
138 if ((desc->ld_default_stripe_count ?
139 desc->ld_default_stripe_count : desc->ld_tgt_count) *
140 desc->ld_default_stripe_size > ~0UL) {
141 CERROR("LOV: stripe width "LPU64"x%u > %lu on 32-bit system\n",
142 desc->ld_default_stripe_size,
143 desc->ld_default_stripe_count ?
144 desc->ld_default_stripe_count : desc->ld_tgt_count,~0UL);
145 GOTO(out_conn, rc = -EINVAL);
148 lov->bufsize = sizeof(struct lov_tgt_desc) * desc->ld_tgt_count;
149 OBD_ALLOC(lov->tgts, lov->bufsize);
151 CERROR("Out of memory\n");
152 GOTO(out_conn, rc = -ENOMEM);
155 uuidarray = lustre_msg_buf(req->rq_repmsg, 1);
156 for (i = 0; i < desc->ld_tgt_count; i++)
157 memcpy(lov->tgts[i].uuid, uuidarray[i], sizeof(*uuidarray));
159 for (i = 0; i < desc->ld_tgt_count; i++) {
160 struct obd_device *tgt = class_uuid2obd(uuidarray[i]);
163 CERROR("Target %s not attached\n", uuidarray[i]);
164 GOTO(out_disc, rc = -EINVAL);
167 if (!(tgt->obd_flags & OBD_SET_UP)) {
168 CERROR("Target %s not set up\n", uuidarray[i]);
169 GOTO(out_disc, rc = -EINVAL);
172 rc = obd_connect(&lov->tgts[i].conn, tgt, NULL, recovd,
175 CERROR("Target %s connect error %d\n",
179 rc = obd_iocontrol(IOC_OSC_REGISTER_LOV, &lov->tgts[i].conn,
180 sizeof(struct obd_device *), obd, NULL);
182 CERROR("Target %s REGISTER_LOV error %d\n",
186 desc->ld_active_tgt_count++;
187 lov->tgts[i].active = 1;
190 mdc->cl_max_mds_easize = obd_size_wiremd(conn, NULL);
193 ptlrpc_req_finished(req);
198 desc->ld_active_tgt_count--;
199 lov->tgts[i].active = 0;
200 rc2 = obd_disconnect(&lov->tgts[i].conn);
202 CERROR("LOV Target %s disconnect error: rc = %d\n",
205 OBD_FREE(lov->tgts, lov->bufsize);
207 class_disconnect(conn);
/* Disconnect a client from the LOV device.
 *
 * Visible behaviour:
 *  - the underlying targets are only torn down on the final disconnect; the
 *    test on lov->refcount guards this;
 *  - for every target: inactive ones are skipped with a log message, active
 *    ones are uncounted from ld_active_tgt_count, marked inactive and
 *    disconnected with obd_disconnect(), logging any error;
 *  - the lov->tgts array is freed;
 *  - every lov_file_handles record still linked on the led_open_head list of
 *    the export is logged, unlinked, and released (its lfh_handles array of
 *    lfh_count entries first, then the record itself back to lov_file_cache);
 *    the underlying objects are not closed, as the XXX comment points out;
 *  - finally class_disconnect(conn) is called and its result stored in rc.
 *
 * NOTE(review): the declarations of rc and i, braces, the continue or return
 * statements and any label are missing from this copy.
 */
213 static int lov_disconnect(struct lustre_handle *conn)
215 struct obd_device *obd = class_conn2obd(conn);
216 struct lov_obd *lov = &obd->u.lov;
217 struct obd_export *exp;
218 struct list_head *p, *n;
224 /* Only disconnect the underlying layers on the final disconnect. */
226 if (lov->refcount != 0)
229 for (i = 0; i < lov->desc.ld_tgt_count; i++) {
230 if (!lov->tgts[i].active) {
231 CERROR("Skipping disconnect for inactive OSC %s\n",
236 lov->desc.ld_active_tgt_count--;
237 lov->tgts[i].active = 0;
238 rc = obd_disconnect(&lov->tgts[i].conn);
240 CERROR("Target %s disconnect error %d\n",
241 lov->tgts[i].uuid, rc);
245 OBD_FREE(lov->tgts, lov->bufsize);
249 exp = class_conn2export(conn);
250 list_for_each_safe(p, n, &exp->exp_lov_data.led_open_head) {
251 /* XXX close these, instead of just discarding them? */
252 struct lov_file_handles *lfh;
253 lfh = list_entry(p, typeof(*lfh), lfh_list);
254 CERROR("discarding open LOV handle %p:"LPX64"\n",
255 lfh, lfh->lfh_cookie);
256 list_del(&lfh->lfh_list);
257 OBD_FREE(lfh->lfh_handles,
258 lfh->lfh_count * sizeof(*lfh->lfh_handles));
259 kmem_cache_free(lov_file_cache, lfh);
263 rc = class_disconnect(conn);
271 * -EINVAL : UUID can't be found in the LOV's target list
272 * -ENOTCONN: The UUID is found, but the target connection is bad (!)
273 * -EBADF : The UUID is found, but the OBD is the wrong type (!)
274 * -EALREADY: The OSC is already marked (in)active
/* Mark one target of the LOV as active or inactive.
 *
 * The whole operation runs with lov->lov_lock held.  The target is found by
 * comparing uuid against each lov->tgts[i].uuid with strncmp().  Result codes
 * set on the visible paths:
 *   -EINVAL    no target with that UUID;
 *   -ENOTCONN  set on the path that follows class_conn2obd() on the
 *              connection of the target (the guarding test is not visible);
 *   -EBADF     the type name of the device is not "osc";
 *   -EALREADY  the target is already in the requested state.
 * Otherwise tgts[i].active is set to activate and ld_active_tgt_count is
 * incremented or decremented (presumably depending on activate; the selecting
 * condition is not visible in this copy).
 *
 * NOTE(review): the last line of the parameter list (the one declaring
 * activate), the declarations of i and rc, the out label and the return are
 * missing from this copy.
 */
276 static int lov_set_osc_active(struct lov_obd *lov, obd_uuid_t uuid,
279 struct obd_device *obd;
283 CDEBUG(D_INFO, "Searching in lov %p for uuid %s (activate=%d)\n",
284 lov, uuid, activate);
286 spin_lock(&lov->lov_lock);
287 for (i = 0; i < lov->desc.ld_tgt_count; i++)
288 if (strncmp(uuid, lov->tgts[i].uuid,
289 sizeof(lov->tgts[i].uuid)) == 0)
292 if (i == lov->desc.ld_tgt_count)
293 GOTO(out, rc = -EINVAL);
295 obd = class_conn2obd(&lov->tgts[i].conn);
298 GOTO(out, rc = -ENOTCONN);
301 CDEBUG(D_INFO, "Found OBD %p type %s\n", obd, obd->obd_type->typ_name);
302 if (strcmp(obd->obd_type->typ_name, "osc") != 0) {
304 GOTO(out, rc = -EBADF);
307 if (lov->tgts[i].active == activate) {
308 CDEBUG(D_INFO, "OBD %p already %sactive!\n", obd,
309 activate ? "" : "in");
310 GOTO(out, rc = -EALREADY);
313 CDEBUG(D_INFO, "Marking OBD %p %sactive\n", obd, activate ? "" : "in");
315 lov->tgts[i].active = activate;
317 lov->desc.ld_active_tgt_count++;
319 lov->desc.ld_active_tgt_count--;
323 spin_unlock(&lov->lov_lock);
/* Setup hook: bind the LOV device to its MDC device.
 *
 * buf is interpreted as a struct obd_ioctl_data.  ioc_inllen1 must be at
 * least 1 and at most 37, and ioc_inlbuf1 is used as the UUID of the MDC.
 * The function initialises lov->lov_lock, resolves the UUID with
 * class_uuid2obd() and stores the result in lov->mdcobd; a failure to locate
 * the MDC is logged.
 *
 * NOTE(review): the return statements and the test guarding the final error
 * message are missing from this copy, so the returned error codes cannot be
 * stated from here.
 */
327 static int lov_setup(struct obd_device *obd, obd_count len, void *buf)
329 struct obd_ioctl_data *data = buf;
330 struct lov_obd *lov = &obd->u.lov;
334 if (data->ioc_inllen1 < 1) {
335 CERROR("osc setup requires an MDC UUID\n");
339 if (data->ioc_inllen1 > 37) {
340 CERROR("mdc UUID must be 36 characters or less\n");
344 spin_lock_init(&lov->lov_lock);
345 lov->mdcobd = class_uuid2obd(data->ioc_inlbuf1);
347 CERROR("LOV %s cannot locate MDC %s\n", obd->obd_uuid,
/* Translate a lustre_handle into the lov_file_handles record it refers to.
 *
 * Three checks are visible: the handle and its addr field must be non-zero,
 * the pointer obtained by casting handle->addr must pass
 * kmem_cache_validate() against lov_file_cache, and the cookie stored in the
 * record must equal handle->cookie.
 *
 * NOTE(review): the return statements are missing from this copy; callers in
 * this file test the result for NULL-ness, so failure presumably yields NULL.
 */
354 static struct lov_file_handles *lov_handle2lfh(struct lustre_handle *handle)
356 struct lov_file_handles *lfh = NULL;
358 if (!handle || !handle->addr)
361 lfh = (struct lov_file_handles *)(unsigned long)(handle->addr);
362 if (!kmem_cache_validate(lov_file_cache, lfh))
365 if (lfh->lfh_cookie != handle->cookie)
371 /* the LOV expects oa->o_id to be set to the LOV object id */
/* Create a striped LOV object: choose the targets and create one sub-object
 * on each of them.
 *
 * Visible steps:
 *  - the call depends on lov->desc.ld_active_tgt_count being non-zero;
 *  - under lov->lov_lock, ost_count is read from ld_tgt_count;
 *  - when the stripe count requested in a caller-supplied lsm exceeds the
 *    number of active targets, a fresh lsm is allocated with
 *    obd_alloc_memmd(), the stripe offset, size and pattern are carried over
 *    and the old lsm is freed;
 *  - lsm_magic is set to LOV_MAGIC, lsm_object_id is taken from oa->o_id
 *    (OBD_MD_FLID must be set), and a zero stripe size is replaced by the
 *    default from the descriptor;
 *  - a stripe width (size times count) above ~0UL is rejected with -EINVAL;
 *  - when no usable starting offset was supplied, one is derived from the
 *    object id, the stripe count and ost_count;
 *  - the starting offset is advanced to an active target, then each stripe
 *    is assigned the next active target index in loi_ost_idx;
 *  - after dropping the lock, obd_create() is called once per stripe with a
 *    copy of oa held in tmp, and the id it returns is stored in loi_id;
 *  - the trailing statements destroy sub-objects that were already created
 *    and free the lsm.  The GOTOs name the labels out_cleanup and out_free.
 *
 * NOTE(review): mult is declared int although it is computed from
 * lsm_object_id, which is printed with a 64-bit format in this function.  If
 * the product does not fit in an int the derived stripe offset could be
 * negative and then index lov->tgts out of range - confirm.
 * NOTE(review): many lines are missing from this copy (declarations of lov,
 * tmp, i, rc and err, several conditions, braces, labels, returns).
 */
372 static int lov_create(struct lustre_handle *conn, struct obdo *oa,
373 struct lov_stripe_md **ea)
375 struct obd_export *export = class_conn2export(conn);
377 struct lov_stripe_md *lsm;
378 struct lov_oinfo *loi;
380 int ost_count, ost_idx = 1;
393 lov = &export->exp_obd->u.lov;
395 if (!lov->desc.ld_active_tgt_count)
398 spin_lock(&lov->lov_lock);
399 ost_count = lov->desc.ld_tgt_count;
403 /* Free the user lsm if it needs to be changed, to avoid memory leaks */
405 lsm->lsm_stripe_count > lov->desc.ld_active_tgt_count)) {
406 struct lov_stripe_md *lsm_new = NULL;
407 rc = obd_alloc_memmd(conn, &lsm_new);
409 spin_unlock(&lov->lov_lock);
411 obd_free_memmd(conn, &lsm);
415 LASSERT(lsm->lsm_magic == LOV_MAGIC);
416 CERROR("replace user LOV MD: stripes %u > %u active\n",
417 lsm->lsm_stripe_count,
418 lov->desc.ld_active_tgt_count);
419 lsm_new->lsm_stripe_offset = lsm->lsm_stripe_offset;
420 lsm_new->lsm_stripe_size = lsm->lsm_stripe_size;
421 lsm_new->lsm_stripe_pattern = lsm->lsm_stripe_pattern;
422 obd_free_memmd(conn, &lsm);
425 ost_idx = 0; /* if lsm->lsm_stripe_offset is set yet */
426 lsm->lsm_magic = LOV_MAGIC;
429 LASSERT(oa->o_valid & OBD_MD_FLID);
430 lsm->lsm_object_id = oa->o_id;
431 if (!lsm->lsm_stripe_size)
432 lsm->lsm_stripe_size = lov->desc.ld_default_stripe_size;
434 /* Because of 64-bit divide/mod operations only work with a 32-bit
435 * divisor in a 32-bit kernel, we cannot support a stripe width
436 * of 4GB or larger on 32-bit CPUs.
438 if (lsm->lsm_stripe_size * lsm->lsm_stripe_count > ~0UL) {
439 CERROR("LOV: stripe width "LPU64"x%u > %lu on 32-bit system\n",
440 lsm->lsm_stripe_size, lsm->lsm_stripe_count, ~0UL);
441 spin_unlock(&lov->lov_lock);
442 GOTO(out_free, rc = -EINVAL);
445 if (!ost_idx || lsm->lsm_stripe_offset >= ost_count) {
446 int mult = lsm->lsm_object_id * lsm->lsm_stripe_count;
447 int stripe_offset = mult % ost_count;
448 int sub_offset = (mult / ost_count) % lsm->lsm_stripe_count;
450 lsm->lsm_stripe_offset = stripe_offset + sub_offset;
453 /* Start with lsm_stripe_offset on an active OSC to avoid confusion */
454 while (!lov->tgts[lsm->lsm_stripe_offset].active)
455 lsm->lsm_stripe_offset = (lsm->lsm_stripe_offset+1) % ost_count;
457 /* Pick the OSTs before we release the lock */
458 ost_idx = lsm->lsm_stripe_offset;
459 for (i = 0,loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++,loi++) {
460 CDEBUG(D_INODE, "objid "LPX64"[%d] is ost_idx %d (uuid %s)\n",
461 lsm->lsm_object_id, i, ost_idx, lov->tgts[ost_idx].uuid);
462 loi->loi_ost_idx = ost_idx;
464 ost_idx = (ost_idx + 1) % ost_count;
465 } while (!lov->tgts[ost_idx].active);
468 spin_unlock(&lov->lov_lock);
470 CDEBUG(D_INODE, "allocating %d subobjs for objid "LPX64" at idx %d\n",
471 lsm->lsm_stripe_count,lsm->lsm_object_id,lsm->lsm_stripe_offset);
473 for (i = 0,loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++,loi++) {
474 struct lov_stripe_md obj_md;
475 struct lov_stripe_md *obj_mdp = &obj_md;
477 ost_idx = loi->loi_ost_idx;
479 /* create data objects with "parent" OA */
480 memcpy(tmp, oa, sizeof(*tmp));
481 /* XXX: LOV STACKING: use real "obj_mdp" sub-data */
482 rc = obd_create(&lov->tgts[ost_idx].conn, tmp, &obj_mdp);
484 CERROR("error creating objid "LPX64" sub-object on "
485 "OST idx %d: rc = %d\n", oa->o_id, ost_idx, rc);
486 GOTO(out_cleanup, rc);
488 loi->loi_id = tmp->o_id;
489 CDEBUG(D_INODE, "objid "LPX64" has subobj "LPX64" at idx %d\n",
490 lsm->lsm_object_id, loi->loi_id, ost_idx);
504 /* destroy already created objects here */
505 memcpy(tmp, oa, sizeof(*tmp));
506 tmp->o_id = loi->loi_id;
507 err = obd_destroy(&lov->tgts[loi->loi_ost_idx].conn, tmp, NULL);
509 CERROR("Failed to uncreate objid "LPX64" subobj "
510 LPX64" on OST idx %d: rc = %d\n",
511 oa->o_id, loi->loi_id, loi->loi_ost_idx,
516 obd_free_memmd(conn, &lsm);
/* Destroy every sub-object of a striped LOV object.
 *
 * The striping descriptor lsm is required and must carry LOV_MAGIC; the
 * export and its device must be valid.  When oa carries OBD_MD_FLHANDLE the
 * matching lov_file_handles record is looked up with lov_handle2lfh().  For
 * each stripe a copy of oa is made in tmp, its o_id is replaced by the id of
 * the sub-object, the handle of that stripe is copied in from
 * lfh->lfh_handles[i] (presumably only when lfh was found; otherwise the
 * OBD_MD_FLHANDLE bit is cleared - the selecting lines are not visible), and
 * obd_destroy() is called on the target given by loi_ost_idx.  Failures are
 * logged.
 *
 * NOTE(review): in the error message the newline sits in the middle of the
 * format string (after the OST index, before the rc text), so the rc value is
 * printed on a new, unterminated line - this looks like a typo.
 * NOTE(review): declarations of tmp, lov, rc and i, the guarding conditions,
 * braces and return statements are missing from this copy.
 */
520 static int lov_destroy(struct lustre_handle *conn, struct obdo *oa,
521 struct lov_stripe_md *lsm)
524 struct obd_export *export = class_conn2export(conn);
526 struct lov_oinfo *loi;
527 struct lov_file_handles *lfh = NULL;
532 CERROR("LOV requires striping ea for destruction\n");
536 if (lsm->lsm_magic != LOV_MAGIC) {
537 CERROR("LOV striping magic bad %#lx != %#lx\n",
538 lsm->lsm_magic, LOV_MAGIC);
542 if (!export || !export->exp_obd)
545 if (oa->o_valid & OBD_MD_FLHANDLE)
546 lfh = lov_handle2lfh(obdo_handle(oa));
548 lov = &export->exp_obd->u.lov;
549 for (i = 0,loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++,loi++) {
550 memcpy(&tmp, oa, sizeof(tmp));
551 tmp.o_id = loi->loi_id;
553 memcpy(obdo_handle(&tmp), &lfh->lfh_handles[i],
554 sizeof(lfh->lfh_handles[i]));
556 tmp.o_valid &= ~OBD_MD_FLHANDLE;
557 rc = obd_destroy(&lov->tgts[loi->loi_ost_idx].conn, &tmp, NULL);
559 CERROR("Error destroying objid "LPX64" subobj "LPX64
560 " on OST idx %d\n: rc = %d",
561 oa->o_id, loi->loi_id, loi->loi_ost_idx, rc);
566 /* compute object size given "stripeno" and the ost size */
/* Convert the size of one sub-object into the size of the whole striped
 * object as implied by that sub-object.
 *
 * ssize is the stripe size and swidth the full stripe width (ssize times the
 * stripe count).  do_div() splits ost_size in place: afterwards ost_size
 * holds the number of complete stripes and stripe_size the remainder within
 * the last one.  Two formulas for lov_size are visible; which of them applies
 * is decided by a condition that is not visible in this copy (presumably
 * whether the remainder is non-zero - confirm).
 *
 * NOTE(review): the last parameter line (declaring stripeno), the declaration
 * of lov_size, the conditions and the return are missing from this copy.
 */
567 static obd_size lov_stripe_size(struct lov_stripe_md *lsm, obd_size ost_size,
570 unsigned long ssize = lsm->lsm_stripe_size;
571 unsigned long swidth = ssize * lsm->lsm_stripe_count;
572 unsigned long stripe_size;
578 /* do_div(a, b) returns a % b, and a = a / b */
579 stripe_size = do_div(ost_size, ssize);
582 lov_size = ost_size * swidth + stripeno * ssize + stripe_size;
584 lov_size = (ost_size - 1) * swidth + (stripeno + 1) * ssize;
/* Fold the attributes of one sub-object (src, for stripe stripeno) into the
 * attributes of the whole object (tgt).  Only fields selected by valid are
 * touched.
 *
 * Two groups of statements are visible:
 *  - a plain copy: obdo_cpy_md() followed by setting o_size from
 *    lov_stripe_size();
 *  - a merge: o_size becomes the larger of the current value and the size
 *    implied by this stripe, o_blocks is summed, and o_ctime and o_mtime take
 *    the later of the two values.
 * The selection between the two is presumably driven by *new (first stripe
 * versus later stripes), but the lines that test or update *new are not
 * visible in this copy - confirm against the complete source.
 */
589 static void lov_merge_attrs(struct obdo *tgt, struct obdo *src, obd_flag valid,
590 struct lov_stripe_md *lsm, int stripeno, int *new)
593 obdo_cpy_md(tgt, src, valid);
594 if (valid & OBD_MD_FLSIZE)
595 tgt->o_size = lov_stripe_size(lsm,src->o_size,stripeno);
598 if (valid & OBD_MD_FLSIZE) {
599 /* this handles sparse files properly */
602 lov_size = lov_stripe_size(lsm, src->o_size, stripeno);
603 if (lov_size > tgt->o_size)
604 tgt->o_size = lov_size;
606 if (valid & OBD_MD_FLBLOCKS)
607 tgt->o_blocks += src->o_blocks;
608 if (valid & OBD_MD_FLCTIME && tgt->o_ctime < src->o_ctime)
609 tgt->o_ctime = src->o_ctime;
610 if (valid & OBD_MD_FLMTIME && tgt->o_mtime < src->o_mtime)
611 tgt->o_mtime = src->o_mtime;
/* Collect the attributes of a striped object by querying every sub-object.
 *
 * lsm is required and must carry LOV_MAGIC; the export and its device must be
 * valid.  An optional open handle in oa is resolved with lov_handle2lfh().
 * For every stripe with a non-zero loi_id, a copy of oa is prepared in tmp
 * with the id of the sub-object (and either the handle of that stripe or the
 * OBD_MD_FLHANDLE bit cleared), obd_getattr() is called on the owning target,
 * and the result is merged into oa with lov_merge_attrs().  A failing stripe
 * is logged and skipped (see the XXX comment), so processing continues with
 * the next stripe.
 *
 * NOTE(review): declarations of tmp, lov, i, rc, err and new, the guarding
 * conditions, braces and return statements are missing from this copy.
 */
615 static int lov_getattr(struct lustre_handle *conn, struct obdo *oa,
616 struct lov_stripe_md *lsm)
619 struct obd_export *export = class_conn2export(conn);
621 struct lov_oinfo *loi;
622 struct lov_file_handles *lfh = NULL;
628 CERROR("LOV requires striping ea\n");
632 if (lsm->lsm_magic != LOV_MAGIC) {
633 CERROR("LOV striping magic bad %#lx != %#lx\n",
634 lsm->lsm_magic, LOV_MAGIC);
638 if (!export || !export->exp_obd)
641 lov = &export->exp_obd->u.lov;
643 if (oa->o_valid & OBD_MD_FLHANDLE)
644 lfh = lov_handle2lfh(obdo_handle(oa));
646 for (i = 0,loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++,loi++) {
649 if (loi->loi_id == 0)
652 CDEBUG(D_INFO, "objid "LPX64"[%d] has subobj "LPX64" at idx "
653 "%u\n", oa->o_id, i, loi->loi_id, loi->loi_ost_idx);
654 /* create data objects with "parent" OA */
655 memcpy(&tmp, oa, sizeof(tmp));
656 tmp.o_id = loi->loi_id;
658 memcpy(obdo_handle(&tmp), &lfh->lfh_handles[i],
659 sizeof(lfh->lfh_handles[i]));
661 tmp.o_valid &= ~OBD_MD_FLHANDLE;
663 err = obd_getattr(&lov->tgts[loi->loi_ost_idx].conn, &tmp,NULL);
665 CERROR("Error getattr objid "LPX64" subobj "LPX64
666 " on OST idx %d: rc = %d\n",
667 oa->o_id, loi->loi_id, loi->loi_ost_idx, err);
670 continue; /* XXX or break? */
672 lov_merge_attrs(oa, &tmp, tmp.o_valid, lsm, i, &new);
/* Apply attribute changes to every sub-object of a striped object.
 *
 * According to the comment in the body this path is currently unused and is
 * expected to hit LBUG() (the LBUG line itself is not visible in this copy).
 * lsm is required and must carry LOV_MAGIC; the export and its device must be
 * valid.  Size changes are not allowed here (asserted via OBD_MD_FLSIZE);
 * they go through punch instead.  For each stripe the metadata selected by
 * oa->o_valid is copied into tmp, the handle of that stripe is copied in or
 * the OBD_MD_FLHANDLE bit is cleared, o_id is set to the id of the
 * sub-object, and obd_setattr() is called on the owning target; failures are
 * logged.
 *
 * NOTE(review): tmp is used as a pointer here, but its declaration and any
 * allocation or release are not visible in this copy; conditions, braces and
 * return statements are missing as well.
 */
677 static int lov_setattr(struct lustre_handle *conn, struct obdo *oa,
678 struct lov_stripe_md *lsm)
681 struct obd_export *export = class_conn2export(conn);
683 struct lov_oinfo *loi;
684 struct lov_file_handles *lfh = NULL;
688 /* Note that this code is currently unused, hence LBUG(), just
689 * to know when/if it is ever revived that it needs cleanups.
694 CERROR("LOV requires striping ea\n");
698 if (lsm->lsm_magic != LOV_MAGIC) {
699 CERROR("LOV striping magic bad %#lx != %#lx\n",
700 lsm->lsm_magic, LOV_MAGIC);
704 if (!export || !export->exp_obd)
707 /* size changes should go through punch and not setattr */
708 LASSERT(!(oa->o_valid & OBD_MD_FLSIZE));
714 if (oa->o_valid & OBD_MD_FLHANDLE)
715 lfh = lov_handle2lfh(obdo_handle(oa));
717 lov = &export->exp_obd->u.lov;
718 for (i = 0,loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++,loi++) {
721 obdo_cpy_md(tmp, oa, oa->o_valid);
724 memcpy(obdo_handle(tmp), &lfh->lfh_handles[i],
725 sizeof(lfh->lfh_handles[i]));
727 tmp->o_valid &= ~OBD_MD_FLHANDLE;
729 tmp->o_id = loi->loi_id;
731 err = obd_setattr(&lov->tgts[loi->loi_ost_idx].conn, tmp, NULL);
733 CERROR("Error setattr objid "LPX64" subobj "LPX64
734 " on OST idx %d: rc = %d\n",
735 oa->o_id, loi->loi_id, loi->loi_ost_idx, err);
/* Open every sub-object of a striped object and hand back one LOV-level
 * handle that stands for all of them.
 *
 * lsm is required and must carry LOV_MAGIC; the export and its device must be
 * valid.  A lov_file_handles record is taken from lov_file_cache and given an
 * lfh_handles array with one slot per stripe (-ENOMEM on either allocation
 * failure).  For each stripe a copy of oa with the id of the sub-object is
 * passed to obd_open() on the owning target; the returned attributes are
 * merged into oa with lov_merge_attrs() and, when the target returned a
 * handle, it is stored in lfh_handles[i].
 *
 * When a handle was returned, the record gets its stripe count and a random
 * cookie, the handle embedded in oa is filled with the address of the record
 * and the cookie, OBD_MD_FLHANDLE is set in oa, and the record is linked onto
 * the led_open_head list of the export.
 *
 * The trailing statements release the lfh_handles array, poison the cookie
 * with DEAD_HANDLE_MAGIC and return the record to the cache.  The GOTOs name
 * the labels out_tmp and out_lfh for these paths.
 *
 * NOTE(review): the result of obd_open() is stored in err, but the error
 * message visible right after it prints rc - this looks like the wrong
 * variable; confirm.
 * NOTE(review): the FIXME in the body still applies - an error may be
 * returned while some sub-objects remain open.
 * NOTE(review): declarations of tmp, lov, i, rc, err and new, allocation of
 * tmp, conditions, braces, labels and returns are missing from this copy.
 */
744 static int lov_open(struct lustre_handle *conn, struct obdo *oa,
745 struct lov_stripe_md *lsm)
748 struct obd_export *export = class_conn2export(conn);
750 struct lov_oinfo *loi;
751 struct lov_file_handles *lfh = NULL;
757 CERROR("LOV requires striping ea for opening\n");
761 if (lsm->lsm_magic != LOV_MAGIC) {
762 CERROR("LOV striping magic bad %#lx != %#lx\n",
763 lsm->lsm_magic, LOV_MAGIC);
767 if (!export || !export->exp_obd)
774 lfh = kmem_cache_alloc(lov_file_cache, GFP_KERNEL);
776 GOTO(out_tmp, rc = -ENOMEM);
777 OBD_ALLOC(lfh->lfh_handles,
778 lsm->lsm_stripe_count * sizeof(*lfh->lfh_handles));
779 if (!lfh->lfh_handles)
780 GOTO(out_lfh, rc = -ENOMEM);
782 lov = &export->exp_obd->u.lov;
785 for (i = 0,loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++,loi++) {
788 /* create data objects with "parent" OA */
789 memcpy(tmp, oa, sizeof(*tmp));
790 tmp->o_id = loi->loi_id;
792 err = obd_open(&lov->tgts[loi->loi_ost_idx].conn, tmp, NULL);
794 CERROR("Error open objid "LPX64" subobj "LPX64
795 " on OST idx %d: rc = %d\n",
796 oa->o_id, lsm->lsm_oinfo[i].loi_id,
797 loi->loi_ost_idx, rc);
802 lov_merge_attrs(oa, tmp, tmp->o_valid, lsm, i, &new);
804 if (tmp->o_valid & OBD_MD_FLHANDLE)
805 memcpy(&lfh->lfh_handles[i], obdo_handle(tmp),
806 sizeof(lfh->lfh_handles[i]));
809 if (tmp->o_valid & OBD_MD_FLHANDLE) {
810 struct lustre_handle *handle = obdo_handle(oa);
812 lfh->lfh_count = lsm->lsm_stripe_count;
813 get_random_bytes(&lfh->lfh_cookie, sizeof(lfh->lfh_cookie));
815 handle->addr = (__u64)(unsigned long)lfh;
816 handle->cookie = lfh->lfh_cookie;
817 oa->o_valid |= OBD_MD_FLHANDLE;
818 list_add(&lfh->lfh_list, &export->exp_lov_data.led_open_head);
822 /* FIXME: returning an error, but having opened some objects is a bad
823 * idea, since they will likely never be closed. We either
824 * need to not return an error if _some_ objects could be
825 * opened, and leave it to read/write to return -EIO (with
826 * hopefully partial error status) or close all opened objects
827 * and return an error. I think the former is preferred.
834 OBD_FREE(lfh->lfh_handles,
835 lsm->lsm_stripe_count * sizeof(*lfh->lfh_handles));
837 lfh->lfh_cookie = DEAD_HANDLE_MAGIC;
838 kmem_cache_free(lov_file_cache, lfh);
/* Close every sub-object of a striped object and release the LOV-level
 * handle record.
 *
 * lsm is required and must carry LOV_MAGIC; the export and its device must be
 * valid.  An open handle in oa is resolved with lov_handle2lfh().  For each
 * stripe a copy of oa with the id of the sub-object (and either the handle of
 * that stripe or the OBD_MD_FLHANDLE bit cleared) is passed to obd_close() on
 * the owning target; failures are logged.  Afterwards the record is unlinked
 * from the open list, its lfh_handles array is freed, its cookie is poisoned
 * with DEAD_HANDLE_MAGIC and the record goes back to lov_file_cache.
 *
 * NOTE(review): the release statements dereference lfh; whether they are
 * guarded by a test for a NULL lfh cannot be seen in this copy.  Declarations
 * of tmp, lov, i, rc and err, conditions, braces and returns are missing too.
 */
842 static int lov_close(struct lustre_handle *conn, struct obdo *oa,
843 struct lov_stripe_md *lsm)
846 struct obd_export *export = class_conn2export(conn);
848 struct lov_oinfo *loi;
849 struct lov_file_handles *lfh = NULL;
854 CERROR("LOV requires striping ea\n");
858 if (lsm->lsm_magic != LOV_MAGIC) {
859 CERROR("LOV striping magic bad %#lx != %#lx\n",
860 lsm->lsm_magic, LOV_MAGIC);
864 if (!export || !export->exp_obd)
867 if (oa->o_valid & OBD_MD_FLHANDLE)
868 lfh = lov_handle2lfh(obdo_handle(oa));
870 lov = &export->exp_obd->u.lov;
871 for (i = 0,loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++,loi++) {
874 /* create data objects with "parent" OA */
875 memcpy(&tmp, oa, sizeof(tmp));
876 tmp.o_id = loi->loi_id;
878 memcpy(obdo_handle(&tmp), &lfh->lfh_handles[i],
879 sizeof(lfh->lfh_handles[i]));
881 tmp.o_valid &= ~OBD_MD_FLHANDLE;
883 err = obd_close(&lov->tgts[loi->loi_ost_idx].conn, &tmp, NULL);
885 CERROR("Error close objid "LPX64" subobj "LPX64
886 " on OST idx %d: rc = %d\n",
887 oa->o_id, loi->loi_id, loi->loi_ost_idx, err);
893 list_del(&lfh->lfh_list);
894 OBD_FREE(lfh->lfh_handles,
895 lsm->lsm_stripe_count * sizeof(*lfh->lfh_handles));
896 lfh->lfh_cookie = DEAD_HANDLE_MAGIC;
897 kmem_cache_free(lov_file_cache, lfh);
/* log2(n) is defined as ffz(~(n)), i.e. the index of the first zero bit of
 * the complement of n.  No use of this macro is visible in this copy.
 * NOTE(review): the original line numbering has gaps around these two lines,
 * so preprocessor guards may be missing here - confirm.
 * The #warning is a build-time reminder that lov_stripe_offset() and
 * lov_stripe_number() below should be merged.
 */
904 #define log2(n) ffz(~(n))
907 #warning FIXME: merge these two functions now that they are nearly the same
909 /* compute ost offset in stripe "stripeno" corresponding to offset "lov_off" */
/* Map an offset within the striped object (lov_off) to the corresponding
 * offset inside the sub-object that backs stripe stripeno.
 *
 * ssize is the stripe size and swidth the full stripe width.  Offsets equal
 * to OBD_OBJECT_EOF or 0 are treated as a special case (the statement taken
 * for them is not visible in this copy).  do_div() then splits lov_off in
 * place: lov_off becomes the number of complete stripe widths and stripe_off
 * the position inside the current width.  stripe_off is compared with the
 * start of this stripe (this_stripe), reduced by it, and compared with ssize
 * before the final result lov_off * ssize + stripe_off is returned; the
 * statements executed when those comparisons hold are not visible here.
 *
 * NOTE(review): the last parameter line (declaring stripeno) and several
 * statements are missing from this copy.
 */
910 static obd_off lov_stripe_offset(struct lov_stripe_md *lsm, obd_off lov_off,
913 unsigned long ssize = lsm->lsm_stripe_size;
914 unsigned long swidth = ssize * lsm->lsm_stripe_count;
915 unsigned long stripe_off, this_stripe;
917 if (lov_off == OBD_OBJECT_EOF || lov_off == 0)
920 /* do_div(a, b) returns a % b, and a = a / b */
921 stripe_off = do_div(lov_off, swidth);
923 this_stripe = stripeno * ssize;
924 if (stripe_off <= this_stripe)
927 stripe_off -= this_stripe;
929 if (stripe_off > ssize)
934 return lov_off * ssize + stripe_off;
937 /* compute which stripe number "lov_off" will be written into */
/* Return the index of the stripe that contains offset lov_off.
 * do_div() yields the position of lov_off inside one full stripe width
 * (swidth = stripe size times stripe count); dividing that position by the
 * stripe size gives the stripe index.
 */
938 static int lov_stripe_number(struct lov_stripe_md *lsm, obd_off lov_off)
940 unsigned long ssize = lsm->lsm_stripe_size;
941 unsigned long swidth = ssize * lsm->lsm_stripe_count;
942 unsigned long stripe_off;
944 stripe_off = do_div(lov_off, swidth);
946 return stripe_off / ssize;
950 /* FIXME: maybe we'll just make one node the authoritative attribute node, then
951 * we can send this 'punch' to just the authoritative node and the nodes
952 * that the punch will affect. */
/* Punch (truncate) the range [start, end] of a striped object by punching
 * the corresponding range on every sub-object.
 *
 * lsm is required and must carry LOV_MAGIC; the export and its device must be
 * valid.  An open handle in oa is resolved with lov_handle2lfh().  For each
 * stripe the range is translated with lov_stripe_offset() into starti and
 * endi, a copy of oa with the id of the sub-object (and either the handle of
 * that stripe or the OBD_MD_FLHANDLE bit cleared) is prepared, and
 * obd_punch() is called on the owning target; failures are logged.
 *
 * NOTE(review): the trailing arguments of the obd_punch() call, any test
 * that skips stripes, declarations of tmp, lov, i, rc and err, braces and
 * returns are missing from this copy.
 */
953 static int lov_punch(struct lustre_handle *conn, struct obdo *oa,
954 struct lov_stripe_md *lsm,
955 obd_off start, obd_off end)
958 struct obd_export *export = class_conn2export(conn);
960 struct lov_oinfo *loi;
961 struct lov_file_handles *lfh = NULL;
966 CERROR("LOV requires striping ea\n");
970 if (lsm->lsm_magic != LOV_MAGIC) {
971 CERROR("LOV striping magic bad %#lx != %#lx\n",
972 lsm->lsm_magic, LOV_MAGIC);
976 if (!export || !export->exp_obd)
979 if (oa->o_valid & OBD_MD_FLHANDLE)
980 lfh = lov_handle2lfh(obdo_handle(oa));
982 lov = &export->exp_obd->u.lov;
983 for (i = 0,loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++,loi++) {
984 obd_off starti = lov_stripe_offset(lsm, start, i);
985 obd_off endi = lov_stripe_offset(lsm, end, i);
990 /* create data objects with "parent" OA */
991 memcpy(&tmp, oa, sizeof(tmp));
992 tmp.o_id = loi->loi_id;
994 memcpy(obdo_handle(&tmp), &lfh->lfh_handles[i],
995 sizeof(lfh->lfh_handles[i]));
997 tmp.o_valid &= ~OBD_MD_FLHANDLE;
999 err = obd_punch(&lov->tgts[loi->loi_ost_idx].conn, &tmp, NULL,
1002 CERROR("Error punch objid "LPX64" subobj "LPX64
1003 " on OST idx %d: rc = %d\n",
1004 oa->o_id, loi->loi_id, loi->loi_ost_idx, err);
/* Bulk read or write on a striped object: regroup the page array by stripe
 * and issue one obd_brw() per stripe.
 *
 * Working storage (all sized from stripe_count or oa_bufs and freed at the
 * end; -ENOMEM on allocation failure):
 *   stripeinfo  one entry per stripe, holding a private lsm, the target
 *               index, the number of pages for the stripe (bufct), the start
 *               of its slice in ioarr (index) and a fill counter (subcount);
 *   where       for each input page, the stripe it belongs to;
 *   ioarr       the pages reordered so that each stripe owns one contiguous
 *               slice.
 *
 * Passes over the data:
 *   1. lov_stripe_number() classifies every page and bufct is counted;
 *   2. a running sum of bufct gives each stripe its index, and the object id
 *      and target index of the stripe are recorded;
 *   3. every page is copied into its slot in ioarr and its offset is rewritten
 *      with lov_stripe_offset();
 *   4. obd_brw() is called per stripe on its slice; its return value is not
 *      examined (see the XXX comment).
 * The GOTOs name the labels out_cbdata, out_sinfo and out_where.
 *
 * NOTE(review): lsm->lsm_stripe_count is read in a declaration initialiser,
 * before the error path that reports a missing striping descriptor could
 * run; if that path is guarded by a NULL test on lsm, the test comes too
 * late - confirm.
 * NOTE(review): unlike the sibling functions, no validity check of export is
 * visible before export->exp_obd is dereferenced; lines are missing from
 * this copy (struct members, conditions, braces, labels, returns), so this
 * may simply be hidden.
 */
1012 static inline int lov_brw(int cmd, struct lustre_handle *conn,
1013 struct lov_stripe_md *lsm, obd_count oa_bufs,
1014 struct brw_page *pga, struct obd_brw_set *set)
1020 struct lov_stripe_md lsm;
1022 } *stripeinfo, *si, *si_last;
1023 struct obd_export *export = class_conn2export(conn);
1024 struct lov_obd *lov;
1025 struct brw_page *ioarr;
1026 struct lov_oinfo *loi;
1027 int rc = 0, i, *where, stripe_count = lsm->lsm_stripe_count;
1031 CERROR("LOV requires striping ea\n");
1035 if (lsm->lsm_magic != LOV_MAGIC) {
1036 CERROR("LOV striping magic bad %#lx != %#lx\n",
1037 lsm->lsm_magic, LOV_MAGIC);
1041 lov = &export->exp_obd->u.lov;
1043 OBD_ALLOC(stripeinfo, stripe_count * sizeof(*stripeinfo));
1045 GOTO(out_cbdata, rc = -ENOMEM);
1047 OBD_ALLOC(where, sizeof(*where) * oa_bufs);
1049 GOTO(out_sinfo, rc = -ENOMEM);
1051 OBD_ALLOC(ioarr, sizeof(*ioarr) * oa_bufs);
1053 GOTO(out_where, rc = -ENOMEM);
1055 for (i = 0; i < oa_bufs; i++) {
1056 where[i] = lov_stripe_number(lsm, pga[i].off);
1057 stripeinfo[where[i]].bufct++;
1060 for (i = 0, loi = lsm->lsm_oinfo, si_last = si = stripeinfo;
1061 i < stripe_count; i++, loi++, si_last = si, si++) {
1063 si->index = si_last->index + si_last->bufct;
1064 si->lsm.lsm_object_id = loi->loi_id;
1065 si->ost_idx = loi->loi_ost_idx;
1068 for (i = 0; i < oa_bufs; i++) {
1069 int which = where[i];
1072 shift = stripeinfo[which].index + stripeinfo[which].subcount;
1073 LASSERT(shift < oa_bufs);
1074 ioarr[shift] = pga[i];
1075 ioarr[shift].off = lov_stripe_offset(lsm, pga[i].off, which);
1076 stripeinfo[which].subcount++;
1079 for (i = 0, si = stripeinfo; i < stripe_count; i++, si++) {
1080 int shift = si->index;
1083 LASSERT(shift < oa_bufs);
1084 /* XXX handle error returns here */
1085 obd_brw(cmd, &lov->tgts[si->ost_idx].conn,
1086 &si->lsm, si->bufct, &ioarr[shift], set);
1090 OBD_FREE(ioarr, sizeof(*ioarr) * oa_bufs);
1092 OBD_FREE(where, sizeof(*where) * oa_bufs);
1094 OBD_FREE(stripeinfo, stripe_count * sizeof(*stripeinfo));
/* Take an extent lock on a striped object by enqueueing one lock per stripe.
 *
 * lsm is required and must carry LOV_MAGIC; the export and its device must be
 * valid.  cookie is interpreted as a struct ldlm_extent.  For every stripe
 * the extent is translated with lov_stripe_offset() into sub_ext; stripes
 * whose translated start and end coincide are handled specially (the
 * statement is not visible; presumably they are skipped).  A minimal submd is
 * built with the object id of the sub-object and a stripe count of 0 (the XXX
 * comments note it is not fully initialised), and obd_enqueue() is called on
 * the owning target, storing the resulting lock handle in lockhs[i].  On
 * failure the error is logged and lockhs[i] is zeroed.
 *
 * lockhs must therefore have room for one handle per stripe.
 *
 * NOTE(review): declarations of rc and i, conditions, braces, any cleanup of
 * locks already taken, and return statements are missing from this copy.
 */
1099 static int lov_enqueue(struct lustre_handle *conn, struct lov_stripe_md *lsm,
1100 struct lustre_handle *parent_lock,
1101 __u32 type, void *cookie, int cookielen, __u32 mode,
1102 int *flags, void *cb, void *data, int datalen,
1103 struct lustre_handle *lockhs)
1105 struct obd_export *export = class_conn2export(conn);
1106 struct lov_obd *lov;
1107 struct lov_oinfo *loi;
1112 CERROR("LOV requires striping ea\n");
1116 if (lsm->lsm_magic != LOV_MAGIC) {
1117 CERROR("LOV striping magic bad %#lx != %#lx\n",
1118 lsm->lsm_magic, LOV_MAGIC);
1122 if (!export || !export->exp_obd)
1125 lov = &export->exp_obd->u.lov;
1126 for (i = 0,loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++,loi++) {
1127 struct ldlm_extent *extent = (struct ldlm_extent *)cookie;
1128 struct ldlm_extent sub_ext;
1129 struct lov_stripe_md submd;
1131 sub_ext.start = lov_stripe_offset(lsm, extent->start, i);
1132 sub_ext.end = lov_stripe_offset(lsm, extent->end, i);
1133 if (sub_ext.start == sub_ext.end)
1136 submd.lsm_object_id = loi->loi_id;
1137 /* XXX submd should be that from the subobj, it should come
1138 * opaquely from the LOV.
1140 submd.lsm_stripe_count = 0;
1141 /* XXX submd is not fully initialized here */
1142 rc = obd_enqueue(&(lov->tgts[loi->loi_ost_idx].conn), &submd,
1143 parent_lock, type, &sub_ext, sizeof(sub_ext),
1144 mode, flags, cb, data, datalen, &(lockhs[i]));
1145 // XXX add a lock debug statement here
1147 CERROR("Error enqueue objid "LPX64" subobj "LPX64
1148 " on OST idx %d: rc = %d\n", lsm->lsm_object_id,
1149 loi->loi_id, loi->loi_ost_idx, rc);
1150 memset(&(lockhs[i]), 0, sizeof(lockhs[i]));
/* Cancel the per-stripe locks previously obtained through lov_enqueue().
 *
 * lsm is required and must carry LOV_MAGIC; the export and its device must be
 * valid.  lockhs holds one handle per stripe; entries whose addr is 0 are
 * treated specially (the statement is not visible; presumably they are
 * skipped because no lock was taken for that stripe).  For the others a
 * minimal submd (object id of the sub-object, stripe count 0) is built and
 * obd_cancel() is called on the owning target; failures are logged.
 *
 * NOTE(review): the trailing arguments of the obd_cancel() call,
 * declarations of rc and i, conditions, braces and returns are missing from
 * this copy.
 */
1156 static int lov_cancel(struct lustre_handle *conn, struct lov_stripe_md *lsm,
1157 __u32 mode, struct lustre_handle *lockhs)
1159 struct obd_export *export = class_conn2export(conn);
1160 struct lov_obd *lov;
1161 struct lov_oinfo *loi;
1166 CERROR("LOV requires striping ea\n");
1170 if (lsm->lsm_magic != LOV_MAGIC) {
1171 CERROR("LOV striping magic bad %#lx != %#lx\n",
1172 lsm->lsm_magic, LOV_MAGIC);
1176 if (!export || !export->exp_obd)
1179 lov = &export->exp_obd->u.lov;
1180 for (i = 0,loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++,loi++) {
1181 struct lov_stripe_md submd;
1183 if (lockhs[i].addr == 0)
1186 submd.lsm_object_id = loi->loi_id;
1187 submd.lsm_stripe_count = 0;
1188 rc = obd_cancel(&lov->tgts[loi->loi_ost_idx].conn, &submd,
1191 CERROR("Error cancel objid "LPX64" subobj "LPX64
1192 " on OST idx %d: rc = %d\n", lsm->lsm_object_id,
1193 loi->loi_id, loi->loi_ost_idx, rc);
/* Cancel unused locks on every sub-object of a striped object.
 *
 * lsm is required and the export and its device must be valid.  For each
 * stripe a minimal submd (object id of the sub-object, stripe count 0) is
 * built and obd_cancel_unused() is called on the owning target; failures are
 * logged.
 *
 * NOTE(review): unlike the sibling functions no LOV_MAGIC check on lsm is
 * visible here; lines are missing from this copy (trailing call arguments,
 * declarations of rc and i, conditions, braces, returns), so the check may
 * simply be hidden - confirm.
 */
1198 static int lov_cancel_unused(struct lustre_handle *conn,
1199 struct lov_stripe_md *lsm, int flags)
1201 struct obd_export *export = class_conn2export(conn);
1202 struct lov_obd *lov;
1203 struct lov_oinfo *loi;
1208 CERROR("LOV requires striping ea for lock cancellation\n");
1212 if (!export || !export->exp_obd)
1215 lov = &export->exp_obd->u.lov;
1216 for (i = 0,loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++,loi++) {
1217 struct lov_stripe_md submd;
1219 submd.lsm_object_id = loi->loi_id;
1220 submd.lsm_stripe_count = 0;
1221 rc = obd_cancel_unused(&lov->tgts[loi->loi_ost_idx].conn,
1224 CERROR("Error cancel unused objid "LPX64" subobj "LPX64
1225 " on OST idx %d: rc = %d\n", lsm->lsm_object_id,
1226 loi->loi_id, loi->loi_ost_idx, rc);
/* Aggregate filesystem statistics over all active targets.
 *
 * The export and its device must be valid.  Inactive targets are not queried.
 * obd_statfs() fills lov_sfs for each active target; a failing target is
 * logged and the loop continues.  Two ways of combining the result are
 * visible: a whole-structure copy into osfs, and an accumulation of
 * os_bfree, os_bavail and os_blocks.  Presumably the copy is used for the
 * first successful target and the accumulation for the rest, but the
 * selecting condition is not visible in this copy.  The in-body comment
 * explains that the free-object count is deliberately taken from the first
 * target only.
 *
 * NOTE(review): declarations of i, rc and err, conditions, braces, the end
 * of the in-body comment and the return are missing from this copy.
 */
1231 static int lov_statfs(struct lustre_handle *conn, struct obd_statfs *osfs)
1233 struct obd_export *export = class_conn2export(conn);
1234 struct lov_obd *lov;
1235 struct obd_statfs lov_sfs;
1241 if (!export || !export->exp_obd)
1244 lov = &export->exp_obd->u.lov;
1246 /* We only get block data from the OBD */
1247 for (i = 0; i < lov->desc.ld_tgt_count; i++) {
1250 if (!lov->tgts[i].active)
1253 err = obd_statfs(&lov->tgts[i].conn, &lov_sfs);
1255 CERROR("Error statfs OSC %s idx %d: err = %d\n",
1256 lov->tgts[i].uuid, i, err);
1259 continue; /* XXX or break? - probably OK to continue */
1262 memcpy(osfs, &lov_sfs, sizeof(lov_sfs));
1265 osfs->os_bfree += lov_sfs.os_bfree;
1266 osfs->os_bavail += lov_sfs.os_bavail;
1267 osfs->os_blocks += lov_sfs.os_blocks;
1268 /* XXX not sure about this one - depends on policy.
1269 * - could be minimum if we always stripe on all OBDs
1270 * (but that would be wrong for any other policy,
1271 * if one of the OBDs has no more objects left)
1272 * - could be sum if we stripe whole objects
1273 * - could be average, just to give a nice number
1274 * - we just pick first OST and hope it is enough
1275 sfs->f_ffree += lov_sfs.f_ffree;
/* ioctl dispatcher for the LOV device.
 *
 * Visible cases:
 *   IOC_LOV_SET_OSC_ACTIVE  forwards to lov_set_osc_active() with the UUID in
 *       ioc_inlbuf1 and the requested state in ioc_offset.
 *   OBD_IOC_LOV_GET_CONFIG  re-reads the request from user space with
 *       obd_ioctl_getdata(), checks that inline buffer 1 can hold a struct
 *       lov_desc and inline buffer 2 can hold count UUIDs, copies lov->desc
 *       and the UUID of every target into them, and writes the buffer back
 *       with copy_to_user().
 * The final loop forwards cmd to every target with obd_iocontrol();
 * presumably this is the default case for commands not handled above - the
 * switch and case-label lines are not visible in this copy.
 *
 * NOTE(review): buf is obtained from obd_ioctl_getdata(); its release is not
 * visible here - confirm that every path frees it.  Declarations of rc, buf
 * and uuidp, the switch statement, error codes, braces and returns are
 * missing from this copy.
 */
1282 static int lov_iocontrol(long cmd, struct lustre_handle *conn, int len,
1283 void *karg, void *uarg)
1285 struct obd_device *obddev = class_conn2obd(conn);
1286 struct lov_obd *lov = &obddev->u.lov;
1287 struct obd_ioctl_data *data = karg;
1288 int i, count = lov->desc.ld_tgt_count;
1294 case IOC_LOV_SET_OSC_ACTIVE: {
1295 rc = lov_set_osc_active(lov,data->ioc_inlbuf1,data->ioc_offset);
1298 case OBD_IOC_LOV_GET_CONFIG: {
1299 struct lov_tgt_desc *tgtdesc;
1300 struct lov_desc *desc;
1306 if (obd_ioctl_getdata(&buf, &len, (void *)uarg))
1309 data = (struct obd_ioctl_data *)buf;
1311 if (sizeof(*desc) > data->ioc_inllen1) {
1316 if (sizeof(*uuidp) * count > data->ioc_inllen2) {
1321 desc = (struct lov_desc *)data->ioc_inlbuf1;
1322 uuidp = (obd_uuid_t *)data->ioc_inlbuf2;
1323 memcpy(desc, &(lov->desc), sizeof(*desc));
1325 tgtdesc = lov->tgts;
1326 for (i = 0; i < count; i++, uuidp++, tgtdesc++)
1327 memcpy(uuidp, tgtdesc->uuid, sizeof(*uuidp));
1329 rc = copy_to_user((void *)uarg, buf, len);
1339 for (i = 0; i < count; i++) {
1340 int err = obd_iocontrol(cmd, &lov->tgts[i].conn,
/* Method table that binds the functions of this file to the generic OBD
 * operations; it is passed to class_register_type() in lov_init().  The
 * initialiser uses the GNU "field: value" form.
 * NOTE(review): lov_setup, lov_open, lov_close, lov_brw and lov_punch are
 * defined in this file but no entry for them is visible here; the original
 * line numbering has gaps at those positions, so the entries are presumably
 * among the missing lines - confirm.
 */
1350 struct obd_ops lov_obd_ops = {
1351 o_attach: lov_attach,
1352 o_detach: lov_detach,
1354 o_connect: lov_connect,
1355 o_disconnect: lov_disconnect,
1356 o_statfs: lov_statfs,
1357 o_packmd: lov_packmd,
1358 o_unpackmd: lov_unpackmd,
1359 o_create: lov_create,
1360 o_destroy: lov_destroy,
1361 o_getattr: lov_getattr,
1362 o_setattr: lov_setattr,
1367 o_enqueue: lov_enqueue,
1368 o_cancel: lov_cancel,
1369 o_cancel_unused: lov_cancel_unused,
1370 o_iocontrol: lov_iocontrol
/* Version string spliced into the load-time banner printed by lov_init() and
 * into MODULE_DESCRIPTION. */
1374 #define LOV_VERSION "v0.1"
/* Module entry point.
 * Prints the driver banner, creates the slab cache "ll_lov_file_data" sized
 * for struct lov_file_handles (stored in lov_file_cache), and registers the
 * LOV device type with class_register_type() using lov_obd_ops and
 * status_class_var under the name OBD_LOV_DEVICENAME.
 *
 * NOTE(review): the remaining kmem_cache_create() arguments, the value
 * returned when cache creation fails, any cleanup of the cache when
 * registration fails, and the final return are missing from this copy.
 */
1376 static int __init lov_init(void)
1379 printk(KERN_INFO "Lustre Logical Object Volume driver " LOV_VERSION
1380 ", info@clusterfs.com\n");
1381 lov_file_cache = kmem_cache_create("ll_lov_file_data",
1382 sizeof(struct lov_file_handles),
1384 if (!lov_file_cache)
1387 rc = class_register_type(&lov_obd_ops, status_class_var,
1388 OBD_LOV_DEVICENAME);
/* Module exit point.
 * Destroys lov_file_cache, logging an error when kmem_cache_destroy()
 * reports failure, and then unregisters the LOV device type.
 * NOTE(review): the cache is destroyed before the type is unregistered, i.e.
 * in the same order as in lov_init() rather than the reverse - confirm this
 * ordering is intended.
 */
1392 static void __exit lov_exit(void)
1394 if (kmem_cache_destroy(lov_file_cache))
1395 CERROR("couldn't free LOV open cache\n");
1396 class_unregister_type(OBD_LOV_DEVICENAME);
/* Kernel module metadata, followed by the registration of lov_init() and
 * lov_exit() as the load and unload handlers of the module. */
1399 MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
1400 MODULE_DESCRIPTION("Lustre Logical Object Volume OBD driver " LOV_VERSION);
1401 MODULE_LICENSE("GPL");
1403 module_init(lov_init);
1404 module_exit(lov_exit);