Whamcloud - gitweb
b=625069
[fs/lustre-release.git] / lustre / lov / lov_obd.c
index 04f5481..05544f1 100644 (file)
 
 /* obd methods */
 static int lov_connect(struct lustre_handle *conn, struct obd_device *obd,
-                       char *cluuid)
+                       obd_uuid_t cluuid, struct recovd_obd *recovd,
+                       ptlrpc_recovery_cb_t recover)
 {
-        struct ptlrpc_request *req;
+        struct ptlrpc_request *req = NULL;
         struct lov_obd *lov = &obd->u.lov;
         struct client_obd *mdc = &lov->mdcobd->u.cli;
         struct lov_desc *desc = &lov->desc;
         struct lustre_handle mdc_conn;
-        uuid_t *uuidarray;
-        int rc, rc2;
-        int i;
+        obd_uuid_t *uuidarray;
+        int rc, rc2, i;
 
         MOD_INC_USE_COUNT;
         rc = class_connect(conn, obd, cluuid);
@@ -46,25 +46,36 @@ static int lov_connect(struct lustre_handle *conn, struct obd_device *obd,
                 RETURN(rc);
         }
 
+        /* We don't want to actually do the underlying connections more than
+         * once, so keep track. */
+        lov->refcount++;
+        if (lov->refcount > 1)
+                RETURN(0);
+
         /* retrieve LOV metadata from MDS */
-        rc = obd_connect(&mdc_conn, lov->mdcobd, NULL);
+        rc = obd_connect(&mdc_conn, lov->mdcobd, NULL, recovd, recover);
         if (rc) {
                 CERROR("cannot connect to mdc: rc = %d\n", rc);
-                GOTO(out, rc = -EINVAL);
+                GOTO(out_conn, rc);
         }
 
         rc = mdc_getlovinfo(obd, &mdc_conn, &req);
         rc2 = obd_disconnect(&mdc_conn);
-        if (rc || rc2) {
-                CERROR("cannot get lov info or disconnect %d/%d\n", rc, rc2);
-                GOTO(out, (rc) ? rc : rc2 );
+        if (rc) {
+                CERROR("cannot get lov info %d\n", rc);
+                GOTO(out_conn, rc);
+        }
+
+        if (rc2) {
+                CERROR("error disconnecting from MDS %d\n", rc2);
+                GOTO(out_conn, rc = rc2);
         }
 
         /* sanity... */
         if (req->rq_repmsg->bufcount < 2 ||
             req->rq_repmsg->buflens[0] < sizeof(*desc)) {
                 CERROR("LOV desc: invalid descriptor returned\n");
-                GOTO(out, rc = -EINVAL);
+                GOTO(out_conn, rc = -EINVAL);
         }
 
         memcpy(desc, lustre_msg_buf(req->rq_repmsg, 0), sizeof(*desc));
@@ -72,7 +83,7 @@ static int lov_connect(struct lustre_handle *conn, struct obd_device *obd,
 
         if (req->rq_repmsg->buflens[1] < sizeof(*uuidarray)*desc->ld_tgt_count){
                 CERROR("LOV desc: invalid uuid array returned\n");
-                GOTO(out, rc = -EINVAL);
+                GOTO(out_conn, rc = -EINVAL);
         }
 
         mdc->cl_max_mds_easize = lov_mds_md_size(desc->ld_tgt_count);
@@ -81,85 +92,110 @@ static int lov_connect(struct lustre_handle *conn, struct obd_device *obd,
         if (memcmp(obd->obd_uuid, desc->ld_uuid, sizeof(desc->ld_uuid))) {
                 CERROR("LOV desc: uuid %s not on mds device (%s)\n",
                        obd->obd_uuid, desc->ld_uuid);
-                GOTO(out, rc = -EINVAL);
+                GOTO(out_conn, rc = -EINVAL);
         }
 
         if (desc->ld_tgt_count > 1000) {
                 CERROR("LOV desc: target count > 1000 (%d)\n",
                        desc->ld_tgt_count);
-                GOTO(out, rc = -EINVAL);
+                GOTO(out_conn, rc = -EINVAL);
         }
 
-        if (desc->ld_default_stripe_count == 0)
-                desc->ld_default_stripe_count = desc->ld_tgt_count;
-
         /* Because of 64-bit divide/mod operations only work with a 32-bit
          * divisor in a 32-bit kernel, we cannot support a stripe width
-         * of 4GB or larger.
+         * of 4GB or larger on 32-bit CPUs.
          */
-        if (desc->ld_default_stripe_size * desc->ld_tgt_count > ~0UL) {
-                CERROR("LOV desc: stripe width > %lu on 32-bit system\n",
-                       ~0UL);
-                GOTO(out, rc = -EINVAL);
+        if ((desc->ld_default_stripe_count ?
+             desc->ld_default_stripe_count : desc->ld_tgt_count) *
+             desc->ld_default_stripe_size > ~0UL) {
+                CERROR("LOV: stripe width "LPU64"x%u > %lu on 32-bit system\n",
+                       desc->ld_default_stripe_size,
+                       desc->ld_default_stripe_count ?
+                       desc->ld_default_stripe_count : desc->ld_tgt_count,~0UL);
+                GOTO(out_conn, rc = -EINVAL);
         }
 
         lov->bufsize = sizeof(struct lov_tgt_desc) * desc->ld_tgt_count;
         OBD_ALLOC(lov->tgts, lov->bufsize);
         if (!lov->tgts) {
                 CERROR("Out of memory\n");
-                GOTO(out, rc = -ENOMEM);
+                GOTO(out_conn, rc = -ENOMEM);
         }
 
         uuidarray = lustre_msg_buf(req->rq_repmsg, 1);
-        for (i = 0 ; i < desc->ld_tgt_count; i++)
+        for (i = 0; i < desc->ld_tgt_count; i++)
                 memcpy(lov->tgts[i].uuid, uuidarray[i], sizeof(*uuidarray));
 
-        for (i = 0 ; i < desc->ld_tgt_count; i++) {
+        for (i = 0; i < desc->ld_tgt_count; i++) {
                 struct obd_device *tgt = class_uuid2obd(uuidarray[i]);
                 if (!tgt) {
                         CERROR("Target %s not attached\n", uuidarray[i]);
-                        GOTO(out_mem, rc = -EINVAL);
+                        GOTO(out_disc, rc = -EINVAL);
                 }
                 if (!(tgt->obd_flags & OBD_SET_UP)) {
                         CERROR("Target %s not set up\n", uuidarray[i]);
-                        GOTO(out_mem, rc = -EINVAL);
+                        GOTO(out_disc, rc = -EINVAL);
                 }
-                rc = obd_connect(&lov->tgts[i].conn, tgt, NULL);
+                rc = obd_connect(&lov->tgts[i].conn, tgt, NULL, recovd,
+                                 recover);
                 if (rc) {
                         CERROR("Target %s connect error %d\n",
                                uuidarray[i], rc);
-                        GOTO(out_mem, rc);
+                        GOTO(out_disc, rc);
                 }
-        }
-
- out_mem:
-        if (rc) {
-                for (i = 0 ; i < desc->ld_tgt_count; i++) {
-                        rc2 = obd_disconnect(&lov->tgts[i].conn);
-                        if (rc2)
-                                CERROR("BAD: Target %s disconnect error %d\n",
-                                       uuidarray[i], rc2);
+                rc = obd_iocontrol(IOC_OSC_REGISTER_LOV, &lov->tgts[i].conn,
+                                   sizeof(struct obd_device *), obd, NULL); 
+                if (rc) {
+                        CERROR("Target %s REGISTER_LOV error %d\n",
+                               uuidarray[i], rc);
+                        GOTO(out_disc, rc);
                 }
-                OBD_FREE(lov->tgts, lov->bufsize);
+                desc->ld_active_tgt_count++;
+                lov->tgts[i].active = 1;
         }
+
  out:
-        if (rc)
-                class_disconnect(conn);
-        ptlrpc_free_req(req);
+        ptlrpc_req_finished(req);
         return rc;
+
+ out_disc:
+        while (i-- > 0) {
+                desc->ld_active_tgt_count--;
+                lov->tgts[i].active = 0;
+                rc2 = obd_disconnect(&lov->tgts[i].conn);
+                if (rc2)
+                        CERROR("LOV Target %s disconnect error: rc = %d\n",
+                                uuidarray[i], rc2);
+        }
+        OBD_FREE(lov->tgts, lov->bufsize);
+ out_conn:
+        class_disconnect(conn);
+        goto out;
 }
 
 static int lov_disconnect(struct lustre_handle *conn)
 {
         struct obd_device *obd = class_conn2obd(conn);
         struct lov_obd *lov = &obd->u.lov;
-        int rc;
-        int i;
+        int rc, i;
 
         if (!lov->tgts)
                 goto out_local;
 
-        for (i = 0 ; i < lov->desc.ld_tgt_count; i++) {
+        /* Only disconnect the underlying layers on the final disconnect. */
+        lov->refcount--;
+        if (lov->refcount != 0)
+                goto out_local;
+
+        for (i = 0; i < lov->desc.ld_tgt_count; i++) {
+                if (!lov->tgts[i].active) {
+                        CERROR("Skipping disconnect for inactive OSC %s\n",
+                               lov->tgts[i].uuid);
+                        continue;
+                }
+
+                lov->desc.ld_active_tgt_count--;
+                lov->tgts[i].active = 0;
                 rc = obd_disconnect(&lov->tgts[i].conn);
                 if (rc) {
                         CERROR("Target %s disconnect error %d\n",
@@ -178,6 +214,64 @@ static int lov_disconnect(struct lustre_handle *conn)
         return rc;
 }
 
+/* Mark the target matching "uuid" (in)active and update the active count,
+ * under lov_lock.  Returns 0 on success, otherwise one of:
+ *  -EINVAL  : UUID can't be found in the LOV's target list
+ *  -ENOTCONN: The UUID is found, but the target connection is bad (!)
+ *  -EBADF   : The UUID is found, but the OBD is the wrong type (!)
+ *  -EALREADY: The OSC is already marked (in)active
+ */
+static int lov_set_osc_active(struct lov_obd *lov, obd_uuid_t uuid,
+                              int activate)
+{
+        struct obd_device *obd;
+        int i, rc = 0;
+        ENTRY;
+
+        CDEBUG(D_INFO, "Searching in lov %p for uuid %s (activate=%d)\n",
+               lov, uuid, activate);
+
+        spin_lock(&lov->lov_lock);
+        for (i = 0; i < lov->desc.ld_tgt_count; i++)
+                if (strncmp(uuid, lov->tgts[i].uuid,
+                            sizeof(lov->tgts[i].uuid)) == 0)
+                        break;
+
+        if (i == lov->desc.ld_tgt_count)
+                GOTO(out, rc = -EINVAL);
+
+        obd = class_conn2obd(&lov->tgts[i].conn);
+        if (obd == NULL) {
+                LBUG();
+                GOTO(out, rc = -ENOTCONN);
+        }
+
+        CDEBUG(D_INFO, "Found OBD %p type %s\n", obd, obd->obd_type->typ_name);
+        if (strcmp(obd->obd_type->typ_name, "osc") != 0) {
+                LBUG();
+                GOTO(out, rc = -EBADF);
+        }
+
+        if (lov->tgts[i].active == activate) {
+                CDEBUG(D_INFO, "OBD %p already %sactive!\n", obd,
+                       activate ? "" : "in");
+                GOTO(out, rc = -EALREADY);
+        }
+
+        CDEBUG(D_INFO, "Marking OBD %p %sactive\n", obd, activate ? "" : "in");
+
+        lov->tgts[i].active = activate;
+        if (activate)
+                lov->desc.ld_active_tgt_count++;
+        else
+                lov->desc.ld_active_tgt_count--;
+
+        EXIT;
+ out:
+        spin_unlock(&lov->lov_lock);
+        return rc;
+}
+
 static int lov_setup(struct obd_device *obd, obd_count len, void *buf)
 {
         struct obd_ioctl_data* data = buf;
@@ -195,6 +289,7 @@ static int lov_setup(struct obd_device *obd, obd_count len, void *buf)
                 RETURN(-EINVAL);
         }
 
+        spin_lock_init(&lov->lov_lock);
         lov->mdcobd = class_uuid2obd(data->ioc_inlbuf1);
         if (!lov->mdcobd) {
                 CERROR("LOV %s cannot locate MDC %s\n", obd->obd_uuid,
@@ -213,44 +308,84 @@ static int lov_create(struct lustre_handle *conn, struct obdo *oa,
         struct lov_obd *lov;
         struct lov_stripe_md *lsm;
         struct lov_oinfo *loi;
-        int sub_offset, stripe_offset;
-        int ost_count;
-        int rc = 0, i;
+        struct obdo *tmp;
+        int ost_count, ost_idx = 1, i, rc = 0;
         ENTRY;
 
-        if (!ea) {
-                CERROR("lov_create needs EA for striping information\n");
-                RETURN(-EINVAL);
-        }
+        LASSERT(ea);
+
         if (!export)
                 RETURN(-EINVAL);
 
+        tmp = obdo_alloc();
+        if (!tmp)
+                RETURN(-ENOMEM);
+
         lov = &export->exp_obd->u.lov;
+
+        spin_lock(&lov->lov_lock);
         ost_count = lov->desc.ld_tgt_count;
         oa->o_easize = lov_stripe_md_size(ost_count);
-        if (!*ea) {
-                OBD_ALLOC(*ea, oa->o_easize);
-                if (!*ea)
-                        RETURN(-ENOMEM);
-        }
 
         lsm = *ea;
+        if (!lsm) {
+                OBD_ALLOC(lsm, oa->o_easize);
+                if (!lsm) {
+                        spin_unlock(&lov->lov_lock);
+                        GOTO(out_tmp, rc = -ENOMEM);
+                }
+                lsm->lsm_magic = LOV_MAGIC;
+                lsm->lsm_mds_easize = lov_mds_md_size(ost_count);
+                ost_idx = 0; /* if lsm->lsm_stripe_offset is set yet */
+        }
+
         LASSERT(oa->o_valid & OBD_MD_FLID);
-        lsm->lsm_magic = LOV_MAGIC;
-        lsm->lsm_mds_easize = lov_mds_md_size(ost_count);
         lsm->lsm_object_id = oa->o_id;
         if (!lsm->lsm_stripe_count)
                 lsm->lsm_stripe_count = lov->desc.ld_default_stripe_count;
+        if (!lsm->lsm_stripe_count)
+                lsm->lsm_stripe_count = lov->desc.ld_active_tgt_count;
+        else if (lsm->lsm_stripe_count > lov->desc.ld_active_tgt_count)
+                lsm->lsm_stripe_count = lov->desc.ld_active_tgt_count;
 
         if (!lsm->lsm_stripe_size)
                 lsm->lsm_stripe_size = lov->desc.ld_default_stripe_size;
 
+        /* Because of 64-bit divide/mod operations only work with a 32-bit
+         * divisor in a 32-bit kernel, we cannot support a stripe width
+         * of 4GB or larger on 32-bit CPUs.
+         */
+        if (lsm->lsm_stripe_size * lsm->lsm_stripe_count > ~0UL) {
+                CERROR("LOV: stripe width "LPU64"x%u > %lu on 32-bit system\n",
+                       lsm->lsm_stripe_size, lsm->lsm_stripe_count, ~0UL);
+                spin_unlock(&lov->lov_lock);
+                GOTO(out_free, rc = -EINVAL);
+        }
+
         lsm->lsm_ost_count = ost_count;
-        stripe_offset = (((int)lsm->lsm_object_id * lsm->lsm_stripe_count) %
-                         ost_count);
-        sub_offset = ((int)lsm->lsm_object_id*lsm->lsm_stripe_count/ost_count)%
-                        lsm->lsm_stripe_count;
-        lsm->lsm_stripe_offset = stripe_offset + sub_offset;
+        if (!ost_idx || lsm->lsm_stripe_offset >= ost_count) {
+                int mult = lsm->lsm_object_id * lsm->lsm_stripe_count;
+                int stripe_offset = mult % ost_count;
+                int sub_offset = (mult / ost_count) % lsm->lsm_stripe_count;
+
+                lsm->lsm_stripe_offset = stripe_offset + sub_offset;
+        }
+
+        while (!lov->tgts[lsm->lsm_stripe_offset].active)
+                lsm->lsm_stripe_offset = (lsm->lsm_stripe_offset+1) % ost_count;
+
+        /* Pick the OSTs before we release the lock */
+        ost_idx = lsm->lsm_stripe_offset;
+        for (i = 0,loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++,loi++) {
+                CDEBUG(D_INODE, "objid "LPX64"[%d] is ost_idx %d (uuid %s)\n",
+                       lsm->lsm_object_id, i, ost_idx, lov->tgts[ost_idx].uuid);
+                loi->loi_ost_idx = ost_idx;
+                do {
+                        ost_idx = (ost_idx + 1) % ost_count;
+                } while (!lov->tgts[ost_idx].active);
+        }
+
+        spin_unlock(&lov->lov_lock);
 
         CDEBUG(D_INODE, "allocating %d subobjs for objid "LPX64" at idx %d\n",
                lsm->lsm_stripe_count,lsm->lsm_object_id,lsm->lsm_stripe_offset);
@@ -258,46 +393,48 @@ static int lov_create(struct lustre_handle *conn, struct obdo *oa,
         for (i = 0,loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++,loi++) {
                 struct lov_stripe_md obj_md;
                 struct lov_stripe_md *obj_mdp = &obj_md;
-                struct obdo tmp;
-                int ost_idx = (((sub_offset + i) % lsm->lsm_stripe_count) +
-                               stripe_offset) % ost_count;
+
+                ost_idx = loi->loi_ost_idx;
 
                 /* create data objects with "parent" OA */
-                memcpy(&tmp, oa, sizeof(tmp));
-                tmp.o_easize = sizeof(struct lov_stripe_md);
-                rc = obd_create(&lov->tgts[ost_idx].conn, &tmp, &obj_mdp);
+                memcpy(tmp, oa, sizeof(*tmp));
+                tmp->o_easize = sizeof(struct lov_stripe_md);
+                rc = obd_create(&lov->tgts[ost_idx].conn, tmp, &obj_mdp);
                 if (rc) {
                         CERROR("error creating objid "LPX64" sub-object on "
                                "OST idx %d: rc = %d\n", oa->o_id, ost_idx, rc);
                         GOTO(out_cleanup, rc);
                 }
-                loi->loi_id = tmp.o_id;
-                loi->loi_size = tmp.o_size;
-                loi->loi_ost_idx = ost_idx;
+                loi->loi_id = tmp->o_id;
+                loi->loi_size = tmp->o_size;
                 CDEBUG(D_INODE, "objid "LPX64" has subobj "LPX64" at idx %d\n",
                        lsm->lsm_object_id, loi->loi_id, ost_idx);
         }
 
+        *ea = lsm;
+
+ out_tmp:
+        obdo_free(tmp);
+        return rc;
+
  out_cleanup:
-        if (rc) {
-                while (i-- > 0) {
-                        struct obdo tmp;
-                        int err;
-
-                        --loi;
-                        /* destroy already created objects here */
-                        memcpy(&tmp, oa, sizeof(tmp));
-                        tmp.o_id = loi->loi_id;
-                        err = obd_destroy(&lov->tgts[loi->loi_ost_idx].conn,
-                                          &tmp, NULL);
-                        if (err)
-                                CERROR("Failed to remove objid "LPX64" subobj "
-                                       LPX64" on OST idx %d: rc = %d\n",
-                                       oa->o_id, loi->loi_id, loi->loi_ost_idx,
-                                       err);
-                }
+        while (i-- > 0) {
+                int err;
+
+                --loi;
+                /* destroy already created objects here */
+                memcpy(tmp, oa, sizeof(*tmp));
+                tmp->o_id = loi->loi_id;
+                err = obd_destroy(&lov->tgts[loi->loi_ost_idx].conn, tmp, NULL);
+                if (err)
+                        CERROR("Failed to uncreate objid "LPX64" subobj "
+                               LPX64" on OST idx %d: rc = %d\n",
+                               oa->o_id, loi->loi_id, loi->loi_ost_idx,
+                               err);
         }
-        return rc;
+ out_free:
+        OBD_FREE(lsm, oa->o_easize);
+        goto out_tmp;
 }
 
 static int lov_destroy(struct lustre_handle *conn, struct obdo *oa,
@@ -315,6 +452,12 @@ static int lov_destroy(struct lustre_handle *conn, struct obdo *oa,
                 RETURN(-EINVAL);
         }
 
+        if (lsm->lsm_magic != LOV_MAGIC) {
+                CERROR("LOV striping magic bad %#lx != %#lx\n",
+                       lsm->lsm_magic, LOV_MAGIC);
+                RETURN(-EINVAL);
+        }
+
         if (!export || !export->exp_obd)
                 RETURN(-ENODEV);
 
@@ -332,6 +475,55 @@ static int lov_destroy(struct lustre_handle *conn, struct obdo *oa,
         RETURN(rc);
 }
 
+/* compute the LOV object size implied by "ost_size" on stripe "stripeno" */
+static obd_size lov_stripe_size(struct lov_stripe_md *lsm, obd_size ost_size,
+                                int stripeno)
+{
+        unsigned long ssize  = lsm->lsm_stripe_size;
+        unsigned long swidth = ssize * lsm->lsm_stripe_count;
+        unsigned long stripe_size;
+        obd_size lov_size;
+
+        if (ost_size == 0)
+                return 0;
+
+        /* do_div(a, b) returns a % b, and a = a / b */
+        stripe_size = do_div(ost_size, ssize);
+
+        if (stripe_size)
+                lov_size = ost_size * swidth + stripeno * ssize + stripe_size;
+        else
+                lov_size = (ost_size - 1) * swidth + (stripeno + 1) * ssize;
+
+        return lov_size;
+}
+
+static void lov_merge_attrs(struct obdo *tgt, struct obdo *src, obd_flag valid,
+                            struct lov_stripe_md *lsm, int stripeno, int *new)
+{
+        if (*new) {
+                obdo_cpy_md(tgt, src, valid);
+                if (valid & OBD_MD_FLSIZE)
+                        tgt->o_size = lov_stripe_size(lsm,src->o_size,stripeno);
+                *new = 0;
+        } else {
+                if (valid & OBD_MD_FLSIZE) {
+                        /* keep the largest size so sparse files work */
+                        obd_size lov_size;
+
+                        lov_size = lov_stripe_size(lsm, src->o_size, stripeno);
+                        if (lov_size > tgt->o_size)
+                                tgt->o_size = lov_size;
+                }
+                if (valid & OBD_MD_FLBLOCKS)
+                        tgt->o_blocks += src->o_blocks;
+                if (valid & OBD_MD_FLCTIME && tgt->o_ctime < src->o_ctime)
+                        tgt->o_ctime = src->o_ctime;
+                if (valid & OBD_MD_FLMTIME && tgt->o_mtime < src->o_mtime)
+                        tgt->o_mtime = src->o_mtime;
+        }
+}
+
 static int lov_getattr(struct lustre_handle *conn, struct obdo *oa,
                        struct lov_stripe_md *lsm)
 {
@@ -340,7 +532,7 @@ static int lov_getattr(struct lustre_handle *conn, struct obdo *oa,
         struct lov_obd *lov;
         struct lov_oinfo *loi;
         int rc = 0, i;
-        int set = 0;
+        int new = 1;
         ENTRY;
 
         if (!lsm) {
@@ -348,6 +540,12 @@ static int lov_getattr(struct lustre_handle *conn, struct obdo *oa,
                 RETURN(-EINVAL);
         }
 
+        if (lsm->lsm_magic != LOV_MAGIC) {
+                CERROR("LOV striping magic bad %#lx != %#lx\n",
+                       lsm->lsm_magic, LOV_MAGIC);
+                RETURN(-EINVAL);
+        }
+
         if (!export || !export->exp_obd)
                 RETURN(-ENODEV);
 
@@ -360,6 +558,8 @@ static int lov_getattr(struct lustre_handle *conn, struct obdo *oa,
                 if (loi->loi_id == 0)
                         continue;
 
+                CDEBUG(D_INFO, "objid "LPX64"[%d] has subobj "LPX64" at idx "
+                       "%u\n", oa->o_id, i, loi->loi_id, loi->loi_ost_idx);
                 /* create data objects with "parent" OA */
                 memcpy(&tmp, oa, sizeof(tmp));
                 tmp.o_id = loi->loi_id;
@@ -373,22 +573,7 @@ static int lov_getattr(struct lustre_handle *conn, struct obdo *oa,
                                 rc = err;
                         continue; /* XXX or break? */
                 }
-                if (!set) {
-                        obdo_cpy_md(oa, &tmp, tmp.o_valid);
-                        set = 1;
-                } else {
-#warning FIXME: the size needs to be fixed for sparse files
-                        if (tmp.o_valid & OBD_MD_FLSIZE)
-                                oa->o_size += tmp.o_size;
-                        if (tmp.o_valid & OBD_MD_FLBLOCKS)
-                                oa->o_blocks += tmp.o_blocks;
-                        if (tmp.o_valid & OBD_MD_FLCTIME &&
-                            oa->o_ctime < tmp.o_ctime)
-                                oa->o_ctime = tmp.o_ctime;
-                        if (tmp.o_valid & OBD_MD_FLMTIME &&
-                            oa->o_mtime < tmp.o_mtime)
-                                oa->o_mtime = tmp.o_mtime;
-                }
+                lov_merge_attrs(oa, &tmp, tmp.o_valid, lsm, i, &new);
         }
         RETURN(rc);
 }
@@ -396,23 +581,34 @@ static int lov_getattr(struct lustre_handle *conn, struct obdo *oa,
 static int lov_setattr(struct lustre_handle *conn, struct obdo *oa,
                        struct lov_stripe_md *lsm)
 {
-        int rc = 0, i;
         struct obdo tmp;
         struct obd_export *export = class_conn2export(conn);
         struct lov_obd *lov;
         struct lov_oinfo *loi;
+        int rc = 0, i;
         ENTRY;
 
+        /* Note that this code is currently unused, hence LBUG(), just
+         * to know when/if it is ever revived that it needs cleanups.
+         */
+        LBUG();
+
         if (!lsm) {
                 CERROR("LOV requires striping ea\n");
                 RETURN(-EINVAL);
         }
 
+        if (lsm->lsm_magic != LOV_MAGIC) {
+                CERROR("LOV striping magic bad %#lx != %#lx\n",
+                       lsm->lsm_magic, LOV_MAGIC);
+                RETURN(-EINVAL);
+        }
+
         if (!export || !export->exp_obd)
                 RETURN(-ENODEV);
 
-        if (oa->o_valid && OBD_MD_FLSIZE)
-                CERROR("setting size on an LOV object is totally broken\n");
+        /* size changes should go through punch and not setattr */
+        LASSERT(!(oa->o_valid & OBD_MD_FLSIZE));
 
         lov = &export->exp_obd->u.lov;
         for (i = 0,loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++,loi++) {
@@ -437,10 +633,11 @@ static int lov_setattr(struct lustre_handle *conn, struct obdo *oa,
 static int lov_open(struct lustre_handle *conn, struct obdo *oa,
                     struct lov_stripe_md *lsm)
 {
-        struct obdo tmp;
+        struct obdo *tmp;
         struct obd_export *export = class_conn2export(conn);
         struct lov_obd *lov;
         struct lov_oinfo *loi;
+        int new = 1;
         int rc = 0, i;
         ENTRY;
 
@@ -449,18 +646,30 @@ static int lov_open(struct lustre_handle *conn, struct obdo *oa,
                 RETURN(-EINVAL);
         }
 
+        if (lsm->lsm_magic != LOV_MAGIC) {
+                CERROR("LOV striping magic bad %#lx != %#lx\n",
+                       lsm->lsm_magic, LOV_MAGIC);
+                RETURN(-EINVAL);
+        }
+
         if (!export || !export->exp_obd)
                 RETURN(-ENODEV);
 
+        tmp = obdo_alloc();
+        if (!tmp)
+                RETURN(-ENOMEM);
+
         lov = &export->exp_obd->u.lov;
+        oa->o_size = 0;
+        oa->o_blocks = 0;
         for (i = 0,loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++,loi++) {
                 int err;
 
                 /* create data objects with "parent" OA */
-                memcpy(&tmp, oa, sizeof(tmp));
-                tmp.o_id = loi->loi_id;
+                memcpy(tmp, oa, sizeof(*tmp));
+                tmp->o_id = loi->loi_id;
 
-                err = obd_open(&lov->tgts[loi->loi_ost_idx].conn, &tmp, NULL);
+                err = obd_open(&lov->tgts[loi->loi_ost_idx].conn, tmp, NULL);
                 if (err) {
                         CERROR("Error open objid "LPX64" subobj "LPX64
                                " on OST idx %d: rc = %d\n",
@@ -469,14 +678,17 @@ static int lov_open(struct lustre_handle *conn, struct obdo *oa,
                         if (!rc)
                                 rc = err;
                 }
+
+                lov_merge_attrs(oa, tmp, tmp->o_valid, lsm, i, &new);
         }
         /* FIXME: returning an error, but having opened some objects is a bad
          *        idea, since they will likely never be closed.  We either
          *        need to not return an error if _some_ objects could be
          *        opened, and leave it to read/write to return -EIO (with
          *        hopefully partial error status) or close all opened objects
-         *        and return an error.
+         *        and return an error.  I think the former is preferred.
          */
+        obdo_free(tmp);
         RETURN(rc);
 }
 
@@ -495,6 +707,12 @@ static int lov_close(struct lustre_handle *conn, struct obdo *oa,
                 RETURN(-EINVAL);
         }
 
+        if (lsm->lsm_magic != LOV_MAGIC) {
+                CERROR("LOV striping magic bad %#lx != %#lx\n",
+                       lsm->lsm_magic, LOV_MAGIC);
+                RETURN(-EINVAL);
+        }
+
         if (!export || !export->exp_obd)
                 RETURN(-ENODEV);
 
@@ -525,28 +743,35 @@ static int lov_close(struct lustre_handle *conn, struct obdo *oa,
 #warning FIXME: merge these two functions now that they are nearly the same
 
 /* compute ost offset in stripe "stripeno" corresponding to offset "lov_off" */
-static __u64 lov_offset(struct lov_stripe_md *lsm, __u64 lov_off, int stripeno)
+static obd_off lov_stripe_offset(struct lov_stripe_md *lsm, obd_off lov_off,
+                                 int stripeno)
 {
         unsigned long ssize  = lsm->lsm_stripe_size;
         unsigned long swidth = ssize * lsm->lsm_stripe_count;
-        unsigned long stripe_off;
+        unsigned long stripe_off, this_stripe;
 
-        if (lov_off == OBD_PUNCH_EOF)
-                return OBD_PUNCH_EOF;
+        if (lov_off == OBD_OBJECT_EOF || lov_off == 0)
+                return lov_off;
 
         /* do_div(a, b) returns a % b, and a = a / b */
         stripe_off = do_div(lov_off, swidth);
 
-        if (stripe_off < stripeno * ssize)
+        this_stripe = stripeno * ssize;
+        if (stripe_off <= this_stripe)
                 stripe_off = 0;
-        else
-                stripe_off -= stripeno * ssize;
+        else {
+                stripe_off -= this_stripe;
 
-        return lov_off + stripe_off;
+                if (stripe_off > ssize)
+                        stripe_off = ssize;
+        }
+
+
+        return lov_off * ssize + stripe_off;
 }
 
-/* compute which stripe offset "lov_off" will be written into */
-static int lov_stripe_which(struct lov_stripe_md *lsm, __u64 lov_off)
+/* compute which stripe number "lov_off" will be written into */
+static int lov_stripe_number(struct lov_stripe_md *lsm, obd_off lov_off)
 {
         unsigned long ssize  = lsm->lsm_stripe_size;
         unsigned long swidth = ssize * lsm->lsm_stripe_count;
@@ -573,7 +798,13 @@ static int lov_punch(struct lustre_handle *conn, struct obdo *oa,
         ENTRY;
 
         if (!lsm) {
-                CERROR("LOV requires striping ea for desctruction\n");
+                CERROR("LOV requires striping ea\n");
+                RETURN(-EINVAL);
+        }
+
+        if (lsm->lsm_magic != LOV_MAGIC) {
+                CERROR("LOV striping magic bad %#lx != %#lx\n",
+                       lsm->lsm_magic, LOV_MAGIC);
                 RETURN(-EINVAL);
         }
 
@@ -582,8 +813,8 @@ static int lov_punch(struct lustre_handle *conn, struct obdo *oa,
 
         lov = &export->exp_obd->u.lov;
         for (i = 0,loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++,loi++) {
-                __u64 starti = lov_offset(lsm, start, i);
-                __u64 endi = lov_offset(lsm, end, i);
+                obd_off starti = lov_stripe_offset(lsm, start, i);
+                obd_off endi = lov_stripe_offset(lsm, end, i);
                 int err;
 
                 if (starti == endi)
@@ -593,7 +824,7 @@ static int lov_punch(struct lustre_handle *conn, struct obdo *oa,
                 tmp.o_id = loi->loi_id;
 
                 err = obd_punch(&lov->tgts[loi->loi_ost_idx].conn, &tmp, NULL,
-                               starti, endi);
+                                starti, endi);
                 if (err) {
                         CERROR("Error punch objid "LPX64" subobj "LPX64
                                " on OST idx %d: rc = %d\n",
@@ -647,6 +878,17 @@ static inline int lov_brw(int cmd, struct lustre_handle *conn,
         int *where;
         ENTRY;
 
+        if (!lsm) {
+                CERROR("LOV requires striping ea\n");
+                RETURN(-EINVAL);
+        }
+
+        if (lsm->lsm_magic != LOV_MAGIC) {
+                CERROR("LOV striping magic bad %#lx != %#lx\n",
+                       lsm->lsm_magic, LOV_MAGIC);
+                RETURN(-EINVAL);
+        }
+
         lov = &export->exp_obd->u.lov;
 
         our_cb = ll_init_cb();
@@ -672,14 +914,13 @@ static inline int lov_brw(int cmd, struct lustre_handle *conn,
         our_cb->data = cbd;
 
         for (i = 0; i < oa_bufs; i++) {
-                where[i] = lov_stripe_which(lsm, pga[i].off);
+                where[i] = lov_stripe_number(lsm, pga[i].off);
                 if (stripeinfo[where[i]].bufct++ == 0)
                         atomic_inc(&our_cb->refcount);
         }
 
-        for (i = 0, loi = lsm->lsm_oinfo, si = si_last = stripeinfo;
-             i < stripe_count;
-             i++, loi++, si_last = si, si++) {
+        for (i = 0, loi = lsm->lsm_oinfo, si_last = si = stripeinfo;
+             i < stripe_count; i++, loi++, si_last = si, si++) {
                 if (i > 0)
                         si->index = si_last->index + si_last->bufct;
                 si->lsm.lsm_object_id = loi->loi_id;
@@ -693,15 +934,16 @@ static inline int lov_brw(int cmd, struct lustre_handle *conn,
                 shift = stripeinfo[which].index + stripeinfo[which].subcount;
                 LASSERT(shift < oa_bufs);
                 ioarr[shift] = pga[i];
-                ioarr[shift].off = lov_offset(lsm, pga[i].off, which);
+                ioarr[shift].off = lov_stripe_offset(lsm, pga[i].off, which);
                 stripeinfo[which].subcount++;
         }
 
-        for (i = 0, si = stripeinfo; i < stripe_count; i++) {
+        for (i = 0, si = stripeinfo; i < stripe_count; i++, si++) {
                 int shift = si->index;
 
                 if (si->bufct) {
                         LASSERT(shift < oa_bufs);
+                        /* XXX handle error returns here */
                         obd_brw(cmd, &lov->tgts[si->ost_idx].conn,
                                 &si->lsm, si->bufct, &ioarr[shift],
                                 lov_osc_brw_callback, our_cb);
@@ -733,7 +975,13 @@ static int lov_enqueue(struct lustre_handle *conn, struct lov_stripe_md *lsm,
         ENTRY;
 
         if (!lsm) {
-                CERROR("LOV requires striping ea for desctruction\n");
+                CERROR("LOV requires striping ea\n");
+                RETURN(-EINVAL);
+        }
+
+        if (lsm->lsm_magic != LOV_MAGIC) {
+                CERROR("LOV striping magic bad %#lx != %#lx\n",
+                       lsm->lsm_magic, LOV_MAGIC);
                 RETURN(-EINVAL);
         }
 
@@ -746,8 +994,8 @@ static int lov_enqueue(struct lustre_handle *conn, struct lov_stripe_md *lsm,
                 struct ldlm_extent sub_ext;
                 struct lov_stripe_md submd;
 
-                sub_ext.start = lov_offset(lsm, extent->start, i);
-                sub_ext.end = lov_offset(lsm, extent->end, i);
+                sub_ext.start = lov_stripe_offset(lsm, extent->start, i);
+                sub_ext.end = lov_stripe_offset(lsm, extent->end, i);
                 if (sub_ext.start == sub_ext.end)
                         continue;
 
@@ -780,7 +1028,13 @@ static int lov_cancel(struct lustre_handle *conn, struct lov_stripe_md *lsm,
         ENTRY;
 
         if (!lsm) {
-                CERROR("LOV requires striping ea for lock cancellation\n");
+                CERROR("LOV requires striping ea\n");
+                RETURN(-EINVAL);
+        }
+
+        if (lsm->lsm_magic != LOV_MAGIC) {
+                CERROR("LOV striping magic bad %#lx != %#lx\n",
+                       lsm->lsm_magic, LOV_MAGIC);
                 RETURN(-EINVAL);
         }
 
@@ -807,6 +1061,65 @@ static int lov_cancel(struct lustre_handle *conn, struct lov_stripe_md *lsm,
         RETURN(rc);
 }
 
+/*
+ * Cancel unused locks on every stripe object of a striped file.
+ *
+ * Makes one obd_cancel_unused() call per entry of lsm->lsm_oinfo, on the
+ * target connection selected by that entry's loi_ost_idx; @local_only is
+ * passed through unchanged.  A failing stripe is logged and the remaining
+ * stripes are still attempted.
+ *
+ * Returns -EINVAL if @lsm is NULL or its magic is not LOV_MAGIC, -ENODEV if
+ * @conn does not resolve to an export with a device, and otherwise 0 or the
+ * first non-zero rc returned by obd_cancel_unused().
+ */
+static int lov_cancel_unused(struct lustre_handle *conn,
+                             struct lov_stripe_md *lsm, int local_only)
+{
+        struct obd_export *export = class_conn2export(conn);
+        struct lov_obd *lov;
+        struct lov_oinfo *loi;
+        int rc = 0, i;
+        ENTRY;
+
+        if (!lsm) {
+                CERROR("LOV requires striping ea for lock cancellation\n");
+                RETURN(-EINVAL);
+        }
+
+        if (lsm->lsm_magic != LOV_MAGIC) {
+                CERROR("LOV striping magic bad %#lx != %#lx\n",
+                       lsm->lsm_magic, LOV_MAGIC);
+                RETURN(-EINVAL);
+        }
+
+        if (!export || !export->exp_obd)
+                RETURN(-ENODEV);
+
+        lov = &export->exp_obd->u.lov;
+        for (i = 0,loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++,loi++) {
+                struct lov_stripe_md submd;
+                int err;
+
+                /* Per-stripe descriptor: only these three fields are set. */
+                submd.lsm_object_id = loi->loi_id;
+                submd.lsm_mds_easize = lov_mds_md_size(lsm->lsm_ost_count);
+                submd.lsm_stripe_count = 0;
+                err = obd_cancel_unused(&lov->tgts[loi->loi_ost_idx].conn,
+                                        &submd, local_only);
+                if (err) {
+                        CERROR("Error cancel unused objid "LPX64" subobj "LPX64
+                               " on OST idx %d: rc = %d\n", lsm->lsm_object_id,
+                               loi->loi_id, loi->loi_ost_idx, err);
+                        /* Remember the first failure; a later success must
+                         * not overwrite it. */
+                        if (!rc)
+                                rc = err;
+                }
+        }
+        RETURN(rc);
+}
+
 static int lov_statfs(struct lustre_handle *conn, struct obd_statfs *osfs)
 {
         struct obd_export *export = class_conn2export(conn);
@@ -823,7 +1111,7 @@ static int lov_statfs(struct lustre_handle *conn, struct obd_statfs *osfs)
         lov = &export->exp_obd->u.lov;
 
         /* We only get block data from the OBD */
-        for (i = 0 ; i < lov->desc.ld_tgt_count; i++) {
+        for (i = 0; i < lov->desc.ld_tgt_count; i++) {
                 int err;
 
                 err = obd_statfs(&lov->tgts[i].conn, &lov_sfs);
@@ -855,6 +1143,53 @@ static int lov_statfs(struct lustre_handle *conn, struct obd_statfs *osfs)
         RETURN(rc);
 }
 
+/*
+ * o_iocontrol method of the LOV device.
+ *
+ * IOC_LOV_SET_OSC_ACTIVE is handled by lov_set_osc_active(), fed ioc_inlbuf1
+ * and ioc_offset from the ioctl data in @karg.  Any other @cmd is forwarded
+ * with obd_iocontrol() to each of the ld_tgt_count target connections, with
+ * @len and @karg unchanged and a NULL user argument; @uarg is not used.
+ *
+ * Returns -ENODEV if @conn does not resolve to a device, -ENOTTY when a
+ * forwarded command finds no targets, the lov_set_osc_active() result for
+ * IOC_LOV_SET_OSC_ACTIVE, and otherwise 0 or the first non-zero rc from a
+ * target (every target is still called).
+ */
+static int lov_iocontrol(long cmd, struct lustre_handle *conn, int len,
+                         void *karg, void *uarg)
+{
+        struct obd_device *obddev = class_conn2obd(conn);
+        struct obd_ioctl_data *data = karg;
+        struct lov_obd *lov;
+        int rc, i;
+        ENTRY;
+
+        /* Same guard the other LOV methods apply to their export lookup;
+         * presumably class_conn2obd() yields NULL for a stale handle. */
+        if (!obddev)
+                RETURN(-ENODEV);
+
+        lov = &obddev->u.lov;
+        switch (cmd) {
+        case IOC_LOV_SET_OSC_ACTIVE:
+                rc = lov_set_osc_active(lov,data->ioc_inlbuf1,data->ioc_offset);
+                break;
+        default:
+                if (lov->desc.ld_tgt_count == 0)
+                        RETURN(-ENOTTY);
+                rc = 0;
+                for (i = 0; i < lov->desc.ld_tgt_count; i++) {
+                        int err = obd_iocontrol(cmd, &lov->tgts[i].conn,
+                                                len, data, NULL);
+                        if (err && !rc)
+                                rc = err;
+                }
+                break;
+        }
+
+        RETURN(rc);
+}
 
 struct obd_ops lov_obd_ops = {
         o_setup:       lov_setup,
@@ -870,7 +1185,9 @@ struct obd_ops lov_obd_ops = {
         o_brw:         lov_brw,
         o_punch:       lov_punch,
         o_enqueue:     lov_enqueue,
-        o_cancel:      lov_cancel
+        o_cancel:      lov_cancel,
+        o_cancel_unused: lov_cancel_unused,
+        o_iocontrol:   lov_iocontrol
 };