Whamcloud - gitweb
land b_md onto HEAD:
[fs/lustre-release.git] / lustre / lov / lov_obd.c
1  /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  *  lov/lov.c
5  *
6  * Copyright (C) 2002 Cluster File Systems, Inc.
7  * Author: Phil Schwan <phil@off.net>
8  *         Peter Braam <braam@clusterfs.com>
9  *
10  * This code is issued under the GNU General Public License.
11  * See the file COPYING in this distribution
12  */
13
14 #define EXPORT_SYMTAB
15 #define DEBUG_SUBSYSTEM S_LOV
16
17 #include <linux/slab.h>
18 #include <linux/module.h>
19 #include <linux/obd_support.h>
20 #include <linux/lustre_lib.h>
21 #include <linux/lustre_net.h>
22 #include <linux/lustre_idl.h>
23 #include <linux/lustre_mds.h>
24 #include <linux/obd_class.h>
25 #include <linux/obd_lov.h>
26 #include <linux/init.h>
27 #include <linux/random.h>
28 #include <linux/slab.h>
29 #include <asm/div64.h>
30 #include <linux/lprocfs_status.h>
31
32 extern struct lprocfs_vars status_var_nm_1[];
33 extern struct lprocfs_vars status_class_var[];
34
35 static kmem_cache_t *lov_file_cache;
36
/* Bookkeeping for one open of a striped object: allocated from
 * lov_file_cache in lov_open() and looked up again from a lustre_handle
 * by lov_handle2lfh(). */
struct lov_file_handles {
        struct list_head lfh_list;   /* linked on the export's led_open_head */
        __u64 lfh_cookie;            /* random value; must match handle->cookie */
        int lfh_count;               /* number of entries in lfh_handles */
        struct lustre_handle *lfh_handles; /* one handle slot per stripe */
};
43
44 extern int lov_packmd(struct lustre_handle *conn, struct lov_mds_md **lmm,
45                        struct lov_stripe_md *lsm);
46 extern int lov_unpackmd(struct lustre_handle *conn, struct lov_stripe_md **lsm,
47                          struct lov_mds_md *lmm);
48
49 /* obd methods */
50 int lov_attach(struct obd_device *dev, obd_count len, void *data)
51 {
52         return lprocfs_reg_obd(dev, status_var_nm_1, dev);
53 }
54
/* Detach method: remove the lprocfs registration for this device. */
int lov_detach(struct obd_device *dev)
{
        int rc;

        rc = lprocfs_dereg_obd(dev);
        return rc;
}
59
60 static int lov_connect(struct lustre_handle *conn, struct obd_device *obd,
61                        obd_uuid_t cluuid, struct recovd_obd *recovd,
62                        ptlrpc_recovery_cb_t recover)
63 {
64         struct ptlrpc_request *req = NULL;
65         struct lov_obd *lov = &obd->u.lov;
66         struct client_obd *mdc = &lov->mdcobd->u.cli;
67         struct lov_desc *desc = &lov->desc;
68         struct obd_export *exp;
69         struct lustre_handle mdc_conn;
70         obd_uuid_t *uuidarray;
71         int rc, rc2, i;
72         ENTRY;
73
74         MOD_INC_USE_COUNT;
75         rc = class_connect(conn, obd, cluuid);
76         if (rc)
77                 GOTO(out_dec, rc);
78
79         /* We don't want to actually do the underlying connections more than
80          * once, so keep track. */
81         lov->refcount++;
82         if (lov->refcount > 1)
83                 RETURN(0);
84
85         exp = class_conn2export(conn);
86         INIT_LIST_HEAD(&exp->exp_lov_data.led_open_head);
87
88         /* retrieve LOV metadata from MDS */
89         rc = obd_connect(&mdc_conn, lov->mdcobd, NULL, recovd, recover);
90         if (rc) {
91                 CERROR("cannot connect to mdc: rc = %d\n", rc);
92                 GOTO(out_conn, rc);
93         }
94
95         rc = mdc_getlovinfo(obd, &mdc_conn, &req);
96         rc2 = obd_disconnect(&mdc_conn);
97         if (rc) {
98                 CERROR("cannot get lov info %d\n", rc);
99                 GOTO(out_conn, rc);
100         }
101
102         if (rc2) {
103                 CERROR("error disconnecting from MDS %d\n", rc2);
104                 GOTO(out_conn, rc = rc2);
105         }
106
107         /* sanity... */
108         if (req->rq_repmsg->bufcount < 2 ||
109             req->rq_repmsg->buflens[0] < sizeof(*desc)) {
110                 CERROR("LOV desc: invalid descriptor returned\n");
111                 GOTO(out_conn, rc = -EINVAL);
112         }
113
114         memcpy(desc, lustre_msg_buf(req->rq_repmsg, 0), sizeof(*desc));
115         lov_unpackdesc(desc);
116
117         if (req->rq_repmsg->buflens[1] < sizeof(*uuidarray)*desc->ld_tgt_count){
118                 CERROR("LOV desc: invalid uuid array returned\n");
119                 GOTO(out_conn, rc = -EINVAL);
120         }
121
122         if (memcmp(obd->obd_uuid, desc->ld_uuid, sizeof(desc->ld_uuid))) {
123                 CERROR("LOV desc: uuid %s not on mds device (%s)\n",
124                        obd->obd_uuid, desc->ld_uuid);
125                 GOTO(out_conn, rc = -EINVAL);
126         }
127
128         if (desc->ld_tgt_count > 1000) {
129                 CERROR("LOV desc: target count > 1000 (%d)\n",
130                        desc->ld_tgt_count);
131                 GOTO(out_conn, rc = -EINVAL);
132         }
133
134         /* Because of 64-bit divide/mod operations only work with a 32-bit
135          * divisor in a 32-bit kernel, we cannot support a stripe width
136          * of 4GB or larger on 32-bit CPUs.
137          */
138         if ((desc->ld_default_stripe_count ?
139              desc->ld_default_stripe_count : desc->ld_tgt_count) *
140              desc->ld_default_stripe_size > ~0UL) {
141                 CERROR("LOV: stripe width "LPU64"x%u > %lu on 32-bit system\n",
142                        desc->ld_default_stripe_size,
143                        desc->ld_default_stripe_count ?
144                        desc->ld_default_stripe_count : desc->ld_tgt_count,~0UL);
145                 GOTO(out_conn, rc = -EINVAL);
146         }
147
148         lov->bufsize = sizeof(struct lov_tgt_desc) * desc->ld_tgt_count;
149         OBD_ALLOC(lov->tgts, lov->bufsize);
150         if (!lov->tgts) {
151                 CERROR("Out of memory\n");
152                 GOTO(out_conn, rc = -ENOMEM);
153         }
154
155         uuidarray = lustre_msg_buf(req->rq_repmsg, 1);
156         for (i = 0; i < desc->ld_tgt_count; i++)
157                 memcpy(lov->tgts[i].uuid, uuidarray[i], sizeof(*uuidarray));
158
159         for (i = 0; i < desc->ld_tgt_count; i++) {
160                 struct obd_device *tgt = class_uuid2obd(uuidarray[i]);
161                 int rc2;
162
163                 if (!tgt) {
164                         CERROR("Target %s not attached\n", uuidarray[i]);
165                         GOTO(out_disc, rc = -EINVAL);
166                 }
167
168                 if (!(tgt->obd_flags & OBD_SET_UP)) {
169                         CERROR("Target %s not set up\n", uuidarray[i]);
170                         GOTO(out_disc, rc = -EINVAL);
171                 }
172
173                 rc = obd_connect(&lov->tgts[i].conn, tgt, NULL, recovd,
174                                  recover);
175
176                 /* Register even if connect failed, so that we get reactivation
177                  * notices.
178                  */
179                 rc2 = obd_iocontrol(IOC_OSC_REGISTER_LOV, &lov->tgts[i].conn,
180                                     sizeof(struct obd_device *), obd, NULL);
181                 if (rc2) {
182                         CERROR("Target %s REGISTER_LOV error %d\n",
183                                uuidarray[i], rc2);
184                         GOTO(out_disc, rc2);
185                 }
186
187                 /* But mark failed-connect OSCs as inactive! */
188                 if (rc) {
189                         CDEBUG(D_INFO, "Target %s connect error %d\n",
190                                uuidarray[i], rc);
191                         LASSERT(lov->tgts[i].active == 0);
192                         rc = 0;
193                         continue;
194                 }
195                 
196                 desc->ld_active_tgt_count++;
197                 lov->tgts[i].active = 1;
198         }
199
200         mdc->cl_max_mds_easize = obd_size_wiremd(conn, NULL);
201
202  out:
203         ptlrpc_req_finished(req);
204         RETURN(rc);
205
206  out_disc:
207         while (i-- > 0) {
208                 desc->ld_active_tgt_count--;
209                 lov->tgts[i].active = 0;
210                 rc2 = obd_disconnect(&lov->tgts[i].conn);
211                 if (rc2)
212                         CERROR("LOV Target %s disconnect error: rc = %d\n",
213                                 uuidarray[i], rc2);
214         }
215         OBD_FREE(lov->tgts, lov->bufsize);
216  out_conn:
217         class_disconnect(conn);
218  out_dec:
219         MOD_DEC_USE_COUNT;
220         goto out;
221 }
222
223 static int lov_disconnect(struct lustre_handle *conn)
224 {
225         struct obd_device *obd = class_conn2obd(conn);
226         struct lov_obd *lov = &obd->u.lov;
227         struct obd_export *exp;
228         struct list_head *p, *n;
229         int rc, i;
230
231         if (!lov->tgts)
232                 goto out_local;
233
234         /* Only disconnect the underlying layers on the final disconnect. */
235         lov->refcount--;
236         if (lov->refcount != 0)
237                 goto out_local;
238
239         for (i = 0; i < lov->desc.ld_tgt_count; i++) {
240                 rc = obd_disconnect(&lov->tgts[i].conn);
241                 if (rc) {
242                         if (lov->tgts[i].active) {
243                                 CERROR("Target %s disconnect error %d\n",
244                                        lov->tgts[i].uuid, rc);
245                         }
246                         rc = 0;
247                 }
248                 if (lov->tgts[i].active) {
249                         lov->desc.ld_active_tgt_count--;
250                         lov->tgts[i].active = 0;
251                 }
252         }
253         OBD_FREE(lov->tgts, lov->bufsize);
254         lov->bufsize = 0;
255         lov->tgts = NULL;
256
257         exp = class_conn2export(conn);
258         list_for_each_safe(p, n, &exp->exp_lov_data.led_open_head) {
259                 /* XXX close these, instead of just discarding them? */
260                 struct lov_file_handles *lfh;
261                 lfh = list_entry(p, typeof(*lfh), lfh_list);
262                 CERROR("discarding open LOV handle %p:"LPX64"\n",
263                        lfh, lfh->lfh_cookie);
264                 list_del(&lfh->lfh_list);
265                 OBD_FREE(lfh->lfh_handles,
266                          lfh->lfh_count * sizeof(*lfh->lfh_handles));
267                 kmem_cache_free(lov_file_cache, lfh);
268         }
269
270  out_local:
271         rc = class_disconnect(conn);
272         if (!rc)
273                 MOD_DEC_USE_COUNT;
274         return rc;
275 }
276
277 /* Error codes:
278  *
279  *  -EINVAL  : UUID can't be found in the LOV's target list
280  *  -ENOTCONN: The UUID is found, but the target connection is bad (!)
281  *  -EBADF   : The UUID is found, but the OBD is the wrong type (!)
282  *  -EALREADY: The OSC is already marked (in)active
283  */
284 static int lov_set_osc_active(struct lov_obd *lov, obd_uuid_t uuid,
285                               int activate)
286 {
287         struct obd_device *obd;
288         int i, rc = 0;
289         ENTRY;
290
291         CDEBUG(D_INFO, "Searching in lov %p for uuid %s (activate=%d)\n",
292                lov, uuid, activate);
293
294         spin_lock(&lov->lov_lock);
295         for (i = 0; i < lov->desc.ld_tgt_count; i++)
296                 if (strncmp(uuid, lov->tgts[i].uuid,
297                             sizeof(lov->tgts[i].uuid)) == 0)
298                         break;
299
300         if (i == lov->desc.ld_tgt_count)
301                 GOTO(out, rc = -EINVAL);
302
303         obd = class_conn2obd(&lov->tgts[i].conn);
304         if (obd == NULL) {
305                 LBUG();
306                 GOTO(out, rc = -ENOTCONN);
307         }
308
309         CDEBUG(D_INFO, "Found OBD %p type %s\n", obd, obd->obd_type->typ_name);
310         if (strcmp(obd->obd_type->typ_name, "osc") != 0) {
311                 LBUG();
312                 GOTO(out, rc = -EBADF);
313         }
314
315         if (lov->tgts[i].active == activate) {
316                 CDEBUG(D_INFO, "OBD %p already %sactive!\n", obd,
317                        activate ? "" : "in");
318                 GOTO(out, rc = -EALREADY);
319         }
320
321         CDEBUG(D_INFO, "Marking OBD %p %sactive\n", obd, activate ? "" : "in");
322
323         lov->tgts[i].active = activate;
324         if (activate) {
325                 /*
326                  * foreach(export)
327                  *     foreach(open_file)
328                  *         if (file_handle uses this_osc)
329                  *             if (has_no_filehandle)
330                  *                 open(file_handle, this_osc);
331                  */
332                 /* XXX reconnect? */
333                 lov->desc.ld_active_tgt_count++;
334         } else {
335                 /*
336                  * Should I invalidate filehandles that refer to this OSC, so
337                  * that I reopen them during reactivation?
338                  */
339                 /* XXX disconnect from OSC? */
340                 lov->desc.ld_active_tgt_count--;
341         }
342
343         EXIT;
344  out:
345         spin_unlock(&lov->lov_lock);
346         return rc;
347 }
348
349 static int lov_setup(struct obd_device *obd, obd_count len, void *buf)
350 {
351         struct obd_ioctl_data *data = buf;
352         struct lov_obd *lov = &obd->u.lov;
353         int rc = 0;
354         ENTRY;
355
356         if (data->ioc_inllen1 < 1) {
357                 CERROR("LOV setup requires an MDC UUID\n");
358                 RETURN(-EINVAL);
359         }
360
361         if (data->ioc_inllen1 > 37) {
362                 CERROR("mdc UUID must be 36 characters or less\n");
363                 RETURN(-EINVAL);
364         }
365
366         spin_lock_init(&lov->lov_lock);
367         lov->mdcobd = class_uuid2obd(data->ioc_inlbuf1);
368         if (!lov->mdcobd) {
369                 CERROR("LOV %s cannot locate MDC %s\n", obd->obd_uuid,
370                        data->ioc_inlbuf1);
371                 rc = -EINVAL;
372         }
373         RETURN(rc);
374 }
375
376 static struct lov_file_handles *lov_handle2lfh(struct lustre_handle *handle)
377 {
378         struct lov_file_handles *lfh = NULL;
379
380         if (!handle || !handle->addr)
381                 RETURN(NULL);
382
383         lfh = (struct lov_file_handles *)(unsigned long)(handle->addr);
384         if (!kmem_cache_validate(lov_file_cache, lfh))
385                 RETURN(NULL);
386
387         if (lfh->lfh_cookie != handle->cookie)
388                 RETURN(NULL);
389
390         return lfh;
391 }
392
393 /* the LOV expects oa->o_id to be set to the LOV object id */
394 static int lov_create(struct lustre_handle *conn, struct obdo *oa,
395                       struct lov_stripe_md **ea)
396 {
397         struct obd_export *export = class_conn2export(conn);
398         struct lov_obd *lov;
399         struct lov_stripe_md *lsm;
400         struct lov_oinfo *loi;
401         struct obdo *tmp;
402         int ost_count, ost_idx = 1;
403         int rc = 0, i;
404         ENTRY;
405
406         LASSERT(ea);
407
408         if (!export)
409                 RETURN(-EINVAL);
410
411         tmp = obdo_alloc();
412         if (!tmp)
413                 RETURN(-ENOMEM);
414
415         lov = &export->exp_obd->u.lov;
416
417         if (!lov->desc.ld_active_tgt_count)
418                 RETURN(-EIO);
419
420         spin_lock(&lov->lov_lock);
421         ost_count = lov->desc.ld_tgt_count;
422
423         lsm = *ea;
424
425         /* Can't create more stripes than we have targets (incl inactive). */
426         if (lsm && lsm->lsm_stripe_count > lov->desc.ld_tgt_count)
427                 GOTO(out_tmp, rc = -EINVAL);
428
429         /* Free the user lsm if it needs to be changed, to avoid memory leaks */
430         if (!lsm || (lsm &&
431                      lsm->lsm_stripe_count > lov->desc.ld_active_tgt_count)) {
432                 struct lov_stripe_md *lsm_new = NULL;
433                 rc = obd_alloc_memmd(conn, &lsm_new);
434                 if (rc < 0) {
435                         spin_unlock(&lov->lov_lock);
436                         if (lsm)
437                                 obd_free_memmd(conn, &lsm);
438                         GOTO(out_tmp, rc);
439                 }
440                 if (lsm) {
441                         LASSERT(lsm->lsm_magic == LOV_MAGIC);
442                         CERROR("replace user LOV MD: stripes %u > %u active\n",
443                                lsm->lsm_stripe_count,
444                                lov->desc.ld_active_tgt_count);
445                         lsm_new->lsm_stripe_offset = lsm->lsm_stripe_offset;
446                         lsm_new->lsm_stripe_size = lsm->lsm_stripe_size;
447                         lsm_new->lsm_stripe_pattern = lsm->lsm_stripe_pattern;
448                         obd_free_memmd(conn, &lsm);
449                 }
450                 lsm = lsm_new;
451                 ost_idx = 0; /* if lsm->lsm_stripe_offset is set yet */
452                 lsm->lsm_magic = LOV_MAGIC;
453         }
454
455         LASSERT(oa->o_valid & OBD_MD_FLID);
456         lsm->lsm_object_id = oa->o_id;
457         if (!lsm->lsm_stripe_size)
458                 lsm->lsm_stripe_size = lov->desc.ld_default_stripe_size;
459
460         /* Because of 64-bit divide/mod operations only work with a 32-bit
461          * divisor in a 32-bit kernel, we cannot support a stripe width
462          * of 4GB or larger on 32-bit CPUs.
463          */
464         if (lsm->lsm_stripe_size * lsm->lsm_stripe_count > ~0UL) {
465                 CERROR("LOV: stripe width "LPU64"x%u > %lu on 32-bit system\n",
466                        lsm->lsm_stripe_size, lsm->lsm_stripe_count, ~0UL);
467                 spin_unlock(&lov->lov_lock);
468                 GOTO(out_free, rc = -EINVAL);
469         }
470
471         if (!ost_idx || lsm->lsm_stripe_offset >= ost_count) {
472                 int mult = lsm->lsm_object_id * lsm->lsm_stripe_count;
473                 int stripe_offset = mult % ost_count;
474                 int sub_offset = (mult / ost_count) % lsm->lsm_stripe_count;
475
476                 lsm->lsm_stripe_offset = stripe_offset + sub_offset;
477         }
478
479         /* Start with lsm_stripe_offset on an active OSC to avoid confusion */
480         while (!lov->tgts[lsm->lsm_stripe_offset].active)
481                 lsm->lsm_stripe_offset = (lsm->lsm_stripe_offset+1) % ost_count;
482
483         /* Pick the OSTs before we release the lock */
484         ost_idx = lsm->lsm_stripe_offset;
485         for (i = 0,loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++,loi++) {
486                 CDEBUG(D_INODE, "objid "LPX64"[%d] is ost_idx %d (uuid %s)\n",
487                        lsm->lsm_object_id, i, ost_idx, lov->tgts[ost_idx].uuid);
488                 loi->loi_ost_idx = ost_idx;
489                 do {
490                         ost_idx = (ost_idx + 1) % ost_count;
491                 } while (!lov->tgts[ost_idx].active);
492         }
493
494         spin_unlock(&lov->lov_lock);
495
496         CDEBUG(D_INODE, "allocating %d subobjs for objid "LPX64" at idx %d\n",
497                lsm->lsm_stripe_count,lsm->lsm_object_id,lsm->lsm_stripe_offset);
498
499         for (i = 0,loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++,loi++) {
500                 struct lov_stripe_md obj_md;
501                 struct lov_stripe_md *obj_mdp = &obj_md;
502
503                 ost_idx = loi->loi_ost_idx;
504
505                 /* create data objects with "parent" OA */
506                 memcpy(tmp, oa, sizeof(*tmp));
507                 /* XXX: LOV STACKING: use real "obj_mdp" sub-data */
508                 rc = obd_create(&lov->tgts[ost_idx].conn, tmp, &obj_mdp);
509                 if (rc) {
510                         CERROR("error creating objid "LPX64" sub-object on "
511                                "OST idx %d: rc = %d\n", oa->o_id, ost_idx, rc);
512                         GOTO(out_cleanup, rc);
513                 }
514                 loi->loi_id = tmp->o_id;
515                 CDEBUG(D_INODE, "objid "LPX64" has subobj "LPX64" at idx %d\n",
516                        lsm->lsm_object_id, loi->loi_id, ost_idx);
517         }
518
519         *ea = lsm;
520
521  out_tmp:
522         obdo_free(tmp);
523         RETURN(rc);
524
525  out_cleanup:
526         while (i-- > 0) {
527                 int err;
528
529                 --loi;
530                 /* destroy already created objects here */
531                 memcpy(tmp, oa, sizeof(*tmp));
532                 tmp->o_id = loi->loi_id;
533                 err = obd_destroy(&lov->tgts[loi->loi_ost_idx].conn, tmp, NULL);
534                 if (err)
535                         CERROR("Failed to uncreate objid "LPX64" subobj "
536                                LPX64" on OST idx %d: rc = %d\n",
537                                oa->o_id, loi->loi_id, loi->loi_ost_idx,
538                                err);
539         }
540  out_free:
541         if (!*ea)
542                 obd_free_memmd(conn, &lsm);
543         goto out_tmp;
544 }
545
546 static int lov_destroy(struct lustre_handle *conn, struct obdo *oa,
547                        struct lov_stripe_md *lsm)
548 {
549         struct obdo tmp;
550         struct obd_export *export = class_conn2export(conn);
551         struct lov_obd *lov;
552         struct lov_oinfo *loi;
553         struct lov_file_handles *lfh = NULL;
554         int rc = 0, i;
555         ENTRY;
556
557         if (!lsm) {
558                 CERROR("LOV requires striping ea for destruction\n");
559                 RETURN(-EINVAL);
560         }
561
562         if (lsm->lsm_magic != LOV_MAGIC) {
563                 CERROR("LOV striping magic bad %#lx != %#lx\n",
564                        lsm->lsm_magic, LOV_MAGIC);
565                 RETURN(-EINVAL);
566         }
567
568         if (!export || !export->exp_obd)
569                 RETURN(-ENODEV);
570
571         if (oa->o_valid & OBD_MD_FLHANDLE)
572                 lfh = lov_handle2lfh(obdo_handle(oa));
573
574         lov = &export->exp_obd->u.lov;
575         for (i = 0,loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++,loi++) {
576                 int err;
577                 if (lov->tgts[loi->loi_ost_idx].active == 0) {
578                         /* Orphan clean up will (someday) fix this up. */
579                         continue;
580                 }
581
582                 memcpy(&tmp, oa, sizeof(tmp));
583                 tmp.o_id = loi->loi_id;
584                 if (lfh)
585                         memcpy(obdo_handle(&tmp), &lfh->lfh_handles[i],
586                                sizeof(lfh->lfh_handles[i]));
587                 else
588                         tmp.o_valid &= ~OBD_MD_FLHANDLE;
589                 err = obd_destroy(&lov->tgts[loi->loi_ost_idx].conn, &tmp,
590                                   NULL);
591                 if (err && lov->tgts[loi->loi_ost_idx].active) {
592                         CERROR("Error destroying objid "LPX64" subobj "
593                                LPX64" on OST idx %d\n: rc = %d",
594                                oa->o_id, loi->loi_id, loi->loi_ost_idx, err);
595                         if (!rc)
596                                 rc = err;
597                 }
598         }
599         RETURN(rc);
600 }
601
602 /* compute object size given "stripeno" and the ost size */
603 static obd_size lov_stripe_size(struct lov_stripe_md *lsm, obd_size ost_size,
604                                 int stripeno)
605 {
606         unsigned long ssize  = lsm->lsm_stripe_size;
607         unsigned long swidth = ssize * lsm->lsm_stripe_count;
608         unsigned long stripe_size;
609         obd_size lov_size;
610
611         if (ost_size == 0)
612                 return 0;
613
614         /* do_div(a, b) returns a % b, and a = a / b */
615         stripe_size = do_div(ost_size, ssize);
616
617         if (stripe_size)
618                 lov_size = ost_size * swidth + stripeno * ssize + stripe_size;
619         else
620                 lov_size = (ost_size - 1) * swidth + (stripeno + 1) * ssize;
621
622         return lov_size;
623 }
624
625 static void lov_merge_attrs(struct obdo *tgt, struct obdo *src, obd_flag valid,
626                             struct lov_stripe_md *lsm, int stripeno, int *new)
627 {
628         if (*new) {
629                 obdo_cpy_md(tgt, src, valid);
630                 if (valid & OBD_MD_FLSIZE)
631                         tgt->o_size = lov_stripe_size(lsm,src->o_size,stripeno);
632                 *new = 0;
633         } else {
634                 if (valid & OBD_MD_FLSIZE) {
635                         /* this handles sparse files properly */
636                         obd_size lov_size;
637
638                         lov_size = lov_stripe_size(lsm, src->o_size, stripeno);
639                         if (lov_size > tgt->o_size)
640                                 tgt->o_size = lov_size;
641                 }
642                 if (valid & OBD_MD_FLBLOCKS)
643                         tgt->o_blocks += src->o_blocks;
644                 if (valid & OBD_MD_FLCTIME && tgt->o_ctime < src->o_ctime)
645                         tgt->o_ctime = src->o_ctime;
646                 if (valid & OBD_MD_FLMTIME && tgt->o_mtime < src->o_mtime)
647                         tgt->o_mtime = src->o_mtime;
648         }
649 }
650
651 static int lov_getattr(struct lustre_handle *conn, struct obdo *oa,
652                        struct lov_stripe_md *lsm)
653 {
654         struct obdo tmp;
655         struct obd_export *export = class_conn2export(conn);
656         struct lov_obd *lov;
657         struct lov_oinfo *loi;
658         struct lov_file_handles *lfh = NULL;
659         int i;
660         int new = 1;
661         ENTRY;
662
663         if (!lsm) {
664                 CERROR("LOV requires striping ea\n");
665                 RETURN(-EINVAL);
666         }
667
668         if (lsm->lsm_magic != LOV_MAGIC) {
669                 CERROR("LOV striping magic bad %#lx != %#lx\n",
670                        lsm->lsm_magic, LOV_MAGIC);
671                 RETURN(-EINVAL);
672         }
673
674         if (!export || !export->exp_obd)
675                 RETURN(-ENODEV);
676
677         lov = &export->exp_obd->u.lov;
678
679         if (oa->o_valid & OBD_MD_FLHANDLE)
680                 lfh = lov_handle2lfh(obdo_handle(oa));
681
682         for (i = 0,loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++,loi++) {
683                 int err;
684
685                 if (loi->loi_id == 0)
686                         continue;
687
688                 if (lov->tgts[loi->loi_ost_idx].active == 0)
689                         continue;
690
691                 CDEBUG(D_INFO, "objid "LPX64"[%d] has subobj "LPX64" at idx "
692                        "%u\n", oa->o_id, i, loi->loi_id, loi->loi_ost_idx);
693                 /* create data objects with "parent" OA */
694                 memcpy(&tmp, oa, sizeof(tmp));
695                 tmp.o_id = loi->loi_id;
696                 if (lfh)
697                         memcpy(obdo_handle(&tmp), &lfh->lfh_handles[i],
698                                sizeof(lfh->lfh_handles[i]));
699                 else
700                         tmp.o_valid &= ~OBD_MD_FLHANDLE;
701
702                 err = obd_getattr(&lov->tgts[loi->loi_ost_idx].conn, &tmp,NULL);
703                 if (err && lov->tgts[loi->loi_ost_idx].active) {
704                         CERROR("Error getattr objid "LPX64" subobj "LPX64
705                                " on OST idx %d: rc = %d\n",
706                                oa->o_id, loi->loi_id, loi->loi_ost_idx, err);
707                         RETURN(err);
708                 }
709                 lov_merge_attrs(oa, &tmp, tmp.o_valid, lsm, i, &new);
710         }
711
712         RETURN(0);
713 }
714
715 static int lov_setattr(struct lustre_handle *conn, struct obdo *oa,
716                        struct lov_stripe_md *lsm)
717 {
718         struct obdo *tmp;
719         struct obd_export *export = class_conn2export(conn);
720         struct lov_obd *lov;
721         struct lov_oinfo *loi;
722         struct lov_file_handles *lfh = NULL;
723         int rc = 0, i;
724         ENTRY;
725
726         /* Note that this code is currently unused, hence LBUG(), just
727          * to know when/if it is ever revived that it needs cleanups.
728          */
729         LBUG();
730
731         if (!lsm) {
732                 CERROR("LOV requires striping ea\n");
733                 RETURN(-EINVAL);
734         }
735
736         if (lsm->lsm_magic != LOV_MAGIC) {
737                 CERROR("LOV striping magic bad %#lx != %#lx\n",
738                        lsm->lsm_magic, LOV_MAGIC);
739                 RETURN(-EINVAL);
740         }
741
742         if (!export || !export->exp_obd)
743                 RETURN(-ENODEV);
744
745         /* size changes should go through punch and not setattr */
746         LASSERT(!(oa->o_valid & OBD_MD_FLSIZE));
747
748         tmp = obdo_alloc();
749         if (!tmp)
750                 RETURN(-ENOMEM);
751
752         if (oa->o_valid & OBD_MD_FLHANDLE)
753                 lfh = lov_handle2lfh(obdo_handle(oa));
754
755         lov = &export->exp_obd->u.lov;
756         for (i = 0,loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++,loi++) {
757                 int err;
758
759                 obdo_cpy_md(tmp, oa, oa->o_valid);
760
761                 if (lfh)
762                         memcpy(obdo_handle(tmp), &lfh->lfh_handles[i],
763                                 sizeof(lfh->lfh_handles[i]));
764                 else
765                         tmp->o_valid &= ~OBD_MD_FLHANDLE;
766
767                 tmp->o_id = loi->loi_id;
768
769                 err = obd_setattr(&lov->tgts[loi->loi_ost_idx].conn, tmp, NULL);
770                 if (err) {
771                         CERROR("Error setattr objid "LPX64" subobj "LPX64
772                                " on OST idx %d: rc = %d\n",
773                                oa->o_id, loi->loi_id, loi->loi_ost_idx, err);
774                         if (!rc)
775                                 rc = err;
776                 }
777         }
778         obdo_free(tmp);
779         RETURN(rc);
780 }
781
782 static int lov_open(struct lustre_handle *conn, struct obdo *oa,
783                     struct lov_stripe_md *lsm)
784 {
785         struct obdo *tmp; /* on the heap here, on the stack in lov_close? */
786         struct obd_export *export = class_conn2export(conn);
787         struct lov_obd *lov;
788         struct lov_oinfo *loi;
789         struct lov_file_handles *lfh = NULL;
790         struct lustre_handle *handle;
791         int new = 1;
792         int rc = 0, i;
793         ENTRY;
794
795         if (!lsm) {
796                 CERROR("LOV requires striping ea for opening\n");
797                 RETURN(-EINVAL);
798         }
799
800         if (lsm->lsm_magic != LOV_MAGIC) {
801                 CERROR("LOV striping magic bad %#lx != %#lx\n",
802                        lsm->lsm_magic, LOV_MAGIC);
803                 RETURN(-EINVAL);
804         }
805
806         if (!export || !export->exp_obd)
807                 RETURN(-ENODEV);
808
809         tmp = obdo_alloc();
810         if (!tmp)
811                 RETURN(-ENOMEM);
812
813         lfh = kmem_cache_alloc(lov_file_cache, GFP_KERNEL);
814         if (!lfh)
815                 GOTO(out_tmp, rc = -ENOMEM);
816         OBD_ALLOC(lfh->lfh_handles,
817                   lsm->lsm_stripe_count * sizeof(*lfh->lfh_handles));
818         if (!lfh->lfh_handles)
819                 GOTO(out_lfh, rc = -ENOMEM);
820
821         lov = &export->exp_obd->u.lov;
822         oa->o_size = 0;
823         oa->o_blocks = 0;
824         for (i = 0,loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++,loi++) {
825
826                 if (lov->tgts[loi->loi_ost_idx].active == 0) {
827                         continue;
828                 }
829
830                 /* create data objects with "parent" OA */
831                 memcpy(tmp, oa, sizeof(*tmp));
832                 tmp->o_id = loi->loi_id;
833
834                 rc = obd_open(&lov->tgts[loi->loi_ost_idx].conn, tmp, NULL);
835                 if (rc && lov->tgts[loi->loi_ost_idx].active) {
836                         CERROR("Error open objid "LPX64" subobj "LPX64
837                                " on OST idx %d: rc = %d\n",
838                                oa->o_id, lsm->lsm_oinfo[i].loi_id,
839                                loi->loi_ost_idx, rc);
840                         goto out_handles;
841                 }
842
843                 lov_merge_attrs(oa, tmp, tmp->o_valid, lsm, i, &new);
844
845                 if (tmp->o_valid & OBD_MD_FLHANDLE)
846                         memcpy(&lfh->lfh_handles[i], obdo_handle(tmp),
847                                sizeof(lfh->lfh_handles[i]));
848         }
849
850         handle = obdo_handle(oa);
851         
852         lfh->lfh_count = lsm->lsm_stripe_count;
853         get_random_bytes(&lfh->lfh_cookie, sizeof(lfh->lfh_cookie));
854         
855         handle->addr = (__u64)(unsigned long)lfh;
856         handle->cookie = lfh->lfh_cookie;
857         oa->o_valid |= OBD_MD_FLHANDLE;
858         list_add(&lfh->lfh_list, &export->exp_lov_data.led_open_head);
859
860 out_tmp:
861         obdo_free(tmp);
862         RETURN(rc);
863
864 out_handles:
865         for (i--, loi = &lsm->lsm_oinfo[i]; i >= 0; i--, loi--) {
866                 int err;
867
868                 if (lov->tgts[loi->loi_ost_idx].active == 0)
869                         continue;
870
871                 memcpy(tmp, oa, sizeof(*tmp));
872                 tmp->o_id = loi->loi_id;
873                 memcpy(obdo_handle(tmp), &lfh->lfh_handles[i],
874                        sizeof(lfh->lfh_handles[i]));
875
876                 err = obd_close(&lov->tgts[loi->loi_ost_idx].conn, tmp, NULL);
877                 if (err) {
878                         CERROR("Error closing objid "LPX64" subobj "LPX64
879                                " on OST idx %d after open error: rc = %d\n",
880                                oa->o_id, loi->loi_id, loi->loi_ost_idx, err);
881                 }
882         }
883        
884         OBD_FREE(lfh->lfh_handles,
885                  lsm->lsm_stripe_count * sizeof(*lfh->lfh_handles));
886 out_lfh:
887         lfh->lfh_cookie = DEAD_HANDLE_MAGIC;
888         kmem_cache_free(lov_file_cache, lfh);
889         goto out_tmp;
890 }
891
892 static int lov_close(struct lustre_handle *conn, struct obdo *oa,
893                      struct lov_stripe_md *lsm)
894 {
895         struct obdo tmp;
896         struct obd_export *export = class_conn2export(conn);
897         struct lov_obd *lov;
898         struct lov_oinfo *loi;
899         struct lov_file_handles *lfh = NULL;
900         int rc = 0, i;
901         ENTRY;
902
903         if (!lsm) {
904                 CERROR("LOV requires striping ea\n");
905                 RETURN(-EINVAL);
906         }
907
908         if (lsm->lsm_magic != LOV_MAGIC) {
909                 CERROR("LOV striping magic bad %#lx != %#lx\n",
910                        lsm->lsm_magic, LOV_MAGIC);
911                 RETURN(-EINVAL);
912         }
913
914         if (!export || !export->exp_obd)
915                 RETURN(-ENODEV);
916
917         if (oa->o_valid & OBD_MD_FLHANDLE)
918                 lfh = lov_handle2lfh(obdo_handle(oa));
919
920         lov = &export->exp_obd->u.lov;
921         for (i = 0,loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++,loi++) {
922                 int err;
923                 
924                 if (lov->tgts[loi->loi_ost_idx].active == 0)
925                         continue;
926
927                 /* create data objects with "parent" OA */
928                 memcpy(&tmp, oa, sizeof(tmp));
929                 tmp.o_id = loi->loi_id;
930                 if (lfh)
931                         memcpy(obdo_handle(&tmp), &lfh->lfh_handles[i],
932                                sizeof(lfh->lfh_handles[i]));
933                 else
934                         tmp.o_valid &= ~OBD_MD_FLHANDLE;
935
936                 err = obd_close(&lov->tgts[loi->loi_ost_idx].conn, &tmp, NULL);
937                 if (err) {
938                         CERROR("Error close objid "LPX64" subobj "LPX64
939                                " on OST idx %d: rc = %d\n",
940                                oa->o_id, loi->loi_id, loi->loi_ost_idx, err);
941                         if (!rc)
942                                 rc = err;
943                 }
944         }
945         if (lfh) {
946                 list_del(&lfh->lfh_list);
947                 OBD_FREE(lfh->lfh_handles,
948                          lsm->lsm_stripe_count * sizeof(*lfh->lfh_handles));
949                 lfh->lfh_cookie = DEAD_HANDLE_MAGIC;
950                 kmem_cache_free(lov_file_cache, lfh);
951         }
952
953         RETURN(rc);
954 }
955
/* Fallback log2(): position of the first zero bit of ~n as reported by ffz(),
 * i.e. presumably the lowest set bit of n - exact only for powers of two. */
#if !defined(log2)
#define log2(n) ffz(~(n))
#endif
959
960 #warning FIXME: merge these two functions now that they are nearly the same
961
962 /* compute ost offset in stripe "stripeno" corresponding to offset "lov_off" */
963 static obd_off lov_stripe_offset(struct lov_stripe_md *lsm, obd_off lov_off,
964                                  int stripeno)
965 {
966         unsigned long ssize  = lsm->lsm_stripe_size;
967         unsigned long swidth = ssize * lsm->lsm_stripe_count;
968         unsigned long stripe_off, this_stripe;
969
970         if (lov_off == OBD_OBJECT_EOF || lov_off == 0)
971                 return lov_off;
972
973         /* do_div(a, b) returns a % b, and a = a / b */
974         stripe_off = do_div(lov_off, swidth);
975
976         this_stripe = stripeno * ssize;
977         if (stripe_off <= this_stripe)
978                 stripe_off = 0;
979         else {
980                 stripe_off -= this_stripe;
981
982                 if (stripe_off > ssize)
983                         stripe_off = ssize;
984         }
985
986
987         return lov_off * ssize + stripe_off;
988 }
989
990 /* compute which stripe number "lov_off" will be written into */
991 static int lov_stripe_number(struct lov_stripe_md *lsm, obd_off lov_off)
992 {
993         unsigned long ssize  = lsm->lsm_stripe_size;
994         unsigned long swidth = ssize * lsm->lsm_stripe_count;
995         unsigned long stripe_off;
996
997         stripe_off = do_div(lov_off, swidth);
998
999         return stripe_off / ssize;
1000 }
1001
1002
1003 /* FIXME: maybe we'll just make one node the authoritative attribute node, then
1004  * we can send this 'punch' to just the authoritative node and the nodes
1005  * that the punch will affect. */
1006 static int lov_punch(struct lustre_handle *conn, struct obdo *oa,
1007                      struct lov_stripe_md *lsm,
1008                      obd_off start, obd_off end)
1009 {
1010         struct obdo tmp;
1011         struct obd_export *export = class_conn2export(conn);
1012         struct lov_obd *lov;
1013         struct lov_oinfo *loi;
1014         struct lov_file_handles *lfh = NULL;
1015         int rc = 0, i;
1016         ENTRY;
1017
1018         if (!lsm) {
1019                 CERROR("LOV requires striping ea\n");
1020                 RETURN(-EINVAL);
1021         }
1022
1023         if (lsm->lsm_magic != LOV_MAGIC) {
1024                 CERROR("LOV striping magic bad %#lx != %#lx\n",
1025                        lsm->lsm_magic, LOV_MAGIC);
1026                 RETURN(-EINVAL);
1027         }
1028
1029         if (!export || !export->exp_obd)
1030                 RETURN(-ENODEV);
1031
1032         if (oa->o_valid & OBD_MD_FLHANDLE)
1033                 lfh = lov_handle2lfh(obdo_handle(oa));
1034
1035         lov = &export->exp_obd->u.lov;
1036         for (i = 0,loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++,loi++) {
1037                 obd_off starti = lov_stripe_offset(lsm, start, i);
1038                 obd_off endi = lov_stripe_offset(lsm, end, i);
1039                 int err;
1040
1041                 if (starti == endi)
1042                         continue;
1043                 /* create data objects with "parent" OA */
1044                 memcpy(&tmp, oa, sizeof(tmp));
1045                 tmp.o_id = loi->loi_id;
1046                 if (lfh)
1047                         memcpy(obdo_handle(&tmp), &lfh->lfh_handles[i],
1048                                sizeof(lfh->lfh_handles[i]));
1049                 else
1050                         tmp.o_valid &= ~OBD_MD_FLHANDLE;
1051
1052                 err = obd_punch(&lov->tgts[loi->loi_ost_idx].conn, &tmp, NULL,
1053                                 starti, endi);
1054                 if (err) {
1055                         CERROR("Error punch objid "LPX64" subobj "LPX64
1056                                " on OST idx %d: rc = %d\n",
1057                                oa->o_id, loi->loi_id, loi->loi_ost_idx, err);
1058                         if (!rc)
1059                                 rc = err;
1060                 }
1061         }
1062         RETURN(rc);
1063 }
1064
1065 static inline int lov_brw(int cmd, struct lustre_handle *conn,
1066                           struct lov_stripe_md *lsm, obd_count oa_bufs,
1067                           struct brw_page *pga, struct obd_brw_set *set)
1068 {
1069         struct {
1070                 int bufct;
1071                 int index;
1072                 int subcount;
1073                 struct lov_stripe_md lsm;
1074                 int ost_idx;
1075         } *stripeinfo, *si, *si_last;
1076         struct obd_export *export = class_conn2export(conn);
1077         struct lov_obd *lov;
1078         struct brw_page *ioarr;
1079         struct lov_oinfo *loi;
1080         int rc = 0, i, *where, stripe_count = lsm->lsm_stripe_count;
1081         ENTRY;
1082
1083         if (!lsm) {
1084                 CERROR("LOV requires striping ea\n");
1085                 RETURN(-EINVAL);
1086         }
1087
1088         if (lsm->lsm_magic != LOV_MAGIC) {
1089                 CERROR("LOV striping magic bad %#lx != %#lx\n",
1090                        lsm->lsm_magic, LOV_MAGIC);
1091                 RETURN(-EINVAL);
1092         }
1093
1094         lov = &export->exp_obd->u.lov;
1095
1096         OBD_ALLOC(stripeinfo, stripe_count * sizeof(*stripeinfo));
1097         if (!stripeinfo)
1098                 GOTO(out_cbdata, rc = -ENOMEM);
1099
1100         OBD_ALLOC(where, sizeof(*where) * oa_bufs);
1101         if (!where)
1102                 GOTO(out_sinfo, rc = -ENOMEM);
1103
1104         OBD_ALLOC(ioarr, sizeof(*ioarr) * oa_bufs);
1105         if (!ioarr)
1106                 GOTO(out_where, rc = -ENOMEM);
1107
1108         for (i = 0; i < oa_bufs; i++) {
1109                 where[i] = lov_stripe_number(lsm, pga[i].off);
1110                 stripeinfo[where[i]].bufct++;
1111         }
1112
1113         for (i = 0, loi = lsm->lsm_oinfo, si_last = si = stripeinfo;
1114              i < stripe_count; i++, loi++, si_last = si, si++) {
1115                 if (lov->tgts[loi->loi_ost_idx].active == 0)
1116                         GOTO(out_ioarr, rc = -EIO);
1117                 if (i > 0)
1118                         si->index = si_last->index + si_last->bufct;
1119                 si->lsm.lsm_object_id = loi->loi_id;
1120                 si->ost_idx = loi->loi_ost_idx;
1121         }
1122
1123         for (i = 0; i < oa_bufs; i++) {
1124                 int which = where[i];
1125                 int shift;
1126
1127                 shift = stripeinfo[which].index + stripeinfo[which].subcount;
1128                 LASSERT(shift < oa_bufs);
1129                 ioarr[shift] = pga[i];
1130                 ioarr[shift].off = lov_stripe_offset(lsm, pga[i].off, which);
1131                 stripeinfo[which].subcount++;
1132         }
1133
1134         for (i = 0, si = stripeinfo; i < stripe_count; i++, si++) {
1135                 int shift = si->index;
1136
1137                 if (si->bufct) {
1138                         LASSERT(shift < oa_bufs);
1139                         rc = obd_brw(cmd, &lov->tgts[si->ost_idx].conn,
1140                                      &si->lsm, si->bufct, &ioarr[shift], set);
1141                         if (rc)
1142                                 GOTO(out_ioarr, rc);
1143                 }
1144         }
1145
1146  out_ioarr:
1147         OBD_FREE(ioarr, sizeof(*ioarr) * oa_bufs);
1148  out_where:
1149         OBD_FREE(where, sizeof(*where) * oa_bufs);
1150  out_sinfo:
1151         OBD_FREE(stripeinfo, stripe_count * sizeof(*stripeinfo));
1152  out_cbdata:
1153         RETURN(rc);
1154 }
1155
1156 static int lov_enqueue(struct lustre_handle *conn, struct lov_stripe_md *lsm,
1157                        struct lustre_handle *parent_lock,
1158                        __u32 type, void *cookie, int cookielen, __u32 mode,
1159                        int *flags, void *cb, void *data, int datalen,
1160                        struct lustre_handle *lockhs)
1161 {
1162         struct obd_export *export = class_conn2export(conn);
1163         struct lov_obd *lov;
1164         struct lov_oinfo *loi;
1165         struct lov_stripe_md submd;
1166         int rc = 0, i;
1167         ENTRY;
1168
1169         if (!lsm) {
1170                 CERROR("LOV requires striping ea\n");
1171                 RETURN(-EINVAL);
1172         }
1173
1174         if (lsm->lsm_magic != LOV_MAGIC) {
1175                 CERROR("LOV striping magic bad %#lx != %#lx\n",
1176                        lsm->lsm_magic, LOV_MAGIC);
1177                 RETURN(-EINVAL);
1178         }
1179
1180         /* we should never be asked to replay a lock. */
1181
1182         LASSERT((*flags & LDLM_FL_REPLAY) == 0);
1183
1184         if (!export || !export->exp_obd)
1185                 RETURN(-ENODEV);
1186
1187         memset(lockhs, 0, sizeof(*lockhs) * lsm->lsm_stripe_count);
1188
1189         lov = &export->exp_obd->u.lov;
1190         for (i = 0,loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++,loi++) {
1191                 struct ldlm_extent *extent = (struct ldlm_extent *)cookie;
1192                 struct ldlm_extent sub_ext;
1193
1194                 if (lov->tgts[loi->loi_ost_idx].active == 0)
1195                         continue;
1196
1197                 *flags = 0;
1198                 sub_ext.start = lov_stripe_offset(lsm, extent->start, i);
1199                 sub_ext.end = lov_stripe_offset(lsm, extent->end, i);
1200                 if (sub_ext.start == sub_ext.end)
1201                         continue;
1202
1203                 submd.lsm_object_id = loi->loi_id;
1204                 /* XXX submd should be that from the subobj, it should come
1205                  *     opaquely from the LOV.
1206                  */
1207                 submd.lsm_stripe_count = 0;
1208                 /* XXX submd is not fully initialized here */
1209                 *flags = 0;
1210                 rc = obd_enqueue(&(lov->tgts[loi->loi_ost_idx].conn), &submd,
1211                                  parent_lock, type, &sub_ext, sizeof(sub_ext),
1212                                  mode, flags, cb, data, datalen, &(lockhs[i]));
1213                 // XXX add a lock debug statement here
1214                 if (rc && lov->tgts[loi->loi_ost_idx].active) {
1215                         CERROR("Error enqueue objid "LPX64" subobj "LPX64
1216                                " on OST idx %d: rc = %d\n", lsm->lsm_object_id,
1217                                loi->loi_id, loi->loi_ost_idx, rc);
1218                         goto out_locks;
1219                 }
1220         }
1221
1222         RETURN(0);
1223
1224  out_locks:
1225         for (i--, loi = &lsm->lsm_oinfo[i]; i >= 0; i--, loi--) {
1226                 int err;
1227                 
1228                 if (lov->tgts[loi->loi_ost_idx].active == 0)
1229                         continue;
1230
1231                 submd.lsm_object_id = loi->loi_id;
1232                 submd.lsm_stripe_count = 0;
1233                 err = obd_cancel(&lov->tgts[loi->loi_ost_idx].conn, &submd,
1234                                  mode, &lockhs[i]);
1235                 if (err) {
1236                         CERROR("Error cancelling objid "LPX64" subobj "LPX64
1237                                " on OST idx %d after enqueue error: rc = %d\n",
1238                                loi->loi_id, loi->loi_ost_idx, err);
1239                 }
1240         }
1241         RETURN(rc);
1242 }
1243
1244 static int lov_cancel(struct lustre_handle *conn, struct lov_stripe_md *lsm,
1245                       __u32 mode, struct lustre_handle *lockhs)
1246 {
1247         struct obd_export *export = class_conn2export(conn);
1248         struct lov_obd *lov;
1249         struct lov_oinfo *loi;
1250         int rc = 0, i;
1251         ENTRY;
1252
1253         if (!lsm) {
1254                 CERROR("LOV requires striping ea\n");
1255                 RETURN(-EINVAL);
1256         }
1257
1258         if (lsm->lsm_magic != LOV_MAGIC) {
1259                 CERROR("LOV striping magic bad %#lx != %#lx\n",
1260                        lsm->lsm_magic, LOV_MAGIC);
1261                 RETURN(-EINVAL);
1262         }
1263
1264         if (!export || !export->exp_obd)
1265                 RETURN(-ENODEV);
1266
1267         lov = &export->exp_obd->u.lov;
1268         for (i = 0,loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++,loi++) {
1269                 struct lov_stripe_md submd;
1270                 int err;
1271
1272                 if (lov->tgts[loi->loi_ost_idx].active == 0)
1273                         continue;
1274
1275                 if (lockhs[i].addr == 0)
1276                         continue;
1277
1278                 submd.lsm_object_id = loi->loi_id;
1279                 submd.lsm_stripe_count = 0;
1280                 err = obd_cancel(&lov->tgts[loi->loi_ost_idx].conn, &submd,
1281                                 mode, &lockhs[i]);
1282                 if (err && lov->tgts[loi->loi_ost_idx].active) {
1283                         CERROR("Error cancel objid "LPX64" subobj "LPX64
1284                                " on OST idx %d: rc = %d\n", lsm->lsm_object_id,
1285                                loi->loi_id, loi->loi_ost_idx, err);
1286                         if (!rc)
1287                                 rc = err;
1288                 }
1289         }
1290         RETURN(rc);
1291 }
1292
1293 static int lov_cancel_unused(struct lustre_handle *conn,
1294                              struct lov_stripe_md *lsm, int flags)
1295 {
1296         struct obd_export *export = class_conn2export(conn);
1297         struct lov_obd *lov;
1298         struct lov_oinfo *loi;
1299         int rc = 0, i, err;
1300         ENTRY;
1301
1302         if (!lsm) {
1303                 CERROR("LOV requires striping ea for lock cancellation\n");
1304                 RETURN(-EINVAL);
1305         }
1306
1307         if (!export || !export->exp_obd)
1308                 RETURN(-ENODEV);
1309
1310         lov = &export->exp_obd->u.lov;
1311         for (i = 0,loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++,loi++) {
1312                 struct lov_stripe_md submd;
1313
1314                 submd.lsm_object_id = loi->loi_id;
1315                 submd.lsm_stripe_count = 0;
1316                 err = obd_cancel_unused(&lov->tgts[loi->loi_ost_idx].conn,
1317                                        &submd, flags);
1318                 if (err && lov->tgts[loi->loi_ost_idx].active) {
1319                         CERROR("Error cancel unused objid "LPX64" subobj "LPX64
1320                                " on OST idx %d: rc = %d\n", lsm->lsm_object_id,
1321                                loi->loi_id, loi->loi_ost_idx, err);
1322                         if (!rc)
1323                                 rc = err;
1324                 }
1325         }
1326
1327         RETURN(rc);
1328 }
1329
1330 static int lov_statfs(struct lustre_handle *conn, struct obd_statfs *osfs)
1331 {
1332         struct obd_export *export = class_conn2export(conn);
1333         struct lov_obd *lov;
1334         struct obd_statfs lov_sfs;
1335         int set = 0;
1336         int rc = 0;
1337         int i;
1338         ENTRY;
1339
1340         if (!export || !export->exp_obd)
1341                 RETURN(-ENODEV);
1342
1343         lov = &export->exp_obd->u.lov;
1344
1345         /* We only get block data from the OBD */
1346         for (i = 0; i < lov->desc.ld_tgt_count; i++) {
1347                 int err;
1348
1349                 if (!lov->tgts[i].active)
1350                         continue;
1351
1352                 err = obd_statfs(&lov->tgts[i].conn, &lov_sfs);
1353                 if (err) {
1354                         CERROR("Error statfs OSC %s idx %d: err = %d\n",
1355                                lov->tgts[i].uuid, i, err);
1356                         if (!rc)
1357                                 rc = err;
1358                         continue; /* XXX or break? - probably OK to continue */
1359                 }
1360                 if (!set) {
1361                         memcpy(osfs, &lov_sfs, sizeof(lov_sfs));
1362                         set = 1;
1363                 } else {
1364                         osfs->os_bfree += lov_sfs.os_bfree;
1365                         osfs->os_bavail += lov_sfs.os_bavail;
1366                         osfs->os_blocks += lov_sfs.os_blocks;
1367                         /* XXX not sure about this one - depends on policy.
1368                          *   - could be minimum if we always stripe on all OBDs
1369                          *     (but that would be wrong for any other policy,
1370                          *     if one of the OBDs has no more objects left)
1371                          *   - could be sum if we stripe whole objects
1372                          *   - could be average, just to give a nice number
1373                          *   - we just pick first OST and hope it is enough
1374                         sfs->f_ffree += lov_sfs.f_ffree;
1375                          */
1376                 }
1377         }
1378         RETURN(rc);
1379 }
1380
1381 static int lov_iocontrol(unsigned int cmd, struct lustre_handle *conn, int len,
1382                          void *karg, void *uarg)
1383 {
1384         struct obd_device *obddev = class_conn2obd(conn);
1385         struct lov_obd *lov = &obddev->u.lov;
1386         struct obd_ioctl_data *data = karg;
1387         int i, count = lov->desc.ld_tgt_count;
1388         int rc;
1389
1390         ENTRY;
1391
1392         switch (cmd) {
1393         case IOC_LOV_SET_OSC_ACTIVE: {
1394                 rc = lov_set_osc_active(lov,data->ioc_inlbuf1,data->ioc_offset);
1395                 break;
1396         }
1397         case OBD_IOC_LOV_GET_CONFIG: {
1398                 struct lov_tgt_desc *tgtdesc;
1399                 struct lov_desc *desc;
1400                 obd_uuid_t *uuidp;
1401                 char *buf = NULL;
1402
1403                 buf = NULL;
1404                 len = 0;
1405                 if (obd_ioctl_getdata(&buf, &len, (void *)uarg))
1406                         RETURN(-EINVAL);
1407
1408                 data = (struct obd_ioctl_data *)buf;
1409
1410                 if (sizeof(*desc) > data->ioc_inllen1) {
1411                         OBD_FREE(buf, len);
1412                         RETURN(-EINVAL);
1413                 }
1414
1415                 if (sizeof(*uuidp) * count > data->ioc_inllen2) {
1416                         OBD_FREE(buf, len);
1417                         RETURN(-EINVAL);
1418                 }
1419
1420                 desc = (struct lov_desc *)data->ioc_inlbuf1;
1421                 uuidp = (obd_uuid_t *)data->ioc_inlbuf2;
1422                 memcpy(desc, &(lov->desc), sizeof(*desc));
1423
1424                 tgtdesc = lov->tgts;
1425                 for (i = 0; i < count; i++, uuidp++, tgtdesc++)
1426                         memcpy(uuidp, tgtdesc->uuid, sizeof(*uuidp));
1427
1428                 rc = copy_to_user((void *)uarg, buf, len);
1429                 if (rc)
1430                         rc = -EFAULT;
1431                 OBD_FREE(buf, len);
1432                 break;
1433         }
1434         default:
1435                 if (count == 0)
1436                         RETURN(-ENOTTY);
1437                 rc = 0;
1438                 for (i = 0; i < count; i++) {
1439                         int err = obd_iocontrol(cmd, &lov->tgts[i].conn,
1440                                                 len, karg, uarg);
1441                         if (err && !rc)
1442                                 rc = err;
1443                 }
1444         }
1445
1446         RETURN(rc);
1447 }
1448
1449 struct obd_ops lov_obd_ops = {
1450         o_attach:      lov_attach,
1451         o_detach:      lov_detach,
1452         o_setup:       lov_setup,
1453         o_connect:     lov_connect,
1454         o_disconnect:  lov_disconnect,
1455         o_statfs:      lov_statfs,
1456         o_packmd:      lov_packmd,
1457         o_unpackmd:    lov_unpackmd,
1458         o_create:      lov_create,
1459         o_destroy:     lov_destroy,
1460         o_getattr:     lov_getattr,
1461         o_setattr:     lov_setattr,
1462         o_open:        lov_open,
1463         o_close:       lov_close,
1464         o_brw:         lov_brw,
1465         o_punch:       lov_punch,
1466         o_enqueue:     lov_enqueue,
1467         o_cancel:      lov_cancel,
1468         o_cancel_unused: lov_cancel_unused,
1469         o_iocontrol:   lov_iocontrol
1470 };
1471
1472
1473 #define LOV_VERSION "v0.1"
1474
1475 static int __init lov_init(void)
1476 {
1477         int rc;
1478         printk(KERN_INFO "Lustre Logical Object Volume driver " LOV_VERSION
1479                ", info@clusterfs.com\n");
1480         lov_file_cache = kmem_cache_create("ll_lov_file_data",
1481                                            sizeof(struct lov_file_handles),
1482                                            0, 0, NULL, NULL);
1483         if (!lov_file_cache)
1484                 RETURN(-ENOMEM);
1485
1486         rc = class_register_type(&lov_obd_ops, status_class_var,
1487                                  OBD_LOV_DEVICENAME);
1488         RETURN(rc);
1489 }
1490
1491 static void __exit lov_exit(void)
1492 {
1493         if (kmem_cache_destroy(lov_file_cache))
1494                 CERROR("couldn't free LOV open cache\n");
1495         class_unregister_type(OBD_LOV_DEVICENAME);
1496 }
1497
1498 MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
1499 MODULE_DESCRIPTION("Lustre Logical Object Volume OBD driver " LOV_VERSION);
1500 MODULE_LICENSE("GPL");
1501
1502 module_init(lov_init);
1503 module_exit(lov_exit);