Whamcloud - gitweb
Fixes and cleanups in lmv.
[fs/lustre-release.git] / lustre / lmv / lmv_obd.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  * Copyright (C) 2002, 2003 Cluster File Systems, Inc.
5  *
6  *   This file is part of Lustre, http://www.lustre.org.
7  *
8  *   Lustre is free software; you can redistribute it and/or
9  *   modify it under the terms of version 2 of the GNU General Public
10  *   License as published by the Free Software Foundation.
11  *
12  *   Lustre is distributed in the hope that it will be useful,
13  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
14  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  *   GNU General Public License for more details.
16  *
17  *   You should have received a copy of the GNU General Public License
18  *   along with Lustre; if not, write to the Free Software
19  *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20  */
21
22 #ifndef EXPORT_SYMTAB
23 # define EXPORT_SYMTAB
24 #endif
25 #define DEBUG_SUBSYSTEM S_LMV
26 #ifdef __KERNEL__
27 #include <linux/slab.h>
28 #include <linux/module.h>
29 #include <linux/init.h>
30 #include <linux/slab.h>
31 #include <linux/pagemap.h>
32 #include <asm/div64.h>
33 #include <linux/seq_file.h>
34 #else
35 #include <liblustre.h>
36 #endif
37 #include <linux/ext2_fs.h>
38
39 #include <linux/obd_support.h>
40 #include <linux/lustre_lib.h>
41 #include <linux/lustre_net.h>
42 #include <linux/lustre_idl.h>
43 #include <linux/lustre_dlm.h>
44 #include <linux/lustre_mds.h>
45 #include <linux/obd_class.h>
46 #include <linux/obd_ost.h>
47 #include <linux/lprocfs_status.h>
48 #include <linux/lustre_fsfilt.h>
49 #include <linux/obd_lmv.h>
50 #include "lmv_internal.h"
51
52 /* Error codes:
53  *
54  *  -EINVAL  : UUID can't be found in the LMV's target list
55  *  -ENOTCONN: The UUID is found, but the target connection is bad (!)
56  *  -EBADF   : The UUID is found, but the OBD of the wrong type (!)
57  */
58 static int lmv_set_mdc_active(struct lmv_obd *lmv, struct obd_uuid *uuid,
59                               int activate)
60 {
61         struct obd_device *obd;
62         struct lmv_tgt_desc *tgt;
63         int i, rc = 0;
64         ENTRY;
65
66         CDEBUG(D_INFO, "Searching in lmv %p for uuid %s (activate=%d)\n",
67                lmv, uuid->uuid, activate);
68
69         spin_lock(&lmv->lmv_lock);
70         for (i = 0, tgt = lmv->tgts; i < lmv->desc.ld_tgt_count; i++, tgt++) {
71                 CDEBUG(D_INFO, "lmv idx %d is %s conn "LPX64"\n",
72                        i, tgt->uuid.uuid, tgt->ltd_exp->exp_handle.h_cookie);
73                 if (strncmp(uuid->uuid, tgt->uuid.uuid, sizeof uuid->uuid) == 0)
74                         break;
75         }
76
77         if (i == lmv->desc.ld_tgt_count)
78                 GOTO(out, rc = -EINVAL);
79
80         obd = class_exp2obd(tgt->ltd_exp);
81         if (obd == NULL) {
82                 /* This can happen if OST failure races with node shutdown */
83                 GOTO(out, rc = -ENOTCONN);
84         }
85
86         CDEBUG(D_INFO, "Found OBD %s=%s device %d (%p) type %s at LMV idx %d\n",
87                obd->obd_name, obd->obd_uuid.uuid, obd->obd_minor, obd,
88                obd->obd_type->typ_name, i);
89         LASSERT(strcmp(obd->obd_type->typ_name, LUSTRE_MDC_NAME) == 0);
90
91         if (tgt->active == activate) {
92                 CDEBUG(D_INFO, "OBD %p already %sactive!\n", obd,
93                        activate ? "" : "in");
94                 GOTO(out, rc);
95         }
96
97         CDEBUG(D_INFO, "Marking OBD %p %sactive\n", obd, activate ? "" : "in");
98
99         tgt->active = activate;
100         if (activate)
101                 lmv->desc.ld_active_tgt_count++;
102         else
103                 lmv->desc.ld_active_tgt_count--;
104
105         EXIT;
106  out:
107         spin_unlock(&lmv->lmv_lock);
108         return rc;
109 }
110
111 static int lmv_notify(struct obd_device *obd, struct obd_device *watched,
112                       int active)
113 {
114         int rc;
115         struct obd_uuid *uuid;
116
117         if (strcmp(watched->obd_type->typ_name, LUSTRE_MDC_NAME)) {
118                 CERROR("unexpected notification of %s %s!\n",
119                        watched->obd_type->typ_name,
120                        watched->obd_name);
121                 return -EINVAL;
122         }
123         uuid = &watched->u.cli.cl_import->imp_target_uuid;
124
125         /* Set MDC as active before notifying the observer, so the
126          * observer can use the MDC normally.  
127          */
128         rc = lmv_set_mdc_active(&obd->u.lmv, uuid, active);
129         if (rc) {
130                 CERROR("%sactivation of %s failed: %d\n",
131                        active ? "" : "de", uuid->uuid, rc);
132                 RETURN(rc);
133         }
134
135         if (obd->obd_observer)
136                 /* Pass the notification up the chain. */
137                 rc = obd_notify(obd->obd_observer, watched, active);
138
139         RETURN(rc);
140 }
141
142 int lmv_attach(struct obd_device *dev, obd_count len, void *data)
143 {
144         struct lprocfs_static_vars lvars;
145         int rc;
146         ENTRY;
147
148         lprocfs_init_vars(lmv, &lvars);
149         rc = lprocfs_obd_attach(dev, lvars.obd_vars);
150         if (rc == 0) {
151 #ifdef __KERNEL__
152                 struct proc_dir_entry *entry;
153                 
154                 entry = create_proc_entry("target_obd", 0444, dev->obd_proc_entry);
155                 if (entry == NULL)
156                         RETURN(-ENOMEM);
157                 /* entry->proc_fops = &lmv_proc_target_fops; */
158                 entry->data = dev;
159 #endif
160        }
161         RETURN (rc);
162 }
163
/* Detach method: drop the lprocfs registration of @dev and pass the result
 * of lprocfs_obd_detach() back to the caller. */
int lmv_detach(struct obd_device *dev)
{
        int status;

        status = lprocfs_obd_detach(dev);
        return status;
}
168
169 /* This is fake connect function. Its purpose is to initialize lmv and 
170  * say caller that everything is okay. Real connection will be performed
171  * later. */
172 static int lmv_connect(struct lustre_handle *conn, struct obd_device *obd,
173                        struct obd_uuid *cluuid)
174 {
175         struct lmv_obd *lmv = &obd->u.lmv;
176         struct obd_export *exp;
177         int rc;
178         ENTRY;
179
180         rc = class_connect(conn, obd, cluuid);
181         if (rc) {
182                 CERROR("class_connection() returned %d\n", rc);
183                 RETURN(rc);
184         }
185
186         exp = class_conn2export(conn);
187         /* We don't want to actually do the underlying connections more than
188          * once, so keep track. */
189         lmv->refcount++;
190         if (lmv->refcount > 1) {
191                 class_export_put(exp);
192                 RETURN(0);
193         }
194
195         lmv->cluuid = *cluuid;
196         lmv->connected = 0;
197         lmv->exp = exp;
198
199         RETURN(0);
200 }
201
202 void lmv_set_timeouts(struct obd_device *obd)
203 {
204         struct lmv_tgt_desc *tgts;
205         struct lmv_obd *lmv;
206         int i;
207
208         lmv = &obd->u.lmv;
209         if (lmv->server_timeout == 0)
210                 return;
211
212         if (lmv->connected == 0)
213                 return;
214
215         for (i = 0, tgts = lmv->tgts; i < lmv->desc.ld_tgt_count; i++, tgts++) {
216                 if (tgts->ltd_exp == NULL)
217                         continue;
218                 obd_set_info(tgts->ltd_exp, strlen("inter_mds"),
219                              "inter_mds", 0, NULL);
220         }
221 }
222
223 /* Performs a check if passed obd is connected. If no - connect it. */
224 int lmv_check_connect(struct obd_device *obd) {
225         struct lmv_obd *lmv = &obd->u.lmv;
226         struct obd_uuid *cluuid;
227         struct lmv_tgt_desc *tgts;
228         struct obd_export *exp;
229         int rc, rc2, i;
230         ENTRY;
231
232         if (lmv->connected)
233                 RETURN(0);
234       
235         lmv->connected = 1;
236         cluuid = &lmv->cluuid;
237         exp = lmv->exp;
238         CDEBUG(D_OTHER, "time to connect %s to %s\n",
239                         cluuid->uuid, obd->obd_name);
240
241         for (i = 0, tgts = lmv->tgts; i < lmv->desc.ld_tgt_count; i++, tgts++) {
242                 struct obd_device *tgt_obd;
243                 struct obd_uuid lmv_osc_uuid = { "LMV_OSC_UUID" };
244                 struct lustre_handle conn = {0, };
245
246                 LASSERT(tgts != NULL);
247
248                 tgt_obd = class_find_client_obd(&tgts->uuid, LUSTRE_MDC_NAME, 
249                                                 &obd->obd_uuid);
250                 if (!tgt_obd) {
251                         CERROR("Target %s not attached\n", tgts->uuid.uuid);
252                         GOTO(out_disc, rc = -EINVAL);
253                 }
254
255                 /* for MDS: don't connect to yourself */
256                 if (obd_uuid_equals(&tgts->uuid, cluuid)) {
257                         CDEBUG(D_OTHER, "don't connect back to %s\n",
258                                cluuid->uuid);
259                         tgts->ltd_exp = NULL;
260                         continue;
261                 }
262
263                 CDEBUG(D_OTHER, "connect to %s(%s) - %s, %s FOR %s\n",
264                         tgt_obd->obd_name, tgt_obd->obd_uuid.uuid,
265                         tgts->uuid.uuid, obd->obd_uuid.uuid,
266                         cluuid->uuid);
267
268                 if (!tgt_obd->obd_set_up) {
269                         CERROR("Target %s not set up\n", tgts->uuid.uuid);
270                         GOTO(out_disc, rc = -EINVAL);
271                 }
272                 
273                 rc = obd_connect(&conn, tgt_obd, &lmv_osc_uuid);
274                 if (rc) {
275                         CERROR("Target %s connect error %d\n",
276                                 tgts->uuid.uuid, rc);
277                         GOTO(out_disc, rc);
278                 }
279                 tgts->ltd_exp = class_conn2export(&conn);
280
281                 obd_init_ea_size(tgts->ltd_exp, lmv->max_easize,
282                                  lmv->max_cookiesize);
283                 
284                 rc = obd_register_observer(tgt_obd, obd);
285                 if (rc) {
286                         CERROR("Target %s register_observer error %d\n",
287                                tgts->uuid.uuid, rc);
288                         obd_disconnect(tgts->ltd_exp, 0);
289                         GOTO(out_disc, rc);
290                 }
291
292                 lmv->desc.ld_active_tgt_count++;
293                 tgts->active = 1;
294                 
295                 CDEBUG(D_OTHER, "connected to %s(%s) successfully (%d)\n",
296                         tgt_obd->obd_name, tgt_obd->obd_uuid.uuid,
297                         atomic_read(&obd->obd_refcount));
298         }
299
300         lmv_set_timeouts(obd);
301
302         class_export_put(exp);
303         RETURN (0);
304
305  out_disc:
306         while (i-- > 0) {
307                 struct obd_uuid uuid;
308                 --tgts;
309                 --lmv->desc.ld_active_tgt_count;
310                 tgts->active = 0;
311                 /* save for CERROR below; (we know it's terminated) */
312                 uuid = tgts->uuid;
313                 rc2 = obd_disconnect(tgts->ltd_exp, 0);
314                 if (rc2)
315                         CERROR("error: LMV target %s disconnect on MDT idx %d: "
316                                "rc = %d\n", uuid.uuid, i, rc2);
317         }
318         class_disconnect(exp, 0);
319         RETURN (rc);
320 }
321
322 static int lmv_disconnect(struct obd_export *exp, int flags)
323 {
324         struct obd_device *obd = class_exp2obd(exp);
325         struct lmv_obd *lmv = &obd->u.lmv;
326         int rc, i;
327         ENTRY;
328
329         if (!lmv->tgts)
330                 goto out_local;
331
332         /* Only disconnect the underlying layers on the final disconnect. */
333         lmv->refcount--;
334         if (lmv->refcount != 0)
335                 goto out_local;
336
337         for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
338                 if (lmv->tgts[i].ltd_exp == NULL)
339                         continue;
340
341                 if (obd->obd_no_recov) {
342                         /* Pass it on to our clients.
343                          * XXX This should be an argument to disconnect,
344                          * XXX not a back-door flag on the OBD.  Ah well.
345                          */
346                         struct obd_device *mdc_obd;
347                         mdc_obd = class_exp2obd(lmv->tgts[i].ltd_exp);
348                         if (mdc_obd)
349                                 mdc_obd->obd_no_recov = 1;
350                 }
351
352                 CDEBUG(D_OTHER, "disconnected from %s(%s) successfully\n",
353                         lmv->tgts[i].ltd_exp->exp_obd->obd_name,
354                         lmv->tgts[i].ltd_exp->exp_obd->obd_uuid.uuid);
355
356                 obd_register_observer(lmv->tgts[i].ltd_exp->exp_obd, NULL);
357
358                 rc = obd_disconnect(lmv->tgts[i].ltd_exp, flags);
359                 if (lmv->tgts[i].active) {
360                         lmv->desc.ld_active_tgt_count--;
361                         lmv->tgts[i].active = 0;
362                 }
363                 lmv->tgts[i].ltd_exp = NULL;
364         }
365
366  out_local:
367         /* FIXME: cleanup here */
368         if (!lmv->connected)
369                 class_export_put(exp);
370         rc = class_disconnect(exp, 0);
371         RETURN(rc);
372 }
373
374 static int lmv_setup(struct obd_device *obd, obd_count len, void *buf)
375 {
376         struct lustre_cfg *lcfg = buf;
377         struct lmv_desc *desc;
378         struct lmv_obd *lmv = &obd->u.lmv;
379         struct obd_uuid *uuids;
380         struct lmv_tgt_desc *tgts;
381         int i;
382         int rc = 0;
383         ENTRY;
384
385         if (lcfg->lcfg_inllen1 < 1) {
386                 CERROR("LMV setup requires a descriptor\n");
387                 RETURN(-EINVAL);
388         }
389
390         if (lcfg->lcfg_inllen2 < 1) {
391                 CERROR("LMV setup requires an OST UUID list\n");
392                 RETURN(-EINVAL);
393         }
394
395         desc = (struct lmv_desc *)lcfg->lcfg_inlbuf1;
396         if (sizeof(*desc) > lcfg->lcfg_inllen1) {
397                 CERROR("descriptor size wrong: %d > %d\n",
398                        (int)sizeof(*desc), lcfg->lcfg_inllen1);
399                 RETURN(-EINVAL);
400         }
401
402         uuids = (struct obd_uuid *)lcfg->lcfg_inlbuf2;
403         if (sizeof(*uuids) * desc->ld_tgt_count != lcfg->lcfg_inllen2) {
404                 CERROR("UUID array size wrong: %u * %u != %u\n",
405                        sizeof(*uuids), desc->ld_tgt_count, lcfg->lcfg_inllen2);
406                 RETURN(-EINVAL);
407         }
408
409         lmv->bufsize = sizeof(struct lmv_tgt_desc) * desc->ld_tgt_count;
410         OBD_ALLOC(lmv->tgts, lmv->bufsize);
411         if (lmv->tgts == NULL) {
412                 CERROR("Out of memory\n");
413                 RETURN(-EINVAL);
414         }
415
416         lmv->desc = *desc;
417         spin_lock_init(&lmv->lmv_lock);
418         
419         for (i = 0, tgts = lmv->tgts; i < desc->ld_tgt_count; i++, tgts++)
420                 tgts->uuid = uuids[i];
421         
422         lmv->max_easize = sizeof(struct ll_fid) * desc->ld_tgt_count
423                 + sizeof(struct mea);
424         lmv->max_cookiesize = 0;
425
426         RETURN(rc);
427 }
428
429 static int lmv_statfs(struct obd_device *obd, struct obd_statfs *osfs,
430                       unsigned long max_age)
431 {
432         struct lmv_obd *lmv = &obd->u.lmv;
433         struct obd_statfs temp;
434         int rc = 0, i;
435         ENTRY;
436         
437         rc = lmv_check_connect(obd);
438         if (rc)
439                 RETURN(rc);
440                 
441         for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
442                 rc = obd_statfs(lmv->tgts[i].ltd_exp->exp_obd, &temp, max_age);
443                 if (rc) {
444                         CERROR("can't stat MDS #%d (%s)\n", i,
445                                lmv->tgts[i].ltd_exp->exp_obd->obd_name);
446                         RETURN(rc);
447                 }
448                 if (i == 0) {
449                         memcpy(osfs, &temp, sizeof(temp));
450                 } else {
451                         osfs->os_bavail += temp.os_bavail;
452                         osfs->os_blocks += temp.os_blocks;
453                         osfs->os_ffree += temp.os_ffree;
454                         osfs->os_files += temp.os_files;
455                 }
456         }
457         RETURN(rc);
458 }
459
460 static int lmv_cleanup(struct obd_device *obd, int flags) 
461 {
462         struct lmv_obd *lmv = &obd->u.lmv;
463         ENTRY;
464         lmv_cleanup_objs(obd);
465         OBD_FREE(lmv->tgts, lmv->bufsize);
466         RETURN(0);
467 }
468
469 static int lmv_getstatus(struct obd_export *exp, struct ll_fid *fid)
470 {
471         struct obd_device *obd = exp->exp_obd;
472         struct lmv_obd *lmv = &obd->u.lmv;
473         int rc;
474         ENTRY;
475         rc = lmv_check_connect(obd);
476         if (rc)
477                 RETURN(rc);
478         rc = md_getstatus(lmv->tgts[0].ltd_exp, fid);
479         fid->mds = 0;
480         RETURN(rc);
481 }
482
483 static int lmv_getattr(struct obd_export *exp, struct ll_fid *fid,
484                 unsigned long valid, unsigned int ea_size,
485                 struct ptlrpc_request **request)
486 {
487         struct obd_device *obd = exp->exp_obd;
488         struct lmv_obd *lmv = &obd->u.lmv;
489         int rc, i = fid->mds;
490         struct lmv_obj *obj;
491         ENTRY;
492         rc = lmv_check_connect(obd);
493         if (rc)
494                 RETURN(rc);
495         obj = lmv_grab_obj(obd, fid, 0);
496         CDEBUG(D_OTHER, "GETATTR for %lu/%lu/%lu %s\n",
497                (unsigned long) fid->mds,
498                (unsigned long) fid->id,
499                (unsigned long) fid->generation,
500                obj ? "(splitted)" : "");
501
502         LASSERT(fid->mds < lmv->desc.ld_tgt_count);
503         rc = md_getattr(lmv->tgts[i].ltd_exp, fid,
504                              valid, ea_size, request);
505         if (rc == 0 && obj) {
506                 /* we have to loop over dirobjs here and gather attrs
507                  * for all the slaves */
508 #warning "attrs gathering here"
509         }
510         lmv_put_obj(obj);
511         RETURN(rc);
512 }
513
514 static int lmv_change_cbdata(struct obd_export *exp,
515                                  struct ll_fid *fid, 
516                                  ldlm_iterator_t it, void *data)
517 {
518         struct obd_device *obd = exp->exp_obd;
519         struct lmv_obd *lmv = &obd->u.lmv;
520         int rc = 0;
521         ENTRY;
522         
523         rc = lmv_check_connect(obd);
524         if (rc)
525                 RETURN(rc);
526         CDEBUG(D_OTHER, "CBDATA for %lu/%lu/%lu\n",
527                (unsigned long) fid->mds,
528                (unsigned long) fid->id,
529                (unsigned long) fid->generation);
530         LASSERT(fid->mds < lmv->desc.ld_tgt_count);
531         rc = md_change_cbdata(lmv->tgts[fid->mds].ltd_exp, fid, it, data);
532         RETURN(rc);
533 }
534
535 static int lmv_change_cbdata_name(struct obd_export *exp, struct ll_fid *pfid,
536                                   char *name, int len, struct ll_fid *cfid,
537                                   ldlm_iterator_t it, void *data)
538 {
539         struct obd_device *obd = exp->exp_obd;
540         struct lmv_obd *lmv = &obd->u.lmv;
541         struct lmv_obj *obj;
542         int rc = 0, mds;
543         ENTRY;
544         rc = lmv_check_connect(obd);
545         if (rc)
546                 RETURN(rc);
547         LASSERT(pfid->mds < lmv->desc.ld_tgt_count);
548         LASSERT(cfid->mds < lmv->desc.ld_tgt_count);
549         CDEBUG(D_OTHER, "CBDATA for %lu/%lu/%lu:%*s -> %lu/%lu/%lu\n",
550                (unsigned long) pfid->mds, (unsigned long) pfid->id,
551                (unsigned long) pfid->generation, len, name,
552                (unsigned long) cfid->mds, (unsigned long) cfid->id,
553                (unsigned long) cfid->generation);
554
555         /* this is default mds for directory name belongs to */
556         mds = pfid->mds;
557         obj = lmv_grab_obj(obd, pfid, 0);
558         if (obj) {
559                 /* directory is splitted. look for right mds for this name */
560                 mds = raw_name2idx(obj->objcount, name, len);
561                 lmv_put_obj(obj);
562         }
563         rc = md_change_cbdata(lmv->tgts[mds].ltd_exp, cfid, it, data);
564         RETURN(rc);
565 }
566
567 static int lmv_valid_attrs(struct obd_export *exp, struct ll_fid *fid) 
568 {
569         struct obd_device *obd = exp->exp_obd;
570         struct lmv_obd *lmv = &obd->u.lmv;
571         int rc = 0;
572         ENTRY;
573         rc = lmv_check_connect(obd);
574         if (rc)
575                 RETURN(rc);
576         CDEBUG(D_OTHER, "validate %lu/%lu/%lu\n",
577                (unsigned long) fid->mds,
578                (unsigned long) fid->id,
579                (unsigned long) fid->generation);
580         LASSERT(fid->mds < lmv->desc.ld_tgt_count);
581         rc = md_valid_attrs(lmv->tgts[fid->mds].ltd_exp, fid);
582         RETURN(rc);
583 }
584
585 int lmv_close(struct obd_export *exp, struct obdo *obdo,
586                   struct obd_client_handle *och,
587                   struct ptlrpc_request **request)
588 {
589         struct obd_device *obd = exp->exp_obd;
590         struct lmv_obd *lmv = &obd->u.lmv;
591         int rc, i = obdo->o_mds;
592         ENTRY;
593         rc = lmv_check_connect(obd);
594         if (rc)
595                 RETURN(rc);
596         LASSERT(i < lmv->desc.ld_tgt_count);
597         CDEBUG(D_OTHER, "CLOSE %lu/%lu/%lu\n", (unsigned long) obdo->o_mds,
598                (unsigned long) obdo->o_id, (unsigned long) obdo->o_generation);
599         rc = md_close(lmv->tgts[i].ltd_exp, obdo, och, request);
600         RETURN(rc);
601 }
602
603 int lmv_get_mea_and_update_object(struct obd_export *exp, struct ll_fid *fid)
604 {
605         struct obd_device *obd = exp->exp_obd;
606         struct lmv_obd *lmv = &obd->u.lmv;
607         struct ptlrpc_request *req = NULL;
608         struct lustre_md md;
609         unsigned long valid;
610         int mealen, rc;
611
612         md.mea = NULL;
613         mealen = MEA_SIZE_LMV(lmv);
614         
615         valid = OBD_MD_FLEASIZE | OBD_MD_FLDIREA;
616
617         /* time to update mea of parent fid */
618         rc = md_getattr(lmv->tgts[fid->mds].ltd_exp, fid,
619                         valid, mealen, &req);
620         if (rc) {
621                 CERROR("md_getattr() failed, rc = %d\n", rc);
622                 GOTO(cleanup, rc);
623         }
624
625         rc = mdc_req2lustre_md(exp, req, 0, NULL, &md);
626         if (rc) {
627                 CERROR("mdc_req2lustre_md() failed, rc = %d\n", rc);
628                 GOTO(cleanup, rc);
629         }
630
631         if (md.mea == NULL)
632                 GOTO(cleanup, rc = -ENODATA);
633
634         rc = lmv_create_obj_from_attrs(exp, fid, md.mea);
635         obd_free_memmd(exp, (struct lov_stripe_md **) &md.mea);
636
637 cleanup:
638         if (req)
639                 ptlrpc_req_finished(req);
640         RETURN(rc);
641 }
642
643 int lmv_create(struct obd_export *exp, struct mdc_op_data *op_data,
644                const void *data, int datalen, int mode, __u32 uid,
645                __u32 gid, __u64 rdev, struct ptlrpc_request **request)
646 {
647         struct obd_device *obd = exp->exp_obd;
648         struct lmv_obd *lmv = &obd->u.lmv;
649         struct mds_body *mds_body;
650         struct lmv_obj *obj;
651         int rc, mds;
652         ENTRY;
653
654         rc = lmv_check_connect(obd);
655         if (rc)
656                 RETURN(rc);
657
658         if (!lmv->desc.ld_active_tgt_count)
659                 RETURN(-EIO);
660 repeat:
661         obj = lmv_grab_obj(obd, &op_data->fid1, 0);
662         if (obj) {
663                 mds = raw_name2idx(obj->objcount, op_data->name,
664                                    op_data->namelen);
665                 op_data->fid1 = obj->objs[mds].fid;
666                 lmv_put_obj(obj);
667         }
668
669         CDEBUG(D_OTHER, "CREATE '%*s' on %lu/%lu/%lu\n",
670                         op_data->namelen, op_data->name,
671                         (unsigned long) op_data->fid1.mds,
672                         (unsigned long) op_data->fid1.id,
673                         (unsigned long) op_data->fid1.generation);
674         rc = md_create(lmv->tgts[op_data->fid1.mds].ltd_exp, op_data, data,
675                        datalen, mode, uid, gid, rdev, request);
676         if (rc == 0) {
677                 if (*request == NULL)
678                      RETURN(rc);
679                 mds_body = lustre_msg_buf((*request)->rq_repmsg, 0,
680                                           sizeof(*mds_body));
681                 LASSERT(mds_body != NULL);
682                 CDEBUG(D_OTHER, "created. id = %lu, generation = %lu, mds = %d\n",
683                        (unsigned long) mds_body->fid1.id,
684                        (unsigned long) mds_body->fid1.generation,
685                        op_data->fid1.mds);
686                 LASSERT(mds_body->valid & OBD_MD_MDS ||
687                         mds_body->mds == op_data->fid1.mds);
688         } else if (rc == -ERESTART) {
689                 /* directory got splitted. time to update local object
690                  * and repeat the request with proper MDS */
691                 rc = lmv_get_mea_and_update_object(exp, &op_data->fid1);
692                 if (rc == 0) {
693                         ptlrpc_req_finished(*request);
694                         goto repeat;
695                 }
696         }
697         RETURN(rc);
698 }
699
700 int lmv_done_writing(struct obd_export *exp, struct obdo *obdo)
701 {
702         struct obd_device *obd = exp->exp_obd;
703         struct lmv_obd *lmv = &obd->u.lmv;
704         int rc;
705         ENTRY;
706         rc = lmv_check_connect(obd);
707         if (rc)
708                 RETURN(rc);
709
710         /* FIXME: choose right MDC here */
711         rc = md_done_writing(lmv->tgts[0].ltd_exp, obdo);
712         RETURN(rc);
713 }
714
715 int lmv_enqueue(struct obd_export *exp, int lock_type,
716                 struct lookup_intent *it, int lock_mode,
717                 struct mdc_op_data *data, struct lustre_handle *lockh,
718                 void *lmm, int lmmsize,
719                 ldlm_completion_callback cb_completion,
720                 ldlm_blocking_callback cb_blocking, void *cb_data)
721 {
722         struct obd_device *obd = exp->exp_obd;
723         struct lmv_obd *lmv = &obd->u.lmv;
724         struct lmv_obj *obj;
725         int rc, mds;
726         ENTRY;
727
728         rc = lmv_check_connect(obd);
729         if (rc)
730                 RETURN(rc);
731
732         if (data->namelen) {
733                 obj = lmv_grab_obj(obd, &data->fid1, 0);
734                 if (obj) {
735                         /* directory is splitted. look for
736                          * right mds for this name */
737                         mds = raw_name2idx(obj->objcount, (char *)data->name,
738                                            data->namelen);
739                         data->fid1 = obj->objs[mds].fid;
740                         lmv_put_obj(obj);
741                 }
742         }
743         CDEBUG(D_OTHER, "ENQUEUE '%s' on %lu/%lu\n",
744                LL_IT2STR(it), (unsigned long) data->fid1.id,
745                (unsigned long) data->fid1.generation);
746         rc = md_enqueue(lmv->tgts[data->fid1.mds].ltd_exp, lock_type, it,
747                         lock_mode, data, lockh, lmm, lmmsize, cb_completion,
748                         cb_blocking, cb_data);
749
750         RETURN(rc);
751 }
752
753 int lmv_getattr_name(struct obd_export *exp, struct ll_fid *fid,
754                          char *filename, int namelen, unsigned long valid,
755                          unsigned int ea_size, struct ptlrpc_request **request)
756 {
757         struct obd_device *obd = exp->exp_obd;
758         struct lmv_obd *lmv = &obd->u.lmv;
759         struct ll_fid rfid = *fid;
760         int rc, mds = fid->mds;
761         struct mds_body *body;
762         struct lmv_obj *obj;
763         ENTRY;
764         rc = lmv_check_connect(obd);
765         if (rc)
766                 RETURN(rc);
767 repeat:
768         obj = lmv_grab_obj(obd, fid, 0);
769         if (obj) {
770                 /* directory is splitted. look for right mds for this name */
771                 mds = raw_name2idx(obj->objcount, filename, namelen - 1);
772                 rfid = obj->objs[mds].fid;
773                 lmv_put_obj(obj);
774         }
775         CDEBUG(D_OTHER, "getattr_name for %*s on %lu/%lu/%lu -> %lu/%lu/%lu\n",
776                namelen, filename, (unsigned long) fid->mds,
777                (unsigned long) fid->id, (unsigned long) fid->generation,
778                (unsigned long) rfid.mds, (unsigned long) rfid.id,
779                (unsigned long) rfid.generation);
780         rc = md_getattr_name(lmv->tgts[mds].ltd_exp, &rfid, filename, namelen,
781                                   valid, ea_size, request);
782         if (rc == 0) {
783                 /* this could be cross-node reference. in this case all
784                  * we have right now is mds/ino/generation triple. we'd
785                  * like to find other attributes */
786                 body = lustre_msg_buf((*request)->rq_repmsg, 0, sizeof(*body));
787                 LASSERT(body != NULL);
788                 if (body->valid & OBD_MD_MDS) {
789                         struct ptlrpc_request *req = NULL;
790                         rfid = body->fid1;
791                         CDEBUG(D_OTHER, "request attrs for %lu/%lu/%lu\n",
792                                (unsigned long) rfid.mds,
793                                (unsigned long) rfid.id,
794                                (unsigned long) rfid.generation);
795                         rc = md_getattr_name(lmv->tgts[rfid.mds].ltd_exp, &rfid,
796                                              NULL, 1, valid, ea_size, &req);
797                         ptlrpc_req_finished(*request);
798                         *request = req;
799                 }
800         } else if (rc == -ERESTART) {
801                 /* directory got splitted. time to update local object
802                  * and repeat the request with proper MDS */
803                 rc = lmv_get_mea_and_update_object(exp, &rfid);
804                 if (rc == 0) {
805                         ptlrpc_req_finished(*request);
806                         goto repeat;
807                 }
808         }
809         RETURN(rc);
810 }
811
812
813 /*
814  * llite passes fid of an target inode in data->fid1 and
815  * fid of directory in data->fid2
816  */
817 int lmv_link(struct obd_export *exp, struct mdc_op_data *data,
818              struct ptlrpc_request **request)
819 {
820         struct obd_device *obd = exp->exp_obd;
821         struct lmv_obd *lmv = &obd->u.lmv;
822         struct lmv_obj *obj;
823         int rc;
824         ENTRY;
825         rc = lmv_check_connect(obd);
826         if (rc)
827                 RETURN(rc);
828         if (data->namelen != 0) {
829                 /* usual link request */
830                 obj = lmv_grab_obj(obd, &data->fid1, 0);
831                 if (obj) {
832                         rc = raw_name2idx(obj->objcount, data->name,
833                                          data->namelen);
834                         data->fid1 = obj->objs[rc].fid;
835                         lmv_put_obj(obj);
836                 }
837                 CDEBUG(D_OTHER,"link %u/%u/%u:%*s to %u/%u/%u mds %d\n",
838                        (unsigned) data->fid2.mds, (unsigned) data->fid2.id,
839                        (unsigned) data->fid2.generation, data->namelen,
840                        data->name, (unsigned) data->fid1.mds,
841                        (unsigned) data->fid1.id,
842                        (unsigned) data->fid1.generation, data->fid1.mds);
843         } else {
844                 /* request from MDS to acquire i_links for inode by fid1 */
845                 CDEBUG(D_OTHER, "inc i_nlinks for %u/%u/%u\n",
846                        (unsigned) data->fid1.mds, (unsigned) data->fid1.id,
847                        (unsigned) data->fid1.generation);
848         }
849                         
850         rc = md_link(lmv->tgts[data->fid1.mds].ltd_exp, data, request);
851         RETURN(rc);
852 }
853
854 int lmv_rename(struct obd_export *exp, struct mdc_op_data *data,
855                const char *old, int oldlen, const char *new, int newlen,
856                struct ptlrpc_request **request)
857 {
858         struct obd_device *obd = exp->exp_obd;
859         struct lmv_obd *lmv = &obd->u.lmv;
860         struct lmv_obj *obj;
861         int rc, mds;
862         ENTRY;
863
864         CDEBUG(D_OTHER, "rename %*s in %lu/%lu/%lu to %*s in %lu/%lu/%lu\n",
865                oldlen, old, (unsigned long) data->fid1.mds,
866                (unsigned long) data->fid1.id,
867                (unsigned long) data->fid1.generation,
868                newlen, new, (unsigned long) data->fid2.mds,
869                (unsigned long) data->fid2.id,
870                (unsigned long) data->fid2.generation);
871         if (!fid_equal(&data->fid1, &data->fid2))
872                 CWARN("cross-node rename %lu/%lu/%lu:%*s to %lu/%lu/%lu:%*s\n",
873                       (unsigned long) data->fid1.mds,
874                       (unsigned long) data->fid1.id,
875                       (unsigned long) data->fid1.generation, oldlen, old,
876                       (unsigned long) data->fid2.mds,
877                       (unsigned long) data->fid2.id,
878                       (unsigned long) data->fid2.generation, newlen, new);
879
880         rc = lmv_check_connect(obd);
881         if (rc)
882                 RETURN(rc);
883
884         if (oldlen == 0) {
885                 /* MDS with old dir entry is asking another MDS
886                  * to create name there */
887                 CDEBUG(D_OTHER,
888                        "create %*s(%d/%d) in %lu/%lu/%lu pointing to %lu/%lu/%lu\n",
889                        newlen, new, oldlen, newlen,
890                        (unsigned long) data->fid2.mds,
891                        (unsigned long) data->fid2.id,
892                        (unsigned long) data->fid2.generation,
893                        (unsigned long) data->fid1.mds,
894                        (unsigned long) data->fid1.id,
895                        (unsigned long) data->fid1.generation);
896                 mds = data->fid2.mds;
897                 goto request;
898         }
899
900         obj = lmv_grab_obj(obd, &data->fid1, 0);
901         if (obj) {
902                 /* directory is already splitted, so we have to forward
903                  * request to the right MDS */
904                 mds = raw_name2idx(obj->objcount, (char *)old, oldlen);
905                 data->fid1 = obj->objs[mds].fid;
906                 CDEBUG(D_OTHER, "forward to MDS #%u (%lu/%lu/%lu)\n", mds,
907                        (unsigned long) obj->objs[mds].fid.mds,
908                        (unsigned long) obj->objs[mds].fid.id,
909                        (unsigned long) obj->objs[mds].fid.generation);
910         }
911         lmv_put_obj(obj);
912
913         obj = lmv_grab_obj(obd, &data->fid2, 0);
914         if (obj) {
915                 /* directory is already splitted, so we have to forward
916                  * request to the right MDS */
917                 mds = raw_name2idx(obj->objcount, (char *)new, newlen);
918                 data->fid2 = obj->objs[mds].fid;
919                 CDEBUG(D_OTHER, "forward to MDS #%u (%lu/%lu/%lu)\n", mds,
920                        (unsigned long) obj->objs[mds].fid.mds,
921                        (unsigned long) obj->objs[mds].fid.id,
922                        (unsigned long) obj->objs[mds].fid.generation);
923         }
924         lmv_put_obj(obj);
925         
926         mds = data->fid1.mds;
927
928 request:
929         rc = md_rename(lmv->tgts[mds].ltd_exp, data, old, oldlen,
930                        new, newlen, request); 
931         RETURN(rc);
932 }
933
934 int lmv_setattr(struct obd_export *exp, struct mdc_op_data *data,
935                 struct iattr *iattr, void *ea, int ealen, void *ea2, int ea2len,
936                 struct ptlrpc_request **request)
937 {
938         struct obd_device *obd = exp->exp_obd;
939         struct lmv_obd *lmv = &obd->u.lmv;
940         int rc = 0, i = data->fid1.mds;
941         struct ptlrpc_request *req;
942         struct mds_body *mds_body;
943         struct lmv_obj *obj;
944         ENTRY;
945
946         rc = lmv_check_connect(obd);
947         if (rc)
948                 RETURN(rc);
949
950         obj = lmv_grab_obj(obd, &data->fid1, 0);
951         CDEBUG(D_OTHER, "SETATTR for %lu/%lu/%lu, valid 0x%x%s\n",
952                (unsigned long) data->fid1.mds,
953                (unsigned long) data->fid1.id,
954                (unsigned long) data->fid1.generation, iattr->ia_valid,
955                obj ? ", splitted" : "");
956         if (obj) {
957                 for (i = 0; i < obj->objcount; i++) {
958                         data->fid1 = obj->objs[i].fid;
959                         rc = md_setattr(lmv->tgts[i].ltd_exp, data, iattr, ea,
960                                         ealen, ea2, ea2len, &req);
961                         LASSERT(rc == 0);
962                         if (fid_equal(&obj->fid, &obj->objs[i].fid)) {
963                                 /* this is master object and this request
964                                  * should be returned back to llite */
965                                 *request = req;
966                         } else {
967                                 ptlrpc_req_finished(req);
968                         }
969                 }
970                 lmv_put_obj(obj);
971         } else {
972                 LASSERT(data->fid1.mds < lmv->desc.ld_tgt_count);
973                 rc = md_setattr(lmv->tgts[i].ltd_exp, data, iattr, ea, ealen,
974                                 ea2, ea2len, request); 
975                 if (rc == 0) {
976                         mds_body = lustre_msg_buf((*request)->rq_repmsg, 0,
977                                         sizeof(*mds_body));
978                         LASSERT(mds_body != NULL);
979                         LASSERT(mds_body->mds == i);
980                 }
981         }
982         RETURN(rc);
983 }
984
985 int lmv_sync(struct obd_export *exp, struct ll_fid *fid,
986              struct ptlrpc_request **request)
987 {
988         struct obd_device *obd = exp->exp_obd;
989         struct lmv_obd *lmv = &obd->u.lmv;
990         int rc;
991         ENTRY;
992
993         rc = lmv_check_connect(obd);
994         if (rc)
995                 RETURN(rc);
996
997         rc = md_sync(lmv->tgts[0].ltd_exp, fid, request); 
998         RETURN(rc);
999 }
1000
1001 int lmv_dirobj_blocking_ast(struct ldlm_lock *lock,
1002                             struct ldlm_lock_desc *desc, void *data, int flag)
1003 {
1004         struct lustre_handle lockh;
1005         struct lmv_obj *obj;
1006         int rc;
1007         ENTRY;
1008
1009         switch (flag) {
1010         case LDLM_CB_BLOCKING:
1011                 ldlm_lock2handle(lock, &lockh);
1012                 rc = ldlm_cli_cancel(&lockh);
1013                 if (rc < 0) {
1014                         CDEBUG(D_INODE, "ldlm_cli_cancel: %d\n", rc);
1015                         RETURN(rc);
1016                 }
1017                 break;
1018         case LDLM_CB_CANCELING:
1019                 /* time to drop cached attrs for dirobj */
1020                 obj = lock->l_ast_data;
1021                 if (!obj)
1022                         break;
1023
1024                 CDEBUG(D_OTHER, "cancel %s on %lu/%lu, master %lu/%lu/%lu\n",
1025                        lock->l_resource->lr_name.name[3] == 1 ?
1026                                 "LOOKUP" : "UPDATE",
1027                        (unsigned long) lock->l_resource->lr_name.name[0],
1028                        (unsigned long) lock->l_resource->lr_name.name[1],
1029                        (unsigned long) obj->fid.mds,
1030                        (unsigned long) obj->fid.id,
1031                        (unsigned long) obj->fid.generation);
1032                 break;
1033         default:
1034                 LBUG();
1035         }
1036         RETURN(0);
1037 }
1038
1039 void lmv_remove_dots(struct page *page)
1040 {
1041         char *kaddr = page_address(page);
1042         unsigned limit = PAGE_CACHE_SIZE;
1043         unsigned offs, rec_len;
1044         struct ext2_dir_entry_2 *p;
1045
1046         for (offs = 0; offs <= limit - EXT2_DIR_REC_LEN(1); offs += rec_len) {
1047                 p = (struct ext2_dir_entry_2 *)(kaddr + offs);
1048                 rec_len = le16_to_cpu(p->rec_len);
1049
1050                 if ((p->name_len == 1 && p->name[0] == '.') ||
1051                     (p->name_len == 2 && p->name[0] == '.' && p->name[1] == '.'))
1052                         p->inode = 0;
1053         }
1054 }
1055
1056 int lmv_readpage(struct obd_export *exp, struct ll_fid *mdc_fid,
1057                  __u64 offset, struct page *page,
1058                  struct ptlrpc_request **request)
1059 {
1060         struct obd_device *obd = exp->exp_obd;
1061         struct lmv_obd *lmv = &obd->u.lmv;
1062         struct ll_fid rfid = *mdc_fid;
1063         struct lmv_obj *obj;
1064         int rc, i;
1065         ENTRY;
1066
1067         rc = lmv_check_connect(obd);
1068         if (rc)
1069                 RETURN(rc);
1070
1071         LASSERT(mdc_fid->mds < lmv->desc.ld_tgt_count);
1072         CDEBUG(D_OTHER, "READPAGE at %llu from %lu/%lu/%lu\n",
1073                offset, (unsigned long) rfid.mds,
1074                (unsigned long) rfid.id,
1075                (unsigned long) rfid.generation);
1076
1077         obj = lmv_grab_obj(obd, mdc_fid, 0);
1078         if (obj) {
1079                 /* find dirobj containing page with requested offset */
1080                 /* FIXME: what about protecting cached attrs here? */
1081                 for (i = 0; i < obj->objcount; i++) {
1082                         if (offset < obj->objs[i].size)
1083                                 break;
1084                         offset -= obj->objs[i].size;
1085                 }
1086                 rfid = obj->objs[i].fid;
1087                 CDEBUG(D_OTHER, "forward to %lu/%lu/%lu with offset %lu\n",
1088                        (unsigned long) rfid.mds,
1089                        (unsigned long) rfid.id,
1090                        (unsigned long) rfid.generation,
1091                        (unsigned long) offset);
1092         }
1093         rc = md_readpage(lmv->tgts[rfid.mds].ltd_exp, &rfid, offset, page, request);
1094         if (rc == 0 && !fid_equal(&rfid, mdc_fid)) {
1095                 /* this page isn't from master object. to avoid
1096                  * ./.. duplication in directory, we have to remove them
1097                  * from all slave objects */
1098                 lmv_remove_dots(page);
1099         }
1100       
1101         lmv_put_obj(obj);
1102
1103         RETURN(rc);
1104 }
1105
1106 int lmv_unlink(struct obd_export *exp, struct mdc_op_data *data,
1107                struct ptlrpc_request **request)
1108 {
1109         struct obd_device *obd = exp->exp_obd;
1110         struct lmv_obd *lmv = &obd->u.lmv;
1111         int rc, i = 0;
1112         ENTRY;
1113
1114         rc = lmv_check_connect(obd);
1115         if (rc)
1116                 RETURN(rc);
1117
1118         if (data->namelen != 0) {
1119                 struct lmv_obj *obj;
1120                 obj = lmv_grab_obj(obd, &data->fid1, 0);
1121                 if (obj) {
1122                         i = raw_name2idx(obj->objcount, data->name,
1123                                          data->namelen);
1124                         data->fid1 = obj->objs[i].fid;
1125                         lmv_put_obj(obj);
1126                 }
1127                 CDEBUG(D_OTHER, "unlink '%*s' in %lu/%lu/%lu -> %u\n",
1128                        data->namelen, data->name,
1129                        (unsigned long) data->fid1.mds,
1130                        (unsigned long) data->fid1.id,
1131                        (unsigned long) data->fid1.generation, i);
1132         } else {
1133                 CDEBUG(D_OTHER, "drop i_nlink on %lu/%lu/%lu\n",
1134                        (unsigned long) data->fid1.mds,
1135                        (unsigned long) data->fid1.id,
1136                        (unsigned long) data->fid1.generation);
1137         }
1138         rc = md_unlink(lmv->tgts[data->fid1.mds].ltd_exp, data, request); 
1139         RETURN(rc);
1140 }
1141
1142 struct obd_device *lmv_get_real_obd(struct obd_export *exp,
1143                                     char *name, int len)
1144 {
1145         struct obd_device *obd = exp->exp_obd;
1146         struct lmv_obd *lmv = &obd->u.lmv;
1147         int rc;
1148         ENTRY;
1149
1150         rc = lmv_check_connect(obd);
1151         if (rc)
1152                 RETURN(ERR_PTR(rc));
1153         obd = lmv->tgts[0].ltd_exp->exp_obd;
1154         EXIT;
1155         return obd;
1156 }
1157
1158 int lmv_init_ea_size(struct obd_export *exp, int easize, int cookiesize)
1159 {
1160         struct obd_device *obd = exp->exp_obd;
1161         struct lmv_obd *lmv = &obd->u.lmv;
1162         int i, rc = 0, change = 0;
1163         ENTRY;
1164
1165         if (lmv->max_easize < easize) {
1166                 lmv->max_easize = easize;
1167                 change = 1;
1168         }
1169         if (lmv->max_cookiesize < cookiesize) {
1170                 lmv->max_cookiesize = cookiesize;
1171                 change = 1;
1172         }
1173         if (change == 0)
1174                 RETURN(0);
1175         
1176         if (lmv->connected == 0)
1177                 RETURN(0);
1178
1179         /* FIXME: error handling? */
1180         for (i = 0; i < lmv->desc.ld_tgt_count; i++)
1181                 rc = obd_init_ea_size(lmv->tgts[i].ltd_exp, easize, cookiesize);
1182         RETURN(rc);
1183 }
1184
1185 int lmv_obd_create_single(struct obd_export *exp, struct obdo *oa,
1186                           struct lov_stripe_md **ea, struct obd_trans_info *oti)
1187 {
1188         struct obd_device *obd = exp->exp_obd;
1189         struct lmv_obd *lmv = &obd->u.lmv;
1190         struct lov_stripe_md obj_md;
1191         struct lov_stripe_md *obj_mdp = &obj_md;
1192         int rc = 0;
1193         ENTRY;
1194
1195         rc = lmv_check_connect(obd);
1196         if (rc)
1197                 RETURN(rc);
1198
1199         LASSERT(ea == NULL);
1200         LASSERT(oa->o_mds < lmv->desc.ld_tgt_count);
1201
1202         rc = obd_create(lmv->tgts[oa->o_mds].ltd_exp, oa, &obj_mdp, oti);
1203         LASSERT(rc == 0);
1204
1205         RETURN(rc);
1206 }
1207
1208 /*
1209  * to be called from MDS only
1210  */
1211 int lmv_obd_create(struct obd_export *exp, struct obdo *oa,
1212                struct lov_stripe_md **ea, struct obd_trans_info *oti)
1213 {
1214         struct obd_device *obd = exp->exp_obd;
1215         struct lmv_obd *lmv = &obd->u.lmv;
1216         struct mea *mea;
1217         int i, c, rc = 0;
1218         struct ll_fid mfid;
1219         ENTRY;
1220
1221         rc = lmv_check_connect(obd);
1222         if (rc)
1223                 RETURN(rc);
1224
1225         LASSERT(oa != NULL);
1226         
1227         if (ea == NULL) {
1228                 rc = lmv_obd_create_single(exp, oa, NULL, oti);
1229                 RETURN(rc);
1230         }
1231
1232         if (*ea == NULL) {
1233                 rc = obd_alloc_diskmd(exp, (struct lov_mds_md **)ea);
1234                 LASSERT(*ea != NULL);
1235         }
1236
1237         mea = (struct mea *)*ea;
1238         mfid.id = oa->o_id;
1239         mfid.generation = oa->o_generation;
1240         rc = 0;
1241         if (!mea->mea_count || mea->mea_count > lmv->desc.ld_tgt_count)
1242                 mea->mea_count = lmv->desc.ld_tgt_count;
1243
1244         mea->mea_master = -1;
1245         
1246         /* FIXME: error handling? */
1247         for (i = 0, c = 0; c < mea->mea_count && 
1248                 i < lmv->desc.ld_tgt_count; i++) {
1249                 struct lov_stripe_md obj_md;
1250                 struct lov_stripe_md *obj_mdp = &obj_md;
1251                
1252                 if (lmv->tgts[i].ltd_exp == NULL) {
1253                         /* this is master MDS */
1254                         mea->mea_fids[c].id = mfid.id;
1255                         mea->mea_fids[c].generation = mfid.generation;
1256                         mea->mea_fids[c].mds = i;
1257                         mea->mea_master = i;
1258                         c++;
1259                         continue;
1260                 }
1261
1262                 /* "Master" MDS should always be part of stripped dir, so
1263                    scan for it */
1264                 if (mea->mea_master == -1 && c == mea->mea_count - 1)
1265                         continue;
1266
1267                 oa->o_valid = OBD_MD_FLGENER | OBD_MD_FLTYPE | OBD_MD_FLMODE
1268                                 | OBD_MD_FLUID | OBD_MD_FLGID | OBD_MD_FLID;
1269
1270                 rc = obd_create(lmv->tgts[c].ltd_exp, oa, &obj_mdp, oti);
1271                 /* FIXME: error handling here */
1272                 LASSERT(rc == 0);
1273
1274                 mea->mea_fids[c].id = oa->o_id;
1275                 mea->mea_fids[c].generation = oa->o_generation;
1276                 mea->mea_fids[c].mds = i;
1277                 c++;
1278                 CDEBUG(D_OTHER, "dirobj at mds %d: "LPU64"/%u\n",
1279                        i, oa->o_id, oa->o_generation);
1280         }
1281         LASSERT(c == mea->mea_count);
1282         CDEBUG(D_OTHER, "%d dirobjects created\n", (int) mea->mea_count);
1283
1284         RETURN(rc);
1285 }
1286
1287 static int lmv_get_info(struct obd_export *exp, __u32 keylen,
1288                            void *key, __u32 *vallen, void *val)
1289 {
1290         struct obd_device *obd;
1291         struct lmv_obd *lmv;
1292         ENTRY;
1293
1294         obd = class_exp2obd(exp);
1295         if (obd == NULL) {
1296                 CDEBUG(D_IOCTL, "invalid client cookie "LPX64"\n",
1297                        exp->exp_handle.h_cookie);
1298                 RETURN(-EINVAL);
1299         }
1300
1301         lmv = &obd->u.lmv;
1302         if (keylen == 6 && memcmp(key, "mdsize", 6) == 0) {
1303                 __u32 *mdsize = val;
1304                 *vallen = sizeof(__u32);
1305                 *mdsize = sizeof(struct ll_fid) * lmv->desc.ld_tgt_count
1306                                 + sizeof(struct mea);
1307                 RETURN(0);
1308         } else if (keylen == 6 && memcmp(key, "mdsnum", 6) == 0) {
1309                 struct obd_uuid *cluuid = &lmv->cluuid;
1310                 struct lmv_tgt_desc *tgts;
1311                 __u32 *mdsnum = val;
1312                 int i;
1313
1314                 for (i = 0, tgts = lmv->tgts; i < lmv->desc.ld_tgt_count; i++, tgts++) {
1315                         if (obd_uuid_equals(&tgts->uuid, cluuid)) {
1316                                 *vallen = sizeof(__u32);
1317                                 *mdsnum = i;
1318                                 RETURN(0);
1319                         }
1320                 }
1321                 LASSERT(0);
1322         }
1323
1324         CDEBUG(D_IOCTL, "invalid key\n");
1325         RETURN(-EINVAL);
1326 }
1327
1328 int lmv_set_info(struct obd_export *exp, obd_count keylen,
1329                  void *key, obd_count vallen, void *val)
1330 {
1331         struct obd_device *obd;
1332         struct lmv_obd *lmv;
1333         ENTRY;
1334
1335         obd = class_exp2obd(exp);
1336         if (obd == NULL) {
1337                 CDEBUG(D_IOCTL, "invalid client cookie "LPX64"\n",
1338                        exp->exp_handle.h_cookie);
1339                 RETURN(-EINVAL);
1340         }
1341         lmv = &obd->u.lmv;
1342
1343         if (keylen >= strlen("client") && strcmp(key, "client") == 0) {
1344                 struct lmv_tgt_desc *tgts;
1345                 int i, rc;
1346
1347                 rc = lmv_check_connect(obd);
1348                 if (rc)
1349                         RETURN(rc);
1350
1351                 for (i = 0, tgts = lmv->tgts; 
1352                         i < lmv->desc.ld_tgt_count; i++, tgts++) {
1353                         rc = obd_set_info(tgts->ltd_exp, keylen, key, vallen, val);
1354                         if (rc)
1355                                 RETURN(rc);
1356                 }
1357                 RETURN(0);
1358         } else if (keylen >= strlen("inter_mds") && strcmp(key, "inter_mds") == 0) {
1359                 lmv->server_timeout = 1;
1360                 lmv_set_timeouts(obd);
1361                 RETURN(0);
1362         }
1363         
1364         RETURN(-EINVAL);
1365 }
1366
1367 int lmv_packmd(struct obd_export *exp, struct lov_mds_md **lmmp,
1368                struct lov_stripe_md *lsm)
1369 {
1370         struct obd_device *obd = class_exp2obd(exp);
1371         struct lmv_obd *lmv = &obd->u.lmv;
1372         int mea_size;
1373         ENTRY;
1374
1375         mea_size = sizeof(struct ll_fid) * 
1376                 lmv->desc.ld_tgt_count + sizeof(struct mea);
1377         if (!lmmp)
1378                 RETURN(mea_size);
1379
1380         if (*lmmp && !lsm) {
1381                 OBD_FREE(*lmmp, mea_size);
1382                 *lmmp = NULL;
1383                 RETURN(0);
1384         }
1385
1386         if (!*lmmp) {
1387                 OBD_ALLOC(*lmmp, mea_size);
1388                 if (!*lmmp)
1389                         RETURN(-ENOMEM);
1390         }
1391
1392         if (!lsm)
1393                 RETURN(mea_size);
1394
1395 #warning "MEA packing/convertation must be here! -bzzz"
1396         memcpy(*lmmp, lsm, mea_size);
1397         RETURN(mea_size);
1398 }
1399
1400 int lmv_unpackmd(struct obd_export *exp, struct lov_stripe_md **mem_tgt,
1401                         struct lov_mds_md *disk_src, int mdsize)
1402 {
1403         struct obd_device *obd = class_exp2obd(exp);
1404         struct lmv_obd *lmv = &obd->u.lmv;
1405         struct mea **tmea = (struct mea **) mem_tgt;
1406         struct mea *mea = (void *) disk_src;
1407         int mea_size;
1408         ENTRY;
1409
1410         mea_size = sizeof(struct ll_fid) * 
1411                 lmv->desc.ld_tgt_count + sizeof(struct mea);
1412         if (mem_tgt == NULL)
1413                 return mea_size;
1414
1415         if (*mem_tgt != NULL && disk_src == NULL) {
1416                 OBD_FREE(*tmea, mea_size);
1417                 RETURN(0);
1418         }
1419
1420         LASSERT(mea_size == mdsize);
1421
1422         OBD_ALLOC(*tmea, mea_size);
1423         /* FIXME: error handling here */
1424         LASSERT(*tmea != NULL);
1425
1426         if (!disk_src)
1427                 RETURN(mea_size);
1428
1429 #warning "MEA unpacking/convertation must be here! -bzzz"
1430         memcpy(*tmea, mea, mdsize);
1431         RETURN(mea_size);
1432 }
1433
1434 int lmv_brw(int rw, struct obd_export *exp, struct obdo *oa,
1435                 struct lov_stripe_md *ea, obd_count oa_bufs,
1436                 struct brw_page *pgarr, struct obd_trans_info *oti)
1437 {
1438         struct obd_device *obd = exp->exp_obd;
1439         struct lmv_obd *lmv = &obd->u.lmv;
1440         struct mea *mea = (struct mea *) ea;
1441         int err;
1442       
1443         LASSERT(oa != NULL);
1444         LASSERT(ea != NULL);
1445         LASSERT(pgarr != NULL);
1446         LASSERT(oa->o_mds < lmv->desc.ld_tgt_count);
1447
1448         oa->o_gr = mea->mea_fids[oa->o_mds].generation;
1449         oa->o_id = mea->mea_fids[oa->o_mds].id;
1450         oa->o_valid =  OBD_MD_FLID | OBD_MD_FLGROUP;
1451         err = obd_brw(rw, lmv->tgts[oa->o_mds].ltd_exp, oa,
1452                       NULL, oa_bufs, pgarr, oti);
1453         RETURN(err);
1454 }
1455
1456 struct obd_ops lmv_obd_ops = {
1457         .o_owner                = THIS_MODULE,
1458         .o_attach               = lmv_attach,
1459         .o_detach               = lmv_detach,
1460         .o_setup                = lmv_setup,
1461         .o_cleanup              = lmv_cleanup,
1462         .o_connect              = lmv_connect,
1463         .o_disconnect           = lmv_disconnect,
1464         .o_statfs               = lmv_statfs,
1465         .o_get_info             = lmv_get_info,
1466         .o_set_info             = lmv_set_info,
1467         .o_create               = lmv_obd_create,
1468         .o_packmd               = lmv_packmd,
1469         .o_unpackmd             = lmv_unpackmd,
1470         .o_brw                  = lmv_brw,
1471         .o_init_ea_size         = lmv_init_ea_size,
1472         .o_notify               = lmv_notify,
1473 };
1474
1475 struct md_ops lmv_md_ops = {
1476         .m_getstatus            = lmv_getstatus,
1477         .m_getattr              = lmv_getattr,
1478         .m_change_cbdata        = lmv_change_cbdata,
1479         .m_change_cbdata_name   = lmv_change_cbdata_name,
1480         .m_close                = lmv_close,
1481         .m_create               = lmv_create,
1482         .m_done_writing         = lmv_done_writing,
1483         .m_enqueue              = lmv_enqueue,
1484         .m_getattr_name         = lmv_getattr_name,
1485         .m_intent_lock          = lmv_intent_lock,
1486         .m_link                 = lmv_link,
1487         .m_rename               = lmv_rename,
1488         .m_setattr              = lmv_setattr,
1489         .m_sync                 = lmv_sync,
1490         .m_readpage             = lmv_readpage,
1491         .m_unlink               = lmv_unlink,
1492         .m_get_real_obd         = lmv_get_real_obd,
1493         .m_valid_attrs          = lmv_valid_attrs,
1494 };
1495
1496 int __init lmv_init(void)
1497 {
1498         struct lprocfs_static_vars lvars;
1499         int rc;
1500
1501         lprocfs_init_vars(lmv, &lvars);
1502         rc = class_register_type(&lmv_obd_ops, &lmv_md_ops,
1503                                  lvars.module_vars, OBD_LMV_DEVICENAME);
1504         RETURN(rc);
1505 }
1506
#ifdef __KERNEL__
/* Module exit point: unregister the device type that lmv_init()
 * registered. */
static void lmv_exit(void)
{
        class_unregister_type(OBD_LMV_DEVICENAME);
}

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
MODULE_DESCRIPTION("Lustre Logical Metadata Volume OBD driver");

module_exit(lmv_exit);
module_init(lmv_init);
#endif