Whamcloud - gitweb
- added proper ref counting in lmv object manager.
[fs/lustre-release.git] / lustre / lmv / lmv_obd.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  * Copyright (C) 2002, 2003 Cluster File Systems, Inc.
5  *
6  *   This file is part of Lustre, http://www.lustre.org.
7  *
8  *   Lustre is free software; you can redistribute it and/or
9  *   modify it under the terms of version 2 of the GNU General Public
10  *   License as published by the Free Software Foundation.
11  *
12  *   Lustre is distributed in the hope that it will be useful,
13  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
14  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  *   GNU General Public License for more details.
16  *
17  *   You should have received a copy of the GNU General Public License
18  *   along with Lustre; if not, write to the Free Software
19  *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20  */
21
22 #ifndef EXPORT_SYMTAB
23 # define EXPORT_SYMTAB
24 #endif
25 #define DEBUG_SUBSYSTEM S_LMV
26 #ifdef __KERNEL__
27 #include <linux/slab.h>
28 #include <linux/module.h>
29 #include <linux/init.h>
30 #include <linux/slab.h>
31 #include <linux/pagemap.h>
32 #include <asm/div64.h>
33 #include <linux/seq_file.h>
34 #else
35 #include <liblustre.h>
36 #endif
37 #include <linux/ext2_fs.h>
38
39 #include <linux/obd_support.h>
40 #include <linux/lustre_lib.h>
41 #include <linux/lustre_net.h>
42 #include <linux/lustre_idl.h>
43 #include <linux/lustre_dlm.h>
44 #include <linux/lustre_mds.h>
45 #include <linux/obd_class.h>
46 #include <linux/obd_ost.h>
47 #include <linux/lprocfs_status.h>
48 #include <linux/lustre_fsfilt.h>
49 #include <linux/obd_lmv.h>
50 #include "lmv_internal.h"
51
52 /* Error codes:
53  *
54  *  -EINVAL  : UUID can't be found in the LMV's target list
55  *  -ENOTCONN: The UUID is found, but the target connection is bad (!)
56  *  -EBADF   : The UUID is found, but the OBD of the wrong type (!)
57  */
58 static int lmv_set_mdc_active(struct lmv_obd *lmv, struct obd_uuid *uuid,
59                               int activate)
60 {
61         struct obd_device *obd;
62         struct lmv_tgt_desc *tgt;
63         int i, rc = 0;
64         ENTRY;
65
66         CDEBUG(D_INFO, "Searching in lmv %p for uuid %s (activate=%d)\n",
67                lmv, uuid->uuid, activate);
68
69         spin_lock(&lmv->lmv_lock);
70         for (i = 0, tgt = lmv->tgts; i < lmv->desc.ld_tgt_count; i++, tgt++) {
71                 if (tgt->ltd_exp == NULL)
72                         continue;
73
74                 CDEBUG(D_INFO, "lmv idx %d is %s conn "LPX64"\n",
75                        i, tgt->uuid.uuid, tgt->ltd_exp->exp_handle.h_cookie);
76                 if (strncmp(uuid->uuid, tgt->uuid.uuid, sizeof uuid->uuid) == 0)
77                         break;
78         }
79
80         if (i == lmv->desc.ld_tgt_count)
81                 GOTO(out, rc = -EINVAL);
82
83         obd = class_exp2obd(tgt->ltd_exp);
84         if (obd == NULL)
85                 GOTO(out, rc = -ENOTCONN);
86
87         CDEBUG(D_INFO, "Found OBD %s=%s device %d (%p) type %s at LMV idx %d\n",
88                obd->obd_name, obd->obd_uuid.uuid, obd->obd_minor, obd,
89                obd->obd_type->typ_name, i);
90         LASSERT(strcmp(obd->obd_type->typ_name, LUSTRE_MDC_NAME) == 0);
91
92         if (tgt->active == activate) {
93                 CDEBUG(D_INFO, "OBD %p already %sactive!\n", obd,
94                        activate ? "" : "in");
95                 GOTO(out, rc);
96         }
97
98         CDEBUG(D_INFO, "Marking OBD %p %sactive\n", obd, activate ? "" : "in");
99
100         tgt->active = activate;
101         if (activate)
102                 lmv->desc.ld_active_tgt_count++;
103         else
104                 lmv->desc.ld_active_tgt_count--;
105
106         EXIT;
107  out:
108         spin_unlock(&lmv->lmv_lock);
109         return rc;
110 }
111
112 static int lmv_notify(struct obd_device *obd, struct obd_device *watched,
113                       int active)
114 {
115         int rc;
116         struct obd_uuid *uuid;
117
118         if (strcmp(watched->obd_type->typ_name, LUSTRE_MDC_NAME)) {
119                 CERROR("unexpected notification of %s %s!\n",
120                        watched->obd_type->typ_name,
121                        watched->obd_name);
122                 return -EINVAL;
123         }
124         uuid = &watched->u.cli.cl_import->imp_target_uuid;
125
126         /* Set MDC as active before notifying the observer, so the
127          * observer can use the MDC normally.  
128          */
129         rc = lmv_set_mdc_active(&obd->u.lmv, uuid, active);
130         if (rc) {
131                 CERROR("%sactivation of %s failed: %d\n",
132                        active ? "" : "de", uuid->uuid, rc);
133                 RETURN(rc);
134         }
135
136         if (obd->obd_observer)
137                 /* Pass the notification up the chain. */
138                 rc = obd_notify(obd->obd_observer, watched, active);
139
140         RETURN(rc);
141 }
142
143 int lmv_attach(struct obd_device *dev, obd_count len, void *data)
144 {
145         struct lprocfs_static_vars lvars;
146         int rc;
147         ENTRY;
148
149         lprocfs_init_vars(lmv, &lvars);
150         rc = lprocfs_obd_attach(dev, lvars.obd_vars);
151         if (rc == 0) {
152 #ifdef __KERNEL__
153                 struct proc_dir_entry *entry;
154                 
155                 entry = create_proc_entry("target_obd", 0444, dev->obd_proc_entry);
156                 if (entry == NULL)
157                         RETURN(-ENOMEM);
158                 /* entry->proc_fops = &lmv_proc_target_fops; */
159                 entry->data = dev;
160 #endif
161        }
162         RETURN (rc);
163 }
164
/*
 * Detach method: hands @dev to lprocfs_obd_detach() and returns its result.
 */
int lmv_detach(struct obd_device *dev)
{
        int rc;

        rc = lprocfs_obd_detach(dev);
        return rc;
}
169
170 /* This is fake connect function. Its purpose is to initialize lmv and 
171  * say caller that everything is okay. Real connection will be performed
172  * later. */
173 static int lmv_connect(struct lustre_handle *conn, struct obd_device *obd,
174                        struct obd_uuid *cluuid)
175 {
176         struct lmv_obd *lmv = &obd->u.lmv;
177         struct obd_export *exp;
178         int rc;
179         ENTRY;
180
181         rc = class_connect(conn, obd, cluuid);
182         if (rc) {
183                 CERROR("class_connection() returned %d\n", rc);
184                 RETURN(rc);
185         }
186
187         exp = class_conn2export(conn);
188         /* We don't want to actually do the underlying connections more than
189          * once, so keep track. */
190         lmv->refcount++;
191         if (lmv->refcount > 1) {
192                 class_export_put(exp);
193                 RETURN(0);
194         }
195
196         lmv->cluuid = *cluuid;
197         lmv->connected = 0;
198         lmv->exp = exp;
199
200         RETURN(0);
201 }
202
203 void lmv_set_timeouts(struct obd_device *obd)
204 {
205         struct lmv_tgt_desc *tgts;
206         struct lmv_obd *lmv;
207         int i;
208
209         lmv = &obd->u.lmv;
210         if (lmv->server_timeout == 0)
211                 return;
212
213         if (lmv->connected == 0)
214                 return;
215
216         for (i = 0, tgts = lmv->tgts; i < lmv->desc.ld_tgt_count; i++, tgts++) {
217                 if (tgts->ltd_exp == NULL)
218                         continue;
219                 obd_set_info(tgts->ltd_exp, strlen("inter_mds"),
220                              "inter_mds", 0, NULL);
221         }
222 }
223
224 /* Performs a check if passed obd is connected. If no - connect it. */
225 int lmv_check_connect(struct obd_device *obd) {
226         struct lmv_obd *lmv = &obd->u.lmv;
227         struct obd_uuid *cluuid;
228         struct lmv_tgt_desc *tgts;
229         struct obd_export *exp;
230         int rc, rc2, i;
231
232         if (lmv->connected)
233                 return 0;
234       
235         lmv->connected = 1;
236         cluuid = &lmv->cluuid;
237         exp = lmv->exp;
238         
239         CDEBUG(D_OTHER, "time to connect %s to %s\n",
240                cluuid->uuid, obd->obd_name);
241
242         for (i = 0, tgts = lmv->tgts; i < lmv->desc.ld_tgt_count; i++, tgts++) {
243                 struct obd_device *tgt_obd;
244                 struct obd_uuid lmv_osc_uuid = { "LMV_OSC_UUID" };
245                 struct lustre_handle conn = {0, };
246
247                 LASSERT(tgts != NULL);
248
249                 tgt_obd = class_find_client_obd(&tgts->uuid, LUSTRE_MDC_NAME, 
250                                                 &obd->obd_uuid);
251                 if (!tgt_obd) {
252                         CERROR("Target %s not attached\n", tgts->uuid.uuid);
253                         GOTO(out_disc, rc = -EINVAL);
254                 }
255
256                 /* for MDS: don't connect to yourself */
257                 if (obd_uuid_equals(&tgts->uuid, cluuid)) {
258                         CDEBUG(D_OTHER, "don't connect back to %s\n",
259                                cluuid->uuid);
260                         tgts->ltd_exp = NULL;
261                         continue;
262                 }
263
264                 CDEBUG(D_OTHER, "connect to %s(%s) - %s, %s FOR %s\n",
265                         tgt_obd->obd_name, tgt_obd->obd_uuid.uuid,
266                         tgts->uuid.uuid, obd->obd_uuid.uuid,
267                         cluuid->uuid);
268
269                 if (!tgt_obd->obd_set_up) {
270                         CERROR("Target %s not set up\n", tgts->uuid.uuid);
271                         GOTO(out_disc, rc = -EINVAL);
272                 }
273                 
274                 rc = obd_connect(&conn, tgt_obd, &lmv_osc_uuid);
275                 if (rc) {
276                         CERROR("Target %s connect error %d\n",
277                                 tgts->uuid.uuid, rc);
278                         GOTO(out_disc, rc);
279                 }
280                 tgts->ltd_exp = class_conn2export(&conn);
281
282                 obd_init_ea_size(tgts->ltd_exp, lmv->max_easize,
283                                  lmv->max_cookiesize);
284                 
285                 rc = obd_register_observer(tgt_obd, obd);
286                 if (rc) {
287                         CERROR("Target %s register_observer error %d\n",
288                                tgts->uuid.uuid, rc);
289                         obd_disconnect(tgts->ltd_exp, 0);
290                         GOTO(out_disc, rc);
291                 }
292
293                 lmv->desc.ld_active_tgt_count++;
294                 tgts->active = 1;
295                 
296                 CDEBUG(D_OTHER, "connected to %s(%s) successfully (%d)\n",
297                         tgt_obd->obd_name, tgt_obd->obd_uuid.uuid,
298                         atomic_read(&obd->obd_refcount));
299         }
300
301         lmv_set_timeouts(obd);
302
303         class_export_put(exp);
304         return 0;
305
306  out_disc:
307         while (i-- > 0) {
308                 struct obd_uuid uuid;
309                 --tgts;
310                 --lmv->desc.ld_active_tgt_count;
311                 tgts->active = 0;
312                 /* save for CERROR below; (we know it's terminated) */
313                 uuid = tgts->uuid;
314                 rc2 = obd_disconnect(tgts->ltd_exp, 0);
315                 if (rc2)
316                         CERROR("error: LMV target %s disconnect on MDT idx %d: "
317                                "error %d\n", uuid.uuid, i, rc2);
318         }
319         class_disconnect(exp, 0);
320         RETURN (rc);
321 }
322
323 static int lmv_disconnect(struct obd_export *exp, int flags)
324 {
325         struct obd_device *obd = class_exp2obd(exp);
326         struct lmv_obd *lmv = &obd->u.lmv;
327         int rc, i;
328         ENTRY;
329
330         if (!lmv->tgts)
331                 goto out_local;
332
333         /* Only disconnect the underlying layers on the final disconnect. */
334         lmv->refcount--;
335         if (lmv->refcount != 0)
336                 goto out_local;
337
338         for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
339                 if (lmv->tgts[i].ltd_exp == NULL)
340                         continue;
341
342                 if (obd->obd_no_recov) {
343                         /* Pass it on to our clients.
344                          * XXX This should be an argument to disconnect,
345                          * XXX not a back-door flag on the OBD.  Ah well.
346                          */
347                         struct obd_device *mdc_obd;
348                         mdc_obd = class_exp2obd(lmv->tgts[i].ltd_exp);
349                         if (mdc_obd)
350                                 mdc_obd->obd_no_recov = 1;
351                 }
352
353                 CDEBUG(D_OTHER, "disconnected from %s(%s) successfully\n",
354                         lmv->tgts[i].ltd_exp->exp_obd->obd_name,
355                         lmv->tgts[i].ltd_exp->exp_obd->obd_uuid.uuid);
356
357                 obd_register_observer(lmv->tgts[i].ltd_exp->exp_obd, NULL);
358
359                 rc = obd_disconnect(lmv->tgts[i].ltd_exp, flags);
360                 if (rc) {
361                         if (lmv->tgts[i].active) {
362                                 CERROR("Target %s disconnect error %d\n",
363                                        lmv->tgts[i].uuid.uuid, rc);
364                         }
365                         rc = 0;
366                 }
367                 if (lmv->tgts[i].active) {
368                         lmv->desc.ld_active_tgt_count--;
369                         lmv->tgts[i].active = 0;
370                 }
371                 lmv->tgts[i].ltd_exp = NULL;
372         }
373
374 out_local:
375         /* this is the case when no real connection is established by
376          * lmv_check_connect(). */
377         if (!lmv->connected)
378                 class_export_put(exp);
379         rc = class_disconnect(exp, 0);
380         RETURN(rc);
381 }
382
383 static int lmv_iocontrol(unsigned int cmd, struct obd_export *exp,
384                          int len, void *karg, void *uarg)
385 {
386         struct obd_device *obddev = class_exp2obd(exp);
387         struct lmv_obd *lmv = &obddev->u.lmv;
388         int i, rc = 0, set = 0;
389
390         ENTRY;
391
392         if (lmv->desc.ld_tgt_count == 0)
393                 RETURN(-ENOTTY);
394         
395         for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
396                 int err;
397
398                 err = obd_iocontrol(cmd, lmv->tgts[i].ltd_exp,
399                                     len, karg, uarg);
400                 if (err) {
401                         if (lmv->tgts[i].active) {
402                                 CERROR("error: iocontrol MDC %s on MDT"
403                                        "idx %d: err = %d\n",
404                                        lmv->tgts[i].uuid.uuid, i, err);
405                                 if (!rc)
406                                         rc = err;
407                         }
408                 } else
409                         set = 1;
410         }
411         if (!set && !rc)
412                 rc = -EIO;
413
414         RETURN(rc);
415 }
416
417 static int lmv_setup(struct obd_device *obd, obd_count len, void *buf)
418 {
419         int i, rc = 0;
420         struct lmv_desc *desc;
421         struct obd_uuid *uuids;
422         struct lmv_tgt_desc *tgts;
423         struct lustre_cfg *lcfg = buf;
424         struct lmv_obd *lmv = &obd->u.lmv;
425         ENTRY;
426
427         if (lcfg->lcfg_inllen1 < 1) {
428                 CERROR("LMV setup requires a descriptor\n");
429                 RETURN(-EINVAL);
430         }
431
432         if (lcfg->lcfg_inllen2 < 1) {
433                 CERROR("LMV setup requires an OST UUID list\n");
434                 RETURN(-EINVAL);
435         }
436
437         desc = (struct lmv_desc *)lcfg->lcfg_inlbuf1;
438         if (sizeof(*desc) > lcfg->lcfg_inllen1) {
439                 CERROR("descriptor size wrong: %d > %d\n",
440                        (int)sizeof(*desc), lcfg->lcfg_inllen1);
441                 RETURN(-EINVAL);
442         }
443
444         uuids = (struct obd_uuid *)lcfg->lcfg_inlbuf2;
445         if (sizeof(*uuids) * desc->ld_tgt_count != lcfg->lcfg_inllen2) {
446                 CERROR("UUID array size wrong: %u * %u != %u\n",
447                        sizeof(*uuids), desc->ld_tgt_count, lcfg->lcfg_inllen2);
448                 RETURN(-EINVAL);
449         }
450
451         lmv->bufsize = sizeof(struct lmv_tgt_desc) * desc->ld_tgt_count;
452         OBD_ALLOC(lmv->tgts, lmv->bufsize);
453         if (lmv->tgts == NULL) {
454                 CERROR("Out of memory\n");
455                 RETURN(-ENOMEM);
456         }
457
458         lmv->desc = *desc;
459         spin_lock_init(&lmv->lmv_lock);
460         
461         for (i = 0, tgts = lmv->tgts; i < desc->ld_tgt_count; i++, tgts++)
462                 tgts->uuid = uuids[i];
463         
464         lmv->max_cookiesize = 0;
465
466         lmv->max_easize = sizeof(struct ll_fid) *
467                 desc->ld_tgt_count + sizeof(struct mea);
468         
469         rc = lmv_setup_mgr(obd);
470         if (rc) {
471                 CERROR("Can't setup LMV object manager, "
472                        "error %d.\n", rc);
473                 OBD_FREE(lmv->tgts, lmv->bufsize);
474         }
475
476         RETURN(rc);
477 }
478
479 static int lmv_statfs(struct obd_device *obd, struct obd_statfs *osfs,
480                       unsigned long max_age)
481 {
482         struct lmv_obd *lmv = &obd->u.lmv;
483         struct obd_statfs temp;
484         int rc = 0, i;
485         ENTRY;
486         
487         rc = lmv_check_connect(obd);
488         if (rc)
489                 RETURN(rc);
490                 
491         for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
492                 rc = obd_statfs(lmv->tgts[i].ltd_exp->exp_obd, &temp, max_age);
493                 if (rc) {
494                         CERROR("can't stat MDS #%d (%s)\n", i,
495                                lmv->tgts[i].ltd_exp->exp_obd->obd_name);
496                         RETURN(rc);
497                 }
498                 if (i == 0) {
499                         memcpy(osfs, &temp, sizeof(temp));
500                 } else {
501                         osfs->os_bavail += temp.os_bavail;
502                         osfs->os_blocks += temp.os_blocks;
503                         osfs->os_ffree += temp.os_ffree;
504                         osfs->os_files += temp.os_files;
505                 }
506         }
507         RETURN(rc);
508 }
509
510 static int lmv_cleanup(struct obd_device *obd, int flags) 
511 {
512         struct lmv_obd *lmv = &obd->u.lmv;
513         ENTRY;
514         lmv_cleanup_mgr(obd);
515         OBD_FREE(lmv->tgts, lmv->bufsize);
516         RETURN(0);
517 }
518
519 static int lmv_getstatus(struct obd_export *exp, struct ll_fid *fid)
520 {
521         struct obd_device *obd = exp->exp_obd;
522         struct lmv_obd *lmv = &obd->u.lmv;
523         int rc;
524         ENTRY;
525         rc = lmv_check_connect(obd);
526         if (rc)
527                 RETURN(rc);
528         rc = md_getstatus(lmv->tgts[0].ltd_exp, fid);
529         fid->mds = 0;
530         RETURN(rc);
531 }
532
533 static int lmv_getattr(struct obd_export *exp, struct ll_fid *fid,
534                        unsigned long valid, unsigned int ea_size,
535                        struct ptlrpc_request **request)
536 {
537         struct obd_device *obd = exp->exp_obd;
538         struct lmv_obd *lmv = &obd->u.lmv;
539         int rc, i = fid->mds;
540         struct lmv_obj *obj;
541         ENTRY;
542
543         rc = lmv_check_connect(obd);
544         if (rc)
545                 RETURN(rc);
546
547         obj = lmv_grab_obj(obd, fid);
548         
549         CDEBUG(D_OTHER, "GETATTR for %lu/%lu/%lu %s\n",
550                (unsigned long)fid->mds, (unsigned long)fid->id,
551                (unsigned long)fid->generation, obj ? "(splitted)" : "");
552
553         LASSERT(fid->mds < lmv->desc.ld_tgt_count);
554         rc = md_getattr(lmv->tgts[i].ltd_exp, fid,
555                         valid, ea_size, request);
556         if (rc == 0 && obj) {
557                 /* we have to loop over dirobjs here and gather attrs for all
558                  * the slaves. */
559 #warning "attrs gathering here"
560         }
561
562         if (obj)
563                 lmv_put_obj(obj);
564         
565         RETURN(rc);
566 }
567
568 static int lmv_change_cbdata(struct obd_export *exp, struct ll_fid *fid, 
569                              ldlm_iterator_t it, void *data)
570 {
571         struct obd_device *obd = exp->exp_obd;
572         struct lmv_obd *lmv = &obd->u.lmv;
573         int rc = 0;
574         ENTRY;
575         
576         rc = lmv_check_connect(obd);
577         if (rc)
578                 RETURN(rc);
579         
580         CDEBUG(D_OTHER, "CBDATA for %lu/%lu/%lu\n",
581                (unsigned long) fid->mds, (unsigned long) fid->id,
582                (unsigned long) fid->generation);
583         
584         LASSERT(fid->mds < lmv->desc.ld_tgt_count);
585
586         rc = md_change_cbdata(lmv->tgts[fid->mds].ltd_exp,
587                               fid, it, data);
588         
589         RETURN(rc);
590 }
591
592 static int lmv_change_cbdata_name(struct obd_export *exp, struct ll_fid *pfid,
593                                   char *name, int len, struct ll_fid *cfid,
594                                   ldlm_iterator_t it, void *data)
595 {
596         struct obd_device *obd = exp->exp_obd;
597         struct lmv_obd *lmv = &obd->u.lmv;
598         struct lmv_obj *obj;
599         int rc = 0, mds;
600         ENTRY;
601         rc = lmv_check_connect(obd);
602         if (rc)
603                 RETURN(rc);
604         LASSERT(pfid->mds < lmv->desc.ld_tgt_count);
605         LASSERT(cfid->mds < lmv->desc.ld_tgt_count);
606         CDEBUG(D_OTHER, "CBDATA for %lu/%lu/%lu:%*s -> %lu/%lu/%lu\n",
607                (unsigned long) pfid->mds, (unsigned long) pfid->id,
608                (unsigned long) pfid->generation, len, name,
609                (unsigned long) cfid->mds, (unsigned long) cfid->id,
610                (unsigned long) cfid->generation);
611
612         /* this is default mds for directory name belongs to */
613         mds = pfid->mds;
614         obj = lmv_grab_obj(obd, pfid);
615         if (obj) {
616                 /* directory is splitted. look for right mds for this name */
617                 mds = raw_name2idx(obj->objcount, name, len);
618                 lmv_put_obj(obj);
619         }
620         rc = md_change_cbdata(lmv->tgts[mds].ltd_exp, cfid, it, data);
621         RETURN(rc);
622 }
623
624 static int lmv_valid_attrs(struct obd_export *exp, struct ll_fid *fid) 
625 {
626         struct obd_device *obd = exp->exp_obd;
627         struct lmv_obd *lmv = &obd->u.lmv;
628         int rc = 0;
629         ENTRY;
630         rc = lmv_check_connect(obd);
631         if (rc)
632                 RETURN(rc);
633         CDEBUG(D_OTHER, "validate %lu/%lu/%lu\n", (unsigned long) fid->mds,
634                (unsigned long) fid->id, (unsigned long) fid->generation);
635         LASSERT(fid->mds < lmv->desc.ld_tgt_count);
636         rc = md_valid_attrs(lmv->tgts[fid->mds].ltd_exp, fid);
637         RETURN(rc);
638 }
639
640 int lmv_close(struct obd_export *exp, struct obdo *obdo,
641                   struct obd_client_handle *och,
642                   struct ptlrpc_request **request)
643 {
644         struct obd_device *obd = exp->exp_obd;
645         struct lmv_obd *lmv = &obd->u.lmv;
646         int rc, i = obdo->o_mds;
647         ENTRY;
648         rc = lmv_check_connect(obd);
649         if (rc)
650                 RETURN(rc);
651         LASSERT(i < lmv->desc.ld_tgt_count);
652         CDEBUG(D_OTHER, "CLOSE %lu/%lu/%lu\n", (unsigned long) obdo->o_mds,
653                (unsigned long) obdo->o_id, (unsigned long) obdo->o_generation);
654         rc = md_close(lmv->tgts[i].ltd_exp, obdo, och, request);
655         RETURN(rc);
656 }
657
658 int lmv_get_mea_and_update_object(struct obd_export *exp, struct ll_fid *fid)
659 {
660         struct obd_device *obd = exp->exp_obd;
661         struct lmv_obd *lmv = &obd->u.lmv;
662         struct ptlrpc_request *req = NULL;
663         struct lustre_md md;
664         unsigned long valid;
665         int mealen, rc;
666
667         md.mea = NULL;
668         mealen = MEA_SIZE_LMV(lmv);
669         
670         valid = OBD_MD_FLEASIZE | OBD_MD_FLDIREA;
671
672         /* time to update mea of parent fid */
673         rc = md_getattr(lmv->tgts[fid->mds].ltd_exp, fid,
674                         valid, mealen, &req);
675         if (rc) {
676                 CERROR("md_getattr() failed, error %d\n", rc);
677                 GOTO(cleanup, rc);
678         }
679
680         rc = mdc_req2lustre_md(exp, req, 0, NULL, &md);
681         if (rc) {
682                 CERROR("mdc_req2lustre_md() failed, error %d\n", rc);
683                 GOTO(cleanup, rc);
684         }
685
686         if (md.mea == NULL)
687                 GOTO(cleanup, rc = -ENODATA);
688
689         rc = lmv_create_obj(exp, fid, md.mea);
690         obd_free_memmd(exp, (struct lov_stripe_md **)&md.mea);
691
692 cleanup:
693         if (req)
694                 ptlrpc_req_finished(req);
695         RETURN(rc);
696 }
697
698 int lmv_create(struct obd_export *exp, struct mdc_op_data *op_data,
699                const void *data, int datalen, int mode, __u32 uid,
700                __u32 gid, __u64 rdev, struct ptlrpc_request **request)
701 {
702         struct obd_device *obd = exp->exp_obd;
703         struct lmv_obd *lmv = &obd->u.lmv;
704         struct mds_body *mds_body;
705         struct lmv_obj *obj;
706         int rc, mds;
707         ENTRY;
708
709         rc = lmv_check_connect(obd);
710         if (rc)
711                 RETURN(rc);
712
713         if (!lmv->desc.ld_active_tgt_count)
714                 RETURN(-EIO);
715 repeat:
716         obj = lmv_grab_obj(obd, &op_data->fid1);
717         if (obj) {
718                 mds = raw_name2idx(obj->objcount, op_data->name,
719                                    op_data->namelen);
720                 op_data->fid1 = obj->objs[mds].fid;
721                 lmv_put_obj(obj);
722         }
723
724         CDEBUG(D_OTHER, "CREATE '%*s' on %lu/%lu/%lu\n",
725                         op_data->namelen, op_data->name,
726                         (unsigned long) op_data->fid1.mds,
727                         (unsigned long) op_data->fid1.id,
728                         (unsigned long) op_data->fid1.generation);
729         rc = md_create(lmv->tgts[op_data->fid1.mds].ltd_exp, op_data, data,
730                        datalen, mode, uid, gid, rdev, request);
731         if (rc == 0) {
732                 if (*request == NULL)
733                      RETURN(rc);
734                 mds_body = lustre_msg_buf((*request)->rq_repmsg, 0,
735                                           sizeof(*mds_body));
736                 LASSERT(mds_body != NULL);
737                 CDEBUG(D_OTHER, "created. id = %lu, generation = %lu, mds = %d\n",
738                        (unsigned long) mds_body->fid1.id,
739                        (unsigned long) mds_body->fid1.generation,
740                        op_data->fid1.mds);
741                 LASSERT(mds_body->valid & OBD_MD_MDS ||
742                         mds_body->mds == op_data->fid1.mds);
743         } else if (rc == -ERESTART) {
744                 /* directory got splitted. time to update local object
745                  * and repeat the request with proper MDS */
746                 rc = lmv_get_mea_and_update_object(exp, &op_data->fid1);
747                 if (rc == 0) {
748                         ptlrpc_req_finished(*request);
749                         goto repeat;
750                 }
751         }
752         RETURN(rc);
753 }
754
755 int lmv_done_writing(struct obd_export *exp, struct obdo *obdo)
756 {
757         struct obd_device *obd = exp->exp_obd;
758         struct lmv_obd *lmv = &obd->u.lmv;
759         int rc;
760         ENTRY;
761         rc = lmv_check_connect(obd);
762         if (rc)
763                 RETURN(rc);
764
765         /* FIXME: choose right MDC here */
766         CWARN("this method isn't implemented yet\n");
767         rc = md_done_writing(lmv->tgts[0].ltd_exp, obdo);
768         RETURN(rc);
769 }
770
771 int lmv_enqueue_slaves(struct obd_export *exp, int locktype,
772                          struct lookup_intent *it, int lockmode,
773                          struct mdc_op_data *data, struct lustre_handle *lockh,
774                          void *lmm, int lmmsize,
775                          ldlm_completion_callback cb_completion,
776                          ldlm_blocking_callback cb_blocking, void *cb_data)
777 {
778         struct obd_device *obd = exp->exp_obd;
779         struct lmv_obd *lmv = &obd->u.lmv;
780         struct mea *mea = data->mea1;
781         struct mdc_op_data data2;
782         int i, rc, mds;
783         ENTRY;
784
785         LASSERT(mea != NULL);
786         for (i = 0; i < mea->mea_count; i++) {
787                 if (lmv->tgts[i].ltd_exp == NULL)
788                         continue;
789
790                 memset(&data2, 0, sizeof(data2));
791                 data2.fid1 = mea->mea_fids[i];
792                 mds = data2.fid1.mds;
793                 
794                 rc = md_enqueue(lmv->tgts[mds].ltd_exp, locktype, it, lockmode,
795                                 &data2, lockh + i, lmm, lmmsize, cb_completion,
796                                 cb_blocking, cb_data);
797                 
798                 CDEBUG(D_OTHER, "take lock on slave %lu/%lu/%lu -> %d/%d\n",
799                        (unsigned long)mea->mea_fids[i].mds,
800                        (unsigned long)mea->mea_fids[i].id,
801                        (unsigned long)mea->mea_fids[i].generation,
802                        rc, it->d.lustre.it_status);
803                 if (rc)
804                         GOTO(cleanup, rc);
805                 if (it->d.lustre.it_data) {
806                         struct ptlrpc_request *req;
807                         req = (struct ptlrpc_request *) it->d.lustre.it_data;
808                         ptlrpc_req_finished(req);
809                 }
810                 
811                 if (it->d.lustre.it_status)
812                         GOTO(cleanup, rc = it->d.lustre.it_status);
813         }
814         RETURN(0);
815         
816 cleanup:
817         /* drop all taken locks */
818         while (--i >= 0) {
819                 if (lockh[i].cookie)
820                         ldlm_lock_decref(lockh + i, lockmode);
821                 lockh[i].cookie = 0;
822         }
823         RETURN(rc);
824 }
825
826 int lmv_enqueue(struct obd_export *exp, int lock_type,
827                 struct lookup_intent *it, int lock_mode,
828                 struct mdc_op_data *data, struct lustre_handle *lockh,
829                 void *lmm, int lmmsize,
830                 ldlm_completion_callback cb_completion,
831                 ldlm_blocking_callback cb_blocking, void *cb_data)
832 {
833         struct obd_device *obd = exp->exp_obd;
834         struct lmv_obd *lmv = &obd->u.lmv;
835         struct lmv_obj *obj;
836         int rc, mds;
837         ENTRY;
838
839         rc = lmv_check_connect(obd);
840         if (rc)
841                 RETURN(rc);
842
843         if (it->it_op == IT_UNLINK) {
844                 rc = lmv_enqueue_slaves(exp, lock_type, it, lock_mode,
845                                         data, lockh, lmm, lmmsize,
846                                         cb_completion, cb_blocking, cb_data);
847                 RETURN(rc);
848         }
849
850         if (data->namelen) {
851                 obj = lmv_grab_obj(obd, &data->fid1);
852                 if (obj) {
853                         /* directory is splitted. look for
854                          * right mds for this name */
855                         mds = raw_name2idx(obj->objcount, (char *)data->name,
856                                            data->namelen);
857                         data->fid1 = obj->objs[mds].fid;
858                         lmv_put_obj(obj);
859                 }
860         }
861         CDEBUG(D_OTHER, "ENQUEUE '%s' on %lu/%lu\n",
862                LL_IT2STR(it), (unsigned long) data->fid1.id,
863                (unsigned long) data->fid1.generation);
864         rc = md_enqueue(lmv->tgts[data->fid1.mds].ltd_exp, lock_type, it,
865                         lock_mode, data, lockh, lmm, lmmsize, cb_completion,
866                         cb_blocking, cb_data);
867
868         RETURN(rc);
869 }
870
871 int lmv_getattr_name(struct obd_export *exp, struct ll_fid *fid,
872                          char *filename, int namelen, unsigned long valid,
873                          unsigned int ea_size, struct ptlrpc_request **request)
874 {
875         struct obd_device *obd = exp->exp_obd;
876         struct lmv_obd *lmv = &obd->u.lmv;
877         struct ll_fid rfid = *fid;
878         int rc, mds = fid->mds;
879         struct mds_body *body;
880         struct lmv_obj *obj;
881         ENTRY;
882         rc = lmv_check_connect(obd);
883         if (rc)
884                 RETURN(rc);
885 repeat:
886         obj = lmv_grab_obj(obd, fid);
887         if (obj) {
888                 /* directory is splitted. look for right mds for this name */
889                 mds = raw_name2idx(obj->objcount, filename, namelen - 1);
890                 rfid = obj->objs[mds].fid;
891                 lmv_put_obj(obj);
892         }
893         CDEBUG(D_OTHER, "getattr_name for %*s on %lu/%lu/%lu -> %lu/%lu/%lu\n",
894                namelen, filename, (unsigned long) fid->mds,
895                (unsigned long) fid->id, (unsigned long) fid->generation,
896                (unsigned long) rfid.mds, (unsigned long) rfid.id,
897                (unsigned long) rfid.generation);
898         rc = md_getattr_name(lmv->tgts[mds].ltd_exp, &rfid, filename, namelen,
899                                   valid, ea_size, request);
900         if (rc == 0) {
901                 /* this could be cross-node reference. in this case all
902                  * we have right now is mds/ino/generation triple. we'd
903                  * like to find other attributes */
904                 body = lustre_msg_buf((*request)->rq_repmsg, 0, sizeof(*body));
905                 LASSERT(body != NULL);
906                 if (body->valid & OBD_MD_MDS) {
907                         struct ptlrpc_request *req = NULL;
908                         rfid = body->fid1;
909                         CDEBUG(D_OTHER, "request attrs for %lu/%lu/%lu\n",
910                                (unsigned long) rfid.mds,
911                                (unsigned long) rfid.id,
912                                (unsigned long) rfid.generation);
913                         rc = md_getattr_name(lmv->tgts[rfid.mds].ltd_exp, &rfid,
914                                              NULL, 1, valid, ea_size, &req);
915                         ptlrpc_req_finished(*request);
916                         *request = req;
917                 }
918         } else if (rc == -ERESTART) {
919                 /* directory got splitted. time to update local object
920                  * and repeat the request with proper MDS */
921                 rc = lmv_get_mea_and_update_object(exp, &rfid);
922                 if (rc == 0) {
923                         ptlrpc_req_finished(*request);
924                         goto repeat;
925                 }
926         }
927         RETURN(rc);
928 }
929
930
931 /*
932  * llite passes fid of an target inode in data->fid1 and
933  * fid of directory in data->fid2
934  */
935 int lmv_link(struct obd_export *exp, struct mdc_op_data *data,
936              struct ptlrpc_request **request)
937 {
938         struct obd_device *obd = exp->exp_obd;
939         struct lmv_obd *lmv = &obd->u.lmv;
940         struct lmv_obj *obj;
941         int rc;
942         ENTRY;
943         rc = lmv_check_connect(obd);
944         if (rc)
945                 RETURN(rc);
946         if (data->namelen != 0) {
947                 /* usual link request */
948                 obj = lmv_grab_obj(obd, &data->fid1);
949                 if (obj) {
950                         rc = raw_name2idx(obj->objcount, data->name,
951                                           data->namelen);
952                         data->fid1 = obj->objs[rc].fid;
953                         lmv_put_obj(obj);
954                 }
955                 CDEBUG(D_OTHER,"link %u/%u/%u:%*s to %u/%u/%u mds %d\n",
956                        (unsigned) data->fid2.mds, (unsigned) data->fid2.id,
957                        (unsigned) data->fid2.generation, data->namelen,
958                        data->name, (unsigned) data->fid1.mds,
959                        (unsigned) data->fid1.id,
960                        (unsigned) data->fid1.generation, data->fid1.mds);
961         } else {
962                 /* request from MDS to acquire i_links for inode by fid1 */
963                 CDEBUG(D_OTHER, "inc i_nlinks for %u/%u/%u\n",
964                        (unsigned) data->fid1.mds, (unsigned) data->fid1.id,
965                        (unsigned) data->fid1.generation);
966         }
967                         
968         rc = md_link(lmv->tgts[data->fid1.mds].ltd_exp, data, request);
969         RETURN(rc);
970 }
971
972 int lmv_rename(struct obd_export *exp, struct mdc_op_data *data,
973                const char *old, int oldlen, const char *new, int newlen,
974                struct ptlrpc_request **request)
975 {
976         struct obd_device *obd = exp->exp_obd;
977         struct lmv_obd *lmv = &obd->u.lmv;
978         struct lmv_obj *obj;
979         int rc, mds;
980         ENTRY;
981
982         CDEBUG(D_OTHER, "rename %*s in %lu/%lu/%lu to %*s in %lu/%lu/%lu\n",
983                oldlen, old, (unsigned long) data->fid1.mds,
984                (unsigned long) data->fid1.id,
985                (unsigned long) data->fid1.generation,
986                newlen, new, (unsigned long) data->fid2.mds,
987                (unsigned long) data->fid2.id,
988                (unsigned long) data->fid2.generation);
989         if (!fid_equal(&data->fid1, &data->fid2))
990                 CWARN("cross-node rename %lu/%lu/%lu:%*s to %lu/%lu/%lu:%*s\n",
991                       (unsigned long) data->fid1.mds,
992                       (unsigned long) data->fid1.id,
993                       (unsigned long) data->fid1.generation, oldlen, old,
994                       (unsigned long) data->fid2.mds,
995                       (unsigned long) data->fid2.id,
996                       (unsigned long) data->fid2.generation, newlen, new);
997
998         rc = lmv_check_connect(obd);
999         if (rc)
1000                 RETURN(rc);
1001
1002         if (oldlen == 0) {
1003                 /* MDS with old dir entry is asking another MDS
1004                  * to create name there */
1005                 CDEBUG(D_OTHER,
1006                        "create %*s(%d/%d) in %lu/%lu/%lu pointing to %lu/%lu/%lu\n",
1007                        newlen, new, oldlen, newlen,
1008                        (unsigned long) data->fid2.mds,
1009                        (unsigned long) data->fid2.id,
1010                        (unsigned long) data->fid2.generation,
1011                        (unsigned long) data->fid1.mds,
1012                        (unsigned long) data->fid1.id,
1013                        (unsigned long) data->fid1.generation);
1014                 mds = data->fid2.mds;
1015                 goto request;
1016         }
1017
1018         obj = lmv_grab_obj(obd, &data->fid1);
1019         if (obj) {
1020                 /* directory is already splitted, so we have to forward
1021                  * request to the right MDS */
1022                 mds = raw_name2idx(obj->objcount, (char *)old, oldlen);
1023                 data->fid1 = obj->objs[mds].fid;
1024                 CDEBUG(D_OTHER, "forward to MDS #%u (%lu/%lu/%lu)\n", mds,
1025                        (unsigned long) obj->objs[mds].fid.mds,
1026                        (unsigned long) obj->objs[mds].fid.id,
1027                        (unsigned long) obj->objs[mds].fid.generation);
1028                 lmv_put_obj(obj);
1029         }
1030
1031         obj = lmv_grab_obj(obd, &data->fid2);
1032         if (obj) {
1033                 /* directory is already splitted, so we have to forward
1034                  * request to the right MDS */
1035                 mds = raw_name2idx(obj->objcount, (char *)new, newlen);
1036                 data->fid2 = obj->objs[mds].fid;
1037                 CDEBUG(D_OTHER, "forward to MDS #%u (%lu/%lu/%lu)\n", mds,
1038                        (unsigned long) obj->objs[mds].fid.mds,
1039                        (unsigned long) obj->objs[mds].fid.id,
1040                        (unsigned long) obj->objs[mds].fid.generation);
1041                 lmv_put_obj(obj);
1042         }
1043         
1044         mds = data->fid1.mds;
1045
1046 request:
1047         rc = md_rename(lmv->tgts[mds].ltd_exp, data, old, oldlen,
1048                        new, newlen, request); 
1049         RETURN(rc);
1050 }
1051
1052 int lmv_setattr(struct obd_export *exp, struct mdc_op_data *data,
1053                 struct iattr *iattr, void *ea, int ealen, void *ea2,
1054                 int ea2len, struct ptlrpc_request **request)
1055 {
1056         struct obd_device *obd = exp->exp_obd;
1057         struct lmv_obd *lmv = &obd->u.lmv;
1058         int rc = 0, i = data->fid1.mds;
1059         struct ptlrpc_request *req;
1060         struct mds_body *mds_body;
1061         struct lmv_obj *obj;
1062         ENTRY;
1063
1064         rc = lmv_check_connect(obd);
1065         if (rc)
1066                 RETURN(rc);
1067
1068         obj = lmv_grab_obj(obd, &data->fid1);
1069         CDEBUG(D_OTHER, "SETATTR for %lu/%lu/%lu, valid 0x%x%s\n",
1070                (unsigned long) data->fid1.mds,
1071                (unsigned long) data->fid1.id,
1072                (unsigned long) data->fid1.generation, iattr->ia_valid,
1073                obj ? ", splitted" : "");
1074         if (obj) {
1075                 for (i = 0; i < obj->objcount; i++) {
1076                         data->fid1 = obj->objs[i].fid;
1077                         rc = md_setattr(lmv->tgts[i].ltd_exp, data, iattr, ea,
1078                                         ealen, ea2, ea2len, &req);
1079                         LASSERT(rc == 0);
1080                         if (fid_equal(&obj->fid, &obj->objs[i].fid)) {
1081                                 /* this is master object and this request
1082                                  * should be returned back to llite */
1083                                 *request = req;
1084                         } else {
1085                                 ptlrpc_req_finished(req);
1086                         }
1087                 }
1088                 lmv_put_obj(obj);
1089         } else {
1090                 LASSERT(data->fid1.mds < lmv->desc.ld_tgt_count);
1091                 rc = md_setattr(lmv->tgts[i].ltd_exp, data, iattr, ea, ealen,
1092                                 ea2, ea2len, request); 
1093                 if (rc == 0) {
1094                         mds_body = lustre_msg_buf((*request)->rq_repmsg, 0,
1095                                         sizeof(*mds_body));
1096                         LASSERT(mds_body != NULL);
1097                         LASSERT(mds_body->mds == i);
1098                 }
1099         }
1100         RETURN(rc);
1101 }
1102
1103 int lmv_sync(struct obd_export *exp, struct ll_fid *fid,
1104              struct ptlrpc_request **request)
1105 {
1106         struct obd_device *obd = exp->exp_obd;
1107         struct lmv_obd *lmv = &obd->u.lmv;
1108         int rc;
1109         ENTRY;
1110
1111         rc = lmv_check_connect(obd);
1112         if (rc)
1113                 RETURN(rc);
1114
1115         rc = md_sync(lmv->tgts[0].ltd_exp, fid, request); 
1116         RETURN(rc);
1117 }
1118
1119 int lmv_dirobj_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
1120                             void *data, int flag)
1121 {
1122         struct lustre_handle lockh;
1123         struct lmv_obj *obj;
1124         int rc;
1125         ENTRY;
1126
1127         switch (flag) {
1128         case LDLM_CB_BLOCKING:
1129                 ldlm_lock2handle(lock, &lockh);
1130                 rc = ldlm_cli_cancel(&lockh);
1131                 if (rc < 0) {
1132                         CDEBUG(D_INODE, "ldlm_cli_cancel: %d\n", rc);
1133                         RETURN(rc);
1134                 }
1135                 break;
1136         case LDLM_CB_CANCELING:
1137                 /* time to drop cached attrs for dirobj */
1138                 obj = lock->l_ast_data;
1139                 if (obj) {
1140                         CDEBUG(D_OTHER, "cancel %s on %lu/%lu, master %lu/%lu/%lu\n",
1141                                lock->l_resource->lr_name.name[3] == 1 ? "LOOKUP" : "UPDATE",
1142                                (unsigned long)lock->l_resource->lr_name.name[0],
1143                                (unsigned long)lock->l_resource->lr_name.name[1],
1144                                (unsigned long)obj->fid.mds, (unsigned long)obj->fid.id,
1145                                (unsigned long)obj->fid.generation);
1146                         lmv_put_obj(obj);
1147                 }
1148                 break;
1149         default:
1150                 LBUG();
1151         }
1152         RETURN(0);
1153 }
1154
1155 void lmv_remove_dots(struct page *page)
1156 {
1157         char *kaddr = page_address(page);
1158         unsigned limit = PAGE_CACHE_SIZE;
1159         unsigned offs, rec_len;
1160         struct ext2_dir_entry_2 *p;
1161
1162         for (offs = 0; offs <= limit - EXT2_DIR_REC_LEN(1); offs += rec_len) {
1163                 p = (struct ext2_dir_entry_2 *)(kaddr + offs);
1164                 rec_len = le16_to_cpu(p->rec_len);
1165
1166                 if ((p->name_len == 1 && p->name[0] == '.') ||
1167                     (p->name_len == 2 && p->name[0] == '.' && p->name[1] == '.'))
1168                         p->inode = 0;
1169         }
1170 }
1171
1172 int lmv_readpage(struct obd_export *exp, struct ll_fid *mdc_fid,
1173                  __u64 offset, struct page *page,
1174                  struct ptlrpc_request **request)
1175 {
1176         struct obd_device *obd = exp->exp_obd;
1177         struct lmv_obd *lmv = &obd->u.lmv;
1178         struct ll_fid rfid = *mdc_fid;
1179         struct lmv_obj *obj;
1180         int rc, i;
1181         ENTRY;
1182
1183         rc = lmv_check_connect(obd);
1184         if (rc)
1185                 RETURN(rc);
1186
1187         LASSERT(mdc_fid->mds < lmv->desc.ld_tgt_count);
1188         CDEBUG(D_OTHER, "READPAGE at %llu from %lu/%lu/%lu\n",
1189                offset, (unsigned long) rfid.mds,
1190                (unsigned long) rfid.id,
1191                (unsigned long) rfid.generation);
1192
1193         obj = lmv_grab_obj(obd, mdc_fid);
1194         if (obj) {
1195                 /* find dirobj containing page with requested offset */
1196                 /* FIXME: what about protecting cached attrs here? */
1197                 for (i = 0; i < obj->objcount; i++) {
1198                         if (offset < obj->objs[i].size)
1199                                 break;
1200                         offset -= obj->objs[i].size;
1201                 }
1202                 rfid = obj->objs[i].fid;
1203                 lmv_put_obj(obj);
1204                 
1205                 CDEBUG(D_OTHER, "forward to %lu/%lu/%lu with offset %lu\n",
1206                        (unsigned long)rfid.mds, (unsigned long)rfid.id,
1207                        (unsigned long)rfid.generation, (unsigned long)offset);
1208         }
1209         rc = md_readpage(lmv->tgts[rfid.mds].ltd_exp, &rfid, offset,
1210                          page, request);
1211         
1212         if (rc == 0 && !fid_equal(&rfid, mdc_fid))
1213                 /* this page isn't from master object. To avoid "." and ".." 
1214                  * duplication in directory, we have to remove them from all
1215                  * slave objects */
1216                 lmv_remove_dots(page);
1217         
1218         RETURN(rc);
1219 }
1220
1221 int lmv_unlink_slaves(struct obd_export *exp, struct mdc_op_data *data,
1222                       struct ptlrpc_request **req)
1223 {
1224         struct obd_device *obd = exp->exp_obd;
1225         struct lmv_obd *lmv = &obd->u.lmv;
1226         struct mea *mea = data->mea1;
1227         struct mdc_op_data data2;
1228         int i, rc = 0, mds;
1229         ENTRY;
1230
1231         LASSERT(mea != NULL);
1232         for (i = 0; i < mea->mea_count; i++) {
1233                 if (lmv->tgts[i].ltd_exp == NULL)
1234                         continue;
1235
1236                 memset(&data2, 0, sizeof(data2));
1237                 data2.fid1 = mea->mea_fids[i];
1238                 data2.create_mode = MDS_MODE_DONT_LOCK | S_IFDIR;
1239                 mds = data2.fid1.mds;
1240                 rc = md_unlink(lmv->tgts[mds].ltd_exp, &data2, req);
1241                 CDEBUG(D_OTHER, "unlink slave %lu/%lu/%lu -> %d\n",
1242                        (unsigned long) mea->mea_fids[i].mds,
1243                        (unsigned long) mea->mea_fids[i].id,
1244                        (unsigned long) mea->mea_fids[i].generation, rc);
1245                 if (*req) {
1246                         ptlrpc_req_finished(*req);
1247                         *req = NULL;
1248                 }
1249                 if (rc)
1250                         break;
1251         }
1252         RETURN(rc);
1253 }
1254
1255 int lmv_unlink(struct obd_export *exp, struct mdc_op_data *data,
1256                struct ptlrpc_request **request)
1257 {
1258         struct obd_device *obd = exp->exp_obd;
1259         struct lmv_obd *lmv = &obd->u.lmv;
1260         int rc, i = 0;
1261         ENTRY;
1262         
1263         rc = lmv_check_connect(obd);
1264         if (rc)
1265                 RETURN(rc);
1266
1267         if (data->namelen == 0 && data->mea1 != NULL) {
1268                 /* mds asks to remove slave objects */
1269                 rc = lmv_unlink_slaves(exp, data, request);
1270                 RETURN(rc);
1271         } else if (data->namelen != 0) {
1272                 struct lmv_obj *obj;
1273                 
1274                 obj = lmv_grab_obj(obd, &data->fid1);
1275                 if (obj) {
1276                         i = raw_name2idx(obj->objcount, data->name,
1277                                          data->namelen);
1278                         data->fid1 = obj->objs[i].fid;
1279                         lmv_put_obj(obj);
1280                 }
1281                 CDEBUG(D_OTHER, "unlink '%*s' in %lu/%lu/%lu -> %u\n",
1282                        data->namelen, data->name,
1283                        (unsigned long) data->fid1.mds,
1284                        (unsigned long) data->fid1.id,
1285                        (unsigned long) data->fid1.generation, i);
1286         } else {
1287                 CDEBUG(D_OTHER, "drop i_nlink on %lu/%lu/%lu\n",
1288                        (unsigned long) data->fid1.mds,
1289                        (unsigned long) data->fid1.id,
1290                        (unsigned long) data->fid1.generation);
1291         }
1292         rc = md_unlink(lmv->tgts[data->fid1.mds].ltd_exp, data, request); 
1293         RETURN(rc);
1294 }
1295
1296 struct obd_device *lmv_get_real_obd(struct obd_export *exp,
1297                                     char *name, int len)
1298 {
1299         struct obd_device *obd = exp->exp_obd;
1300         struct lmv_obd *lmv = &obd->u.lmv;
1301         int rc;
1302         ENTRY;
1303
1304         rc = lmv_check_connect(obd);
1305         if (rc)
1306                 RETURN(ERR_PTR(rc));
1307         obd = lmv->tgts[0].ltd_exp->exp_obd;
1308         EXIT;
1309         return obd;
1310 }
1311
1312 int lmv_init_ea_size(struct obd_export *exp, int easize, int cookiesize)
1313 {
1314         struct obd_device *obd = exp->exp_obd;
1315         struct lmv_obd *lmv = &obd->u.lmv;
1316         int i, rc = 0, change = 0;
1317         ENTRY;
1318
1319         if (lmv->max_easize < easize) {
1320                 lmv->max_easize = easize;
1321                 change = 1;
1322         }
1323         if (lmv->max_cookiesize < cookiesize) {
1324                 lmv->max_cookiesize = cookiesize;
1325                 change = 1;
1326         }
1327         if (change == 0)
1328                 RETURN(0);
1329         
1330         if (lmv->connected == 0)
1331                 RETURN(0);
1332
1333         for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
1334                 rc = obd_init_ea_size(lmv->tgts[i].ltd_exp, easize, cookiesize);
1335                 if (rc) {
1336                         CERROR("obd_init_ea_size() failed on MDT target %d, "
1337                                "error %d.\n", i, rc);
1338                         break;
1339                 }
1340         }
1341         RETURN(rc);
1342 }
1343
1344 int lmv_obd_create_single(struct obd_export *exp, struct obdo *oa,
1345                           struct lov_stripe_md **ea, struct obd_trans_info *oti)
1346 {
1347         struct obd_device *obd = exp->exp_obd;
1348         struct lmv_obd *lmv = &obd->u.lmv;
1349         struct lov_stripe_md obj_md;
1350         struct lov_stripe_md *obj_mdp = &obj_md;
1351         int rc = 0;
1352         ENTRY;
1353
1354         rc = lmv_check_connect(obd);
1355         if (rc)
1356                 RETURN(rc);
1357
1358         LASSERT(ea == NULL);
1359         LASSERT(oa->o_mds < lmv->desc.ld_tgt_count);
1360
1361         rc = obd_create(lmv->tgts[oa->o_mds].ltd_exp, oa, &obj_mdp, oti);
1362
1363         RETURN(rc);
1364 }
1365
1366 /*
1367  * to be called from MDS only
1368  */
1369 int lmv_obd_create(struct obd_export *exp, struct obdo *oa,
1370                    struct lov_stripe_md **ea, struct obd_trans_info *oti)
1371 {
1372         struct obd_device *obd = exp->exp_obd;
1373         struct lmv_obd *lmv = &obd->u.lmv;
1374         struct mea *mea;
1375         int i, c, rc = 0;
1376         struct ll_fid mfid;
1377         ENTRY;
1378
1379         rc = lmv_check_connect(obd);
1380         if (rc)
1381                 RETURN(rc);
1382
1383         LASSERT(oa != NULL);
1384         
1385         if (ea == NULL) {
1386                 rc = lmv_obd_create_single(exp, oa, NULL, oti);
1387                 RETURN(rc);
1388         }
1389
1390         if (*ea == NULL) {
1391                 rc = obd_alloc_diskmd(exp, (struct lov_mds_md **)ea);
1392                 if (rc < 0) {
1393                         CERROR("obd_alloc_diskmd() failed, error %d\n",
1394                                rc);
1395                         RETURN(rc);
1396                 }
1397                 
1398                 if (*ea == NULL)
1399                         RETURN(-EINVAL);
1400         }
1401
1402         rc = 0;
1403         mfid.id = oa->o_id;
1404         mfid.generation = oa->o_generation;
1405         
1406         mea = (struct mea *)*ea;
1407         if (!mea->mea_count || mea->mea_count > lmv->desc.ld_tgt_count)
1408                 mea->mea_count = lmv->desc.ld_tgt_count;
1409
1410         mea->mea_master = -1;
1411         
1412         for (i = 0, c = 0; c < mea->mea_count && 
1413                 i < lmv->desc.ld_tgt_count; i++) {
1414                 struct lov_stripe_md obj_md;
1415                 struct lov_stripe_md *obj_mdp = &obj_md;
1416                
1417                 if (lmv->tgts[i].ltd_exp == NULL) {
1418                         /* this is master MDS */
1419                         mea->mea_fids[c].id = mfid.id;
1420                         mea->mea_fids[c].generation = mfid.generation;
1421                         mea->mea_fids[c].mds = i;
1422                         mea->mea_master = i;
1423                         c++;
1424                         continue;
1425                 }
1426
1427                 /* "master" MDS should always be part of stripped dir, so scan
1428                    for it. */
1429                 if (mea->mea_master == -1 && c == mea->mea_count - 1)
1430                         continue;
1431
1432                 oa->o_valid = OBD_MD_FLGENER | OBD_MD_FLTYPE | OBD_MD_FLMODE
1433                         | OBD_MD_FLUID | OBD_MD_FLGID | OBD_MD_FLID;
1434
1435                 rc = obd_create(lmv->tgts[c].ltd_exp, oa, &obj_mdp, oti);
1436                 if (rc) {
1437                         CERROR("obd_create() failed on MDT target %d, "
1438                                "error %d\n", c, rc);
1439                         RETURN(rc);
1440                 }
1441
1442                 mea->mea_fids[c].id = oa->o_id;
1443                 mea->mea_fids[c].generation = oa->o_generation;
1444                 mea->mea_fids[c].mds = i;
1445                 c++;
1446                 CDEBUG(D_OTHER, "dirobj at mds %d: "LPU64"/%u\n",
1447                        i, oa->o_id, oa->o_generation);
1448         }
1449         LASSERT(c == mea->mea_count);
1450         CDEBUG(D_OTHER, "%d dirobjects created\n", (int) mea->mea_count);
1451
1452         RETURN(rc);
1453 }
1454
1455 static int lmv_get_info(struct obd_export *exp, __u32 keylen,
1456                         void *key, __u32 *vallen, void *val)
1457 {
1458         struct obd_device *obd;
1459         struct lmv_obd *lmv;
1460         ENTRY;
1461
1462         obd = class_exp2obd(exp);
1463         if (obd == NULL) {
1464                 CDEBUG(D_IOCTL, "invalid client cookie "LPX64"\n",
1465                        exp->exp_handle.h_cookie);
1466                 RETURN(-EINVAL);
1467         }
1468
1469         lmv = &obd->u.lmv;
1470         if (keylen == 6 && memcmp(key, "mdsize", 6) == 0) {
1471                 __u32 *mdsize = val;
1472                 *vallen = sizeof(__u32);
1473                 *mdsize = sizeof(struct ll_fid) * lmv->desc.ld_tgt_count
1474                                 + sizeof(struct mea);
1475                 RETURN(0);
1476         } else if (keylen == 6 && memcmp(key, "mdsnum", 6) == 0) {
1477                 struct obd_uuid *cluuid = &lmv->cluuid;
1478                 struct lmv_tgt_desc *tgts;
1479                 __u32 *mdsnum = val;
1480                 int i;
1481
1482                 for (i = 0, tgts = lmv->tgts; i < lmv->desc.ld_tgt_count; i++, tgts++) {
1483                         if (obd_uuid_equals(&tgts->uuid, cluuid)) {
1484                                 *vallen = sizeof(__u32);
1485                                 *mdsnum = i;
1486                                 RETURN(0);
1487                         }
1488                 }
1489                 LASSERT(0);
1490         }
1491
1492         CDEBUG(D_IOCTL, "invalid key\n");
1493         RETURN(-EINVAL);
1494 }
1495
1496 int lmv_set_info(struct obd_export *exp, obd_count keylen,
1497                  void *key, obd_count vallen, void *val)
1498 {
1499         struct obd_device *obd;
1500         struct lmv_obd *lmv;
1501         ENTRY;
1502
1503         obd = class_exp2obd(exp);
1504         if (obd == NULL) {
1505                 CDEBUG(D_IOCTL, "invalid client cookie "LPX64"\n",
1506                        exp->exp_handle.h_cookie);
1507                 RETURN(-EINVAL);
1508         }
1509         lmv = &obd->u.lmv;
1510
1511         if (keylen >= strlen("client") && strcmp(key, "client") == 0) {
1512                 struct lmv_tgt_desc *tgts;
1513                 int i, rc;
1514
1515                 rc = lmv_check_connect(obd);
1516                 if (rc)
1517                         RETURN(rc);
1518
1519                 for (i = 0, tgts = lmv->tgts; 
1520                         i < lmv->desc.ld_tgt_count; i++, tgts++) {
1521                         rc = obd_set_info(tgts->ltd_exp, keylen, key, vallen, val);
1522                         if (rc)
1523                                 RETURN(rc);
1524                 }
1525                 RETURN(0);
1526         } else if (keylen >= strlen("inter_mds") && strcmp(key, "inter_mds") == 0) {
1527                 lmv->server_timeout = 1;
1528                 lmv_set_timeouts(obd);
1529                 RETURN(0);
1530         }
1531         
1532         RETURN(-EINVAL);
1533 }
1534
1535 int lmv_packmd(struct obd_export *exp, struct lov_mds_md **lmmp,
1536                struct lov_stripe_md *lsm)
1537 {
1538         struct obd_device *obd = class_exp2obd(exp);
1539         struct lmv_obd *lmv = &obd->u.lmv;
1540         int mea_size;
1541         ENTRY;
1542
1543         mea_size = sizeof(struct ll_fid) * 
1544                 lmv->desc.ld_tgt_count + sizeof(struct mea);
1545         if (!lmmp)
1546                 RETURN(mea_size);
1547
1548         if (*lmmp && !lsm) {
1549                 OBD_FREE(*lmmp, mea_size);
1550                 *lmmp = NULL;
1551                 RETURN(0);
1552         }
1553
1554         if (*lmmp == NULL) {
1555                 OBD_ALLOC(*lmmp, mea_size);
1556                 if (*lmmp == NULL)
1557                         RETURN(-ENOMEM);
1558         }
1559
1560         if (!lsm)
1561                 RETURN(mea_size);
1562
1563 #warning "MEA packing/convertation must be here! -bzzz"
1564         memcpy(*lmmp, lsm, mea_size);
1565         RETURN(mea_size);
1566 }
1567
1568 int lmv_unpackmd(struct obd_export *exp, struct lov_stripe_md **mem_tgt,
1569                  struct lov_mds_md *disk_src, int mdsize)
1570 {
1571         struct obd_device *obd = class_exp2obd(exp);
1572         struct lmv_obd *lmv = &obd->u.lmv;
1573         struct mea **tmea = (struct mea **) mem_tgt;
1574         struct mea *mea = (void *) disk_src;
1575         int mea_size;
1576         ENTRY;
1577
1578         mea_size = sizeof(struct ll_fid) * 
1579                 lmv->desc.ld_tgt_count + sizeof(struct mea);
1580         if (mem_tgt == NULL)
1581                 return mea_size;
1582
1583         if (*mem_tgt != NULL && disk_src == NULL) {
1584                 OBD_FREE(*tmea, mea_size);
1585                 RETURN(0);
1586         }
1587
1588         LASSERT(mea_size == mdsize);
1589
1590         OBD_ALLOC(*tmea, mea_size);
1591         if (*tmea == NULL)
1592                 RETURN(-ENOMEM);
1593
1594         if (!disk_src)
1595                 RETURN(mea_size);
1596
1597 #warning "MEA unpacking/convertation must be here! -bzzz"
1598         memcpy(*tmea, mea, mdsize);
1599         RETURN(mea_size);
1600 }
1601
1602 int lmv_brw(int rw, struct obd_export *exp, struct obdo *oa,
1603             struct lov_stripe_md *ea, obd_count oa_bufs,
1604             struct brw_page *pgarr, struct obd_trans_info *oti)
1605 {
1606         struct obd_device *obd = exp->exp_obd;
1607         struct lmv_obd *lmv = &obd->u.lmv;
1608         struct mea *mea = (struct mea *) ea;
1609         int err;
1610       
1611         LASSERT(oa != NULL);
1612         LASSERT(ea != NULL);
1613         LASSERT(pgarr != NULL);
1614         LASSERT(oa->o_mds < lmv->desc.ld_tgt_count);
1615
1616         oa->o_gr = mea->mea_fids[oa->o_mds].generation;
1617         oa->o_id = mea->mea_fids[oa->o_mds].id;
1618         oa->o_valid =  OBD_MD_FLID | OBD_MD_FLGROUP;
1619         err = obd_brw(rw, lmv->tgts[oa->o_mds].ltd_exp, oa,
1620                       NULL, oa_bufs, pgarr, oti);
1621         RETURN(err);
1622 }
1623
1624 struct obd_ops lmv_obd_ops = {
1625         .o_owner                = THIS_MODULE,
1626         .o_attach               = lmv_attach,
1627         .o_detach               = lmv_detach,
1628         .o_setup                = lmv_setup,
1629         .o_cleanup              = lmv_cleanup,
1630         .o_connect              = lmv_connect,
1631         .o_disconnect           = lmv_disconnect,
1632         .o_statfs               = lmv_statfs,
1633         .o_get_info             = lmv_get_info,
1634         .o_set_info             = lmv_set_info,
1635         .o_create               = lmv_obd_create,
1636         .o_packmd               = lmv_packmd,
1637         .o_unpackmd             = lmv_unpackmd,
1638         .o_brw                  = lmv_brw,
1639         .o_init_ea_size         = lmv_init_ea_size,
1640         .o_notify               = lmv_notify,
1641         .o_iocontrol            = lmv_iocontrol,
1642 };
1643
1644 struct md_ops lmv_md_ops = {
1645         .m_getstatus            = lmv_getstatus,
1646         .m_getattr              = lmv_getattr,
1647         .m_change_cbdata        = lmv_change_cbdata,
1648         .m_change_cbdata_name   = lmv_change_cbdata_name,
1649         .m_close                = lmv_close,
1650         .m_create               = lmv_create,
1651         .m_done_writing         = lmv_done_writing,
1652         .m_enqueue              = lmv_enqueue,
1653         .m_getattr_name         = lmv_getattr_name,
1654         .m_intent_lock          = lmv_intent_lock,
1655         .m_link                 = lmv_link,
1656         .m_rename               = lmv_rename,
1657         .m_setattr              = lmv_setattr,
1658         .m_sync                 = lmv_sync,
1659         .m_readpage             = lmv_readpage,
1660         .m_unlink               = lmv_unlink,
1661         .m_get_real_obd         = lmv_get_real_obd,
1662         .m_valid_attrs          = lmv_valid_attrs,
1663 };
1664
1665 int __init lmv_init(void)
1666 {
1667         struct lprocfs_static_vars lvars;
1668         int rc;
1669
1670         lprocfs_init_vars(lmv, &lvars);
1671         rc = class_register_type(&lmv_obd_ops, &lmv_md_ops,
1672                                  lvars.module_vars, OBD_LMV_DEVICENAME);
1673         RETURN(rc);
1674 }
1675
#ifdef __KERNEL__
/*
 * Module unload hook: unregister the device type that lmv_init()
 * registered under OBD_LMV_DEVICENAME.
 */
static void lmv_exit(void)
{
        class_unregister_type(OBD_LMV_DEVICENAME);
}

/* Module metadata. */
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Lustre Logical Metadata Volume OBD driver");
MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");

/* Load/unload entry points. */
module_init(lmv_init);
module_exit(lmv_exit);
#endif /* __KERNEL__ */