Whamcloud - gitweb
Added client disconnect error message.
[fs/lustre-release.git] / lustre / lmv / lmv_obd.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  * Copyright (C) 2002, 2003 Cluster File Systems, Inc.
5  *
6  *   This file is part of Lustre, http://www.lustre.org.
7  *
8  *   Lustre is free software; you can redistribute it and/or
9  *   modify it under the terms of version 2 of the GNU General Public
10  *   License as published by the Free Software Foundation.
11  *
12  *   Lustre is distributed in the hope that it will be useful,
13  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
14  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  *   GNU General Public License for more details.
16  *
17  *   You should have received a copy of the GNU General Public License
18  *   along with Lustre; if not, write to the Free Software
19  *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20  */
21
22 #ifndef EXPORT_SYMTAB
23 # define EXPORT_SYMTAB
24 #endif
25 #define DEBUG_SUBSYSTEM S_LMV
26 #ifdef __KERNEL__
27 #include <linux/slab.h>
28 #include <linux/module.h>
29 #include <linux/init.h>
30 #include <linux/slab.h>
31 #include <linux/pagemap.h>
32 #include <asm/div64.h>
33 #include <linux/seq_file.h>
34 #else
35 #include <liblustre.h>
36 #endif
37 #include <linux/ext2_fs.h>
38
39 #include <linux/obd_support.h>
40 #include <linux/lustre_lib.h>
41 #include <linux/lustre_net.h>
42 #include <linux/lustre_idl.h>
43 #include <linux/lustre_dlm.h>
44 #include <linux/lustre_mds.h>
45 #include <linux/obd_class.h>
46 #include <linux/obd_ost.h>
47 #include <linux/lprocfs_status.h>
48 #include <linux/lustre_fsfilt.h>
49 #include <linux/obd_lmv.h>
50 #include "lmv_internal.h"
51
52 /* Error codes:
53  *
54  *  -EINVAL  : UUID can't be found in the LMV's target list
55  *  -ENOTCONN: The UUID is found, but the target connection is bad (!)
56  *  -EBADF   : The UUID is found, but the OBD of the wrong type (!)
57  */
58 static int lmv_set_mdc_active(struct lmv_obd *lmv, struct obd_uuid *uuid,
59                               int activate)
60 {
61         struct obd_device *obd;
62         struct lmv_tgt_desc *tgt;
63         int i, rc = 0;
64         ENTRY;
65
66         CDEBUG(D_INFO, "Searching in lmv %p for uuid %s (activate=%d)\n",
67                lmv, uuid->uuid, activate);
68
69         spin_lock(&lmv->lmv_lock);
70         for (i = 0, tgt = lmv->tgts; i < lmv->desc.ld_tgt_count; i++, tgt++) {
71                 CDEBUG(D_INFO, "lmv idx %d is %s conn "LPX64"\n",
72                        i, tgt->uuid.uuid, tgt->ltd_exp->exp_handle.h_cookie);
73                 if (strncmp(uuid->uuid, tgt->uuid.uuid, sizeof uuid->uuid) == 0)
74                         break;
75         }
76
77         if (i == lmv->desc.ld_tgt_count)
78                 GOTO(out, rc = -EINVAL);
79
80         obd = class_exp2obd(tgt->ltd_exp);
81         if (obd == NULL)
82                 GOTO(out, rc = -ENOTCONN);
83
84         CDEBUG(D_INFO, "Found OBD %s=%s device %d (%p) type %s at LMV idx %d\n",
85                obd->obd_name, obd->obd_uuid.uuid, obd->obd_minor, obd,
86                obd->obd_type->typ_name, i);
87         LASSERT(strcmp(obd->obd_type->typ_name, LUSTRE_MDC_NAME) == 0);
88
89         if (tgt->active == activate) {
90                 CDEBUG(D_INFO, "OBD %p already %sactive!\n", obd,
91                        activate ? "" : "in");
92                 GOTO(out, rc);
93         }
94
95         CDEBUG(D_INFO, "Marking OBD %p %sactive\n", obd, activate ? "" : "in");
96
97         tgt->active = activate;
98         if (activate)
99                 lmv->desc.ld_active_tgt_count++;
100         else
101                 lmv->desc.ld_active_tgt_count--;
102
103         EXIT;
104  out:
105         spin_unlock(&lmv->lmv_lock);
106         return rc;
107 }
108
109 static int lmv_notify(struct obd_device *obd, struct obd_device *watched,
110                       int active)
111 {
112         int rc;
113         struct obd_uuid *uuid;
114
115         if (strcmp(watched->obd_type->typ_name, LUSTRE_MDC_NAME)) {
116                 CERROR("unexpected notification of %s %s!\n",
117                        watched->obd_type->typ_name,
118                        watched->obd_name);
119                 return -EINVAL;
120         }
121         uuid = &watched->u.cli.cl_import->imp_target_uuid;
122
123         /* Set MDC as active before notifying the observer, so the
124          * observer can use the MDC normally.  
125          */
126         rc = lmv_set_mdc_active(&obd->u.lmv, uuid, active);
127         if (rc) {
128                 CERROR("%sactivation of %s failed: %d\n",
129                        active ? "" : "de", uuid->uuid, rc);
130                 RETURN(rc);
131         }
132
133         if (obd->obd_observer)
134                 /* Pass the notification up the chain. */
135                 rc = obd_notify(obd->obd_observer, watched, active);
136
137         RETURN(rc);
138 }
139
140 int lmv_attach(struct obd_device *dev, obd_count len, void *data)
141 {
142         struct lprocfs_static_vars lvars;
143         int rc;
144         ENTRY;
145
146         lprocfs_init_vars(lmv, &lvars);
147         rc = lprocfs_obd_attach(dev, lvars.obd_vars);
148         if (rc == 0) {
149 #ifdef __KERNEL__
150                 struct proc_dir_entry *entry;
151                 
152                 entry = create_proc_entry("target_obd", 0444, dev->obd_proc_entry);
153                 if (entry == NULL)
154                         RETURN(-ENOMEM);
155                 /* entry->proc_fops = &lmv_proc_target_fops; */
156                 entry->data = dev;
157 #endif
158        }
159         RETURN (rc);
160 }
161
/* Detach method: hand the device to lprocfs_obd_detach() and report its
 * result unchanged. */
int lmv_detach(struct obd_device *dev)
{
        int rc;

        rc = lprocfs_obd_detach(dev);

        return rc;
}
166
167 /* This is fake connect function. Its purpose is to initialize lmv and 
168  * say caller that everything is okay. Real connection will be performed
169  * later. */
170 static int lmv_connect(struct lustre_handle *conn, struct obd_device *obd,
171                        struct obd_uuid *cluuid)
172 {
173         struct lmv_obd *lmv = &obd->u.lmv;
174         struct obd_export *exp;
175         int rc;
176         ENTRY;
177
178         rc = class_connect(conn, obd, cluuid);
179         if (rc) {
180                 CERROR("class_connection() returned %d\n", rc);
181                 RETURN(rc);
182         }
183
184         exp = class_conn2export(conn);
185         /* We don't want to actually do the underlying connections more than
186          * once, so keep track. */
187         lmv->refcount++;
188         if (lmv->refcount > 1) {
189                 class_export_put(exp);
190                 RETURN(0);
191         }
192
193         lmv->cluuid = *cluuid;
194         lmv->connected = 0;
195         lmv->exp = exp;
196
197         RETURN(0);
198 }
199
200 void lmv_set_timeouts(struct obd_device *obd)
201 {
202         struct lmv_tgt_desc *tgts;
203         struct lmv_obd *lmv;
204         int i;
205
206         lmv = &obd->u.lmv;
207         if (lmv->server_timeout == 0)
208                 return;
209
210         if (lmv->connected == 0)
211                 return;
212
213         for (i = 0, tgts = lmv->tgts; i < lmv->desc.ld_tgt_count; i++, tgts++) {
214                 if (tgts->ltd_exp == NULL)
215                         continue;
216                 obd_set_info(tgts->ltd_exp, strlen("inter_mds"),
217                              "inter_mds", 0, NULL);
218         }
219 }
220
221 /* Performs a check if passed obd is connected. If no - connect it. */
222 int lmv_check_connect(struct obd_device *obd) {
223         struct lmv_obd *lmv = &obd->u.lmv;
224         struct obd_uuid *cluuid;
225         struct lmv_tgt_desc *tgts;
226         struct obd_export *exp;
227         int rc, rc2, i;
228
229         if (lmv->connected)
230                 return 0;
231       
232         lmv->connected = 1;
233         cluuid = &lmv->cluuid;
234         exp = lmv->exp;
235         
236         CDEBUG(D_OTHER, "time to connect %s to %s\n",
237                cluuid->uuid, obd->obd_name);
238
239         for (i = 0, tgts = lmv->tgts; i < lmv->desc.ld_tgt_count; i++, tgts++) {
240                 struct obd_device *tgt_obd;
241                 struct obd_uuid lmv_osc_uuid = { "LMV_OSC_UUID" };
242                 struct lustre_handle conn = {0, };
243
244                 LASSERT(tgts != NULL);
245
246                 tgt_obd = class_find_client_obd(&tgts->uuid, LUSTRE_MDC_NAME, 
247                                                 &obd->obd_uuid);
248                 if (!tgt_obd) {
249                         CERROR("Target %s not attached\n", tgts->uuid.uuid);
250                         GOTO(out_disc, rc = -EINVAL);
251                 }
252
253                 /* for MDS: don't connect to yourself */
254                 if (obd_uuid_equals(&tgts->uuid, cluuid)) {
255                         CDEBUG(D_OTHER, "don't connect back to %s\n",
256                                cluuid->uuid);
257                         tgts->ltd_exp = NULL;
258                         continue;
259                 }
260
261                 CDEBUG(D_OTHER, "connect to %s(%s) - %s, %s FOR %s\n",
262                         tgt_obd->obd_name, tgt_obd->obd_uuid.uuid,
263                         tgts->uuid.uuid, obd->obd_uuid.uuid,
264                         cluuid->uuid);
265
266                 if (!tgt_obd->obd_set_up) {
267                         CERROR("Target %s not set up\n", tgts->uuid.uuid);
268                         GOTO(out_disc, rc = -EINVAL);
269                 }
270                 
271                 rc = obd_connect(&conn, tgt_obd, &lmv_osc_uuid);
272                 if (rc) {
273                         CERROR("Target %s connect error %d\n",
274                                 tgts->uuid.uuid, rc);
275                         GOTO(out_disc, rc);
276                 }
277                 tgts->ltd_exp = class_conn2export(&conn);
278
279                 obd_init_ea_size(tgts->ltd_exp, lmv->max_easize,
280                                  lmv->max_cookiesize);
281                 
282                 rc = obd_register_observer(tgt_obd, obd);
283                 if (rc) {
284                         CERROR("Target %s register_observer error %d\n",
285                                tgts->uuid.uuid, rc);
286                         obd_disconnect(tgts->ltd_exp, 0);
287                         GOTO(out_disc, rc);
288                 }
289
290                 lmv->desc.ld_active_tgt_count++;
291                 tgts->active = 1;
292                 
293                 CDEBUG(D_OTHER, "connected to %s(%s) successfully (%d)\n",
294                         tgt_obd->obd_name, tgt_obd->obd_uuid.uuid,
295                         atomic_read(&obd->obd_refcount));
296         }
297
298         lmv_set_timeouts(obd);
299
300         class_export_put(exp);
301         return 0;
302
303  out_disc:
304         while (i-- > 0) {
305                 struct obd_uuid uuid;
306                 --tgts;
307                 --lmv->desc.ld_active_tgt_count;
308                 tgts->active = 0;
309                 /* save for CERROR below; (we know it's terminated) */
310                 uuid = tgts->uuid;
311                 rc2 = obd_disconnect(tgts->ltd_exp, 0);
312                 if (rc2)
313                         CERROR("error: LMV target %s disconnect on MDT idx %d: "
314                                "rc = %d\n", uuid.uuid, i, rc2);
315         }
316         class_disconnect(exp, 0);
317         RETURN (rc);
318 }
319
320 static int lmv_disconnect(struct obd_export *exp, int flags)
321 {
322         struct obd_device *obd = class_exp2obd(exp);
323         struct lmv_obd *lmv = &obd->u.lmv;
324         int rc, i;
325         ENTRY;
326
327         if (!lmv->tgts)
328                 goto out_local;
329
330         /* Only disconnect the underlying layers on the final disconnect. */
331         lmv->refcount--;
332         if (lmv->refcount != 0)
333                 goto out_local;
334
335         for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
336                 if (lmv->tgts[i].ltd_exp == NULL)
337                         continue;
338
339                 if (obd->obd_no_recov) {
340                         /* Pass it on to our clients.
341                          * XXX This should be an argument to disconnect,
342                          * XXX not a back-door flag on the OBD.  Ah well.
343                          */
344                         struct obd_device *mdc_obd;
345                         mdc_obd = class_exp2obd(lmv->tgts[i].ltd_exp);
346                         if (mdc_obd)
347                                 mdc_obd->obd_no_recov = 1;
348                 }
349
350                 CDEBUG(D_OTHER, "disconnected from %s(%s) successfully\n",
351                         lmv->tgts[i].ltd_exp->exp_obd->obd_name,
352                         lmv->tgts[i].ltd_exp->exp_obd->obd_uuid.uuid);
353
354                 obd_register_observer(lmv->tgts[i].ltd_exp->exp_obd, NULL);
355
356                 rc = obd_disconnect(lmv->tgts[i].ltd_exp, flags);
357                 if (rc) {
358                         if (lmv->tgts[i].active) {
359                                 CERROR("Target %s disconnect error %d\n",
360                                        lmv->tgts[i].uuid.uuid, rc);
361                         }
362                         rc = 0;
363                 }
364                 if (lmv->tgts[i].active) {
365                         lmv->desc.ld_active_tgt_count--;
366                         lmv->tgts[i].active = 0;
367                 }
368                 lmv->tgts[i].ltd_exp = NULL;
369         }
370
371 out_local:
372         /* This is the case when no real connection is established by
373          * lmv_check_connect(). */
374         if (!lmv->connected)
375                 class_export_put(exp);
376         rc = class_disconnect(exp, 0);
377         RETURN(rc);
378 }
379
380 static int lmv_iocontrol(unsigned int cmd, struct obd_export *exp,
381                          int len, void *karg, void *uarg)
382 {
383         struct obd_device *obddev = class_exp2obd(exp);
384         struct lmv_obd *lmv = &obddev->u.lmv;
385         int i, rc = 0, set = 0;
386
387         ENTRY;
388
389         if (lmv->desc.ld_tgt_count == 0)
390                 RETURN(-ENOTTY);
391         
392         for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
393                 int err;
394
395                 err = obd_iocontrol(cmd, lmv->tgts[i].ltd_exp,
396                                     len, karg, uarg);
397                 if (err) {
398                         if (lmv->tgts[i].active) {
399                                 CERROR("error: iocontrol MDC %s on MDT"
400                                        "idx %d: err = %d\n",
401                                        lmv->tgts[i].uuid.uuid, i, err);
402                                 if (!rc)
403                                         rc = err;
404                         }
405                 } else
406                         set = 1;
407         }
408         if (!set && !rc)
409                 rc = -EIO;
410
411         RETURN(rc);
412 }
413
414 static int lmv_setup(struct obd_device *obd, obd_count len, void *buf)
415 {
416         struct lustre_cfg *lcfg = buf;
417         struct lmv_desc *desc;
418         struct lmv_obd *lmv = &obd->u.lmv;
419         struct obd_uuid *uuids;
420         struct lmv_tgt_desc *tgts;
421         int i;
422         int rc = 0;
423         ENTRY;
424
425         if (lcfg->lcfg_inllen1 < 1) {
426                 CERROR("LMV setup requires a descriptor\n");
427                 RETURN(-EINVAL);
428         }
429
430         if (lcfg->lcfg_inllen2 < 1) {
431                 CERROR("LMV setup requires an OST UUID list\n");
432                 RETURN(-EINVAL);
433         }
434
435         desc = (struct lmv_desc *)lcfg->lcfg_inlbuf1;
436         if (sizeof(*desc) > lcfg->lcfg_inllen1) {
437                 CERROR("descriptor size wrong: %d > %d\n",
438                        (int)sizeof(*desc), lcfg->lcfg_inllen1);
439                 RETURN(-EINVAL);
440         }
441
442         uuids = (struct obd_uuid *)lcfg->lcfg_inlbuf2;
443         if (sizeof(*uuids) * desc->ld_tgt_count != lcfg->lcfg_inllen2) {
444                 CERROR("UUID array size wrong: %u * %u != %u\n",
445                        sizeof(*uuids), desc->ld_tgt_count, lcfg->lcfg_inllen2);
446                 RETURN(-EINVAL);
447         }
448
449         lmv->bufsize = sizeof(struct lmv_tgt_desc) * desc->ld_tgt_count;
450         OBD_ALLOC(lmv->tgts, lmv->bufsize);
451         if (lmv->tgts == NULL) {
452                 CERROR("Out of memory\n");
453                 RETURN(-EINVAL);
454         }
455
456         lmv->desc = *desc;
457         spin_lock_init(&lmv->lmv_lock);
458         
459         for (i = 0, tgts = lmv->tgts; i < desc->ld_tgt_count; i++, tgts++)
460                 tgts->uuid = uuids[i];
461         
462         lmv->max_easize = sizeof(struct ll_fid) *
463                 desc->ld_tgt_count + sizeof(struct mea);
464         
465         lmv->max_cookiesize = 0;
466
467         RETURN(rc);
468 }
469
470 static int lmv_statfs(struct obd_device *obd, struct obd_statfs *osfs,
471                       unsigned long max_age)
472 {
473         struct lmv_obd *lmv = &obd->u.lmv;
474         struct obd_statfs temp;
475         int rc = 0, i;
476         ENTRY;
477         
478         rc = lmv_check_connect(obd);
479         if (rc)
480                 RETURN(rc);
481                 
482         for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
483                 rc = obd_statfs(lmv->tgts[i].ltd_exp->exp_obd, &temp, max_age);
484                 if (rc) {
485                         CERROR("can't stat MDS #%d (%s)\n", i,
486                                lmv->tgts[i].ltd_exp->exp_obd->obd_name);
487                         RETURN(rc);
488                 }
489                 if (i == 0) {
490                         memcpy(osfs, &temp, sizeof(temp));
491                 } else {
492                         osfs->os_bavail += temp.os_bavail;
493                         osfs->os_blocks += temp.os_blocks;
494                         osfs->os_ffree += temp.os_ffree;
495                         osfs->os_files += temp.os_files;
496                 }
497         }
498         RETURN(rc);
499 }
500
501 static int lmv_cleanup(struct obd_device *obd, int flags) 
502 {
503         struct lmv_obd *lmv = &obd->u.lmv;
504         ENTRY;
505         lmv_cleanup_objs(obd);
506         OBD_FREE(lmv->tgts, lmv->bufsize);
507         RETURN(0);
508 }
509
510 static int lmv_getstatus(struct obd_export *exp, struct ll_fid *fid)
511 {
512         struct obd_device *obd = exp->exp_obd;
513         struct lmv_obd *lmv = &obd->u.lmv;
514         int rc;
515         ENTRY;
516         rc = lmv_check_connect(obd);
517         if (rc)
518                 RETURN(rc);
519         rc = md_getstatus(lmv->tgts[0].ltd_exp, fid);
520         fid->mds = 0;
521         RETURN(rc);
522 }
523
524 static int lmv_getattr(struct obd_export *exp, struct ll_fid *fid,
525                 unsigned long valid, unsigned int ea_size,
526                 struct ptlrpc_request **request)
527 {
528         struct obd_device *obd = exp->exp_obd;
529         struct lmv_obd *lmv = &obd->u.lmv;
530         int rc, i = fid->mds;
531         struct lmv_obj *obj;
532         ENTRY;
533         rc = lmv_check_connect(obd);
534         if (rc)
535                 RETURN(rc);
536         obj = lmv_grab_obj(obd, fid, 0);
537         CDEBUG(D_OTHER, "GETATTR for %lu/%lu/%lu %s\n",
538                (unsigned long) fid->mds,
539                (unsigned long) fid->id,
540                (unsigned long) fid->generation,
541                obj ? "(splitted)" : "");
542
543         LASSERT(fid->mds < lmv->desc.ld_tgt_count);
544         rc = md_getattr(lmv->tgts[i].ltd_exp, fid,
545                              valid, ea_size, request);
546         if (rc == 0 && obj) {
547                 /* we have to loop over dirobjs here and gather attrs
548                  * for all the slaves */
549 #warning "attrs gathering here"
550         }
551         lmv_put_obj(obj);
552         RETURN(rc);
553 }
554
555 static int lmv_change_cbdata(struct obd_export *exp,
556                                  struct ll_fid *fid, 
557                                  ldlm_iterator_t it, void *data)
558 {
559         struct obd_device *obd = exp->exp_obd;
560         struct lmv_obd *lmv = &obd->u.lmv;
561         int rc = 0;
562         ENTRY;
563         
564         rc = lmv_check_connect(obd);
565         if (rc)
566                 RETURN(rc);
567         CDEBUG(D_OTHER, "CBDATA for %lu/%lu/%lu\n",
568                (unsigned long) fid->mds,
569                (unsigned long) fid->id,
570                (unsigned long) fid->generation);
571         LASSERT(fid->mds < lmv->desc.ld_tgt_count);
572         rc = md_change_cbdata(lmv->tgts[fid->mds].ltd_exp, fid, it, data);
573         RETURN(rc);
574 }
575
576 static int lmv_change_cbdata_name(struct obd_export *exp, struct ll_fid *pfid,
577                                   char *name, int len, struct ll_fid *cfid,
578                                   ldlm_iterator_t it, void *data)
579 {
580         struct obd_device *obd = exp->exp_obd;
581         struct lmv_obd *lmv = &obd->u.lmv;
582         struct lmv_obj *obj;
583         int rc = 0, mds;
584         ENTRY;
585         rc = lmv_check_connect(obd);
586         if (rc)
587                 RETURN(rc);
588         LASSERT(pfid->mds < lmv->desc.ld_tgt_count);
589         LASSERT(cfid->mds < lmv->desc.ld_tgt_count);
590         CDEBUG(D_OTHER, "CBDATA for %lu/%lu/%lu:%*s -> %lu/%lu/%lu\n",
591                (unsigned long) pfid->mds, (unsigned long) pfid->id,
592                (unsigned long) pfid->generation, len, name,
593                (unsigned long) cfid->mds, (unsigned long) cfid->id,
594                (unsigned long) cfid->generation);
595
596         /* this is default mds for directory name belongs to */
597         mds = pfid->mds;
598         obj = lmv_grab_obj(obd, pfid, 0);
599         if (obj) {
600                 /* directory is splitted. look for right mds for this name */
601                 mds = raw_name2idx(obj->objcount, name, len);
602                 lmv_put_obj(obj);
603         }
604         rc = md_change_cbdata(lmv->tgts[mds].ltd_exp, cfid, it, data);
605         RETURN(rc);
606 }
607
608 static int lmv_valid_attrs(struct obd_export *exp, struct ll_fid *fid) 
609 {
610         struct obd_device *obd = exp->exp_obd;
611         struct lmv_obd *lmv = &obd->u.lmv;
612         int rc = 0;
613         ENTRY;
614         rc = lmv_check_connect(obd);
615         if (rc)
616                 RETURN(rc);
617         CDEBUG(D_OTHER, "validate %lu/%lu/%lu\n",
618                (unsigned long) fid->mds,
619                (unsigned long) fid->id,
620                (unsigned long) fid->generation);
621         LASSERT(fid->mds < lmv->desc.ld_tgt_count);
622         rc = md_valid_attrs(lmv->tgts[fid->mds].ltd_exp, fid);
623         RETURN(rc);
624 }
625
626 int lmv_close(struct obd_export *exp, struct obdo *obdo,
627                   struct obd_client_handle *och,
628                   struct ptlrpc_request **request)
629 {
630         struct obd_device *obd = exp->exp_obd;
631         struct lmv_obd *lmv = &obd->u.lmv;
632         int rc, i = obdo->o_mds;
633         ENTRY;
634         rc = lmv_check_connect(obd);
635         if (rc)
636                 RETURN(rc);
637         LASSERT(i < lmv->desc.ld_tgt_count);
638         CDEBUG(D_OTHER, "CLOSE %lu/%lu/%lu\n", (unsigned long) obdo->o_mds,
639                (unsigned long) obdo->o_id, (unsigned long) obdo->o_generation);
640         rc = md_close(lmv->tgts[i].ltd_exp, obdo, och, request);
641         RETURN(rc);
642 }
643
644 int lmv_get_mea_and_update_object(struct obd_export *exp, struct ll_fid *fid)
645 {
646         struct obd_device *obd = exp->exp_obd;
647         struct lmv_obd *lmv = &obd->u.lmv;
648         struct ptlrpc_request *req = NULL;
649         struct lustre_md md;
650         unsigned long valid;
651         int mealen, rc;
652
653         md.mea = NULL;
654         mealen = MEA_SIZE_LMV(lmv);
655         
656         valid = OBD_MD_FLEASIZE | OBD_MD_FLDIREA;
657
658         /* time to update mea of parent fid */
659         rc = md_getattr(lmv->tgts[fid->mds].ltd_exp, fid,
660                         valid, mealen, &req);
661         if (rc) {
662                 CERROR("md_getattr() failed, rc = %d\n", rc);
663                 GOTO(cleanup, rc);
664         }
665
666         rc = mdc_req2lustre_md(exp, req, 0, NULL, &md);
667         if (rc) {
668                 CERROR("mdc_req2lustre_md() failed, rc = %d\n", rc);
669                 GOTO(cleanup, rc);
670         }
671
672         if (md.mea == NULL)
673                 GOTO(cleanup, rc = -ENODATA);
674
675         rc = lmv_create_obj_from_attrs(exp, fid, md.mea);
676         obd_free_memmd(exp, (struct lov_stripe_md **) &md.mea);
677
678 cleanup:
679         if (req)
680                 ptlrpc_req_finished(req);
681         RETURN(rc);
682 }
683
684 int lmv_create(struct obd_export *exp, struct mdc_op_data *op_data,
685                const void *data, int datalen, int mode, __u32 uid,
686                __u32 gid, __u64 rdev, struct ptlrpc_request **request)
687 {
688         struct obd_device *obd = exp->exp_obd;
689         struct lmv_obd *lmv = &obd->u.lmv;
690         struct mds_body *mds_body;
691         struct lmv_obj *obj;
692         int rc, mds;
693         ENTRY;
694
695         rc = lmv_check_connect(obd);
696         if (rc)
697                 RETURN(rc);
698
699         if (!lmv->desc.ld_active_tgt_count)
700                 RETURN(-EIO);
701 repeat:
702         obj = lmv_grab_obj(obd, &op_data->fid1, 0);
703         if (obj) {
704                 mds = raw_name2idx(obj->objcount, op_data->name,
705                                    op_data->namelen);
706                 op_data->fid1 = obj->objs[mds].fid;
707                 lmv_put_obj(obj);
708         }
709
710         CDEBUG(D_OTHER, "CREATE '%*s' on %lu/%lu/%lu\n",
711                         op_data->namelen, op_data->name,
712                         (unsigned long) op_data->fid1.mds,
713                         (unsigned long) op_data->fid1.id,
714                         (unsigned long) op_data->fid1.generation);
715         rc = md_create(lmv->tgts[op_data->fid1.mds].ltd_exp, op_data, data,
716                        datalen, mode, uid, gid, rdev, request);
717         if (rc == 0) {
718                 if (*request == NULL)
719                      RETURN(rc);
720                 mds_body = lustre_msg_buf((*request)->rq_repmsg, 0,
721                                           sizeof(*mds_body));
722                 LASSERT(mds_body != NULL);
723                 CDEBUG(D_OTHER, "created. id = %lu, generation = %lu, mds = %d\n",
724                        (unsigned long) mds_body->fid1.id,
725                        (unsigned long) mds_body->fid1.generation,
726                        op_data->fid1.mds);
727                 LASSERT(mds_body->valid & OBD_MD_MDS ||
728                         mds_body->mds == op_data->fid1.mds);
729         } else if (rc == -ERESTART) {
730                 /* directory got splitted. time to update local object
731                  * and repeat the request with proper MDS */
732                 rc = lmv_get_mea_and_update_object(exp, &op_data->fid1);
733                 if (rc == 0) {
734                         ptlrpc_req_finished(*request);
735                         goto repeat;
736                 }
737         }
738         RETURN(rc);
739 }
740
741 int lmv_done_writing(struct obd_export *exp, struct obdo *obdo)
742 {
743         struct obd_device *obd = exp->exp_obd;
744         struct lmv_obd *lmv = &obd->u.lmv;
745         int rc;
746         ENTRY;
747         rc = lmv_check_connect(obd);
748         if (rc)
749                 RETURN(rc);
750
751         /* FIXME: choose right MDC here */
752         CWARN("this method isn't implemented yet\n");
753         rc = md_done_writing(lmv->tgts[0].ltd_exp, obdo);
754         RETURN(rc);
755 }
756
757 int lmv_enqueue_slaves(struct obd_export *exp, int locktype,
758                          struct lookup_intent *it, int lockmode,
759                          struct mdc_op_data *data, struct lustre_handle *lockh,
760                          void *lmm, int lmmsize,
761                          ldlm_completion_callback cb_completion,
762                          ldlm_blocking_callback cb_blocking, void *cb_data)
763 {
764         struct obd_device *obd = exp->exp_obd;
765         struct lmv_obd *lmv = &obd->u.lmv;
766         struct mea *mea = data->mea1;
767         struct mdc_op_data data2;
768         int i, rc, mds;
769         ENTRY;
770
771         LASSERT(mea != NULL);
772         for (i = 0; i < mea->mea_count; i++) {
773                 if (lmv->tgts[i].ltd_exp == NULL)
774                         continue;
775
776                 memset(&data2, 0, sizeof(data2));
777                 data2.fid1 = mea->mea_fids[i];
778                 mds = data2.fid1.mds;
779                 rc = md_enqueue(lmv->tgts[mds].ltd_exp, locktype, it, lockmode,
780                                 &data2, lockh + i, lmm, lmmsize, cb_completion,
781                                 cb_blocking, cb_data);
782                 CDEBUG(D_OTHER, "take lock on slave %lu/%lu/%lu -> %d/%d\n",
783                        (unsigned long) mea->mea_fids[i].mds,
784                        (unsigned long) mea->mea_fids[i].id,
785                        (unsigned long) mea->mea_fids[i].generation,
786                        rc, it->d.lustre.it_status);
787                 if (rc)
788                         GOTO(cleanup, rc);
789                 if (it->d.lustre.it_data) {
790                         struct ptlrpc_request *req;
791                         req = (struct ptlrpc_request *) it->d.lustre.it_data;
792                         ptlrpc_req_finished(req);
793                 }
794                 
795                 if (it->d.lustre.it_status)
796                         GOTO(cleanup, rc = it->d.lustre.it_status);
797         }
798         RETURN(0);
799         
800 cleanup:
801         /* drop all taken locks */
802         while (--i >= 0) {
803                 if (lockh[i].cookie)
804                         ldlm_lock_decref(lockh + i, lockmode);
805                 lockh[i].cookie = 0;
806         }
807         RETURN(rc);
808 }
809
810 int lmv_enqueue(struct obd_export *exp, int lock_type,
811                 struct lookup_intent *it, int lock_mode,
812                 struct mdc_op_data *data, struct lustre_handle *lockh,
813                 void *lmm, int lmmsize,
814                 ldlm_completion_callback cb_completion,
815                 ldlm_blocking_callback cb_blocking, void *cb_data)
816 {
817         struct obd_device *obd = exp->exp_obd;
818         struct lmv_obd *lmv = &obd->u.lmv;
819         struct lmv_obj *obj;
820         int rc, mds;
821         ENTRY;
822
823         rc = lmv_check_connect(obd);
824         if (rc)
825                 RETURN(rc);
826
827         if (it->it_op == IT_UNLINK) {
828                 rc = lmv_enqueue_slaves(exp, lock_type, it, lock_mode,
829                                         data, lockh, lmm, lmmsize,
830                                         cb_completion, cb_blocking, cb_data);
831                 RETURN(rc);
832         }
833
834         if (data->namelen) {
835                 obj = lmv_grab_obj(obd, &data->fid1, 0);
836                 if (obj) {
837                         /* directory is splitted. look for
838                          * right mds for this name */
839                         mds = raw_name2idx(obj->objcount, (char *)data->name,
840                                            data->namelen);
841                         data->fid1 = obj->objs[mds].fid;
842                         lmv_put_obj(obj);
843                 }
844         }
845         CDEBUG(D_OTHER, "ENQUEUE '%s' on %lu/%lu\n",
846                LL_IT2STR(it), (unsigned long) data->fid1.id,
847                (unsigned long) data->fid1.generation);
848         rc = md_enqueue(lmv->tgts[data->fid1.mds].ltd_exp, lock_type, it,
849                         lock_mode, data, lockh, lmm, lmmsize, cb_completion,
850                         cb_blocking, cb_data);
851
852         RETURN(rc);
853 }
854
855 int lmv_getattr_name(struct obd_export *exp, struct ll_fid *fid,
856                          char *filename, int namelen, unsigned long valid,
857                          unsigned int ea_size, struct ptlrpc_request **request)
858 {
859         struct obd_device *obd = exp->exp_obd;
860         struct lmv_obd *lmv = &obd->u.lmv;
861         struct ll_fid rfid = *fid;
862         int rc, mds = fid->mds;
863         struct mds_body *body;
864         struct lmv_obj *obj;
865         ENTRY;
866         rc = lmv_check_connect(obd);
867         if (rc)
868                 RETURN(rc);
869 repeat:
870         obj = lmv_grab_obj(obd, fid, 0);
871         if (obj) {
872                 /* directory is splitted. look for right mds for this name */
873                 mds = raw_name2idx(obj->objcount, filename, namelen - 1);
874                 rfid = obj->objs[mds].fid;
875                 lmv_put_obj(obj);
876         }
877         CDEBUG(D_OTHER, "getattr_name for %*s on %lu/%lu/%lu -> %lu/%lu/%lu\n",
878                namelen, filename, (unsigned long) fid->mds,
879                (unsigned long) fid->id, (unsigned long) fid->generation,
880                (unsigned long) rfid.mds, (unsigned long) rfid.id,
881                (unsigned long) rfid.generation);
882         rc = md_getattr_name(lmv->tgts[mds].ltd_exp, &rfid, filename, namelen,
883                                   valid, ea_size, request);
884         if (rc == 0) {
885                 /* this could be cross-node reference. in this case all
886                  * we have right now is mds/ino/generation triple. we'd
887                  * like to find other attributes */
888                 body = lustre_msg_buf((*request)->rq_repmsg, 0, sizeof(*body));
889                 LASSERT(body != NULL);
890                 if (body->valid & OBD_MD_MDS) {
891                         struct ptlrpc_request *req = NULL;
892                         rfid = body->fid1;
893                         CDEBUG(D_OTHER, "request attrs for %lu/%lu/%lu\n",
894                                (unsigned long) rfid.mds,
895                                (unsigned long) rfid.id,
896                                (unsigned long) rfid.generation);
897                         rc = md_getattr_name(lmv->tgts[rfid.mds].ltd_exp, &rfid,
898                                              NULL, 1, valid, ea_size, &req);
899                         ptlrpc_req_finished(*request);
900                         *request = req;
901                 }
902         } else if (rc == -ERESTART) {
903                 /* directory got splitted. time to update local object
904                  * and repeat the request with proper MDS */
905                 rc = lmv_get_mea_and_update_object(exp, &rfid);
906                 if (rc == 0) {
907                         ptlrpc_req_finished(*request);
908                         goto repeat;
909                 }
910         }
911         RETURN(rc);
912 }
913
914
915 /*
916  * llite passes fid of an target inode in data->fid1 and
917  * fid of directory in data->fid2
918  */
919 int lmv_link(struct obd_export *exp, struct mdc_op_data *data,
920              struct ptlrpc_request **request)
921 {
922         struct obd_device *obd = exp->exp_obd;
923         struct lmv_obd *lmv = &obd->u.lmv;
924         struct lmv_obj *obj;
925         int rc;
926         ENTRY;
927         rc = lmv_check_connect(obd);
928         if (rc)
929                 RETURN(rc);
930         if (data->namelen != 0) {
931                 /* usual link request */
932                 obj = lmv_grab_obj(obd, &data->fid1, 0);
933                 if (obj) {
934                         rc = raw_name2idx(obj->objcount, data->name,
935                                          data->namelen);
936                         data->fid1 = obj->objs[rc].fid;
937                         lmv_put_obj(obj);
938                 }
939                 CDEBUG(D_OTHER,"link %u/%u/%u:%*s to %u/%u/%u mds %d\n",
940                        (unsigned) data->fid2.mds, (unsigned) data->fid2.id,
941                        (unsigned) data->fid2.generation, data->namelen,
942                        data->name, (unsigned) data->fid1.mds,
943                        (unsigned) data->fid1.id,
944                        (unsigned) data->fid1.generation, data->fid1.mds);
945         } else {
946                 /* request from MDS to acquire i_links for inode by fid1 */
947                 CDEBUG(D_OTHER, "inc i_nlinks for %u/%u/%u\n",
948                        (unsigned) data->fid1.mds, (unsigned) data->fid1.id,
949                        (unsigned) data->fid1.generation);
950         }
951                         
952         rc = md_link(lmv->tgts[data->fid1.mds].ltd_exp, data, request);
953         RETURN(rc);
954 }
955
956 int lmv_rename(struct obd_export *exp, struct mdc_op_data *data,
957                const char *old, int oldlen, const char *new, int newlen,
958                struct ptlrpc_request **request)
959 {
960         struct obd_device *obd = exp->exp_obd;
961         struct lmv_obd *lmv = &obd->u.lmv;
962         struct lmv_obj *obj;
963         int rc, mds;
964         ENTRY;
965
966         CDEBUG(D_OTHER, "rename %*s in %lu/%lu/%lu to %*s in %lu/%lu/%lu\n",
967                oldlen, old, (unsigned long) data->fid1.mds,
968                (unsigned long) data->fid1.id,
969                (unsigned long) data->fid1.generation,
970                newlen, new, (unsigned long) data->fid2.mds,
971                (unsigned long) data->fid2.id,
972                (unsigned long) data->fid2.generation);
973         if (!fid_equal(&data->fid1, &data->fid2))
974                 CWARN("cross-node rename %lu/%lu/%lu:%*s to %lu/%lu/%lu:%*s\n",
975                       (unsigned long) data->fid1.mds,
976                       (unsigned long) data->fid1.id,
977                       (unsigned long) data->fid1.generation, oldlen, old,
978                       (unsigned long) data->fid2.mds,
979                       (unsigned long) data->fid2.id,
980                       (unsigned long) data->fid2.generation, newlen, new);
981
982         rc = lmv_check_connect(obd);
983         if (rc)
984                 RETURN(rc);
985
986         if (oldlen == 0) {
987                 /* MDS with old dir entry is asking another MDS
988                  * to create name there */
989                 CDEBUG(D_OTHER,
990                        "create %*s(%d/%d) in %lu/%lu/%lu pointing to %lu/%lu/%lu\n",
991                        newlen, new, oldlen, newlen,
992                        (unsigned long) data->fid2.mds,
993                        (unsigned long) data->fid2.id,
994                        (unsigned long) data->fid2.generation,
995                        (unsigned long) data->fid1.mds,
996                        (unsigned long) data->fid1.id,
997                        (unsigned long) data->fid1.generation);
998                 mds = data->fid2.mds;
999                 goto request;
1000         }
1001
1002         obj = lmv_grab_obj(obd, &data->fid1, 0);
1003         if (obj) {
1004                 /* directory is already splitted, so we have to forward
1005                  * request to the right MDS */
1006                 mds = raw_name2idx(obj->objcount, (char *)old, oldlen);
1007                 data->fid1 = obj->objs[mds].fid;
1008                 CDEBUG(D_OTHER, "forward to MDS #%u (%lu/%lu/%lu)\n", mds,
1009                        (unsigned long) obj->objs[mds].fid.mds,
1010                        (unsigned long) obj->objs[mds].fid.id,
1011                        (unsigned long) obj->objs[mds].fid.generation);
1012         }
1013         lmv_put_obj(obj);
1014
1015         obj = lmv_grab_obj(obd, &data->fid2, 0);
1016         if (obj) {
1017                 /* directory is already splitted, so we have to forward
1018                  * request to the right MDS */
1019                 mds = raw_name2idx(obj->objcount, (char *)new, newlen);
1020                 data->fid2 = obj->objs[mds].fid;
1021                 CDEBUG(D_OTHER, "forward to MDS #%u (%lu/%lu/%lu)\n", mds,
1022                        (unsigned long) obj->objs[mds].fid.mds,
1023                        (unsigned long) obj->objs[mds].fid.id,
1024                        (unsigned long) obj->objs[mds].fid.generation);
1025         }
1026         lmv_put_obj(obj);
1027         
1028         mds = data->fid1.mds;
1029
1030 request:
1031         rc = md_rename(lmv->tgts[mds].ltd_exp, data, old, oldlen,
1032                        new, newlen, request); 
1033         RETURN(rc);
1034 }
1035
1036 int lmv_setattr(struct obd_export *exp, struct mdc_op_data *data,
1037                 struct iattr *iattr, void *ea, int ealen, void *ea2, int ea2len,
1038                 struct ptlrpc_request **request)
1039 {
1040         struct obd_device *obd = exp->exp_obd;
1041         struct lmv_obd *lmv = &obd->u.lmv;
1042         int rc = 0, i = data->fid1.mds;
1043         struct ptlrpc_request *req;
1044         struct mds_body *mds_body;
1045         struct lmv_obj *obj;
1046         ENTRY;
1047
1048         rc = lmv_check_connect(obd);
1049         if (rc)
1050                 RETURN(rc);
1051
1052         obj = lmv_grab_obj(obd, &data->fid1, 0);
1053         CDEBUG(D_OTHER, "SETATTR for %lu/%lu/%lu, valid 0x%x%s\n",
1054                (unsigned long) data->fid1.mds,
1055                (unsigned long) data->fid1.id,
1056                (unsigned long) data->fid1.generation, iattr->ia_valid,
1057                obj ? ", splitted" : "");
1058         if (obj) {
1059                 for (i = 0; i < obj->objcount; i++) {
1060                         data->fid1 = obj->objs[i].fid;
1061                         rc = md_setattr(lmv->tgts[i].ltd_exp, data, iattr, ea,
1062                                         ealen, ea2, ea2len, &req);
1063                         LASSERT(rc == 0);
1064                         if (fid_equal(&obj->fid, &obj->objs[i].fid)) {
1065                                 /* this is master object and this request
1066                                  * should be returned back to llite */
1067                                 *request = req;
1068                         } else {
1069                                 ptlrpc_req_finished(req);
1070                         }
1071                 }
1072                 lmv_put_obj(obj);
1073         } else {
1074                 LASSERT(data->fid1.mds < lmv->desc.ld_tgt_count);
1075                 rc = md_setattr(lmv->tgts[i].ltd_exp, data, iattr, ea, ealen,
1076                                 ea2, ea2len, request); 
1077                 if (rc == 0) {
1078                         mds_body = lustre_msg_buf((*request)->rq_repmsg, 0,
1079                                         sizeof(*mds_body));
1080                         LASSERT(mds_body != NULL);
1081                         LASSERT(mds_body->mds == i);
1082                 }
1083         }
1084         RETURN(rc);
1085 }
1086
1087 int lmv_sync(struct obd_export *exp, struct ll_fid *fid,
1088              struct ptlrpc_request **request)
1089 {
1090         struct obd_device *obd = exp->exp_obd;
1091         struct lmv_obd *lmv = &obd->u.lmv;
1092         int rc;
1093         ENTRY;
1094
1095         rc = lmv_check_connect(obd);
1096         if (rc)
1097                 RETURN(rc);
1098
1099         rc = md_sync(lmv->tgts[0].ltd_exp, fid, request); 
1100         RETURN(rc);
1101 }
1102
1103 int lmv_dirobj_blocking_ast(struct ldlm_lock *lock,
1104                             struct ldlm_lock_desc *desc, void *data, int flag)
1105 {
1106         struct lustre_handle lockh;
1107         struct lmv_obj *obj;
1108         int rc;
1109         ENTRY;
1110
1111         switch (flag) {
1112         case LDLM_CB_BLOCKING:
1113                 ldlm_lock2handle(lock, &lockh);
1114                 rc = ldlm_cli_cancel(&lockh);
1115                 if (rc < 0) {
1116                         CDEBUG(D_INODE, "ldlm_cli_cancel: %d\n", rc);
1117                         RETURN(rc);
1118                 }
1119                 break;
1120         case LDLM_CB_CANCELING:
1121                 /* time to drop cached attrs for dirobj */
1122                 obj = lock->l_ast_data;
1123                 if (!obj)
1124                         break;
1125
1126                 CDEBUG(D_OTHER, "cancel %s on %lu/%lu, master %lu/%lu/%lu\n",
1127                        lock->l_resource->lr_name.name[3] == 1 ?
1128                                 "LOOKUP" : "UPDATE",
1129                        (unsigned long) lock->l_resource->lr_name.name[0],
1130                        (unsigned long) lock->l_resource->lr_name.name[1],
1131                        (unsigned long) obj->fid.mds,
1132                        (unsigned long) obj->fid.id,
1133                        (unsigned long) obj->fid.generation);
1134                 break;
1135         default:
1136                 LBUG();
1137         }
1138         RETURN(0);
1139 }
1140
1141 void lmv_remove_dots(struct page *page)
1142 {
1143         char *kaddr = page_address(page);
1144         unsigned limit = PAGE_CACHE_SIZE;
1145         unsigned offs, rec_len;
1146         struct ext2_dir_entry_2 *p;
1147
1148         for (offs = 0; offs <= limit - EXT2_DIR_REC_LEN(1); offs += rec_len) {
1149                 p = (struct ext2_dir_entry_2 *)(kaddr + offs);
1150                 rec_len = le16_to_cpu(p->rec_len);
1151
1152                 if ((p->name_len == 1 && p->name[0] == '.') ||
1153                     (p->name_len == 2 && p->name[0] == '.' && p->name[1] == '.'))
1154                         p->inode = 0;
1155         }
1156 }
1157
1158 int lmv_readpage(struct obd_export *exp, struct ll_fid *mdc_fid,
1159                  __u64 offset, struct page *page,
1160                  struct ptlrpc_request **request)
1161 {
1162         struct obd_device *obd = exp->exp_obd;
1163         struct lmv_obd *lmv = &obd->u.lmv;
1164         struct ll_fid rfid = *mdc_fid;
1165         struct lmv_obj *obj;
1166         int rc, i;
1167         ENTRY;
1168
1169         rc = lmv_check_connect(obd);
1170         if (rc)
1171                 RETURN(rc);
1172
1173         LASSERT(mdc_fid->mds < lmv->desc.ld_tgt_count);
1174         CDEBUG(D_OTHER, "READPAGE at %llu from %lu/%lu/%lu\n",
1175                offset, (unsigned long) rfid.mds,
1176                (unsigned long) rfid.id,
1177                (unsigned long) rfid.generation);
1178
1179         obj = lmv_grab_obj(obd, mdc_fid, 0);
1180         if (obj) {
1181                 /* find dirobj containing page with requested offset */
1182                 /* FIXME: what about protecting cached attrs here? */
1183                 for (i = 0; i < obj->objcount; i++) {
1184                         if (offset < obj->objs[i].size)
1185                                 break;
1186                         offset -= obj->objs[i].size;
1187                 }
1188                 rfid = obj->objs[i].fid;
1189                 CDEBUG(D_OTHER, "forward to %lu/%lu/%lu with offset %lu\n",
1190                        (unsigned long) rfid.mds,
1191                        (unsigned long) rfid.id,
1192                        (unsigned long) rfid.generation,
1193                        (unsigned long) offset);
1194         }
1195         rc = md_readpage(lmv->tgts[rfid.mds].ltd_exp, &rfid, offset, page, request);
1196         if (rc == 0 && !fid_equal(&rfid, mdc_fid)) {
1197                 /* this page isn't from master object. to avoid
1198                  * ./.. duplication in directory, we have to remove them
1199                  * from all slave objects */
1200                 lmv_remove_dots(page);
1201         }
1202       
1203         lmv_put_obj(obj);
1204
1205         RETURN(rc);
1206 }
1207
1208 int lmv_unlink_slaves(struct obd_export *exp, struct mdc_op_data *data,
1209                       struct ptlrpc_request **req)
1210 {
1211         struct obd_device *obd = exp->exp_obd;
1212         struct lmv_obd *lmv = &obd->u.lmv;
1213         struct mea *mea = data->mea1;
1214         struct mdc_op_data data2;
1215         int i, rc = 0, mds;
1216         ENTRY;
1217
1218         LASSERT(mea != NULL);
1219         for (i = 0; i < mea->mea_count; i++) {
1220                 if (lmv->tgts[i].ltd_exp == NULL)
1221                         continue;
1222
1223                 memset(&data2, 0, sizeof(data2));
1224                 data2.fid1 = mea->mea_fids[i];
1225                 data2.create_mode = MDS_MODE_DONT_LOCK | S_IFDIR;
1226                 mds = data2.fid1.mds;
1227                 rc = md_unlink(lmv->tgts[mds].ltd_exp, &data2, req);
1228                 CDEBUG(D_OTHER, "unlink slave %lu/%lu/%lu -> %d\n",
1229                        (unsigned long) mea->mea_fids[i].mds,
1230                        (unsigned long) mea->mea_fids[i].id,
1231                        (unsigned long) mea->mea_fids[i].generation, rc);
1232                 if (*req) {
1233                         ptlrpc_req_finished(*req);
1234                         *req = NULL;
1235                 }
1236                 if (rc)
1237                         break;
1238         }
1239         RETURN(rc);
1240 }
1241
1242 int lmv_unlink(struct obd_export *exp, struct mdc_op_data *data,
1243                struct ptlrpc_request **request)
1244 {
1245         struct obd_device *obd = exp->exp_obd;
1246         struct lmv_obd *lmv = &obd->u.lmv;
1247         int rc, i = 0;
1248         ENTRY;
1249         rc = lmv_check_connect(obd);
1250         if (rc)
1251                 RETURN(rc);
1252
1253         if (data->namelen == 0 && data->mea1 != NULL) {
1254                 /* mds asks to remove slave objects */
1255                 rc = lmv_unlink_slaves(exp, data, request);
1256                 RETURN(rc);
1257         } else if (data->namelen != 0) {
1258                 struct lmv_obj *obj;
1259                 obj = lmv_grab_obj(obd, &data->fid1, 0);
1260                 if (obj) {
1261                         i = raw_name2idx(obj->objcount, data->name,
1262                                          data->namelen);
1263                         data->fid1 = obj->objs[i].fid;
1264                         lmv_put_obj(obj);
1265                 }
1266                 CDEBUG(D_OTHER, "unlink '%*s' in %lu/%lu/%lu -> %u\n",
1267                        data->namelen, data->name,
1268                        (unsigned long) data->fid1.mds,
1269                        (unsigned long) data->fid1.id,
1270                        (unsigned long) data->fid1.generation, i);
1271         } else {
1272                 CDEBUG(D_OTHER, "drop i_nlink on %lu/%lu/%lu\n",
1273                        (unsigned long) data->fid1.mds,
1274                        (unsigned long) data->fid1.id,
1275                        (unsigned long) data->fid1.generation);
1276         }
1277         rc = md_unlink(lmv->tgts[data->fid1.mds].ltd_exp, data, request); 
1278         RETURN(rc);
1279 }
1280
1281 struct obd_device *lmv_get_real_obd(struct obd_export *exp,
1282                                     char *name, int len)
1283 {
1284         struct obd_device *obd = exp->exp_obd;
1285         struct lmv_obd *lmv = &obd->u.lmv;
1286         int rc;
1287         ENTRY;
1288
1289         rc = lmv_check_connect(obd);
1290         if (rc)
1291                 RETURN(ERR_PTR(rc));
1292         obd = lmv->tgts[0].ltd_exp->exp_obd;
1293         EXIT;
1294         return obd;
1295 }
1296
1297 int lmv_init_ea_size(struct obd_export *exp, int easize, int cookiesize)
1298 {
1299         struct obd_device *obd = exp->exp_obd;
1300         struct lmv_obd *lmv = &obd->u.lmv;
1301         int i, rc = 0, change = 0;
1302         ENTRY;
1303
1304         if (lmv->max_easize < easize) {
1305                 lmv->max_easize = easize;
1306                 change = 1;
1307         }
1308         if (lmv->max_cookiesize < cookiesize) {
1309                 lmv->max_cookiesize = cookiesize;
1310                 change = 1;
1311         }
1312         if (change == 0)
1313                 RETURN(0);
1314         
1315         if (lmv->connected == 0)
1316                 RETURN(0);
1317
1318         /* FIXME: error handling? */
1319         for (i = 0; i < lmv->desc.ld_tgt_count; i++)
1320                 rc = obd_init_ea_size(lmv->tgts[i].ltd_exp, easize, cookiesize);
1321         RETURN(rc);
1322 }
1323
1324 int lmv_obd_create_single(struct obd_export *exp, struct obdo *oa,
1325                           struct lov_stripe_md **ea, struct obd_trans_info *oti)
1326 {
1327         struct obd_device *obd = exp->exp_obd;
1328         struct lmv_obd *lmv = &obd->u.lmv;
1329         struct lov_stripe_md obj_md;
1330         struct lov_stripe_md *obj_mdp = &obj_md;
1331         int rc = 0;
1332         ENTRY;
1333
1334         rc = lmv_check_connect(obd);
1335         if (rc)
1336                 RETURN(rc);
1337
1338         LASSERT(ea == NULL);
1339         LASSERT(oa->o_mds < lmv->desc.ld_tgt_count);
1340
1341         rc = obd_create(lmv->tgts[oa->o_mds].ltd_exp, oa, &obj_mdp, oti);
1342         LASSERT(rc == 0);
1343
1344         RETURN(rc);
1345 }
1346
1347 /*
1348  * to be called from MDS only
1349  */
1350 int lmv_obd_create(struct obd_export *exp, struct obdo *oa,
1351                    struct lov_stripe_md **ea, struct obd_trans_info *oti)
1352 {
1353         struct obd_device *obd = exp->exp_obd;
1354         struct lmv_obd *lmv = &obd->u.lmv;
1355         struct mea *mea;
1356         int i, c, rc = 0;
1357         struct ll_fid mfid;
1358         ENTRY;
1359
1360         rc = lmv_check_connect(obd);
1361         if (rc)
1362                 RETURN(rc);
1363
1364         LASSERT(oa != NULL);
1365         
1366         if (ea == NULL) {
1367                 rc = lmv_obd_create_single(exp, oa, NULL, oti);
1368                 RETURN(rc);
1369         }
1370
1371         if (*ea == NULL) {
1372                 rc = obd_alloc_diskmd(exp, (struct lov_mds_md **)ea);
1373                 LASSERT(*ea != NULL);
1374         }
1375
1376         mea = (struct mea *)*ea;
1377         mfid.id = oa->o_id;
1378         mfid.generation = oa->o_generation;
1379         rc = 0;
1380         if (!mea->mea_count || mea->mea_count > lmv->desc.ld_tgt_count)
1381                 mea->mea_count = lmv->desc.ld_tgt_count;
1382
1383         mea->mea_master = -1;
1384         
1385         /* FIXME: error handling? */
1386         for (i = 0, c = 0; c < mea->mea_count && 
1387                 i < lmv->desc.ld_tgt_count; i++) {
1388                 struct lov_stripe_md obj_md;
1389                 struct lov_stripe_md *obj_mdp = &obj_md;
1390                
1391                 if (lmv->tgts[i].ltd_exp == NULL) {
1392                         /* this is master MDS */
1393                         mea->mea_fids[c].id = mfid.id;
1394                         mea->mea_fids[c].generation = mfid.generation;
1395                         mea->mea_fids[c].mds = i;
1396                         mea->mea_master = i;
1397                         c++;
1398                         continue;
1399                 }
1400
1401                 /* "Master" MDS should always be part of stripped dir, so
1402                    scan for it */
1403                 if (mea->mea_master == -1 && c == mea->mea_count - 1)
1404                         continue;
1405
1406                 oa->o_valid = OBD_MD_FLGENER | OBD_MD_FLTYPE | OBD_MD_FLMODE
1407                                 | OBD_MD_FLUID | OBD_MD_FLGID | OBD_MD_FLID;
1408
1409                 rc = obd_create(lmv->tgts[c].ltd_exp, oa, &obj_mdp, oti);
1410                 /* FIXME: error handling here */
1411                 LASSERT(rc == 0);
1412
1413                 mea->mea_fids[c].id = oa->o_id;
1414                 mea->mea_fids[c].generation = oa->o_generation;
1415                 mea->mea_fids[c].mds = i;
1416                 c++;
1417                 CDEBUG(D_OTHER, "dirobj at mds %d: "LPU64"/%u\n",
1418                        i, oa->o_id, oa->o_generation);
1419         }
1420         LASSERT(c == mea->mea_count);
1421         CDEBUG(D_OTHER, "%d dirobjects created\n", (int) mea->mea_count);
1422
1423         RETURN(rc);
1424 }
1425
1426 static int lmv_get_info(struct obd_export *exp, __u32 keylen,
1427                         void *key, __u32 *vallen, void *val)
1428 {
1429         struct obd_device *obd;
1430         struct lmv_obd *lmv;
1431         ENTRY;
1432
1433         obd = class_exp2obd(exp);
1434         if (obd == NULL) {
1435                 CDEBUG(D_IOCTL, "invalid client cookie "LPX64"\n",
1436                        exp->exp_handle.h_cookie);
1437                 RETURN(-EINVAL);
1438         }
1439
1440         lmv = &obd->u.lmv;
1441         if (keylen == 6 && memcmp(key, "mdsize", 6) == 0) {
1442                 __u32 *mdsize = val;
1443                 *vallen = sizeof(__u32);
1444                 *mdsize = sizeof(struct ll_fid) * lmv->desc.ld_tgt_count
1445                                 + sizeof(struct mea);
1446                 RETURN(0);
1447         } else if (keylen == 6 && memcmp(key, "mdsnum", 6) == 0) {
1448                 struct obd_uuid *cluuid = &lmv->cluuid;
1449                 struct lmv_tgt_desc *tgts;
1450                 __u32 *mdsnum = val;
1451                 int i;
1452
1453                 for (i = 0, tgts = lmv->tgts; i < lmv->desc.ld_tgt_count; i++, tgts++) {
1454                         if (obd_uuid_equals(&tgts->uuid, cluuid)) {
1455                                 *vallen = sizeof(__u32);
1456                                 *mdsnum = i;
1457                                 RETURN(0);
1458                         }
1459                 }
1460                 LASSERT(0);
1461         }
1462
1463         CDEBUG(D_IOCTL, "invalid key\n");
1464         RETURN(-EINVAL);
1465 }
1466
1467 int lmv_set_info(struct obd_export *exp, obd_count keylen,
1468                  void *key, obd_count vallen, void *val)
1469 {
1470         struct obd_device *obd;
1471         struct lmv_obd *lmv;
1472         ENTRY;
1473
1474         obd = class_exp2obd(exp);
1475         if (obd == NULL) {
1476                 CDEBUG(D_IOCTL, "invalid client cookie "LPX64"\n",
1477                        exp->exp_handle.h_cookie);
1478                 RETURN(-EINVAL);
1479         }
1480         lmv = &obd->u.lmv;
1481
1482         if (keylen >= strlen("client") && strcmp(key, "client") == 0) {
1483                 struct lmv_tgt_desc *tgts;
1484                 int i, rc;
1485
1486                 rc = lmv_check_connect(obd);
1487                 if (rc)
1488                         RETURN(rc);
1489
1490                 for (i = 0, tgts = lmv->tgts; 
1491                         i < lmv->desc.ld_tgt_count; i++, tgts++) {
1492                         rc = obd_set_info(tgts->ltd_exp, keylen, key, vallen, val);
1493                         if (rc)
1494                                 RETURN(rc);
1495                 }
1496                 RETURN(0);
1497         } else if (keylen >= strlen("inter_mds") && strcmp(key, "inter_mds") == 0) {
1498                 lmv->server_timeout = 1;
1499                 lmv_set_timeouts(obd);
1500                 RETURN(0);
1501         }
1502         
1503         RETURN(-EINVAL);
1504 }
1505
1506 int lmv_packmd(struct obd_export *exp, struct lov_mds_md **lmmp,
1507                struct lov_stripe_md *lsm)
1508 {
1509         struct obd_device *obd = class_exp2obd(exp);
1510         struct lmv_obd *lmv = &obd->u.lmv;
1511         int mea_size;
1512         ENTRY;
1513
1514         mea_size = sizeof(struct ll_fid) * 
1515                 lmv->desc.ld_tgt_count + sizeof(struct mea);
1516         if (!lmmp)
1517                 RETURN(mea_size);
1518
1519         if (*lmmp && !lsm) {
1520                 OBD_FREE(*lmmp, mea_size);
1521                 *lmmp = NULL;
1522                 RETURN(0);
1523         }
1524
1525         if (!*lmmp) {
1526                 OBD_ALLOC(*lmmp, mea_size);
1527                 if (!*lmmp)
1528                         RETURN(-ENOMEM);
1529         }
1530
1531         if (!lsm)
1532                 RETURN(mea_size);
1533
1534 #warning "MEA packing/convertation must be here! -bzzz"
1535         memcpy(*lmmp, lsm, mea_size);
1536         RETURN(mea_size);
1537 }
1538
1539 int lmv_unpackmd(struct obd_export *exp, struct lov_stripe_md **mem_tgt,
1540                         struct lov_mds_md *disk_src, int mdsize)
1541 {
1542         struct obd_device *obd = class_exp2obd(exp);
1543         struct lmv_obd *lmv = &obd->u.lmv;
1544         struct mea **tmea = (struct mea **) mem_tgt;
1545         struct mea *mea = (void *) disk_src;
1546         int mea_size;
1547         ENTRY;
1548
1549         mea_size = sizeof(struct ll_fid) * 
1550                 lmv->desc.ld_tgt_count + sizeof(struct mea);
1551         if (mem_tgt == NULL)
1552                 return mea_size;
1553
1554         if (*mem_tgt != NULL && disk_src == NULL) {
1555                 OBD_FREE(*tmea, mea_size);
1556                 RETURN(0);
1557         }
1558
1559         LASSERT(mea_size == mdsize);
1560
1561         OBD_ALLOC(*tmea, mea_size);
1562         /* FIXME: error handling here */
1563         LASSERT(*tmea != NULL);
1564
1565         if (!disk_src)
1566                 RETURN(mea_size);
1567
1568 #warning "MEA unpacking/convertation must be here! -bzzz"
1569         memcpy(*tmea, mea, mdsize);
1570         RETURN(mea_size);
1571 }
1572
1573 int lmv_brw(int rw, struct obd_export *exp, struct obdo *oa,
1574                 struct lov_stripe_md *ea, obd_count oa_bufs,
1575                 struct brw_page *pgarr, struct obd_trans_info *oti)
1576 {
1577         struct obd_device *obd = exp->exp_obd;
1578         struct lmv_obd *lmv = &obd->u.lmv;
1579         struct mea *mea = (struct mea *) ea;
1580         int err;
1581       
1582         LASSERT(oa != NULL);
1583         LASSERT(ea != NULL);
1584         LASSERT(pgarr != NULL);
1585         LASSERT(oa->o_mds < lmv->desc.ld_tgt_count);
1586
1587         oa->o_gr = mea->mea_fids[oa->o_mds].generation;
1588         oa->o_id = mea->mea_fids[oa->o_mds].id;
1589         oa->o_valid =  OBD_MD_FLID | OBD_MD_FLGROUP;
1590         err = obd_brw(rw, lmv->tgts[oa->o_mds].ltd_exp, oa,
1591                       NULL, oa_bufs, pgarr, oti);
1592         RETURN(err);
1593 }
1594
1595 struct obd_ops lmv_obd_ops = {
1596         .o_owner                = THIS_MODULE,
1597         .o_attach               = lmv_attach,
1598         .o_detach               = lmv_detach,
1599         .o_setup                = lmv_setup,
1600         .o_cleanup              = lmv_cleanup,
1601         .o_connect              = lmv_connect,
1602         .o_disconnect           = lmv_disconnect,
1603         .o_statfs               = lmv_statfs,
1604         .o_get_info             = lmv_get_info,
1605         .o_set_info             = lmv_set_info,
1606         .o_create               = lmv_obd_create,
1607         .o_packmd               = lmv_packmd,
1608         .o_unpackmd             = lmv_unpackmd,
1609         .o_brw                  = lmv_brw,
1610         .o_init_ea_size         = lmv_init_ea_size,
1611         .o_notify               = lmv_notify,
1612         .o_iocontrol            = lmv_iocontrol,
1613 };
1614
1615 struct md_ops lmv_md_ops = {
1616         .m_getstatus            = lmv_getstatus,
1617         .m_getattr              = lmv_getattr,
1618         .m_change_cbdata        = lmv_change_cbdata,
1619         .m_change_cbdata_name   = lmv_change_cbdata_name,
1620         .m_close                = lmv_close,
1621         .m_create               = lmv_create,
1622         .m_done_writing         = lmv_done_writing,
1623         .m_enqueue              = lmv_enqueue,
1624         .m_getattr_name         = lmv_getattr_name,
1625         .m_intent_lock          = lmv_intent_lock,
1626         .m_link                 = lmv_link,
1627         .m_rename               = lmv_rename,
1628         .m_setattr              = lmv_setattr,
1629         .m_sync                 = lmv_sync,
1630         .m_readpage             = lmv_readpage,
1631         .m_unlink               = lmv_unlink,
1632         .m_get_real_obd         = lmv_get_real_obd,
1633         .m_valid_attrs          = lmv_valid_attrs,
1634 };
1635
1636 int __init lmv_init(void)
1637 {
1638         struct lprocfs_static_vars lvars;
1639         int rc;
1640
1641         lprocfs_init_vars(lmv, &lvars);
1642         rc = class_register_type(&lmv_obd_ops, &lmv_md_ops,
1643                                  lvars.module_vars, OBD_LMV_DEVICENAME);
1644         RETURN(rc);
1645 }
1646
#ifdef __KERNEL__
/* Module unload hook: unregister the type named OBD_LMV_DEVICENAME. */
static void lmv_exit(void)
{
        class_unregister_type(OBD_LMV_DEVICENAME);
}

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
MODULE_DESCRIPTION("Lustre Logical Metadata Volume OBD driver");

module_init(lmv_init);
module_exit(lmv_exit);
#endif /* __KERNEL__ */