Whamcloud - gitweb
Branch: HEAD
[fs/lustre-release.git] / lustre / lmv / lmv_obd.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  * Copyright (C) 2002, 2003 Cluster File Systems, Inc.
5  *
6  *   This file is part of Lustre, http://www.lustre.org.
7  *
8  *   Lustre is free software; you can redistribute it and/or
9  *   modify it under the terms of version 2 of the GNU General Public
10  *   License as published by the Free Software Foundation.
11  *
12  *   Lustre is distributed in the hope that it will be useful,
13  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
14  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  *   GNU General Public License for more details.
16  *
17  *   You should have received a copy of the GNU General Public License
18  *   along with Lustre; if not, write to the Free Software
19  *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20  */
21
22 #ifndef EXPORT_SYMTAB
23 # define EXPORT_SYMTAB
24 #endif
25 #define DEBUG_SUBSYSTEM S_LMV
26 #ifdef __KERNEL__
27 #include <linux/slab.h>
28 #include <linux/module.h>
29 #include <linux/init.h>
30 #include <linux/slab.h>
31 #include <linux/pagemap.h>
32 #include <asm/div64.h>
33 #include <linux/seq_file.h>
34 #include <linux/namei.h>
35 #else
36 #include <liblustre.h>
37 #endif
38 #include <linux/ext2_fs.h>
39
40 #include <linux/obd_support.h>
41 #include <linux/lustre_lib.h>
42 #include <linux/lustre_net.h>
43 #include <linux/lustre_idl.h>
44 #include <linux/lustre_dlm.h>
45 #include <linux/lustre_mds.h>
46 #include <linux/obd_class.h>
47 #include <linux/obd_ost.h>
48 #include <linux/lprocfs_status.h>
49 #include <linux/lustre_fsfilt.h>
50 #include <linux/obd_lmv.h>
51 #include <linux/lustre_lite.h>
52 #include <linux/lustre_audit.h>
53 #include "lmv_internal.h"
54
/* ATOMIC_INIT is not defined for liblustre building, so fall back to a plain
 * brace initializer when nothing else has provided the macro. */
#if !defined(ATOMIC_INIT)
#define ATOMIC_INIT(val) { (val) }
#endif

/* object cache: the slab cache handle and an atomic counter that starts at
 * zero (presumably the number of live cached objects -- confirm against the
 * object manager code that uses it). */
kmem_cache_t *obj_cache;
atomic_t obj_cache_count = ATOMIC_INIT(0);
63
64 static void lmv_activate_target(struct lmv_obd *lmv,
65                                 struct lmv_tgt_desc *tgt,
66                                 int activate)
67 {
68         if (tgt->active == activate)
69                 return;
70         
71         tgt->active = activate;
72         lmv->desc.ld_active_tgt_count += (activate ? 1 : -1);
73 }
74
75 /* Error codes:
76  *
77  *  -EINVAL  : UUID can't be found in the LMV's target list
78  *  -ENOTCONN: The UUID is found, but the target connection is bad (!)
79  *  -EBADF   : The UUID is found, but the OBD of the wrong type (!)
80  */
81 static int lmv_set_mdc_active(struct lmv_obd *lmv, struct obd_uuid *uuid,
82                               int activate)
83 {
84         struct lmv_tgt_desc *tgt;
85         struct obd_device *obd;
86         int i, rc = 0;
87         ENTRY;
88
89         CDEBUG(D_INFO, "Searching in lmv %p for uuid %s (activate=%d)\n",
90                lmv, uuid->uuid, activate);
91
92         spin_lock(&lmv->lmv_lock);
93         for (i = 0, tgt = lmv->tgts; i < lmv->desc.ld_tgt_count; i++, tgt++) {
94                 if (tgt->ltd_exp == NULL)
95                         continue;
96
97                 CDEBUG(D_INFO, "lmv idx %d is %s conn "LPX64"\n",
98                        i, tgt->uuid.uuid, tgt->ltd_exp->exp_handle.h_cookie);
99
100                 if (obd_uuid_equals(uuid, &tgt->uuid))
101                         break;
102         }
103
104         if (i == lmv->desc.ld_tgt_count)
105                 GOTO(out_lmv_lock, rc = -EINVAL);
106
107         obd = class_exp2obd(tgt->ltd_exp);
108         if (obd == NULL)
109                 GOTO(out_lmv_lock, rc = -ENOTCONN);
110
111         CDEBUG(D_INFO, "Found OBD %s=%s device %d (%p) type %s at LMV idx %d\n",
112                obd->obd_name, obd->obd_uuid.uuid, obd->obd_minor, obd,
113                obd->obd_type->typ_name, i);
114         LASSERT(strcmp(obd->obd_type->typ_name, OBD_MDC_DEVICENAME) == 0);
115
116         if (tgt->active == activate) {
117                 CDEBUG(D_INFO, "OBD %p already %sactive!\n", obd,
118                        activate ? "" : "in");
119                 GOTO(out_lmv_lock, rc);
120         }
121
122         CDEBUG(D_INFO, "Marking OBD %p %sactive\n",
123                obd, activate ? "" : "in");
124
125         lmv_activate_target(lmv, tgt, activate);
126
127         EXIT;
128         
129  out_lmv_lock:
130         spin_unlock(&lmv->lmv_lock);
131         return rc;
132 }
133
134 static int lmv_notify(struct obd_device *obd, struct obd_device *watched,
135                       int active, void *data)
136 {
137         struct obd_uuid *uuid;
138         int rc;
139         ENTRY;
140
141         if (strcmp(watched->obd_type->typ_name, OBD_MDC_DEVICENAME)) {
142                 CERROR("unexpected notification of %s %s!\n",
143                        watched->obd_type->typ_name,
144                        watched->obd_name);
145                 RETURN(-EINVAL);
146         }
147         uuid = &watched->u.cli.cl_import->imp_target_uuid;
148
149         /* Set MDC as active before notifying the observer, so the observer can
150          * use the MDC normally.
151          */
152         rc = lmv_set_mdc_active(&obd->u.lmv, uuid, active);
153         if (rc) {
154                 CERROR("%sactivation of %s failed: %d\n",
155                        active ? "" : "de", uuid->uuid, rc);
156                 RETURN(rc);
157         }
158
159         if (obd->obd_observer)
160                 /* Pass the notification up the chain. */
161                 rc = obd_notify(obd->obd_observer, watched, active, data);
162
163         RETURN(rc);
164 }
165
166 static int lmv_attach(struct obd_device *dev, obd_count len, void *data)
167 {
168         struct lprocfs_static_vars lvars;
169         int rc;
170         ENTRY;
171
172         lprocfs_init_vars(lmv, &lvars);
173         rc = lprocfs_obd_attach(dev, lvars.obd_vars);
174 #ifdef __KERNEL__
175         if (rc == 0) {
176                 struct proc_dir_entry *entry;
177                 
178                 entry = create_proc_entry("target_obd_status", 0444, 
179                                            dev->obd_proc_entry);
180                 if (entry == NULL)
181                         RETURN(-ENOMEM);
182                 entry->proc_fops = &lmv_proc_target_fops; 
183                 entry->data = dev;
184        }
185 #endif
186         RETURN (rc);
187 }
188
/* Detach the device from lprocfs; the lprocfs result is handed back as is. */
static int lmv_detach(struct obd_device *dev)
{
        int rc = lprocfs_obd_detach(dev);

        return rc;
}
193
194 /* this is fake connect function. Its purpose is to initialize lmv and say
195  * caller that everything is okay. Real connection will be performed later. */
196 static int lmv_connect(struct lustre_handle *conn, struct obd_device *obd,
197                        struct obd_uuid *cluuid, struct obd_connect_data *data,
198                        unsigned long flags)
199 {
200 #ifdef __KERNEL__
201         struct proc_dir_entry *lmv_proc_dir;
202 #endif
203         struct lmv_obd *lmv = &obd->u.lmv;
204         struct obd_export *exp;
205         int rc = 0;
206         ENTRY;
207
208         rc = class_connect(conn, obd, cluuid);
209         if (rc) {
210                 CERROR("class_connection() returned %d\n", rc);
211                 RETURN(rc);
212         }
213
214         exp = class_conn2export(conn);
215         
216         /* we don't want to actually do the underlying connections more than
217          * once, so keep track. */
218         lmv->refcount++;
219         if (lmv->refcount > 1) {
220                 class_export_put(exp);
221                 RETURN(0);
222         }
223
224         lmv->exp = exp;
225         lmv->connected = 0;
226         lmv->cluuid = *cluuid;
227         lmv->connect_flags = flags;
228         sema_init(&lmv->init_sem, 1);
229         if (data)
230                 memcpy(&lmv->conn_data, data, sizeof(*data));
231
232 #ifdef __KERNEL__
233         lmv_proc_dir = lprocfs_register("target_obds", obd->obd_proc_entry,
234                                         NULL, NULL);
235         if (IS_ERR(lmv_proc_dir)) {
236                 CERROR("could not register /proc/fs/lustre/%s/%s/target_obds.",
237                        obd->obd_type->typ_name, obd->obd_name);
238                 lmv_proc_dir = NULL;
239         }
240 #endif
241
242         /* 
243          * all real clients shouls perform actual connection rightaway, because
244          * it is possible, that LMV will not have opportunity to connect
245          * targets, as MDC stuff will bit called directly, for instance while
246          * reading ../mdc/../kbytesfree procfs file, etc.
247          */
248         if (flags & OBD_OPT_REAL_CLIENT)
249                 rc = lmv_check_connect(obd);
250
251 #ifdef __KERNEL__
252         if (rc) {
253                 if (lmv_proc_dir)
254                         lprocfs_remove(lmv_proc_dir);
255         }
256 #endif
257
258         RETURN(rc);
259 }
260
261 static void lmv_set_timeouts(struct obd_device *obd)
262 {
263         struct lmv_tgt_desc *tgts;
264         struct lmv_obd *lmv;
265         int i;
266
267         lmv = &obd->u.lmv;
268         if (lmv->server_timeout == 0)
269                 return;
270
271         if (lmv->connected == 0)
272                 return;
273
274         for (i = 0, tgts = lmv->tgts; i < lmv->desc.ld_tgt_count; i++, tgts++) {
275                 if (tgts->ltd_exp == NULL)
276                         continue;
277                 
278                 obd_set_info(tgts->ltd_exp, strlen("inter_mds"),
279                              "inter_mds", 0, NULL);
280         }
281 }
282
283 #define MAX_STRING_SIZE 128
284
285 /* performs a check if passed obd is connected. If no - connect it. */
286 int lmv_check_connect(struct obd_device *obd)
287 {
288 #ifdef __KERNEL__
289         struct proc_dir_entry *lmv_proc_dir;
290 #endif
291         struct lmv_obd *lmv = &obd->u.lmv;
292         struct lmv_tgt_desc *tgts;
293         struct obd_uuid *cluuid;
294         struct obd_export *exp;
295         int rc, rc2, i;
296         ENTRY;
297
298         if (lmv->connected)
299                 RETURN(0);
300         
301         down(&lmv->init_sem);
302         if (lmv->connected) {
303                 up(&lmv->init_sem);
304                 RETURN(0);
305         }
306
307         cluuid = &lmv->cluuid;
308         exp = lmv->exp;
309         
310         CDEBUG(D_OTHER, "time to connect %s to %s\n",
311                cluuid->uuid, obd->obd_name);
312
313         for (i = 0, tgts = lmv->tgts; i < lmv->desc.ld_tgt_count; i++, tgts++) {
314                 struct obd_uuid lmv_mdc_uuid = { "LMV_MDC_UUID" };
315                 struct lustre_handle conn = {0, };
316                 struct obd_device *tgt_obd;
317
318                 LASSERT(tgts != NULL);
319
320                 tgt_obd = class_find_client_obd(&tgts->uuid, OBD_MDC_DEVICENAME, 
321                                                 &obd->obd_uuid);
322                 if (!tgt_obd) {
323                         CERROR("target %s not attached\n", tgts->uuid.uuid);
324                         GOTO(out_disc, rc = -EINVAL);
325                 }
326
327                 /* for MDS: don't connect to yourself */
328                 if (obd_uuid_equals(&tgts->uuid, cluuid)) {
329                         CDEBUG(D_OTHER, "don't connect back to %s\n",
330                                cluuid->uuid);
331                         tgts->ltd_exp = NULL;
332                         continue;
333                 }
334
335                 CDEBUG(D_OTHER, "connect to %s(%s) - %s, %s FOR %s\n",
336                         tgt_obd->obd_name, tgt_obd->obd_uuid.uuid,
337                         tgts->uuid.uuid, obd->obd_uuid.uuid,
338                         cluuid->uuid);
339
340                 if (!tgt_obd->obd_set_up) {
341                         CERROR("target %s not set up\n", tgts->uuid.uuid);
342                         GOTO(out_disc, rc = -EINVAL);
343                 }
344                 
345                 rc = obd_connect(&conn, tgt_obd, &lmv_mdc_uuid, &lmv->conn_data,
346                                  lmv->connect_flags);
347                 if (rc) {
348                         CERROR("target %s connect error %d\n",
349                                 tgts->uuid.uuid, rc);
350                         GOTO(out_disc, rc);
351                 }
352                 tgts->ltd_exp = class_conn2export(&conn);
353
354                 obd_init_ea_size(tgts->ltd_exp, lmv->max_easize,
355                                  lmv->max_cookiesize);
356
357                 rc = obd_register_observer(tgt_obd, obd);
358                 if (rc) {
359                         CERROR("target %s register_observer error %d\n",
360                                tgts->uuid.uuid, rc);
361                         obd_disconnect(tgts->ltd_exp, 0);
362                         GOTO(out_disc, rc);
363                 }
364
365                 lmv->desc.ld_active_tgt_count++;
366                 tgts->active = 1;
367
368                 CDEBUG(D_OTHER, "connected to %s(%s) successfully (%d)\n",
369                         tgt_obd->obd_name, tgt_obd->obd_uuid.uuid,
370                         atomic_read(&obd->obd_refcount));
371
372 #ifdef __KERNEL__
373                 lmv_proc_dir = lprocfs_srch(obd->obd_proc_entry, "target_obds");
374                 if (lmv_proc_dir) {
375                         struct obd_device *mdc_obd = class_conn2obd(&conn);
376                         struct proc_dir_entry *mdc_symlink;
377                         char name[MAX_STRING_SIZE + 1];
378
379                         LASSERT(mdc_obd != NULL);
380                         LASSERT(mdc_obd->obd_type != NULL);
381                         LASSERT(mdc_obd->obd_type->typ_name != NULL);
382                         name[MAX_STRING_SIZE] = '\0';
383                         snprintf(name, MAX_STRING_SIZE, "../../../%s/%s",
384                                  mdc_obd->obd_type->typ_name,
385                                  mdc_obd->obd_name);
386                         mdc_symlink = proc_symlink(mdc_obd->obd_name,
387                                                    lmv_proc_dir, name);
388                         if (mdc_symlink == NULL) {
389                                 CERROR("could not register LMV target "
390                                        "/proc/fs/lustre/%s/%s/target_obds/%s.",
391                                        obd->obd_type->typ_name, obd->obd_name,
392                                        mdc_obd->obd_name);
393                                 lprocfs_remove(lmv_proc_dir);
394                                 lmv_proc_dir = NULL;
395                         }
396                 }
397 #endif
398         }
399
400         lmv_set_timeouts(obd);
401         class_export_put(exp);
402         lmv->connected = 1;
403         up(&lmv->init_sem);
404         RETURN(0);
405
406  out_disc:
407         while (i-- > 0) {
408                 struct obd_uuid uuid;
409                 --tgts;
410                 --lmv->desc.ld_active_tgt_count;
411                 tgts->active = 0;
412                 /* save for CERROR below; (we know it's terminated) */
413                 uuid = tgts->uuid;
414                 rc2 = obd_disconnect(tgts->ltd_exp, 0);
415                 if (rc2)
416                         CERROR("error: LMV target %s disconnect on MDC idx %d: "
417                                "error %d\n", uuid.uuid, i, rc2);
418         }
419         class_disconnect(exp, 0);
420         up(&lmv->init_sem);
421         return rc;
422 }
423
424 static int lmv_disconnect(struct obd_export *exp, unsigned long flags)
425 {
426         struct obd_device *obd = class_exp2obd(exp);
427         struct lmv_obd *lmv = &obd->u.lmv;
428
429 #ifdef __KERNEL__
430         struct proc_dir_entry *lmv_proc_dir;
431 #endif
432         int rc, i;
433         ENTRY;
434
435         if (!lmv->tgts)
436                 goto out_local;
437
438         /* Only disconnect the underlying layers on the final disconnect. */
439         lmv->refcount--;
440         if (lmv->refcount != 0)
441                 goto out_local;
442
443 #ifdef __KERNEL__
444         lmv_proc_dir = lprocfs_srch(obd->obd_proc_entry, "target_obds");
445 #endif
446
447         for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
448                 struct obd_device *mdc_obd; 
449                 
450                 if (lmv->tgts[i].ltd_exp == NULL)
451                         continue;
452
453                 mdc_obd = class_exp2obd(lmv->tgts[i].ltd_exp);
454
455                 if (mdc_obd)
456                         mdc_obd->obd_no_recov = obd->obd_no_recov;
457
458 #ifdef __KERNEL__
459                 if (lmv_proc_dir) {
460                         struct proc_dir_entry *mdc_symlink;
461
462                         mdc_symlink = lprocfs_srch(lmv_proc_dir, mdc_obd->obd_name);
463                         if (mdc_symlink) {
464                                 lprocfs_remove(mdc_symlink);
465                         } else {
466                                 CERROR("/proc/fs/lustre/%s/%s/target_obds/%s missing\n",
467                                        obd->obd_type->typ_name, obd->obd_name,
468                                        mdc_obd->obd_name);
469                         }
470                 }
471 #endif
472                 CDEBUG(D_OTHER, "disconnected from %s(%s) successfully\n",
473                         lmv->tgts[i].ltd_exp->exp_obd->obd_name,
474                         lmv->tgts[i].ltd_exp->exp_obd->obd_uuid.uuid);
475
476                 obd_register_observer(lmv->tgts[i].ltd_exp->exp_obd, NULL);
477                 rc = obd_disconnect(lmv->tgts[i].ltd_exp, flags);
478                 if (rc) {
479                         if (lmv->tgts[i].active) {
480                                 CERROR("Target %s disconnect error %d\n",
481                                        lmv->tgts[i].uuid.uuid, rc);
482                         }
483                         rc = 0;
484                 }
485                 
486                 lmv_activate_target(lmv, &lmv->tgts[i], 0);
487                 lmv->tgts[i].ltd_exp = NULL;
488         }
489
490 #ifdef __KERNEL__
491         if (lmv_proc_dir) {
492                 lprocfs_remove(lmv_proc_dir);
493         } else {
494                 CERROR("/proc/fs/lustre/%s/%s/target_obds missing\n",
495                        obd->obd_type->typ_name, obd->obd_name);
496         }
497 #endif
498
499 out_local:
500         /* this is the case when no real connection is established by
501          * lmv_check_connect(). */
502         if (!lmv->connected)
503                 class_export_put(exp);
504         rc = class_disconnect(exp, 0);
505         if (lmv->refcount == 0)
506                 lmv->connected = 0;
507         RETURN(rc);
508 }
509
510 static int lmv_iocontrol(unsigned int cmd, struct obd_export *exp,
511                          int len, void *karg, void *uarg)
512 {
513         struct obd_device *obddev = class_exp2obd(exp);
514         struct lmv_obd *lmv = &obddev->u.lmv;
515         int i, rc = 0, set = 0;
516         ENTRY;
517
518         if (lmv->desc.ld_tgt_count == 0)
519                 RETURN(-ENOTTY);
520         
521         for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
522                 int err;
523
524                 if (lmv->tgts[i].ltd_exp == NULL)
525                         continue;
526
527                 err = obd_iocontrol(cmd, lmv->tgts[i].ltd_exp, len, karg, uarg);
528                 if (err) {
529                         if (lmv->tgts[i].active) {
530                                 CERROR("error: iocontrol MDC %s on MDT"
531                                        "idx %d: err = %d\n",
532                                        lmv->tgts[i].uuid.uuid, i, err);
533                                 if (!rc)
534                                         rc = err;
535                         }
536                 } else
537                         set = 1;
538         }
539         if (!set && !rc)
540                 rc = -EIO;
541
542         RETURN(rc);
543 }
544
545 static int lmv_setup(struct obd_device *obd, obd_count len, void *buf)
546 {
547         int i, rc = 0;
548         struct lmv_desc *desc;
549         struct obd_uuid *uuids;
550         struct lmv_tgt_desc *tgts;
551         struct obd_device *tgt_obd;
552         struct lustre_cfg *lcfg = buf;
553         struct lmv_obd *lmv = &obd->u.lmv;
554         ENTRY;
555
556         if (LUSTRE_CFG_BUFLEN(lcfg, 1) < 1) {
557                 CERROR("LMV setup requires a descriptor\n");
558                 RETURN(-EINVAL);
559         }
560
561         if (LUSTRE_CFG_BUFLEN(lcfg, 2) < 1) {
562                 CERROR("LMV setup requires an MDT UUID list\n");
563                 RETURN(-EINVAL);
564         }
565
566         desc = (struct lmv_desc *)lustre_cfg_buf(lcfg, 1);
567         if (sizeof(*desc) > LUSTRE_CFG_BUFLEN(lcfg, 1)) {
568                 CERROR("descriptor size wrong: %d > %d\n",
569                        (int)sizeof(*desc), LUSTRE_CFG_BUFLEN(lcfg, 1));
570                 RETURN(-EINVAL);
571         }
572
573         uuids = (struct obd_uuid *)lustre_cfg_buf(lcfg, 2);
574         if (sizeof(*uuids) * desc->ld_tgt_count != LUSTRE_CFG_BUFLEN(lcfg, 2)) {
575                 CERROR("UUID array size wrong: %u * %u != %u\n",
576                        sizeof(*uuids), desc->ld_tgt_count, LUSTRE_CFG_BUFLEN(lcfg, 2));
577                 RETURN(-EINVAL);
578         }
579
580         lmv->tgts_size = sizeof(struct lmv_tgt_desc) * desc->ld_tgt_count;
581         OBD_ALLOC(lmv->tgts, lmv->tgts_size);
582         if (lmv->tgts == NULL) {
583                 CERROR("Out of memory\n");
584                 RETURN(-ENOMEM);
585         }
586
587         lmv->desc = *desc;
588         spin_lock_init(&lmv->lmv_lock);
589         
590         for (i = 0, tgts = lmv->tgts; i < desc->ld_tgt_count; i++, tgts++)
591                 tgts->uuid = uuids[i];
592         
593         lmv->max_cookiesize = 0;
594
595         lmv->max_easize = sizeof(struct lustre_id) *
596                 desc->ld_tgt_count + sizeof(struct mea);
597         
598         rc = lmv_setup_mgr(obd);
599         if (rc) {
600                 CERROR("Can't setup LMV object manager, "
601                        "error %d.\n", rc);
602                 OBD_FREE(lmv->tgts, lmv->tgts_size);
603                 RETURN(rc);
604         }
605
606         tgt_obd = class_find_client_obd(&lmv->tgts->uuid, OBD_MDC_DEVICENAME,
607                                         &obd->obd_uuid);
608         if (!tgt_obd) {
609                 CERROR("Target %s not attached\n", lmv->tgts->uuid.uuid);
610                 RETURN(-EINVAL);
611         }
612
613         rc = obd_llog_init(obd, &obd->obd_llogs, tgt_obd, 0, NULL);
614         if (rc) {
615                 CERROR("lmv_setup failed to setup llogging subsystems\n");
616         }
617
618         RETURN(rc);
619 }
620
621 static int lmv_cleanup(struct obd_device *obd, int flags) 
622 {
623         struct lmv_obd *lmv = &obd->u.lmv;
624         ENTRY;
625
626         lmv_cleanup_mgr(obd);
627         OBD_FREE(lmv->tgts, lmv->tgts_size);
628         
629         RETURN(0);
630 }
631
632 static int lmv_statfs(struct obd_device *obd, struct obd_statfs *osfs,
633                       unsigned long max_age)
634 {
635         struct lmv_obd *lmv = &obd->u.lmv;
636         struct obd_statfs *temp;
637         int rc = 0, i;
638         ENTRY;
639         
640         rc = lmv_check_connect(obd);
641         if (rc)
642                 RETURN(rc);
643
644         OBD_ALLOC(temp, sizeof(*temp));
645         if (temp == NULL)
646                 RETURN(-ENOMEM);
647                 
648         for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
649                 if (lmv->tgts[i].ltd_exp == NULL)
650                         continue;
651
652                 rc = obd_statfs(lmv->tgts[i].ltd_exp->exp_obd, temp, max_age);
653                 if (rc) {
654                         CERROR("can't stat MDS #%d (%s), error %d\n", i,
655                                lmv->tgts[i].ltd_exp->exp_obd->obd_name,
656                                rc);
657                         GOTO(out_free_temp, rc);
658                 }
659                 if (i == 0) {
660                         memcpy(osfs, temp, sizeof(*temp));
661                 } else {
662                         osfs->os_bavail += temp->os_bavail;
663                         osfs->os_blocks += temp->os_blocks;
664                         osfs->os_ffree += temp->os_ffree;
665                         osfs->os_files += temp->os_files;
666                 }
667         }
668
669         EXIT;
670 out_free_temp:
671         OBD_FREE(temp, sizeof(*temp));
672         return rc;
673 }
674
675 static int lmv_getstatus(struct obd_export *exp, struct lustre_id *id)
676 {
677         struct obd_device *obd = exp->exp_obd;
678         struct lmv_obd *lmv = &obd->u.lmv;
679         int rc;
680         ENTRY;
681
682         rc = lmv_check_connect(obd);
683         if (rc)
684                 RETURN(rc);
685
686         rc = md_getstatus(lmv->tgts[0].ltd_exp, id);
687         id_group(id) = 0;
688         
689         RETURN(rc);
690 }
691
692 static int lmv_getattr(struct obd_export *exp, struct lustre_id *id,
693                        __u64 valid, const char *xattr_name,
694                        const void *xattr_data, unsigned int xattr_datalen,
695                        unsigned int ea_size, struct obd_capa *ocapa,
696                        struct ptlrpc_request **request)
697 {
698         struct obd_device *obd = exp->exp_obd;
699         struct lmv_obd *lmv = &obd->u.lmv;
700         int rc, i = id_group(id);
701         struct lmv_obj *obj;
702         ENTRY;
703
704         rc = lmv_check_connect(obd);
705         if (rc)
706                 RETURN(rc);
707
708         LASSERT(i < lmv->desc.ld_tgt_count);
709
710
711         rc = md_getattr(lmv->tgts[i].ltd_exp, id, valid,
712                         xattr_name, xattr_data, xattr_datalen,
713                         ea_size, ocapa, request);
714         if (rc)
715                 RETURN(rc);
716         
717         obj = lmv_grab_obj(obd, id);
718         
719         CDEBUG(D_OTHER, "GETATTR for "DLID4" %s\n",
720                OLID4(id), obj ? "(splitted)" : "");
721
722         /*
723          * if object is splitted, then we loop over all the slaves and gather
724          * size attribute. In ideal world we would have to gather also mds field
725          * from all slaves, as object is spread over the cluster and this is
726          * definitely interesting information and it is not good to loss it,
727          * but...
728          */
729         if (obj) {
730                 struct mds_body *body;
731
732                 if (*request == NULL) {
733                         lmv_put_obj(obj);
734                         RETURN(rc);
735                 }
736                         
737                 body = lustre_msg_buf((*request)->rq_repmsg, 0,
738                                       sizeof(*body));
739                 LASSERT(body != NULL);
740
741                 lmv_lock_obj(obj);
742         
743                 for (i = 0; i < obj->objcount; i++) {
744
745                         if (lmv->tgts[i].ltd_exp == NULL) {
746                                 CWARN("%s: NULL export for %d\n",
747                                       obd->obd_name, i);
748                                 continue;
749                         }
750
751                         /* skip master obj. */
752                         if (id_equal_fid(&obj->id, &obj->objs[i].id))
753                                 continue;
754                         
755                         body->size += obj->objs[i].size;
756                 }
757
758                 lmv_unlock_obj(obj);
759                 lmv_put_obj(obj);
760         }
761         
762         RETURN(rc);
763 }
764
765 static int lmv_access_check(struct obd_export *exp,
766                             struct lustre_id *id,
767                             struct ptlrpc_request **request)
768 {
769         struct obd_device *obd = exp->exp_obd;
770         struct lmv_obd *lmv = &obd->u.lmv;
771         int rc, i = id_group(id);
772         ENTRY;
773
774         rc = lmv_check_connect(obd);
775         if (rc)
776                 RETURN(rc);
777
778         LASSERT(i < lmv->desc.ld_tgt_count);
779         rc = md_access_check(lmv->tgts[i].ltd_exp, id, request);
780         RETURN(rc);
781 }
782
783 static int lmv_change_cbdata(struct obd_export *exp,
784                              struct lustre_id *id, 
785                              ldlm_iterator_t it,
786                              void *data)
787 {
788         struct obd_device *obd = exp->exp_obd;
789         struct lmv_obd *lmv = &obd->u.lmv;
790         int i, rc;
791         ENTRY;
792         
793         rc = lmv_check_connect(obd);
794         if (rc)
795                 RETURN(rc);
796         
797         CDEBUG(D_OTHER, "CBDATA for "DLID4"\n", OLID4(id));
798         LASSERT(id_group(id) < lmv->desc.ld_tgt_count);
799
800         /* with CMD every object can have two locks in different
801          * namespaces: lookup lock in space of mds storing direntry
802          * and update/open lock in space of mds storing inode */
803         for (i = 0; i < lmv->desc.ld_tgt_count; i++)
804                 md_change_cbdata(lmv->tgts[i].ltd_exp, id, it, data);
805         
806         RETURN(0);
807 }
808
809 static int lmv_change_cbdata_name(struct obd_export *exp,
810                                   struct lustre_id *pid,
811                                   char *name, int len,
812                                   struct lustre_id *cid,
813                                   ldlm_iterator_t it,
814                                   void *data)
815 {
816         struct obd_device *obd = exp->exp_obd;
817         struct lmv_obd *lmv = &obd->u.lmv;
818         struct lustre_id rcid = *cid;
819         struct lmv_obj *obj;
820         int rc = 0, mds;
821         ENTRY;
822
823         rc = lmv_check_connect(obd);
824         if (rc)
825                 RETURN(rc);
826
827         LASSERT(id_group(pid) < lmv->desc.ld_tgt_count);
828         LASSERT(id_group(cid) < lmv->desc.ld_tgt_count);
829         
830         CDEBUG(D_OTHER, "CBDATA for "DLID4":%*s -> "DLID4"\n",
831                OLID4(pid), len, name, OLID4(cid));
832
833         /* this is default mds for directory name belongs to. */
834         mds = id_group(pid);
835         obj = lmv_grab_obj(obd, pid);
836         if (obj) {
837                 /* directory is splitted. look for right mds for this name. */
838                 mds = raw_name2idx(obj->hashtype, obj->objcount, name, len);
839                 rcid = obj->objs[mds].id;
840                 mds = id_group(&rcid);
841                 lmv_put_obj(obj);
842         }
843         rc = md_change_cbdata(lmv->tgts[mds].ltd_exp, &rcid, it, data);
844         RETURN(rc);
845 }
846
847 static int lmv_valid_attrs(struct obd_export *exp, struct lustre_id *id) 
848 {
849         struct obd_device *obd = exp->exp_obd;
850         struct lmv_obd *lmv = &obd->u.lmv;
851         int rc = 0;
852         ENTRY;
853
854         rc = lmv_check_connect(obd);
855         if (rc)
856                 RETURN(rc);
857
858         CDEBUG(D_OTHER, "validate "DLID4"\n", OLID4(id));
859         LASSERT(id_group(id) < lmv->desc.ld_tgt_count);
860         rc = md_valid_attrs(lmv->tgts[id_group(id)].ltd_exp, id);
861         RETURN(rc);
862 }
863
864 static int lmv_close(struct obd_export *exp, struct mdc_op_data *op_data,
865                      struct obd_client_handle *och,
866                      struct ptlrpc_request **request)
867 {
868         struct obd_device *obd = exp->exp_obd;
869         struct lmv_obd *lmv = &obd->u.lmv;
870         int rc, i = id_group(&op_data->id1);
871         ENTRY;
872         
873         rc = lmv_check_connect(obd);
874         if (rc)
875                 RETURN(rc);
876
877         LASSERT(i < lmv->desc.ld_tgt_count);
878         CDEBUG(D_OTHER, "CLOSE "DLID4"\n", OLID4(&op_data->id1));
879         rc = md_close(lmv->tgts[i].ltd_exp, op_data, och, request);
880         RETURN(rc);
881 }
882
883 int lmv_get_mea_and_update_object(struct obd_export *exp, 
884                                   struct lustre_id *id)
885 {
886         struct obd_device *obd = exp->exp_obd;
887         struct lmv_obd *lmv = &obd->u.lmv;
888         struct ptlrpc_request *req = NULL;
889         struct lmv_obj *obj;
890         struct lustre_md md;
891         int mealen, rc;
892         __u64 valid;
893         ENTRY;
894
895         md.mea = NULL;
896         mealen = MEA_SIZE_LMV(lmv);
897         
898         valid = OBD_MD_FLEASIZE | OBD_MD_FLDIREA | OBD_MD_MEA;
899
900         /* time to update mea of parent id */
901         rc = md_getattr(lmv->tgts[id_group(id)].ltd_exp,
902                         id, valid, NULL, NULL, 0, mealen, NULL, &req);
903         if (rc) {
904                 CERROR("md_getattr() failed, error %d\n", rc);
905                 GOTO(cleanup, rc);
906         }
907
908         rc = mdc_req2lustre_md(exp, req, 0, NULL, &md);
909         if (rc) {
910                 CERROR("mdc_req2lustre_md() failed, error %d\n", rc);
911                 GOTO(cleanup, rc);
912         }
913
914         if (md.mea == NULL)
915                 GOTO(cleanup, rc = -ENODATA);
916
917         obj = lmv_create_obj(exp, id, md.mea);
918         if (IS_ERR(obj))
919                 rc = PTR_ERR(obj);
920         else
921                 lmv_put_obj(obj);
922
923         obd_free_memmd(exp, (struct lov_stripe_md **)&md.mea);
924
925         EXIT;
926 cleanup:
927         if (req)
928                 ptlrpc_req_finished(req);
929         return rc;
930 }
931
932 int lmv_create(struct obd_export *exp, struct mdc_op_data *op_data,
933                const void *data, int datalen, int mode, __u32 uid,
934                __u32 gid, __u64 rdev, struct ptlrpc_request **request)
935 {
936         struct obd_device *obd = exp->exp_obd;
937         struct lmv_obd *lmv = &obd->u.lmv;
938         struct mds_body *body;
939         struct lmv_obj *obj;
940         int rc, mds, loop = 0;
941         ENTRY;
942
943         rc = lmv_check_connect(obd);
944         if (rc)
945                 RETURN(rc);
946
947         if (!lmv->desc.ld_active_tgt_count)
948                 RETURN(-EIO);
949 repeat:
950         LASSERT(++loop <= 2);
951         obj = lmv_grab_obj(obd, &op_data->id1);
952         if (obj) {
953                 mds = raw_name2idx(obj->hashtype, obj->objcount, 
954                                    op_data->name, op_data->namelen);
955                 op_data->id1 = obj->objs[mds].id;
956                 lmv_put_obj(obj);
957         }
958
959         CDEBUG(D_OTHER, "CREATE '%*s' on "DLID4"\n", op_data->namelen,
960                op_data->name, OLID4(&op_data->id1));
961         
962         rc = md_create(lmv->tgts[id_group(&op_data->id1)].ltd_exp, 
963                        op_data, data, datalen, mode, uid, gid, rdev,
964                        request);
965         if (rc == 0) {
966                 if (*request == NULL)
967                         RETURN(rc);
968
969                 body = lustre_msg_buf((*request)->rq_repmsg, 0,
970                                       sizeof(*body));
971                 if (body == NULL)
972                         RETURN(-ENOMEM);
973                 
974                 CDEBUG(D_OTHER, "created. "DLID4"\n", OLID4(&op_data->id1));
975         } else if (rc == -ERESTART) {
976                 /*
977                  * directory got splitted. time to update local object and
978                  * repeat the request with proper MDS.
979                  */
980                 rc = lmv_get_mea_and_update_object(exp, &op_data->id1);
981                 if (rc == 0) {
982                         ptlrpc_req_finished(*request);
983                         goto repeat;
984                 }
985         }
986         RETURN(rc);
987 }
988
989 static int lmv_done_writing(struct obd_export *exp, struct obdo *obdo)
990 {
991         struct obd_device *obd = exp->exp_obd;
992         struct lmv_obd *lmv = &obd->u.lmv;
993         int rc;
994         ENTRY;
995         
996         rc = lmv_check_connect(obd);
997         if (rc)
998                 RETURN(rc);
999
1000         /* FIXME: choose right MDC here */
1001         CWARN("this method isn't implemented yet\n");
1002         rc = md_done_writing(lmv->tgts[0].ltd_exp, obdo);
1003         RETURN(rc);
1004 }
1005
1006 static int
1007 lmv_enqueue_slaves(struct obd_export *exp, int locktype,
1008                    struct lookup_intent *it, int lockmode,
1009                    struct mdc_op_data *data, struct lustre_handle *lockh,
1010                    void *lmm, int lmmsize, ldlm_completion_callback cb_compl,
1011                    ldlm_blocking_callback cb_blocking, void *cb_data)
1012 {
1013         struct obd_device *obd = exp->exp_obd;
1014         struct lmv_obd *lmv = &obd->u.lmv;
1015         struct mea *mea = data->mea1;
1016         struct mdc_op_data *data2;
1017         int i, rc, mds;
1018         ENTRY;
1019
1020         OBD_ALLOC(data2, sizeof(*data2));
1021         if (data2 == NULL)
1022                 RETURN(-ENOMEM);
1023         
1024         LASSERT(mea != NULL);
1025         for (i = 0; i < mea->mea_count; i++) {
1026                 memset(data2, 0, sizeof(*data2));
1027                 data2->id1 = mea->mea_ids[i];
1028                 mds = id_group(&data2->id1);
1029                 
1030                 if (lmv->tgts[mds].ltd_exp == NULL)
1031                         continue;
1032
1033                 rc = md_enqueue(lmv->tgts[mds].ltd_exp, locktype, it, 
1034                                 lockmode, data2, lockh + i, lmm, lmmsize, 
1035                                 cb_compl, cb_blocking, cb_data);
1036                 
1037                 CDEBUG(D_OTHER, "take lock on slave "DLID4" -> %d/%d\n",
1038                        OLID4(&mea->mea_ids[i]), rc, LUSTRE_IT(it)->it_status);
1039                 if (rc)
1040                         GOTO(cleanup, rc);
1041                 if (LUSTRE_IT(it)->it_data) {
1042                         struct ptlrpc_request *req;
1043                         req = (struct ptlrpc_request *) LUSTRE_IT(it)->it_data;
1044                         ptlrpc_req_finished(req);
1045                 }
1046                 
1047                 if (LUSTRE_IT(it)->it_status)
1048                         GOTO(cleanup, rc = LUSTRE_IT(it)->it_status);
1049         }
1050         
1051         OBD_FREE(data2, sizeof(*data2));
1052         RETURN(0);
1053 cleanup:
1054         OBD_FREE(data2, sizeof(*data2));
1055         
1056         /* drop all taken locks */
1057         while (--i >= 0) {
1058                 if (lockh[i].cookie)
1059                         ldlm_lock_decref(lockh + i, lockmode);
1060                 lockh[i].cookie = 0;
1061         }
1062         return rc;
1063 }
1064
1065 static int
1066 lmv_enqueue_remote(struct obd_export *exp, int lock_type,
1067                    struct lookup_intent *it, int lock_mode,
1068                    struct mdc_op_data *data, struct lustre_handle *lockh,
1069                    void *lmm, int lmmsize, ldlm_completion_callback cb_compl,
1070                    ldlm_blocking_callback cb_blocking, void *cb_data)
1071 {
1072         struct ptlrpc_request *req = LUSTRE_IT(it)->it_data;
1073         struct obd_device *obd = exp->exp_obd;
1074         struct lmv_obd *lmv = &obd->u.lmv;
1075         struct lustre_handle plock;
1076         struct mdc_op_data rdata;
1077         struct mds_body *body = NULL;
1078         int rc = 0, pmode;
1079         ENTRY;
1080
1081         body = lustre_msg_buf(req->rq_repmsg, 1, sizeof(*body));
1082         LASSERT(body != NULL);
1083
1084         if (!(body->valid & OBD_MD_MDS))
1085                 RETURN(0);
1086
1087         CDEBUG(D_OTHER, "ENQUEUE '%s' on "DLID4" -> "DLID4"\n",
1088                LL_IT2STR(it), OLID4(&data->id1), OLID4(&body->id1));
1089
1090         /* we got LOOKUP lock, but we really need attrs */
1091         pmode = LUSTRE_IT(it)->it_lock_mode;
1092         LASSERT(pmode != 0);
1093         memcpy(&plock, lockh, sizeof(plock));
1094         LUSTRE_IT(it)->it_lock_mode = 0;
1095         LUSTRE_IT(it)->it_data = NULL;
1096         LASSERT((body->valid & OBD_MD_FID) != 0);
1097
1098         memcpy(&rdata, data, sizeof(rdata));
1099         rdata.id1 = body->id1;
1100         rdata.name = NULL;
1101         rdata.namelen = 0;
1102
1103         LUSTRE_IT(it)->it_disposition &= ~DISP_ENQ_COMPLETE;
1104         ptlrpc_req_finished(req);
1105
1106         rc = md_enqueue(lmv->tgts[id_group(&rdata.id1)].ltd_exp, 
1107                         lock_type, it, lock_mode, &rdata, lockh, lmm, 
1108                         lmmsize, cb_compl, cb_blocking, cb_data);
1109         ldlm_lock_decref(&plock, pmode);
1110         RETURN(rc);
1111 }
1112
1113 static int
1114 lmv_enqueue(struct obd_export *exp, int lock_type,
1115             struct lookup_intent *it, int lock_mode,
1116             struct mdc_op_data *data, struct lustre_handle *lockh,
1117             void *lmm, int lmmsize, ldlm_completion_callback cb_compl,
1118             ldlm_blocking_callback cb_blocking, void *cb_data)
1119 {
1120         struct obd_device *obd = exp->exp_obd;
1121         struct lmv_obd *lmv = &obd->u.lmv;
1122         struct lmv_obj *obj;
1123         int rc, mds;
1124         ENTRY;
1125
1126         rc = lmv_check_connect(obd);
1127         if (rc)
1128                 RETURN(rc);
1129
1130         if (data->mea1 && it->it_op == IT_UNLINK) {
1131                 rc = lmv_enqueue_slaves(exp, lock_type, it, lock_mode,
1132                                         data, lockh, lmm, lmmsize,
1133                                         cb_compl, cb_blocking, cb_data);
1134                 RETURN(rc);
1135         }
1136
1137         if (data->namelen) {
1138                 obj = lmv_grab_obj(obd, &data->id1);
1139                 if (obj) {
1140                         /* directory is splitted. look for right mds for this
1141                          * name */
1142                         mds = raw_name2idx(obj->hashtype, obj->objcount,
1143                                            (char *)data->name, data->namelen);
1144                         data->id1 = obj->objs[mds].id;
1145                         lmv_put_obj(obj);
1146                 }
1147         }
1148         CDEBUG(D_OTHER, "ENQUEUE '%s' on "DLID4"\n", LL_IT2STR(it),
1149                OLID4(&data->id1));
1150         
1151         rc = md_enqueue(lmv->tgts[id_group(&data->id1)].ltd_exp, 
1152                         lock_type, it, lock_mode, data, lockh, lmm, 
1153                         lmmsize, cb_compl, cb_blocking, cb_data);
1154         if (rc == 0 && it->it_op == IT_OPEN)
1155                 rc = lmv_enqueue_remote(exp, lock_type, it, lock_mode,
1156                                         data, lockh, lmm, lmmsize,
1157                                         cb_compl, cb_blocking, cb_data);
1158         RETURN(rc);
1159 }
1160
1161 static int
1162 lmv_getattr_lock(struct obd_export *exp, struct lustre_id *id,
1163                  char *filename, int namelen, __u64 valid,
1164                  unsigned int ea_size, struct ptlrpc_request **request)
1165 {
1166         int rc, mds = id_group(id), loop = 0;
1167         struct obd_device *obd = exp->exp_obd;
1168         struct lmv_obd *lmv = &obd->u.lmv;
1169         struct lustre_id rid = *id;
1170         struct mds_body *body;
1171         struct lmv_obj *obj;
1172         ENTRY;
1173         
1174         rc = lmv_check_connect(obd);
1175         if (rc)
1176                 RETURN(rc);
1177 repeat:
1178         LASSERT(++loop <= 2);
1179         obj = lmv_grab_obj(obd, id);
1180         if (obj) {
1181                 /* directory is splitted. look for right mds for this name */
1182                 mds = raw_name2idx(obj->hashtype, obj->objcount, 
1183                                    filename, namelen - 1);
1184                 rid = obj->objs[mds].id;
1185                 lmv_put_obj(obj);
1186         }
1187         
1188         CDEBUG(D_OTHER, "getattr_lock for %*s on "DLID4" -> "DLID4"\n",
1189                namelen, filename, OLID4(id), OLID4(&rid));
1190
1191         rc = md_getattr_lock(lmv->tgts[id_group(&rid)].ltd_exp,
1192                              &rid, filename, namelen,
1193                              valid == OBD_MD_FLID ? valid : valid | OBD_MD_FID,
1194                              ea_size, request);
1195         if (rc == 0) {
1196                 /*
1197                  * this could be cross-node reference. in this case all we have
1198                  * right now is lustre_id triple. we'd like to find other
1199                  * attributes.
1200                  */
1201                 body = lustre_msg_buf((*request)->rq_repmsg, 0, sizeof(*body));
1202                 LASSERT(body != NULL);
1203                 LASSERT((body->valid & OBD_MD_FID) != 0
1204                                 || body->valid == OBD_MD_FLID);
1205
1206                 if (body->valid & OBD_MD_MDS) {
1207                         struct ptlrpc_request *req = NULL;
1208                         
1209                         rid = body->id1;
1210                         CDEBUG(D_OTHER, "request attrs for "DLID4"\n", OLID4(&rid));
1211
1212                         rc = md_getattr_lock(lmv->tgts[id_group(&rid)].ltd_exp, 
1213                                              &rid, NULL, 1, valid, ea_size, &req);
1214                         ptlrpc_req_finished(*request);
1215                         *request = req;
1216                 }
1217         } else if (rc == -ERESTART) {
1218                 /* directory got splitted. time to update local object and
1219                  * repeat the request with proper MDS */
1220                 rc = lmv_get_mea_and_update_object(exp, &rid);
1221                 if (rc == 0) {
1222                         ptlrpc_req_finished(*request);
1223                         goto repeat;
1224                 }
1225         }
1226         RETURN(rc);
1227 }
1228
1229 /*
1230  * llite passes id of an target inode in data->id1 and id of directory in
1231  * data->id2
1232  */
1233 static int lmv_link(struct obd_export *exp, struct mdc_op_data *data,
1234                     struct ptlrpc_request **request)
1235 {
1236         struct obd_device *obd = exp->exp_obd;
1237         struct lmv_obd *lmv = &obd->u.lmv;
1238         struct lmv_obj *obj;
1239         int rc, mds;
1240         ENTRY;
1241         
1242         rc = lmv_check_connect(obd);
1243         if (rc)
1244                 RETURN(rc);
1245
1246         if (data->namelen != 0) {
1247                 /* usual link request */
1248                 obj = lmv_grab_obj(obd, &data->id2);
1249                 if (obj) {
1250                         rc = raw_name2idx(obj->hashtype, obj->objcount, 
1251                                           data->name, data->namelen);
1252                         data->id2 = obj->objs[rc].id;
1253                         lmv_put_obj(obj);
1254                 }
1255
1256                 mds = id_group(&data->id2);
1257                 
1258                 CDEBUG(D_OTHER,"link "DLID4":%*s to "DLID4"\n",
1259                        OLID4(&data->id2), data->namelen, data->name,
1260                        OLID4(&data->id1));
1261         } else {
1262                 mds = id_group(&data->id1);
1263                 
1264                 /* request from MDS to acquire i_links for inode by id1 */
1265                 CDEBUG(D_OTHER, "inc i_nlinks for "DLID4"\n",
1266                        OLID4(&data->id1));
1267         }
1268
1269         CDEBUG(D_OTHER, "forward to MDS #%u ("DLID4")\n",
1270                mds, OLID4(&data->id1));
1271         rc = md_link(lmv->tgts[mds].ltd_exp, data, request);
1272         
1273         RETURN(rc);
1274 }
1275
1276 static int lmv_rename(struct obd_export *exp, struct mdc_op_data *data,
1277                       const char *old, int oldlen, const char *new, int newlen,
1278                       struct ptlrpc_request **request)
1279 {
1280         struct obd_device *obd = exp->exp_obd;
1281         struct lmv_obd *lmv = &obd->u.lmv;
1282         struct lmv_obj *obj;
1283         int rc, mds;
1284         ENTRY;
1285
1286         CDEBUG(D_OTHER, "rename %*s in "DLID4" to %*s in "DLID4"\n",
1287                oldlen, old, OLID4(&data->id1), newlen, new,
1288                OLID4(&data->id2));
1289
1290         rc = lmv_check_connect(obd);
1291         if (rc)
1292                 RETURN(rc);
1293
1294         if (oldlen == 0) {
1295                 /*
1296                  * MDS with old dir entry is asking another MDS to create name
1297                  * there.
1298                  */
1299                 CDEBUG(D_OTHER,
1300                        "create %*s(%d/%d) in "DLID4" pointing "
1301                        "to "DLID4"\n", newlen, new, oldlen, newlen,
1302                        OLID4(&data->id2), OLID4(&data->id1));
1303
1304                 mds = id_group(&data->id2);
1305
1306                 /* 
1307                  * target directory can be splitted, sowe should forward request
1308                  * to the right MDS.
1309                  */
1310                 obj = lmv_grab_obj(obd, &data->id2);
1311                 if (obj) {
1312                         mds = raw_name2idx(obj->hashtype, obj->objcount, 
1313                                            (char *)new, newlen);
1314                         data->id2 = obj->objs[mds].id;
1315                         CDEBUG(D_OTHER, "forward to MDS #%u ("DLID4")\n", mds,
1316                                OLID4(&data->id2));
1317                         lmv_put_obj(obj);
1318                 }
1319                 goto request;
1320         }
1321
1322         obj = lmv_grab_obj(obd, &data->id1);
1323         if (obj) {
1324                 /*
1325                  * directory is already splitted, so we have to forward request
1326                  * to the right MDS.
1327                  */
1328                 mds = raw_name2idx(obj->hashtype, obj->objcount, 
1329                                    (char *)old, oldlen);
1330                 data->id1 = obj->objs[mds].id;
1331                 CDEBUG(D_OTHER, "forward to MDS #%u ("DLID4")\n", mds,
1332                        OLID4(&data->id1));
1333                 lmv_put_obj(obj);
1334         }
1335
1336         obj = lmv_grab_obj(obd, &data->id2);
1337         if (obj) {
1338                 /*
1339                  * directory is already splitted, so we have to forward request
1340                  * to the right MDS.
1341                  */
1342                 mds = raw_name2idx(obj->hashtype, obj->objcount, 
1343                                    (char *)new, newlen);
1344                 
1345                 data->id2 = obj->objs[mds].id;
1346                 CDEBUG(D_OTHER, "forward to MDS #%u ("DLID4")\n", mds,
1347                        OLID4(&data->id2));
1348                 lmv_put_obj(obj);
1349         }
1350         
1351         mds = id_group(&data->id1);
1352
1353 request:
1354         if (id_group(&data->id1) != id_group(&data->id2)) {
1355                 CDEBUG(D_OTHER,"cross-node rename "DLID4"/%*s to "DLID4"/%*s\n",
1356                        OLID4(&data->id1), oldlen, old, OLID4(&data->id2),
1357                        newlen, new);
1358         }
1359
1360         rc = md_rename(lmv->tgts[mds].ltd_exp, data, old, oldlen,
1361                        new, newlen, request); 
1362         RETURN(rc);
1363 }
1364
1365 static int lmv_setattr(struct obd_export *exp, struct mdc_op_data *data,
1366                        struct iattr *iattr, void *ea, int ealen, void *ea2,
1367                        int ea2len, void *ea3, int ea3len, 
1368                        struct ptlrpc_request **request)
1369 {
1370         struct obd_device *obd = exp->exp_obd;
1371         struct lmv_obd *lmv = &obd->u.lmv;
1372         struct ptlrpc_request *req;
1373         struct mds_body *body;
1374         struct lmv_obj *obj;
1375         int rc = 0, i;
1376         ENTRY;
1377
1378         rc = lmv_check_connect(obd);
1379         if (rc)
1380                 RETURN(rc);
1381
1382         obj = lmv_grab_obj(obd, &data->id1);
1383         
1384         CDEBUG(D_OTHER, "SETATTR for "DLID4", valid 0x%x%s\n",
1385                OLID4(&data->id1), iattr->ia_valid, obj ? ", splitted" : "");
1386         
1387         if (obj) {
1388                 for (i = 0; i < obj->objcount; i++) {
1389                         data->id1 = obj->objs[i].id;
1390                         
1391                         rc = md_setattr(lmv->tgts[id_group(&data->id1)].ltd_exp, 
1392                                         data, iattr, ea, ealen, ea2, ea2len, 
1393                                         ea3, ea3len, &req);
1394
1395                         if (id_equal_fid(&obj->id, &obj->objs[i].id)) {
1396                                 /*
1397                                  * this is master object and this request should
1398                                  * be returned back to llite.
1399                                  */
1400                                 *request = req;
1401                         } else {
1402                                 ptlrpc_req_finished(req);
1403                         }
1404
1405                         if (rc)
1406                                 break;
1407                 }
1408                 lmv_put_obj(obj);
1409         } else {
1410                 LASSERT(id_group(&data->id1) < lmv->desc.ld_tgt_count);
1411                 rc = md_setattr(lmv->tgts[id_group(&data->id1)].ltd_exp,
1412                                 data, iattr, ea, ealen, ea2, ea2len, ea3,
1413                                 ea3len, request); 
1414                 if (rc == 0) {
1415                         body = lustre_msg_buf((*request)->rq_repmsg, 0,
1416                                               sizeof(*body));
1417                         LASSERT(body != NULL);
1418                         LASSERT((body->valid & OBD_MD_FID) != 0);
1419                         LASSERT(id_group(&body->id1) == id_group(&data->id1));
1420                 }
1421         }
1422         RETURN(rc);
1423 }
1424
1425 static int lmv_sync(struct obd_export *exp, struct lustre_id *id,
1426                     struct ptlrpc_request **request)
1427 {
1428         struct obd_device *obd = exp->exp_obd;
1429         struct lmv_obd *lmv = &obd->u.lmv;
1430         int rc;
1431         ENTRY;
1432
1433         rc = lmv_check_connect(obd);
1434         if (rc)
1435                 RETURN(rc);
1436
1437         rc = md_sync(lmv->tgts[id_group(id)].ltd_exp, 
1438                      id, request);
1439         RETURN(rc);
1440 }
1441
1442 int lmv_dirobj_blocking_ast(struct ldlm_lock *lock, 
1443                             struct ldlm_lock_desc *desc,
1444                             void *data, int flag)
1445 {
1446         struct lustre_handle lockh;
1447         struct lmv_obj *obj;
1448         int rc;
1449         ENTRY;
1450
1451         switch (flag) {
1452         case LDLM_CB_BLOCKING:
1453                 ldlm_lock2handle(lock, &lockh);
1454                 rc = ldlm_cli_cancel(&lockh);
1455                 if (rc < 0) {
1456                         CDEBUG(D_INODE, "ldlm_cli_cancel: %d\n", rc);
1457                         RETURN(rc);
1458                 }
1459                 break;
1460         case LDLM_CB_CANCELING:
1461                 /* time to drop cached attrs for dirobj */
1462                 obj = lock->l_ast_data;
1463                 if (obj) {
1464                         CDEBUG(D_OTHER, "cancel %s on "LPU64"/"LPU64
1465                                ", master "DLID4"\n",
1466                                lock->l_resource->lr_name.name[3] == 1 ?
1467                                "LOOKUP" : "UPDATE",
1468                                lock->l_resource->lr_name.name[0],
1469                                lock->l_resource->lr_name.name[1], 
1470                                OLID4(&obj->id));
1471                         lmv_put_obj(obj);
1472                 }
1473                 break;
1474         default:
1475                 LBUG();
1476         }
1477         RETURN(0);
1478 }
1479
1480 static void lmv_remove_dots(struct page *page)
1481 {
1482         unsigned limit = PAGE_CACHE_SIZE;
1483         char *kaddr = page_address(page);
1484         struct ext2_dir_entry_2 *p;
1485         unsigned offs, rec_len;
1486
1487         for (offs = 0; offs <= limit - EXT2_DIR_REC_LEN(1); offs += rec_len) {
1488                 p = (struct ext2_dir_entry_2 *)(kaddr + offs);
1489                 rec_len = le16_to_cpu(p->rec_len);
1490
1491                 if ((p->name_len == 1 && p->name[0] == '.') ||
1492                     (p->name_len == 2 && p->name[0] == '.' && p->name[1] == '.'))
1493                         p->inode = 0;
1494         }
1495 }
1496
1497 static int lmv_readpage(struct obd_export *exp, struct lustre_id *id,
1498                         __u64 offset, struct page *page,
1499                         struct ptlrpc_request **request)
1500 {
1501         struct obd_device *obd = exp->exp_obd;
1502         struct lmv_obd *lmv = &obd->u.lmv;
1503         struct lustre_id rid = *id;
1504         struct lmv_obj *obj;
1505         int rc, i;
1506         ENTRY;
1507
1508 #warning "we need well-desgined readdir() implementation"
1509         rc = lmv_check_connect(obd);
1510         if (rc)
1511                 RETURN(rc);
1512
1513         LASSERT(id_group(id) < lmv->desc.ld_tgt_count);
1514         CDEBUG(D_OTHER, "READPAGE at %llu from "DLID4"\n",
1515                offset, OLID4(&rid));
1516
1517         obj = lmv_grab_obj(obd, id);
1518         if (obj) {
1519                 lmv_lock_obj(obj);
1520
1521                 /* find dirobj containing page with requested offset. */
1522                 for (i = 0; i < obj->objcount; i++) {
1523                         if (offset < obj->objs[i].size)
1524                                 break;
1525                         offset -= obj->objs[i].size;
1526                 }
1527                 rid = obj->objs[i].id;
1528                 
1529                 lmv_unlock_obj(obj);
1530                 lmv_put_obj(obj);
1531                 
1532                 CDEBUG(D_OTHER, "forward to "DLID4" with offset %lu\n",
1533                        OLID4(&rid), (unsigned long)offset);
1534         }
1535         rc = md_readpage(lmv->tgts[id_group(&rid)].ltd_exp, &rid, 
1536                          offset, page, request);
1537         
1538         if (rc == 0 && !id_equal_fid(&rid, id))
1539                 /* this page isn't from master object. To avoid "." and ".." 
1540                  * duplication in directory, we have to remove them from all
1541                  * slave objects */
1542                 lmv_remove_dots(page);
1543         
1544         RETURN(rc);
1545 }
1546
1547 static int lmv_unlink_slaves(struct obd_export *exp, struct mdc_op_data *data,
1548                              struct ptlrpc_request **req)
1549 {
1550         struct obd_device *obd = exp->exp_obd;
1551         struct lmv_obd *lmv = &obd->u.lmv;
1552         struct mea *mea = data->mea1;
1553         struct mdc_op_data *data2;
1554         int i, rc = 0;
1555         ENTRY;
1556
1557         OBD_ALLOC(data2, sizeof(*data2));
1558         if (data2 == NULL)
1559                 RETURN(-ENOMEM);
1560         
1561         LASSERT(mea != NULL);
1562         for (i = 0; i < mea->mea_count; i++) {
1563                 memset(data2, 0, sizeof(*data2));
1564                 data2->id1 = mea->mea_ids[i];
1565                 data2->create_mode = MDS_MODE_DONT_LOCK | S_IFDIR;
1566                 
1567                 if (lmv->tgts[id_group(&data2->id1)].ltd_exp == NULL)
1568                         continue;
1569
1570                 rc = md_unlink(lmv->tgts[id_group(&data2->id1)].ltd_exp,
1571                                data2, req);
1572                 
1573                 CDEBUG(D_OTHER, "unlink slave "DLID4" -> %d\n",
1574                        OLID4(&mea->mea_ids[i]), rc);
1575                 
1576                 if (*req) {
1577                         ptlrpc_req_finished(*req);
1578                         *req = NULL;
1579                 }
1580                 if (rc)
1581                         RETURN(rc);
1582         }
1583         OBD_FREE(data2, sizeof(*data2));
1584         RETURN(rc);
1585 }
1586
1587 static int lmv_delete_inode(struct obd_export *exp, struct lustre_id *id)
1588 {
1589         ENTRY;
1590
1591         LASSERT(exp && id);
1592         if (lmv_delete_obj(exp, id)) {
1593                 CDEBUG(D_OTHER, "lmv object "DLID4" is destroyed.\n",
1594                        OLID4(id));
1595         }
1596         RETURN(0);
1597 }
1598
1599 static int lmv_unlink(struct obd_export *exp, struct mdc_op_data *data,
1600                       struct ptlrpc_request **request)
1601 {
1602         struct obd_device *obd = exp->exp_obd;
1603         struct lmv_obd *lmv = &obd->u.lmv;
1604         int rc, i = 0;
1605         ENTRY;
1606         
1607         rc = lmv_check_connect(obd);
1608         if (rc)
1609                 RETURN(rc);
1610
1611         if (data->namelen == 0 && data->mea1 != NULL) {
1612                 /* mds asks to remove slave objects */
1613                 rc = lmv_unlink_slaves(exp, data, request);
1614                 RETURN(rc);
1615         }
1616
1617         if (data->namelen != 0) {
1618                 struct lmv_obj *obj;
1619                 
1620                 obj = lmv_grab_obj(obd, &data->id1);
1621                 if (obj) {
1622                         i = raw_name2idx(obj->hashtype, obj->objcount,
1623                                          data->name, data->namelen);
1624                         data->id1 = obj->objs[i].id;
1625                         lmv_put_obj(obj);
1626                 }
1627                 CDEBUG(D_OTHER, "unlink '%*s' in "DLID4" -> %u\n",
1628                        data->namelen, data->name, OLID4(&data->id1),
1629                        i);
1630         } else {
1631                 CDEBUG(D_OTHER, "drop i_nlink on "DLID4"\n",
1632                        OLID4(&data->id1));
1633         }
1634         rc = md_unlink(lmv->tgts[id_group(&data->id1)].ltd_exp, 
1635                        data, request);
1636         RETURN(rc);
1637 }
1638
1639 static struct obd_device *lmv_get_real_obd(struct obd_export *exp,
1640                                            struct lustre_id *id)
1641 {
1642         struct obd_device *obd = exp->exp_obd;
1643         struct lmv_obd *lmv = &obd->u.lmv;
1644         int rc;
1645         ENTRY;
1646
1647         rc = lmv_check_connect(obd);
1648         if (rc)
1649                 RETURN(ERR_PTR(rc));
1650         obd = lmv->tgts[id_group(id)].ltd_exp->exp_obd;
1651         EXIT;
1652         
1653         return obd;
1654 }
1655
1656 static int lmv_init_ea_size(struct obd_export *exp, int easize,
1657                             int cookiesize)
1658 {
1659         struct obd_device *obd = exp->exp_obd;
1660         struct lmv_obd *lmv = &obd->u.lmv;
1661         int i, rc = 0, change = 0;
1662         ENTRY;
1663
1664         if (lmv->max_easize < easize) {
1665                 lmv->max_easize = easize;
1666                 change = 1;
1667         }
1668         if (lmv->max_cookiesize < cookiesize) {
1669                 lmv->max_cookiesize = cookiesize;
1670                 change = 1;
1671         }
1672         if (change == 0)
1673                 RETURN(0);
1674         
1675         if (lmv->connected == 0)
1676                 RETURN(0);
1677
1678         for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
1679                 if (lmv->tgts[i].ltd_exp == NULL) {
1680                         CWARN("%s: NULL export for %d\n", obd->obd_name, i);
1681                         continue;
1682                 }
1683
1684                 rc = obd_init_ea_size(lmv->tgts[i].ltd_exp, easize, cookiesize);
1685                 if (rc) {
1686                         CERROR("obd_init_ea_size() failed on MDT target %d, "
1687                                "error %d.\n", i, rc);
1688                         break;
1689                 }
1690         }
1691         RETURN(rc);
1692 }
1693
1694 static int lmv_obd_create_single(struct obd_export *exp, struct obdo *oa,
1695                                  void *acl, int acl_size,
1696                                  struct lov_stripe_md **ea,
1697                                  struct obd_trans_info *oti)
1698 {
1699         struct obd_device *obd = exp->exp_obd;
1700         struct lmv_obd *lmv = &obd->u.lmv;
1701         struct lov_stripe_md obj_md;
1702         struct lov_stripe_md *obj_mdp = &obj_md;
1703         int rc = 0;
1704         ENTRY;
1705
1706         LASSERT(ea == NULL);
1707         LASSERT(oa->o_mds < lmv->desc.ld_tgt_count);
1708
1709         rc = obd_create(lmv->tgts[oa->o_mds].ltd_exp, oa,
1710                         acl, acl_size, &obj_mdp, oti);
1711
1712         RETURN(rc);
1713 }
1714
1715 /*
1716  * to be called from MDS only. @oa should have correct store cookie and o_fid
1717  * values for "master" object, as it will be used.
1718  */
1719 int lmv_obd_create(struct obd_export *exp, struct obdo *oa,
1720                    void *acl, int acl_size,
1721                    struct lov_stripe_md **ea, struct obd_trans_info *oti)
1722 {
1723         struct obd_device *obd = exp->exp_obd;
1724         struct lmv_obd *lmv = &obd->u.lmv;
1725         struct lustre_id mid;
1726         int i, c, rc = 0;
1727         struct mea *mea;
1728         ENTRY;
1729
1730         rc = lmv_check_connect(obd);
1731         if (rc)
1732                 RETURN(rc);
1733
1734         LASSERT(oa != NULL);
1735         
1736         if (ea == NULL) {
1737                 rc = lmv_obd_create_single(exp, oa, acl, acl_size, NULL, oti);
1738                 if (rc)
1739                         CERROR("Can't create object, rc = %d\n", rc);
1740                 RETURN(rc);
1741         }
1742
1743         /* acl is only suppied when mds create single remote obj */
1744         LASSERT(acl == NULL && acl_size == 0);
1745
1746         if (*ea == NULL) {
1747                 rc = obd_alloc_diskmd(exp, (struct lov_mds_md **)ea);
1748                 if (rc < 0) {
1749                         CERROR("obd_alloc_diskmd() failed, error %d\n",
1750                                rc);
1751                         RETURN(rc);
1752                 } else
1753                         rc = 0;
1754                 
1755                 if (*ea == NULL)
1756                         RETURN(-ENOMEM);
1757         }
1758
1759         /* 
1760          * here we should take care about splitted dir, so store cookie and fid
1761          * for "master" object should already be allocated and passed in @oa.
1762          */
1763         LASSERT(oa->o_id != 0);
1764         LASSERT(oa->o_fid != 0);
1765
1766         /* save "master" object id */
1767         obdo2id(&mid, oa);
1768
1769         mea = (struct mea *)*ea;
1770         mea->mea_master = -1;
1771         mea->mea_magic = MEA_MAGIC_ALL_CHARS;
1772
1773         if (!mea->mea_count || mea->mea_count > lmv->desc.ld_tgt_count)
1774                 mea->mea_count = lmv->desc.ld_tgt_count;
1775
1776         for (i = 0, c = 0; c < mea->mea_count && i < lmv->desc.ld_tgt_count; i++) {
1777                 struct lov_stripe_md obj_md;
1778                 struct lov_stripe_md *obj_mdp = &obj_md;
1779                
1780                 if (lmv->tgts[i].ltd_exp == NULL) {
1781                         /* this is "master" MDS */
1782                         mea->mea_master = i;
1783                         mea->mea_ids[c] = mid;
1784                         c++;
1785                         continue;
1786                 }
1787
1788                 /*
1789                  * "master" MDS should always be part of stripped dir,
1790                  * so scan for it.
1791                  */
1792                 if (mea->mea_master == -1 && c == mea->mea_count - 1)
1793                         continue;
1794
1795                 oa->o_valid = OBD_MD_FLGENER | OBD_MD_FLTYPE | OBD_MD_FLMODE |
1796                         OBD_MD_FLUID | OBD_MD_FLGID | OBD_MD_FLID;
1797
1798                 rc = obd_create(lmv->tgts[c].ltd_exp, oa, NULL, 0,
1799                                 &obj_mdp, oti);
1800                 if (rc) {
1801                         CERROR("obd_create() failed on MDT target %d, "
1802                                "error %d\n", c, rc);
1803                         RETURN(rc);
1804                 }
1805
1806                 CDEBUG(D_OTHER, "dirobj at mds %d: "LPU64"/%u\n",
1807                        i, oa->o_id, oa->o_generation);
1808
1809
1810                 /*
1811                  * here, when object is created (or it is master and was passed
1812                  * from caller) on desired MDS we save its fid to local mea_ids.
1813                  */
1814                 LASSERT(oa->o_fid);
1815
1816                 /* 
1817                  * store cookie should be defined here for both cases (master
1818                  * object and not master), because master is already created.
1819                  */
1820                 LASSERT(oa->o_id);
1821
1822                 /* fill mea by store cookie and fid */
1823                 obdo2id(&mea->mea_ids[c], oa);
1824                 c++;
1825         }
1826         LASSERT(c == mea->mea_count);
1827
1828         CDEBUG(D_OTHER, "%d dirobjects created\n",
1829                (int)mea->mea_count);
1830         
1831         RETURN(rc);
1832 }
1833
1834 static int lmv_llog_init(struct obd_device *obd, struct obd_llogs *llogs, 
1835                          struct obd_device *tgt, int count,
1836                          struct llog_catid *logid)
1837 {
1838         struct llog_ctxt *ctxt;
1839         int rc;
1840         ENTRY;
1841
1842         rc = obd_llog_setup(obd, llogs, LLOG_CONFIG_REPL_CTXT, tgt, 0, NULL,
1843                             &llog_client_ops);
1844         if (rc == 0) {
1845                 ctxt = llog_get_context(llogs, LLOG_CONFIG_REPL_CTXT);
1846                 ctxt->loc_imp = tgt->u.cli.cl_import;
1847         }
1848
1849         RETURN(rc);
1850 }
1851
1852 static int lmv_llog_finish(struct obd_device *obd,
1853                            struct obd_llogs *llogs, int count)
1854 {
1855         int rc;
1856         ENTRY;
1857
1858         rc = obd_llog_cleanup(llog_get_context(llogs, LLOG_CONFIG_REPL_CTXT));
1859         RETURN(rc);
1860 }
1861
1862 static int lmv_get_info(struct obd_export *exp, __u32 keylen,
1863                         void *key, __u32 *vallen, void *val)
1864 {
1865         struct obd_device *obd;
1866         struct lmv_obd *lmv;
1867         int rc = 0;
1868         ENTRY;
1869
1870         obd = class_exp2obd(exp);
1871         if (obd == NULL) {
1872                 CDEBUG(D_IOCTL, "invalid client cookie "LPX64"\n",
1873                        exp->exp_handle.h_cookie);
1874                 RETURN(-EINVAL);
1875         }
1876
1877         lmv = &obd->u.lmv;
1878         if (keylen == strlen("mdsize") && !strcmp(key, "mdsize")) {
1879                 __u32 *mdsize = val;
1880                 *vallen = sizeof(__u32);
1881                 *mdsize = sizeof(struct lustre_id) * lmv->desc.ld_tgt_count
1882                         + sizeof(struct mea);
1883                 RETURN(0);
1884         } else if (keylen == strlen("mdsnum") && !strcmp(key, "mdsnum")) {
1885                 struct obd_uuid *cluuid = &lmv->cluuid;
1886                 struct lmv_tgt_desc *tgts;
1887                 __u32 *mdsnum = val;
1888                 int i;
1889
1890                 for (i = 0, tgts = lmv->tgts; i < lmv->desc.ld_tgt_count; i++, tgts++) {
1891                         if (obd_uuid_equals(&tgts->uuid, cluuid)) {
1892                                 *vallen = sizeof(__u32);
1893                                 *mdsnum = i;
1894                                 RETURN(0);
1895                         }
1896                 }
1897                 LASSERT(0);
1898         } else if (keylen == strlen("rootid") && !strcmp(key, "rootid")) {
1899                 rc = lmv_check_connect(obd);
1900                 if (rc)
1901                         RETURN(rc);
1902                 
1903                 /* getting rootid from first MDS. */
1904                 rc = obd_get_info(lmv->tgts[0].ltd_exp, keylen, key,
1905                                   vallen, val);
1906                 RETURN(rc);
1907         } else if (keylen >= strlen("lmvdesc") && !strcmp(key, "lmvdesc")) {
1908                 struct lmv_desc *desc_ret = val;
1909                 *desc_ret = lmv->desc;
1910                 RETURN(0);
1911         } else if (keylen >= strlen("remote_flag") && !strcmp(key, "remote_flag")) {
1912                 struct lmv_tgt_desc *tgts;
1913                 int i;
1914
1915                 rc = lmv_check_connect(obd);
1916                 if (rc)
1917                         RETURN(rc);
1918                 
1919                 LASSERT(*vallen == sizeof(__u32));
1920                 for (i = 0, tgts = lmv->tgts; i < lmv->desc.ld_tgt_count;
1921                      i++, tgts++) {
1922
1923                         /* all tgts should be connected when this get called. */
1924                         if (!tgts || !tgts->ltd_exp) {
1925                                 CERROR("target not setup?\n");
1926                                 continue;
1927                         }
1928
1929                         if (!obd_get_info(tgts->ltd_exp, keylen, key,
1930                                           vallen, val))
1931                                 RETURN(0);
1932                 }
1933                 RETURN(-EINVAL);
1934         } else if (keylen >= strlen("lovdesc") && !strcmp(key, "lovdesc")) {
1935                 rc = lmv_check_connect(obd);
1936                 if (rc)
1937                         RETURN(rc);
1938
1939                 /* forwarding this request to first MDS, it should know LOV
1940                  * desc. */
1941                 rc = obd_get_info(lmv->tgts[0].ltd_exp, keylen, key,
1942                                   vallen, val);
1943                 RETURN(rc);
1944         } else if (keylen >= strlen("getext") && !strcmp(key, "getext")) {
1945                 struct lmv_tgt_desc *tgts;
1946                 int i;
1947
1948                 rc = lmv_check_connect(obd);
1949                 if (rc)
1950                         RETURN(rc);
1951
1952                 LASSERT(*vallen == sizeof(struct fid_extent));
1953                 for (i = 0, tgts = lmv->tgts; i < lmv->desc.ld_tgt_count;
1954                      i++, tgts++) {
1955
1956                         /* all tgts should be connected when this get called. */
1957                         if (!tgts || !tgts->ltd_exp) {
1958                                 CERROR("target not setup?\n");
1959                                 continue;
1960                         }
1961
1962                         rc = obd_get_info(tgts->ltd_exp, keylen, key,
1963                                           vallen, val);
1964                         if (rc)
1965                                 RETURN(rc);
1966                 }
1967                 RETURN(0);
1968         }
1969
1970         CDEBUG(D_IOCTL, "invalid key\n");
1971         RETURN(-EINVAL);
1972 }
1973
1974 int lmv_set_info(struct obd_export *exp, obd_count keylen,
1975                  void *key, obd_count vallen, void *val)
1976 {
1977         struct lmv_tgt_desc    *tgt;
1978         struct obd_device      *obd;
1979         struct lmv_obd         *lmv;
1980         int rc = 0;
1981         ENTRY;
1982
1983         obd = class_exp2obd(exp);
1984         if (obd == NULL) {
1985                 CDEBUG(D_IOCTL, "invalid client cookie "LPX64"\n",
1986                        exp->exp_handle.h_cookie);
1987                 RETURN(-EINVAL);
1988         }
1989         lmv = &obd->u.lmv;
1990
1991         if (keylen >= strlen("inter_mds") && strcmp(key, "inter_mds") == 0) {
1992                 lmv->server_timeout = 1;
1993                 lmv_set_timeouts(obd);
1994                 RETURN(0);
1995         }
1996
1997         /* maybe this could be default */
1998         if ((keylen == strlen("sec") && strcmp(key, "sec") == 0) ||
1999             (keylen == strlen("sec_flags") && strcmp(key, "sec_flags") == 0) ||
2000             (keylen == strlen("nllu") && strcmp(key, "nllu") == 0)) {
2001                 struct obd_export *exp;
2002                 int err, i;
2003
2004                 spin_lock(&lmv->lmv_lock);
2005                 for (i = 0, tgt = lmv->tgts; i < lmv->desc.ld_tgt_count;
2006                      i++, tgt++) {
2007                         exp = tgt->ltd_exp;
2008                         /* during setup time the connections to mdc might
2009                          * haven't been established.
2010                          */
2011                         if (exp == NULL) {
2012                                 struct obd_device *tgt_obd;
2013
2014                                 tgt_obd = class_find_client_obd(&tgt->uuid,
2015                                                                 OBD_MDC_DEVICENAME,
2016                                                                 &obd->obd_uuid);
2017                                 if (!tgt_obd) {
2018                                         CERROR("can't set info %s, "
2019                                                "device %s not attached?\n",
2020                                                 (char *) key, tgt->uuid.uuid);
2021                                         rc = -EINVAL;
2022                                         continue;
2023                                 }
2024                                 exp = tgt_obd->obd_self_export;
2025                         }
2026
2027                         err = obd_set_info(exp, keylen, key, vallen, val);
2028                         if (!rc)
2029                                 rc = err;
2030                 }
2031                 spin_unlock(&lmv->lmv_lock);
2032
2033                 RETURN(rc);
2034         }
2035         if (keylen == 5 && strcmp(key, "audit") == 0) {
2036                 struct audit_attr_msg * msg = val;
2037                 int mds = id_group(&msg->id);
2038                 int i;
2039                 LASSERT(mds < lmv->desc.ld_tgt_count);
2040                 
2041                 if (IS_AUDIT_OP(msg->attr, AUDIT_FS)) {
2042                         //FS audit, send message to all mds
2043                         for (i = 0; i < lmv->desc.ld_tgt_count;i++) {
2044                                 obd_set_info(lmv->tgts[i].ltd_exp, 
2045                                                   keylen, key, vallen, val);
2046                         }
2047                 }
2048                 else if (IS_AUDIT_OP(msg->attr, AUDIT_DIR)) {
2049                         //audit for dir.
2050                         //if dir is splitted, send RPC to all mds involved
2051                         struct lmv_obj *obj;
2052                         struct lustre_id rid;
2053                         int i;
2054                         
2055                         obj = lmv_grab_obj(obd, &msg->id);
2056                         if (obj) {
2057                                 lmv_lock_obj(obj);
2058                                 for (i = 0; i < obj->objcount; i++) {
2059                                         rid = obj->objs[i].id;
2060                                         mds = id_group(&rid);
2061                                         obd_set_info(lmv->tgts[mds].ltd_exp,
2062                                                           keylen, key,
2063                                                           vallen, val);
2064                                 }
2065                                 lmv_unlock_obj(obj);
2066                                 lmv_put_obj(obj);
2067                         }
2068                         else {
2069                                 rc = obd_set_info(lmv->tgts[mds].ltd_exp,
2070                                                  keylen, key, vallen, val);
2071                         }
2072                 }
2073                 else {
2074                         //set audit for file
2075                         rc = obd_set_info(lmv->tgts[mds].ltd_exp,
2076                                           keylen, key, vallen, val);                        
2077                 }
2078                 RETURN(rc);
2079         }
2080         if (((keylen == strlen("flush_cred") &&
2081              strcmp(key, "flush_cred") == 0)) || 
2082              ((keylen == strlen("crypto_type") &&
2083              strcmp(key, "crypto_type") == 0))) {
2084                 int i;
2085
2086                 for (i = 0, tgt = lmv->tgts; i < lmv->desc.ld_tgt_count;
2087                      i++, tgt++) {
2088                         if (!tgt->ltd_exp)
2089                                 continue;
2090                         rc = obd_set_info(tgt->ltd_exp,
2091                                           keylen, key, vallen, val);
2092                         if (rc)
2093                                 RETURN(rc);
2094                 }
2095
2096                 RETURN(0);
2097         }
2098         
2099         if (keylen == strlen("ids") && memcmp(key, "ids", keylen) == 0) {
2100                 struct lustre_id *id = (struct lustre_id *)val;
2101                 
2102                 rc = lmv_check_connect(obd);
2103                 if (rc)
2104                         RETURN(rc);
2105
2106                 rc = obd_set_info(lmv->tgts[id_group(id)].ltd_exp, 
2107                                   keylen, key, vallen, val); 
2108                 RETURN(rc);
2109         }
2110
2111         if (keylen == strlen("chkconnect") && 
2112             memcpy(key, "chkconnect", keylen) == 0) {
2113                 rc = lmv_check_connect(obd);
2114                 RETURN(rc);
2115         }
2116
2117         RETURN(-EINVAL);
2118 }
2119
2120 int lmv_packmd(struct obd_export *exp, struct lov_mds_md **lmmp,
2121                struct lov_stripe_md *lsm)
2122 {
2123         struct obd_device *obd = class_exp2obd(exp);
2124         struct lmv_obd *lmv = &obd->u.lmv;
2125         struct mea *meap, *lsmp;
2126         int mea_size, i;
2127         ENTRY;
2128
2129         mea_size = (sizeof(struct lustre_id) * 
2130                     lmv->desc.ld_tgt_count) + sizeof(struct mea);
2131         if (!lmmp)
2132                 RETURN(mea_size);
2133
2134         if (*lmmp && !lsm) {
2135                 OBD_FREE(*lmmp, mea_size);
2136                 *lmmp = NULL;
2137                 RETURN(0);
2138         }
2139
2140         if (*lmmp == NULL) {
2141                 OBD_ALLOC(*lmmp, mea_size);
2142                 if (*lmmp == NULL)
2143                         RETURN(-ENOMEM);
2144         }
2145
2146         if (!lsm)
2147                 RETURN(mea_size);
2148
2149         lsmp = (struct mea *)lsm;
2150         meap = (struct mea *)*lmmp;
2151
2152         if (lsmp->mea_magic != MEA_MAGIC_LAST_CHAR &&
2153             lsmp->mea_magic != MEA_MAGIC_ALL_CHARS)
2154                 RETURN(-EINVAL);
2155
2156         meap->mea_magic = cpu_to_le32(lsmp->mea_magic);
2157         meap->mea_count = cpu_to_le32(lsmp->mea_count);
2158         meap->mea_master = cpu_to_le32(lsmp->mea_master);
2159
2160         for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
2161                 meap->mea_ids[i] = meap->mea_ids[i];
2162                 id_cpu_to_le(&meap->mea_ids[i]);
2163         }
2164
2165         RETURN(mea_size);
2166 }
2167
2168 int lmv_unpackmd(struct obd_export *exp, struct lov_stripe_md **lsmp,
2169                  struct lov_mds_md *lmm, int lmm_size)
2170 {
2171         struct obd_device *obd = class_exp2obd(exp);
2172         struct mea **tmea = (struct mea **)lsmp;
2173         struct mea *mea = (struct mea *)lmm;
2174         struct lmv_obd *lmv = &obd->u.lmv;
2175         int mea_size, i, rc = 0;
2176         __u32 magic;
2177         ENTRY;
2178
2179         mea_size = sizeof(struct lustre_id) * 
2180                 lmv->desc.ld_tgt_count + sizeof(struct mea);
2181
2182         if (lsmp == NULL)
2183                 return mea_size;
2184
2185         if (*lsmp != NULL && lmm == NULL) {
2186                 OBD_FREE(*tmea, mea_size);
2187                 RETURN(0);
2188         }
2189
2190         LASSERT(mea_size == lmm_size);
2191
2192         OBD_ALLOC(*tmea, mea_size);
2193         if (*tmea == NULL)
2194                 RETURN(-ENOMEM);
2195
2196         if (!lmm)
2197                 RETURN(mea_size);
2198
2199         if (mea->mea_magic == MEA_MAGIC_LAST_CHAR ||
2200             mea->mea_magic == MEA_MAGIC_ALL_CHARS)
2201         {
2202                 magic = le32_to_cpu(mea->mea_magic);
2203         } else {
2204                 struct mea_old *old = (struct mea_old *)lmm;
2205         
2206                 mea_size = sizeof(struct lustre_id) * old->mea_count + 
2207                         sizeof(struct mea_old);
2208         
2209                 if (old->mea_count > 256 || old->mea_master > 256 ||
2210                     lmm_size < mea_size || old->mea_master > old->mea_count) {
2211                         CWARN("bad MEA: count %u, master %u, size %u\n",
2212                               old->mea_count, old->mea_master, mea_size);
2213                         GOTO(out_free_mea, rc = -EINVAL);
2214                 }
2215                 magic = MEA_MAGIC_LAST_CHAR;
2216         }
2217
2218         (*tmea)->mea_magic = magic;
2219         (*tmea)->mea_count = le32_to_cpu(mea->mea_count);
2220         (*tmea)->mea_master = le32_to_cpu(mea->mea_master);
2221
2222         for (i = 0; i < (*tmea)->mea_count; i++) {
2223                 (*tmea)->mea_ids[i] = mea->mea_ids[i];
2224                 id_le_to_cpu(&(*tmea)->mea_ids[i]);
2225         }
2226         RETURN(mea_size);
2227
2228 out_free_mea:
2229         OBD_FREE(*tmea, mea_size);
2230         return rc;
2231 }
2232
2233 int lmv_brw(int rw, struct obd_export *exp, struct obdo *oa,
2234             struct lov_stripe_md *ea, obd_count oa_bufs,
2235             struct brw_page *pgarr, struct obd_trans_info *oti)
2236 {
2237         struct obd_device *obd = exp->exp_obd;
2238         struct lmv_obd *lmv = &obd->u.lmv;
2239         struct mea *mea = (struct mea *) ea;
2240         int err;
2241       
2242         LASSERT(oa != NULL);
2243         LASSERT(ea != NULL);
2244         LASSERT(pgarr != NULL);
2245         LASSERT(oa->o_mds < lmv->desc.ld_tgt_count);
2246
2247         oa->o_gr = id_gen(&mea->mea_ids[oa->o_mds]);
2248         oa->o_id = id_ino(&mea->mea_ids[oa->o_mds]);
2249         oa->o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
2250         
2251         err = obd_brw(rw, lmv->tgts[oa->o_mds].ltd_exp,
2252                       oa, NULL, oa_bufs, pgarr, oti);
2253         RETURN(err);
2254 }
2255
2256 static int lmv_cancel_unused(struct obd_export *exp,
2257                              struct lov_stripe_md *lsm, 
2258                              int flags, void *opaque)
2259 {
2260         struct obd_device *obd = exp->exp_obd;
2261         struct lmv_obd *lmv = &obd->u.lmv;
2262         int rc = 0, err, i;
2263         ENTRY;
2264
2265         LASSERT(lsm == NULL);
2266         
2267         for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
2268                 if (!lmv->tgts[i].ltd_exp || !lmv->tgts[i].active)
2269                         continue;
2270                 
2271                 err = obd_cancel_unused(lmv->tgts[i].ltd_exp,
2272                                         NULL, flags, opaque);
2273                 if (!rc)
2274                         rc = err;
2275         }
2276         RETURN(rc);
2277 }
2278
2279 struct obd_ops lmv_obd_ops = {
2280         .o_owner                = THIS_MODULE,
2281         .o_attach               = lmv_attach,
2282         .o_detach               = lmv_detach,
2283         .o_setup                = lmv_setup,
2284         .o_cleanup              = lmv_cleanup,
2285         .o_connect              = lmv_connect,
2286         .o_disconnect           = lmv_disconnect,
2287         .o_statfs               = lmv_statfs,
2288         .o_llog_init            = lmv_llog_init,
2289         .o_llog_finish          = lmv_llog_finish,
2290         .o_get_info             = lmv_get_info,
2291         .o_set_info             = lmv_set_info,
2292         .o_create               = lmv_obd_create,
2293         .o_packmd               = lmv_packmd,
2294         .o_unpackmd             = lmv_unpackmd,
2295         .o_brw                  = lmv_brw,
2296         .o_init_ea_size         = lmv_init_ea_size,
2297         .o_notify               = lmv_notify,
2298         .o_iocontrol            = lmv_iocontrol,
2299         .o_cancel_unused        = lmv_cancel_unused,
2300 };
2301
2302 struct md_ops lmv_md_ops = {
2303         .m_getstatus           = lmv_getstatus,
2304         .m_getattr             = lmv_getattr,
2305         .m_change_cbdata       = lmv_change_cbdata,
2306         .m_change_cbdata_name  = lmv_change_cbdata_name,
2307         .m_close               = lmv_close,
2308         .m_create              = lmv_create,
2309         .m_done_writing        = lmv_done_writing,
2310         .m_enqueue             = lmv_enqueue,
2311         .m_getattr_lock        = lmv_getattr_lock,
2312         .m_intent_lock         = lmv_intent_lock,
2313         .m_link                = lmv_link,
2314         .m_rename              = lmv_rename,
2315         .m_setattr             = lmv_setattr,
2316         .m_sync                = lmv_sync,
2317         .m_readpage            = lmv_readpage,
2318         .m_unlink              = lmv_unlink,
2319         .m_get_real_obd        = lmv_get_real_obd,
2320         .m_valid_attrs         = lmv_valid_attrs,
2321         .m_delete_inode        = lmv_delete_inode,
2322         .m_access_check        = lmv_access_check,
2323 };
2324
2325 int __init lmv_init(void)
2326 {
2327         struct lprocfs_static_vars lvars;
2328         int rc;
2329
2330         obj_cache = kmem_cache_create("lmv_objects",
2331                                       sizeof(struct lmv_obj),
2332                                       0, 0, NULL, NULL);
2333         if (!obj_cache) {
2334                 CERROR("error allocating lmv objects cache\n");
2335                 return -ENOMEM;
2336         }
2337
2338         lprocfs_init_vars(lmv, &lvars);
2339         rc = class_register_type(&lmv_obd_ops, &lmv_md_ops,
2340                                  lvars.module_vars,
2341                                  OBD_LMV_DEVICENAME);
2342         if (rc)
2343                 kmem_cache_destroy(obj_cache);
2344         
2345         return rc;
2346 }
2347
#ifdef __KERNEL__
/*
 * Module exit point: unregister the LMV device type and destroy the lmv
 * object cache, asserting that the cache could really be freed.
 */
static void lmv_exit(void)
{
        int rc;

        class_unregister_type(OBD_LMV_DEVICENAME);

        /*
         * Destroy the cache outside of the assertion so that the call does
         * not depend on how the assertion macro evaluates its condition.
         */
        rc = kmem_cache_destroy(obj_cache);
        LASSERTF(rc == 0,
                 "can't free lmv objects cache, %d object(s) "
                 "still in use\n", atomic_read(&obj_cache_count));
}

MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
MODULE_DESCRIPTION("Lustre Logical Metadata Volume OBD driver");
MODULE_LICENSE("GPL");

module_init(lmv_init);
module_exit(lmv_exit);
#endif