Whamcloud - gitweb
- moved dir entries are deleted from the original dir (master object)
[fs/lustre-release.git] / lustre / lmv / lmv_obd.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  * Copyright (C) 2002, 2003 Cluster File Systems, Inc.
5  *
6  *   This file is part of Lustre, http://www.lustre.org.
7  *
8  *   Lustre is free software; you can redistribute it and/or
9  *   modify it under the terms of version 2 of the GNU General Public
10  *   License as published by the Free Software Foundation.
11  *
12  *   Lustre is distributed in the hope that it will be useful,
13  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
14  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  *   GNU General Public License for more details.
16  *
17  *   You should have received a copy of the GNU General Public License
18  *   along with Lustre; if not, write to the Free Software
19  *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20  */
21
22 #ifndef EXPORT_SYMTAB
23 # define EXPORT_SYMTAB
24 #endif
25 #define DEBUG_SUBSYSTEM S_LMV
26 #ifdef __KERNEL__
27 #include <linux/slab.h>
28 #include <linux/module.h>
29 #include <linux/init.h>
30 #include <linux/slab.h>
31 #include <linux/pagemap.h>
32 #include <asm/div64.h>
33 #include <linux/seq_file.h>
34 #else
35 #include <liblustre.h>
36 #endif
37 #include <linux/ext2_fs.h>
38
39 #include <linux/obd_support.h>
40 #include <linux/lustre_lib.h>
41 #include <linux/lustre_net.h>
42 #include <linux/lustre_idl.h>
43 #include <linux/lustre_dlm.h>
44 #include <linux/lustre_mds.h>
45 #include <linux/obd_class.h>
46 #include <linux/obd_ost.h>
47 #include <linux/lprocfs_status.h>
48 #include <linux/lustre_fsfilt.h>
49 #include <linux/obd_lmv.h>
50 #include "lmv_internal.h"
51
52 /* Error codes:
53  *
54  *  -EINVAL  : UUID can't be found in the LMV's target list
55  *  -ENOTCONN: The UUID is found, but the target connection is bad (!)
56  *  -EBADF   : The UUID is found, but the OBD of the wrong type (!)
57  */
58 static int lmv_set_mdc_active(struct lmv_obd *lmv, struct obd_uuid *uuid,
59                               int activate)
60 {
61         struct obd_device *obd;
62         struct lmv_tgt_desc *tgt;
63         int i, rc = 0;
64         ENTRY;
65
66         CDEBUG(D_INFO, "Searching in lmv %p for uuid %s (activate=%d)\n",
67                lmv, uuid->uuid, activate);
68
69         spin_lock(&lmv->lmv_lock);
70         for (i = 0, tgt = lmv->tgts; i < lmv->desc.ld_tgt_count; i++, tgt++) {
71                 CDEBUG(D_INFO, "lmv idx %d is %s conn "LPX64"\n",
72                        i, tgt->uuid.uuid, tgt->ltd_exp->exp_handle.h_cookie);
73                 if (strncmp(uuid->uuid, tgt->uuid.uuid, sizeof uuid->uuid) == 0)
74                         break;
75         }
76
77         if (i == lmv->desc.ld_tgt_count)
78                 GOTO(out, rc = -EINVAL);
79
80         obd = class_exp2obd(tgt->ltd_exp);
81         if (obd == NULL) {
82                 /* This can happen if OST failure races with node shutdown */
83                 GOTO(out, rc = -ENOTCONN);
84         }
85
86         CDEBUG(D_INFO, "Found OBD %s=%s device %d (%p) type %s at LMV idx %d\n",
87                obd->obd_name, obd->obd_uuid.uuid, obd->obd_minor, obd,
88                obd->obd_type->typ_name, i);
89         LASSERT(strcmp(obd->obd_type->typ_name, LUSTRE_MDC_NAME) == 0);
90
91         if (tgt->active == activate) {
92                 CDEBUG(D_INFO, "OBD %p already %sactive!\n", obd,
93                        activate ? "" : "in");
94                 GOTO(out, rc);
95         }
96
97         CDEBUG(D_INFO, "Marking OBD %p %sactive\n", obd, activate ? "" : "in");
98
99         tgt->active = activate;
100         if (activate)
101                 lmv->desc.ld_active_tgt_count++;
102         else
103                 lmv->desc.ld_active_tgt_count--;
104
105         EXIT;
106  out:
107         spin_unlock(&lmv->lmv_lock);
108         return rc;
109 }
110
111 static int lmv_notify(struct obd_device *obd, struct obd_device *watched,
112                       int active)
113 {
114         int rc;
115         struct obd_uuid *uuid;
116
117         if (strcmp(watched->obd_type->typ_name, LUSTRE_MDC_NAME)) {
118                 CERROR("unexpected notification of %s %s!\n",
119                        watched->obd_type->typ_name,
120                        watched->obd_name);
121                 return -EINVAL;
122         }
123         uuid = &watched->u.cli.cl_import->imp_target_uuid;
124
125         /* Set MDC as active before notifying the observer, so the
126          * observer can use the MDC normally.  
127          */
128         rc = lmv_set_mdc_active(&obd->u.lmv, uuid, active);
129         if (rc) {
130                 CERROR("%sactivation of %s failed: %d\n",
131                        active ? "" : "de", uuid->uuid, rc);
132                 RETURN(rc);
133         }
134
135         if (obd->obd_observer)
136                 /* Pass the notification up the chain. */
137                 rc = obd_notify(obd->obd_observer, watched, active);
138
139         RETURN(rc);
140 }
141
142 int lmv_attach(struct obd_device *dev, obd_count len, void *data)
143 {
144         struct lprocfs_static_vars lvars;
145         int rc;
146         ENTRY;
147
148         lprocfs_init_vars(lmv, &lvars);
149         rc = lprocfs_obd_attach(dev, lvars.obd_vars);
150         if (rc == 0) {
151 #ifdef __KERNEL__
152                 struct proc_dir_entry *entry;
153                 
154                 entry = create_proc_entry("target_obd", 0444, dev->obd_proc_entry);
155                 if (entry == NULL)
156                         RETURN(-ENOMEM);
157                 /* entry->proc_fops = &lmv_proc_target_fops; */
158                 entry->data = dev;
159 #endif
160        }
161         RETURN (rc);
162 }
163
/* Detach-time hook: drop the lprocfs registration of @dev and hand back
 * whatever lprocfs_obd_detach() reports. */
int lmv_detach(struct obd_device *dev)
{
        int err;

        err = lprocfs_obd_detach(dev);
        return err;
}
168
169 /* This is fake connect function. Its purpose is to initialize lmv and 
170  * say caller that everything is okay. Real connection will be performed
171  * later. */
172 static int lmv_connect(struct lustre_handle *conn, struct obd_device *obd,
173                        struct obd_uuid *cluuid)
174 {
175         struct lmv_obd *lmv = &obd->u.lmv;
176         struct obd_export *exp;
177         int rc;
178         ENTRY;
179
180         rc = class_connect(conn, obd, cluuid);
181         if (rc) {
182                 CERROR("class_connection() returned %d\n", rc);
183                 RETURN(rc);
184         }
185
186         exp = class_conn2export(conn);
187         /* We don't want to actually do the underlying connections more than
188          * once, so keep track. */
189         lmv->refcount++;
190         if (lmv->refcount > 1) {
191                 class_export_put(exp);
192                 RETURN(0);
193         }
194
195         lmv->cluuid = *cluuid;
196         lmv->connected = 0;
197         lmv->exp = exp;
198
199         RETURN(0);
200 }
201
202 void lmv_set_timeouts(struct obd_device *obd)
203 {
204         struct lmv_tgt_desc *tgts;
205         struct lmv_obd *lmv;
206         int i;
207
208         lmv = &obd->u.lmv;
209         if (lmv->server_timeout == 0)
210                 return;
211
212         if (lmv->connected == 0)
213                 return;
214
215         for (i = 0, tgts = lmv->tgts; i < lmv->desc.ld_tgt_count; i++, tgts++) {
216                 if (tgts->ltd_exp == NULL)
217                         continue;
218                 obd_set_info(tgts->ltd_exp, strlen("inter_mds"),
219                              "inter_mds", 0, NULL);
220         }
221 }
222
223 /* Performs a check if passed obd is connected. If no - connect it. */
224 int lmv_check_connect(struct obd_device *obd) {
225         struct lmv_obd *lmv = &obd->u.lmv;
226         struct obd_uuid *cluuid;
227         struct lmv_tgt_desc *tgts;
228         struct obd_export *exp;
229         int rc, rc2, i;
230
231         if (lmv->connected)
232                 return 0;
233       
234         lmv->connected = 1;
235         cluuid = &lmv->cluuid;
236         exp = lmv->exp;
237         CDEBUG(D_OTHER, "time to connect %s to %s\n",
238                         cluuid->uuid, obd->obd_name);
239
240         for (i = 0, tgts = lmv->tgts; i < lmv->desc.ld_tgt_count; i++, tgts++) {
241                 struct obd_device *tgt_obd;
242                 struct obd_uuid lmv_osc_uuid = { "LMV_OSC_UUID" };
243                 struct lustre_handle conn = {0, };
244
245                 LASSERT(tgts != NULL);
246
247                 tgt_obd = class_find_client_obd(&tgts->uuid, LUSTRE_MDC_NAME, 
248                                                 &obd->obd_uuid);
249                 if (!tgt_obd) {
250                         CERROR("Target %s not attached\n", tgts->uuid.uuid);
251                         GOTO(out_disc, rc = -EINVAL);
252                 }
253
254                 /* for MDS: don't connect to yourself */
255                 if (obd_uuid_equals(&tgts->uuid, cluuid)) {
256                         CDEBUG(D_OTHER, "don't connect back to %s\n",
257                                cluuid->uuid);
258                         tgts->ltd_exp = NULL;
259                         continue;
260                 }
261
262                 CDEBUG(D_OTHER, "connect to %s(%s) - %s, %s FOR %s\n",
263                         tgt_obd->obd_name, tgt_obd->obd_uuid.uuid,
264                         tgts->uuid.uuid, obd->obd_uuid.uuid,
265                         cluuid->uuid);
266
267                 if (!tgt_obd->obd_set_up) {
268                         CERROR("Target %s not set up\n", tgts->uuid.uuid);
269                         GOTO(out_disc, rc = -EINVAL);
270                 }
271                 
272                 rc = obd_connect(&conn, tgt_obd, &lmv_osc_uuid);
273                 if (rc) {
274                         CERROR("Target %s connect error %d\n",
275                                 tgts->uuid.uuid, rc);
276                         GOTO(out_disc, rc);
277                 }
278                 tgts->ltd_exp = class_conn2export(&conn);
279
280                 obd_init_ea_size(tgts->ltd_exp, lmv->max_easize,
281                                  lmv->max_cookiesize);
282                 
283                 rc = obd_register_observer(tgt_obd, obd);
284                 if (rc) {
285                         CERROR("Target %s register_observer error %d\n",
286                                tgts->uuid.uuid, rc);
287                         obd_disconnect(tgts->ltd_exp, 0);
288                         GOTO(out_disc, rc);
289                 }
290
291                 lmv->desc.ld_active_tgt_count++;
292                 tgts->active = 1;
293                 
294                 CDEBUG(D_OTHER, "connected to %s(%s) successfully (%d)\n",
295                         tgt_obd->obd_name, tgt_obd->obd_uuid.uuid,
296                         atomic_read(&obd->obd_refcount));
297         }
298
299         lmv_set_timeouts(obd);
300
301         class_export_put(exp);
302         return 0;
303
304  out_disc:
305         while (i-- > 0) {
306                 struct obd_uuid uuid;
307                 --tgts;
308                 --lmv->desc.ld_active_tgt_count;
309                 tgts->active = 0;
310                 /* save for CERROR below; (we know it's terminated) */
311                 uuid = tgts->uuid;
312                 rc2 = obd_disconnect(tgts->ltd_exp, 0);
313                 if (rc2)
314                         CERROR("error: LMV target %s disconnect on MDT idx %d: "
315                                "rc = %d\n", uuid.uuid, i, rc2);
316         }
317         class_disconnect(exp, 0);
318         RETURN (rc);
319 }
320
321 static int lmv_disconnect(struct obd_export *exp, int flags)
322 {
323         struct obd_device *obd = class_exp2obd(exp);
324         struct lmv_obd *lmv = &obd->u.lmv;
325         int rc, i;
326         ENTRY;
327
328         if (!lmv->tgts)
329                 goto out_local;
330
331         /* Only disconnect the underlying layers on the final disconnect. */
332         lmv->refcount--;
333         if (lmv->refcount != 0)
334                 goto out_local;
335
336         for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
337                 if (lmv->tgts[i].ltd_exp == NULL)
338                         continue;
339
340                 if (obd->obd_no_recov) {
341                         /* Pass it on to our clients.
342                          * XXX This should be an argument to disconnect,
343                          * XXX not a back-door flag on the OBD.  Ah well.
344                          */
345                         struct obd_device *mdc_obd;
346                         mdc_obd = class_exp2obd(lmv->tgts[i].ltd_exp);
347                         if (mdc_obd)
348                                 mdc_obd->obd_no_recov = 1;
349                 }
350
351                 CDEBUG(D_OTHER, "disconnected from %s(%s) successfully\n",
352                         lmv->tgts[i].ltd_exp->exp_obd->obd_name,
353                         lmv->tgts[i].ltd_exp->exp_obd->obd_uuid.uuid);
354
355                 obd_register_observer(lmv->tgts[i].ltd_exp->exp_obd, NULL);
356
357                 rc = obd_disconnect(lmv->tgts[i].ltd_exp, flags);
358                 if (lmv->tgts[i].active) {
359                         lmv->desc.ld_active_tgt_count--;
360                         lmv->tgts[i].active = 0;
361                 }
362                 lmv->tgts[i].ltd_exp = NULL;
363         }
364
365  out_local:
366         /* FIXME: cleanup here */
367         if (!lmv->connected)
368                 class_export_put(exp);
369         rc = class_disconnect(exp, 0);
370         RETURN(rc);
371 }
372
373 static int lmv_setup(struct obd_device *obd, obd_count len, void *buf)
374 {
375         struct lustre_cfg *lcfg = buf;
376         struct lmv_desc *desc;
377         struct lmv_obd *lmv = &obd->u.lmv;
378         struct obd_uuid *uuids;
379         struct lmv_tgt_desc *tgts;
380         int i;
381         int rc = 0;
382         ENTRY;
383
384         if (lcfg->lcfg_inllen1 < 1) {
385                 CERROR("LMV setup requires a descriptor\n");
386                 RETURN(-EINVAL);
387         }
388
389         if (lcfg->lcfg_inllen2 < 1) {
390                 CERROR("LMV setup requires an OST UUID list\n");
391                 RETURN(-EINVAL);
392         }
393
394         desc = (struct lmv_desc *)lcfg->lcfg_inlbuf1;
395         if (sizeof(*desc) > lcfg->lcfg_inllen1) {
396                 CERROR("descriptor size wrong: %d > %d\n",
397                        (int)sizeof(*desc), lcfg->lcfg_inllen1);
398                 RETURN(-EINVAL);
399         }
400
401         uuids = (struct obd_uuid *)lcfg->lcfg_inlbuf2;
402         if (sizeof(*uuids) * desc->ld_tgt_count != lcfg->lcfg_inllen2) {
403                 CERROR("UUID array size wrong: %u * %u != %u\n",
404                        sizeof(*uuids), desc->ld_tgt_count, lcfg->lcfg_inllen2);
405                 RETURN(-EINVAL);
406         }
407
408         lmv->bufsize = sizeof(struct lmv_tgt_desc) * desc->ld_tgt_count;
409         OBD_ALLOC(lmv->tgts, lmv->bufsize);
410         if (lmv->tgts == NULL) {
411                 CERROR("Out of memory\n");
412                 RETURN(-EINVAL);
413         }
414
415         lmv->desc = *desc;
416         spin_lock_init(&lmv->lmv_lock);
417         
418         for (i = 0, tgts = lmv->tgts; i < desc->ld_tgt_count; i++, tgts++)
419                 tgts->uuid = uuids[i];
420         
421         lmv->max_easize = sizeof(struct ll_fid) * desc->ld_tgt_count
422                 + sizeof(struct mea);
423         lmv->max_cookiesize = 0;
424
425         RETURN(rc);
426 }
427
428 static int lmv_statfs(struct obd_device *obd, struct obd_statfs *osfs,
429                       unsigned long max_age)
430 {
431         struct lmv_obd *lmv = &obd->u.lmv;
432         struct obd_statfs temp;
433         int rc = 0, i;
434         ENTRY;
435         
436         rc = lmv_check_connect(obd);
437         if (rc)
438                 RETURN(rc);
439                 
440         for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
441                 rc = obd_statfs(lmv->tgts[i].ltd_exp->exp_obd, &temp, max_age);
442                 if (rc) {
443                         CERROR("can't stat MDS #%d (%s)\n", i,
444                                lmv->tgts[i].ltd_exp->exp_obd->obd_name);
445                         RETURN(rc);
446                 }
447                 if (i == 0) {
448                         memcpy(osfs, &temp, sizeof(temp));
449                 } else {
450                         osfs->os_bavail += temp.os_bavail;
451                         osfs->os_blocks += temp.os_blocks;
452                         osfs->os_ffree += temp.os_ffree;
453                         osfs->os_files += temp.os_files;
454                 }
455         }
456         RETURN(rc);
457 }
458
459 static int lmv_cleanup(struct obd_device *obd, int flags) 
460 {
461         struct lmv_obd *lmv = &obd->u.lmv;
462         ENTRY;
463         lmv_cleanup_objs(obd);
464         OBD_FREE(lmv->tgts, lmv->bufsize);
465         RETURN(0);
466 }
467
468 static int lmv_getstatus(struct obd_export *exp, struct ll_fid *fid)
469 {
470         struct obd_device *obd = exp->exp_obd;
471         struct lmv_obd *lmv = &obd->u.lmv;
472         int rc;
473         ENTRY;
474         rc = lmv_check_connect(obd);
475         if (rc)
476                 RETURN(rc);
477         rc = md_getstatus(lmv->tgts[0].ltd_exp, fid);
478         fid->mds = 0;
479         RETURN(rc);
480 }
481
482 static int lmv_getattr(struct obd_export *exp, struct ll_fid *fid,
483                 unsigned long valid, unsigned int ea_size,
484                 struct ptlrpc_request **request)
485 {
486         struct obd_device *obd = exp->exp_obd;
487         struct lmv_obd *lmv = &obd->u.lmv;
488         int rc, i = fid->mds;
489         struct lmv_obj *obj;
490         ENTRY;
491         rc = lmv_check_connect(obd);
492         if (rc)
493                 RETURN(rc);
494         obj = lmv_grab_obj(obd, fid, 0);
495         CDEBUG(D_OTHER, "GETATTR for %lu/%lu/%lu %s\n",
496                (unsigned long) fid->mds,
497                (unsigned long) fid->id,
498                (unsigned long) fid->generation,
499                obj ? "(splitted)" : "");
500
501         LASSERT(fid->mds < lmv->desc.ld_tgt_count);
502         rc = md_getattr(lmv->tgts[i].ltd_exp, fid,
503                              valid, ea_size, request);
504         if (rc == 0 && obj) {
505                 /* we have to loop over dirobjs here and gather attrs
506                  * for all the slaves */
507 #warning "attrs gathering here"
508         }
509         lmv_put_obj(obj);
510         RETURN(rc);
511 }
512
513 static int lmv_change_cbdata(struct obd_export *exp,
514                                  struct ll_fid *fid, 
515                                  ldlm_iterator_t it, void *data)
516 {
517         struct obd_device *obd = exp->exp_obd;
518         struct lmv_obd *lmv = &obd->u.lmv;
519         int rc = 0;
520         ENTRY;
521         
522         rc = lmv_check_connect(obd);
523         if (rc)
524                 RETURN(rc);
525         CDEBUG(D_OTHER, "CBDATA for %lu/%lu/%lu\n",
526                (unsigned long) fid->mds,
527                (unsigned long) fid->id,
528                (unsigned long) fid->generation);
529         LASSERT(fid->mds < lmv->desc.ld_tgt_count);
530         rc = md_change_cbdata(lmv->tgts[fid->mds].ltd_exp, fid, it, data);
531         RETURN(rc);
532 }
533
534 static int lmv_change_cbdata_name(struct obd_export *exp, struct ll_fid *pfid,
535                                   char *name, int len, struct ll_fid *cfid,
536                                   ldlm_iterator_t it, void *data)
537 {
538         struct obd_device *obd = exp->exp_obd;
539         struct lmv_obd *lmv = &obd->u.lmv;
540         struct lmv_obj *obj;
541         int rc = 0, mds;
542         ENTRY;
543         rc = lmv_check_connect(obd);
544         if (rc)
545                 RETURN(rc);
546         LASSERT(pfid->mds < lmv->desc.ld_tgt_count);
547         LASSERT(cfid->mds < lmv->desc.ld_tgt_count);
548         CDEBUG(D_OTHER, "CBDATA for %lu/%lu/%lu:%*s -> %lu/%lu/%lu\n",
549                (unsigned long) pfid->mds, (unsigned long) pfid->id,
550                (unsigned long) pfid->generation, len, name,
551                (unsigned long) cfid->mds, (unsigned long) cfid->id,
552                (unsigned long) cfid->generation);
553
554         /* this is default mds for directory name belongs to */
555         mds = pfid->mds;
556         obj = lmv_grab_obj(obd, pfid, 0);
557         if (obj) {
558                 /* directory is splitted. look for right mds for this name */
559                 mds = raw_name2idx(obj->objcount, name, len);
560                 lmv_put_obj(obj);
561         }
562         rc = md_change_cbdata(lmv->tgts[mds].ltd_exp, cfid, it, data);
563         RETURN(rc);
564 }
565
566 static int lmv_valid_attrs(struct obd_export *exp, struct ll_fid *fid) 
567 {
568         struct obd_device *obd = exp->exp_obd;
569         struct lmv_obd *lmv = &obd->u.lmv;
570         int rc = 0;
571         ENTRY;
572         rc = lmv_check_connect(obd);
573         if (rc)
574                 RETURN(rc);
575         CDEBUG(D_OTHER, "validate %lu/%lu/%lu\n",
576                (unsigned long) fid->mds,
577                (unsigned long) fid->id,
578                (unsigned long) fid->generation);
579         LASSERT(fid->mds < lmv->desc.ld_tgt_count);
580         rc = md_valid_attrs(lmv->tgts[fid->mds].ltd_exp, fid);
581         RETURN(rc);
582 }
583
584 int lmv_close(struct obd_export *exp, struct obdo *obdo,
585                   struct obd_client_handle *och,
586                   struct ptlrpc_request **request)
587 {
588         struct obd_device *obd = exp->exp_obd;
589         struct lmv_obd *lmv = &obd->u.lmv;
590         int rc, i = obdo->o_mds;
591         ENTRY;
592         rc = lmv_check_connect(obd);
593         if (rc)
594                 RETURN(rc);
595         LASSERT(i < lmv->desc.ld_tgt_count);
596         CDEBUG(D_OTHER, "CLOSE %lu/%lu/%lu\n", (unsigned long) obdo->o_mds,
597                (unsigned long) obdo->o_id, (unsigned long) obdo->o_generation);
598         rc = md_close(lmv->tgts[i].ltd_exp, obdo, och, request);
599         RETURN(rc);
600 }
601
602 int lmv_get_mea_and_update_object(struct obd_export *exp, struct ll_fid *fid)
603 {
604         struct obd_device *obd = exp->exp_obd;
605         struct lmv_obd *lmv = &obd->u.lmv;
606         struct ptlrpc_request *req = NULL;
607         struct lustre_md md;
608         unsigned long valid;
609         int mealen, rc;
610
611         md.mea = NULL;
612         mealen = MEA_SIZE_LMV(lmv);
613         
614         valid = OBD_MD_FLEASIZE | OBD_MD_FLDIREA;
615
616         /* time to update mea of parent fid */
617         rc = md_getattr(lmv->tgts[fid->mds].ltd_exp, fid,
618                         valid, mealen, &req);
619         if (rc) {
620                 CERROR("md_getattr() failed, rc = %d\n", rc);
621                 GOTO(cleanup, rc);
622         }
623
624         rc = mdc_req2lustre_md(exp, req, 0, NULL, &md);
625         if (rc) {
626                 CERROR("mdc_req2lustre_md() failed, rc = %d\n", rc);
627                 GOTO(cleanup, rc);
628         }
629
630         if (md.mea == NULL)
631                 GOTO(cleanup, rc = -ENODATA);
632
633         rc = lmv_create_obj_from_attrs(exp, fid, md.mea);
634         obd_free_memmd(exp, (struct lov_stripe_md **) &md.mea);
635
636 cleanup:
637         if (req)
638                 ptlrpc_req_finished(req);
639         RETURN(rc);
640 }
641
642 int lmv_create(struct obd_export *exp, struct mdc_op_data *op_data,
643                const void *data, int datalen, int mode, __u32 uid,
644                __u32 gid, __u64 rdev, struct ptlrpc_request **request)
645 {
646         struct obd_device *obd = exp->exp_obd;
647         struct lmv_obd *lmv = &obd->u.lmv;
648         struct mds_body *mds_body;
649         struct lmv_obj *obj;
650         int rc, mds;
651         ENTRY;
652
653         rc = lmv_check_connect(obd);
654         if (rc)
655                 RETURN(rc);
656
657         if (!lmv->desc.ld_active_tgt_count)
658                 RETURN(-EIO);
659 repeat:
660         obj = lmv_grab_obj(obd, &op_data->fid1, 0);
661         if (obj) {
662                 mds = raw_name2idx(obj->objcount, op_data->name,
663                                    op_data->namelen);
664                 op_data->fid1 = obj->objs[mds].fid;
665                 lmv_put_obj(obj);
666         }
667
668         CDEBUG(D_OTHER, "CREATE '%*s' on %lu/%lu/%lu\n",
669                         op_data->namelen, op_data->name,
670                         (unsigned long) op_data->fid1.mds,
671                         (unsigned long) op_data->fid1.id,
672                         (unsigned long) op_data->fid1.generation);
673         rc = md_create(lmv->tgts[op_data->fid1.mds].ltd_exp, op_data, data,
674                        datalen, mode, uid, gid, rdev, request);
675         if (rc == 0) {
676                 if (*request == NULL)
677                      RETURN(rc);
678                 mds_body = lustre_msg_buf((*request)->rq_repmsg, 0,
679                                           sizeof(*mds_body));
680                 LASSERT(mds_body != NULL);
681                 CDEBUG(D_OTHER, "created. id = %lu, generation = %lu, mds = %d\n",
682                        (unsigned long) mds_body->fid1.id,
683                        (unsigned long) mds_body->fid1.generation,
684                        op_data->fid1.mds);
685                 LASSERT(mds_body->valid & OBD_MD_MDS ||
686                         mds_body->mds == op_data->fid1.mds);
687         } else if (rc == -ERESTART) {
688                 /* directory got splitted. time to update local object
689                  * and repeat the request with proper MDS */
690                 rc = lmv_get_mea_and_update_object(exp, &op_data->fid1);
691                 if (rc == 0) {
692                         ptlrpc_req_finished(*request);
693                         goto repeat;
694                 }
695         }
696         RETURN(rc);
697 }
698
699 int lmv_done_writing(struct obd_export *exp, struct obdo *obdo)
700 {
701         struct obd_device *obd = exp->exp_obd;
702         struct lmv_obd *lmv = &obd->u.lmv;
703         int rc;
704         ENTRY;
705         rc = lmv_check_connect(obd);
706         if (rc)
707                 RETURN(rc);
708
709         /* FIXME: choose right MDC here */
710         CWARN("this method isn't implemented yet\n");
711         rc = md_done_writing(lmv->tgts[0].ltd_exp, obdo);
712         RETURN(rc);
713 }
714
715 int lmv_enqueue_slaves(struct obd_export *exp, int locktype,
716                          struct lookup_intent *it, int lockmode,
717                          struct mdc_op_data *data, struct lustre_handle *lockh,
718                          void *lmm, int lmmsize,
719                          ldlm_completion_callback cb_completion,
720                          ldlm_blocking_callback cb_blocking, void *cb_data)
721 {
722         struct obd_device *obd = exp->exp_obd;
723         struct lmv_obd *lmv = &obd->u.lmv;
724         struct mea *mea = data->mea1;
725         struct mdc_op_data data2;
726         int i, rc, mds;
727         ENTRY;
728
729         LASSERT(mea != NULL);
730         for (i = 0; i < mea->mea_count; i++) {
731                 if (lmv->tgts[i].ltd_exp == NULL)
732                         continue;
733
734                 memset(&data2, 0, sizeof(data2));
735                 data2.fid1 = mea->mea_fids[i];
736                 mds = data2.fid1.mds;
737                 rc = md_enqueue(lmv->tgts[mds].ltd_exp, locktype, it, lockmode,
738                                 &data2, lockh + i, lmm, lmmsize, cb_completion,
739                                 cb_blocking, cb_data);
740                 CDEBUG(D_OTHER, "take lock on slave %lu/%lu/%lu -> %d/%d\n",
741                        (unsigned long) mea->mea_fids[i].mds,
742                        (unsigned long) mea->mea_fids[i].id,
743                        (unsigned long) mea->mea_fids[i].generation,
744                        rc, it->d.lustre.it_status);
745                 if (rc)
746                         GOTO(cleanup, rc);
747                 if (it->d.lustre.it_data) {
748                         struct ptlrpc_request *req;
749                         req = (struct ptlrpc_request *) it->d.lustre.it_data;
750                         ptlrpc_req_finished(req);
751                 }
752                 
753                 if (it->d.lustre.it_status)
754                         GOTO(cleanup, rc = it->d.lustre.it_status);
755         }
756         RETURN(0);
757         
758 cleanup:
759         /* drop all taken locks */
760         while (--i >= 0) {
761                 if (lockh[i].cookie)
762                         ldlm_lock_decref(lockh + i, lockmode);
763                 lockh[i].cookie = 0;
764         }
765         RETURN(rc);
766 }
767
768 int lmv_enqueue(struct obd_export *exp, int lock_type,
769                 struct lookup_intent *it, int lock_mode,
770                 struct mdc_op_data *data, struct lustre_handle *lockh,
771                 void *lmm, int lmmsize,
772                 ldlm_completion_callback cb_completion,
773                 ldlm_blocking_callback cb_blocking, void *cb_data)
774 {
775         struct obd_device *obd = exp->exp_obd;
776         struct lmv_obd *lmv = &obd->u.lmv;
777         struct lmv_obj *obj;
778         int rc, mds;
779         ENTRY;
780
781         rc = lmv_check_connect(obd);
782         if (rc)
783                 RETURN(rc);
784
785         if (it->it_op == IT_UNLINK) {
786                 rc = lmv_enqueue_slaves(exp, lock_type, it, lock_mode,
787                                         data, lockh, lmm, lmmsize,
788                                         cb_completion, cb_blocking, cb_data);
789                 RETURN(rc);
790         }
791
792         if (data->namelen) {
793                 obj = lmv_grab_obj(obd, &data->fid1, 0);
794                 if (obj) {
795                         /* directory is splitted. look for
796                          * right mds for this name */
797                         mds = raw_name2idx(obj->objcount, (char *)data->name,
798                                            data->namelen);
799                         data->fid1 = obj->objs[mds].fid;
800                         lmv_put_obj(obj);
801                 }
802         }
803         CDEBUG(D_OTHER, "ENQUEUE '%s' on %lu/%lu\n",
804                LL_IT2STR(it), (unsigned long) data->fid1.id,
805                (unsigned long) data->fid1.generation);
806         rc = md_enqueue(lmv->tgts[data->fid1.mds].ltd_exp, lock_type, it,
807                         lock_mode, data, lockh, lmm, lmmsize, cb_completion,
808                         cb_blocking, cb_data);
809
810         RETURN(rc);
811 }
812
813 int lmv_getattr_name(struct obd_export *exp, struct ll_fid *fid,
814                          char *filename, int namelen, unsigned long valid,
815                          unsigned int ea_size, struct ptlrpc_request **request)
816 {
817         struct obd_device *obd = exp->exp_obd;
818         struct lmv_obd *lmv = &obd->u.lmv;
819         struct ll_fid rfid = *fid;
820         int rc, mds = fid->mds;
821         struct mds_body *body;
822         struct lmv_obj *obj;
823         ENTRY;
824         rc = lmv_check_connect(obd);
825         if (rc)
826                 RETURN(rc);
827 repeat:
828         obj = lmv_grab_obj(obd, fid, 0);
829         if (obj) {
830                 /* directory is splitted. look for right mds for this name */
831                 mds = raw_name2idx(obj->objcount, filename, namelen - 1);
832                 rfid = obj->objs[mds].fid;
833                 lmv_put_obj(obj);
834         }
835         CDEBUG(D_OTHER, "getattr_name for %*s on %lu/%lu/%lu -> %lu/%lu/%lu\n",
836                namelen, filename, (unsigned long) fid->mds,
837                (unsigned long) fid->id, (unsigned long) fid->generation,
838                (unsigned long) rfid.mds, (unsigned long) rfid.id,
839                (unsigned long) rfid.generation);
840         rc = md_getattr_name(lmv->tgts[mds].ltd_exp, &rfid, filename, namelen,
841                                   valid, ea_size, request);
842         if (rc == 0) {
843                 /* this could be cross-node reference. in this case all
844                  * we have right now is mds/ino/generation triple. we'd
845                  * like to find other attributes */
846                 body = lustre_msg_buf((*request)->rq_repmsg, 0, sizeof(*body));
847                 LASSERT(body != NULL);
848                 if (body->valid & OBD_MD_MDS) {
849                         struct ptlrpc_request *req = NULL;
850                         rfid = body->fid1;
851                         CDEBUG(D_OTHER, "request attrs for %lu/%lu/%lu\n",
852                                (unsigned long) rfid.mds,
853                                (unsigned long) rfid.id,
854                                (unsigned long) rfid.generation);
855                         rc = md_getattr_name(lmv->tgts[rfid.mds].ltd_exp, &rfid,
856                                              NULL, 1, valid, ea_size, &req);
857                         ptlrpc_req_finished(*request);
858                         *request = req;
859                 }
860         } else if (rc == -ERESTART) {
861                 /* directory got splitted. time to update local object
862                  * and repeat the request with proper MDS */
863                 rc = lmv_get_mea_and_update_object(exp, &rfid);
864                 if (rc == 0) {
865                         ptlrpc_req_finished(*request);
866                         goto repeat;
867                 }
868         }
869         RETURN(rc);
870 }
871
872
873 /*
874  * llite passes fid of an target inode in data->fid1 and
875  * fid of directory in data->fid2
876  */
877 int lmv_link(struct obd_export *exp, struct mdc_op_data *data,
878              struct ptlrpc_request **request)
879 {
880         struct obd_device *obd = exp->exp_obd;
881         struct lmv_obd *lmv = &obd->u.lmv;
882         struct lmv_obj *obj;
883         int rc;
884         ENTRY;
885         rc = lmv_check_connect(obd);
886         if (rc)
887                 RETURN(rc);
888         if (data->namelen != 0) {
889                 /* usual link request */
890                 obj = lmv_grab_obj(obd, &data->fid1, 0);
891                 if (obj) {
892                         rc = raw_name2idx(obj->objcount, data->name,
893                                          data->namelen);
894                         data->fid1 = obj->objs[rc].fid;
895                         lmv_put_obj(obj);
896                 }
897                 CDEBUG(D_OTHER,"link %u/%u/%u:%*s to %u/%u/%u mds %d\n",
898                        (unsigned) data->fid2.mds, (unsigned) data->fid2.id,
899                        (unsigned) data->fid2.generation, data->namelen,
900                        data->name, (unsigned) data->fid1.mds,
901                        (unsigned) data->fid1.id,
902                        (unsigned) data->fid1.generation, data->fid1.mds);
903         } else {
904                 /* request from MDS to acquire i_links for inode by fid1 */
905                 CDEBUG(D_OTHER, "inc i_nlinks for %u/%u/%u\n",
906                        (unsigned) data->fid1.mds, (unsigned) data->fid1.id,
907                        (unsigned) data->fid1.generation);
908         }
909                         
910         rc = md_link(lmv->tgts[data->fid1.mds].ltd_exp, data, request);
911         RETURN(rc);
912 }
913
914 int lmv_rename(struct obd_export *exp, struct mdc_op_data *data,
915                const char *old, int oldlen, const char *new, int newlen,
916                struct ptlrpc_request **request)
917 {
918         struct obd_device *obd = exp->exp_obd;
919         struct lmv_obd *lmv = &obd->u.lmv;
920         struct lmv_obj *obj;
921         int rc, mds;
922         ENTRY;
923
924         CDEBUG(D_OTHER, "rename %*s in %lu/%lu/%lu to %*s in %lu/%lu/%lu\n",
925                oldlen, old, (unsigned long) data->fid1.mds,
926                (unsigned long) data->fid1.id,
927                (unsigned long) data->fid1.generation,
928                newlen, new, (unsigned long) data->fid2.mds,
929                (unsigned long) data->fid2.id,
930                (unsigned long) data->fid2.generation);
931         if (!fid_equal(&data->fid1, &data->fid2))
932                 CWARN("cross-node rename %lu/%lu/%lu:%*s to %lu/%lu/%lu:%*s\n",
933                       (unsigned long) data->fid1.mds,
934                       (unsigned long) data->fid1.id,
935                       (unsigned long) data->fid1.generation, oldlen, old,
936                       (unsigned long) data->fid2.mds,
937                       (unsigned long) data->fid2.id,
938                       (unsigned long) data->fid2.generation, newlen, new);
939
940         rc = lmv_check_connect(obd);
941         if (rc)
942                 RETURN(rc);
943
944         if (oldlen == 0) {
945                 /* MDS with old dir entry is asking another MDS
946                  * to create name there */
947                 CDEBUG(D_OTHER,
948                        "create %*s(%d/%d) in %lu/%lu/%lu pointing to %lu/%lu/%lu\n",
949                        newlen, new, oldlen, newlen,
950                        (unsigned long) data->fid2.mds,
951                        (unsigned long) data->fid2.id,
952                        (unsigned long) data->fid2.generation,
953                        (unsigned long) data->fid1.mds,
954                        (unsigned long) data->fid1.id,
955                        (unsigned long) data->fid1.generation);
956                 mds = data->fid2.mds;
957                 goto request;
958         }
959
960         obj = lmv_grab_obj(obd, &data->fid1, 0);
961         if (obj) {
962                 /* directory is already splitted, so we have to forward
963                  * request to the right MDS */
964                 mds = raw_name2idx(obj->objcount, (char *)old, oldlen);
965                 data->fid1 = obj->objs[mds].fid;
966                 CDEBUG(D_OTHER, "forward to MDS #%u (%lu/%lu/%lu)\n", mds,
967                        (unsigned long) obj->objs[mds].fid.mds,
968                        (unsigned long) obj->objs[mds].fid.id,
969                        (unsigned long) obj->objs[mds].fid.generation);
970         }
971         lmv_put_obj(obj);
972
973         obj = lmv_grab_obj(obd, &data->fid2, 0);
974         if (obj) {
975                 /* directory is already splitted, so we have to forward
976                  * request to the right MDS */
977                 mds = raw_name2idx(obj->objcount, (char *)new, newlen);
978                 data->fid2 = obj->objs[mds].fid;
979                 CDEBUG(D_OTHER, "forward to MDS #%u (%lu/%lu/%lu)\n", mds,
980                        (unsigned long) obj->objs[mds].fid.mds,
981                        (unsigned long) obj->objs[mds].fid.id,
982                        (unsigned long) obj->objs[mds].fid.generation);
983         }
984         lmv_put_obj(obj);
985         
986         mds = data->fid1.mds;
987
988 request:
989         rc = md_rename(lmv->tgts[mds].ltd_exp, data, old, oldlen,
990                        new, newlen, request); 
991         RETURN(rc);
992 }
993
994 int lmv_setattr(struct obd_export *exp, struct mdc_op_data *data,
995                 struct iattr *iattr, void *ea, int ealen, void *ea2, int ea2len,
996                 struct ptlrpc_request **request)
997 {
998         struct obd_device *obd = exp->exp_obd;
999         struct lmv_obd *lmv = &obd->u.lmv;
1000         int rc = 0, i = data->fid1.mds;
1001         struct ptlrpc_request *req;
1002         struct mds_body *mds_body;
1003         struct lmv_obj *obj;
1004         ENTRY;
1005
1006         rc = lmv_check_connect(obd);
1007         if (rc)
1008                 RETURN(rc);
1009
1010         obj = lmv_grab_obj(obd, &data->fid1, 0);
1011         CDEBUG(D_OTHER, "SETATTR for %lu/%lu/%lu, valid 0x%x%s\n",
1012                (unsigned long) data->fid1.mds,
1013                (unsigned long) data->fid1.id,
1014                (unsigned long) data->fid1.generation, iattr->ia_valid,
1015                obj ? ", splitted" : "");
1016         if (obj) {
1017                 for (i = 0; i < obj->objcount; i++) {
1018                         data->fid1 = obj->objs[i].fid;
1019                         rc = md_setattr(lmv->tgts[i].ltd_exp, data, iattr, ea,
1020                                         ealen, ea2, ea2len, &req);
1021                         LASSERT(rc == 0);
1022                         if (fid_equal(&obj->fid, &obj->objs[i].fid)) {
1023                                 /* this is master object and this request
1024                                  * should be returned back to llite */
1025                                 *request = req;
1026                         } else {
1027                                 ptlrpc_req_finished(req);
1028                         }
1029                 }
1030                 lmv_put_obj(obj);
1031         } else {
1032                 LASSERT(data->fid1.mds < lmv->desc.ld_tgt_count);
1033                 rc = md_setattr(lmv->tgts[i].ltd_exp, data, iattr, ea, ealen,
1034                                 ea2, ea2len, request); 
1035                 if (rc == 0) {
1036                         mds_body = lustre_msg_buf((*request)->rq_repmsg, 0,
1037                                         sizeof(*mds_body));
1038                         LASSERT(mds_body != NULL);
1039                         LASSERT(mds_body->mds == i);
1040                 }
1041         }
1042         RETURN(rc);
1043 }
1044
1045 int lmv_sync(struct obd_export *exp, struct ll_fid *fid,
1046              struct ptlrpc_request **request)
1047 {
1048         struct obd_device *obd = exp->exp_obd;
1049         struct lmv_obd *lmv = &obd->u.lmv;
1050         int rc;
1051         ENTRY;
1052
1053         rc = lmv_check_connect(obd);
1054         if (rc)
1055                 RETURN(rc);
1056
1057         rc = md_sync(lmv->tgts[0].ltd_exp, fid, request); 
1058         RETURN(rc);
1059 }
1060
1061 int lmv_dirobj_blocking_ast(struct ldlm_lock *lock,
1062                             struct ldlm_lock_desc *desc, void *data, int flag)
1063 {
1064         struct lustre_handle lockh;
1065         struct lmv_obj *obj;
1066         int rc;
1067         ENTRY;
1068
1069         switch (flag) {
1070         case LDLM_CB_BLOCKING:
1071                 ldlm_lock2handle(lock, &lockh);
1072                 rc = ldlm_cli_cancel(&lockh);
1073                 if (rc < 0) {
1074                         CDEBUG(D_INODE, "ldlm_cli_cancel: %d\n", rc);
1075                         RETURN(rc);
1076                 }
1077                 break;
1078         case LDLM_CB_CANCELING:
1079                 /* time to drop cached attrs for dirobj */
1080                 obj = lock->l_ast_data;
1081                 if (!obj)
1082                         break;
1083
1084                 CDEBUG(D_OTHER, "cancel %s on %lu/%lu, master %lu/%lu/%lu\n",
1085                        lock->l_resource->lr_name.name[3] == 1 ?
1086                                 "LOOKUP" : "UPDATE",
1087                        (unsigned long) lock->l_resource->lr_name.name[0],
1088                        (unsigned long) lock->l_resource->lr_name.name[1],
1089                        (unsigned long) obj->fid.mds,
1090                        (unsigned long) obj->fid.id,
1091                        (unsigned long) obj->fid.generation);
1092                 break;
1093         default:
1094                 LBUG();
1095         }
1096         RETURN(0);
1097 }
1098
1099 void lmv_remove_dots(struct page *page)
1100 {
1101         char *kaddr = page_address(page);
1102         unsigned limit = PAGE_CACHE_SIZE;
1103         unsigned offs, rec_len;
1104         struct ext2_dir_entry_2 *p;
1105
1106         for (offs = 0; offs <= limit - EXT2_DIR_REC_LEN(1); offs += rec_len) {
1107                 p = (struct ext2_dir_entry_2 *)(kaddr + offs);
1108                 rec_len = le16_to_cpu(p->rec_len);
1109
1110                 if ((p->name_len == 1 && p->name[0] == '.') ||
1111                     (p->name_len == 2 && p->name[0] == '.' && p->name[1] == '.'))
1112                         p->inode = 0;
1113         }
1114 }
1115
1116 int lmv_readpage(struct obd_export *exp, struct ll_fid *mdc_fid,
1117                  __u64 offset, struct page *page,
1118                  struct ptlrpc_request **request)
1119 {
1120         struct obd_device *obd = exp->exp_obd;
1121         struct lmv_obd *lmv = &obd->u.lmv;
1122         struct ll_fid rfid = *mdc_fid;
1123         struct lmv_obj *obj;
1124         int rc, i;
1125         ENTRY;
1126
1127         rc = lmv_check_connect(obd);
1128         if (rc)
1129                 RETURN(rc);
1130
1131         LASSERT(mdc_fid->mds < lmv->desc.ld_tgt_count);
1132         CDEBUG(D_OTHER, "READPAGE at %llu from %lu/%lu/%lu\n",
1133                offset, (unsigned long) rfid.mds,
1134                (unsigned long) rfid.id,
1135                (unsigned long) rfid.generation);
1136
1137         obj = lmv_grab_obj(obd, mdc_fid, 0);
1138         if (obj) {
1139                 /* find dirobj containing page with requested offset */
1140                 /* FIXME: what about protecting cached attrs here? */
1141                 for (i = 0; i < obj->objcount; i++) {
1142                         if (offset < obj->objs[i].size)
1143                                 break;
1144                         offset -= obj->objs[i].size;
1145                 }
1146                 rfid = obj->objs[i].fid;
1147                 CDEBUG(D_OTHER, "forward to %lu/%lu/%lu with offset %lu\n",
1148                        (unsigned long) rfid.mds,
1149                        (unsigned long) rfid.id,
1150                        (unsigned long) rfid.generation,
1151                        (unsigned long) offset);
1152         }
1153         rc = md_readpage(lmv->tgts[rfid.mds].ltd_exp, &rfid, offset, page, request);
1154         if (rc == 0 && !fid_equal(&rfid, mdc_fid)) {
1155                 /* this page isn't from master object. to avoid
1156                  * ./.. duplication in directory, we have to remove them
1157                  * from all slave objects */
1158                 lmv_remove_dots(page);
1159         }
1160       
1161         lmv_put_obj(obj);
1162
1163         RETURN(rc);
1164 }
1165
1166 int lmv_unlink_slaves(struct obd_export *exp,
1167                          struct mdc_op_data *data, struct ptlrpc_request **req)
1168 {
1169         struct obd_device *obd = exp->exp_obd;
1170         struct lmv_obd *lmv = &obd->u.lmv;
1171         struct mea *mea = data->mea1;
1172         struct mdc_op_data data2;
1173         int i, rc = 0, mds;
1174         ENTRY;
1175
1176         LASSERT(mea != NULL);
1177         for (i = 0; i < mea->mea_count; i++) {
1178                 if (lmv->tgts[i].ltd_exp == NULL)
1179                         continue;
1180
1181                 memset(&data2, 0, sizeof(data2));
1182                 data2.fid1 = mea->mea_fids[i];
1183                 data2.create_mode = MDS_MODE_DONT_LOCK | S_IFDIR;
1184                 mds = data2.fid1.mds;
1185                 rc = md_unlink(lmv->tgts[mds].ltd_exp, &data2, req);
1186                 CDEBUG(D_OTHER, "unlink slave %lu/%lu/%lu -> %d\n",
1187                        (unsigned long) mea->mea_fids[i].mds,
1188                        (unsigned long) mea->mea_fids[i].id,
1189                        (unsigned long) mea->mea_fids[i].generation, rc);
1190                 if (*req) {
1191                         ptlrpc_req_finished(*req);
1192                         *req = NULL;
1193                 }
1194                 if (rc)
1195                         break;
1196         }
1197         RETURN(rc);
1198 }
1199
1200 int lmv_unlink(struct obd_export *exp, struct mdc_op_data *data,
1201                struct ptlrpc_request **request)
1202 {
1203         struct obd_device *obd = exp->exp_obd;
1204         struct lmv_obd *lmv = &obd->u.lmv;
1205         int rc, i = 0;
1206         ENTRY;
1207         rc = lmv_check_connect(obd);
1208         if (rc)
1209                 RETURN(rc);
1210
1211         if (data->namelen == 0 && data->mea1 != NULL) {
1212                 /* mds asks to remove slave objects */
1213                 rc = lmv_unlink_slaves(exp, data, request);
1214                 RETURN(rc);
1215         } else if (data->namelen != 0) {
1216                 struct lmv_obj *obj;
1217                 obj = lmv_grab_obj(obd, &data->fid1, 0);
1218                 if (obj) {
1219                         i = raw_name2idx(obj->objcount, data->name,
1220                                          data->namelen);
1221                         data->fid1 = obj->objs[i].fid;
1222                         lmv_put_obj(obj);
1223                 }
1224                 CDEBUG(D_OTHER, "unlink '%*s' in %lu/%lu/%lu -> %u\n",
1225                        data->namelen, data->name,
1226                        (unsigned long) data->fid1.mds,
1227                        (unsigned long) data->fid1.id,
1228                        (unsigned long) data->fid1.generation, i);
1229         } else {
1230                 CDEBUG(D_OTHER, "drop i_nlink on %lu/%lu/%lu\n",
1231                        (unsigned long) data->fid1.mds,
1232                        (unsigned long) data->fid1.id,
1233                        (unsigned long) data->fid1.generation);
1234         }
1235         rc = md_unlink(lmv->tgts[data->fid1.mds].ltd_exp, data, request); 
1236         RETURN(rc);
1237 }
1238
1239 struct obd_device *lmv_get_real_obd(struct obd_export *exp,
1240                                     char *name, int len)
1241 {
1242         struct obd_device *obd = exp->exp_obd;
1243         struct lmv_obd *lmv = &obd->u.lmv;
1244         int rc;
1245         ENTRY;
1246
1247         rc = lmv_check_connect(obd);
1248         if (rc)
1249                 RETURN(ERR_PTR(rc));
1250         obd = lmv->tgts[0].ltd_exp->exp_obd;
1251         EXIT;
1252         return obd;
1253 }
1254
1255 int lmv_init_ea_size(struct obd_export *exp, int easize, int cookiesize)
1256 {
1257         struct obd_device *obd = exp->exp_obd;
1258         struct lmv_obd *lmv = &obd->u.lmv;
1259         int i, rc = 0, change = 0;
1260         ENTRY;
1261
1262         if (lmv->max_easize < easize) {
1263                 lmv->max_easize = easize;
1264                 change = 1;
1265         }
1266         if (lmv->max_cookiesize < cookiesize) {
1267                 lmv->max_cookiesize = cookiesize;
1268                 change = 1;
1269         }
1270         if (change == 0)
1271                 RETURN(0);
1272         
1273         if (lmv->connected == 0)
1274                 RETURN(0);
1275
1276         /* FIXME: error handling? */
1277         for (i = 0; i < lmv->desc.ld_tgt_count; i++)
1278                 rc = obd_init_ea_size(lmv->tgts[i].ltd_exp, easize, cookiesize);
1279         RETURN(rc);
1280 }
1281
1282 int lmv_obd_create_single(struct obd_export *exp, struct obdo *oa,
1283                           struct lov_stripe_md **ea, struct obd_trans_info *oti)
1284 {
1285         struct obd_device *obd = exp->exp_obd;
1286         struct lmv_obd *lmv = &obd->u.lmv;
1287         struct lov_stripe_md obj_md;
1288         struct lov_stripe_md *obj_mdp = &obj_md;
1289         int rc = 0;
1290         ENTRY;
1291
1292         rc = lmv_check_connect(obd);
1293         if (rc)
1294                 RETURN(rc);
1295
1296         LASSERT(ea == NULL);
1297         LASSERT(oa->o_mds < lmv->desc.ld_tgt_count);
1298
1299         rc = obd_create(lmv->tgts[oa->o_mds].ltd_exp, oa, &obj_mdp, oti);
1300         LASSERT(rc == 0);
1301
1302         RETURN(rc);
1303 }
1304
1305 /*
1306  * to be called from MDS only
1307  */
1308 int lmv_obd_create(struct obd_export *exp, struct obdo *oa,
1309                struct lov_stripe_md **ea, struct obd_trans_info *oti)
1310 {
1311         struct obd_device *obd = exp->exp_obd;
1312         struct lmv_obd *lmv = &obd->u.lmv;
1313         struct mea *mea;
1314         int i, c, rc = 0;
1315         struct ll_fid mfid;
1316         ENTRY;
1317
1318         rc = lmv_check_connect(obd);
1319         if (rc)
1320                 RETURN(rc);
1321
1322         LASSERT(oa != NULL);
1323         
1324         if (ea == NULL) {
1325                 rc = lmv_obd_create_single(exp, oa, NULL, oti);
1326                 RETURN(rc);
1327         }
1328
1329         if (*ea == NULL) {
1330                 rc = obd_alloc_diskmd(exp, (struct lov_mds_md **)ea);
1331                 LASSERT(*ea != NULL);
1332         }
1333
1334         mea = (struct mea *)*ea;
1335         mfid.id = oa->o_id;
1336         mfid.generation = oa->o_generation;
1337         rc = 0;
1338         if (!mea->mea_count || mea->mea_count > lmv->desc.ld_tgt_count)
1339                 mea->mea_count = lmv->desc.ld_tgt_count;
1340
1341         mea->mea_master = -1;
1342         
1343         /* FIXME: error handling? */
1344         for (i = 0, c = 0; c < mea->mea_count && 
1345                 i < lmv->desc.ld_tgt_count; i++) {
1346                 struct lov_stripe_md obj_md;
1347                 struct lov_stripe_md *obj_mdp = &obj_md;
1348                
1349                 if (lmv->tgts[i].ltd_exp == NULL) {
1350                         /* this is master MDS */
1351                         mea->mea_fids[c].id = mfid.id;
1352                         mea->mea_fids[c].generation = mfid.generation;
1353                         mea->mea_fids[c].mds = i;
1354                         mea->mea_master = i;
1355                         c++;
1356                         continue;
1357                 }
1358
1359                 /* "Master" MDS should always be part of stripped dir, so
1360                    scan for it */
1361                 if (mea->mea_master == -1 && c == mea->mea_count - 1)
1362                         continue;
1363
1364                 oa->o_valid = OBD_MD_FLGENER | OBD_MD_FLTYPE | OBD_MD_FLMODE
1365                                 | OBD_MD_FLUID | OBD_MD_FLGID | OBD_MD_FLID;
1366
1367                 rc = obd_create(lmv->tgts[c].ltd_exp, oa, &obj_mdp, oti);
1368                 /* FIXME: error handling here */
1369                 LASSERT(rc == 0);
1370
1371                 mea->mea_fids[c].id = oa->o_id;
1372                 mea->mea_fids[c].generation = oa->o_generation;
1373                 mea->mea_fids[c].mds = i;
1374                 c++;
1375                 CDEBUG(D_OTHER, "dirobj at mds %d: "LPU64"/%u\n",
1376                        i, oa->o_id, oa->o_generation);
1377         }
1378         LASSERT(c == mea->mea_count);
1379         CDEBUG(D_OTHER, "%d dirobjects created\n", (int) mea->mea_count);
1380
1381         RETURN(rc);
1382 }
1383
1384 static int lmv_get_info(struct obd_export *exp, __u32 keylen,
1385                            void *key, __u32 *vallen, void *val)
1386 {
1387         struct obd_device *obd;
1388         struct lmv_obd *lmv;
1389         ENTRY;
1390
1391         obd = class_exp2obd(exp);
1392         if (obd == NULL) {
1393                 CDEBUG(D_IOCTL, "invalid client cookie "LPX64"\n",
1394                        exp->exp_handle.h_cookie);
1395                 RETURN(-EINVAL);
1396         }
1397
1398         lmv = &obd->u.lmv;
1399         if (keylen == 6 && memcmp(key, "mdsize", 6) == 0) {
1400                 __u32 *mdsize = val;
1401                 *vallen = sizeof(__u32);
1402                 *mdsize = sizeof(struct ll_fid) * lmv->desc.ld_tgt_count
1403                                 + sizeof(struct mea);
1404                 RETURN(0);
1405         } else if (keylen == 6 && memcmp(key, "mdsnum", 6) == 0) {
1406                 struct obd_uuid *cluuid = &lmv->cluuid;
1407                 struct lmv_tgt_desc *tgts;
1408                 __u32 *mdsnum = val;
1409                 int i;
1410
1411                 for (i = 0, tgts = lmv->tgts; i < lmv->desc.ld_tgt_count; i++, tgts++) {
1412                         if (obd_uuid_equals(&tgts->uuid, cluuid)) {
1413                                 *vallen = sizeof(__u32);
1414                                 *mdsnum = i;
1415                                 RETURN(0);
1416                         }
1417                 }
1418                 LASSERT(0);
1419         }
1420
1421         CDEBUG(D_IOCTL, "invalid key\n");
1422         RETURN(-EINVAL);
1423 }
1424
1425 int lmv_set_info(struct obd_export *exp, obd_count keylen,
1426                  void *key, obd_count vallen, void *val)
1427 {
1428         struct obd_device *obd;
1429         struct lmv_obd *lmv;
1430         ENTRY;
1431
1432         obd = class_exp2obd(exp);
1433         if (obd == NULL) {
1434                 CDEBUG(D_IOCTL, "invalid client cookie "LPX64"\n",
1435                        exp->exp_handle.h_cookie);
1436                 RETURN(-EINVAL);
1437         }
1438         lmv = &obd->u.lmv;
1439
1440         if (keylen >= strlen("client") && strcmp(key, "client") == 0) {
1441                 struct lmv_tgt_desc *tgts;
1442                 int i, rc;
1443
1444                 rc = lmv_check_connect(obd);
1445                 if (rc)
1446                         RETURN(rc);
1447
1448                 for (i = 0, tgts = lmv->tgts; 
1449                         i < lmv->desc.ld_tgt_count; i++, tgts++) {
1450                         rc = obd_set_info(tgts->ltd_exp, keylen, key, vallen, val);
1451                         if (rc)
1452                                 RETURN(rc);
1453                 }
1454                 RETURN(0);
1455         } else if (keylen >= strlen("inter_mds") && strcmp(key, "inter_mds") == 0) {
1456                 lmv->server_timeout = 1;
1457                 lmv_set_timeouts(obd);
1458                 RETURN(0);
1459         }
1460         
1461         RETURN(-EINVAL);
1462 }
1463
1464 int lmv_packmd(struct obd_export *exp, struct lov_mds_md **lmmp,
1465                struct lov_stripe_md *lsm)
1466 {
1467         struct obd_device *obd = class_exp2obd(exp);
1468         struct lmv_obd *lmv = &obd->u.lmv;
1469         int mea_size;
1470         ENTRY;
1471
1472         mea_size = sizeof(struct ll_fid) * 
1473                 lmv->desc.ld_tgt_count + sizeof(struct mea);
1474         if (!lmmp)
1475                 RETURN(mea_size);
1476
1477         if (*lmmp && !lsm) {
1478                 OBD_FREE(*lmmp, mea_size);
1479                 *lmmp = NULL;
1480                 RETURN(0);
1481         }
1482
1483         if (!*lmmp) {
1484                 OBD_ALLOC(*lmmp, mea_size);
1485                 if (!*lmmp)
1486                         RETURN(-ENOMEM);
1487         }
1488
1489         if (!lsm)
1490                 RETURN(mea_size);
1491
1492 #warning "MEA packing/convertation must be here! -bzzz"
1493         memcpy(*lmmp, lsm, mea_size);
1494         RETURN(mea_size);
1495 }
1496
1497 int lmv_unpackmd(struct obd_export *exp, struct lov_stripe_md **mem_tgt,
1498                         struct lov_mds_md *disk_src, int mdsize)
1499 {
1500         struct obd_device *obd = class_exp2obd(exp);
1501         struct lmv_obd *lmv = &obd->u.lmv;
1502         struct mea **tmea = (struct mea **) mem_tgt;
1503         struct mea *mea = (void *) disk_src;
1504         int mea_size;
1505         ENTRY;
1506
1507         mea_size = sizeof(struct ll_fid) * 
1508                 lmv->desc.ld_tgt_count + sizeof(struct mea);
1509         if (mem_tgt == NULL)
1510                 return mea_size;
1511
1512         if (*mem_tgt != NULL && disk_src == NULL) {
1513                 OBD_FREE(*tmea, mea_size);
1514                 RETURN(0);
1515         }
1516
1517         LASSERT(mea_size == mdsize);
1518
1519         OBD_ALLOC(*tmea, mea_size);
1520         /* FIXME: error handling here */
1521         LASSERT(*tmea != NULL);
1522
1523         if (!disk_src)
1524                 RETURN(mea_size);
1525
1526 #warning "MEA unpacking/convertation must be here! -bzzz"
1527         memcpy(*tmea, mea, mdsize);
1528         RETURN(mea_size);
1529 }
1530
1531 int lmv_brw(int rw, struct obd_export *exp, struct obdo *oa,
1532                 struct lov_stripe_md *ea, obd_count oa_bufs,
1533                 struct brw_page *pgarr, struct obd_trans_info *oti)
1534 {
1535         struct obd_device *obd = exp->exp_obd;
1536         struct lmv_obd *lmv = &obd->u.lmv;
1537         struct mea *mea = (struct mea *) ea;
1538         int err;
1539       
1540         LASSERT(oa != NULL);
1541         LASSERT(ea != NULL);
1542         LASSERT(pgarr != NULL);
1543         LASSERT(oa->o_mds < lmv->desc.ld_tgt_count);
1544
1545         oa->o_gr = mea->mea_fids[oa->o_mds].generation;
1546         oa->o_id = mea->mea_fids[oa->o_mds].id;
1547         oa->o_valid =  OBD_MD_FLID | OBD_MD_FLGROUP;
1548         err = obd_brw(rw, lmv->tgts[oa->o_mds].ltd_exp, oa,
1549                       NULL, oa_bufs, pgarr, oti);
1550         RETURN(err);
1551 }
1552
1553 struct obd_ops lmv_obd_ops = {
1554         .o_owner                = THIS_MODULE,
1555         .o_attach               = lmv_attach,
1556         .o_detach               = lmv_detach,
1557         .o_setup                = lmv_setup,
1558         .o_cleanup              = lmv_cleanup,
1559         .o_connect              = lmv_connect,
1560         .o_disconnect           = lmv_disconnect,
1561         .o_statfs               = lmv_statfs,
1562         .o_get_info             = lmv_get_info,
1563         .o_set_info             = lmv_set_info,
1564         .o_create               = lmv_obd_create,
1565         .o_packmd               = lmv_packmd,
1566         .o_unpackmd             = lmv_unpackmd,
1567         .o_brw                  = lmv_brw,
1568         .o_init_ea_size         = lmv_init_ea_size,
1569         .o_notify               = lmv_notify,
1570 };
1571
1572 struct md_ops lmv_md_ops = {
1573         .m_getstatus            = lmv_getstatus,
1574         .m_getattr              = lmv_getattr,
1575         .m_change_cbdata        = lmv_change_cbdata,
1576         .m_change_cbdata_name   = lmv_change_cbdata_name,
1577         .m_close                = lmv_close,
1578         .m_create               = lmv_create,
1579         .m_done_writing         = lmv_done_writing,
1580         .m_enqueue              = lmv_enqueue,
1581         .m_getattr_name         = lmv_getattr_name,
1582         .m_intent_lock          = lmv_intent_lock,
1583         .m_link                 = lmv_link,
1584         .m_rename               = lmv_rename,
1585         .m_setattr              = lmv_setattr,
1586         .m_sync                 = lmv_sync,
1587         .m_readpage             = lmv_readpage,
1588         .m_unlink               = lmv_unlink,
1589         .m_get_real_obd         = lmv_get_real_obd,
1590         .m_valid_attrs          = lmv_valid_attrs,
1591 };
1592
1593 int __init lmv_init(void)
1594 {
1595         struct lprocfs_static_vars lvars;
1596         int rc;
1597
1598         lprocfs_init_vars(lmv, &lvars);
1599         rc = class_register_type(&lmv_obd_ops, &lmv_md_ops,
1600                                  lvars.module_vars, OBD_LMV_DEVICENAME);
1601         RETURN(rc);
1602 }
1603
#ifdef __KERNEL__
/* Module unload: drop the device type registered under OBD_LMV_DEVICENAME. */
static void lmv_exit(void)
{
        class_unregister_type(OBD_LMV_DEVICENAME);
}

module_init(lmv_init);
module_exit(lmv_exit);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
MODULE_DESCRIPTION("Lustre Logical Metadata Volume OBD driver");
#endif