Whamcloud - gitweb
smash the HEAD with the contents of b_cmd. HEAD_PRE_CMD_SMASH and
[fs/lustre-release.git] / lustre / lmv / lmv_obd.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  * Copyright (C) 2002, 2003 Cluster File Systems, Inc.
5  *
6  *   This file is part of Lustre, http://www.lustre.org.
7  *
8  *   Lustre is free software; you can redistribute it and/or
9  *   modify it under the terms of version 2 of the GNU General Public
10  *   License as published by the Free Software Foundation.
11  *
12  *   Lustre is distributed in the hope that it will be useful,
13  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
14  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  *   GNU General Public License for more details.
16  *
17  *   You should have received a copy of the GNU General Public License
18  *   along with Lustre; if not, write to the Free Software
19  *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20  */
21
22 #ifndef EXPORT_SYMTAB
23 # define EXPORT_SYMTAB
24 #endif
25 #define DEBUG_SUBSYSTEM S_LMV
26 #ifdef __KERNEL__
27 #include <linux/slab.h>
28 #include <linux/module.h>
29 #include <linux/init.h>
30 #include <linux/slab.h>
31 #include <linux/pagemap.h>
32 #include <asm/div64.h>
33 #else
34 #include <liblustre.h>
35 #endif
36
37 #include <linux/obd_support.h>
38 #include <linux/lustre_lib.h>
39 #include <linux/lustre_net.h>
40 #include <linux/lustre_idl.h>
41 #include <linux/lustre_dlm.h>
42 #include <linux/lustre_mds.h>
43 #include <linux/obd_class.h>
44 #include <linux/obd_ost.h>
45 #include <linux/seq_file.h>
46 #include <linux/lprocfs_status.h>
47 #include <linux/lustre_fsfilt.h>
48 #include <linux/obd_lmv.h>
49 #include "lmv_internal.h"
50
51 int lmv_attach(struct obd_device *dev, obd_count len, void *data)
52 {
53         struct lprocfs_static_vars lvars;
54         struct proc_dir_entry *entry;
55         int rc;
56         ENTRY;
57
58         lprocfs_init_vars(lmv, &lvars);
59         rc = lprocfs_obd_attach(dev, lvars.obd_vars);
60         if (rc)
61                 RETURN (rc);
62
63         entry = create_proc_entry("target_obd", 0444, dev->obd_proc_entry);
64         if (entry == NULL)
65                 RETURN(-ENOMEM);
66         /* entry->proc_fops = &lmv_proc_target_fops; */
67         entry->data = dev;
68
69         RETURN (rc);
70 }
71
/*
 * Detach-time hook: drop the lprocfs state of @dev and report whatever
 * lprocfs_obd_detach() returns.
 */
int lmv_detach(struct obd_device *dev)
{
        int rc = lprocfs_obd_detach(dev);

        return rc;
}
76
77 static int lmv_connect_fake(struct lustre_handle *conn,
78                             struct obd_device *obd,
79                             struct obd_uuid *cluuid)
80 {
81         struct lmv_obd *lmv = &obd->u.lmv;
82         int rc;
83         ENTRY;
84
85         rc = class_connect(conn, obd, cluuid);
86         if (rc) {
87                 CERROR("class_connection() returned %d\n", rc);
88                 RETURN(rc);
89         }
90
91         lmv->exp = class_conn2export(conn);
92         LASSERT(lmv->exp != NULL);
93
94         lmv->cluuid = *cluuid;
95         lmv->connected = 0;
96
97         RETURN(0);
98 }
99
100 int lmv_connect(struct obd_device *obd)
101 {
102         struct lmv_obd *lmv = &obd->u.lmv;
103         struct obd_uuid *cluuid;
104         struct lmv_tgt_desc *tgts;
105         struct obd_export *exp;
106         int rc, i;
107         ENTRY;
108
109         if (lmv->connected)
110                 RETURN(0);
111       
112         lmv->connected = 1;
113         cluuid = &lmv->cluuid;
114         exp = lmv->exp;
115         CDEBUG(D_OTHER, "time to connect %s to %s\n",
116                         cluuid->uuid, obd->obd_name);
117
118         /* We don't want to actually do the underlying connections more than
119          * once, so keep track. */
120         lmv->refcount++;
121         if (lmv->refcount > 1) {
122                 class_export_put(exp);
123                 RETURN(0);
124         }
125
126         for (i = 0, tgts = lmv->tgts; i < lmv->count; i++, tgts++) {
127                 struct obd_device *tgt_obd;
128                 struct obd_uuid lmv_osc_uuid = { "LMV_OSC_UUID" };
129                 struct lustre_handle conn = {0, };
130
131                 LASSERT(tgts != NULL);
132
133                 tgt_obd = class_find_client_obd(&tgts->uuid, LUSTRE_MDC_NAME, 
134                                                 &obd->obd_uuid);
135                 if (!tgt_obd) {
136                         CERROR("Target %s not attached\n", tgts->uuid.uuid);
137                         GOTO(out_disc, rc = -EINVAL);
138                 }
139
140                 /* for MDS: don't connect to yourself */
141                 if (obd_uuid_equals(&tgts->uuid, cluuid)) {
142                         CDEBUG(D_OTHER, "don't connect back to %s\n",
143                                cluuid->uuid);
144                         tgts->exp = NULL;
145                         continue;
146                 }
147
148                 CDEBUG(D_OTHER, "connect to %s(%s) - %s, %s FOR %s\n",
149                         tgt_obd->obd_name, tgt_obd->obd_uuid.uuid,
150                         tgts->uuid.uuid, obd->obd_uuid.uuid,
151                         cluuid->uuid);
152
153                 if (!tgt_obd->obd_set_up) {
154                         CERROR("Target %s not set up\n", tgts->uuid.uuid);
155                         GOTO(out_disc, rc = -EINVAL);
156                 }
157                 
158                 rc = obd_connect(&conn, tgt_obd, &lmv_osc_uuid);
159                 if (rc) {
160                         CERROR("Target %s connect error %d\n",
161                                 tgts->uuid.uuid, rc);
162                         GOTO(out_disc, rc);
163                 }
164                 tgts->exp = class_conn2export(&conn);
165
166                 obd_init_ea_size(tgts->exp, lmv->max_easize,
167                                         lmv->max_cookiesize);
168                 
169                 rc = obd_register_observer(tgt_obd, obd);
170                 if (rc) {
171                         CERROR("Target %s register_observer error %d\n",
172                                tgts->uuid.uuid, rc);
173                         obd_disconnect(tgts->exp, 0);
174                         GOTO(out_disc, rc);
175                 }
176
177                 CDEBUG(D_OTHER, "connected to %s(%s) successfully (%d)\n",
178                         tgt_obd->obd_name, tgt_obd->obd_uuid.uuid,
179                         atomic_read(&obd->obd_refcount));
180         }
181
182         class_export_put(exp);
183         RETURN (0);
184
185  out_disc:
186         /* FIXME: cleanup here */
187         class_disconnect(exp, 0);
188         RETURN (rc);
189 }
190
191 static int lmv_disconnect(struct obd_export *exp, int flags)
192 {
193         struct obd_device *obd = class_exp2obd(exp);
194         struct lmv_obd *lmv = &obd->u.lmv;
195         int rc, i;
196         ENTRY;
197
198         if (!lmv->tgts)
199                 goto out_local;
200
201         /* Only disconnect the underlying layers on the final disconnect. */
202         lmv->refcount--;
203         if (lmv->refcount != 0)
204                 goto out_local;
205
206         for (i = 0; i < lmv->count; i++) {
207                 if (lmv->tgts[i].exp == NULL)
208                         continue;
209
210                 if (obd->obd_no_recov) {
211                         /* Pass it on to our clients.
212                          * XXX This should be an argument to disconnect,
213                          * XXX not a back-door flag on the OBD.  Ah well.
214                          */
215                         struct obd_device *mdc_obd;
216                         mdc_obd = class_exp2obd(lmv->tgts[i].exp);
217                         if (mdc_obd)
218                                 mdc_obd->obd_no_recov = 1;
219                 }
220
221                 CDEBUG(D_OTHER, "disconnected from %s(%s) successfully\n",
222                         lmv->tgts[i].exp->exp_obd->obd_name,
223                         lmv->tgts[i].exp->exp_obd->obd_uuid.uuid);
224
225                 obd_register_observer(lmv->tgts[i].exp->exp_obd, NULL);
226
227                 rc = obd_disconnect(lmv->tgts[i].exp, flags);
228                 lmv->tgts[i].exp = NULL;
229         }
230
231  out_local:
232         /* FIXME: cleanup here */
233         if (!lmv->connected)
234                 class_export_put(exp);
235         rc = class_disconnect(exp, 0);
236         RETURN(rc);
237 }
238
239 static int lmv_setup(struct obd_device *obd, obd_count len, void *buf)
240 {
241         struct lustre_cfg *lcfg = buf;
242         struct lmv_desc *desc;
243         struct lmv_obd *lmv = &obd->u.lmv;
244         struct obd_uuid *uuids;
245         struct lmv_tgt_desc *tgts;
246         int i;
247         int count;
248         int rc = 0;
249         ENTRY;
250
251         if (lcfg->lcfg_inllen1 < 1) {
252                 CERROR("LMV setup requires a descriptor\n");
253                 RETURN(-EINVAL);
254         }
255
256         if (lcfg->lcfg_inllen2 < 1) {
257                 CERROR("LMV setup requires an OST UUID list\n");
258                 RETURN(-EINVAL);
259         }
260
261         desc = (struct lmv_desc *)lcfg->lcfg_inlbuf1;
262         if (sizeof(*desc) > lcfg->lcfg_inllen1) {
263                 CERROR("descriptor size wrong: %d > %d\n",
264                        (int)sizeof(*desc), lcfg->lcfg_inllen1);
265                 RETURN(-EINVAL);
266         }
267
268         count = desc->ld_count;
269         uuids = (struct obd_uuid *)lcfg->lcfg_inlbuf2;
270         if (sizeof(*uuids) * count != lcfg->lcfg_inllen2) {
271                 CERROR("UUID array size wrong: %u * %u != %u\n",
272                        sizeof(*uuids), count, lcfg->lcfg_inllen2);
273                 RETURN(-EINVAL);
274         }
275
276         lmv->bufsize = sizeof(struct lmv_tgt_desc) * count;
277         OBD_ALLOC(lmv->tgts, lmv->bufsize);
278         if (lmv->tgts == NULL) {
279                 CERROR("Out of memory\n");
280                 RETURN(-EINVAL);
281         }
282
283         for (i = 0, tgts = lmv->tgts; i < count; i++, tgts++) {
284                 tgts->uuid = uuids[i];
285                 lmv->count++;
286         }
287
288         lmv->max_easize = sizeof(struct ll_fid) * lmv->count
289                                         + sizeof(struct mea);
290         lmv->max_cookiesize = 0;
291
292         RETURN(rc);
293 }
294
295 static int lmv_statfs(struct obd_device *obd, struct obd_statfs *osfs,
296                       unsigned long max_age)
297 {
298         struct lmv_obd *lmv = &obd->u.lmv;
299         struct obd_statfs temp;
300         int rc = 0, i;
301         ENTRY;
302         lmv_connect(obd);
303         for (i = 0; i < lmv->count; i++) {
304                 rc = obd_statfs(lmv->tgts[i].exp->exp_obd, &temp, max_age);
305                 if (rc) {
306                         CERROR("can't stat MDS #%d (%s)\n", i,
307                                lmv->tgts[i].exp->exp_obd->obd_name);
308                         RETURN(rc);
309                 }
310                 if (i == 0) {
311                         memcpy(osfs, &temp, sizeof(temp));
312                 } else {
313                         osfs->os_bavail += temp.os_bavail;
314                         osfs->os_blocks += temp.os_blocks;
315                         osfs->os_ffree += temp.os_ffree;
316                         osfs->os_files += temp.os_files;
317                 }
318         }
319         RETURN(rc);
320 }
321
322 static int lmv_cleanup(struct obd_device *obd, int flags) 
323 {
324         struct lmv_obd *lmv = &obd->u.lmv;
325         ENTRY;
326         lmv_cleanup_objs(obd);
327         OBD_FREE(lmv->tgts, lmv->bufsize);
328         RETURN(0);
329 }
330
331 static int lmv_getstatus(struct obd_export *exp, struct ll_fid *fid)
332 {
333         struct obd_device *obd = exp->exp_obd;
334         struct lmv_obd *lmv = &obd->u.lmv;
335         int rc;
336         ENTRY;
337         lmv_connect(obd);
338         rc = md_getstatus(lmv->tgts[0].exp, fid);
339         fid->mds = 0;
340         RETURN(rc);
341 }
342
343 static int lmv_getattr(struct obd_export *exp, struct ll_fid *fid,
344                 unsigned long valid, unsigned int ea_size,
345                 struct ptlrpc_request **request)
346 {
347         struct obd_device *obd = exp->exp_obd;
348         struct lmv_obd *lmv = &obd->u.lmv;
349         int rc, i = fid->mds;
350         struct lmv_obj *obj;
351         ENTRY;
352         lmv_connect(obd);
353         obj = lmv_grab_obj(obd, fid, 0);
354         CDEBUG(D_OTHER, "GETATTR for %lu/%lu/%lu %s\n",
355                (unsigned long) fid->mds,
356                (unsigned long) fid->id,
357                (unsigned long) fid->generation,
358                obj ? "(splitted)" : "");
359
360         LASSERT(fid->mds < lmv->count);
361         rc = md_getattr(lmv->tgts[i].exp, fid,
362                              valid, ea_size, request);
363         if (rc == 0 && obj) {
364                 /* we have to loop over dirobjs here and gather attrs
365                  * for all the slaves */
366 #warning "attrs gathering here"
367         }
368         lmv_put_obj(obj);
369         RETURN(rc);
370 }
371
372 static int lmv_change_cbdata(struct obd_export *exp,
373                                  struct ll_fid *fid, 
374                                  ldlm_iterator_t it, void *data)
375 {
376         struct obd_device *obd = exp->exp_obd;
377         struct lmv_obd *lmv = &obd->u.lmv;
378         int rc = 0;
379         ENTRY;
380         lmv_connect(obd);
381         CDEBUG(D_OTHER, "CBDATA for %lu/%lu/%lu\n",
382                (unsigned long) fid->mds,
383                (unsigned long) fid->id,
384                (unsigned long) fid->generation);
385         LASSERT(fid->mds < lmv->count);
386         rc = md_change_cbdata(lmv->tgts[fid->mds].exp, fid, it, data);
387         RETURN(rc);
388 }
389
390 static int lmv_change_cbdata_name(struct obd_export *exp, struct ll_fid *pfid,
391                                   char *name, int len, struct ll_fid *cfid,
392                                   ldlm_iterator_t it, void *data)
393 {
394         struct obd_device *obd = exp->exp_obd;
395         struct lmv_obd *lmv = &obd->u.lmv;
396         struct lmv_obj *obj;
397         int rc = 0, mds;
398         ENTRY;
399         lmv_connect(obd);
400         LASSERT(pfid->mds < lmv->count);
401         LASSERT(cfid->mds < lmv->count);
402         CDEBUG(D_OTHER, "CBDATA for %lu/%lu/%lu:%*s -> %lu/%lu/%lu\n",
403                (unsigned long) pfid->mds, (unsigned long) pfid->id,
404                (unsigned long) pfid->generation, len, name,
405                (unsigned long) cfid->mds, (unsigned long) cfid->id,
406                (unsigned long) cfid->generation);
407
408         /* this is default mds for directory name belongs to */
409         mds = pfid->mds;
410         obj = lmv_grab_obj(obd, pfid, 0);
411         if (obj) {
412                 /* directory is splitted. look for right mds for this name */
413                 mds = raw_name2idx(obj->objcount, name, len);
414                 lmv_put_obj(obj);
415         }
416         rc = md_change_cbdata(lmv->tgts[mds].exp, cfid, it, data);
417         RETURN(rc);
418 }
419
420 static int lmv_valid_attrs(struct obd_export *exp, struct ll_fid *fid) 
421 {
422         struct obd_device *obd = exp->exp_obd;
423         struct lmv_obd *lmv = &obd->u.lmv;
424         int rc = 0;
425         ENTRY;
426         lmv_connect(obd);
427         CDEBUG(D_OTHER, "validate %lu/%lu/%lu\n",
428                (unsigned long) fid->mds,
429                (unsigned long) fid->id,
430                (unsigned long) fid->generation);
431         LASSERT(fid->mds < lmv->count);
432         rc = md_valid_attrs(lmv->tgts[fid->mds].exp, fid);
433         RETURN(rc);
434 }
435
436 int lmv_close(struct obd_export *exp, struct obdo *obdo,
437                   struct obd_client_handle *och,
438                   struct ptlrpc_request **request)
439 {
440         struct obd_device *obd = exp->exp_obd;
441         struct lmv_obd *lmv = &obd->u.lmv;
442         int rc, i = obdo->o_mds;
443         ENTRY;
444         lmv_connect(obd);
445         LASSERT(i < lmv->count);
446         CDEBUG(D_OTHER, "CLOSE %lu/%lu/%lu\n", (unsigned long) obdo->o_mds,
447                (unsigned long) obdo->o_id, (unsigned long) obdo->o_generation);
448         rc = md_close(lmv->tgts[i].exp, obdo, och, request);
449         RETURN(rc);
450 }
451
452 int lmv_create(struct obd_export *exp, struct mdc_op_data *op_data,
453                    const void *data, int datalen, int mode, __u32 uid,
454                    __u32 gid, __u64 rdev, struct ptlrpc_request **request)
455 {
456         struct obd_device *obd = exp->exp_obd;
457         struct lmv_obd *lmv = &obd->u.lmv;
458         struct mea *mea = op_data->mea1;
459         struct mds_body *mds_body;
460         int rc, i, free_mea = 0;
461         ENTRY;
462         lmv_connect(obd);
463         /* TODO: where to create new directories?
464          * current design don't support directory on a slave MDS,
465          * but we lookup by name may forward any request in slave
466          */
467 repeat:
468         i = mea_name2idx(mea, (char *) op_data->name, op_data->namelen);
469         if (mea)
470                 op_data->fid1 = mea->mea_fids[i];
471
472         CDEBUG(D_OTHER, "CREATE '%*s' on %lu/%lu/%lu (mea 0x%p)\n",
473                         op_data->namelen, op_data->name,
474                         (unsigned long) op_data->fid1.mds,
475                         (unsigned long) op_data->fid1.id,
476                         (unsigned long) op_data->fid1.generation, mea);
477         rc = md_create(lmv->tgts[i].exp, op_data, data, datalen,
478                             mode, uid, gid, rdev, request);
479         if (rc == 0) {
480                 if (*request == NULL)
481                      RETURN(rc);
482                 mds_body = lustre_msg_buf((*request)->rq_repmsg, 0,
483                                           sizeof(*mds_body));
484                 LASSERT(mds_body != NULL);
485                 CDEBUG(D_OTHER, "created. id = %lu, generation = %lu, mds = %d\n",
486                        (unsigned long) mds_body->fid1.id,
487                        (unsigned long) mds_body->fid1.generation, i);
488                 LASSERT(mds_body->mds == i);
489         } else if (rc == -ESTALE) {
490                 struct ptlrpc_request *req = NULL;
491                 struct lustre_md md;
492                 int mealen;
493                 
494                 CDEBUG(D_OTHER, "it seems MDS splitted dir\n");
495                 LASSERT(mea == NULL);
496
497                 mealen = sizeof(struct ll_fid)*lmv->count + sizeof(struct mea);
498                 /* time to update mea of parent fid */
499                 i = op_data->fid1.mds;
500                 rc = md_getattr(lmv->tgts[i].exp, &op_data->fid1,
501                                         OBD_MD_FLEASIZE, mealen, &req);
502                 LASSERT(rc == 0);
503                 md.mea = NULL;
504                 rc = mdc_req2lustre_md(req, 0, NULL, exp, &md);
505                 LASSERT(rc == 0);
506                 LASSERT(md.mea != NULL);
507                 mea = md.mea;
508                 ptlrpc_req_finished(req);
509                 free_mea = 1;
510
511                 goto repeat;
512         }
513         if (free_mea)
514                 obd_free_memmd(exp, (struct lov_stripe_md**) &mea);
515         RETURN(rc);
516 }
517
518 int lmv_done_writing(struct obd_export *exp, struct obdo *obdo)
519 {
520         struct obd_device *obd = exp->exp_obd;
521         struct lmv_obd *lmv = &obd->u.lmv;
522         int rc;
523         ENTRY;
524         lmv_connect(obd);
525         /* FIXME: choose right MDC here */
526         rc = md_done_writing(lmv->tgts[0].exp, obdo);
527         RETURN(rc);
528 }
529
530 int lmv_enqueue(struct obd_export *exp, int lock_type,
531                     struct lookup_intent *it, int lock_mode,
532                     struct mdc_op_data *data, struct lustre_handle *lockh,
533                     void *lmm, int lmmsize,
534                     ldlm_completion_callback cb_completion,
535                     ldlm_blocking_callback cb_blocking, void *cb_data)
536 {
537         struct obd_device *obd = exp->exp_obd;
538         struct lmv_obd *lmv = &obd->u.lmv;
539         struct lmv_obj *obj;
540         int rc, mds;
541         ENTRY;
542         lmv_connect(obd);
543         if (data->namelen) {
544                 obj = lmv_grab_obj(obd, &data->fid1, 0);
545                 if (obj) {
546                         /* directory is splitted. look for
547                          * right mds for this name */
548                         mds = raw_name2idx(obj->objcount, data->name,
549                                                 data->namelen);
550                         data->fid1 = obj->objs[mds].fid;
551                         lmv_put_obj(obj);
552                 }
553         }
554         CDEBUG(D_OTHER, "ENQUEUE '%s' on %lu/%lu\n",
555                LL_IT2STR(it), (unsigned long) data->fid1.id,
556                (unsigned long) data->fid1.generation);
557         rc = md_enqueue(lmv->tgts[data->fid1.mds].exp, lock_type, it,
558                         lock_mode, data, lockh, lmm, lmmsize, cb_completion,
559                         cb_blocking, cb_data);
560
561         RETURN(rc);
562 }
563
564 int lmv_getattr_name(struct obd_export *exp, struct ll_fid *fid,
565                          char *filename, int namelen, unsigned long valid,
566                          unsigned int ea_size, struct ptlrpc_request **request)
567 {
568         struct obd_device *obd = exp->exp_obd;
569         struct lmv_obd *lmv = &obd->u.lmv;
570         struct ll_fid rfid = *fid;
571         int rc, mds = fid->mds;
572         struct lmv_obj *obj;
573         ENTRY;
574         lmv_connect(obd);
575         CDEBUG(D_OTHER, "getattr_name for %*s on %lu/%lu/%lu\n",
576                namelen - 1, filename, (unsigned long) fid->mds,
577                (unsigned long) fid->id, (unsigned long) fid->generation);
578         obj = lmv_grab_obj(obd, fid, 0);
579         if (obj) {
580                 /* directory is splitted. look for right mds for this name */
581                 mds = raw_name2idx(obj->objcount, filename, namelen - 1);
582                 rfid = obj->objs[mds].fid;
583                 lmv_put_obj(obj);
584         }
585         rc = md_getattr_name(lmv->tgts[mds].exp, &rfid, filename, namelen,
586                                   valid, ea_size, request);
587         RETURN(rc);
588 }
589
590
591 /*
592  * llite passes fid of an target inode in data->fid1 and
593  * fid of directory in data->fid2
594  */
595 int lmv_link(struct obd_export *exp, struct mdc_op_data *data,
596              struct ptlrpc_request **request)
597 {
598         struct obd_device *obd = exp->exp_obd;
599         struct lmv_obd *lmv = &obd->u.lmv;
600         struct mea *mea = data->mea2;
601         int rc, i;
602         ENTRY;
603         lmv_connect(obd);
604         if (data->namelen != 0) {
605                 /* usual link request */
606                 i = mea_name2idx(mea, (char *) data->name, data->namelen);
607                 if (mea)
608                         data->fid2 = mea->mea_fids[i];
609                 CDEBUG(D_OTHER,"link %u/%u/%u:%*s to %u/%u/%u mds %d mea %p\n",
610                        (unsigned) data->fid2.mds, (unsigned) data->fid2.id,
611                        (unsigned) data->fid2.generation, data->namelen,
612                        data->name, (unsigned) data->fid1.mds,
613                        (unsigned) data->fid1.id,
614                        (unsigned) data->fid1.generation, i, mea);
615         } else {
616                 /* request from MDS to acquire i_links for inode by fid1 */
617                 i = data->fid1.mds;
618                 CDEBUG(D_OTHER, "inc i_nlinks for %u/%u/%u\n",
619                        (unsigned) data->fid1.mds, (unsigned) data->fid1.id,
620                        (unsigned) data->fid1.generation);
621         }
622                         
623         rc = md_link(lmv->tgts[i].exp, data, request);
624         RETURN(rc);
625 }
626
627 int lmv_rename(struct obd_export *exp, struct mdc_op_data *data,
628                const char *old, int oldlen, const char *new, int newlen,
629                struct ptlrpc_request **request)
630 {
631         struct obd_device *obd = exp->exp_obd;
632         struct lmv_obd *lmv = &obd->u.lmv;
633         struct lmv_obj *obj;
634         int rc, mds;
635         ENTRY;
636
637         CDEBUG(D_OTHER, "rename %*s in %lu/%lu/%lu to %*s in %lu/%lu/%lu\n",
638                oldlen, old, (unsigned long) data->fid1.mds,
639                (unsigned long) data->fid1.id,
640                (unsigned long) data->fid1.generation,
641                newlen, new, (unsigned long) data->fid2.mds,
642                (unsigned long) data->fid2.id,
643                (unsigned long) data->fid2.generation);
644
645         lmv_connect(obd);
646
647         if (oldlen == 0) {
648                 /* MDS with old dir entry is asking another MDS
649                  * to create name there */
650                 CDEBUG(D_OTHER,
651                        "create %*s(%d/%d) in %lu/%lu/%lu pointing to %lu/%lu/%lu\n",
652                        newlen, new, oldlen, newlen,
653                        (unsigned long) data->fid2.mds,
654                        (unsigned long) data->fid2.id,
655                        (unsigned long) data->fid2.generation,
656                        (unsigned long) data->fid1.mds,
657                        (unsigned long) data->fid1.id,
658                        (unsigned long) data->fid1.generation);
659                 mds = data->fid2.mds;
660                 goto request;
661         }
662
663         obj = lmv_grab_obj(obd, &data->fid1, 0);
664         if (obj) {
665                 /* directory is already splitted, so we have to forward
666                  * request to the right MDS */
667                 mds = raw_name2idx(obj->objcount, old, oldlen);
668                 data->fid1 = obj->objs[mds].fid;
669                 CDEBUG(D_OTHER, "forward to MDS #%u (%lu/%lu/%lu)\n", mds,
670                        (unsigned long) obj->objs[mds].fid.mds,
671                        (unsigned long) obj->objs[mds].fid.id,
672                        (unsigned long) obj->objs[mds].fid.generation);
673         }
674         lmv_put_obj(obj);
675
676         obj = lmv_grab_obj(obd, &data->fid2, 0);
677         if (obj) {
678                 /* directory is already splitted, so we have to forward
679                  * request to the right MDS */
680                 mds = raw_name2idx(obj->objcount, new, newlen);
681                 data->fid2 = obj->objs[mds].fid;
682                 CDEBUG(D_OTHER, "forward to MDS #%u (%lu/%lu/%lu)\n", mds,
683                        (unsigned long) obj->objs[mds].fid.mds,
684                        (unsigned long) obj->objs[mds].fid.id,
685                        (unsigned long) obj->objs[mds].fid.generation);
686         }
687         lmv_put_obj(obj);
688         
689         mds = data->fid1.mds;
690
691 request:
692         rc = md_rename(lmv->tgts[mds].exp, data, old, oldlen,
693                             new, newlen, request); 
694         RETURN(rc);
695 }
696
697 int lmv_setattr(struct obd_export *exp, struct mdc_op_data *data,
698                 struct iattr *iattr, void *ea, int ealen, void *ea2, int ea2len,
699                 struct ptlrpc_request **request)
700 {
701         struct obd_device *obd = exp->exp_obd;
702         struct lmv_obd *lmv = &obd->u.lmv;
703         int rc = 0, i = data->fid1.mds;
704         struct ptlrpc_request *req;
705         struct mds_body *mds_body;
706         struct lmv_obj *obj;
707         ENTRY;
708         lmv_connect(obd);
709         obj = lmv_grab_obj(obd, &data->fid1, 0);
710         CDEBUG(D_OTHER, "SETATTR for %lu/%lu/%lu, valid 0x%x%s\n",
711                (unsigned long) data->fid1.mds,
712                (unsigned long) data->fid1.id,
713                (unsigned long) data->fid1.generation, iattr->ia_valid,
714                obj ? ", splitted" : "");
715         if (obj) {
716                 for (i = 0; i < obj->objcount; i++) {
717                         data->fid1 = obj->objs[i].fid;
718                         rc = md_setattr(lmv->tgts[i].exp, data, iattr, ea,
719                                         ealen, ea2, ea2len, &req);
720                         LASSERT(rc == 0);
721                         if (fid_equal(&obj->fid, &obj->objs[i].fid)) {
722                                 /* this is master object and this request
723                                  * should be returned back to llite */
724                                 *request = req;
725                         } else {
726                                 ptlrpc_req_finished(req);
727                         }
728                 }
729                 lmv_put_obj(obj);
730         } else {
731                 LASSERT(data->fid1.mds < lmv->count);
732                 rc = md_setattr(lmv->tgts[i].exp, data, iattr, ea, ealen,
733                                 ea2, ea2len, request); 
734                 if (rc == 0) {
735                         mds_body = lustre_msg_buf((*request)->rq_repmsg, 0,
736                                         sizeof(*mds_body));
737                         LASSERT(mds_body != NULL);
738                         LASSERT(mds_body->mds == i);
739                 }
740         }
741         RETURN(rc);
742 }
743
744 int lmv_sync(struct obd_export *exp, struct ll_fid *fid,
745              struct ptlrpc_request **request)
746 {
747         struct obd_device *obd = exp->exp_obd;
748         struct lmv_obd *lmv = &obd->u.lmv;
749         int rc;
750         ENTRY;
751         lmv_connect(obd);
752         rc = md_sync(lmv->tgts[0].exp, fid, request); 
753         RETURN(rc);
754 }
755
756 int lmv_dirobj_blocking_ast(struct ldlm_lock *lock,
757                             struct ldlm_lock_desc *desc, void *data, int flag)
758 {
759         struct lustre_handle lockh;
760         struct lmv_obj *obj;
761         int rc;
762         ENTRY;
763
764         switch (flag) {
765         case LDLM_CB_BLOCKING:
766                 ldlm_lock2handle(lock, &lockh);
767                 rc = ldlm_cli_cancel(&lockh);
768                 if (rc < 0) {
769                         CDEBUG(D_INODE, "ldlm_cli_cancel: %d\n", rc);
770                         RETURN(rc);
771                 }
772                 break;
773         case LDLM_CB_CANCELING:
774                 /* time to drop cached attrs for dirobj */
775                 obj = lock->l_ast_data;
776                 if (!obj)
777                         break;
778
779                 CDEBUG(D_OTHER, "cancel %s on %lu/%lu, master %lu/%lu/%lu\n",
780                        lock->l_resource->lr_name.name[3] == 1 ?
781                                 "LOOKUP" : "UPDATE",
782                        (unsigned long) lock->l_resource->lr_name.name[0],
783                        (unsigned long) lock->l_resource->lr_name.name[1],
784                        (unsigned long) obj->fid.mds,
785                        (unsigned long) obj->fid.id,
786                        (unsigned long) obj->fid.generation);
787                 break;
788         default:
789                 LBUG();
790         }
791         RETURN(0);
792 }
793
794 int lmv_readpage(struct obd_export *exp, struct ll_fid *mdc_fid,
795                  __u64 offset, struct page *page,
796                  struct ptlrpc_request **request)
797 {
798         struct obd_device *obd = exp->exp_obd;
799         struct lmv_obd *lmv = &obd->u.lmv;
800         struct ll_fid rfid = *mdc_fid;
801         struct lmv_obj *obj;
802         int rc, i;
803         ENTRY;
804         lmv_connect(obd);
805        
806         LASSERT(mdc_fid->mds < lmv->count);
807         CDEBUG(D_OTHER, "READPAGE at %llu from %lu/%lu/%lu\n",
808                offset, (unsigned long) rfid.mds,
809                (unsigned long) rfid.id,
810                (unsigned long) rfid.generation);
811
812         obj = lmv_grab_obj(obd, mdc_fid, 0);
813         if (obj) {
814                 /* find dirobj containing page with requested offset */
815                 /* FIXME: what about protecting cached attrs here? */
816                 for (i = 0; i < obj->objcount; i++) {
817                         if (offset < obj->objs[i].size)
818                                 break;
819                         offset -= obj->objs[i].size;
820                 }
821                 rfid = obj->objs[i].fid;
822                 CDEBUG(D_OTHER, "forward to %lu/%lu/%lu with offset %lu\n",
823                        (unsigned long) rfid.mds,
824                        (unsigned long) rfid.id,
825                        (unsigned long) rfid.generation,
826                        (unsigned long) offset);
827         }
828         rc = md_readpage(lmv->tgts[rfid.mds].exp, &rfid, offset, page, request);
829       
830         lmv_put_obj(obj);
831
832 #warning "we need fix for duplicate . and .. from slaves"
833
834         RETURN(rc);
835 }
836
837 int lmv_unlink(struct obd_export *exp, struct mdc_op_data *data,
838                struct ptlrpc_request **request)
839 {
840         struct obd_device *obd = exp->exp_obd;
841         struct lmv_obd *lmv = &obd->u.lmv;
842         struct mea *mea = data->mea1;
843         int rc, i = 0;
844         ENTRY;
845         lmv_connect(obd);
846         if (data->namelen != 0) {
847                 i = mea_name2idx(mea, (char *) data->name, data->namelen);
848                 if (mea)
849                         data->fid1 = mea->mea_fids[i];
850                 CDEBUG(D_OTHER, "unlink '%*s' in %lu/%lu/%lu -> %u\n",
851                        data->namelen, data->name,
852                        (unsigned long) data->fid1.mds,
853                        (unsigned long) data->fid1.id,
854                        (unsigned long) data->fid1.generation, i);
855         } else {
856                 i = data->fid1.mds;
857                 CDEBUG(D_OTHER, "drop i_nlink on %lu/%lu/%lu\n",
858                        (unsigned long) data->fid1.mds,
859                        (unsigned long) data->fid1.id,
860                        (unsigned long) data->fid1.generation);
861         }
862         rc = md_unlink(lmv->tgts[i].exp, data, request); 
863         RETURN(rc);
864 }
865
866 struct obd_device *lmv_get_real_obd(struct obd_export *exp,
867                                         char *name, int len)
868 {
869         struct obd_device *obd = exp->exp_obd;
870         struct lmv_obd *lmv = &obd->u.lmv;
871         ENTRY;
872         lmv_connect(obd);
873         obd = lmv->tgts[0].exp->exp_obd;
874         EXIT;
875         return obd;
876 }
877
878 int lmv_init_ea_size(struct obd_export *exp, int easize, int cookiesize)
879 {
880         struct obd_device *obd = exp->exp_obd;
881         struct lmv_obd *lmv = &obd->u.lmv;
882         int i, rc = 0, change = 0;
883         ENTRY;
884
885         if (lmv->max_easize < easize) {
886                 lmv->max_easize = easize;
887                 change = 1;
888         }
889         if (lmv->max_cookiesize < cookiesize) {
890                 lmv->max_cookiesize = cookiesize;
891                 change = 1;
892         }
893         if (change == 0)
894                 RETURN(0);
895         
896         if (lmv->connected == 0)
897                 RETURN(0);
898
899         /* FIXME: error handling? */
900         for (i = 0; i < lmv->count; i++)
901                 rc = obd_init_ea_size(lmv->tgts[i].exp, easize, cookiesize);
902         RETURN(rc);
903 }
904
905 /*
906  * to be called from MDS only
907  */
908 int lmv_obd_create(struct obd_export *exp, struct obdo *oa,
909                struct lov_stripe_md **ea, struct obd_trans_info *oti)
910 {
911         struct obd_device *obd = exp->exp_obd;
912         struct lmv_obd *lmv = &obd->u.lmv;
913         struct mea *mea;
914         int i, c, rc = 0;
915         struct ll_fid mfid;
916         ENTRY;
917         lmv_connect(obd);
918
919         LASSERT(ea != NULL);
920         LASSERT(oa != NULL);
921
922         if (*ea == NULL) {
923                 rc = obd_alloc_diskmd(exp, (struct lov_mds_md **) ea);
924                 LASSERT(*ea != NULL);
925         }
926
927         mea = (struct mea *) *ea;
928         mfid.id = oa->o_id;
929         mfid.generation = oa->o_generation;
930         rc = 0;
931         if (!mea->mea_count || mea->mea_count > lmv->count)
932                 mea->mea_count = lmv->count;
933
934         mea->mea_master = -1;
935         
936         /* FIXME: error handling? */
937         for (i = 0, c = 0; c < mea->mea_count && i < lmv->count; i++) {
938                 struct lov_stripe_md obj_md;
939                 struct lov_stripe_md *obj_mdp = &obj_md;
940                
941                 if (lmv->tgts[i].exp == NULL) {
942                         /* this is master MDS */
943                         mea->mea_fids[c].id = mfid.id;
944                         mea->mea_fids[c].generation = mfid.generation;
945                         mea->mea_fids[c].mds = i;
946                         mea->mea_master = i;
947                         c++;
948                         continue;
949                 }
950
951                 /* "Master" MDS should always be part of stripped dir, so
952                    scan for it */
953                 if (mea->mea_master == -1 && c == mea->mea_count - 1)
954                         continue;
955
956                 oa->o_valid = OBD_MD_FLGENER | OBD_MD_FLTYPE | OBD_MD_FLMODE
957                                 | OBD_MD_FLUID | OBD_MD_FLGID;
958
959                 rc = obd_create(lmv->tgts[c].exp, oa, &obj_mdp, oti);
960                 /* FIXME: error handling here */
961                 LASSERT(rc == 0);
962
963                 mea->mea_fids[c].id = oa->o_id;
964                 mea->mea_fids[c].generation = oa->o_generation;
965                 mea->mea_fids[c].mds = i;
966                 c++;
967                 CDEBUG(D_OTHER, "dirobj at mds %d: "LPU64"/%u\n",
968                        i, oa->o_id, oa->o_generation);
969         }
970         LASSERT(c == mea->mea_count);
971         CDEBUG(D_OTHER, "%d dirobjects created\n", (int) mea->mea_count);
972
973         RETURN(rc);
974 }
975
976 static int lmv_get_info(struct obd_export *exp, __u32 keylen,
977                            void *key, __u32 *vallen, void *val)
978 {
979         struct obd_device *obd;
980         struct lmv_obd *lmv;
981         ENTRY;
982
983         obd = class_exp2obd(exp);
984         if (obd == NULL) {
985                 CDEBUG(D_IOCTL, "invalid client cookie "LPX64"\n",
986                        exp->exp_handle.h_cookie);
987                 RETURN(-EINVAL);
988         }
989
990         lmv = &obd->u.lmv;
991         if (keylen == 6 && memcmp(key, "mdsize", 6) == 0) {
992                 __u32 *mdsize = val;
993                 *vallen = sizeof(__u32);
994                 *mdsize = sizeof(struct ll_fid) * lmv->count
995                                 + sizeof(struct mea);
996                 RETURN(0);
997         } else if (keylen == 6 && memcmp(key, "mdsnum", 6) == 0) {
998                 struct obd_uuid *cluuid = &lmv->cluuid;
999                 struct lmv_tgt_desc *tgts;
1000                 __u32 *mdsnum = val;
1001                 int i;
1002
1003                 for (i = 0, tgts = lmv->tgts; i < lmv->count; i++, tgts++) {
1004                         if (obd_uuid_equals(&tgts->uuid, cluuid)) {
1005                                 *vallen = sizeof(__u32);
1006                                 *mdsnum = i;
1007                                 RETURN(0);
1008                         }
1009                 }
1010                 LASSERT(0);
1011         }
1012
1013         CDEBUG(D_IOCTL, "invalid key\n");
1014         RETURN(-EINVAL);
1015 }
1016
1017 int lmv_set_info(struct obd_export *exp, obd_count keylen,
1018                  void *key, obd_count vallen, void *val)
1019 {
1020         struct obd_device *obd;
1021         struct lmv_obd *lmv;
1022         ENTRY;
1023
1024         obd = class_exp2obd(exp);
1025         if (obd == NULL) {
1026                 CDEBUG(D_IOCTL, "invalid client cookie "LPX64"\n",
1027                        exp->exp_handle.h_cookie);
1028                 RETURN(-EINVAL);
1029         }
1030         lmv = &obd->u.lmv;
1031         lmv_connect(obd);
1032
1033         if (keylen >= strlen("client") && strcmp(key, "client") == 0) {
1034                 struct lmv_tgt_desc *tgts;
1035                 int i, rc;
1036
1037                 for (i = 0, tgts = lmv->tgts; i < lmv->count; i++, tgts++) {
1038                         rc = obd_set_info(tgts->exp, keylen, key, vallen, val);
1039                         if (rc)
1040                                 RETURN(rc);
1041                 }
1042                 RETURN(0);
1043         }
1044         
1045         RETURN(-EINVAL);
1046 }
1047
1048 int lmv_packmd(struct obd_export *exp, struct lov_mds_md **lmmp,
1049                struct lov_stripe_md *lsm)
1050 {
1051         struct obd_device *obd = class_exp2obd(exp);
1052         struct lmv_obd *lmv = &obd->u.lmv;
1053         int mea_size;
1054         ENTRY;
1055         lmv_connect(obd);
1056
1057         mea_size = sizeof(struct ll_fid) * lmv->count + sizeof(struct mea);
1058         if (!lmmp)
1059                 RETURN(mea_size);
1060
1061         if (*lmmp && !lsm) {
1062                 OBD_FREE(*lmmp, mea_size);
1063                 *lmmp = NULL;
1064                 RETURN(0);
1065         }
1066
1067         if (!*lmmp) {
1068                 OBD_ALLOC(*lmmp, mea_size);
1069                 if (!*lmmp)
1070                         RETURN(-ENOMEM);
1071         }
1072
1073         if (!lsm)
1074                 RETURN(mea_size);
1075
1076 #warning "MEA packing/convertation must be here! -bzzz"
1077         memcpy(*lmmp, lsm, mea_size);
1078         RETURN(mea_size);
1079 }
1080
1081 int lmv_unpackmd(struct obd_export *exp, struct lov_stripe_md **mem_tgt,
1082                         struct lov_mds_md *disk_src, int mdsize)
1083 {
1084         struct obd_device *obd = class_exp2obd(exp);
1085         struct lmv_obd *lmv = &obd->u.lmv;
1086         struct mea **tmea = (struct mea **) mem_tgt;
1087         struct mea *mea = (void *) disk_src;
1088         int mea_size;
1089         ENTRY;
1090         lmv_connect(obd);
1091
1092         mea_size = sizeof(struct ll_fid) * lmv->count + sizeof(struct mea);
1093         if (mem_tgt == NULL)
1094                 return mea_size;
1095
1096         if (*mem_tgt != NULL && disk_src == NULL) {
1097                 OBD_FREE(*tmea, mea_size);
1098                 RETURN(0);
1099         }
1100
1101         LASSERT(mea_size == mdsize);
1102
1103         OBD_ALLOC(*tmea, mea_size);
1104         /* FIXME: error handling here */
1105         LASSERT(*tmea != NULL);
1106
1107         if (!disk_src)
1108                 RETURN(mea_size);
1109
1110 #warning "MEA unpacking/convertation must be here! -bzzz"
1111         memcpy(*tmea, mea, mdsize);
1112         RETURN(mea_size);
1113 }
1114
1115 int lmv_brw(int rw, struct obd_export *exp, struct obdo *oa,
1116                 struct lov_stripe_md *ea, obd_count oa_bufs,
1117                 struct brw_page *pgarr, struct obd_trans_info *oti)
1118 {
1119         struct obd_device *obd = exp->exp_obd;
1120         struct lmv_obd *lmv = &obd->u.lmv;
1121         struct mea *mea = (struct mea *) ea;
1122         int err;
1123       
1124         LASSERT(oa != NULL);
1125         LASSERT(ea != NULL);
1126         LASSERT(pgarr != NULL);
1127         LASSERT(oa->o_mds < lmv->count);
1128
1129         oa->o_gr = mea->mea_fids[oa->o_mds].generation;
1130         oa->o_id = mea->mea_fids[oa->o_mds].id;
1131         oa->o_valid =  OBD_MD_FLID | OBD_MD_FLGROUP;
1132         err = obd_brw(rw, lmv->tgts[oa->o_mds].exp, oa,
1133                         NULL, oa_bufs, pgarr, oti);
1134         RETURN(err);
1135 }
1136
1137 struct obd_ops lmv_obd_ops = {
1138         o_owner:                THIS_MODULE,
1139         o_attach:               lmv_attach,
1140         o_detach:               lmv_detach,
1141         o_setup:                lmv_setup,
1142         o_cleanup:              lmv_cleanup,
1143         o_connect:              lmv_connect_fake,
1144         o_disconnect:           lmv_disconnect,
1145         o_statfs:               lmv_statfs,
1146         o_get_info:             lmv_get_info,
1147         o_set_info:             lmv_set_info,
1148         o_create:               lmv_obd_create,
1149         o_packmd:               lmv_packmd,
1150         o_unpackmd:             lmv_unpackmd,
1151         o_brw:                  lmv_brw,
1152         o_init_ea_size:         lmv_init_ea_size,
1153 };
1154
1155 struct md_ops lmv_md_ops = {
1156         m_getstatus:            lmv_getstatus,
1157         m_getattr:              lmv_getattr,
1158         m_change_cbdata:        lmv_change_cbdata,
1159         m_change_cbdata_name:   lmv_change_cbdata_name,
1160         m_close:                lmv_close,
1161         m_create:               lmv_create,
1162         m_done_writing:         lmv_done_writing,
1163         m_enqueue:              lmv_enqueue,
1164         m_getattr_name:         lmv_getattr_name,
1165         m_intent_lock:          lmv_intent_lock,
1166         m_link:                 lmv_link,
1167         m_rename:               lmv_rename,
1168         m_setattr:              lmv_setattr,
1169         m_sync:                 lmv_sync,
1170         m_readpage:             lmv_readpage,
1171         m_unlink:               lmv_unlink,
1172         m_get_real_obd:         lmv_get_real_obd,
1173         m_valid_attrs:          lmv_valid_attrs,
1174 };
1175
1176 //#ifndef LPROCFS
1177 static struct lprocfs_vars lprocfs_module_vars[] = { {0} };
1178 static struct lprocfs_vars lprocfs_obd_vars[] = { {0} };
1179 //#else
1180 LPROCFS_INIT_VARS(lmv, lprocfs_module_vars, lprocfs_obd_vars)
1181
1182 int __init lmv_init(void)
1183 {
1184         struct lprocfs_static_vars lvars;
1185         int rc;
1186
1187         lprocfs_init_vars(lmv, &lvars);
1188         rc = class_register_type(&lmv_obd_ops, &lmv_md_ops,
1189                                  lvars.module_vars, OBD_LMV_DEVICENAME);
1190         RETURN(rc);
1191 }
1192
1193 static void lmv_exit(void)
1194 {
1195         class_unregister_type(OBD_LMV_DEVICENAME);
1196 }
1197
#ifdef __KERNEL__
/* kernel module entry/exit points */
module_init(lmv_init);
module_exit(lmv_exit);

/* kernel module metadata */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
MODULE_DESCRIPTION("Lustre Logical Metadata Volume OBD driver");
#endif
1206