1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Copyright (C) 2002, 2003 Cluster File Systems, Inc.
6 * This file is part of Lustre, http://www.lustre.org.
8 * Lustre is free software; you can redistribute it and/or
9 * modify it under the terms of version 2 of the GNU General Public
10 * License as published by the Free Software Foundation.
12 * Lustre is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Lustre; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 # define EXPORT_SYMTAB
25 #define DEBUG_SUBSYSTEM S_LMV
27 #include <linux/slab.h>
28 #include <linux/module.h>
29 #include <linux/init.h>
30 #include <linux/slab.h>
31 #include <linux/pagemap.h>
32 #include <asm/div64.h>
34 #include <liblustre.h>
37 #include <linux/obd_support.h>
38 #include <linux/lustre_lib.h>
39 #include <linux/lustre_net.h>
40 #include <linux/lustre_idl.h>
41 #include <linux/lustre_dlm.h>
42 #include <linux/lustre_mds.h>
43 #include <linux/obd_class.h>
44 #include <linux/obd_ost.h>
45 #include <linux/seq_file.h>
46 #include <linux/lprocfs_status.h>
47 #include <linux/lustre_fsfilt.h>
48 #include <linux/obd_lmv.h>
49 #include "lmv_internal.h"
/*
 * Attach hook (installed as o_attach in lmv_obd_ops): initialises the
 * lprocfs variable tables for "lmv", attaches the device's lprocfs entries
 * and creates a mode-0444 "target_obd" proc entry under
 * dev->obd_proc_entry.  Wiring up the entry's proc_fops is commented out.
 *
 * NOTE(review): this listing skips source lines (the embedded numbering
 * jumps 51 -> 53, 54 -> 58, ...), so the declaration of rc, any error
 * checks and the return statement are not visible here.
 */
51 int lmv_attach(struct obd_device *dev, obd_count len, void *data)
53 struct lprocfs_static_vars lvars;
54 struct proc_dir_entry *entry;
58 lprocfs_init_vars(lmv, &lvars);
59 rc = lprocfs_obd_attach(dev, lvars.obd_vars);
63 entry = create_proc_entry("target_obd", 0444, dev->obd_proc_entry);
66 /* entry->proc_fops = &lmv_proc_target_fops; */
/*
 * Detach hook (installed as o_detach in lmv_obd_ops): returns whatever
 * lprocfs_obd_detach() reports for this device.
 */
72 int lmv_detach(struct obd_device *dev)
74 return lprocfs_obd_detach(dev);
/*
 * Connect hook (installed as o_connect in lmv_obd_ops).  Performs the
 * class-level connect only: on success the export obtained from the
 * connection handle is stored in lmv->exp and the caller's UUID is copied
 * into lmv->cluuid.  No connection to the targets is visible here; that
 * work is done by lmv_connect().
 *
 * NOTE(review): several source lines are missing from this listing (rc
 * declaration, the failure branch around the CERROR, the return), so the
 * exact error path cannot be confirmed from here.
 */
77 static int lmv_connect_fake(struct lustre_handle *conn,
78 struct obd_device *obd,
79 struct obd_uuid *cluuid)
81 struct lmv_obd *lmv = &obd->u.lmv;
85 rc = class_connect(conn, obd, cluuid);
87 CERROR("class_connection() returned %d\n", rc);
/* remember the export and the client UUID for the later target connects */
91 lmv->exp = class_conn2export(conn);
92 LASSERT(lmv->exp != NULL);
94 lmv->cluuid = *cluuid;
/*
 * Connects this LMV to its targets.  For every entry in lmv->tgts the
 * visible code:
 *   - looks up the client device for the target UUID (LUSTRE_MDC_NAME) and
 *     fails with -EINVAL if it is not attached or not set up;
 *   - leaves out the target whose UUID equals lmv->cluuid ("don't connect
 *     to yourself" on an MDS);
 *   - connects with the fixed UUID "LMV_OSC_UUID", stores the resulting
 *     export in tgts->exp, pushes the current max EA/cookie sizes to it and
 *     registers this LMV as the target's observer.
 *
 * NOTE(review): the listing skips source lines here.  Not visible: where
 * exp is initialised, the declarations of rc and i, what maintains
 * lmv->refcount, the conditions guarding each CERROR, the control flow
 * after "don't connect back", and the out_disc label itself.  The trailing
 * "FIXME: cleanup here" suggests already-connected targets are not undone
 * on failure; confirm against the full source.
 */
100 int lmv_connect(struct obd_device *obd)
102 struct lmv_obd *lmv = &obd->u.lmv;
103 struct obd_uuid *cluuid;
104 struct lmv_tgt_desc *tgts;
105 struct obd_export *exp;
113 cluuid = &lmv->cluuid;
115 CDEBUG(D_OTHER, "time to connect %s to %s\n",
116 cluuid->uuid, obd->obd_name);
118 /* We don't want to actually do the underlying connections more than
119 * once, so keep track. */
121 if (lmv->refcount > 1) {
122 class_export_put(exp);
126 for (i = 0, tgts = lmv->tgts; i < lmv->count; i++, tgts++) {
127 struct obd_device *tgt_obd;
128 struct obd_uuid lmv_osc_uuid = { "LMV_OSC_UUID" };
129 struct lustre_handle conn = {0, };
131 LASSERT(tgts != NULL);
/* find the MDC device that serves this target UUID */
133 tgt_obd = class_find_client_obd(&tgts->uuid, LUSTRE_MDC_NAME,
136 CERROR("Target %s not attached\n", tgts->uuid.uuid);
137 GOTO(out_disc, rc = -EINVAL);
140 /* for MDS: don't connect to yourself */
141 if (obd_uuid_equals(&tgts->uuid, cluuid)) {
142 CDEBUG(D_OTHER, "don't connect back to %s\n",
148 CDEBUG(D_OTHER, "connect to %s(%s) - %s, %s FOR %s\n",
149 tgt_obd->obd_name, tgt_obd->obd_uuid.uuid,
150 tgts->uuid.uuid, obd->obd_uuid.uuid,
153 if (!tgt_obd->obd_set_up) {
154 CERROR("Target %s not set up\n", tgts->uuid.uuid);
155 GOTO(out_disc, rc = -EINVAL);
/* connect to the target under the fixed client UUID declared above */
158 rc = obd_connect(&conn, tgt_obd, &lmv_osc_uuid);
160 CERROR("Target %s connect error %d\n",
161 tgts->uuid.uuid, rc);
164 tgts->exp = class_conn2export(&conn);
/* hand the current EA/cookie size limits to the new export */
166 obd_init_ea_size(tgts->exp, lmv->max_easize,
167 lmv->max_cookiesize);
/* register this LMV as observer; the visible failure path disconnects again */
169 rc = obd_register_observer(tgt_obd, obd);
171 CERROR("Target %s register_observer error %d\n",
172 tgts->uuid.uuid, rc);
173 obd_disconnect(tgts->exp, 0);
177 CDEBUG(D_OTHER, "connected to %s(%s) successfully (%d)\n",
178 tgt_obd->obd_name, tgt_obd->obd_uuid.uuid,
179 atomic_read(&obd->obd_refcount));
182 class_export_put(exp);
186 /* FIXME: cleanup here */
187 class_disconnect(exp, 0);
/*
 * Disconnect hook (installed as o_disconnect in lmv_obd_ops).  The
 * underlying targets are only torn down on the final disconnect, which the
 * code detects through lmv->refcount.  For each target that has an export
 * it propagates obd->obd_no_recov to the target device, unregisters this
 * LMV as observer, calls obd_disconnect() with the caller's flags and
 * clears tgts[i].exp.  Finally the LMV export itself is released and
 * class_disconnect()ed.
 *
 * NOTE(review): the "disconnected ... successfully" debug message is
 * emitted before obd_disconnect() is actually called, and rc from each
 * obd_disconnect() appears to be overwritten by the next iteration.
 * NOTE(review): class_export_put(exp) is followed by
 * class_disconnect(exp, 0) on the same export; confirm the reference
 * lifetime against the full source.  Lines are missing from this listing
 * (refcount update, branch targets, return), so the control flow cannot be
 * fully verified here.
 */
191 static int lmv_disconnect(struct obd_export *exp, int flags)
193 struct obd_device *obd = class_exp2obd(exp);
194 struct lmv_obd *lmv = &obd->u.lmv;
201 /* Only disconnect the underlying layers on the final disconnect. */
203 if (lmv->refcount != 0)
206 for (i = 0; i < lmv->count; i++) {
/* targets without an export were never connected */
207 if (lmv->tgts[i].exp == NULL)
210 if (obd->obd_no_recov) {
211 /* Pass it on to our clients.
212 * XXX This should be an argument to disconnect,
213 * XXX not a back-door flag on the OBD. Ah well.
215 struct obd_device *mdc_obd;
216 mdc_obd = class_exp2obd(lmv->tgts[i].exp);
218 mdc_obd->obd_no_recov = 1;
221 CDEBUG(D_OTHER, "disconnected from %s(%s) successfully\n",
222 lmv->tgts[i].exp->exp_obd->obd_name,
223 lmv->tgts[i].exp->exp_obd->obd_uuid.uuid);
225 obd_register_observer(lmv->tgts[i].exp->exp_obd, NULL);
227 rc = obd_disconnect(lmv->tgts[i].exp, flags);
228 lmv->tgts[i].exp = NULL;
232 /* FIXME: cleanup here */
234 class_export_put(exp);
235 rc = class_disconnect(exp, 0);
/*
 * Setup: parses the configuration record passed in buf.
 *   lcfg_inlbuf1 - a struct lmv_desc (must be at least sizeof(*desc) bytes)
 *   lcfg_inlbuf2 - an array of struct obd_uuid whose byte length must equal
 *                  sizeof(struct obd_uuid) * desc->ld_count
 * It allocates lmv->tgts (lmv->bufsize bytes, one lmv_tgt_desc per UUID),
 * copies each UUID into its descriptor, and initialises max_easize
 * (ll_fid per target plus a struct mea) and max_cookiesize (0).
 *
 * NOTE(review): the second error message says "OST UUID list" although the
 * targets connected in this file are MDC devices; wording looks inherited.
 * NOTE(review): the "UUID array size wrong" message prints sizeof() with
 * %u; sizeof yields size_t, so the format does not match on LP64 builds.
 * NOTE(review): max_easize is computed from lmv->count, but the assignment
 * of lmv->count is not visible in this listing (source lines are skipped,
 * as are the error returns and the declarations of count, i and rc).
 */
239 static int lmv_setup(struct obd_device *obd, obd_count len, void *buf)
241 struct lustre_cfg *lcfg = buf;
242 struct lmv_desc *desc;
243 struct lmv_obd *lmv = &obd->u.lmv;
244 struct obd_uuid *uuids;
245 struct lmv_tgt_desc *tgts;
251 if (lcfg->lcfg_inllen1 < 1) {
252 CERROR("LMV setup requires a descriptor\n");
256 if (lcfg->lcfg_inllen2 < 1) {
257 CERROR("LMV setup requires an OST UUID list\n");
261 desc = (struct lmv_desc *)lcfg->lcfg_inlbuf1;
262 if (sizeof(*desc) > lcfg->lcfg_inllen1) {
263 CERROR("descriptor size wrong: %d > %d\n",
264 (int)sizeof(*desc), lcfg->lcfg_inllen1);
/* the UUID buffer must hold exactly ld_count entries */
268 count = desc->ld_count;
269 uuids = (struct obd_uuid *)lcfg->lcfg_inlbuf2;
270 if (sizeof(*uuids) * count != lcfg->lcfg_inllen2) {
271 CERROR("UUID array size wrong: %u * %u != %u\n",
272 sizeof(*uuids), count, lcfg->lcfg_inllen2);
/* bufsize is kept so the array can be freed with the same size later */
276 lmv->bufsize = sizeof(struct lmv_tgt_desc) * count;
277 OBD_ALLOC(lmv->tgts, lmv->bufsize);
278 if (lmv->tgts == NULL) {
279 CERROR("Out of memory\n");
283 for (i = 0, tgts = lmv->tgts; i < count; i++, tgts++) {
284 tgts->uuid = uuids[i];
288 lmv->max_easize = sizeof(struct ll_fid) * lmv->count
289 + sizeof(struct mea);
290 lmv->max_cookiesize = 0;
/*
 * statfs hook: queries every target with obd_statfs() and combines the
 * results into *osfs.  One result is copied wholesale with memcpy(); for
 * the others os_bavail, os_blocks, os_ffree and os_files are summed.
 *
 * NOTE(review): the condition choosing between the memcpy and the
 * accumulation, and the handling after the CERROR, are on source lines
 * missing from this listing.
 * NOTE(review): lmv->tgts[i].exp is dereferenced without a NULL check,
 * while other functions in this file treat a NULL exp as "local/master
 * MDS, not connected"; confirm this cannot be reached in that setup.
 */
295 static int lmv_statfs(struct obd_device *obd, struct obd_statfs *osfs,
296 unsigned long max_age)
298 struct lmv_obd *lmv = &obd->u.lmv;
299 struct obd_statfs temp;
303 for (i = 0; i < lmv->count; i++) {
304 rc = obd_statfs(lmv->tgts[i].exp->exp_obd, &temp, max_age);
306 CERROR("can't stat MDS #%d (%s)\n", i,
307 lmv->tgts[i].exp->exp_obd->obd_name);
311 memcpy(osfs, &temp, sizeof(temp));
313 osfs->os_bavail += temp.os_bavail;
314 osfs->os_blocks += temp.os_blocks;
315 osfs->os_ffree += temp.os_ffree;
316 osfs->os_files += temp.os_files;
/*
 * Cleanup hook: drops the cached directory objects via lmv_cleanup_objs()
 * and frees the target descriptor array using the size recorded in
 * lmv->bufsize.  The flags argument is not used in the visible lines.
 */
322 static int lmv_cleanup(struct obd_device *obd, int flags)
324 struct lmv_obd *lmv = &obd->u.lmv;
326 lmv_cleanup_objs(obd);
327 OBD_FREE(lmv->tgts, lmv->bufsize);
/*
 * getstatus: forwarded unconditionally to the first target (tgts[0]);
 * the result of md_getstatus() is stored in rc.
 */
331 static int lmv_getstatus(struct obd_export *exp, struct ll_fid *fid)
333 struct obd_device *obd = exp->exp_obd;
334 struct lmv_obd *lmv = &obd->u.lmv;
338 rc = md_getstatus(lmv->tgts[0].exp, fid);
/*
 * getattr: forwards the request to the target selected by fid->mds.
 * lmv_grab_obj() is consulted first; a non-NULL result is logged as
 * "(splitted)".  When the call succeeds for such a directory, the
 * attributes of the slave objects are supposed to be gathered as well, but
 * that is not implemented yet (see the #warning).
 *
 * NOTE(review): the declaration of obj and the tail of the function are on
 * source lines missing from this listing.
 */
343 static int lmv_getattr(struct obd_export *exp, struct ll_fid *fid,
344 unsigned long valid, unsigned int ea_size,
345 struct ptlrpc_request **request)
347 struct obd_device *obd = exp->exp_obd;
348 struct lmv_obd *lmv = &obd->u.lmv;
349 int rc, i = fid->mds;
353 obj = lmv_grab_obj(obd, fid, 0);
354 CDEBUG(D_OTHER, "GETATTR for %lu/%lu/%lu %s\n",
355 (unsigned long) fid->mds,
356 (unsigned long) fid->id,
357 (unsigned long) fid->generation,
358 obj ? "(splitted)" : "");
/* i was taken from fid->mds above; make sure it names a known target */
360 LASSERT(fid->mds < lmv->count);
361 rc = md_getattr(lmv->tgts[i].exp, fid,
362 valid, ea_size, request);
363 if (rc == 0 && obj) {
364 /* we have to loop over dirobjs here and gather attrs
365 * for all the slaves */
366 #warning "attrs gathering here"
/*
 * change_cbdata: logs the fid and forwards the iterator/data pair to the
 * target selected by fid->mds, after asserting that the index is within
 * lmv->count.
 *
 * NOTE(review): the parameter line declaring fid is missing from this
 * listing (numbering jumps 372 -> 374).
 */
372 static int lmv_change_cbdata(struct obd_export *exp,
374 ldlm_iterator_t it, void *data)
376 struct obd_device *obd = exp->exp_obd;
377 struct lmv_obd *lmv = &obd->u.lmv;
381 CDEBUG(D_OTHER, "CBDATA for %lu/%lu/%lu\n",
382 (unsigned long) fid->mds,
383 (unsigned long) fid->id,
384 (unsigned long) fid->generation);
385 LASSERT(fid->mds < lmv->count);
386 rc = md_change_cbdata(lmv->tgts[fid->mds].exp, fid, it, data);
/*
 * change_cbdata by name: picks the target responsible for the entry
 * name/len inside parent directory pfid and forwards md_change_cbdata()
 * for the child fid cfid to it.  If lmv_grab_obj() knows the parent as a
 * split directory, the target index comes from raw_name2idx() over the
 * number of slave objects.
 *
 * NOTE(review): the assignment of the default mds value and the test on
 * obj are on source lines missing from this listing, as are the
 * declarations of obj, mds and rc.
 */
390 static int lmv_change_cbdata_name(struct obd_export *exp, struct ll_fid *pfid,
391 char *name, int len, struct ll_fid *cfid,
392 ldlm_iterator_t it, void *data)
394 struct obd_device *obd = exp->exp_obd;
395 struct lmv_obd *lmv = &obd->u.lmv;
400 LASSERT(pfid->mds < lmv->count);
401 LASSERT(cfid->mds < lmv->count);
402 CDEBUG(D_OTHER, "CBDATA for %lu/%lu/%lu:%*s -> %lu/%lu/%lu\n",
403 (unsigned long) pfid->mds, (unsigned long) pfid->id,
404 (unsigned long) pfid->generation, len, name,
405 (unsigned long) cfid->mds, (unsigned long) cfid->id,
406 (unsigned long) cfid->generation);
408 /* this is default mds for directory name belongs to */
410 obj = lmv_grab_obj(obd, pfid, 0);
412 /* directory is splitted. look for right mds for this name */
413 mds = raw_name2idx(obj->objcount, name, len);
416 rc = md_change_cbdata(lmv->tgts[mds].exp, cfid, it, data);
/*
 * valid_attrs: logs the fid, asserts that fid->mds is a known target index
 * and forwards md_valid_attrs() to that target.
 */
420 static int lmv_valid_attrs(struct obd_export *exp, struct ll_fid *fid)
422 struct obd_device *obd = exp->exp_obd;
423 struct lmv_obd *lmv = &obd->u.lmv;
427 CDEBUG(D_OTHER, "validate %lu/%lu/%lu\n",
428 (unsigned long) fid->mds,
429 (unsigned long) fid->id,
430 (unsigned long) fid->generation);
431 LASSERT(fid->mds < lmv->count);
432 rc = md_valid_attrs(lmv->tgts[fid->mds].exp, fid);
/*
 * close: forwards md_close() to the target named by obdo->o_mds, after
 * asserting that the index is below lmv->count.
 */
436 int lmv_close(struct obd_export *exp, struct obdo *obdo,
437 struct obd_client_handle *och,
438 struct ptlrpc_request **request)
440 struct obd_device *obd = exp->exp_obd;
441 struct lmv_obd *lmv = &obd->u.lmv;
442 int rc, i = obdo->o_mds;
445 LASSERT(i < lmv->count);
446 CDEBUG(D_OTHER, "CLOSE %lu/%lu/%lu\n", (unsigned long) obdo->o_mds,
447 (unsigned long) obdo->o_id, (unsigned long) obdo->o_generation);
448 rc = md_close(lmv->tgts[i].exp, obdo, och, request);
/*
 * create: creates op_data->name in the parent described by op_data.
 *
 * Visible behaviour:
 *   - with a parent mea (op_data->mea1) the target index i comes from
 *     mea_name2idx() over the name, and op_data->fid1 is replaced by the
 *     matching slave fid from the mea;
 *   - md_create() is sent to tgts[i];
 *   - on success the reply body is fetched and its mds field is asserted
 *     to equal i;
 *   - on -ESTALE ("it seems MDS splitted dir") the parent's EA is
 *     refetched with md_getattr(OBD_MD_FLEASIZE) on the target named by
 *     op_data->fid1.mds and unpacked via mdc_req2lustre_md(), presumably to
 *     retry with the fresh mea (the retry itself is not visible);
 *   - a mea obtained that way is released with obd_free_memmd(), presumably
 *     guarded by free_mea.
 *
 * NOTE(review): many source lines are missing from this listing (the
 * conditions around the mea lookup, the rc checks, the retry jump, the
 * declarations of md and mealen), so the above is limited to what the
 * visible statements show.
 */
452 int lmv_create(struct obd_export *exp, struct mdc_op_data *op_data,
453 const void *data, int datalen, int mode, __u32 uid,
454 __u32 gid, __u64 rdev, struct ptlrpc_request **request)
456 struct obd_device *obd = exp->exp_obd;
457 struct lmv_obd *lmv = &obd->u.lmv;
458 struct mea *mea = op_data->mea1;
459 struct mds_body *mds_body;
460 int rc, i, free_mea = 0;
463 /* TODO: where to create new directories?
464 * current design don't support directory on a slave MDS,
465 * but we lookup by name may forward any request in slave
468 i = mea_name2idx(mea, (char *) op_data->name, op_data->namelen);
470 op_data->fid1 = mea->mea_fids[i];
472 CDEBUG(D_OTHER, "CREATE '%*s' on %lu/%lu/%lu (mea 0x%p)\n",
473 op_data->namelen, op_data->name,
474 (unsigned long) op_data->fid1.mds,
475 (unsigned long) op_data->fid1.id,
476 (unsigned long) op_data->fid1.generation, mea);
477 rc = md_create(lmv->tgts[i].exp, op_data, data, datalen,
478 mode, uid, gid, rdev, request);
480 if (*request == NULL)
482 mds_body = lustre_msg_buf((*request)->rq_repmsg, 0,
484 LASSERT(mds_body != NULL);
485 CDEBUG(D_OTHER, "created. id = %lu, generation = %lu, mds = %d\n",
486 (unsigned long) mds_body->fid1.id,
487 (unsigned long) mds_body->fid1.generation, i);
488 LASSERT(mds_body->mds == i);
489 } else if (rc == -ESTALE) {
490 struct ptlrpc_request *req = NULL;
494 CDEBUG(D_OTHER, "it seems MDS splitted dir\n");
495 LASSERT(mea == NULL);
497 mealen = sizeof(struct ll_fid)*lmv->count + sizeof(struct mea);
498 /* time to update mea of parent fid */
499 i = op_data->fid1.mds;
500 rc = md_getattr(lmv->tgts[i].exp, &op_data->fid1,
501 OBD_MD_FLEASIZE, mealen, &req);
504 rc = mdc_req2lustre_md(req, 0, NULL, exp, &md);
506 LASSERT(md.mea != NULL);
508 ptlrpc_req_finished(req);
514 obd_free_memmd(exp, (struct lov_stripe_md**) &mea);
/*
 * done_writing: currently always forwarded to the first target (tgts[0]);
 * choosing the correct MDC is still an open FIXME.
 */
518 int lmv_done_writing(struct obd_export *exp, struct obdo *obdo)
520 struct obd_device *obd = exp->exp_obd;
521 struct lmv_obd *lmv = &obd->u.lmv;
525 /* FIXME: choose right MDC here */
526 rc = md_done_writing(lmv->tgts[0].exp, obdo);
/*
 * enqueue: forwards a lock enqueue to the target named by data->fid1.mds.
 * If lmv_grab_obj() knows data->fid1 as a split directory, the slave
 * responsible for data->name is chosen with raw_name2idx() and data->fid1
 * is rewritten to that slave's fid before forwarding.
 *
 * NOTE(review): the test on obj, the remaining raw_name2idx() argument and
 * the declarations of obj, mds and rc are on source lines missing from
 * this listing.
 */
530 int lmv_enqueue(struct obd_export *exp, int lock_type,
531 struct lookup_intent *it, int lock_mode,
532 struct mdc_op_data *data, struct lustre_handle *lockh,
533 void *lmm, int lmmsize,
534 ldlm_completion_callback cb_completion,
535 ldlm_blocking_callback cb_blocking, void *cb_data)
537 struct obd_device *obd = exp->exp_obd;
538 struct lmv_obd *lmv = &obd->u.lmv;
544 obj = lmv_grab_obj(obd, &data->fid1, 0);
546 /* directory is splitted. look for
547 * right mds for this name */
548 mds = raw_name2idx(obj->objcount, data->name,
550 data->fid1 = obj->objs[mds].fid;
554 CDEBUG(D_OTHER, "ENQUEUE '%s' on %lu/%lu\n",
555 LL_IT2STR(it), (unsigned long) data->fid1.id,
556 (unsigned long) data->fid1.generation);
557 rc = md_enqueue(lmv->tgts[data->fid1.mds].exp, lock_type, it,
558 lock_mode, data, lockh, lmm, lmmsize, cb_completion,
559 cb_blocking, cb_data);
/*
 * getattr by name: starts from the parent's own target (fid->mds) and a
 * copy of the parent fid.  If the parent is known as a split directory,
 * the target index and fid are replaced by those of the slave chosen by
 * raw_name2idx().  The request is then forwarded with md_getattr_name().
 *
 * Logging and hashing use namelen - 1 while the forwarded call passes
 * namelen; presumably namelen counts the trailing NUL - TODO confirm
 * against the callers.
 *
 * NOTE(review): the test on obj and the declaration of obj are on source
 * lines missing from this listing.
 */
564 int lmv_getattr_name(struct obd_export *exp, struct ll_fid *fid,
565 char *filename, int namelen, unsigned long valid,
566 unsigned int ea_size, struct ptlrpc_request **request)
568 struct obd_device *obd = exp->exp_obd;
569 struct lmv_obd *lmv = &obd->u.lmv;
570 struct ll_fid rfid = *fid;
571 int rc, mds = fid->mds;
575 CDEBUG(D_OTHER, "getattr_name for %*s on %lu/%lu/%lu\n",
576 namelen - 1, filename, (unsigned long) fid->mds,
577 (unsigned long) fid->id, (unsigned long) fid->generation);
578 obj = lmv_grab_obj(obd, fid, 0);
580 /* directory is splitted. look for right mds for this name */
581 mds = raw_name2idx(obj->objcount, filename, namelen - 1);
582 rfid = obj->objs[mds].fid;
585 rc = md_getattr_name(lmv->tgts[mds].exp, &rfid, filename, namelen,
586 valid, ea_size, request);
592 /* llite passes the fid of the target inode in data->fid1 and
593 * the fid of the directory in data->fid2 */
/*
 * link: two request flavours are distinguished by data->namelen.
 *   - namelen != 0: a normal link request.  The target index i is derived
 *     from the directory mea (data->mea2) with mea_name2idx(), and
 *     data->fid2 is replaced by the matching slave fid.
 *   - namelen == 0: a request from an MDS to take a link count on the
 *     inode named by data->fid1.
 * The request is then forwarded with md_link() to tgts[i].
 *
 * NOTE(review): how i is chosen when there is no mea, and in the
 * namelen == 0 branch, is on source lines missing from this listing, as
 * are the declarations of i and rc.
 */
595 int lmv_link(struct obd_export *exp, struct mdc_op_data *data,
596 struct ptlrpc_request **request)
598 struct obd_device *obd = exp->exp_obd;
599 struct lmv_obd *lmv = &obd->u.lmv;
600 struct mea *mea = data->mea2;
604 if (data->namelen != 0) {
605 /* usual link request */
606 i = mea_name2idx(mea, (char *) data->name, data->namelen);
608 data->fid2 = mea->mea_fids[i];
609 CDEBUG(D_OTHER,"link %u/%u/%u:%*s to %u/%u/%u mds %d mea %p\n",
610 (unsigned) data->fid2.mds, (unsigned) data->fid2.id,
611 (unsigned) data->fid2.generation, data->namelen,
612 data->name, (unsigned) data->fid1.mds,
613 (unsigned) data->fid1.id,
614 (unsigned) data->fid1.generation, i, mea);
616 /* request from MDS to acquire i_links for inode by fid1 */
618 CDEBUG(D_OTHER, "inc i_nlinks for %u/%u/%u\n",
619 (unsigned) data->fid1.mds, (unsigned) data->fid1.id,
620 (unsigned) data->fid1.generation);
623 rc = md_link(lmv->tgts[i].exp, data, request);
/*
 * rename: forwards a rename of old (in data->fid1) to new (in data->fid2).
 *
 * Visible behaviour:
 *   - a special case, described as "MDS with old dir entry is asking
 *     another MDS to create name there", sends the request to the target
 *     named by data->fid2.mds;
 *   - otherwise, if the source directory is known as split, data->fid1 is
 *     replaced by the slave fid chosen by hashing the old name, and
 *     likewise data->fid2 by hashing the new name;
 *   - the request then goes to the target named by data->fid1.mds.
 *
 * NOTE(review): the condition selecting the special case, the tests on
 * obj, the jump between the branches and the declarations of obj, mds and
 * rc are on source lines missing from this listing.
 */
627 int lmv_rename(struct obd_export *exp, struct mdc_op_data *data,
628 const char *old, int oldlen, const char *new, int newlen,
629 struct ptlrpc_request **request)
631 struct obd_device *obd = exp->exp_obd;
632 struct lmv_obd *lmv = &obd->u.lmv;
637 CDEBUG(D_OTHER, "rename %*s in %lu/%lu/%lu to %*s in %lu/%lu/%lu\n",
638 oldlen, old, (unsigned long) data->fid1.mds,
639 (unsigned long) data->fid1.id,
640 (unsigned long) data->fid1.generation,
641 newlen, new, (unsigned long) data->fid2.mds,
642 (unsigned long) data->fid2.id,
643 (unsigned long) data->fid2.generation);
648 /* MDS with old dir entry is asking another MDS
649 * to create name there */
651 "create %*s(%d/%d) in %lu/%lu/%lu pointing to %lu/%lu/%lu\n",
652 newlen, new, oldlen, newlen,
653 (unsigned long) data->fid2.mds,
654 (unsigned long) data->fid2.id,
655 (unsigned long) data->fid2.generation,
656 (unsigned long) data->fid1.mds,
657 (unsigned long) data->fid1.id,
658 (unsigned long) data->fid1.generation);
659 mds = data->fid2.mds;
/* resolve the source directory to the slave that holds the old name */
663 obj = lmv_grab_obj(obd, &data->fid1, 0);
665 /* directory is already splitted, so we have to forward
666 * request to the right MDS */
667 mds = raw_name2idx(obj->objcount, old, oldlen);
668 data->fid1 = obj->objs[mds].fid;
669 CDEBUG(D_OTHER, "forward to MDS #%u (%lu/%lu/%lu)\n", mds,
670 (unsigned long) obj->objs[mds].fid.mds,
671 (unsigned long) obj->objs[mds].fid.id,
672 (unsigned long) obj->objs[mds].fid.generation);
/* resolve the destination directory to the slave that will hold the new name */
676 obj = lmv_grab_obj(obd, &data->fid2, 0);
678 /* directory is already splitted, so we have to forward
679 * request to the right MDS */
680 mds = raw_name2idx(obj->objcount, new, newlen);
681 data->fid2 = obj->objs[mds].fid;
682 CDEBUG(D_OTHER, "forward to MDS #%u (%lu/%lu/%lu)\n", mds,
683 (unsigned long) obj->objs[mds].fid.mds,
684 (unsigned long) obj->objs[mds].fid.id,
685 (unsigned long) obj->objs[mds].fid.generation);
689 mds = data->fid1.mds;
692 rc = md_rename(lmv->tgts[mds].exp, data, old, oldlen,
693 new, newlen, request);
/*
 * setattr: applies iattr (and the optional EAs) to the object data->fid1.
 *
 * For a directory known as split, the request is repeated for every slave
 * object: data->fid1 is set to each slave fid in turn and md_setattr() is
 * issued.  The reply belonging to the master object (slave fid equal to
 * obj->fid) is the one meant to be handed back to llite; the other replies
 * are finished immediately.
 *
 * Otherwise a single md_setattr() goes to the target named by
 * data->fid1.mds and the reply body's mds field is asserted to match.
 *
 * NOTE(review): in the split loop the target is tgts[i], where i is the
 * slave index rather than obj->objs[i].fid.mds; confirm the two always
 * coincide.
 * NOTE(review): the tests on obj and rc, the assignment to *request and
 * the declaration of obj are on source lines missing from this listing.
 */
697 int lmv_setattr(struct obd_export *exp, struct mdc_op_data *data,
698 struct iattr *iattr, void *ea, int ealen, void *ea2, int ea2len,
699 struct ptlrpc_request **request)
701 struct obd_device *obd = exp->exp_obd;
702 struct lmv_obd *lmv = &obd->u.lmv;
703 int rc = 0, i = data->fid1.mds;
704 struct ptlrpc_request *req;
705 struct mds_body *mds_body;
709 obj = lmv_grab_obj(obd, &data->fid1, 0);
710 CDEBUG(D_OTHER, "SETATTR for %lu/%lu/%lu, valid 0x%x%s\n",
711 (unsigned long) data->fid1.mds,
712 (unsigned long) data->fid1.id,
713 (unsigned long) data->fid1.generation, iattr->ia_valid,
714 obj ? ", splitted" : "");
716 for (i = 0; i < obj->objcount; i++) {
717 data->fid1 = obj->objs[i].fid;
718 rc = md_setattr(lmv->tgts[i].exp, data, iattr, ea,
719 ealen, ea2, ea2len, &req);
721 if (fid_equal(&obj->fid, &obj->objs[i].fid)) {
722 /* this is master object and this request
723 * should be returned back to llite */
726 ptlrpc_req_finished(req);
731 LASSERT(data->fid1.mds < lmv->count);
732 rc = md_setattr(lmv->tgts[i].exp, data, iattr, ea, ealen,
733 ea2, ea2len, request);
735 mds_body = lustre_msg_buf((*request)->rq_repmsg, 0,
737 LASSERT(mds_body != NULL);
738 LASSERT(mds_body->mds == i);
/*
 * sync: forwarded unconditionally to the first target (tgts[0]),
 * regardless of which MDS the fid names.
 */
744 int lmv_sync(struct obd_export *exp, struct ll_fid *fid,
745 struct ptlrpc_request **request)
747 struct obd_device *obd = exp->exp_obd;
748 struct lmv_obd *lmv = &obd->u.lmv;
752 rc = md_sync(lmv->tgts[0].exp, fid, request);
/*
 * Blocking AST for locks taken on directory objects.
 *   LDLM_CB_BLOCKING  - converts the lock to a handle and cancels it with
 *                       ldlm_cli_cancel(); the result is logged.
 *   LDLM_CB_CANCELING - fetches the cached object from lock->l_ast_data so
 *                       its cached attributes can be dropped, and logs the
 *                       resource name together with the master fid.
 *
 * NOTE(review): the switch statement itself, the rc check before the
 * first CDEBUG, the string operands of the ?: in the second CDEBUG, any
 * NULL check on obj and the actual release of obj are on source lines
 * missing from this listing.
 */
756 int lmv_dirobj_blocking_ast(struct ldlm_lock *lock,
757 struct ldlm_lock_desc *desc, void *data, int flag)
759 struct lustre_handle lockh;
765 case LDLM_CB_BLOCKING:
766 ldlm_lock2handle(lock, &lockh);
767 rc = ldlm_cli_cancel(&lockh);
769 CDEBUG(D_INODE, "ldlm_cli_cancel: %d\n", rc);
773 case LDLM_CB_CANCELING:
774 /* time to drop cached attrs for dirobj */
775 obj = lock->l_ast_data;
779 CDEBUG(D_OTHER, "cancel %s on %lu/%lu, master %lu/%lu/%lu\n",
780 lock->l_resource->lr_name.name[3] == 1 ?
782 (unsigned long) lock->l_resource->lr_name.name[0],
783 (unsigned long) lock->l_resource->lr_name.name[1],
784 (unsigned long) obj->fid.mds,
785 (unsigned long) obj->fid.id,
786 (unsigned long) obj->fid.generation);
/*
 * readpage: reads one directory page at offset.  For a directory known as
 * split, the slave objects are treated as laid end to end: the loop walks
 * the slaves, subtracting each slave's cached size from offset until the
 * offset falls inside one of them, then the request is redirected to that
 * slave's fid with the adjusted offset.  The page is finally read with
 * md_readpage() from the target named by the chosen fid.
 *
 * NOTE(review): if offset is not smaller than the sum of all slave sizes,
 * the loop ends with i == obj->objcount and obj->objs[i] is read past the
 * last slave, unless a guard sits on one of the source lines missing from
 * this listing (the test on obj and the loop's break are not visible).
 * The cached sizes are read without visible protection (see the FIXME).
 * Duplicate "." and ".." entries coming from slaves are a known open
 * issue (see the #warning).
 */
794 int lmv_readpage(struct obd_export *exp, struct ll_fid *mdc_fid,
795 __u64 offset, struct page *page,
796 struct ptlrpc_request **request)
798 struct obd_device *obd = exp->exp_obd;
799 struct lmv_obd *lmv = &obd->u.lmv;
800 struct ll_fid rfid = *mdc_fid;
806 LASSERT(mdc_fid->mds < lmv->count);
807 CDEBUG(D_OTHER, "READPAGE at %llu from %lu/%lu/%lu\n",
808 offset, (unsigned long) rfid.mds,
809 (unsigned long) rfid.id,
810 (unsigned long) rfid.generation);
812 obj = lmv_grab_obj(obd, mdc_fid, 0);
814 /* find dirobj containing page with requested offset */
815 /* FIXME: what about protecting cached attrs here? */
816 for (i = 0; i < obj->objcount; i++) {
817 if (offset < obj->objs[i].size)
819 offset -= obj->objs[i].size;
821 rfid = obj->objs[i].fid;
822 CDEBUG(D_OTHER, "forward to %lu/%lu/%lu with offset %lu\n",
823 (unsigned long) rfid.mds,
824 (unsigned long) rfid.id,
825 (unsigned long) rfid.generation,
826 (unsigned long) offset);
828 rc = md_readpage(lmv->tgts[rfid.mds].exp, &rfid, offset, page, request);
832 #warning "we need fix for duplicate . and .. from slaves"
/*
 * unlink: two request flavours are distinguished by data->namelen.
 *   - namelen != 0: remove the name from its directory.  The target index
 *     i is derived from the directory mea (data->mea1) with
 *     mea_name2idx(), and data->fid1 is replaced by the matching slave fid.
 *   - namelen == 0: drop a link count on the inode named by data->fid1.
 * The request is then forwarded with md_unlink() to tgts[i].
 *
 * NOTE(review): how i is chosen without a mea and in the namelen == 0
 * branch is on source lines missing from this listing, as are the
 * declarations of i and rc.
 */
837 int lmv_unlink(struct obd_export *exp, struct mdc_op_data *data,
838 struct ptlrpc_request **request)
840 struct obd_device *obd = exp->exp_obd;
841 struct lmv_obd *lmv = &obd->u.lmv;
842 struct mea *mea = data->mea1;
846 if (data->namelen != 0) {
847 i = mea_name2idx(mea, (char *) data->name, data->namelen);
849 data->fid1 = mea->mea_fids[i];
850 CDEBUG(D_OTHER, "unlink '%*s' in %lu/%lu/%lu -> %u\n",
851 data->namelen, data->name,
852 (unsigned long) data->fid1.mds,
853 (unsigned long) data->fid1.id,
854 (unsigned long) data->fid1.generation, i);
857 CDEBUG(D_OTHER, "drop i_nlink on %lu/%lu/%lu\n",
858 (unsigned long) data->fid1.mds,
859 (unsigned long) data->fid1.id,
860 (unsigned long) data->fid1.generation);
862 rc = md_unlink(lmv->tgts[i].exp, data, request);
/*
 * get_real_obd: resolves to the obd device behind the first target
 * (tgts[0]).
 *
 * NOTE(review): the remaining parameters of the signature and the return
 * statement are on source lines missing from this listing.
 */
866 struct obd_device *lmv_get_real_obd(struct obd_export *exp,
869 struct obd_device *obd = exp->exp_obd;
870 struct lmv_obd *lmv = &obd->u.lmv;
873 obd = lmv->tgts[0].exp->exp_obd;
/*
 * init_ea_size: raises the cached lmv->max_easize / lmv->max_cookiesize
 * when the caller supplies larger values and, once the LMV is connected,
 * pushes easize/cookiesize to every target with obd_init_ea_size().
 *
 * Only the result of the last target call survives in rc (see the FIXME
 * on error handling).
 *
 * NOTE(review): tgts[i].exp is passed on without a NULL check, while other
 * functions in this file treat a NULL exp as the unconnected local MDS;
 * confirm obd_init_ea_size() tolerates that.  How the change flag is used
 * and what happens when lmv->connected is 0 are on source lines missing
 * from this listing.
 */
878 int lmv_init_ea_size(struct obd_export *exp, int easize, int cookiesize)
880 struct obd_device *obd = exp->exp_obd;
881 struct lmv_obd *lmv = &obd->u.lmv;
882 int i, rc = 0, change = 0;
885 if (lmv->max_easize < easize) {
886 lmv->max_easize = easize;
889 if (lmv->max_cookiesize < cookiesize) {
890 lmv->max_cookiesize = cookiesize;
896 if (lmv->connected == 0)
899 /* FIXME: error handling? */
900 for (i = 0; i < lmv->count; i++)
901 rc = obd_init_ea_size(lmv->tgts[i].exp, easize, cookiesize);
906 /* to be called from MDS only */
/*
 * obd create (installed as o_create in lmv_obd_ops): builds the mea that
 * describes a striped directory and creates its slave objects.
 *
 * Visible behaviour:
 *   - a disk md is allocated into *ea with obd_alloc_diskmd() and then used
 *     as a struct mea;
 *   - mea_count is clamped to lmv->count when it is zero or too large, and
 *     mea_master starts at -1;
 *   - the loop walks the targets (i) while filling mea slots (c).  A target
 *     without an export is taken to be the local master MDS and gets the
 *     master fid (mfid); for the others obd_create() is called and the
 *     resulting o_id/o_generation recorded with mds = i.
 *   - afterwards c is asserted to equal mea_count.
 *
 * NOTE(review): obd_create() is issued on tgts[c].exp, while the NULL
 * check and the recorded mds index use i; c and i can diverge once a
 * target is passed over, so confirm that tgts[c] is really intended.
 * NOTE(review): errors from obd_alloc_diskmd() and obd_create() are not
 * handled in the visible lines (see the FIXMEs).  The initialisation of
 * mfid.id, the increments of c and the declarations are on source lines
 * missing from this listing.
 */
908 int lmv_obd_create(struct obd_export *exp, struct obdo *oa,
909 struct lov_stripe_md **ea, struct obd_trans_info *oti)
911 struct obd_device *obd = exp->exp_obd;
912 struct lmv_obd *lmv = &obd->u.lmv;
923 rc = obd_alloc_diskmd(exp, (struct lov_mds_md **) ea);
924 LASSERT(*ea != NULL);
927 mea = (struct mea *) *ea;
929 mfid.generation = oa->o_generation;
931 if (!mea->mea_count || mea->mea_count > lmv->count)
932 mea->mea_count = lmv->count;
934 mea->mea_master = -1;
936 /* FIXME: error handling? */
937 for (i = 0, c = 0; c < mea->mea_count && i < lmv->count; i++) {
938 struct lov_stripe_md obj_md;
939 struct lov_stripe_md *obj_mdp = &obj_md;
941 if (lmv->tgts[i].exp == NULL) {
942 /* this is master MDS */
943 mea->mea_fids[c].id = mfid.id;
944 mea->mea_fids[c].generation = mfid.generation;
945 mea->mea_fids[c].mds = i;
951 /* "Master" MDS should always be part of stripped dir, so
953 if (mea->mea_master == -1 && c == mea->mea_count - 1)
956 oa->o_valid = OBD_MD_FLGENER | OBD_MD_FLTYPE | OBD_MD_FLMODE
957 | OBD_MD_FLUID | OBD_MD_FLGID;
959 rc = obd_create(lmv->tgts[c].exp, oa, &obj_mdp, oti);
960 /* FIXME: error handling here */
963 mea->mea_fids[c].id = oa->o_id;
964 mea->mea_fids[c].generation = oa->o_generation;
965 mea->mea_fids[c].mds = i;
967 CDEBUG(D_OTHER, "dirobj at mds %d: "LPU64"/%u\n",
968 i, oa->o_id, oa->o_generation);
970 LASSERT(c == mea->mea_count);
971 CDEBUG(D_OTHER, "%d dirobjects created\n", (int) mea->mea_count);
/*
 * get_info: answers two 6-byte keys.
 *   "mdsize" - *vallen is set to sizeof(__u32) and the value to the size
 *              of a full mea (one ll_fid per target plus struct mea).
 *   "mdsnum" - searches the targets for the one whose UUID equals
 *              lmv->cluuid and sets *vallen to sizeof(__u32); the store of
 *              the index itself is not visible in this listing.
 * Any other key is logged as "invalid key".
 *
 * NOTE(review): the declaration and initialisation of lmv, the NULL check
 * on obd that guards the "invalid client cookie" message, the declaration
 * of mdsize and all return statements are on source lines missing from
 * this listing.
 */
976 static int lmv_get_info(struct obd_export *exp, __u32 keylen,
977 void *key, __u32 *vallen, void *val)
979 struct obd_device *obd;
983 obd = class_exp2obd(exp);
985 CDEBUG(D_IOCTL, "invalid client cookie "LPX64"\n",
986 exp->exp_handle.h_cookie);
991 if (keylen == 6 && memcmp(key, "mdsize", 6) == 0) {
993 *vallen = sizeof(__u32);
994 *mdsize = sizeof(struct ll_fid) * lmv->count
995 + sizeof(struct mea);
997 } else if (keylen == 6 && memcmp(key, "mdsnum", 6) == 0) {
998 struct obd_uuid *cluuid = &lmv->cluuid;
999 struct lmv_tgt_desc *tgts;
1000 __u32 *mdsnum = val;
1003 for (i = 0, tgts = lmv->tgts; i < lmv->count; i++, tgts++) {
1004 if (obd_uuid_equals(&tgts->uuid, cluuid)) {
1005 *vallen = sizeof(__u32);
1013 CDEBUG(D_IOCTL, "invalid key\n");
/*
 * set_info: for the key "client" the key/value pair is passed on to every
 * target with obd_set_info().
 *
 * NOTE(review): the key is compared with strcmp(), which assumes it is
 * NUL-terminated even though a length (keylen) is supplied; lmv_get_info()
 * in this file uses memcmp() with an exact length instead.
 * NOTE(review): tgts->exp is used without a NULL check.  The assignment of
 * lmv, the handling of rc inside the loop and the return statements are
 * on source lines missing from this listing.
 */
1017 int lmv_set_info(struct obd_export *exp, obd_count keylen,
1018 void *key, obd_count vallen, void *val)
1020 struct obd_device *obd;
1021 struct lmv_obd *lmv;
1024 obd = class_exp2obd(exp);
1026 CDEBUG(D_IOCTL, "invalid client cookie "LPX64"\n",
1027 exp->exp_handle.h_cookie);
1033 if (keylen >= strlen("client") && strcmp(key, "client") == 0) {
1034 struct lmv_tgt_desc *tgts;
1037 for (i = 0, tgts = lmv->tgts; i < lmv->count; i++, tgts++) {
1038 rc = obd_set_info(tgts->exp, keylen, key, vallen, val);
/*
 * packmd (installed as o_packmd): handles the wire/disk form of a mea.
 * The buffer size is always that of a full mea (one ll_fid per target plus
 * struct mea).
 *   - *lmmp set and lsm NULL: the buffer in *lmmp is freed.
 *   - otherwise a buffer is allocated into *lmmp and the in-memory mea is
 *     copied into it verbatim; no real packing or byte-order conversion is
 *     done yet (see the #warning).
 *
 * NOTE(review): the remaining argument checks, the allocation-failure
 * handling and the return values are on source lines missing from this
 * listing.
 */
1048 int lmv_packmd(struct obd_export *exp, struct lov_mds_md **lmmp,
1049 struct lov_stripe_md *lsm)
1051 struct obd_device *obd = class_exp2obd(exp);
1052 struct lmv_obd *lmv = &obd->u.lmv;
1057 mea_size = sizeof(struct ll_fid) * lmv->count + sizeof(struct mea);
1061 if (*lmmp && !lsm) {
1062 OBD_FREE(*lmmp, mea_size);
1068 OBD_ALLOC(*lmmp, mea_size);
1076 #warning "MEA packing/convertation must be here! -bzzz"
1077 memcpy(*lmmp, lsm, mea_size);
/*
 * unpackmd (installed as o_unpackmd): counterpart of lmv_packmd().
 *   - *mem_tgt set and disk_src NULL: the in-memory mea is freed.
 *   - otherwise mdsize is asserted to equal the size of a full mea, a
 *     buffer is allocated into *mem_tgt and the disk form is copied into
 *     it verbatim; no real unpacking or conversion is done yet (see the
 *     #warning).
 * Allocation failure is caught only by LASSERT (see the FIXME).
 *
 * NOTE(review): what is returned when mem_tgt is NULL, and the other
 * return values, are on source lines missing from this listing.
 */
1081 int lmv_unpackmd(struct obd_export *exp, struct lov_stripe_md **mem_tgt,
1082 struct lov_mds_md *disk_src, int mdsize)
1084 struct obd_device *obd = class_exp2obd(exp);
1085 struct lmv_obd *lmv = &obd->u.lmv;
1086 struct mea **tmea = (struct mea **) mem_tgt;
1087 struct mea *mea = (void *) disk_src;
1092 mea_size = sizeof(struct ll_fid) * lmv->count + sizeof(struct mea);
1093 if (mem_tgt == NULL)
1096 if (*mem_tgt != NULL && disk_src == NULL) {
1097 OBD_FREE(*tmea, mea_size);
1101 LASSERT(mea_size == mdsize);
1103 OBD_ALLOC(*tmea, mea_size);
1104 /* FIXME: error handling here */
1105 LASSERT(*tmea != NULL);
1110 #warning "MEA unpacking/convertation must be here! -bzzz"
1111 memcpy(*tmea, mea, mdsize);
/*
 * brw: bulk read/write against one slave of a striped directory.  The
 * slave is selected by oa->o_mds; its id and generation are copied from
 * the mea into oa (o_id and o_gr, with o_valid reset to
 * OBD_MD_FLID | OBD_MD_FLGROUP) and the request is passed to obd_brw() on
 * that target with a NULL stripe md.
 *
 * NOTE(review): oa->o_mds is checked against lmv->count, not against the
 * number of fids in the mea; confirm the two are always the same.  The
 * declaration of err and the return are on source lines missing from this
 * listing.
 */
1115 int lmv_brw(int rw, struct obd_export *exp, struct obdo *oa,
1116 struct lov_stripe_md *ea, obd_count oa_bufs,
1117 struct brw_page *pgarr, struct obd_trans_info *oti)
1119 struct obd_device *obd = exp->exp_obd;
1120 struct lmv_obd *lmv = &obd->u.lmv;
1121 struct mea *mea = (struct mea *) ea;
1124 LASSERT(oa != NULL);
1125 LASSERT(ea != NULL);
1126 LASSERT(pgarr != NULL);
1127 LASSERT(oa->o_mds < lmv->count);
1129 oa->o_gr = mea->mea_fids[oa->o_mds].generation;
1130 oa->o_id = mea->mea_fids[oa->o_mds].id;
1131 oa->o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
1132 err = obd_brw(rw, lmv->tgts[oa->o_mds].exp, oa,
1133 NULL, oa_bufs, pgarr, oti);
/*
 * OBD method table for the LMV device type, passed to
 * class_register_type() in lmv_init().  Note that o_connect points at
 * lmv_connect_fake(), not lmv_connect().
 *
 * NOTE(review): the embedded numbering skips entries (1141, 1151) and the
 * closing brace; presumably o_setup is among the missing lines - confirm
 * against the full source.
 */
1137 struct obd_ops lmv_obd_ops = {
1138 o_owner: THIS_MODULE,
1139 o_attach: lmv_attach,
1140 o_detach: lmv_detach,
1142 o_cleanup: lmv_cleanup,
1143 o_connect: lmv_connect_fake,
1144 o_disconnect: lmv_disconnect,
1145 o_statfs: lmv_statfs,
1146 o_get_info: lmv_get_info,
1147 o_set_info: lmv_set_info,
1148 o_create: lmv_obd_create,
1149 o_packmd: lmv_packmd,
1150 o_unpackmd: lmv_unpackmd,
1152 o_init_ea_size: lmv_init_ea_size,
/*
 * Metadata method table for the LMV device type, passed to
 * class_register_type() in lmv_init().  lmv_intent_lock is not defined in
 * the visible part of this file.
 *
 * NOTE(review): the embedded numbering skips entries (1160, 1166, 1169)
 * and the closing brace; lmv_close, lmv_link and lmv_sync are defined
 * above and are presumably registered on those missing lines - confirm.
 */
1155 struct md_ops lmv_md_ops = {
1156 m_getstatus: lmv_getstatus,
1157 m_getattr: lmv_getattr,
1158 m_change_cbdata: lmv_change_cbdata,
1159 m_change_cbdata_name: lmv_change_cbdata_name,
1161 m_create: lmv_create,
1162 m_done_writing: lmv_done_writing,
1163 m_enqueue: lmv_enqueue,
1164 m_getattr_name: lmv_getattr_name,
1165 m_intent_lock: lmv_intent_lock,
1167 m_rename: lmv_rename,
1168 m_setattr: lmv_setattr,
1170 m_readpage: lmv_readpage,
1171 m_unlink: lmv_unlink,
1172 m_get_real_obd: lmv_get_real_obd,
1173 m_valid_attrs: lmv_valid_attrs,
/*
 * lprocfs variable tables for this module.  Both tables contain only the
 * zero-initialised entry, so no per-module or per-device proc variables
 * are declared here; LPROCFS_INIT_VARS ties the two tables to the "lmv"
 * name used by lprocfs_init_vars().
 */
1177 static struct lprocfs_vars lprocfs_module_vars[] = { {0} };
1178 static struct lprocfs_vars lprocfs_obd_vars[] = { {0} };
1180 LPROCFS_INIT_VARS(lmv, lprocfs_module_vars, lprocfs_obd_vars)
/*
 * Module init: fetches the lmv lprocfs tables and registers the LMV device
 * type (OBD and MD method tables plus module proc variables) under
 * OBD_LMV_DEVICENAME.
 *
 * NOTE(review): the declaration of rc and the return statement are on
 * source lines missing from this listing.
 */
1182 int __init lmv_init(void)
1184 struct lprocfs_static_vars lvars;
1187 lprocfs_init_vars(lmv, &lvars);
1188 rc = class_register_type(&lmv_obd_ops, &lmv_md_ops,
1189 lvars.module_vars, OBD_LMV_DEVICENAME);
/*
 * Module exit: unregisters the LMV device type registered by lmv_init().
 */
1193 static void lmv_exit(void)
1195 class_unregister_type(OBD_LMV_DEVICENAME);
/* Kernel module metadata and the init/exit entry points. */
1199 MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
1200 MODULE_DESCRIPTION("Lustre Logical Metadata Volume OBD driver");
1201 MODULE_LICENSE("GPL");
1203 module_init(lmv_init);
1204 module_exit(lmv_exit);