1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Copyright (C) 2002, 2003 Cluster File Systems, Inc.
6 * This file is part of Lustre, http://www.lustre.org.
8 * Lustre is free software; you can redistribute it and/or
9 * modify it under the terms of version 2 of the GNU General Public
10 * License as published by the Free Software Foundation.
12 * Lustre is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Lustre; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 # define EXPORT_SYMTAB
25 #define DEBUG_SUBSYSTEM S_LMV
27 #include <linux/slab.h>
28 #include <linux/module.h>
29 #include <linux/init.h>
30 #include <linux/slab.h>
31 #include <linux/pagemap.h>
32 #include <asm/div64.h>
33 #include <linux/seq_file.h>
35 #include <liblustre.h>
37 #include <linux/ext2_fs.h>
39 #include <linux/obd_support.h>
40 #include <linux/lustre_lib.h>
41 #include <linux/lustre_net.h>
42 #include <linux/lustre_idl.h>
43 #include <linux/lustre_dlm.h>
44 #include <linux/lustre_mds.h>
45 #include <linux/obd_class.h>
46 #include <linux/obd_ost.h>
47 #include <linux/lprocfs_status.h>
48 #include <linux/lustre_fsfilt.h>
49 #include <linux/obd_lmv.h>
50 #include "lmv_internal.h"
/*
 * Attach hook for an LMV device (installed as o_attach in lmv_obd_ops).
 *
 * Initialises the lprocfs variable tables for "lmv", hands the per-device
 * table (lvars.obd_vars) to lprocfs_obd_attach(), and creates a read-only
 * (mode 0444) "target_obd" entry under dev->obd_proc_entry.
 *
 * NOTE(review): len and data appear to be unused -- confirm.
 */
52 int lmv_attach(struct obd_device *dev, obd_count len, void *data)
54 struct lprocfs_static_vars lvars;
58 lprocfs_init_vars(lmv, &lvars);
59 rc = lprocfs_obd_attach(dev, lvars.obd_vars);
62 struct proc_dir_entry *entry;
/* The entry is created without file operations: the assignment of
 * lmv_proc_target_fops below is commented out. */
64 entry = create_proc_entry("target_obd", 0444, dev->obd_proc_entry);
67 /* entry->proc_fops = &lmv_proc_target_fops; */
/*
 * Detach hook for an LMV device (installed as o_detach in lmv_obd_ops).
 * Delegates to lprocfs_obd_detach() and returns its result.
 */
74 int lmv_detach(struct obd_device *dev)
76 return lprocfs_obd_detach(dev);
/*
 * o_connect method of lmv_obd_ops.
 *
 * Creates the export for this connection with class_connect() and stores
 * a copy of the caller's UUID in lmv->cluuid.  lmv->refcount is consulted
 * so that repeated connects are recognised; in that case the export
 * reference obtained from class_conn2export() is dropped again.
 *
 * conn:   connection handle passed to class_connect()
 * obd:    the LMV device being connected to
 * cluuid: UUID of the connecting client
 *
 * NOTE(review): judging by the name, the connections to the individual
 * targets are deferred to lmv_connect() -- confirm.
 */
79 static int lmv_connect_fake(struct lustre_handle *conn,
80 struct obd_device *obd,
81 struct obd_uuid *cluuid)
83 struct lmv_obd *lmv = &obd->u.lmv;
84 struct obd_export *exp;
88 rc = class_connect(conn, obd, cluuid);
90 CERROR("class_connection() returned %d\n", rc);
94 exp = class_conn2export(conn);
95 /* We don't want to actually do the underlying connections more than
96 * once, so keep track. */
98 if (lmv->refcount > 1) {
99 class_export_put(exp);
103 lmv->cluuid = *cluuid;
/*
 * Pushes the "inter_mds" setting down to the targets.
 *
 * lmv->server_timeout and lmv->connected are tested first; then lmv->tgts
 * is walked and obd_set_info() is issued on the target export with the key
 * "inter_mds" and no value (vallen 0, val NULL).
 *
 * NOTE(review): presumably the function returns early when either flag is
 * zero and skips targets whose exp is NULL -- confirm.  The return value
 * of obd_set_info() is not captured.
 */
110 void lmv_set_timeouts(struct obd_device *obd)
112 struct lmv_tgt_desc *tgts;
117 if (lmv->server_timeout == 0)
120 if (lmv->connected == 0)
123 for (i = 0, tgts = lmv->tgts; i < lmv->count; i++, tgts++) {
124 if (tgts->exp == NULL)
126 obd_set_info(tgts->exp, strlen("inter_mds"),
127 "inter_mds", 0, NULL);
/*
 * Connects the LMV device to the targets listed in lmv->tgts.
 *
 * For every target descriptor:
 *   - the client obd is looked up with class_find_client_obd() using
 *     LUSTRE_MDC_NAME; a target that is not attached or not set up fails
 *     the whole operation with -EINVAL via the out_disc path;
 *   - a target whose UUID equals lmv->cluuid is the local MDS and is not
 *     connected to;
 *   - obd_connect() is called with the fixed UUID "LMV_OSC_UUID" and the
 *     resulting export is stored in tgts->exp;
 *   - lmv->max_easize / lmv->max_cookiesize are propagated with
 *     obd_init_ea_size();
 *   - obd is registered as observer of the target; if that fails the
 *     target export is disconnected again.
 *
 * After the loop lmv_set_timeouts() is called and a reference on exp is
 * dropped with class_export_put().
 *
 * NOTE(review): the error path only calls class_disconnect(exp, 0); targets
 * connected in earlier iterations do not appear to be disconnected (see
 * the FIXME near the end) -- confirm.
 */
131 int lmv_connect(struct obd_device *obd)
133 struct lmv_obd *lmv = &obd->u.lmv;
134 struct obd_uuid *cluuid;
135 struct lmv_tgt_desc *tgts;
136 struct obd_export *exp;
144 cluuid = &lmv->cluuid;
146 CDEBUG(D_OTHER, "time to connect %s to %s\n",
147 cluuid->uuid, obd->obd_name);
149 for (i = 0, tgts = lmv->tgts; i < lmv->count; i++, tgts++) {
150 struct obd_device *tgt_obd;
151 struct obd_uuid lmv_osc_uuid = { "LMV_OSC_UUID" };
152 struct lustre_handle conn = {0, };
154 LASSERT(tgts != NULL);
156 tgt_obd = class_find_client_obd(&tgts->uuid, LUSTRE_MDC_NAME,
159 CERROR("Target %s not attached\n", tgts->uuid.uuid);
160 GOTO(out_disc, rc = -EINVAL);
163 /* for MDS: don't connect to yourself */
164 if (obd_uuid_equals(&tgts->uuid, cluuid)) {
165 CDEBUG(D_OTHER, "don't connect back to %s\n",
171 CDEBUG(D_OTHER, "connect to %s(%s) - %s, %s FOR %s\n",
172 tgt_obd->obd_name, tgt_obd->obd_uuid.uuid,
173 tgts->uuid.uuid, obd->obd_uuid.uuid,
176 if (!tgt_obd->obd_set_up) {
177 CERROR("Target %s not set up\n", tgts->uuid.uuid);
178 GOTO(out_disc, rc = -EINVAL);
181 rc = obd_connect(&conn, tgt_obd, &lmv_osc_uuid);
183 CERROR("Target %s connect error %d\n",
184 tgts->uuid.uuid, rc);
187 tgts->exp = class_conn2export(&conn);
189 obd_init_ea_size(tgts->exp, lmv->max_easize,
190 lmv->max_cookiesize);
192 rc = obd_register_observer(tgt_obd, obd);
194 CERROR("Target %s register_observer error %d\n",
195 tgts->uuid.uuid, rc);
196 obd_disconnect(tgts->exp, 0);
200 CDEBUG(D_OTHER, "connected to %s(%s) successfully (%d)\n",
201 tgt_obd->obd_name, tgt_obd->obd_uuid.uuid,
202 atomic_read(&obd->obd_refcount));
205 lmv_set_timeouts(obd);
207 class_export_put(exp);
211 /* FIXME: cleanup here */
212 class_disconnect(exp, 0);
/*
 * o_disconnect method of lmv_obd_ops.
 *
 * The underlying targets are only torn down on the final disconnect
 * (lmv->refcount is checked for that).  For each target that has an
 * export: if obd->obd_no_recov is set the flag is copied to the target's
 * obd, the observer registration is cleared with
 * obd_register_observer(..., NULL), the target is disconnected with the
 * caller's flags and tgts[i].exp is reset to NULL.  Finally the LMV export
 * itself is released with class_disconnect(exp, 0), whose result is stored
 * in rc.
 *
 * NOTE(review): rc from obd_disconnect() is overwritten on every iteration
 * and again by class_disconnect(), so target disconnect failures appear to
 * be lost -- confirm this is intended.
 */
216 static int lmv_disconnect(struct obd_export *exp, int flags)
218 struct obd_device *obd = class_exp2obd(exp);
219 struct lmv_obd *lmv = &obd->u.lmv;
226 /* Only disconnect the underlying layers on the final disconnect. */
228 if (lmv->refcount != 0)
231 for (i = 0; i < lmv->count; i++) {
232 if (lmv->tgts[i].exp == NULL)
235 if (obd->obd_no_recov) {
236 /* Pass it on to our clients.
237 * XXX This should be an argument to disconnect,
238 * XXX not a back-door flag on the OBD. Ah well.
240 struct obd_device *mdc_obd;
241 mdc_obd = class_exp2obd(lmv->tgts[i].exp);
243 mdc_obd->obd_no_recov = 1;
246 CDEBUG(D_OTHER, "disconnected from %s(%s) successfully\n",
247 lmv->tgts[i].exp->exp_obd->obd_name,
248 lmv->tgts[i].exp->exp_obd->obd_uuid.uuid);
250 obd_register_observer(lmv->tgts[i].exp->exp_obd, NULL);
252 rc = obd_disconnect(lmv->tgts[i].exp, flags);
253 lmv->tgts[i].exp = NULL;
257 /* FIXME: cleanup here */
/* NOTE(review): a reference on exp is dropped immediately before exp is
 * handed to class_disconnect(); verify that this put balances a reference
 * still held at this point. */
259 class_export_put(exp);
260 rc = class_disconnect(exp, 0);
/*
 * Setup routine for an LMV device.
 *
 * buf is interpreted as a struct lustre_cfg carrying two inline buffers:
 *   lcfg_inlbuf1 - a struct lmv_desc (must be at least sizeof(*desc) bytes)
 *   lcfg_inlbuf2 - an array of desc->ld_count struct obd_uuid entries
 *                  (its length must match exactly)
 *
 * On success the target table lmv->tgts is allocated (lmv->bufsize bytes,
 * released again in lmv_cleanup()), each entry receives its UUID, and the
 * default EA / cookie sizes are initialised.
 *
 * NOTE(review): len is not referenced in the statements below -- confirm.
 */
264 static int lmv_setup(struct obd_device *obd, obd_count len, void *buf)
266 struct lustre_cfg *lcfg = buf;
267 struct lmv_desc *desc;
268 struct lmv_obd *lmv = &obd->u.lmv;
269 struct obd_uuid *uuids;
270 struct lmv_tgt_desc *tgts;
276 if (lcfg->lcfg_inllen1 < 1) {
277 CERROR("LMV setup requires a descriptor\n");
/* NOTE(review): the message below says "OST" although lmv_connect() looks
 * the targets up as LUSTRE_MDC_NAME devices -- wording may be a leftover. */
281 if (lcfg->lcfg_inllen2 < 1) {
282 CERROR("LMV setup requires an OST UUID list\n");
286 desc = (struct lmv_desc *)lcfg->lcfg_inlbuf1;
287 if (sizeof(*desc) > lcfg->lcfg_inllen1) {
288 CERROR("descriptor size wrong: %d > %d\n",
289 (int)sizeof(*desc), lcfg->lcfg_inllen1);
293 count = desc->ld_count;
294 uuids = (struct obd_uuid *)lcfg->lcfg_inlbuf2;
/* NOTE(review): sizeof(*uuids) has type size_t but is printed with %u
 * below; the descriptor check above casts to int for the same purpose.
 * On 64-bit builds the uncast argument does not match the format. */
295 if (sizeof(*uuids) * count != lcfg->lcfg_inllen2) {
296 CERROR("UUID array size wrong: %u * %u != %u\n",
297 sizeof(*uuids), count, lcfg->lcfg_inllen2);
301 lmv->bufsize = sizeof(struct lmv_tgt_desc) * count;
302 OBD_ALLOC(lmv->tgts, lmv->bufsize);
303 if (lmv->tgts == NULL) {
304 CERROR("Out of memory\n");
308 for (i = 0, tgts = lmv->tgts; i < count; i++, tgts++) {
309 tgts->uuid = uuids[i];
/* Largest MEA this device can produce: one fid per target plus the header.
 * NOTE(review): relies on lmv->count already holding the target count. */
313 lmv->max_easize = sizeof(struct ll_fid) * lmv->count
314 + sizeof(struct mea);
315 lmv->max_cookiesize = 0;
/*
 * o_statfs method: aggregates statfs information over all targets.
 *
 * obd_statfs() is called on every target's obd.  One result is copied
 * wholesale into *osfs; for the others the os_bavail, os_blocks, os_ffree
 * and os_files counters are added to the running totals.
 *
 * NOTE(review): presumably the first target seeds *osfs and the remaining
 * ones are accumulated -- confirm.
 * NOTE(review): lmv->tgts[i].exp is dereferenced without a NULL check,
 * while lmv_connect() leaves exp unset for the local MDS entry and
 * lmv_disconnect() explicitly skips NULL exports.
 */
320 static int lmv_statfs(struct obd_device *obd, struct obd_statfs *osfs,
321 unsigned long max_age)
323 struct lmv_obd *lmv = &obd->u.lmv;
324 struct obd_statfs temp;
328 for (i = 0; i < lmv->count; i++) {
329 rc = obd_statfs(lmv->tgts[i].exp->exp_obd, &temp, max_age);
331 CERROR("can't stat MDS #%d (%s)\n", i,
332 lmv->tgts[i].exp->exp_obd->obd_name);
336 memcpy(osfs, &temp, sizeof(temp));
338 osfs->os_bavail += temp.os_bavail;
339 osfs->os_blocks += temp.os_blocks;
340 osfs->os_ffree += temp.os_ffree;
341 osfs->os_files += temp.os_files;
/*
 * o_cleanup method: calls lmv_cleanup_objs() for the device and frees the
 * target table using the size recorded in lmv->bufsize by lmv_setup().
 *
 * NOTE(review): flags does not appear to be used -- confirm.
 */
347 static int lmv_cleanup(struct obd_device *obd, int flags)
349 struct lmv_obd *lmv = &obd->u.lmv;
351 lmv_cleanup_objs(obd);
352 OBD_FREE(lmv->tgts, lmv->bufsize);
/*
 * m_getstatus method: forwards the request to the first target
 * (lmv->tgts[0]) through md_getstatus(), which fills in *fid.
 */
356 static int lmv_getstatus(struct obd_export *exp, struct ll_fid *fid)
358 struct obd_device *obd = exp->exp_obd;
359 struct lmv_obd *lmv = &obd->u.lmv;
363 rc = md_getstatus(lmv->tgts[0].exp, fid);
/*
 * m_getattr method: fetches attributes for fid from the target selected
 * by fid->mds.
 *
 * lmv_grab_obj() is consulted to learn whether the fid refers to a split
 * directory known to this LMV; the result only influences the debug
 * message and the (still unimplemented) slave-attribute gathering below.
 *
 * valid, ea_size and request are passed through to md_getattr() unchanged.
 */
368 static int lmv_getattr(struct obd_export *exp, struct ll_fid *fid,
369 unsigned long valid, unsigned int ea_size,
370 struct ptlrpc_request **request)
372 struct obd_device *obd = exp->exp_obd;
373 struct lmv_obd *lmv = &obd->u.lmv;
374 int rc, i = fid->mds;
378 obj = lmv_grab_obj(obd, fid, 0);
379 CDEBUG(D_OTHER, "GETATTR for %lu/%lu/%lu %s\n",
380 (unsigned long) fid->mds,
381 (unsigned long) fid->id,
382 (unsigned long) fid->generation,
383 obj ? "(splitted)" : "");
/* i was initialised from fid->mds above, so this bounds the index used
 * in the md_getattr() call that follows. */
385 LASSERT(fid->mds < lmv->count);
386 rc = md_getattr(lmv->tgts[i].exp, fid,
387 valid, ea_size, request);
388 if (rc == 0 && obj) {
389 /* we have to loop over dirobjs here and gather attrs
390 * for all the slaves */
391 #warning "attrs gathering here"
/*
 * m_change_cbdata method: forwards the iterator callback it and its data
 * argument to the target selected by fid->mds via md_change_cbdata().
 * fid->mds is asserted to be a valid target index first.
 */
397 static int lmv_change_cbdata(struct obd_export *exp,
399 ldlm_iterator_t it, void *data)
401 struct obd_device *obd = exp->exp_obd;
402 struct lmv_obd *lmv = &obd->u.lmv;
406 CDEBUG(D_OTHER, "CBDATA for %lu/%lu/%lu\n",
407 (unsigned long) fid->mds,
408 (unsigned long) fid->id,
409 (unsigned long) fid->generation);
410 LASSERT(fid->mds < lmv->count);
411 rc = md_change_cbdata(lmv->tgts[fid->mds].exp, fid, it, data);
/*
 * m_change_cbdata_name method: like lmv_change_cbdata(), but the target is
 * chosen from the parent directory pfid and the entry name.  When the
 * parent is a split directory known to this LMV (lmv_grab_obj() returns an
 * object), raw_name2idx() picks the target index from the name; the
 * callback is then applied to cfid on that target.
 *
 * NOTE(review): the debug message prints name with "%*s".  In printf-style
 * formats that sets a minimum field width, not a maximum length, so a name
 * that is not NUL-terminated would be read past len; "%.*s" limits the
 * length instead.  Confirm whether name is always NUL-terminated.
 */
415 static int lmv_change_cbdata_name(struct obd_export *exp, struct ll_fid *pfid,
416 char *name, int len, struct ll_fid *cfid,
417 ldlm_iterator_t it, void *data)
419 struct obd_device *obd = exp->exp_obd;
420 struct lmv_obd *lmv = &obd->u.lmv;
425 LASSERT(pfid->mds < lmv->count);
426 LASSERT(cfid->mds < lmv->count);
427 CDEBUG(D_OTHER, "CBDATA for %lu/%lu/%lu:%*s -> %lu/%lu/%lu\n",
428 (unsigned long) pfid->mds, (unsigned long) pfid->id,
429 (unsigned long) pfid->generation, len, name,
430 (unsigned long) cfid->mds, (unsigned long) cfid->id,
431 (unsigned long) cfid->generation);
433 /* this is the default mds for the directory the name belongs to */
435 obj = lmv_grab_obj(obd, pfid, 0);
437 /* directory is split. look for the right mds for this name */
438 mds = raw_name2idx(obj->objcount, name, len);
441 rc = md_change_cbdata(lmv->tgts[mds].exp, cfid, it, data);
/*
 * m_valid_attrs method: forwards the request for fid to the target
 * selected by fid->mds via md_valid_attrs(), after asserting that the
 * index is within lmv->count.
 */
445 static int lmv_valid_attrs(struct obd_export *exp, struct ll_fid *fid)
447 struct obd_device *obd = exp->exp_obd;
448 struct lmv_obd *lmv = &obd->u.lmv;
452 CDEBUG(D_OTHER, "validate %lu/%lu/%lu\n",
453 (unsigned long) fid->mds,
454 (unsigned long) fid->id,
455 (unsigned long) fid->generation);
456 LASSERT(fid->mds < lmv->count);
457 rc = md_valid_attrs(lmv->tgts[fid->mds].exp, fid);
/*
 * Close request: forwarded through md_close() to the target whose index
 * is stored in obdo->o_mds.  The index is asserted to be below lmv->count;
 * obdo, och and request are passed through unchanged.
 */
461 int lmv_close(struct obd_export *exp, struct obdo *obdo,
462 struct obd_client_handle *och,
463 struct ptlrpc_request **request)
465 struct obd_device *obd = exp->exp_obd;
466 struct lmv_obd *lmv = &obd->u.lmv;
467 int rc, i = obdo->o_mds;
470 LASSERT(i < lmv->count);
471 CDEBUG(D_OTHER, "CLOSE %lu/%lu/%lu\n", (unsigned long) obdo->o_mds,
472 (unsigned long) obdo->o_id, (unsigned long) obdo->o_generation);
473 rc = md_close(lmv->tgts[i].exp, obdo, och, request);
/*
 * Refreshes the locally cached description of a (possibly split)
 * directory.
 *
 * Issues md_getattr() for fid on the target fid->mds, asking for
 * OBD_MD_FLEASIZE | OBD_MD_FLDIREA with room for MEA_SIZE_LMV(lmv) bytes,
 * unpacks the reply with mdc_req2lustre_md(), and feeds the resulting
 * md.mea to lmv_create_obj_from_attrs().  The in-memory MEA is released
 * with obd_free_memmd() and the request with ptlrpc_req_finished().
 *
 * Callers in this file use it after a target answered -ERESTART.
 *
 * NOTE(review): -ENODATA is presumably returned when the reply carries no
 * MEA -- confirm the condition guarding that GOTO.
 */
477 int lmv_get_mea_and_update_object(struct obd_export *exp, struct ll_fid *fid)
479 struct obd_device *obd = exp->exp_obd;
480 struct lmv_obd *lmv = &obd->u.lmv;
481 struct ptlrpc_request *req = NULL;
487 mealen = MEA_SIZE_LMV(lmv);
489 valid = OBD_MD_FLEASIZE | OBD_MD_FLDIREA;
491 /* time to update mea of parent fid */
492 rc = md_getattr(lmv->tgts[fid->mds].exp, fid,
493 valid, mealen, &req);
495 CERROR("md_getattr() failed, rc = %d\n", rc);
499 rc = mdc_req2lustre_md(exp, req, 0, NULL, &md);
501 CERROR("mdc_req2lustre_md() failed, rc = %d\n", rc);
506 GOTO(cleanup, rc = -ENODATA);
508 rc = lmv_create_obj_from_attrs(exp, fid, md.mea);
509 obd_free_memmd(exp, (struct lov_stripe_md **) &md.mea);
513 ptlrpc_req_finished(req);
/*
 * m_create method: creates the entry op_data->name in the directory
 * op_data->fid1.
 *
 * If the parent is a split directory known to this LMV, raw_name2idx()
 * selects the slave object from the name and op_data->fid1 is rewritten to
 * that slave's fid.  The request is then sent with md_create() to the
 * target op_data->fid1.mds.  On success the mds_body in the reply is
 * sanity-checked; on -ERESTART the local object is refreshed with
 * lmv_get_mea_and_update_object() and the finished request is released.
 *
 * NOTE(review): presumably the request is retried after a successful
 * refresh -- confirm.
 * NOTE(review): the name is printed with "%*s" (minimum width); "%.*s"
 * would bound the read to namelen if the name may lack a terminator.
 */
517 int lmv_create(struct obd_export *exp, struct mdc_op_data *op_data,
518 const void *data, int datalen, int mode, __u32 uid,
519 __u32 gid, __u64 rdev, struct ptlrpc_request **request)
521 struct obd_device *obd = exp->exp_obd;
522 struct lmv_obd *lmv = &obd->u.lmv;
523 struct mds_body *mds_body;
530 obj = lmv_grab_obj(obd, &op_data->fid1, 0);
532 mds = raw_name2idx(obj->objcount, op_data->name,
534 op_data->fid1 = obj->objs[mds].fid;
538 CDEBUG(D_OTHER, "CREATE '%*s' on %lu/%lu/%lu\n",
539 op_data->namelen, op_data->name,
540 (unsigned long) op_data->fid1.mds,
541 (unsigned long) op_data->fid1.id,
542 (unsigned long) op_data->fid1.generation);
543 rc = md_create(lmv->tgts[op_data->fid1.mds].exp, op_data, data,
544 datalen, mode, uid, gid, rdev, request);
546 if (*request == NULL)
548 mds_body = lustre_msg_buf((*request)->rq_repmsg, 0,
550 LASSERT(mds_body != NULL);
551 CDEBUG(D_OTHER, "created. id = %lu, generation = %lu, mds = %d\n",
552 (unsigned long) mds_body->fid1.id,
553 (unsigned long) mds_body->fid1.generation,
555 LASSERT(mds_body->valid & OBD_MD_MDS ||
556 mds_body->mds == op_data->fid1.mds);
557 } else if (rc == -ERESTART) {
558 /* directory got split. time to update local object
559 * and repeat the request with proper MDS */
560 rc = lmv_get_mea_and_update_object(exp, &op_data->fid1);
562 ptlrpc_req_finished(*request);
/*
 * m_done_writing method: forwards obdo to md_done_writing() on the first
 * target.  As the FIXME below says, target 0 is used unconditionally
 * rather than the target that owns the object.
 */
569 int lmv_done_writing(struct obd_export *exp, struct obdo *obdo)
571 struct obd_device *obd = exp->exp_obd;
572 struct lmv_obd *lmv = &obd->u.lmv;
576 /* FIXME: choose right MDC here */
577 rc = md_done_writing(lmv->tgts[0].exp, obdo);
/*
 * m_enqueue method: enqueues a lock/intent on the target that owns
 * data->fid1.
 *
 * If data->fid1 is a split directory known to this LMV, raw_name2idx()
 * selects the slave from data->name and data->fid1 is replaced by that
 * slave's fid before the call.  All remaining arguments are handed to
 * md_enqueue() unchanged.
 */
581 int lmv_enqueue(struct obd_export *exp, int lock_type,
582 struct lookup_intent *it, int lock_mode,
583 struct mdc_op_data *data, struct lustre_handle *lockh,
584 void *lmm, int lmmsize,
585 ldlm_completion_callback cb_completion,
586 ldlm_blocking_callback cb_blocking, void *cb_data)
588 struct obd_device *obd = exp->exp_obd;
589 struct lmv_obd *lmv = &obd->u.lmv;
595 obj = lmv_grab_obj(obd, &data->fid1, 0);
597 /* directory is split. look for
598 * the right mds for this name */
599 mds = raw_name2idx(obj->objcount, (char *)data->name,
601 data->fid1 = obj->objs[mds].fid;
605 CDEBUG(D_OTHER, "ENQUEUE '%s' on %lu/%lu\n",
606 LL_IT2STR(it), (unsigned long) data->fid1.id,
607 (unsigned long) data->fid1.generation);
608 rc = md_enqueue(lmv->tgts[data->fid1.mds].exp, lock_type, it,
609 lock_mode, data, lockh, lmm, lmmsize, cb_completion,
610 cb_blocking, cb_data);
/*
 * m_getattr_name method: looks up filename in directory fid and fetches
 * its attributes.
 *
 * rfid starts as a copy of *fid.  For a split directory the slave is
 * chosen with raw_name2idx() and rfid becomes that slave's fid.  The
 * request goes to lmv->tgts[mds] via md_getattr_name().
 *
 * If the reply body has OBD_MD_MDS set, the entry is a cross-node
 * reference: a second md_getattr_name() with a NULL name (length 1) is
 * sent to the target rfid.mds and the first request is released.
 * On -ERESTART the local object is refreshed with
 * lmv_get_mea_and_update_object() and the request is released.
 *
 * NOTE(review): raw_name2idx() is called with namelen - 1 here, whereas
 * other callers in this file pass the length as is; presumably namelen
 * includes the terminating NUL in this interface -- confirm.
 * NOTE(review): rfid is not visibly updated from the reply body before the
 * second md_getattr_name() -- confirm it is.
 */
615 int lmv_getattr_name(struct obd_export *exp, struct ll_fid *fid,
616 char *filename, int namelen, unsigned long valid,
617 unsigned int ea_size, struct ptlrpc_request **request)
619 struct obd_device *obd = exp->exp_obd;
620 struct lmv_obd *lmv = &obd->u.lmv;
621 struct ll_fid rfid = *fid;
622 int rc, mds = fid->mds;
623 struct mds_body *body;
628 obj = lmv_grab_obj(obd, fid, 0);
630 /* directory is split. look for the right mds for this name */
631 mds = raw_name2idx(obj->objcount, filename, namelen - 1);
632 rfid = obj->objs[mds].fid;
635 CDEBUG(D_OTHER, "getattr_name for %*s on %lu/%lu/%lu -> %lu/%lu/%lu\n",
636 namelen, filename, (unsigned long) fid->mds,
637 (unsigned long) fid->id, (unsigned long) fid->generation,
638 (unsigned long) rfid.mds, (unsigned long) rfid.id,
639 (unsigned long) rfid.generation);
640 rc = md_getattr_name(lmv->tgts[mds].exp, &rfid, filename, namelen,
641 valid, ea_size, request);
643 /* this could be a cross-node reference. in this case all
644 * we have right now is the mds/ino/generation triple. we'd
645 * like to find the other attributes */
646 body = lustre_msg_buf((*request)->rq_repmsg, 0, sizeof(*body));
647 LASSERT(body != NULL);
648 if (body->valid & OBD_MD_MDS) {
649 struct ptlrpc_request *req = NULL;
651 CDEBUG(D_OTHER, "request attrs for %lu/%lu/%lu\n",
652 (unsigned long) rfid.mds,
653 (unsigned long) rfid.id,
654 (unsigned long) rfid.generation);
655 rc = md_getattr_name(lmv->tgts[rfid.mds].exp, &rfid,
656 NULL, 1, valid, ea_size, &req);
657 ptlrpc_req_finished(*request);
660 } else if (rc == -ERESTART) {
661 /* directory got split. time to update local object
662 * and repeat the request with proper MDS */
663 rc = lmv_get_mea_and_update_object(exp, &rfid);
665 ptlrpc_req_finished(*request);
674 * llite passes the fid of a target inode in data->fid1 and
675 * the fid of the directory in data->fid2
/*
 * Link request.  Two flavours are distinguished by data->namelen:
 *
 *   namelen != 0 - ordinary link.  If lmv_grab_obj() finds a split
 *                  directory object for data->fid1, raw_name2idx() selects
 *                  the slave from the name and data->fid1 is replaced by
 *                  the slave's fid.
 *   namelen == 0 - request coming from an MDS that only wants the link
 *                  count of the inode data->fid1 raised.
 *
 * In both cases the request is finally sent with md_link() to the target
 * data->fid1.mds.
 *
 * rc doubles as scratch storage for the slave index in the split case.
 */
677 int lmv_link(struct obd_export *exp, struct mdc_op_data *data,
678 struct ptlrpc_request **request)
680 struct obd_device *obd = exp->exp_obd;
681 struct lmv_obd *lmv = &obd->u.lmv;
686 if (data->namelen != 0) {
687 /* usual link request */
688 obj = lmv_grab_obj(obd, &data->fid1, 0);
690 rc = raw_name2idx(obj->objcount, data->name,
692 data->fid1 = obj->objs[rc].fid;
695 CDEBUG(D_OTHER,"link %u/%u/%u:%*s to %u/%u/%u mds %d\n",
696 (unsigned) data->fid2.mds, (unsigned) data->fid2.id,
697 (unsigned) data->fid2.generation, data->namelen,
698 data->name, (unsigned) data->fid1.mds,
699 (unsigned) data->fid1.id,
700 (unsigned) data->fid1.generation, data->fid1.mds);
702 /* request from MDS to acquire i_links for inode by fid1 */
703 CDEBUG(D_OTHER, "inc i_nlinks for %u/%u/%u\n",
704 (unsigned) data->fid1.mds, (unsigned) data->fid1.id,
705 (unsigned) data->fid1.generation);
708 rc = md_link(lmv->tgts[data->fid1.mds].exp, data, request);
/*
 * m_rename method: renames old (in directory data->fid1) to new (in
 * directory data->fid2).
 *
 * Target selection:
 *   - when one MDS asks another to create the new name (see the comment in
 *     the body) the request goes to data->fid2.mds;
 *   - otherwise each of the two directories is looked up with
 *     lmv_grab_obj(); for a split directory raw_name2idx() selects the
 *     slave for the respective name and the fid in data is replaced by the
 *     slave's fid.  The request is then sent to data->fid1.mds.
 *
 * NOTE(review): the "cross-node rename" warning fires whenever
 * fid_equal(fid1, fid2) is false.  If fid_equal() compares whole fids,
 * that is any rename between two different directories, not only one that
 * spans MDS nodes -- confirm.
 * NOTE(review): names are printed with "%*s" (minimum width, not maximum
 * length).
 */
712 int lmv_rename(struct obd_export *exp, struct mdc_op_data *data,
713 const char *old, int oldlen, const char *new, int newlen,
714 struct ptlrpc_request **request)
716 struct obd_device *obd = exp->exp_obd;
717 struct lmv_obd *lmv = &obd->u.lmv;
722 CDEBUG(D_OTHER, "rename %*s in %lu/%lu/%lu to %*s in %lu/%lu/%lu\n",
723 oldlen, old, (unsigned long) data->fid1.mds,
724 (unsigned long) data->fid1.id,
725 (unsigned long) data->fid1.generation,
726 newlen, new, (unsigned long) data->fid2.mds,
727 (unsigned long) data->fid2.id,
728 (unsigned long) data->fid2.generation);
729 if (!fid_equal(&data->fid1, &data->fid2))
730 CWARN("cross-node rename %lu/%lu/%lu:%*s to %lu/%lu/%lu:%*s\n",
731 (unsigned long) data->fid1.mds,
732 (unsigned long) data->fid1.id,
733 (unsigned long) data->fid1.generation, oldlen, old,
734 (unsigned long) data->fid2.mds,
735 (unsigned long) data->fid2.id,
736 (unsigned long) data->fid2.generation, newlen, new);
741 /* MDS with old dir entry is asking another MDS
742 * to create name there */
744 "create %*s(%d/%d) in %lu/%lu/%lu pointing to %lu/%lu/%lu\n",
745 newlen, new, oldlen, newlen,
746 (unsigned long) data->fid2.mds,
747 (unsigned long) data->fid2.id,
748 (unsigned long) data->fid2.generation,
749 (unsigned long) data->fid1.mds,
750 (unsigned long) data->fid1.id,
751 (unsigned long) data->fid1.generation);
752 mds = data->fid2.mds;
756 obj = lmv_grab_obj(obd, &data->fid1, 0);
758 /* directory is already split, so we have to forward
759 * request to the right MDS */
760 mds = raw_name2idx(obj->objcount, (char *)old, oldlen);
761 data->fid1 = obj->objs[mds].fid;
762 CDEBUG(D_OTHER, "forward to MDS #%u (%lu/%lu/%lu)\n", mds,
763 (unsigned long) obj->objs[mds].fid.mds,
764 (unsigned long) obj->objs[mds].fid.id,
765 (unsigned long) obj->objs[mds].fid.generation);
769 obj = lmv_grab_obj(obd, &data->fid2, 0);
771 /* directory is already split, so we have to forward
772 * request to the right MDS */
773 mds = raw_name2idx(obj->objcount, (char *)new, newlen);
774 data->fid2 = obj->objs[mds].fid;
775 CDEBUG(D_OTHER, "forward to MDS #%u (%lu/%lu/%lu)\n", mds,
776 (unsigned long) obj->objs[mds].fid.mds,
777 (unsigned long) obj->objs[mds].fid.id,
778 (unsigned long) obj->objs[mds].fid.generation);
/* The request is ultimately sent to the MDS holding the (possibly
 * remapped) source directory. */
782 mds = data->fid1.mds;
785 rc = md_rename(lmv->tgts[mds].exp, data, old, oldlen,
786 new, newlen, request);
/*
 * m_setattr method: applies iattr (and the optional EA buffers) to
 * data->fid1.
 *
 * Split directory: the attributes are set on every slave object in turn.
 * The reply belonging to the master object (the slave whose fid equals
 * obj->fid) is the one meant to be returned to the caller; the other
 * replies are released with ptlrpc_req_finished().
 *
 * Ordinary object: a single md_setattr() is sent and the mds number in the
 * reply body is asserted to match the target index.
 *
 * NOTE(review): in the split loop the target is chosen as lmv->tgts[i]
 * with i being the slave index, not obj->objs[i].fid.mds.  That is only
 * correct if slave i always lives on target i -- confirm.
 * NOTE(review): req is declared without an initialiser; make sure it is
 * only used after md_setattr() has stored into it.
 */
790 int lmv_setattr(struct obd_export *exp, struct mdc_op_data *data,
791 struct iattr *iattr, void *ea, int ealen, void *ea2, int ea2len,
792 struct ptlrpc_request **request)
794 struct obd_device *obd = exp->exp_obd;
795 struct lmv_obd *lmv = &obd->u.lmv;
796 int rc = 0, i = data->fid1.mds;
797 struct ptlrpc_request *req;
798 struct mds_body *mds_body;
802 obj = lmv_grab_obj(obd, &data->fid1, 0);
803 CDEBUG(D_OTHER, "SETATTR for %lu/%lu/%lu, valid 0x%x%s\n",
804 (unsigned long) data->fid1.mds,
805 (unsigned long) data->fid1.id,
806 (unsigned long) data->fid1.generation, iattr->ia_valid,
807 obj ? ", splitted" : "");
809 for (i = 0; i < obj->objcount; i++) {
810 data->fid1 = obj->objs[i].fid;
811 rc = md_setattr(lmv->tgts[i].exp, data, iattr, ea,
812 ealen, ea2, ea2len, &req);
814 if (fid_equal(&obj->fid, &obj->objs[i].fid)) {
815 /* this is the master object and this request
816 * should be returned back to llite */
819 ptlrpc_req_finished(req);
824 LASSERT(data->fid1.mds < lmv->count);
825 rc = md_setattr(lmv->tgts[i].exp, data, iattr, ea, ealen,
826 ea2, ea2len, request);
828 mds_body = lustre_msg_buf((*request)->rq_repmsg, 0,
830 LASSERT(mds_body != NULL);
831 LASSERT(mds_body->mds == i);
/*
 * Sync request: forwarded with md_sync() to the first target
 * (lmv->tgts[0]) regardless of fid->mds.
 */
837 int lmv_sync(struct obd_export *exp, struct ll_fid *fid,
838 struct ptlrpc_request **request)
840 struct obd_device *obd = exp->exp_obd;
841 struct lmv_obd *lmv = &obd->u.lmv;
845 rc = md_sync(lmv->tgts[0].exp, fid, request);
/*
 * Blocking AST for locks taken on directory objects.
 *
 *   LDLM_CB_BLOCKING  - the lock is converted to a handle and cancelled
 *                       with ldlm_cli_cancel(); the result is logged.
 *   LDLM_CB_CANCELING - the LMV object attached to the lock
 *                       (lock->l_ast_data) is looked at so that its cached
 *                       attributes can be dropped, and a debug line with
 *                       the resource name and the master fid is emitted.
 *
 * NOTE(review): desc and data do not appear to be used -- confirm.
 */
849 int lmv_dirobj_blocking_ast(struct ldlm_lock *lock,
850 struct ldlm_lock_desc *desc, void *data, int flag)
852 struct lustre_handle lockh;
858 case LDLM_CB_BLOCKING:
859 ldlm_lock2handle(lock, &lockh);
860 rc = ldlm_cli_cancel(&lockh);
862 CDEBUG(D_INODE, "ldlm_cli_cancel: %d\n", rc);
866 case LDLM_CB_CANCELING:
867 /* time to drop cached attrs for dirobj */
868 obj = lock->l_ast_data;
872 CDEBUG(D_OTHER, "cancel %s on %lu/%lu, master %lu/%lu/%lu\n",
873 lock->l_resource->lr_name.name[3] == 1 ?
875 (unsigned long) lock->l_resource->lr_name.name[0],
876 (unsigned long) lock->l_resource->lr_name.name[1],
877 (unsigned long) obj->fid.mds,
878 (unsigned long) obj->fid.id,
879 (unsigned long) obj->fid.generation);
/*
 * Walks the ext2-format directory entries stored in page and picks out
 * the "." and ".." entries.  lmv_readpage() calls this for pages that come
 * from slave objects so that those two names are not duplicated in the
 * merged directory.
 *
 * The walk advances by each entry's rec_len (little-endian on disk) and
 * stops once fewer than EXT2_DIR_REC_LEN(1) bytes remain in the page.
 *
 * NOTE(review): rec_len is taken from the page without validation; an
 * entry with rec_len == 0 would keep offs from advancing -- confirm the
 * page contents are trusted.
 */
887 void lmv_remove_dots(struct page *page)
889 char *kaddr = page_address(page);
890 unsigned limit = PAGE_CACHE_SIZE;
891 unsigned offs, rec_len;
892 struct ext2_dir_entry_2 *p;
894 for (offs = 0; offs <= limit - EXT2_DIR_REC_LEN(1); offs += rec_len) {
895 p = (struct ext2_dir_entry_2 *)(kaddr + offs);
896 rec_len = le16_to_cpu(p->rec_len);
898 if ((p->name_len == 1 && p->name[0] == '.') ||
899 (p->name_len == 2 && p->name[0] == '.' && p->name[1] == '.'))
/*
 * m_readpage method: reads one directory page at offset from mdc_fid.
 *
 * For a split directory the slave objects are treated as concatenated:
 * the cached per-slave sizes are subtracted from offset until the slave
 * that contains it is found, and the request is redirected to that slave's
 * fid with the remaining offset.  When the page was read from an object
 * other than mdc_fid itself, lmv_remove_dots() is applied to it.
 *
 * NOTE(review): if offset is not smaller than the sum of all slave sizes,
 * the search loop ends with i == obj->objcount and obj->objs[i] is read
 * one element past the last slave -- confirm this cannot happen or add a
 * guard.
 */
904 int lmv_readpage(struct obd_export *exp, struct ll_fid *mdc_fid,
905 __u64 offset, struct page *page,
906 struct ptlrpc_request **request)
908 struct obd_device *obd = exp->exp_obd;
909 struct lmv_obd *lmv = &obd->u.lmv;
910 struct ll_fid rfid = *mdc_fid;
916 LASSERT(mdc_fid->mds < lmv->count);
917 CDEBUG(D_OTHER, "READPAGE at %llu from %lu/%lu/%lu\n",
918 offset, (unsigned long) rfid.mds,
919 (unsigned long) rfid.id,
920 (unsigned long) rfid.generation);
922 obj = lmv_grab_obj(obd, mdc_fid, 0);
924 /* find dirobj containing page with requested offset */
925 /* FIXME: what about protecting cached attrs here? */
926 for (i = 0; i < obj->objcount; i++) {
927 if (offset < obj->objs[i].size)
929 offset -= obj->objs[i].size;
931 rfid = obj->objs[i].fid;
932 CDEBUG(D_OTHER, "forward to %lu/%lu/%lu with offset %lu\n",
933 (unsigned long) rfid.mds,
934 (unsigned long) rfid.id,
935 (unsigned long) rfid.generation,
936 (unsigned long) offset);
938 rc = md_readpage(lmv->tgts[rfid.mds].exp, &rfid, offset, page, request);
939 if (rc == 0 && !fid_equal(&rfid, mdc_fid)) {
940 /* this page isn't from the master object. to avoid
941 * ./.. duplication in the directory, we have to remove them
942 * from all slave objects */
943 lmv_remove_dots(page);
/*
 * m_unlink method.  Two flavours are distinguished by data->namelen:
 *
 *   namelen != 0 - remove the name data->name from directory data->fid1.
 *                  For a split directory raw_name2idx() selects the slave
 *                  and data->fid1 is replaced by the slave's fid.
 *   namelen == 0 - only drop the link count of the inode data->fid1.
 *
 * The request is sent with md_unlink() to the target data->fid1.mds.
 *
 * NOTE(review): i is printed in the debug message of the named case; make
 * sure it has a defined value when the directory is not split.
 */
951 int lmv_unlink(struct obd_export *exp, struct mdc_op_data *data,
952 struct ptlrpc_request **request)
954 struct obd_device *obd = exp->exp_obd;
955 struct lmv_obd *lmv = &obd->u.lmv;
959 if (data->namelen != 0) {
961 obj = lmv_grab_obj(obd, &data->fid1, 0);
963 i = raw_name2idx(obj->objcount, data->name,
965 data->fid1 = obj->objs[i].fid;
968 CDEBUG(D_OTHER, "unlink '%*s' in %lu/%lu/%lu -> %u\n",
969 data->namelen, data->name,
970 (unsigned long) data->fid1.mds,
971 (unsigned long) data->fid1.id,
972 (unsigned long) data->fid1.generation, i);
974 CDEBUG(D_OTHER, "drop i_nlink on %lu/%lu/%lu\n",
975 (unsigned long) data->fid1.mds,
976 (unsigned long) data->fid1.id,
977 (unsigned long) data->fid1.generation);
979 rc = md_unlink(lmv->tgts[data->fid1.mds].exp, data, request);
/*
 * m_get_real_obd method: resolves the LMV export to an underlying device.
 * The obd of the first target (lmv->tgts[0]) is selected.
 *
 * NOTE(review): lmv->tgts[0].exp is dereferenced without a NULL check.
 */
983 struct obd_device *lmv_get_real_obd(struct obd_export *exp,
986 struct obd_device *obd = exp->exp_obd;
987 struct lmv_obd *lmv = &obd->u.lmv;
990 obd = lmv->tgts[0].exp->exp_obd;
/*
 * o_init_ea_size method: raises the maximum EA and cookie sizes remembered
 * by the LMV and pushes the request down to the targets.
 *
 * lmv->max_easize / lmv->max_cookiesize only ever grow.  lmv->connected is
 * checked before the targets are touched; the per-target call passes the
 * caller's easize and cookiesize.
 *
 * NOTE(review): rc is overwritten on every iteration, so only the last
 * target's result is reported (see FIXME).  lmv->tgts[i].exp is not
 * checked for NULL here although lmv_connect() can leave it unset.
 */
995 int lmv_init_ea_size(struct obd_export *exp, int easize, int cookiesize)
997 struct obd_device *obd = exp->exp_obd;
998 struct lmv_obd *lmv = &obd->u.lmv;
999 int i, rc = 0, change = 0;
1002 if (lmv->max_easize < easize) {
1003 lmv->max_easize = easize;
1006 if (lmv->max_cookiesize < cookiesize) {
1007 lmv->max_cookiesize = cookiesize;
1013 if (lmv->connected == 0)
1016 /* FIXME: error handling? */
1017 for (i = 0; i < lmv->count; i++)
1018 rc = obd_init_ea_size(lmv->tgts[i].exp, easize, cookiesize);
/*
 * Creates a single object on the target named by oa->o_mds.
 *
 * ea must be NULL (asserted).  obd_create() is handed a pointer to a
 * stack-allocated lov_stripe_md instead, so nothing is returned to the
 * caller through ea.
 */
1022 int lmv_obd_create_single(struct obd_export *exp, struct obdo *oa,
1023 struct lov_stripe_md **ea, struct obd_trans_info *oti)
1025 struct obd_device *obd = exp->exp_obd;
1026 struct lmv_obd *lmv = &obd->u.lmv;
1027 struct lov_stripe_md obj_md;
1028 struct lov_stripe_md *obj_mdp = &obj_md;
1033 LASSERT(ea == NULL);
1034 LASSERT(oa->o_mds < lmv->count);
1036 rc = obd_create(lmv->tgts[oa->o_mds].exp, oa, &obj_mdp, oti);
1043 * to be called from MDS only
/*
 * o_create method (installed in lmv_obd_ops).
 *
 * Two modes:
 *   - single object: delegated to lmv_obd_create_single();
 *   - directory objects: an MEA is obtained through obd_alloc_diskmd() (or
 *     supplied via *ea) and filled with one fid per participating MDS.
 *     mea_count is clamped to lmv->count.  The target without an export is
 *     the local ("master") MDS; its slot receives the master fid and
 *     mea_master is set to its index.  For the other targets obd_create()
 *     is called and the returned id/generation are recorded.
 *
 * NOTE(review): presumably the single-object mode is chosen when ea is
 * NULL -- confirm.
 * NOTE(review): obd_create() is issued on lmv->tgts[c].exp while the NULL
 * check and the recorded mds index use i.  c and i diverge as soon as a
 * target is skipped -- confirm which index is intended.
 * NOTE(review): failures of obd_create() are not handled (see FIXMEs).
 */
1045 int lmv_obd_create(struct obd_export *exp, struct obdo *oa,
1046 struct lov_stripe_md **ea, struct obd_trans_info *oti)
1048 struct obd_device *obd = exp->exp_obd;
1049 struct lmv_obd *lmv = &obd->u.lmv;
1056 LASSERT(oa != NULL);
1059 rc = lmv_obd_create_single(exp, oa, NULL, oti);
1064 rc = obd_alloc_diskmd(exp, (struct lov_mds_md **)ea);
1065 LASSERT(*ea != NULL);
1068 mea = (struct mea *)*ea;
1070 mfid.generation = oa->o_generation;
1072 if (!mea->mea_count || mea->mea_count > lmv->count)
1073 mea->mea_count = lmv->count;
1075 mea->mea_master = -1;
1077 /* FIXME: error handling? */
1078 for (i = 0, c = 0; c < mea->mea_count && i < lmv->count; i++) {
1079 struct lov_stripe_md obj_md;
1080 struct lov_stripe_md *obj_mdp = &obj_md;
1082 if (lmv->tgts[i].exp == NULL) {
1083 /* this is master MDS */
1084 mea->mea_fids[c].id = mfid.id;
1085 mea->mea_fids[c].generation = mfid.generation;
1086 mea->mea_fids[c].mds = i;
1087 mea->mea_master = i;
1092 /* "Master" MDS should always be part of stripped dir, so
1094 if (mea->mea_master == -1 && c == mea->mea_count - 1)
1097 oa->o_valid = OBD_MD_FLGENER | OBD_MD_FLTYPE | OBD_MD_FLMODE
1098 | OBD_MD_FLUID | OBD_MD_FLGID | OBD_MD_FLID;
1100 rc = obd_create(lmv->tgts[c].exp, oa, &obj_mdp, oti);
1101 /* FIXME: error handling here */
1104 mea->mea_fids[c].id = oa->o_id;
1105 mea->mea_fids[c].generation = oa->o_generation;
1106 mea->mea_fids[c].mds = i;
1108 CDEBUG(D_OTHER, "dirobj at mds %d: "LPU64"/%u\n",
1109 i, oa->o_id, oa->o_generation);
1111 LASSERT(c == mea->mea_count);
1112 CDEBUG(D_OTHER, "%d dirobjects created\n", (int) mea->mea_count);
/*
 * o_get_info method.  Supported keys (compared by length and memcmp):
 *
 *   "mdsize" - *val (a __u32) receives the maximum MEA size, i.e. one
 *              struct ll_fid per target plus sizeof(struct mea).
 *   "mdsnum" - the target table is searched for the entry whose UUID
 *              equals lmv->cluuid; *vallen is set to sizeof(__u32).
 *
 * Any other key is reported as invalid via CDEBUG.
 *
 * NOTE(review): for "mdsnum" the matching index is presumably stored in
 * *mdsnum -- confirm.
 */
1117 static int lmv_get_info(struct obd_export *exp, __u32 keylen,
1118 void *key, __u32 *vallen, void *val)
1120 struct obd_device *obd;
1121 struct lmv_obd *lmv;
1124 obd = class_exp2obd(exp);
1126 CDEBUG(D_IOCTL, "invalid client cookie "LPX64"\n",
1127 exp->exp_handle.h_cookie);
1132 if (keylen == 6 && memcmp(key, "mdsize", 6) == 0) {
1133 __u32 *mdsize = val;
1134 *vallen = sizeof(__u32);
1135 *mdsize = sizeof(struct ll_fid) * lmv->count
1136 + sizeof(struct mea);
1138 } else if (keylen == 6 && memcmp(key, "mdsnum", 6) == 0) {
1139 struct obd_uuid *cluuid = &lmv->cluuid;
1140 struct lmv_tgt_desc *tgts;
1141 __u32 *mdsnum = val;
1144 for (i = 0, tgts = lmv->tgts; i < lmv->count; i++, tgts++) {
1145 if (obd_uuid_equals(&tgts->uuid, cluuid)) {
1146 *vallen = sizeof(__u32);
1154 CDEBUG(D_IOCTL, "invalid key\n");
/*
 * o_set_info method.  Supported keys:
 *
 *   "client"    - the key/value pair is forwarded unchanged to every
 *                 target with obd_set_info().
 *   "inter_mds" - lmv->server_timeout is set to 1 and lmv_set_timeouts()
 *                 is invoked so that the setting reaches the targets.
 *
 * NOTE(review): the keys are matched with strcmp(), which requires key to
 * be NUL-terminated; keylen is only checked as a lower bound.
 * lmv_get_info() uses an exact length plus memcmp() for the same purpose.
 * NOTE(review): in the "client" loop tgts->exp is not checked for NULL
 * before obd_set_info() -- confirm.
 */
1158 int lmv_set_info(struct obd_export *exp, obd_count keylen,
1159 void *key, obd_count vallen, void *val)
1161 struct obd_device *obd;
1162 struct lmv_obd *lmv;
1165 obd = class_exp2obd(exp);
1167 CDEBUG(D_IOCTL, "invalid client cookie "LPX64"\n",
1168 exp->exp_handle.h_cookie);
1173 if (keylen >= strlen("client") && strcmp(key, "client") == 0) {
1174 struct lmv_tgt_desc *tgts;
1178 for (i = 0, tgts = lmv->tgts; i < lmv->count; i++, tgts++) {
1179 rc = obd_set_info(tgts->exp, keylen, key, vallen, val);
1184 } else if (keylen >= strlen("inter_mds") && strcmp(key, "inter_mds") == 0) {
1185 lmv->server_timeout = 1;
1186 lmv_set_timeouts(obd);
/*
 * o_packmd method: produces the on-disk form of an MEA.
 *
 * The buffer size is always the maximum MEA size for this LMV (one
 * struct ll_fid per target plus sizeof(struct mea)).
 *
 *   *lmmp set, lsm NULL - the buffer at *lmmp is freed;
 *   otherwise           - a buffer is allocated into *lmmp and the
 *                         in-memory structure is copied verbatim (no
 *                         conversion is done yet, see the #warning).
 *
 * NOTE(review): the memcpy copies mea_size bytes from lsm, so lsm must be
 * at least that large -- confirm for callers passing smaller MEAs.
 */
1193 int lmv_packmd(struct obd_export *exp, struct lov_mds_md **lmmp,
1194 struct lov_stripe_md *lsm)
1196 struct obd_device *obd = class_exp2obd(exp);
1197 struct lmv_obd *lmv = &obd->u.lmv;
1201 mea_size = sizeof(struct ll_fid) * lmv->count + sizeof(struct mea);
1205 if (*lmmp && !lsm) {
1206 OBD_FREE(*lmmp, mea_size);
1212 OBD_ALLOC(*lmmp, mea_size);
1220 #warning "MEA packing/convertation must be here! -bzzz"
1221 memcpy(*lmmp, lsm, mea_size);
/*
 * o_unpackmd method: produces the in-memory form of an MEA.
 *
 *   *mem_tgt set, disk_src NULL - the in-memory MEA is freed;
 *   otherwise                   - mdsize must equal the maximum MEA size
 *                                 for this LMV (asserted); a buffer of
 *                                 that size is allocated into *mem_tgt and
 *                                 the disk data copied verbatim (no
 *                                 conversion yet, see the #warning).
 *
 * Allocation failure is currently caught only by an assertion.
 *
 * NOTE(review): the handling of mem_tgt == NULL is presumably a size query
 * returning mea_size -- confirm.
 */
1225 int lmv_unpackmd(struct obd_export *exp, struct lov_stripe_md **mem_tgt,
1226 struct lov_mds_md *disk_src, int mdsize)
1228 struct obd_device *obd = class_exp2obd(exp);
1229 struct lmv_obd *lmv = &obd->u.lmv;
1230 struct mea **tmea = (struct mea **) mem_tgt;
1231 struct mea *mea = (void *) disk_src;
1235 mea_size = sizeof(struct ll_fid) * lmv->count + sizeof(struct mea);
1236 if (mem_tgt == NULL)
1239 if (*mem_tgt != NULL && disk_src == NULL) {
1240 OBD_FREE(*tmea, mea_size);
1244 LASSERT(mea_size == mdsize);
1246 OBD_ALLOC(*tmea, mea_size);
1247 /* FIXME: error handling here */
1248 LASSERT(*tmea != NULL);
1253 #warning "MEA unpacking/convertation must be here! -bzzz"
1254 memcpy(*tmea, mea, mdsize);
/*
 * Bulk read/write on one directory object of a split directory.
 *
 * ea is interpreted as the directory's MEA.  oa->o_mds selects both the
 * slot in mea->mea_fids and the target; oa is rewritten with that slot's
 * id (o_id) and generation (stored in o_gr) and o_valid is reset to
 * OBD_MD_FLID | OBD_MD_FLGROUP before obd_brw() is called on the target
 * with a NULL stripe md.
 *
 * NOTE(review): oa->o_mds is bounded against lmv->count only, not against
 * mea->mea_count -- confirm the MEA always has lmv->count entries.
 */
1258 int lmv_brw(int rw, struct obd_export *exp, struct obdo *oa,
1259 struct lov_stripe_md *ea, obd_count oa_bufs,
1260 struct brw_page *pgarr, struct obd_trans_info *oti)
1262 struct obd_device *obd = exp->exp_obd;
1263 struct lmv_obd *lmv = &obd->u.lmv;
1264 struct mea *mea = (struct mea *) ea;
1267 LASSERT(oa != NULL);
1268 LASSERT(ea != NULL);
1269 LASSERT(pgarr != NULL);
1270 LASSERT(oa->o_mds < lmv->count);
1272 oa->o_gr = mea->mea_fids[oa->o_mds].generation;
1273 oa->o_id = mea->mea_fids[oa->o_mds].id;
1274 oa->o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
1275 err = obd_brw(rw, lmv->tgts[oa->o_mds].exp, oa,
1276 NULL, oa_bufs, pgarr, oti);
/*
 * Generic OBD method table for the LMV device type, registered in
 * lmv_init().  Uses the GNU "field: value" initialiser syntax.
 * Note that o_connect is lmv_connect_fake(), not lmv_connect().
 */
1280 struct obd_ops lmv_obd_ops = {
1281 o_owner: THIS_MODULE,
1282 o_attach: lmv_attach,
1283 o_detach: lmv_detach,
1285 o_cleanup: lmv_cleanup,
1286 o_connect: lmv_connect_fake,
1287 o_disconnect: lmv_disconnect,
1288 o_statfs: lmv_statfs,
1289 o_get_info: lmv_get_info,
1290 o_set_info: lmv_set_info,
1291 o_create: lmv_obd_create,
1292 o_packmd: lmv_packmd,
1293 o_unpackmd: lmv_unpackmd,
1295 o_init_ea_size: lmv_init_ea_size,
/*
 * Metadata method table for the LMV device type, registered together with
 * lmv_obd_ops in lmv_init().  lmv_intent_lock is not defined in this part
 * of the file.
 */
1298 struct md_ops lmv_md_ops = {
1299 m_getstatus: lmv_getstatus,
1300 m_getattr: lmv_getattr,
1301 m_change_cbdata: lmv_change_cbdata,
1302 m_change_cbdata_name: lmv_change_cbdata_name,
1304 m_create: lmv_create,
1305 m_done_writing: lmv_done_writing,
1306 m_enqueue: lmv_enqueue,
1307 m_getattr_name: lmv_getattr_name,
1308 m_intent_lock: lmv_intent_lock,
1310 m_rename: lmv_rename,
1311 m_setattr: lmv_setattr,
1313 m_readpage: lmv_readpage,
1314 m_unlink: lmv_unlink,
1315 m_get_real_obd: lmv_get_real_obd,
1316 m_valid_attrs: lmv_valid_attrs,
/*
 * Module initialisation: sets up the lprocfs variable tables for "lmv"
 * and registers the LMV device type (both method tables plus the
 * module-level proc variables) under the name OBD_LMV_DEVICENAME.
 */
1319 int __init lmv_init(void)
1321 struct lprocfs_static_vars lvars;
1324 lprocfs_init_vars(lmv, &lvars);
1325 rc = class_register_type(&lmv_obd_ops, &lmv_md_ops,
1326 lvars.module_vars, OBD_LMV_DEVICENAME);
/*
 * Module exit: unregisters the LMV device type registered by lmv_init().
 *
 * NOTE(review): lmv_init() carries __init but this function has no
 * matching __exit annotation -- harmless, but inconsistent.
 */
1331 static void lmv_exit(void)
1333 class_unregister_type(OBD_LMV_DEVICENAME);
/* Module metadata and entry/exit point registration. */
1336 MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
1337 MODULE_DESCRIPTION("Lustre Logical Metadata Volume OBD driver");
1338 MODULE_LICENSE("GPL");
1340 module_init(lmv_init);
1341 module_exit(lmv_exit);