1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Copyright (C) 2002, 2003 Cluster File Systems, Inc.
6 * This file is part of Lustre, http://www.lustre.org.
8 * Lustre is free software; you can redistribute it and/or
9 * modify it under the terms of version 2 of the GNU General Public
10 * License as published by the Free Software Foundation.
12 * Lustre is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Lustre; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 # define EXPORT_SYMTAB
25 #define DEBUG_SUBSYSTEM S_LMV
27 #include <linux/slab.h>
28 #include <linux/module.h>
29 #include <linux/init.h>
30 #include <linux/slab.h>
31 #include <linux/pagemap.h>
32 #include <asm/div64.h>
33 #include <linux/seq_file.h>
35 #include <liblustre.h>
37 #include <linux/ext2_fs.h>
39 #include <linux/obd_support.h>
40 #include <linux/lustre_lib.h>
41 #include <linux/lustre_net.h>
42 #include <linux/lustre_idl.h>
43 #include <linux/lustre_dlm.h>
44 #include <linux/lustre_mds.h>
45 #include <linux/obd_class.h>
46 #include <linux/obd_ost.h>
47 #include <linux/lprocfs_status.h>
48 #include <linux/lustre_fsfilt.h>
49 #include <linux/obd_lmv.h>
50 #include "lmv_internal.h"
54 * -EINVAL : UUID can't be found in the LMV's target list
55 * -ENOTCONN: The UUID is found, but the target connection is bad (!)
56 * -EBADF : The UUID is found, but the OBD is of the wrong type (!)
/*
 * Mark the LMV target whose UUID matches @uuid as active or inactive.
 *
 * lmv->tgts is scanned while lmv->lmv_lock is held; UUIDs are compared with
 * strncmp() bounded by sizeof(uuid->uuid). Values assigned to rc on the way
 * to the "out" label:
 *   -EINVAL   when the scan index reaches lmv->desc.ld_tgt_count;
 *   -ENOTCONN after class_exp2obd() on the matched export (presumably when
 *             no device is returned -- TODO confirm the guarding test).
 * The matched device is asserted to be of type LUSTRE_MDC_NAME. When the
 * target state already equals @activate a debug message is logged; otherwise
 * tgt->active is overwritten and lmv->desc.ld_active_tgt_count is adjusted
 * (presumably incremented on activation and decremented on deactivation --
 * TODO confirm).
 */
58 static int lmv_set_mdc_active(struct lmv_obd *lmv, struct obd_uuid *uuid,
61 struct obd_device *obd;
62 struct lmv_tgt_desc *tgt;
66 CDEBUG(D_INFO, "Searching in lmv %p for uuid %s (activate=%d)\n",
67 lmv, uuid->uuid, activate);
/* The lock is taken before the scan and released after the state update. */
69 spin_lock(&lmv->lmv_lock);
70 for (i = 0, tgt = lmv->tgts; i < lmv->desc.ld_tgt_count; i++, tgt++) {
71 if (tgt->ltd_exp == NULL)
74 CDEBUG(D_INFO, "lmv idx %d is %s conn "LPX64"\n",
75 i, tgt->uuid.uuid, tgt->ltd_exp->exp_handle.h_cookie);
76 if (strncmp(uuid->uuid, tgt->uuid.uuid, sizeof uuid->uuid) == 0)
/* Index equal to the target count means the scan ran off the end. */
80 if (i == lmv->desc.ld_tgt_count)
81 GOTO(out, rc = -EINVAL);
83 obd = class_exp2obd(tgt->ltd_exp);
85 GOTO(out, rc = -ENOTCONN);
87 CDEBUG(D_INFO, "Found OBD %s=%s device %d (%p) type %s at LMV idx %d\n",
88 obd->obd_name, obd->obd_uuid.uuid, obd->obd_minor, obd,
89 obd->obd_type->typ_name, i);
/* A non-MDC device behind an LMV target is treated as a fatal assertion. */
90 LASSERT(strcmp(obd->obd_type->typ_name, LUSTRE_MDC_NAME) == 0);
92 if (tgt->active == activate) {
93 CDEBUG(D_INFO, "OBD %p already %sactive!\n", obd,
94 activate ? "" : "in");
98 CDEBUG(D_INFO, "Marking OBD %p %sactive\n", obd, activate ? "" : "in");
100 tgt->active = activate;
102 lmv->desc.ld_active_tgt_count++;
104 lmv->desc.ld_active_tgt_count--;
108 spin_unlock(&lmv->lmv_lock);
/*
 * Notification handler for an activity change of @watched.
 *
 * The watched device's type name is compared against LUSTRE_MDC_NAME and a
 * mismatch is reported with CERROR. The target UUID is read from the watched
 * device's client import (imp_target_uuid) and passed, together with @active,
 * to lmv_set_mdc_active(); a failure of that call is logged (presumably when
 * rc is non-zero -- TODO confirm). If obd->obd_observer is set, the
 * notification is forwarded to it through obd_notify() with the same
 * @watched, @active and @data.
 */
112 static int lmv_notify(struct obd_device *obd, struct obd_device *watched,
113 int active, void *data)
116 struct obd_uuid *uuid;
118 if (strcmp(watched->obd_type->typ_name, LUSTRE_MDC_NAME)) {
119 CERROR("unexpected notification of %s %s!\n",
120 watched->obd_type->typ_name,
124 uuid = &watched->u.cli.cl_import->imp_target_uuid;
126 /* Set MDC as active before notifying the observer, so the
127 * observer can use the MDC normally.
129 rc = lmv_set_mdc_active(&obd->u.lmv, uuid, active);
131 CERROR("%sactivation of %s failed: %d\n",
132 active ? "" : "de", uuid->uuid, rc);
136 if (obd->obd_observer)
137 /* Pass the notification up the chain. */
138 rc = obd_notify(obd->obd_observer, watched, active, data);
/*
 * Attach-time procfs setup for an LMV device.
 *
 * Initializes the lprocfs variable table for "lmv" and attaches it to @dev
 * with lprocfs_obd_attach(). A "target_obd_status" proc entry with mode 0444
 * is then created under dev->obd_proc_entry and its file operations are set
 * to lmv_proc_target_fops (presumably only when the attach succeeded and the
 * entry could be created -- TODO confirm). @len and @data are not referenced
 * in the statements shown.
 */
143 int lmv_attach(struct obd_device *dev, obd_count len, void *data)
145 struct lprocfs_static_vars lvars;
149 lprocfs_init_vars(lmv, &lvars);
150 rc = lprocfs_obd_attach(dev, lvars.obd_vars);
153 struct proc_dir_entry *entry;
155 entry = create_proc_entry("target_obd_status", 0444,
156 dev->obd_proc_entry);
159 entry->proc_fops = &lmv_proc_target_fops;
/* Detach @dev: delegates to lprocfs_obd_detach() and returns its result. */
166 int lmv_detach(struct obd_device *dev)
168 return lprocfs_obd_detach(dev);
171 /* This is a fake connect function. Its purpose is to initialize lmv and
172 * tell the caller that everything is okay. The real connection will be performed
/*
 * Class-level connect for an LMV device.
 *
 * Calls class_connect() for @conn/@obd/@cluuid and logs a failure. The export
 * is looked up from @conn; when lmv->refcount is greater than 1 the export
 * reference is dropped right away (presumably followed by an early return --
 * TODO confirm). Otherwise the caller's UUID and @connect_flags are stored in
 * the lmv, lmv->init_sem is initialized to 1, and a "target_obds" proc
 * directory is registered under obd->obd_proc_entry.
 *
 * NOTE(review): the CERROR for a failed "target_obds" registration has no
 * trailing newline, unlike the other messages in this file.
 */
174 static int lmv_connect(struct lustre_handle *conn, struct obd_device *obd,
175 struct obd_uuid *cluuid, unsigned long connect_flags)
177 struct lmv_obd *lmv = &obd->u.lmv;
178 struct obd_export *exp;
179 struct proc_dir_entry *lmv_proc_dir;
183 rc = class_connect(conn, obd, cluuid);
185 CERROR("class_connection() returned %d\n", rc);
189 exp = class_conn2export(conn);
190 /* We don't want to actually do the underlying connections more than
191 * once, so keep track. */
193 if (lmv->refcount > 1) {
194 class_export_put(exp);
/* Saved here so that lmv_check_connect() can read lmv->cluuid later. */
198 lmv->cluuid = *cluuid;
199 lmv->connect_flags = connect_flags;
202 sema_init(&lmv->init_sem, 1);
204 lmv_proc_dir = lprocfs_register("target_obds", obd->obd_proc_entry,
206 if (IS_ERR(lmv_proc_dir)) {
207 CERROR("could not register /proc/fs/lustre/%s/%s/target_obds.",
208 obd->obd_type->typ_name, obd->obd_name);
/*
 * Send the "inter_mds" key to every connected target.
 *
 * lmv->server_timeout and lmv->connected are each tested against 0 first
 * (presumably returning early in either case -- TODO confirm). Then, for each
 * target, the export is tested for NULL (presumably skipping it) and
 * obd_set_info() is called with the key "inter_mds", a value length of 0
 * and a NULL value.
 */
216 void lmv_set_timeouts(struct obd_device *obd)
218 struct lmv_tgt_desc *tgts;
223 if (lmv->server_timeout == 0)
226 if (lmv->connected == 0)
229 for (i = 0, tgts = lmv->tgts; i < lmv->desc.ld_tgt_count; i++, tgts++) {
230 if (tgts->ltd_exp == NULL)
232 obd_set_info(tgts->ltd_exp, strlen("inter_mds"),
233 "inter_mds", 0, NULL);
237 /* Checks whether the passed obd is connected and, if it is not, connects it. */
/*
 * MAX_STRING_SIZE bounds the proc symlink target built in
 * lmv_check_connect(); the buffer there is declared one byte larger.
 *
 * lmv_check_connect(): connect the LMV to its targets.
 *
 * Runs under lmv->init_sem and tests lmv->connected first (presumably
 * returning immediately when already connected -- TODO confirm). For every
 * entry of lmv->tgts it:
 *   - looks up the client device of type LUSTRE_MDC_NAME for the target UUID
 *     and jumps to out_disc with -EINVAL when the lookup or the obd_set_up
 *     check fails;
 *   - leaves ltd_exp NULL for the target whose UUID equals lmv->cluuid;
 *   - connects with obd_connect() using the fixed UUID "LMV_OSC_UUID",
 *     stores the resulting export in ltd_exp and passes lmv->max_easize and
 *     lmv->max_cookiesize to obd_init_ea_size();
 *   - registers @obd as observer of the target, disconnecting the target if
 *     that fails;
 *   - increments lmv->desc.ld_active_tgt_count;
 *   - creates a proc symlink named after the MDC device, pointing to
 *     "../../../<type name>/...", inside the "target_obds" directory, and
 *     removes that directory if the symlink cannot be created.
 * Afterwards lmv_set_timeouts() is called and an export reference is put.
 * The trailing statements decrement the active count, disconnect target
 * exports and call class_disconnect(exp, 0); presumably this is the out_disc
 * unwinding path -- TODO confirm.
 *
 * NOTE(review): the "could not register LMV target" CERROR has no trailing
 * newline.
 */
238 #define MAX_STRING_SIZE 128
239 int lmv_check_connect(struct obd_device *obd)
241 struct lmv_obd *lmv = &obd->u.lmv;
242 struct obd_uuid *cluuid;
243 struct lmv_tgt_desc *tgts;
244 struct proc_dir_entry *lmv_proc_dir;
245 struct obd_export *exp;
251 down(&lmv->init_sem);
252 if (lmv->connected) {
257 cluuid = &lmv->cluuid;
260 CDEBUG(D_OTHER, "time to connect %s to %s\n",
261 cluuid->uuid, obd->obd_name);
263 for (i = 0, tgts = lmv->tgts; i < lmv->desc.ld_tgt_count; i++, tgts++) {
264 struct obd_device *tgt_obd;
265 struct obd_uuid lmv_osc_uuid = { "LMV_OSC_UUID" };
266 struct lustre_handle conn = {0, };
268 LASSERT(tgts != NULL);
270 tgt_obd = class_find_client_obd(&tgts->uuid, LUSTRE_MDC_NAME,
273 CERROR("Target %s not attached\n", tgts->uuid.uuid);
274 GOTO(out_disc, rc = -EINVAL);
277 /* for MDS: don't connect to yourself */
278 if (obd_uuid_equals(&tgts->uuid, cluuid)) {
279 CDEBUG(D_OTHER, "don't connect back to %s\n",
281 tgts->ltd_exp = NULL;
285 CDEBUG(D_OTHER, "connect to %s(%s) - %s, %s FOR %s\n",
286 tgt_obd->obd_name, tgt_obd->obd_uuid.uuid,
287 tgts->uuid.uuid, obd->obd_uuid.uuid,
290 if (!tgt_obd->obd_set_up) {
291 CERROR("Target %s not set up\n", tgts->uuid.uuid);
292 GOTO(out_disc, rc = -EINVAL);
295 rc = obd_connect(&conn, tgt_obd, &lmv_osc_uuid,
298 CERROR("Target %s connect error %d\n",
299 tgts->uuid.uuid, rc);
302 tgts->ltd_exp = class_conn2export(&conn);
304 obd_init_ea_size(tgts->ltd_exp, lmv->max_easize,
305 lmv->max_cookiesize);
307 rc = obd_register_observer(tgt_obd, obd);
309 CERROR("Target %s register_observer error %d\n",
310 tgts->uuid.uuid, rc);
311 obd_disconnect(tgts->ltd_exp, 0);
315 lmv->desc.ld_active_tgt_count++;
318 CDEBUG(D_OTHER, "connected to %s(%s) successfully (%d)\n",
319 tgt_obd->obd_name, tgt_obd->obd_uuid.uuid,
320 atomic_read(&obd->obd_refcount));
322 lmv_proc_dir = lprocfs_srch(obd->obd_proc_entry, "target_obds");
324 struct obd_device *mdc_obd = class_conn2obd(&conn);
325 struct proc_dir_entry *mdc_symlink;
326 char name[MAX_STRING_SIZE + 1];
328 LASSERT(mdc_obd != NULL);
329 LASSERT(mdc_obd->obd_type != NULL);
330 LASSERT(mdc_obd->obd_type->typ_name != NULL);
/* Terminate the extra byte up front; snprintf below is limited to MAX_STRING_SIZE. */
331 name[MAX_STRING_SIZE] = '\0';
332 snprintf(name, MAX_STRING_SIZE, "../../../%s/%s",
333 mdc_obd->obd_type->typ_name,
335 mdc_symlink = proc_symlink(mdc_obd->obd_name,
337 if (mdc_symlink == NULL) {
338 CERROR("could not register LMV target "
339 "/proc/fs/lustre/%s/%s/target_obds/%s.",
340 obd->obd_type->typ_name, obd->obd_name,
342 lprocfs_remove(lmv_proc_dir);
348 lmv_set_timeouts(obd);
349 class_export_put(exp);
356 struct obd_uuid uuid;
358 --lmv->desc.ld_active_tgt_count;
360 /* save for CERROR below; (we know it's terminated) */
362 rc2 = obd_disconnect(tgts->ltd_exp, 0);
364 CERROR("error: LMV target %s disconnect on MDT idx %d: "
365 "error %d\n", uuid.uuid, i, rc2);
367 class_disconnect(exp, 0);
/*
 * Disconnect an LMV export.
 *
 * lmv->refcount is tested against 0 (presumably skipping the per-target work
 * while other users remain -- TODO confirm). The "target_obds" proc directory
 * is looked up and, for every target with a non-NULL export:
 *   - the proc symlink named after the MDC device is searched for and
 *     removed, or a "missing" error is logged;
 *   - when obd->obd_no_recov is set, obd_no_recov is also set on the MDC
 *     device;
 *   - the observer registration on the target device is cleared;
 *   - the export is disconnected with @flags; an error is logged for targets
 *     still marked active;
 *   - an active target is marked inactive and ld_active_tgt_count is
 *     decremented, and ltd_exp is reset to NULL.
 * Finally the "target_obds" directory is removed (or reported missing), and
 * the LMV export itself is put and passed to class_disconnect().
 */
372 static int lmv_disconnect(struct obd_export *exp, int flags)
374 struct obd_device *obd = class_exp2obd(exp);
375 struct lmv_obd *lmv = &obd->u.lmv;
376 struct proc_dir_entry *lmv_proc_dir;
383 /* Only disconnect the underlying layers on the final disconnect. */
385 if (lmv->refcount != 0)
388 lmv_proc_dir = lprocfs_srch(obd->obd_proc_entry, "target_obds");
390 for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
391 struct obd_device *mdc_obd;
393 if (lmv->tgts[i].ltd_exp == NULL)
396 mdc_obd = class_exp2obd(lmv->tgts[i].ltd_exp);
398 struct proc_dir_entry *mdc_symlink;
400 mdc_symlink = lprocfs_srch(lmv_proc_dir, mdc_obd->obd_name);
402 lprocfs_remove(mdc_symlink);
404 CERROR("/proc/fs/lustre/%s/%s/target_obds/%s missing\n",
405 obd->obd_type->typ_name, obd->obd_name,
410 if (obd->obd_no_recov) {
411 /* Pass it on to our clients.
412 * XXX This should be an argument to disconnect,
413 * XXX not a back-door flag on the OBD. Ah well.
415 struct obd_device *mdc_obd;
416 mdc_obd = class_exp2obd(lmv->tgts[i].ltd_exp);
418 mdc_obd->obd_no_recov = 1;
421 CDEBUG(D_OTHER, "disconnected from %s(%s) successfully\n",
422 lmv->tgts[i].ltd_exp->exp_obd->obd_name,
423 lmv->tgts[i].ltd_exp->exp_obd->obd_uuid.uuid);
425 obd_register_observer(lmv->tgts[i].ltd_exp->exp_obd, NULL);
427 rc = obd_disconnect(lmv->tgts[i].ltd_exp, flags);
429 if (lmv->tgts[i].active) {
430 CERROR("Target %s disconnect error %d\n",
431 lmv->tgts[i].uuid.uuid, rc);
435 if (lmv->tgts[i].active) {
436 lmv->desc.ld_active_tgt_count--;
437 lmv->tgts[i].active = 0;
439 lmv->tgts[i].ltd_exp = NULL;
443 lprocfs_remove(lmv_proc_dir);
445 CERROR("/proc/fs/lustre/%s/%s/target_obds missing\n",
446 obd->obd_type->typ_name, obd->obd_name);
451 /* this is the case when no real connection is established by
452 * lmv_check_connect(). */
454 class_export_put(exp);
455 rc = class_disconnect(exp, 0);
456 if (lmv->refcount == 0)
/*
 * Broadcast an ioctl to every LMV target.
 *
 * The target count is tested against 0 first (presumably an error return --
 * TODO confirm). For each target, a NULL export is reported with CWARN;
 * otherwise @cmd, @len, @karg and @uarg are forwarded unchanged through
 * obd_iocontrol(). A failure is logged only for targets marked active.
 *
 * NOTE(review): the adjacent literals "on MDT" and "idx %d" concatenate to
 * "on MDTidx %d"; a separating space appears to be missing.
 */
461 static int lmv_iocontrol(unsigned int cmd, struct obd_export *exp,
462 int len, void *karg, void *uarg)
464 struct obd_device *obddev = class_exp2obd(exp);
465 struct lmv_obd *lmv = &obddev->u.lmv;
466 int i, rc = 0, set = 0;
470 if (lmv->desc.ld_tgt_count == 0)
473 for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
476 if (lmv->tgts[i].ltd_exp == NULL) {
477 CWARN("%s: NULL export for %d\n", obddev->obd_name, i);
481 err = obd_iocontrol(cmd, lmv->tgts[i].ltd_exp, len, karg, uarg);
483 if (lmv->tgts[i].active) {
484 CERROR("error: iocontrol MDC %s on MDT"
485 "idx %d: err = %d\n",
486 lmv->tgts[i].uuid.uuid, i, err);
/*
 * Set up an LMV device from a lustre_cfg record passed in @buf.
 *
 * Validation visible here:
 *   - lcfg_inllen1 and lcfg_inllen2 must each be at least 1;
 *   - lcfg_inlbuf1 is read as a struct lmv_desc and must not be shorter than
 *     sizeof(struct lmv_desc);
 *   - lcfg_inlbuf2 is read as an array of struct obd_uuid whose byte size
 *     must equal sizeof(struct obd_uuid) * desc->ld_tgt_count.
 * A target table of ld_tgt_count entries is then allocated (its size is
 * remembered in lmv->bufsize), lmv_lock is initialized and every UUID is
 * copied into the table. max_cookiesize is set to 0 and max_easize to
 * sizeof(struct ll_fid) * ld_tgt_count + sizeof(struct mea).
 * lmv_setup_mgr() is called next; the table is freed in its error path.
 * Finally the client device for the first target UUID is looked up and used
 * as an argument to obd_llog_init().
 *
 * NOTE(review): the "UUID array size wrong" message prints sizeof(*uuids)
 * with %u; sizeof yields size_t, so this mismatches where size_t is wider
 * than unsigned int. The descriptor-size message above casts to int instead.
 */
499 static int lmv_setup(struct obd_device *obd, obd_count len, void *buf)
502 struct lmv_desc *desc;
503 struct obd_uuid *uuids;
504 struct lmv_tgt_desc *tgts;
505 struct obd_device *tgt_obd;
506 struct lustre_cfg *lcfg = buf;
507 struct lmv_obd *lmv = &obd->u.lmv;
510 if (lcfg->lcfg_inllen1 < 1) {
511 CERROR("LMV setup requires a descriptor\n");
515 if (lcfg->lcfg_inllen2 < 1) {
516 CERROR("LMV setup requires an OST UUID list\n");
520 desc = (struct lmv_desc *)lcfg->lcfg_inlbuf1;
521 if (sizeof(*desc) > lcfg->lcfg_inllen1) {
522 CERROR("descriptor size wrong: %d > %d\n",
523 (int)sizeof(*desc), lcfg->lcfg_inllen1);
527 uuids = (struct obd_uuid *)lcfg->lcfg_inlbuf2;
528 if (sizeof(*uuids) * desc->ld_tgt_count != lcfg->lcfg_inllen2) {
529 CERROR("UUID array size wrong: %u * %u != %u\n",
530 sizeof(*uuids), desc->ld_tgt_count, lcfg->lcfg_inllen2);
/* bufsize is kept so that the same size can be handed to OBD_FREE later. */
534 lmv->bufsize = sizeof(struct lmv_tgt_desc) * desc->ld_tgt_count;
535 OBD_ALLOC(lmv->tgts, lmv->bufsize);
536 if (lmv->tgts == NULL) {
537 CERROR("Out of memory\n");
542 spin_lock_init(&lmv->lmv_lock);
544 for (i = 0, tgts = lmv->tgts; i < desc->ld_tgt_count; i++, tgts++)
545 tgts->uuid = uuids[i];
547 lmv->max_cookiesize = 0;
549 lmv->max_easize = sizeof(struct ll_fid) *
550 desc->ld_tgt_count + sizeof(struct mea);
552 rc = lmv_setup_mgr(obd);
554 CERROR("Can't setup LMV object manager, "
556 OBD_FREE(lmv->tgts, lmv->bufsize);
559 tgt_obd = class_find_client_obd(&lmv->tgts->uuid, LUSTRE_MDC_NAME,
562 CERROR("Target %s not attached\n", lmv->tgts->uuid.uuid);
566 rc = obd_llog_init(obd, &obd->obd_llogs, tgt_obd, 0, NULL);
568 CERROR("failed to setup llogging subsystems\n");
/*
 * Aggregate statfs data over all connected targets into @osfs.
 *
 * After lmv_check_connect(), each target with a non-NULL export is queried
 * with obd_statfs() (passing @max_age) into a local obd_statfs; NULL exports
 * are reported with CWARN and query failures with CERROR. The local result
 * is either copied wholesale into @osfs or its os_bavail, os_blocks,
 * os_ffree and os_files fields are added to @osfs (presumably the copy is
 * for the first answer and the sums for later ones -- TODO confirm).
 */
574 static int lmv_statfs(struct obd_device *obd, struct obd_statfs *osfs,
575 unsigned long max_age)
577 struct lmv_obd *lmv = &obd->u.lmv;
578 struct obd_statfs temp;
582 rc = lmv_check_connect(obd);
586 for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
587 if (lmv->tgts[i].ltd_exp == NULL) {
588 CWARN("%s: NULL export for %d\n", obd->obd_name, i);
592 rc = obd_statfs(lmv->tgts[i].ltd_exp->exp_obd, &temp, max_age);
594 CERROR("can't stat MDS #%d (%s)\n", i,
595 lmv->tgts[i].ltd_exp->exp_obd->obd_name);
599 memcpy(osfs, &temp, sizeof(temp));
601 osfs->os_bavail += temp.os_bavail;
602 osfs->os_blocks += temp.os_blocks;
603 osfs->os_ffree += temp.os_ffree;
604 osfs->os_files += temp.os_files;
/*
 * Tear down an LMV device: calls lmv_cleanup_mgr() and frees the target
 * table using the size stored in lmv->bufsize. @flags is not referenced in
 * the statements shown.
 */
610 static int lmv_cleanup(struct obd_device *obd, int flags)
612 struct lmv_obd *lmv = &obd->u.lmv;
614 lmv_cleanup_mgr(obd);
615 OBD_FREE(lmv->tgts, lmv->bufsize);
/*
 * Fetch the status fid from the first target: after lmv_check_connect(),
 * the call is forwarded to md_getstatus() on lmv->tgts[0].ltd_exp.
 *
 * NOTE(review): lmv_check_connect() leaves ltd_exp NULL for a target whose
 * UUID equals the client UUID, so tgts[0].ltd_exp could be NULL here --
 * confirm that this cannot happen for target 0.
 */
619 static int lmv_getstatus(struct obd_export *exp, struct ll_fid *fid)
621 struct obd_device *obd = exp->exp_obd;
622 struct lmv_obd *lmv = &obd->u.lmv;
625 rc = lmv_check_connect(obd);
628 rc = md_getstatus(lmv->tgts[0].ltd_exp, fid);
/*
 * Get attributes for @fid from the MDS selected by fid->mds.
 *
 * The index is asserted to be below ld_tgt_count and md_getattr() is issued
 * on that target. lmv_grab_obj() is then used to look up a local object for
 * @fid; the debug message labels the fid "(splitted)" when one is found. In
 * that case the reply body is taken from buffer 0 of (*request)->rq_repmsg
 * and the cached size of every slave object whose fid differs from the
 * master fid is added to body->size.
 *
 * NOTE(review): inside the slave loop the NULL-export check indexes
 * lmv->tgts[] with the slave index i rather than with obj->objs[i].fid.mds;
 * confirm that these are meant to coincide.
 */
633 static int lmv_getattr(struct obd_export *exp, struct ll_fid *fid,
634 unsigned long valid, unsigned int ea_size,
635 struct ptlrpc_request **request)
637 struct obd_device *obd = exp->exp_obd;
638 struct lmv_obd *lmv = &obd->u.lmv;
639 int rc, i = fid->mds;
643 rc = lmv_check_connect(obd);
647 LASSERT(i < lmv->desc.ld_tgt_count);
649 rc = md_getattr(lmv->tgts[i].ltd_exp, fid, valid,
654 obj = lmv_grab_obj(obd, fid);
656 CDEBUG(D_OTHER, "GETATTR for %lu/%lu/%lu %s\n",
657 (unsigned long)fid->mds, (unsigned long)fid->id,
658 (unsigned long)fid->generation, obj ? "(splitted)" : "");
660 /* if object is splitted, then we loop over all the slaves and gather
661 * size attribute. In ideal world we would have to gather also mds field
662 * from all slaves, as object is spread over the cluster and this is
663 * definitely interesting information and it is not good to loss it,
666 struct mds_body *body;
668 if (*request == NULL) {
673 body = lustre_msg_buf((*request)->rq_repmsg, 0,
675 LASSERT(body != NULL);
679 for (i = 0; i < obj->objcount; i++) {
681 if (lmv->tgts[i].ltd_exp == NULL) {
682 CWARN("%s: NULL export for %d\n",
687 /* skip master obj. */
688 if (fid_equal(&obj->fid, &obj->objs[i].fid))
691 body->size += obj->objs[i].size;
/*
 * Forward a change-callback-data request for @fid to the target selected by
 * fid->mds. The index is asserted to be below ld_tgt_count before
 * md_change_cbdata() is called on that target's export.
 */
701 static int lmv_change_cbdata(struct obd_export *exp, struct ll_fid *fid,
702 ldlm_iterator_t it, void *data)
704 struct obd_device *obd = exp->exp_obd;
705 struct lmv_obd *lmv = &obd->u.lmv;
709 rc = lmv_check_connect(obd);
713 CDEBUG(D_OTHER, "CBDATA for %lu/%lu/%lu\n", (unsigned long)fid->mds,
714 (unsigned long)fid->id, (unsigned long)fid->generation);
716 LASSERT(fid->mds < lmv->desc.ld_tgt_count);
718 rc = md_change_cbdata(lmv->tgts[fid->mds].ltd_exp,
/*
 * Change callback data for child @cfid, choosing the target from the parent
 * directory @pfid and the entry @name.
 *
 * Both pfid->mds and cfid->mds are asserted to be below ld_tgt_count. A
 * local object for @pfid is looked up with lmv_grab_obj(); when the
 * directory is split, raw_name2idx() hashes @name/@len to a slave index and
 * the mds number of that slave's fid is used. md_change_cbdata() is then
 * issued for @cfid on the selected target. The default value of the target
 * index is assigned on a line not shown here (presumably pfid->mds -- TODO
 * confirm).
 */
724 static int lmv_change_cbdata_name(struct obd_export *exp, struct ll_fid *pfid,
725 char *name, int len, struct ll_fid *cfid,
726 ldlm_iterator_t it, void *data)
728 struct obd_device *obd = exp->exp_obd;
729 struct lmv_obd *lmv = &obd->u.lmv;
734 rc = lmv_check_connect(obd);
738 LASSERT(pfid->mds < lmv->desc.ld_tgt_count);
739 LASSERT(cfid->mds < lmv->desc.ld_tgt_count);
741 CDEBUG(D_OTHER, "CBDATA for %lu/%lu/%lu:%*s -> %lu/%lu/%lu\n",
742 (unsigned long)pfid->mds, (unsigned long)pfid->id,
743 (unsigned long)pfid->generation, len, name,
744 (unsigned long)cfid->mds, (unsigned long)cfid->id,
745 (unsigned long)cfid->generation);
747 /* this is default mds for directory name belongs to. */
749 obj = lmv_grab_obj(obd, pfid);
751 /* directory is splitted. look for right mds for this name. */
752 mds = raw_name2idx(obj->hashtype, obj->objcount, name, len);
/* The hash result is a slave index; translate it into an MDS number. */
753 mds = obj->objs[mds].fid.mds;
756 rc = md_change_cbdata(lmv->tgts[mds].ltd_exp, cfid, it, data);
/*
 * Forward an attribute-validation request for @fid to the target selected
 * by fid->mds, after asserting that the index is below ld_tgt_count.
 */
760 static int lmv_valid_attrs(struct obd_export *exp, struct ll_fid *fid)
762 struct obd_device *obd = exp->exp_obd;
763 struct lmv_obd *lmv = &obd->u.lmv;
766 rc = lmv_check_connect(obd);
769 CDEBUG(D_OTHER, "validate %lu/%lu/%lu\n", (unsigned long) fid->mds,
770 (unsigned long) fid->id, (unsigned long) fid->generation);
771 LASSERT(fid->mds < lmv->desc.ld_tgt_count);
772 rc = md_valid_attrs(lmv->tgts[fid->mds].ltd_exp, fid);
/*
 * Forward a close request to the target selected by obdo->o_mds. The index
 * is asserted to be below ld_tgt_count; @obdo, @och and @request are passed
 * to md_close() unchanged.
 */
776 int lmv_close(struct obd_export *exp, struct obdo *obdo,
777 struct obd_client_handle *och,
778 struct ptlrpc_request **request)
780 struct obd_device *obd = exp->exp_obd;
781 struct lmv_obd *lmv = &obd->u.lmv;
782 int rc, i = obdo->o_mds;
784 rc = lmv_check_connect(obd);
787 LASSERT(i < lmv->desc.ld_tgt_count);
788 CDEBUG(D_OTHER, "CLOSE %lu/%lu/%lu\n", (unsigned long) obdo->o_mds,
789 (unsigned long) obdo->o_id, (unsigned long) obdo->o_generation);
790 rc = md_close(lmv->tgts[i].ltd_exp, obdo, och, request);
/*
 * Fetch the directory EA (mea) for @fid and create the local object from it.
 *
 * md_getattr() is issued on the target selected by fid->mds with the valid
 * mask OBD_MD_FLEASIZE | OBD_MD_FLDIREA and an EA size of MEA_SIZE_LMV(lmv).
 * The reply is unpacked with mdc_req2lustre_md(); rc becomes -ENODATA on the
 * way to "cleanup" (presumably when no mea came back -- TODO confirm).
 * lmv_create_obj() is called with the unpacked mea, the mea is released with
 * obd_free_memmd() and the request with ptlrpc_req_finished().
 */
794 int lmv_get_mea_and_update_object(struct obd_export *exp, struct ll_fid *fid)
796 struct obd_device *obd = exp->exp_obd;
797 struct lmv_obd *lmv = &obd->u.lmv;
798 struct ptlrpc_request *req = NULL;
805 mealen = MEA_SIZE_LMV(lmv);
807 valid = OBD_MD_FLEASIZE | OBD_MD_FLDIREA;
809 /* time to update mea of parent fid */
810 rc = md_getattr(lmv->tgts[fid->mds].ltd_exp, fid,
811 valid, mealen, &req);
813 CERROR("md_getattr() failed, error %d\n", rc);
817 rc = mdc_req2lustre_md(exp, req, 0, NULL, &md);
819 CERROR("mdc_req2lustre_md() failed, error %d\n", rc);
824 GOTO(cleanup, rc = -ENODATA);
826 obj = lmv_create_obj(exp, fid, md.mea);
831 obd_free_memmd(exp, (struct lov_stripe_md **)&md.mea);
835 ptlrpc_req_finished(req);
/*
 * Create a new entry described by @op_data on the appropriate target.
 *
 * The active target count is tested first (presumably failing when it is
 * zero -- TODO confirm). If a local object exists for op_data->fid1, the
 * name is hashed with raw_name2idx() and fid1 is replaced by the fid of the
 * chosen slave. md_create() is then issued on the target selected by
 * op_data->fid1.mds. The reply body is read from buffer 0 of the reply
 * message, logged and checked with LASSERT. When rc is -ERESTART the local
 * object is refreshed through lmv_get_mea_and_update_object() and the
 * request is released (presumably before retrying -- TODO confirm).
 *
 * NOTE(review): the retry counter is incremented inside LASSERT(++loop <= 2);
 * confirm that LASSERT always evaluates its argument, otherwise the counter
 * never advances.
 */
839 int lmv_create(struct obd_export *exp, struct mdc_op_data *op_data,
840 const void *data, int datalen, int mode, __u32 uid,
841 __u32 gid, __u64 rdev, struct ptlrpc_request **request)
843 struct obd_device *obd = exp->exp_obd;
844 struct lmv_obd *lmv = &obd->u.lmv;
845 struct mds_body *body;
847 int rc, mds, loop = 0;
850 rc = lmv_check_connect(obd);
854 if (!lmv->desc.ld_active_tgt_count)
857 LASSERT(++loop <= 2);
858 obj = lmv_grab_obj(obd, &op_data->fid1);
860 mds = raw_name2idx(obj->hashtype, obj->objcount, op_data->name,
862 op_data->fid1 = obj->objs[mds].fid;
866 CDEBUG(D_OTHER, "CREATE '%*s' on %lu/%lu/%lu\n", op_data->namelen,
867 op_data->name, (unsigned long)op_data->fid1.mds,
868 (unsigned long)op_data->fid1.id,
869 (unsigned long)op_data->fid1.generation);
871 rc = md_create(lmv->tgts[op_data->fid1.mds].ltd_exp, op_data, data,
872 datalen, mode, uid, gid, rdev, request);
874 if (*request == NULL)
877 body = lustre_msg_buf((*request)->rq_repmsg, 0,
879 LASSERT(body != NULL);
881 CDEBUG(D_OTHER, "created. id = %lu, generation = %lu, "
882 "mds = %d\n", (unsigned long)body->fid1.id,
883 (unsigned long)body->fid1.generation, op_data->fid1.mds);
885 LASSERT(body->valid & OBD_MD_MDS ||
886 body->mds == op_data->fid1.mds);
887 } else if (rc == -ERESTART) {
888 /* directory got splitted. time to update local object and
889 * repeat the request with proper MDS */
890 rc = lmv_get_mea_and_update_object(exp, &op_data->fid1);
892 ptlrpc_req_finished(*request);
/*
 * Forward a done-writing request. Target selection is not implemented: a
 * CWARN is emitted on every call and the request always goes to
 * lmv->tgts[0].ltd_exp.
 */
899 int lmv_done_writing(struct obd_export *exp, struct obdo *obdo)
901 struct obd_device *obd = exp->exp_obd;
902 struct lmv_obd *lmv = &obd->u.lmv;
905 rc = lmv_check_connect(obd);
909 /* FIXME: choose right MDC here */
910 CWARN("this method isn't implemented yet\n");
911 rc = md_done_writing(lmv->tgts[0].ltd_exp, obdo);
/*
 * Take a lock on every slave object listed in data->mea1.
 *
 * For each fid in mea->mea_fids a zeroed mdc_op_data is built with fid1 set
 * to that fid; targets with a NULL export are tested for (presumably
 * skipped). md_enqueue() is called on the slave's target using handle slot
 * lockh + i, so @lockh must provide at least mea->mea_count entries. A
 * request left in it->d.lustre.it_data is finished, and a non-zero
 * it->d.lustre.it_status becomes rc on the way to "cleanup". The cleanup
 * code drops a reference on lockh + i with @lockmode (the surrounding loop
 * is not shown here).
 */
915 int lmv_enqueue_slaves(struct obd_export *exp, int locktype,
916 struct lookup_intent *it, int lockmode,
917 struct mdc_op_data *data, struct lustre_handle *lockh,
918 void *lmm, int lmmsize, ldlm_completion_callback cb_completion,
919 ldlm_blocking_callback cb_blocking, void *cb_data)
921 struct obd_device *obd = exp->exp_obd;
922 struct lmv_obd *lmv = &obd->u.lmv;
923 struct mea *mea = data->mea1;
924 struct mdc_op_data data2;
928 LASSERT(mea != NULL);
929 for (i = 0; i < mea->mea_count; i++) {
930 memset(&data2, 0, sizeof(data2));
931 data2.fid1 = mea->mea_fids[i];
932 mds = data2.fid1.mds;
934 if (lmv->tgts[mds].ltd_exp == NULL)
937 rc = md_enqueue(lmv->tgts[mds].ltd_exp, locktype, it, lockmode,
938 &data2, lockh + i, lmm, lmmsize, cb_completion,
939 cb_blocking, cb_data);
941 CDEBUG(D_OTHER, "take lock on slave %lu/%lu/%lu -> %d/%d\n",
942 (unsigned long)mea->mea_fids[i].mds,
943 (unsigned long)mea->mea_fids[i].id,
944 (unsigned long)mea->mea_fids[i].generation,
945 rc, it->d.lustre.it_status);
948 if (it->d.lustre.it_data) {
949 struct ptlrpc_request *req;
950 req = (struct ptlrpc_request *) it->d.lustre.it_data;
951 ptlrpc_req_finished(req);
954 if (it->d.lustre.it_status)
955 GOTO(cleanup, rc = it->d.lustre.it_status);
960 /* drop all taken locks */
963 ldlm_lock_decref(lockh + i, lockmode);
/*
 * Enqueue a lock for the operation described by @it and @data.
 *
 * For an IT_UNLINK intent the work is handed to lmv_enqueue_slaves() with
 * the same arguments. Otherwise a local object for data->fid1 is looked up;
 * for a split directory the name is hashed with raw_name2idx() and
 * data->fid1 is replaced by the chosen slave's fid. md_enqueue() is then
 * issued on the target selected by data->fid1.mds. Note that data->fid1 may
 * therefore be modified for the caller.
 */
969 int lmv_enqueue(struct obd_export *exp, int lock_type,
970 struct lookup_intent *it, int lock_mode,
971 struct mdc_op_data *data, struct lustre_handle *lockh,
972 void *lmm, int lmmsize, ldlm_completion_callback cb_completion,
973 ldlm_blocking_callback cb_blocking, void *cb_data)
975 struct obd_device *obd = exp->exp_obd;
976 struct lmv_obd *lmv = &obd->u.lmv;
981 rc = lmv_check_connect(obd);
985 if (it->it_op == IT_UNLINK) {
986 rc = lmv_enqueue_slaves(exp, lock_type, it, lock_mode,
987 data, lockh, lmm, lmmsize,
988 cb_completion, cb_blocking, cb_data);
993 obj = lmv_grab_obj(obd, &data->fid1);
995 /* directory is splitted. look for right mds for this
997 mds = raw_name2idx(obj->hashtype, obj->objcount,
998 (char *)data->name, data->namelen);
999 data->fid1 = obj->objs[mds].fid;
1003 CDEBUG(D_OTHER, "ENQUEUE '%s' on %lu/%lu\n", LL_IT2STR(it),
1004 (unsigned long)data->fid1.id, (unsigned long)data->fid1.generation);
1006 rc = md_enqueue(lmv->tgts[data->fid1.mds].ltd_exp, lock_type, it,
1007 lock_mode, data, lockh, lmm, lmmsize, cb_completion,
1008 cb_blocking, cb_data);
/*
 * Get attributes by name inside directory @fid.
 *
 * A working copy of the fid (rfid) starts out equal to *fid. When a local
 * object exists for @fid, the name is hashed with raw_name2idx() using
 * namelen - 1 characters (presumably because @namelen counts a trailing NUL
 * -- TODO confirm) and rfid becomes the chosen slave's fid.
 * md_getattr_name() is issued on the target selected by rfid.mds. If the
 * reply body has OBD_MD_MDS set, a second md_getattr_name() with a NULL name
 * and length 1 is issued and the first request is finished. When rc is
 * -ERESTART the local object is refreshed with
 * lmv_get_mea_and_update_object() and the request is released (presumably
 * before retrying -- TODO confirm).
 *
 * NOTE(review): the retry counter is incremented inside LASSERT(++loop <= 2);
 * confirm that LASSERT always evaluates its argument.
 */
1013 int lmv_getattr_name(struct obd_export *exp, struct ll_fid *fid,
1014 char *filename, int namelen, unsigned long valid,
1015 unsigned int ea_size, struct ptlrpc_request **request)
1017 struct obd_device *obd = exp->exp_obd;
1018 struct lmv_obd *lmv = &obd->u.lmv;
1019 struct ll_fid rfid = *fid;
1020 int rc, mds = fid->mds, loop = 0;
1021 struct mds_body *body;
1022 struct lmv_obj *obj;
1024 rc = lmv_check_connect(obd);
1028 LASSERT(++loop <= 2);
1029 obj = lmv_grab_obj(obd, fid);
1031 /* directory is splitted. look for right mds for this name */
1032 mds = raw_name2idx(obj->hashtype, obj->objcount, filename, namelen - 1);
1033 rfid = obj->objs[mds].fid;
1037 CDEBUG(D_OTHER, "getattr_name for %*s on %lu/%lu/%lu -> %lu/%lu/%lu\n",
1038 namelen, filename, (unsigned long)fid->mds,
1039 (unsigned long)fid->id, (unsigned long)fid->generation,
1040 (unsigned long)rfid.mds, (unsigned long)rfid.id,
1041 (unsigned long)rfid.generation);
1043 rc = md_getattr_name(lmv->tgts[rfid.mds].ltd_exp, &rfid, filename,
1044 namelen, valid, ea_size, request);
1046 /* this could be cross-node reference. in this case all we have
1047 * right now is mds/ino/generation triple. we'd like to find
1048 * other attributes */
1049 body = lustre_msg_buf((*request)->rq_repmsg, 0, sizeof(*body));
1050 LASSERT(body != NULL);
1051 if (body->valid & OBD_MD_MDS) {
1052 struct ptlrpc_request *req = NULL;
1054 CDEBUG(D_OTHER, "request attrs for %lu/%lu/%lu\n",
1055 (unsigned long) rfid.mds,
1056 (unsigned long) rfid.id,
1057 (unsigned long) rfid.generation);
1058 rc = md_getattr_name(lmv->tgts[rfid.mds].ltd_exp, &rfid,
1059 NULL, 1, valid, ea_size, &req);
1060 ptlrpc_req_finished(*request);
1063 } else if (rc == -ERESTART) {
1064 /* directory got splitted. time to update local object and
1065 * repeat the request with proper MDS */
1066 rc = lmv_get_mea_and_update_object(exp, &rfid);
1068 ptlrpc_req_finished(*request);
1077 * llite passes the fid of a target inode in data->fid1 and the fid of the directory in
/*
 * Forward a link request to the target selected by data->fid1.mds.
 *
 * With a non-zero data->namelen ("usual link request") a local object for
 * data->fid1 is looked up; for a split directory the name is hashed with
 * raw_name2idx() and data->fid1 is replaced by the chosen slave's fid. With
 * a zero namelen the request is logged as an MDS-originated "inc i_nlinks"
 * request on fid1. In both cases md_link() is called last. Note that rc is
 * reused to hold the hash index before it receives md_link()'s result.
 */
1080 int lmv_link(struct obd_export *exp, struct mdc_op_data *data,
1081 struct ptlrpc_request **request)
1083 struct obd_device *obd = exp->exp_obd;
1084 struct lmv_obd *lmv = &obd->u.lmv;
1085 struct lmv_obj *obj;
1089 rc = lmv_check_connect(obd);
1093 if (data->namelen != 0) {
1094 /* usual link request */
1095 obj = lmv_grab_obj(obd, &data->fid1);
1097 rc = raw_name2idx(obj->hashtype, obj->objcount, data->name,
1099 data->fid1 = obj->objs[rc].fid;
1103 CDEBUG(D_OTHER,"link %lu/%lu/%lu:%*s to %lu/%lu/%lu mds %lu\n",
1104 (unsigned long)data->fid2.mds,
1105 (unsigned long)data->fid2.id,
1106 (unsigned long)data->fid2.generation,
1107 data->namelen, data->name,
1108 (unsigned long)data->fid1.mds,
1109 (unsigned long)data->fid1.id,
1110 (unsigned long)data->fid1.generation,
1111 (unsigned long)data->fid1.mds);
1113 /* request from MDS to acquire i_links for inode by fid1 */
1114 CDEBUG(D_OTHER, "inc i_nlinks for %lu/%lu/%lu\n",
1115 (unsigned long)data->fid1.mds,
1116 (unsigned long)data->fid1.id,
1117 (unsigned long)data->fid1.generation);
1120 rc = md_link(lmv->tgts[data->fid1.mds].ltd_exp, data, request);
/*
 * Forward a rename of @old (in data->fid1) to @new (in data->fid2).
 *
 * The request is logged, and a CWARN about a cross-node rename is emitted
 * when fid1 and fid2 differ. After lmv_check_connect() there are two target
 * choices visible here:
 *   - a branch that logs "create ... pointing to ..." and selects
 *     data->fid2.mds (presumably the MDS-to-MDS case with no old name --
 *     TODO confirm the condition);
 *   - otherwise local objects for fid1 and fid2 are looked up; for a split
 *     directory the old/new name is hashed with raw_name2idx() and the
 *     corresponding fid in @data is replaced by the chosen slave's fid, after
 *     which data->fid1.mds selects the target.
 * md_rename() is finally issued on the selected target. data->fid1 and
 * data->fid2 may therefore be modified for the caller.
 */
1124 int lmv_rename(struct obd_export *exp, struct mdc_op_data *data,
1125 const char *old, int oldlen, const char *new, int newlen,
1126 struct ptlrpc_request **request)
1128 struct obd_device *obd = exp->exp_obd;
1129 struct lmv_obd *lmv = &obd->u.lmv;
1130 struct lmv_obj *obj;
1134 CDEBUG(D_OTHER, "rename %*s in %lu/%lu/%lu to %*s in %lu/%lu/%lu\n",
1135 oldlen, old, (unsigned long)data->fid1.mds,
1136 (unsigned long)data->fid1.id,
1137 (unsigned long)data->fid1.generation,
1138 newlen, new, (unsigned long) data->fid2.mds,
1139 (unsigned long) data->fid2.id,
1140 (unsigned long) data->fid2.generation);
1142 if (!fid_equal(&data->fid1, &data->fid2))
1143 CWARN("cross-node rename %lu/%lu/%lu:%*s to %lu/%lu/%lu:%*s\n",
1144 (unsigned long)data->fid1.mds,
1145 (unsigned long)data->fid1.id,
1146 (unsigned long)data->fid1.generation, oldlen, old,
1147 (unsigned long)data->fid2.mds,
1148 (unsigned long)data->fid2.id,
1149 (unsigned long)data->fid2.generation, newlen, new);
1151 rc = lmv_check_connect(obd);
1156 /* MDS with old dir entry is asking another MDS to create name
1159 "create %*s(%d/%d) in %lu/%lu/%lu pointing to %lu/%lu/%lu\n",
1160 newlen, new, oldlen, newlen,
1161 (unsigned long)data->fid2.mds,
1162 (unsigned long)data->fid2.id,
1163 (unsigned long)data->fid2.generation,
1164 (unsigned long)data->fid1.mds,
1165 (unsigned long)data->fid1.id,
1166 (unsigned long)data->fid1.generation);
1167 mds = data->fid2.mds;
1171 obj = lmv_grab_obj(obd, &data->fid1);
1173 /* directory is already splitted, so we have to forward request
1174 * to the right MDS */
1175 mds = raw_name2idx(obj->hashtype, obj->objcount, (char *)old, oldlen);
1176 data->fid1 = obj->objs[mds].fid;
1177 CDEBUG(D_OTHER, "forward to MDS #%u (%lu/%lu/%lu)\n", mds,
1178 (unsigned long)obj->objs[mds].fid.mds,
1179 (unsigned long)obj->objs[mds].fid.id,
1180 (unsigned long)obj->objs[mds].fid.generation);
1184 obj = lmv_grab_obj(obd, &data->fid2);
1186 /* directory is already splitted, so we have to forward request
1187 * to the right MDS */
1188 mds = raw_name2idx(obj->hashtype, obj->objcount, (char *)new, newlen);
1189 data->fid2 = obj->objs[mds].fid;
1190 CDEBUG(D_OTHER, "forward to MDS #%u (%lu/%lu/%lu)\n", mds,
1191 (unsigned long)obj->objs[mds].fid.mds,
1192 (unsigned long)obj->objs[mds].fid.id,
1193 (unsigned long)obj->objs[mds].fid.generation);
1197 mds = data->fid1.mds;
1200 rc = md_rename(lmv->tgts[mds].ltd_exp, data, old, oldlen,
1201 new, newlen, request);
/*
 * Set attributes on data->fid1.
 *
 * A local object for data->fid1 is looked up. When one exists (logged as
 * "splitted"), md_setattr() is issued once per slave with data->fid1 set to
 * that slave's fid; the request belonging to the master object (slave fid
 * equal to obj->fid) is the one meant to go back to the caller, while
 * requests of other slaves are finished here. Without a local object the fid
 * index is asserted to be below ld_tgt_count, md_setattr() is issued once
 * with the caller's @request, and the reply body's mds field is asserted to
 * equal data->fid1.mds.
 */
1205 int lmv_setattr(struct obd_export *exp, struct mdc_op_data *data,
1206 struct iattr *iattr, void *ea, int ealen, void *ea2,
1207 int ea2len, struct ptlrpc_request **request)
1209 struct obd_device *obd = exp->exp_obd;
1210 struct lmv_obd *lmv = &obd->u.lmv;
1211 struct ptlrpc_request *req;
1212 struct mds_body *body;
1213 struct lmv_obj *obj;
1217 rc = lmv_check_connect(obd);
1221 obj = lmv_grab_obj(obd, &data->fid1);
1223 CDEBUG(D_OTHER, "SETATTR for %lu/%lu/%lu, valid 0x%x%s\n",
1224 (unsigned long)data->fid1.mds, (unsigned long)data->fid1.id,
1225 (unsigned long)data->fid1.generation, iattr->ia_valid,
1226 obj ? ", splitted" : "");
1229 for (i = 0; i < obj->objcount; i++) {
1230 data->fid1 = obj->objs[i].fid;
1232 rc = md_setattr(lmv->tgts[data->fid1.mds].ltd_exp, data,
1233 iattr, ea, ealen, ea2, ea2len, &req);
1235 if (fid_equal(&obj->fid, &obj->objs[i].fid)) {
1236 /* this is master object and this request should
1237 * be returned back to llite */
1240 ptlrpc_req_finished(req);
1248 LASSERT(data->fid1.mds < lmv->desc.ld_tgt_count);
1249 rc = md_setattr(lmv->tgts[data->fid1.mds].ltd_exp, data,
1250 iattr, ea, ealen, ea2, ea2len, request);
1252 body = lustre_msg_buf((*request)->rq_repmsg, 0,
1254 LASSERT(body != NULL);
1255 LASSERT(body->mds == data->fid1.mds);
/*
 * Forward a sync request for @fid to the target selected by fid->mds.
 *
 * NOTE(review): unlike lmv_change_cbdata() and lmv_valid_attrs(), no
 * assertion that fid->mds is below ld_tgt_count is visible here before
 * lmv->tgts[] is indexed.
 */
1261 int lmv_sync(struct obd_export *exp, struct ll_fid *fid,
1262 struct ptlrpc_request **request)
1264 struct obd_device *obd = exp->exp_obd;
1265 struct lmv_obd *lmv = &obd->u.lmv;
1269 rc = lmv_check_connect(obd);
1273 rc = md_sync(lmv->tgts[fid->mds].ltd_exp, fid, request);
/*
 * Blocking AST for locks taken on directory objects.
 *
 * LDLM_CB_BLOCKING: the lock is converted to a handle and cancelled with
 * ldlm_cli_cancel(); the result is logged (presumably only on failure --
 * TODO confirm).
 * LDLM_CB_CANCELING: lock->l_ast_data is read as the struct lmv_obj attached
 * to the lock, and the cancel is logged together with the resource name and
 * the master fid. The resource's name[3] == 1 is printed as "LOOKUP",
 * anything else as "UPDATE".
 * @desc and @data are not referenced in the statements shown.
 */
1277 int lmv_dirobj_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
1278 void *data, int flag)
1280 struct lustre_handle lockh;
1281 struct lmv_obj *obj;
1286 case LDLM_CB_BLOCKING:
1287 ldlm_lock2handle(lock, &lockh);
1288 rc = ldlm_cli_cancel(&lockh);
1290 CDEBUG(D_INODE, "ldlm_cli_cancel: %d\n", rc);
1294 case LDLM_CB_CANCELING:
1295 /* time to drop cached attrs for dirobj */
1296 obj = lock->l_ast_data;
1298 CDEBUG(D_OTHER, "cancel %s on "LPU64"/"LPU64
1299 ", master %u/"LPU64"/%u\n",
1300 lock->l_resource->lr_name.name[3] == 1 ?
1301 "LOOKUP" : "UPDATE",
1302 lock->l_resource->lr_name.name[0],
1303 lock->l_resource->lr_name.name[1], obj->fid.mds,
1304 obj->fid.id, obj->fid.generation);
/*
 * Walk the ext2-format directory entries stored in @page and look for the
 * "." and ".." entries (name_len 1 or 2 consisting only of dots). The action
 * taken on a match is on a line not shown here (presumably the entry is
 * invalidated -- TODO confirm).
 *
 * The walk starts at offset 0, advances by each entry's little-endian
 * rec_len and stops once fewer than EXT2_DIR_REC_LEN(1) bytes remain before
 * PAGE_CACHE_SIZE.
 *
 * NOTE(review): a rec_len of 0 would leave offs unchanged, so the loop would
 * not advance; no guard against that is visible here.
 */
1314 void lmv_remove_dots(struct page *page)
1316 char *kaddr = page_address(page);
1317 unsigned limit = PAGE_CACHE_SIZE;
1318 unsigned offs, rec_len;
1319 struct ext2_dir_entry_2 *p;
1321 for (offs = 0; offs <= limit - EXT2_DIR_REC_LEN(1); offs += rec_len) {
1322 p = (struct ext2_dir_entry_2 *)(kaddr + offs);
1323 rec_len = le16_to_cpu(p->rec_len);
1325 if ((p->name_len == 1 && p->name[0] == '.') ||
1326 (p->name_len == 2 && p->name[0] == '.' && p->name[1] == '.'))
/*
 * Read one directory page at @offset for @mdc_fid.
 *
 * mdc_fid->mds is asserted to be below ld_tgt_count. When a local object
 * exists for the fid, the slave holding @offset is located by walking
 * obj->objs[] and subtracting each slave's size from the offset until it
 * falls inside one; the working fid (rfid) becomes that slave's fid and the
 * object is released with lmv_unlock_obj(). md_readpage() is then issued on
 * the target selected by rfid.mds with the adjusted offset. If the read
 * succeeded and rfid differs from @mdc_fid, lmv_remove_dots() is applied to
 * @page so that "." and ".." are not reported once per slave.
 */
1331 int lmv_readpage(struct obd_export *exp, struct ll_fid *mdc_fid,
1332 __u64 offset, struct page *page,
1333 struct ptlrpc_request **request)
1335 struct obd_device *obd = exp->exp_obd;
1336 struct lmv_obd *lmv = &obd->u.lmv;
1337 struct ll_fid rfid = *mdc_fid;
1338 struct lmv_obj *obj;
1342 rc = lmv_check_connect(obd);
1346 LASSERT(mdc_fid->mds < lmv->desc.ld_tgt_count);
1347 CDEBUG(D_OTHER, "READPAGE at %llu from %lu/%lu/%lu\n",
1348 offset, (unsigned long) rfid.mds,
1349 (unsigned long) rfid.id,
1350 (unsigned long) rfid.generation);
1352 obj = lmv_grab_obj(obd, mdc_fid);
1356 /* find dirobj containing page with requested offset. */
1357 for (i = 0; i < obj->objcount; i++) {
1358 if (offset < obj->objs[i].size)
1360 offset -= obj->objs[i].size;
1362 rfid = obj->objs[i].fid;
1364 lmv_unlock_obj(obj);
1367 CDEBUG(D_OTHER, "forward to %lu/%lu/%lu with offset %lu\n",
1368 (unsigned long)rfid.mds, (unsigned long)rfid.id,
1369 (unsigned long)rfid.generation, (unsigned long)offset);
1371 rc = md_readpage(lmv->tgts[rfid.mds].ltd_exp, &rfid, offset,
1374 if (rc == 0 && !fid_equal(&rfid, mdc_fid))
1375 /* this page isn't from master object. To avoid "." and ".."
1376 * duplication in directory, we have to remove them from all
1378 lmv_remove_dots(page);
/*
 * Unlink every slave object listed in data->mea1.
 *
 * For each fid in mea->mea_fids a zeroed mdc_op_data is built with fid1 set
 * to that fid and create_mode set to MDS_MODE_DONT_LOCK | S_IFDIR. Targets
 * with a NULL export are tested for (presumably skipped -- TODO confirm).
 * md_unlink() is issued on the slave's target, the result is logged, and the
 * request returned through @req is finished.
 */
1383 int lmv_unlink_slaves(struct obd_export *exp, struct mdc_op_data *data,
1384 struct ptlrpc_request **req)
1386 struct obd_device *obd = exp->exp_obd;
1387 struct lmv_obd *lmv = &obd->u.lmv;
1388 struct mea *mea = data->mea1;
1389 struct mdc_op_data data2;
1393 LASSERT(mea != NULL);
1394 for (i = 0; i < mea->mea_count; i++) {
1395 memset(&data2, 0, sizeof(data2));
1396 data2.fid1 = mea->mea_fids[i];
1397 data2.create_mode = MDS_MODE_DONT_LOCK | S_IFDIR;
1398 mds = data2.fid1.mds;
1400 if (lmv->tgts[mds].ltd_exp == NULL)
1403 rc = md_unlink(lmv->tgts[mds].ltd_exp, &data2, req);
1404 CDEBUG(D_OTHER, "unlink slave %lu/%lu/%lu -> %d\n",
1405 (unsigned long) mea->mea_fids[i].mds,
1406 (unsigned long) mea->mea_fids[i].id,
1407 (unsigned long) mea->mea_fids[i].generation, rc);
1409 ptlrpc_req_finished(*req);
/*
 * lmv_delete_object() - drop the LMV object for a fid via lmv_delete_obj().
 *
 * @exp: LMV export passed through to lmv_delete_obj().
 * @fid: fid of the object to delete.
 *
 * A zero result from lmv_delete_obj() is logged as the object not being
 * found.
 */
1418 int lmv_delete_object(struct obd_export *exp, struct ll_fid *fid)
1422 if (!lmv_delete_obj(exp, fid)) {
1423 CDEBUG(D_OTHER, "Object %lu/%lu/%lu is not found.\n",
1424 (unsigned long)fid->mds, (unsigned long)fid->id,
1425 (unsigned long)fid->generation);
/*
 * lmv_unlink() - route an unlink to the proper MD target.
 *
 * @exp:     LMV export; its obd supplies the target table.
 * @data:    operation data. With an empty name and a non-NULL mea1 the
 *           call is handed to lmv_unlink_slaves(). With a non-empty name,
 *           data->fid1 is rewritten to the sub-object chosen by hashing
 *           the name.
 * @request: request slot passed on to the unlink calls.
 *
 * NOTE(review): the debug format below passes namelen as a star width, not
 * a star precision, so it pads rather than bounds the printed name and
 * relies on data->name being NUL-terminated; confirm that holds.
 */
1431 int lmv_unlink(struct obd_export *exp, struct mdc_op_data *data,
1432 struct ptlrpc_request **request)
1434 struct obd_device *obd = exp->exp_obd;
1435 struct lmv_obd *lmv = &obd->u.lmv;
1439 rc = lmv_check_connect(obd);
1443 if (data->namelen == 0 && data->mea1 != NULL) {
1444 /* mds asks to remove slave objects */
1445 rc = lmv_unlink_slaves(exp, data, request);
1447 } else if (data->namelen != 0) {
1448 struct lmv_obj *obj;
/* Look up the lmv_obj associated with the parent fid. */
1450 obj = lmv_grab_obj(obd, &data->fid1);
/* Map the entry name to a sub-object index using the object hash type. */
1452 i = raw_name2idx(obj->hashtype, obj->objcount, data->name,
/* Redirect the operation to the sub-object selected above. */
1454 data->fid1 = obj->objs[i].fid;
1457 CDEBUG(D_OTHER, "unlink '%*s' in %lu/%lu/%lu -> %u\n",
1458 data->namelen, data->name,
1459 (unsigned long) data->fid1.mds,
1460 (unsigned long) data->fid1.id,
1461 (unsigned long) data->fid1.generation, i);
1463 CDEBUG(D_OTHER, "drop i_nlink on %lu/%lu/%lu\n",
1464 (unsigned long) data->fid1.mds,
1465 (unsigned long) data->fid1.id,
1466 (unsigned long) data->fid1.generation);
/* Send the unlink to the export of the target named by fid1.mds. */
1468 rc = md_unlink(lmv->tgts[data->fid1.mds].ltd_exp, data, request);
/*
 * lmv_get_real_obd() - pick the obd device behind the first LMV target.
 *
 * @exp:  LMV export.
 * @name: not referenced by the statements visible here.
 * @len:  not referenced by the statements visible here.
 *
 * A failed connection check is reported to the caller as ERR_PTR(rc).
 * Otherwise the device is taken from the export of target index 0.
 */
1472 struct obd_device *lmv_get_real_obd(struct obd_export *exp,
1473 char *name, int len)
1475 struct obd_device *obd = exp->exp_obd;
1476 struct lmv_obd *lmv = &obd->u.lmv;
1480 rc = lmv_check_connect(obd);
1482 RETURN(ERR_PTR(rc));
1483 #warning "we need well-desgined readdir() implementation to remove this mess"
/* Target 0 is used unconditionally, whatever name and len are. */
1484 obd = lmv->tgts[0].ltd_exp->exp_obd;
/*
 * lmv_init_ea_size() - record larger EA and cookie sizes on the LMV and
 * pass the sizes to every target.
 *
 * @exp:        LMV export.
 * @easize:     EA size; stored in lmv->max_easize when it exceeds the
 *              current value.
 * @cookiesize: cookie size; stored in lmv->max_cookiesize when it exceeds
 *              the current value.
 */
1489 int lmv_init_ea_size(struct obd_export *exp, int easize, int cookiesize)
1491 struct obd_device *obd = exp->exp_obd;
1492 struct lmv_obd *lmv = &obd->u.lmv;
1493 int i, rc = 0, change = 0;
/* The stored maxima only ever grow. */
1496 if (lmv->max_easize < easize) {
1497 lmv->max_easize = easize;
1500 if (lmv->max_cookiesize < cookiesize) {
1501 lmv->max_cookiesize = cookiesize;
/* An LMV that is not connected yet is treated separately. */
1507 if (lmv->connected == 0)
1510 for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
/* A target without an export is reported with a warning. */
1511 if (lmv->tgts[i].ltd_exp == NULL) {
1512 CWARN("%s: NULL export for %d\n", obd->obd_name, i);
1516 rc = obd_init_ea_size(lmv->tgts[i].ltd_exp, easize, cookiesize);
1518 CERROR("obd_init_ea_size() failed on MDT target %d, "
1519 "error %d.\n", i, rc);
/*
 * lmv_obd_create_single() - create one object on the target selected by
 * oa->o_mds.
 *
 * @exp: LMV export.
 * @oa:  object attributes; o_mds selects the target and is asserted to be
 *       below the configured target count.
 * @ea:  must be NULL (asserted).
 * @oti: transaction info passed through to obd_create().
 */
1526 int lmv_obd_create_single(struct obd_export *exp, struct obdo *oa,
1527 struct lov_stripe_md **ea, struct obd_trans_info *oti)
1529 struct obd_device *obd = exp->exp_obd;
1530 struct lmv_obd *lmv = &obd->u.lmv;
/* Stack-local stripe md handed to obd_create() through obj_mdp. */
1531 struct lov_stripe_md obj_md;
1532 struct lov_stripe_md *obj_mdp = &obj_md;
1536 LASSERT(ea == NULL);
1537 LASSERT(oa->o_mds < lmv->desc.ld_tgt_count);
1539 rc = obd_create(lmv->tgts[oa->o_mds].ltd_exp, oa, &obj_mdp, oti);
1545 * to be called from MDS only
/*
 * lmv_obd_create() - create directory objects across the LMV targets and
 * record their fids in a MEA.
 *
 * @exp: LMV export.
 * @oa:  object attributes; must be non-NULL. Its id and generation are
 *       copied into the MEA slot of each object created remotely.
 * @ea:  in/out MEA pointer. It is passed to obd_alloc_diskmd() and then
 *       filled in as a struct mea.
 * @oti: transaction info passed through to the create calls.
 *
 * One visible path delegates to lmv_obd_create_single() with a NULL ea.
 * The striped path walks the targets: a target without an export is
 * recorded as the master using the saved master fid, and any other target
 * gets an obd_create() call.
 *
 * NOTE(review): the NULL-export test and the recorded mds index use i,
 * while the obd_create() call and its error message index the target
 * table with c. These differ once a slot is skipped; confirm which index
 * is intended.
 */
1547 int lmv_obd_create(struct obd_export *exp, struct obdo *oa,
1548 struct lov_stripe_md **ea, struct obd_trans_info *oti)
1550 struct obd_device *obd = exp->exp_obd;
1551 struct lmv_obd *lmv = &obd->u.lmv;
1558 rc = lmv_check_connect(obd);
1562 LASSERT(oa != NULL);
1565 rc = lmv_obd_create_single(exp, oa, NULL, oti);
1570 rc = obd_alloc_diskmd(exp, (struct lov_mds_md **)ea);
1572 CERROR("obd_alloc_diskmd() failed, error %d\n",
/* Remember the master generation before oa is reused for other creates. */
1583 mfid.generation = oa->o_generation;
1585 mea = (struct mea *)*ea;
/* A zero or oversized stripe count falls back to one stripe per target. */
1586 if (!mea->mea_count || mea->mea_count > lmv->desc.ld_tgt_count)
1587 mea->mea_count = lmv->desc.ld_tgt_count;
1588 mea->mea_magic = MEA_MAGIC_ALL_CHARS;
/* -1 marks the master as not yet found. */
1590 mea->mea_master = -1;
1591 lcount = lmv->desc.ld_tgt_count;
/* i walks the targets; c counts the MEA slots filled so far. */
1592 for (i = 0, c = 0; c < mea->mea_count && i < lcount; i++) {
1593 struct lov_stripe_md obj_md;
1594 struct lov_stripe_md *obj_mdp = &obj_md;
1596 if (lmv->tgts[i].ltd_exp == NULL) {
1597 /* this is master MDS */
1598 mea->mea_fids[c].id = mfid.id;
1599 mea->mea_fids[c].generation = mfid.generation;
1600 mea->mea_fids[c].mds = i;
1601 mea->mea_master = i;
1606 /* "master" MDS should always be part of stripped dir, so scan
1608 if (mea->mea_master == -1 && c == mea->mea_count - 1)
1611 oa->o_valid = OBD_MD_FLGENER | OBD_MD_FLTYPE | OBD_MD_FLMODE
1612 | OBD_MD_FLUID | OBD_MD_FLGID | OBD_MD_FLID;
1614 rc = obd_create(lmv->tgts[c].ltd_exp, oa, &obj_mdp, oti);
1616 CERROR("obd_create() failed on MDT target %d, "
1617 "error %d\n", c, rc);
1621 mea->mea_fids[c].id = oa->o_id;
1622 mea->mea_fids[c].generation = oa->o_generation;
1623 mea->mea_fids[c].mds = i;
1625 CDEBUG(D_OTHER, "dirobj at mds %d: "LPU64"/%u\n",
1626 i, oa->o_id, oa->o_generation);
1628 LASSERT(c == mea->mea_count);
1629 CDEBUG(D_OTHER, "%d dirobjects created\n", (int) mea->mea_count);
/*
 * lmv_llog_init() - set up the LLOG_CONFIG_REPL_CTXT llog context and bind
 * it to the import of the given target.
 *
 * @obd:   LMV device passed to obd_llog_setup().
 * @llogs: llog container in which the context is created and looked up.
 * @tgt:   target device; its client import becomes the context import.
 * @count: not referenced by the statements visible here.
 * @logid: not referenced by the statements visible here.
 */
1634 static int lmv_llog_init(struct obd_device *obd, struct obd_llogs *llogs,
1635 struct obd_device *tgt, int count,
1636 struct llog_catid *logid)
1638 struct llog_ctxt *ctxt;
1642 rc = obd_llog_setup(obd, llogs, LLOG_CONFIG_REPL_CTXT, tgt, 0, NULL,
/* Fetch the context that was just set up and attach the target import. */
1645 ctxt = llog_get_context(llogs, LLOG_CONFIG_REPL_CTXT);
1646 ctxt->loc_imp = tgt->u.cli.cl_import;
/*
 * lmv_llog_finish() - clean up the LLOG_CONFIG_REPL_CTXT llog context.
 *
 * @obd:   LMV device; not referenced by the statements visible here.
 * @llogs: llog container from which the context is looked up.
 * @count: not referenced by the statements visible here.
 */
1652 static int lmv_llog_finish(struct obd_device *obd,
1653 struct obd_llogs *llogs, int count)
1658 rc = obd_llog_cleanup(llog_get_context(llogs, LLOG_CONFIG_REPL_CTXT));
/*
 * lmv_get_info() - answer key/value queries about the LMV.
 *
 * @exp:    export used to find the LMV device.
 * @keylen: length of the key in bytes.
 * @key:    key bytes; the six-byte keys mdsize and mdsnum are recognised.
 * @vallen: out: set to sizeof(__u32) for the recognised keys.
 * @val:    out: buffer read as a __u32 for the recognised keys.
 *
 * Any other key is logged as invalid.
 */
1662 static int lmv_get_info(struct obd_export *exp, __u32 keylen,
1663 void *key, __u32 *vallen, void *val)
1665 struct obd_device *obd;
1666 struct lmv_obd *lmv;
1669 obd = class_exp2obd(exp);
1671 CDEBUG(D_IOCTL, "invalid client cookie "LPX64"\n",
1672 exp->exp_handle.h_cookie);
1677 if (keylen == 6 && memcmp(key, "mdsize", 6) == 0) {
1678 __u32 *mdsize = val;
1679 *vallen = sizeof(__u32);
/* One ll_fid per configured target plus the mea header. */
1680 *mdsize = sizeof(struct ll_fid) * lmv->desc.ld_tgt_count
1681 + sizeof(struct mea);
1683 } else if (keylen == 6 && memcmp(key, "mdsnum", 6) == 0) {
1684 struct obd_uuid *cluuid = &lmv->cluuid;
1685 struct lmv_tgt_desc *tgts;
1686 __u32 *mdsnum = val;
/* Search the target table for the entry whose uuid equals lmv->cluuid. */
1689 for (i = 0, tgts = lmv->tgts; i < lmv->desc.ld_tgt_count; i++, tgts++) {
1690 if (obd_uuid_equals(&tgts->uuid, cluuid)) {
1691 *vallen = sizeof(__u32);
1699 CDEBUG(D_IOCTL, "invalid key\n");
/*
 * lmv_set_info() - apply a key/value setting to the LMV.
 *
 * @exp:    export used to find the LMV device.
 * @keylen: length of the key in bytes.
 * @key:    key; inter_mds is the only key handled in the visible code.
 * @vallen: not referenced by the statements visible here.
 * @val:    not referenced by the statements visible here.
 *
 * NOTE(review): the key is compared with strcmp(), which reads up to a NUL
 * byte and ignores keylen; confirm callers always pass a NUL-terminated
 * key.
 */
1703 int lmv_set_info(struct obd_export *exp, obd_count keylen,
1704 void *key, obd_count vallen, void *val)
1706 struct obd_device *obd;
1707 struct lmv_obd *lmv;
1710 obd = class_exp2obd(exp);
1712 CDEBUG(D_IOCTL, "invalid client cookie "LPX64"\n",
1713 exp->exp_handle.h_cookie);
1718 if (keylen >= strlen("inter_mds") && strcmp(key, "inter_mds") == 0) {
/* Set the server_timeout flag and re-apply timeouts. */
1719 lmv->server_timeout = 1;
1720 lmv_set_timeouts(obd);
/*
 * lmv_packmd() - produce the on-wire/on-disk form of a MEA.
 *
 * @exp:  export used to find the LMV device.
 * @lmmp: in/out packed buffer pointer. A non-NULL *lmmp with a NULL lsm
 *        is freed. A NULL *lmmp is allocated here.
 * @lsm:  in-memory source, copied byte for byte into *lmmp.
 *
 * The buffer size is always one ll_fid per configured target plus the mea
 * header, independent of how many fids the source actually uses.
 */
1727 int lmv_packmd(struct obd_export *exp, struct lov_mds_md **lmmp,
1728 struct lov_stripe_md *lsm)
1730 struct obd_device *obd = class_exp2obd(exp);
1731 struct lmv_obd *lmv = &obd->u.lmv;
1735 mea_size = sizeof(struct ll_fid) *
1736 lmv->desc.ld_tgt_count + sizeof(struct mea);
/* Free request: a packed buffer exists and there is no source to pack. */
1740 if (*lmmp && !lsm) {
1741 OBD_FREE(*lmmp, mea_size);
/* No destination buffer yet: allocate one of the full MEA size. */
1746 if (*lmmp == NULL) {
1747 OBD_ALLOC(*lmmp, mea_size);
1755 #warning "MEA packing/convertation must be here! -bzzz"
/* No format conversion is done; the structure is copied as is. */
1756 memcpy(*lmmp, lsm, mea_size);
/*
 * lmv_unpackmd() - produce the in-memory form of a packed MEA.
 *
 * @exp:      export used to find the LMV device.
 * @mem_tgt:  in/out in-memory MEA pointer, handled as struct mea **. A
 *            non-NULL *mem_tgt with a NULL disk_src is freed.
 * @disk_src: packed source, handled as struct mea *.
 * @mdsize:   size of the packed source; asserted equal to the full MEA
 *            size computed from the configured target count.
 */
1760 int lmv_unpackmd(struct obd_export *exp, struct lov_stripe_md **mem_tgt,
1761 struct lov_mds_md *disk_src, int mdsize)
1763 struct obd_device *obd = class_exp2obd(exp);
1764 struct lmv_obd *lmv = &obd->u.lmv;
1765 struct mea **tmea = (struct mea **) mem_tgt;
1766 struct mea *mea = (void *) disk_src;
/* One ll_fid per configured target plus the mea header. */
1770 mea_size = sizeof(struct ll_fid) *
1771 lmv->desc.ld_tgt_count + sizeof(struct mea);
1772 if (mem_tgt == NULL)
/* Free request: an unpacked MEA exists and there is no source. */
1775 if (*mem_tgt != NULL && disk_src == NULL) {
1776 OBD_FREE(*tmea, mea_size);
1780 LASSERT(mea_size == mdsize);
1782 OBD_ALLOC(*tmea, mea_size);
1789 #warning "MEA unpacking/convertation must be here! -bzzz"
/* No format conversion is done; the packed bytes are copied as is. */
1790 memcpy(*tmea, mea, mdsize);
/*
 * lmv_brw() - forward a bulk read/write to the target selected by
 * oa->o_mds.
 *
 * @rw:      direction flag passed through to obd_brw().
 * @exp:     LMV export.
 * @oa:      object attributes; must be non-NULL. o_mds selects the target
 *           and the MEA slot; o_gr, o_id and o_valid are overwritten.
 * @ea:      MEA (cast from lov_stripe_md); must be non-NULL.
 * @oa_bufs: number of pages in pgarr.
 * @pgarr:   page array; must be non-NULL.
 * @oti:     transaction info passed through to obd_brw().
 */
1794 int lmv_brw(int rw, struct obd_export *exp, struct obdo *oa,
1795 struct lov_stripe_md *ea, obd_count oa_bufs,
1796 struct brw_page *pgarr, struct obd_trans_info *oti)
1798 struct obd_device *obd = exp->exp_obd;
1799 struct lmv_obd *lmv = &obd->u.lmv;
1800 struct mea *mea = (struct mea *) ea;
1803 LASSERT(oa != NULL);
1804 LASSERT(ea != NULL);
1805 LASSERT(pgarr != NULL);
1806 LASSERT(oa->o_mds < lmv->desc.ld_tgt_count);
/* Address the object by the id and generation in the MEA slot for o_mds. */
1808 oa->o_gr = mea->mea_fids[oa->o_mds].generation;
1809 oa->o_id = mea->mea_fids[oa->o_mds].id;
1810 oa->o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
/* The target receives a NULL stripe md, not the MEA. */
1811 err = obd_brw(rw, lmv->tgts[oa->o_mds].ltd_exp, oa,
1812 NULL, oa_bufs, pgarr, oti);
/*
 * OBD method table for the LMV device type. It is handed to
 * class_register_type() in lmv_init().
 */
1816 struct obd_ops lmv_obd_ops = {
1817 .o_owner = THIS_MODULE,
/* Device lifecycle. */
1818 .o_attach = lmv_attach,
1819 .o_detach = lmv_detach,
1820 .o_setup = lmv_setup,
1821 .o_cleanup = lmv_cleanup,
1822 .o_connect = lmv_connect,
1823 .o_disconnect = lmv_disconnect,
1824 .o_statfs = lmv_statfs,
/* Llog context management. */
1825 .o_llog_init = lmv_llog_init,
1826 .o_llog_finish = lmv_llog_finish,
/* Key/value configuration. */
1827 .o_get_info = lmv_get_info,
1828 .o_set_info = lmv_set_info,
/* Object creation and MEA packing. */
1829 .o_create = lmv_obd_create,
1830 .o_packmd = lmv_packmd,
1831 .o_unpackmd = lmv_unpackmd,
1833 .o_init_ea_size = lmv_init_ea_size,
1834 .o_notify = lmv_notify,
1835 .o_iocontrol = lmv_iocontrol,
/*
 * Metadata method table for the LMV device type. It is handed to
 * class_register_type() together with lmv_obd_ops in lmv_init().
 */
1838 struct md_ops lmv_md_ops = {
1839 .m_getstatus = lmv_getstatus,
1840 .m_getattr = lmv_getattr,
1841 .m_change_cbdata = lmv_change_cbdata,
1842 .m_change_cbdata_name = lmv_change_cbdata_name,
1843 .m_close = lmv_close,
1844 .m_create = lmv_create,
1845 .m_done_writing = lmv_done_writing,
1846 .m_enqueue = lmv_enqueue,
1847 .m_getattr_name = lmv_getattr_name,
1848 .m_intent_lock = lmv_intent_lock,
1850 .m_rename = lmv_rename,
1851 .m_setattr = lmv_setattr,
1853 .m_readpage = lmv_readpage,
1854 .m_unlink = lmv_unlink,
1855 .m_get_real_obd = lmv_get_real_obd,
1856 .m_valid_attrs = lmv_valid_attrs,
1857 .m_delete_object = lmv_delete_object,
/*
 * lmv_init() - module entry point: registers the LMV device type under
 * OBD_LMV_DEVICENAME with its OBD and metadata method tables and its
 * lprocfs module variables.
 */
1860 int __init lmv_init(void)
1862 struct lprocfs_static_vars lvars;
/* Fill lvars with the lprocfs variables declared for lmv. */
1865 lprocfs_init_vars(lmv, &lvars);
1866 rc = class_register_type(&lmv_obd_ops, &lmv_md_ops,
1867 lvars.module_vars, OBD_LMV_DEVICENAME);
/*
 * lmv_exit() - module exit point: unregisters the device type that
 * lmv_init() registered under OBD_LMV_DEVICENAME.
 */
1872 static void lmv_exit(void)
1874 class_unregister_type(OBD_LMV_DEVICENAME);
/* Module metadata, followed by the load and unload hooks. */
1877 MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
1878 MODULE_DESCRIPTION("Lustre Logical Metadata Volume OBD driver");
1879 MODULE_LICENSE("GPL");
1881 module_init(lmv_init);
1882 module_exit(lmv_exit);