1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Copyright (C) 2002, 2003 Cluster File Systems, Inc.
6 * This file is part of Lustre, http://www.lustre.org.
8 * Lustre is free software; you can redistribute it and/or
9 * modify it under the terms of version 2 of the GNU General Public
10 * License as published by the Free Software Foundation.
12 * Lustre is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Lustre; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 # define EXPORT_SYMTAB
25 #define DEBUG_SUBSYSTEM S_LMV
27 #include <linux/slab.h>
28 #include <linux/module.h>
29 #include <linux/init.h>
30 #include <linux/slab.h>
31 #include <linux/pagemap.h>
32 #include <asm/div64.h>
33 #include <linux/seq_file.h>
35 #include <liblustre.h>
37 #include <linux/ext2_fs.h>
39 #include <linux/obd_support.h>
40 #include <linux/lustre_lib.h>
41 #include <linux/lustre_net.h>
42 #include <linux/lustre_idl.h>
43 #include <linux/lustre_dlm.h>
44 #include <linux/lustre_mds.h>
45 #include <linux/obd_class.h>
46 #include <linux/obd_ost.h>
47 #include <linux/lprocfs_status.h>
48 #include <linux/lustre_fsfilt.h>
49 #include <linux/obd_lmv.h>
50 #include "lmv_internal.h"
/*
 * File-scope state with external linkage: a slab cache pointer and an atomic
 * counter that starts at zero.
 * NOTE(review): presumably used for lmv object allocation and accounting --
 * no user is visible in this part of the file, confirm.
 */
53 kmem_cache_t *obj_cache;
54 atomic_t obj_cache_count = ATOMIC_INIT(0);
/*
 * Set the active flag of @tgt to @activate and adjust the count of active
 * targets in lmv->desc.ld_active_tgt_count by +1 (activate) or -1
 * (deactivate).  The current flag is compared with @activate first.
 * NOTE(review): presumably an unchanged state returns early so the counter
 * is not adjusted twice -- confirm.
 */
56 static void lmv_activate_target(struct lmv_obd *lmv,
57 struct lmv_tgt_desc *tgt,
60 if (tgt->active == activate)
63 tgt->active = activate;
64 lmv->desc.ld_active_tgt_count += (activate ? 1 : -1);
69 * -EINVAL : UUID can't be found in the LMV's target list
70 * -ENOTCONN: The UUID is found, but the target connection is bad (!)
71 * -EBADF : The UUID is found, but the OBD is of the wrong type (!)
/*
 * Mark the target identified by @uuid as active or inactive.
 *
 * Under lmv->lmv_lock, walks lmv->tgts (ld_tgt_count entries), testing each
 * entry's export for NULL and comparing its UUID with @uuid via strncmp().
 * Running off the end of the array yields -EINVAL; a failed check on the
 * obd returned by class_exp2obd() yields -ENOTCONN.  The obd type is
 * asserted to be LUSTRE_MDC_NAME.  If the target is already in the
 * requested state the function jumps to the unlock label with rc unchanged,
 * otherwise the change is applied with lmv_activate_target().
 */
73 static int lmv_set_mdc_active(struct lmv_obd *lmv, struct obd_uuid *uuid,
76 struct lmv_tgt_desc *tgt;
77 struct obd_device *obd;
81 CDEBUG(D_INFO, "Searching in lmv %p for uuid %s (activate=%d)\n",
82 lmv, uuid->uuid, activate);
84 spin_lock(&lmv->lmv_lock);
85 for (i = 0, tgt = lmv->tgts; i < lmv->desc.ld_tgt_count; i++, tgt++) {
86 if (tgt->ltd_exp == NULL)
89 CDEBUG(D_INFO, "lmv idx %d is %s conn "LPX64"\n",
90 i, tgt->uuid.uuid, tgt->ltd_exp->exp_handle.h_cookie);
92 if (strncmp(uuid->uuid, tgt->uuid.uuid, sizeof uuid->uuid) == 0)
96 if (i == lmv->desc.ld_tgt_count)
97 GOTO(out_lmv_lock, rc = -EINVAL);
99 obd = class_exp2obd(tgt->ltd_exp);
101 GOTO(out_lmv_lock, rc = -ENOTCONN);
103 CDEBUG(D_INFO, "Found OBD %s=%s device %d (%p) type %s at LMV idx %d\n",
104 obd->obd_name, obd->obd_uuid.uuid, obd->obd_minor, obd,
105 obd->obd_type->typ_name, i);
106 LASSERT(strcmp(obd->obd_type->typ_name, LUSTRE_MDC_NAME) == 0);
108 if (tgt->active == activate) {
109 CDEBUG(D_INFO, "OBD %p already %sactive!\n", obd,
110 activate ? "" : "in");
111 GOTO(out_lmv_lock, rc);
114 CDEBUG(D_INFO, "Marking OBD %p %sactive\n",
115 obd, activate ? "" : "in");
117 lmv_activate_target(lmv, tgt, activate);
122 spin_unlock(&lmv->lmv_lock);
/*
 * Notification handler for a state change of the @watched device.
 *
 * The type of @watched is compared with LUSTRE_MDC_NAME; a mismatch is
 * logged as an unexpected notification.  The target UUID is taken from the
 * import of @watched and passed, with @active, to lmv_set_mdc_active(); a
 * failure of that call is logged.  When @obd has an observer the
 * notification is forwarded to it with obd_notify().
 */
126 static int lmv_notify(struct obd_device *obd, struct obd_device *watched,
127 int active, void *data)
129 struct obd_uuid *uuid;
133 if (strcmp(watched->obd_type->typ_name, LUSTRE_MDC_NAME)) {
134 CERROR("unexpected notification of %s %s!\n",
135 watched->obd_type->typ_name,
139 uuid = &watched->u.cli.cl_import->imp_target_uuid;
141 /* Set MDC as active before notifying the observer, so the observer can
142 * use the MDC normally.
144 rc = lmv_set_mdc_active(&obd->u.lmv, uuid, active);
146 CERROR("%sactivation of %s failed: %d\n",
147 active ? "" : "de", uuid->uuid, rc);
151 if (obd->obd_observer)
152 /* Pass the notification up the chain. */
153 rc = obd_notify(obd->obd_observer, watched, active, data);
/*
 * Attach-time procfs setup.  Initialises the lmv lprocfs variables,
 * registers them for @dev with lprocfs_obd_attach(), and creates a read-only
 * (0444) "target_obd_status" entry under dev->obd_proc_entry whose file
 * operations are set to lmv_proc_target_fops.
 */
158 int lmv_attach(struct obd_device *dev, obd_count len, void *data)
160 struct lprocfs_static_vars lvars;
164 lprocfs_init_vars(lmv, &lvars);
165 rc = lprocfs_obd_attach(dev, lvars.obd_vars);
168 struct proc_dir_entry *entry;
170 entry = create_proc_entry("target_obd_status", 0444,
171 dev->obd_proc_entry);
174 entry->proc_fops = &lmv_proc_target_fops;
/* Detach @dev: delegates to lprocfs_obd_detach() and returns its result. */
181 int lmv_detach(struct obd_device *dev)
183 return lprocfs_obd_detach(dev);
/*
 * "Fake" connect: initialises the LMV and tells the caller that everything
 * is okay; the real connections to the targets are performed later.
 *
 * Obtains an export through class_connect() and class_conn2export(), tests
 * lmv->refcount so that the underlying connections are only set up once,
 * records @cluuid and @flags in the lmv, initialises init_sem and registers
 * the "target_obds" proc directory.  When OBD_OPT_REAL_CLIENT is set in
 * @flags, lmv_check_connect() is called right away.
 */
188 static int lmv_connect(struct lustre_handle *conn, struct obd_device *obd,
189 struct obd_uuid *cluuid, unsigned long flags)
192 struct proc_dir_entry *lmv_proc_dir;
194 struct lmv_obd *lmv = &obd->u.lmv;
195 struct obd_export *exp;
199 rc = class_connect(conn, obd, cluuid);
201 CERROR("class_connection() returned %d\n", rc);
205 exp = class_conn2export(conn);
207 /* we don't want to actually do the underlying connections more than
208 * once, so keep track. */
210 if (lmv->refcount > 1) {
211 class_export_put(exp);
217 lmv->cluuid = *cluuid;
218 lmv->connect_flags = flags;
219 sema_init(&lmv->init_sem, 1);
222 lmv_proc_dir = lprocfs_register("target_obds", obd->obd_proc_entry,
224 if (IS_ERR(lmv_proc_dir)) {
225 CERROR("could not register /proc/fs/lustre/%s/%s/target_obds.",
226 obd->obd_type->typ_name, obd->obd_name);
232 * all real clients shouls perform actual connection rightaway, because
233 * it is possible, that LMV will not have opportunity to connect
234 * targets, as MDC stuff will bit called directly, for instance while
235 * reading ../mdc/../kbytesfree procfs file, etc.
237 if (flags & OBD_OPT_REAL_CLIENT)
238 rc = lmv_check_connect(obd);
243 lprocfs_remove(lmv_proc_dir);
/*
 * Send the "inter_mds" key (with no value) through obd_set_info() to the
 * export of each target; entries whose export is NULL are tested for first.
 * lmv->server_timeout and lmv->connected are both checked against 0 before
 * the loop.
 * NOTE(review): presumably the function returns early when either is 0 --
 * confirm.
 */
250 void lmv_set_timeouts(struct obd_device *obd)
252 struct lmv_tgt_desc *tgts;
257 if (lmv->server_timeout == 0)
260 if (lmv->connected == 0)
263 for (i = 0, tgts = lmv->tgts; i < lmv->desc.ld_tgt_count; i++, tgts++) {
264 if (tgts->ltd_exp == NULL)
267 obd_set_info(tgts->ltd_exp, strlen("inter_mds"),
268 "inter_mds", 0, NULL);
/* Size (excluding the terminating NUL) of the buffer that holds the proc
 * symlink target path built in lmv_check_connect(). */
272 #define MAX_STRING_SIZE 128
/*
 * Check whether @obd is connected to its targets and, if not, connect it.
 *
 * Runs under lmv->init_sem; lmv->connected is tested first.  For each
 * target the MDC device is looked up by UUID (-EINVAL if it is not attached
 * or not set up).  A target whose UUID equals our own cluuid is left with a
 * NULL export so that an MDS does not connect back to itself.  Otherwise
 * the target is connected with obd_connect(), its export is stored in
 * ltd_exp, the current max EA and cookie sizes are pushed down, @obd is
 * registered as the target's observer, the active-target count is bumped
 * and a symlink to the MDC device is created under "target_obds".
 *
 * The error path at the end decrements the active-target count,
 * disconnects target exports (logging failures) and calls
 * class_disconnect() on the LMV export.
 */
275 int lmv_check_connect(struct obd_device *obd)
278 struct proc_dir_entry *lmv_proc_dir;
280 struct lmv_obd *lmv = &obd->u.lmv;
281 struct lmv_tgt_desc *tgts;
282 struct obd_uuid *cluuid;
283 struct obd_export *exp;
290 down(&lmv->init_sem);
291 if (lmv->connected) {
296 cluuid = &lmv->cluuid;
299 CDEBUG(D_OTHER, "time to connect %s to %s\n",
300 cluuid->uuid, obd->obd_name);
302 for (i = 0, tgts = lmv->tgts; i < lmv->desc.ld_tgt_count; i++, tgts++) {
303 struct obd_uuid lmv_mdc_uuid = { "LMV_MDC_UUID" };
304 struct lustre_handle conn = {0, };
305 struct obd_device *tgt_obd;
307 LASSERT(tgts != NULL);
309 tgt_obd = class_find_client_obd(&tgts->uuid, LUSTRE_MDC_NAME,
312 CERROR("target %s not attached\n", tgts->uuid.uuid);
313 GOTO(out_disc, rc = -EINVAL);
316 /* for MDS: don't connect to yourself */
317 if (obd_uuid_equals(&tgts->uuid, cluuid)) {
318 CDEBUG(D_OTHER, "don't connect back to %s\n",
320 tgts->ltd_exp = NULL;
324 CDEBUG(D_OTHER, "connect to %s(%s) - %s, %s FOR %s\n",
325 tgt_obd->obd_name, tgt_obd->obd_uuid.uuid,
326 tgts->uuid.uuid, obd->obd_uuid.uuid,
329 if (!tgt_obd->obd_set_up) {
330 CERROR("target %s not set up\n", tgts->uuid.uuid);
331 GOTO(out_disc, rc = -EINVAL);
334 rc = obd_connect(&conn, tgt_obd, &lmv_mdc_uuid,
337 CERROR("target %s connect error %d\n",
338 tgts->uuid.uuid, rc);
341 tgts->ltd_exp = class_conn2export(&conn);
343 obd_init_ea_size(tgts->ltd_exp, lmv->max_easize,
344 lmv->max_cookiesize);
346 rc = obd_register_observer(tgt_obd, obd);
348 CERROR("target %s register_observer error %d\n",
349 tgts->uuid.uuid, rc);
350 obd_disconnect(tgts->ltd_exp, 0);
354 lmv->desc.ld_active_tgt_count++;
357 CDEBUG(D_OTHER, "connected to %s(%s) successfully (%d)\n",
358 tgt_obd->obd_name, tgt_obd->obd_uuid.uuid,
359 atomic_read(&obd->obd_refcount));
362 lmv_proc_dir = lprocfs_srch(obd->obd_proc_entry, "target_obds");
364 struct obd_device *mdc_obd = class_conn2obd(&conn);
365 struct proc_dir_entry *mdc_symlink;
366 char name[MAX_STRING_SIZE + 1];
368 LASSERT(mdc_obd != NULL);
369 LASSERT(mdc_obd->obd_type != NULL);
370 LASSERT(mdc_obd->obd_type->typ_name != NULL);
371 name[MAX_STRING_SIZE] = '\0';
372 snprintf(name, MAX_STRING_SIZE, "../../../%s/%s",
373 mdc_obd->obd_type->typ_name,
375 mdc_symlink = proc_symlink(mdc_obd->obd_name,
377 if (mdc_symlink == NULL) {
378 CERROR("could not register LMV target "
379 "/proc/fs/lustre/%s/%s/target_obds/%s.",
380 obd->obd_type->typ_name, obd->obd_name,
382 lprocfs_remove(lmv_proc_dir);
389 lmv_set_timeouts(obd);
390 class_export_put(exp);
397 struct obd_uuid uuid;
399 --lmv->desc.ld_active_tgt_count;
401 /* save for CERROR below; (we know it's terminated) */
403 rc2 = obd_disconnect(tgts->ltd_exp, 0);
405 CERROR("error: LMV target %s disconnect on MDC idx %d: "
406 "error %d\n", uuid.uuid, i, rc2);
408 class_disconnect(exp, 0);
/*
 * Disconnect @exp from the LMV.
 *
 * The underlying targets are torn down only on the final disconnect
 * (lmv->refcount is tested for this).  For every target that has an export:
 * obd_no_recov is copied from @obd to the MDC device, the target's symlink
 * under "target_obds" is looked up and removed (a missing entry is logged),
 * the observer registration is cleared, the export is disconnected with
 * @flags, the target is marked inactive and ltd_exp is reset to NULL.  The
 * "target_obds" directory itself is then removed, or logged as missing.
 *
 * The tail of the function calls class_export_put() and class_disconnect()
 * on @exp; the comment there refers to the case where lmv_check_connect()
 * never established a real connection.
 */
413 static int lmv_disconnect(struct obd_export *exp, unsigned long flags)
415 struct obd_device *obd = class_exp2obd(exp);
416 struct lmv_obd *lmv = &obd->u.lmv;
419 struct proc_dir_entry *lmv_proc_dir;
427 /* Only disconnect the underlying layers on the final disconnect. */
429 if (lmv->refcount != 0)
433 lmv_proc_dir = lprocfs_srch(obd->obd_proc_entry, "target_obds");
436 for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
437 struct obd_device *mdc_obd;
439 if (lmv->tgts[i].ltd_exp == NULL)
442 mdc_obd = class_exp2obd(lmv->tgts[i].ltd_exp);
445 mdc_obd->obd_no_recov = obd->obd_no_recov;
449 struct proc_dir_entry *mdc_symlink;
451 mdc_symlink = lprocfs_srch(lmv_proc_dir, mdc_obd->obd_name);
453 lprocfs_remove(mdc_symlink);
455 CERROR("/proc/fs/lustre/%s/%s/target_obds/%s missing\n",
456 obd->obd_type->typ_name, obd->obd_name,
461 CDEBUG(D_OTHER, "disconnected from %s(%s) successfully\n",
462 lmv->tgts[i].ltd_exp->exp_obd->obd_name,
463 lmv->tgts[i].ltd_exp->exp_obd->obd_uuid.uuid);
465 obd_register_observer(lmv->tgts[i].ltd_exp->exp_obd, NULL);
466 rc = obd_disconnect(lmv->tgts[i].ltd_exp, flags);
468 if (lmv->tgts[i].active) {
469 CERROR("Target %s disconnect error %d\n",
470 lmv->tgts[i].uuid.uuid, rc);
475 lmv_activate_target(lmv, &lmv->tgts[i], 0);
476 lmv->tgts[i].ltd_exp = NULL;
481 lprocfs_remove(lmv_proc_dir);
483 CERROR("/proc/fs/lustre/%s/%s/target_obds missing\n",
484 obd->obd_type->typ_name, obd->obd_name);
489 /* this is the case when no real connection is established by
490 * lmv_check_connect(). */
492 class_export_put(exp);
493 rc = class_disconnect(exp, 0);
494 if (lmv->refcount == 0)
/*
 * Broadcast an ioctl to the targets: @cmd is passed together with @len,
 * @karg and @uarg to obd_iocontrol() on every target that has an export.
 * An error is logged when the failing target is marked active.  An LMV with
 * no targets (ld_tgt_count == 0) is checked for up front.
 * NOTE(review): confirm how per-target errors are folded into the return
 * value through the rc/set bookkeeping.
 */
499 static int lmv_iocontrol(unsigned int cmd, struct obd_export *exp,
500 int len, void *karg, void *uarg)
502 struct obd_device *obddev = class_exp2obd(exp);
503 struct lmv_obd *lmv = &obddev->u.lmv;
504 int i, rc = 0, set = 0;
507 if (lmv->desc.ld_tgt_count == 0)
510 for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
513 if (lmv->tgts[i].ltd_exp == NULL)
516 err = obd_iocontrol(cmd, lmv->tgts[i].ltd_exp, len, karg, uarg);
518 if (lmv->tgts[i].active) {
519 CERROR("error: iocontrol MDC %s on MDT"
520 "idx %d: err = %d\n",
521 lmv->tgts[i].uuid.uuid, i, err);
/*
 * Configure the LMV from the lustre_cfg record passed in @buf.
 *
 * lcfg_inlbuf1 carries a struct lmv_desc and lcfg_inlbuf2 an array of
 * desc->ld_tgt_count target UUIDs; both lengths are validated before use.
 * The target array lmv->tgts is allocated and filled with the UUIDs,
 * lmv_lock is initialised, max_cookiesize is zeroed and max_easize is set to
 * room for one lustre_id per target plus a struct mea.  lmv_setup_mgr() is
 * then called (the target array is freed when it fails) and the MDC device
 * of the first target is looked up.
 *
 * NOTE(review): the "UUID array size wrong" message prints sizeof(*uuids),
 * which is a size_t, with %u -- a format/argument mismatch on 64-bit builds.
 */
534 static int lmv_setup(struct obd_device *obd, obd_count len, void *buf)
537 struct lmv_desc *desc;
538 struct obd_uuid *uuids;
539 struct lmv_tgt_desc *tgts;
540 struct obd_device *tgt_obd;
541 struct lustre_cfg *lcfg = buf;
542 struct lmv_obd *lmv = &obd->u.lmv;
545 if (lcfg->lcfg_inllen1 < 1) {
546 CERROR("LMV setup requires a descriptor\n");
550 if (lcfg->lcfg_inllen2 < 1) {
551 CERROR("LMV setup requires an MDT UUID list\n");
555 desc = (struct lmv_desc *)lcfg->lcfg_inlbuf1;
556 if (sizeof(*desc) > lcfg->lcfg_inllen1) {
557 CERROR("descriptor size wrong: %d > %d\n",
558 (int)sizeof(*desc), lcfg->lcfg_inllen1);
562 uuids = (struct obd_uuid *)lcfg->lcfg_inlbuf2;
563 if (sizeof(*uuids) * desc->ld_tgt_count != lcfg->lcfg_inllen2) {
564 CERROR("UUID array size wrong: %u * %u != %u\n",
565 sizeof(*uuids), desc->ld_tgt_count, lcfg->lcfg_inllen2);
569 lmv->tgts_size = sizeof(struct lmv_tgt_desc) * desc->ld_tgt_count;
570 OBD_ALLOC(lmv->tgts, lmv->tgts_size);
571 if (lmv->tgts == NULL) {
572 CERROR("Out of memory\n");
577 spin_lock_init(&lmv->lmv_lock);
579 for (i = 0, tgts = lmv->tgts; i < desc->ld_tgt_count; i++, tgts++)
580 tgts->uuid = uuids[i];
582 lmv->max_cookiesize = 0;
584 lmv->max_easize = sizeof(struct lustre_id) *
585 desc->ld_tgt_count + sizeof(struct mea);
587 rc = lmv_setup_mgr(obd);
589 CERROR("Can't setup LMV object manager, "
591 OBD_FREE(lmv->tgts, lmv->tgts_size);
594 tgt_obd = class_find_client_obd(&lmv->tgts->uuid, LUSTRE_MDC_NAME,
597 CERROR("Target %s not attached\n", lmv->tgts->uuid.uuid);
/*
 * Tear down the LMV: calls lmv_cleanup_mgr() and frees the target array
 * lmv->tgts using the size recorded in lmv->tgts_size.
 */
604 static int lmv_cleanup(struct obd_device *obd, int flags)
606 struct lmv_obd *lmv = &obd->u.lmv;
609 lmv_cleanup_mgr(obd);
610 OBD_FREE(lmv->tgts, lmv->tgts_size);
/*
 * Aggregate statfs information over the targets into @osfs.
 *
 * After lmv_check_connect(), a scratch obd_statfs is allocated and filled by
 * obd_statfs() for each target that has an export (a NULL export is logged
 * with CWARN).  A target failure is logged and ends the call through the
 * out_free_temp label.  One reply is copied wholesale into @osfs with
 * memcpy(); for the others os_bavail, os_blocks, os_ffree and os_files are
 * added to the running totals.
 * NOTE(review): presumably it is the first reply that seeds @osfs --
 * confirm.
 */
615 static int lmv_statfs(struct obd_device *obd, struct obd_statfs *osfs,
616 unsigned long max_age)
618 struct lmv_obd *lmv = &obd->u.lmv;
619 struct obd_statfs *temp;
623 rc = lmv_check_connect(obd);
627 OBD_ALLOC(temp, sizeof(*temp));
631 for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
632 if (lmv->tgts[i].ltd_exp == NULL) {
633 CWARN("%s: NULL export for %d\n", obd->obd_name, i);
637 rc = obd_statfs(lmv->tgts[i].ltd_exp->exp_obd, temp, max_age);
639 CERROR("can't stat MDS #%d (%s), error %d\n", i,
640 lmv->tgts[i].ltd_exp->exp_obd->obd_name,
642 GOTO(out_free_temp, rc);
645 memcpy(osfs, temp, sizeof(*temp));
647 osfs->os_bavail += temp->os_bavail;
648 osfs->os_blocks += temp->os_blocks;
649 osfs->os_ffree += temp->os_ffree;
650 osfs->os_files += temp->os_files;
656 OBD_FREE(temp, sizeof(*temp));
/*
 * After lmv_check_connect(), forwards the request to md_getstatus() on the
 * export of target 0, passing @id through.
 */
660 static int lmv_getstatus(struct obd_export *exp, struct lustre_id *id)
662 struct obd_device *obd = exp->exp_obd;
663 struct lmv_obd *lmv = &obd->u.lmv;
667 rc = lmv_check_connect(obd);
671 rc = md_getstatus(lmv->tgts[0].ltd_exp, id);
/*
 * Fetch attributes for @id from the target selected by id_group(id); the
 * index is asserted to be below ld_tgt_count.
 *
 * After the md_getattr() call the id is looked up with lmv_grab_obj(); a
 * non-NULL object means the directory is split.  In that case the reply
 * body is located in (*request)->rq_repmsg and the sizes recorded in
 * obj->objs[] for the slave objects (the master, whose id equals obj->id,
 * is skipped) are added to body->size.
 */
677 static int lmv_getattr(struct obd_export *exp, struct lustre_id *id,
678 __u64 valid, unsigned int ea_size,
679 struct ptlrpc_request **request)
681 struct obd_device *obd = exp->exp_obd;
682 struct lmv_obd *lmv = &obd->u.lmv;
683 int rc, i = id_group(id);
687 rc = lmv_check_connect(obd);
691 LASSERT(i < lmv->desc.ld_tgt_count);
693 rc = md_getattr(lmv->tgts[i].ltd_exp, id, valid,
698 obj = lmv_grab_obj(obd, id);
700 CDEBUG(D_OTHER, "GETATTR for "DLID4" %s\n",
701 OLID4(id), obj ? "(splitted)" : "");
704 * if object is splitted, then we loop over all the slaves and gather
705 * size attribute. In ideal world we would have to gather also mds field
706 * from all slaves, as object is spread over the cluster and this is
707 * definitely interesting information and it is not good to loss it,
711 struct mds_body *body;
713 if (*request == NULL) {
718 body = lustre_msg_buf((*request)->rq_repmsg, 0,
720 LASSERT(body != NULL);
724 for (i = 0; i < obj->objcount; i++) {
726 if (lmv->tgts[i].ltd_exp == NULL) {
727 CWARN("%s: NULL export for %d\n",
732 /* skip master obj. */
733 if (id_equal_fid(&obj->id, &obj->objs[i].id))
736 body->size += obj->objs[i].size;
/*
 * After lmv_check_connect(), forwards md_change_cbdata() for @id to the
 * target selected by id_group(id); the index is asserted to be below
 * ld_tgt_count.
 */
746 static int lmv_change_cbdata(struct obd_export *exp,
747 struct lustre_id *id,
751 struct obd_device *obd = exp->exp_obd;
752 struct lmv_obd *lmv = &obd->u.lmv;
756 rc = lmv_check_connect(obd);
760 CDEBUG(D_OTHER, "CBDATA for "DLID4"\n", OLID4(id));
761 LASSERT(id_group(id) < lmv->desc.ld_tgt_count);
763 rc = md_change_cbdata(lmv->tgts[id_group(id)].ltd_exp,
/*
 * Change callback data for the entry @name in directory @pid.
 *
 * Works on a local copy of @cid.  If lmv_grab_obj() finds @pid to be a split
 * directory, the name is hashed with raw_name2idx() to pick the slave
 * object, whose id replaces the local copy and whose group selects the
 * target.  The request is then forwarded with md_change_cbdata().  The
 * groups of both @pid and @cid are asserted to be below ld_tgt_count.
 */
769 static int lmv_change_cbdata_name(struct obd_export *exp,
770 struct lustre_id *pid,
772 struct lustre_id *cid,
776 struct obd_device *obd = exp->exp_obd;
777 struct lmv_obd *lmv = &obd->u.lmv;
778 struct lustre_id rcid = *cid;
783 rc = lmv_check_connect(obd);
787 LASSERT(id_group(pid) < lmv->desc.ld_tgt_count);
788 LASSERT(id_group(cid) < lmv->desc.ld_tgt_count);
790 CDEBUG(D_OTHER, "CBDATA for "DLID4":%*s -> "DLID4"\n",
791 OLID4(pid), len, name, OLID4(cid));
793 /* this is default mds for directory name belongs to. */
795 obj = lmv_grab_obj(obd, pid);
797 /* directory is splitted. look for right mds for this name. */
798 mds = raw_name2idx(obj->hashtype, obj->objcount, name, len);
799 rcid = obj->objs[mds].id;
800 mds = id_group(&rcid);
803 rc = md_change_cbdata(lmv->tgts[mds].ltd_exp, &rcid, it, data);
/*
 * After lmv_check_connect(), forwards md_valid_attrs() for @id to the
 * target selected by id_group(id); the index is asserted to be below
 * ld_tgt_count.
 */
807 static int lmv_valid_attrs(struct obd_export *exp, struct lustre_id *id)
809 struct obd_device *obd = exp->exp_obd;
810 struct lmv_obd *lmv = &obd->u.lmv;
814 rc = lmv_check_connect(obd);
818 CDEBUG(D_OTHER, "validate "DLID4"\n", OLID4(id));
819 LASSERT(id_group(id) < lmv->desc.ld_tgt_count);
820 rc = md_valid_attrs(lmv->tgts[id_group(id)].ltd_exp, id);
/*
 * Close an open handle.  The target index is taken from obdo->o_mds and
 * asserted to be below ld_tgt_count; after lmv_check_connect() the request
 * is forwarded with md_close() to that target.
 */
824 int lmv_close(struct obd_export *exp, struct obdo *obdo,
825 struct obd_client_handle *och,
826 struct ptlrpc_request **request)
828 struct obd_device *obd = exp->exp_obd;
829 struct lmv_obd *lmv = &obd->u.lmv;
830 int rc, i = obdo->o_mds;
833 rc = lmv_check_connect(obd);
837 LASSERT(i < lmv->desc.ld_tgt_count);
838 CDEBUG(D_OTHER, "CLOSE %lu/%lu/%lu\n", (unsigned long)obdo->o_mds,
839 (unsigned long)obdo->o_id, (unsigned long)obdo->o_generation);
840 rc = md_close(lmv->tgts[i].ltd_exp, obdo, och, request);
/*
 * Fetch the MEA of @id from its MDS and create the local lmv object for it.
 *
 * Issues md_getattr() with OBD_MD_FLEASIZE | OBD_MD_FLDIREA and an EA
 * buffer of MEA_SIZE_LMV(lmv) bytes to the target given by id_group(id),
 * unpacks the reply with mdc_req2lustre_md() and passes md.mea to
 * lmv_create_obj().  One check on the unpacked data fails with -ENODATA
 * (presumably md.mea == NULL -- confirm).  The unpacked MEA is released
 * with obd_free_memmd() and the request with ptlrpc_req_finished().
 */
844 int lmv_get_mea_and_update_object(struct obd_export *exp,
845 struct lustre_id *id)
847 struct obd_device *obd = exp->exp_obd;
848 struct lmv_obd *lmv = &obd->u.lmv;
849 struct ptlrpc_request *req = NULL;
857 mealen = MEA_SIZE_LMV(lmv);
859 valid = OBD_MD_FLEASIZE | OBD_MD_FLDIREA;
861 /* time to update mea of parent id */
862 rc = md_getattr(lmv->tgts[id_group(id)].ltd_exp,
863 id, valid, mealen, &req);
865 CERROR("md_getattr() failed, error %d\n", rc);
869 rc = mdc_req2lustre_md(exp, req, 0, NULL, &md);
871 CERROR("mdc_req2lustre_md() failed, error %d\n", rc);
876 GOTO(cleanup, rc = -ENODATA);
878 obj = lmv_create_obj(exp, id, md.mea);
883 obd_free_memmd(exp, (struct lov_stripe_md **)&md.mea);
888 ptlrpc_req_finished(req);
/*
 * Create the entry op_data->name in the directory op_data->id1.
 *
 * ld_active_tgt_count is tested after lmv_check_connect().  If the parent
 * is a split directory the name is hashed with raw_name2idx() and
 * op_data->id1 is redirected to the matching slave object.  The request is
 * sent with md_create() to the target given by id_group(&op_data->id1).
 * On -ERESTART the directory has just been split: the local object is
 * refreshed with lmv_get_mea_and_update_object() and the reply released so
 * that the request can be repeated against the proper MDS.
 *
 * NOTE(review): LASSERT(++loop <= 2) performs the increment inside the
 * assertion; the retry bound disappears if LASSERT is compiled out.
 */
892 int lmv_create(struct obd_export *exp, struct mdc_op_data *op_data,
893 const void *data, int datalen, int mode, __u32 uid,
894 __u32 gid, __u64 rdev, struct ptlrpc_request **request)
896 struct obd_device *obd = exp->exp_obd;
897 struct lmv_obd *lmv = &obd->u.lmv;
898 struct mds_body *body;
900 int rc, mds, loop = 0;
903 rc = lmv_check_connect(obd);
907 if (!lmv->desc.ld_active_tgt_count)
910 LASSERT(++loop <= 2);
911 obj = lmv_grab_obj(obd, &op_data->id1);
913 mds = raw_name2idx(obj->hashtype, obj->objcount,
914 op_data->name, op_data->namelen);
915 op_data->id1 = obj->objs[mds].id;
919 CDEBUG(D_OTHER, "CREATE '%*s' on "DLID4"\n", op_data->namelen,
920 op_data->name, OLID4(&op_data->id1));
922 rc = md_create(lmv->tgts[id_group(&op_data->id1)].ltd_exp,
923 op_data, data, datalen, mode, uid, gid, rdev,
926 if (*request == NULL)
929 body = lustre_msg_buf((*request)->rq_repmsg, 0,
934 CDEBUG(D_OTHER, "created. "DLID4"\n", OLID4(&op_data->id1));
935 } else if (rc == -ERESTART) {
937 * directory got splitted. time to update local object and
938 * repeat the request with proper MDS.
940 rc = lmv_get_mea_and_update_object(exp, &op_data->id1);
942 ptlrpc_req_finished(*request);
/*
 * Forward md_done_writing() for @obdo.  Target selection is not implemented
 * yet (see the FIXME): a warning is logged and the call always goes to
 * target 0.
 */
949 int lmv_done_writing(struct obd_export *exp, struct obdo *obdo)
951 struct obd_device *obd = exp->exp_obd;
952 struct lmv_obd *lmv = &obd->u.lmv;
956 rc = lmv_check_connect(obd);
960 /* FIXME: choose right MDC here */
961 CWARN("this method isn't implemented yet\n");
962 rc = md_done_writing(lmv->tgts[0].ltd_exp, obdo);
/*
 * Take a lock on every slave object listed in data->mea1.
 *
 * For each id in mea->mea_ids[] a zeroed scratch mdc_op_data is filled with
 * that id and md_enqueue() is issued on the id's target (targets without an
 * export are tested for first), storing the lock handle in lockh[i].  A
 * request left in it->d.lustre.it_data is released with
 * ptlrpc_req_finished(); a non-zero it->d.lustre.it_status aborts through
 * the cleanup label with that status.
 *
 * The cleanup path drops the locks taken so far: every handle with a
 * non-zero cookie is released with ldlm_lock_decref() and its cookie is
 * cleared.  The scratch mdc_op_data is freed on both paths.
 */
966 int lmv_enqueue_slaves(struct obd_export *exp, int locktype,
967 struct lookup_intent *it, int lockmode,
968 struct mdc_op_data *data, struct lustre_handle *lockh,
969 void *lmm, int lmmsize, ldlm_completion_callback cb_compl,
970 ldlm_blocking_callback cb_blocking, void *cb_data)
972 struct obd_device *obd = exp->exp_obd;
973 struct lmv_obd *lmv = &obd->u.lmv;
974 struct mea *mea = data->mea1;
975 struct mdc_op_data *data2;
979 OBD_ALLOC(data2, sizeof(*data2));
983 LASSERT(mea != NULL);
984 for (i = 0; i < mea->mea_count; i++) {
985 memset(data2, 0, sizeof(*data2));
986 data2->id1 = mea->mea_ids[i];
987 mds = id_group(&data2->id1);
989 if (lmv->tgts[mds].ltd_exp == NULL)
992 rc = md_enqueue(lmv->tgts[mds].ltd_exp, locktype, it,
993 lockmode, data2, lockh + i, lmm, lmmsize,
994 cb_compl, cb_blocking, cb_data);
996 CDEBUG(D_OTHER, "take lock on slave "DLID4" -> %d/%d\n",
997 OLID4(&mea->mea_ids[i]), rc, it->d.lustre.it_status);
1000 if (it->d.lustre.it_data) {
1001 struct ptlrpc_request *req;
1002 req = (struct ptlrpc_request *)it->d.lustre.it_data;
1003 ptlrpc_req_finished(req);
1006 if (it->d.lustre.it_status)
1007 GOTO(cleanup, rc = it->d.lustre.it_status);
1010 OBD_FREE(data2, sizeof(*data2));
1013 OBD_FREE(data2, sizeof(*data2));
1015 /* drop all taken locks */
1017 if (lockh[i].cookie)
1018 ldlm_lock_decref(lockh + i, lockmode);
1019 lockh[i].cookie = 0;
/*
 * Enqueue a lock for an intent operation.
 *
 * An IT_UNLINK intent with data->mea1 set is handed to
 * lmv_enqueue_slaves().  Otherwise, when a name is supplied and the parent
 * data->id1 is a split directory, the name is hashed with raw_name2idx() and
 * data->id1 is redirected to the matching slave object.  The request is
 * then forwarded with md_enqueue() to the target given by
 * id_group(&data->id1).
 */
1024 int lmv_enqueue(struct obd_export *exp, int lock_type,
1025 struct lookup_intent *it, int lock_mode,
1026 struct mdc_op_data *data, struct lustre_handle *lockh,
1027 void *lmm, int lmmsize, ldlm_completion_callback cb_compl,
1028 ldlm_blocking_callback cb_blocking, void *cb_data)
1030 struct obd_device *obd = exp->exp_obd;
1031 struct lmv_obd *lmv = &obd->u.lmv;
1032 struct lmv_obj *obj;
1036 rc = lmv_check_connect(obd);
1040 if (data->mea1 && it->it_op == IT_UNLINK) {
1041 rc = lmv_enqueue_slaves(exp, lock_type, it, lock_mode,
1042 data, lockh, lmm, lmmsize,
1043 cb_compl, cb_blocking, cb_data);
1047 if (data->namelen) {
1048 obj = lmv_grab_obj(obd, &data->id1);
1050 /* directory is splitted. look for right mds for this
1052 mds = raw_name2idx(obj->hashtype, obj->objcount,
1053 (char *)data->name, data->namelen);
1054 data->id1 = obj->objs[mds].id;
1058 CDEBUG(D_OTHER, "ENQUEUE '%s' on "DLID4"\n", LL_IT2STR(it),
1061 rc = md_enqueue(lmv->tgts[id_group(&data->id1)].ltd_exp,
1062 lock_type, it, lock_mode, data, lockh, lmm,
1063 lmmsize, cb_compl, cb_blocking, cb_data);
/*
 * Getattr by name under lock.
 *
 * If the directory @id is split, the name is hashed over namelen - 1 bytes
 * to select the slave object whose id is used for the request.  The request
 * goes to the target given by the group of that id.  When the reply body has
 * OBD_MD_MDS set (a cross-node reference) a second md_getattr_lock() is
 * issued with a NULL name and the first reply is released.  On -ERESTART
 * the directory has just been split: the local object is refreshed with
 * lmv_get_mea_and_update_object() and the reply released so that the
 * request can be repeated against the proper MDS.
 *
 * NOTE(review): the hash uses namelen - 1 while the request itself passes
 * namelen -- presumably namelen counts the terminating NUL; confirm.
 * NOTE(review): LASSERT(++loop <= 2) performs the increment inside the
 * assertion.
 */
1067 int lmv_getattr_lock(struct obd_export *exp, struct lustre_id *id,
1068 char *filename, int namelen, __u64 valid,
1069 unsigned int ea_size, struct ptlrpc_request **request)
1071 int rc, mds = id_group(id), loop = 0;
1072 struct obd_device *obd = exp->exp_obd;
1073 struct lmv_obd *lmv = &obd->u.lmv;
1074 struct lustre_id rid = *id;
1075 struct mds_body *body;
1076 struct lmv_obj *obj;
1079 rc = lmv_check_connect(obd);
1083 LASSERT(++loop <= 2);
1084 obj = lmv_grab_obj(obd, id);
1086 /* directory is splitted. look for right mds for this name */
1087 mds = raw_name2idx(obj->hashtype, obj->objcount,
1088 filename, namelen - 1);
1089 rid = obj->objs[mds].id;
1093 CDEBUG(D_OTHER, "getattr_lock for %*s on "DLID4" -> "DLID4"\n",
1094 namelen, filename, OLID4(id), OLID4(&rid));
1096 rc = md_getattr_lock(lmv->tgts[id_group(&rid)].ltd_exp,
1097 &rid, filename, namelen, valid,
1101 * this could be cross-node reference. in this case all we have
1102 * right now is lustre_id triple. we'd like to find other
1105 body = lustre_msg_buf((*request)->rq_repmsg, 0, sizeof(*body));
1106 LASSERT(body != NULL);
1107 if (body->valid & OBD_MD_MDS) {
1108 struct ptlrpc_request *req = NULL;
1111 CDEBUG(D_OTHER, "request attrs for "DLID4"\n", OLID4(&rid));
1113 rc = md_getattr_lock(lmv->tgts[id_group(&rid)].ltd_exp,
1114 &rid, NULL, 1, valid, ea_size, &req);
1115 ptlrpc_req_finished(*request);
1118 } else if (rc == -ERESTART) {
1119 /* directory got splitted. time to update local object and
1120 * repeat the request with proper MDS */
1121 rc = lmv_get_mea_and_update_object(exp, &rid);
1123 ptlrpc_req_finished(*request);
1131 * llite passes the id of a target inode in data->id1 and the id of the directory in
/*
 * Create a hard link.
 *
 * With a name (data->namelen != 0) this is a usual link request: if
 * lmv_grab_obj() reports data->id1 as a split object, the name is hashed
 * with raw_name2idx() and data->id1 is replaced by the id of the matching
 * slave.  Without a name the request comes from an MDS that wants i_nlinks
 * raised for the inode named by id1 (per the inline comment).  Either way
 * the request is forwarded with md_link() to the target given by
 * id_group(&data->id1).
 *
 * NOTE(review): rc temporarily holds the hash index before it receives the
 * md_link() result.
 */
1134 int lmv_link(struct obd_export *exp, struct mdc_op_data *data,
1135 struct ptlrpc_request **request)
1137 struct obd_device *obd = exp->exp_obd;
1138 struct lmv_obd *lmv = &obd->u.lmv;
1139 struct lmv_obj *obj;
1143 rc = lmv_check_connect(obd);
1147 if (data->namelen != 0) {
1148 /* usual link request */
1149 obj = lmv_grab_obj(obd, &data->id1);
1151 rc = raw_name2idx(obj->hashtype, obj->objcount,
1152 data->name, data->namelen);
1153 data->id1 = obj->objs[rc].id;
1157 CDEBUG(D_OTHER,"link "DLID4":%*s to "DLID4"\n",
1158 OLID4(&data->id2), data->namelen, data->name,
1161 /* request from MDS to acquire i_links for inode by id1 */
1162 CDEBUG(D_OTHER, "inc i_nlinks for "DLID4"\n",
1166 rc = md_link(lmv->tgts[id_group(&data->id1)].ltd_exp,
/*
 * Rename @old in directory data->id1 to @new in directory data->id2.
 *
 * One branch handles an MDS that holds the old entry asking another MDS to
 * create the new name: the target is taken from data->id2 and, when that
 * directory is split, data->id2 is redirected to the slave selected by
 * hashing @new.  In the other branch data->id1 and data->id2 are each
 * redirected to the slave selected by hashing @old and @new respectively
 * when the corresponding directory is split, and the target is taken from
 * data->id1.  A rename whose ids fall into different groups is logged as
 * cross-node.  The request is issued with md_rename().
 */
1171 int lmv_rename(struct obd_export *exp, struct mdc_op_data *data,
1172 const char *old, int oldlen, const char *new, int newlen,
1173 struct ptlrpc_request **request)
1175 struct obd_device *obd = exp->exp_obd;
1176 struct lmv_obd *lmv = &obd->u.lmv;
1177 struct lmv_obj *obj;
1181 CDEBUG(D_OTHER, "rename %*s in "DLID4" to %*s in "DLID4"\n",
1182 oldlen, old, OLID4(&data->id1), newlen, new,
1185 rc = lmv_check_connect(obd);
1191 * MDS with old dir entry is asking another MDS to create name
1195 "create %*s(%d/%d) in "DLID4" pointing "
1196 "to "DLID4"\n", newlen, new, oldlen, newlen,
1197 OLID4(&data->id2), OLID4(&data->id1));
1199 mds = id_group(&data->id2);
1202 * target directory can be splitted, sowe should forward request
1205 obj = lmv_grab_obj(obd, &data->id2);
1207 mds = raw_name2idx(obj->hashtype, obj->objcount,
1208 (char *)new, newlen);
1209 data->id2 = obj->objs[mds].id;
1210 CDEBUG(D_OTHER, "forward to MDS #%u ("DLID4")\n", mds,
1217 obj = lmv_grab_obj(obd, &data->id1);
1220 * directory is already splitted, so we have to forward request
1223 mds = raw_name2idx(obj->hashtype, obj->objcount,
1224 (char *)old, oldlen);
1225 data->id1 = obj->objs[mds].id;
1226 CDEBUG(D_OTHER, "forward to MDS #%u ("DLID4")\n", mds,
1231 obj = lmv_grab_obj(obd, &data->id2);
1234 * directory is already splitted, so we have to forward request
1237 mds = raw_name2idx(obj->hashtype, obj->objcount,
1238 (char *)new, newlen);
1240 data->id2 = obj->objs[mds].id;
1241 CDEBUG(D_OTHER, "forward to MDS #%u ("DLID4")\n", mds,
1246 mds = id_group(&data->id1);
1249 if (id_group(&data->id1) != id_group(&data->id2)) {
1250 CDEBUG(D_OTHER,"cross-node rename "DLID4"/%*s to "DLID4"/%*s\n",
1251 OLID4(&data->id1), oldlen, old, OLID4(&data->id2),
1255 rc = md_rename(lmv->tgts[mds].ltd_exp, data, old, oldlen,
1256 new, newlen, request);
/*
 * Set attributes on data->id1.
 *
 * For a split object the setattr is sent to every slave in turn, with
 * data->id1 overwritten by each slave id.  The reply for the master object
 * (id equal to obj->id) is the one meant for the caller; a reply is
 * released with ptlrpc_req_finished() inside the loop (presumably for the
 * non-master slaves -- confirm).  For an ordinary object the request goes
 * to the target given by id_group(&data->id1), and the group of the id in
 * the reply body is asserted to match.
 */
1260 int lmv_setattr(struct obd_export *exp, struct mdc_op_data *data,
1261 struct iattr *iattr, void *ea, int ealen, void *ea2,
1262 int ea2len, struct ptlrpc_request **request)
1264 struct obd_device *obd = exp->exp_obd;
1265 struct lmv_obd *lmv = &obd->u.lmv;
1266 struct ptlrpc_request *req;
1267 struct mds_body *body;
1268 struct lmv_obj *obj;
1272 rc = lmv_check_connect(obd);
1276 obj = lmv_grab_obj(obd, &data->id1);
1278 CDEBUG(D_OTHER, "SETATTR for "DLID4", valid 0x%x%s\n",
1279 OLID4(&data->id1), iattr->ia_valid, obj ? ", splitted" : "");
1282 for (i = 0; i < obj->objcount; i++) {
1283 data->id1 = obj->objs[i].id;
1285 rc = md_setattr(lmv->tgts[id_group(&data->id1)].ltd_exp,
1286 data, iattr, ea, ealen, ea2, ea2len, &req);
1288 if (id_equal_fid(&obj->id, &obj->objs[i].id)) {
1290 * this is master object and this request should
1291 * be returned back to llite.
1295 ptlrpc_req_finished(req);
1303 LASSERT(id_group(&data->id1) < lmv->desc.ld_tgt_count);
1304 rc = md_setattr(lmv->tgts[id_group(&data->id1)].ltd_exp,
1305 data, iattr, ea, ealen, ea2, ea2len, request);
1307 body = lustre_msg_buf((*request)->rq_repmsg, 0,
1309 LASSERT(body != NULL);
1310 LASSERT(id_group(&body->id1) == id_group(&data->id1));
/*
 * After lmv_check_connect(), forwards md_sync() for @id to the target
 * selected by id_group(id).
 */
1316 int lmv_sync(struct obd_export *exp, struct lustre_id *id,
1317 struct ptlrpc_request **request)
1319 struct obd_device *obd = exp->exp_obd;
1320 struct lmv_obd *lmv = &obd->u.lmv;
1324 rc = lmv_check_connect(obd);
1328 rc = md_sync(lmv->tgts[id_group(id)].ltd_exp,
/*
 * Blocking AST for locks on directory objects.
 *
 * LDLM_CB_BLOCKING: the lock is turned into a handle and cancelled with
 * ldlm_cli_cancel(); a debug message reports the result code.
 * LDLM_CB_CANCELING: the lmv object stored in lock->l_ast_data is fetched
 * so that the attributes cached for the dirobj can be dropped (per the
 * inline comment), and the cancel is debug-logged together with the
 * resource name.
 */
1333 int lmv_dirobj_blocking_ast(struct ldlm_lock *lock,
1334 struct ldlm_lock_desc *desc,
1335 void *data, int flag)
1337 struct lustre_handle lockh;
1338 struct lmv_obj *obj;
1343 case LDLM_CB_BLOCKING:
1344 ldlm_lock2handle(lock, &lockh);
1345 rc = ldlm_cli_cancel(&lockh);
1347 CDEBUG(D_INODE, "ldlm_cli_cancel: %d\n", rc);
1351 case LDLM_CB_CANCELING:
1352 /* time to drop cached attrs for dirobj */
1353 obj = lock->l_ast_data;
1355 CDEBUG(D_OTHER, "cancel %s on "LPU64"/"LPU64
1356 ", master "DLID4"\n",
1357 lock->l_resource->lr_name.name[3] == 1 ?
1358 "LOOKUP" : "UPDATE",
1359 lock->l_resource->lr_name.name[0],
1360 lock->l_resource->lr_name.name[1],
/*
 * Walk the ext2-format directory entries stored in @page, stepping by each
 * entry's little-endian rec_len, and pick out the entries named "." and
 * "..".
 * NOTE(review): presumably the matched entries are neutralised so they do
 * not show up in the listing -- confirm the action taken on a match.
 * NOTE(review): rec_len comes straight from the page and is used as the
 * loop step; a value of 0 would keep the loop from advancing.  Confirm the
 * page contents are validated before this is called.
 */
1371 void lmv_remove_dots(struct page *page)
1373 unsigned limit = PAGE_CACHE_SIZE;
1374 char *kaddr = page_address(page);
1375 struct ext2_dir_entry_2 *p;
1376 unsigned offs, rec_len;
1378 for (offs = 0; offs <= limit - EXT2_DIR_REC_LEN(1); offs += rec_len) {
1379 p = (struct ext2_dir_entry_2 *)(kaddr + offs);
1380 rec_len = le16_to_cpu(p->rec_len);
1382 if ((p->name_len == 1 && p->name[0] == '.') ||
1383 (p->name_len == 2 && p->name[0] == '.' && p->name[1] == '.'))
/*
 * Read one directory page for @id at @offset.
 *
 * For a split directory the slave holding @offset is found by walking
 * obj->objs[] and subtracting each slave's size from the offset until it
 * falls inside one; the request is then sent with the adjusted offset to
 * the target of that slave's id.  When the page was read successfully from
 * an object other than @id itself, lmv_remove_dots() is applied so that
 * "." and ".." are not duplicated across the slaves.
 */
1388 int lmv_readpage(struct obd_export *exp, struct lustre_id *id,
1389 __u64 offset, struct page *page,
1390 struct ptlrpc_request **request)
1392 struct obd_device *obd = exp->exp_obd;
1393 struct lmv_obd *lmv = &obd->u.lmv;
1394 struct lustre_id rid = *id;
1395 struct lmv_obj *obj;
1399 #warning "we need well-desgined readdir() implementation"
1400 rc = lmv_check_connect(obd);
1404 LASSERT(id_group(id) < lmv->desc.ld_tgt_count);
1405 CDEBUG(D_OTHER, "READPAGE at %llu from "DLID4"\n",
1406 offset, OLID4(&rid));
1408 obj = lmv_grab_obj(obd, id);
1412 /* find dirobj containing page with requested offset. */
1413 for (i = 0; i < obj->objcount; i++) {
1414 if (offset < obj->objs[i].size)
1416 offset -= obj->objs[i].size;
1418 rid = obj->objs[i].id;
1420 lmv_unlock_obj(obj);
1423 CDEBUG(D_OTHER, "forward to "DLID4" with offset %lu\n",
1424 OLID4(&rid), (unsigned long)offset);
1426 rc = md_readpage(lmv->tgts[id_group(&rid)].ltd_exp, &rid,
1427 offset, page, request);
1429 if (rc == 0 && !id_equal_fid(&rid, id))
1430 /* this page isn't from master object. To avoid "." and ".."
1431 * duplication in directory, we have to remove them from all
1433 lmv_remove_dots(page);
/*
 * Unlink the slave objects listed in data->mea1.
 *
 * For each id in mea->mea_ids[] a zeroed scratch mdc_op_data is filled with
 * that id and create_mode MDS_MODE_DONT_LOCK | S_IFDIR, and md_unlink() is
 * issued on the id's target (targets without an export are tested for
 * first).  The reply in *req is released with ptlrpc_req_finished().  The
 * scratch mdc_op_data is freed before returning.
 */
1438 int lmv_unlink_slaves(struct obd_export *exp, struct mdc_op_data *data,
1439 struct ptlrpc_request **req)
1441 struct obd_device *obd = exp->exp_obd;
1442 struct lmv_obd *lmv = &obd->u.lmv;
1443 struct mea *mea = data->mea1;
1444 struct mdc_op_data *data2;
1448 OBD_ALLOC(data2, sizeof(*data2));
1452 LASSERT(mea != NULL);
1453 for (i = 0; i < mea->mea_count; i++) {
1454 memset(data2, 0, sizeof(*data2));
1455 data2->id1 = mea->mea_ids[i];
1456 data2->create_mode = MDS_MODE_DONT_LOCK | S_IFDIR;
1458 if (lmv->tgts[id_group(&data2->id1)].ltd_exp == NULL)
1461 rc = md_unlink(lmv->tgts[id_group(&data2->id1)].ltd_exp,
1464 CDEBUG(D_OTHER, "unlink slave "DLID4" -> %d\n",
1465 OLID4(&mea->mea_ids[i]), rc);
1468 ptlrpc_req_finished(*req);
1474 OBD_FREE(data2, sizeof(*data2));
/*
 * Drop the lmv object kept for @id by calling lmv_delete_obj(); a non-zero
 * result is debug-logged as the object having been destroyed.
 */
1478 int lmv_delete_inode(struct obd_export *exp, struct lustre_id *id)
1483 if (lmv_delete_obj(exp, id)) {
1484 CDEBUG(D_OTHER, "lmv object "DLID4" is destroyed.\n",
/*
 * Unlink an entry.
 *
 * A request with no name but with data->mea1 set comes from an MDS asking
 * for the slave objects to be removed and is handed to lmv_unlink_slaves().
 * With a name, if the parent data->id1 is a split directory the name is
 * hashed with raw_name2idx() and data->id1 is redirected to the matching
 * slave.  The request is then forwarded with md_unlink() to the target
 * given by id_group(&data->id1).
 */
1490 int lmv_unlink(struct obd_export *exp, struct mdc_op_data *data,
1491 struct ptlrpc_request **request)
1493 struct obd_device *obd = exp->exp_obd;
1494 struct lmv_obd *lmv = &obd->u.lmv;
1498 rc = lmv_check_connect(obd);
1502 if (data->namelen == 0 && data->mea1 != NULL) {
1503 /* mds asks to remove slave objects */
1504 rc = lmv_unlink_slaves(exp, data, request);
1508 if (data->namelen != 0) {
1509 struct lmv_obj *obj;
1511 obj = lmv_grab_obj(obd, &data->id1);
1513 i = raw_name2idx(obj->hashtype, obj->objcount,
1514 data->name, data->namelen);
1515 data->id1 = obj->objs[i].id;
1518 CDEBUG(D_OTHER, "unlink '%*s' in "DLID4" -> %u\n",
1519 data->namelen, data->name, OLID4(&data->id1),
1522 CDEBUG(D_OTHER, "drop i_nlink on "DLID4"\n",
1525 rc = md_unlink(lmv->tgts[id_group(&data->id1)].ltd_exp,
/*
 * Return the device behind the export of target 0.  lmv_check_connect() is
 * called first; its failure is reported to the caller as ERR_PTR(rc).
 */
1530 struct obd_device *lmv_get_real_obd(struct obd_export *exp,
1531 char *name, int len)
1533 struct obd_device *obd = exp->exp_obd;
1534 struct lmv_obd *lmv = &obd->u.lmv;
1538 rc = lmv_check_connect(obd);
1540 RETURN(ERR_PTR(rc));
1541 obd = lmv->tgts[0].ltd_exp->exp_obd;
/*
 * Record new maximum EA and cookie sizes and push them to the targets.
 *
 * lmv->max_easize and lmv->max_cookiesize only grow: each is raised when the
 * supplied value is larger. obd_init_ea_size() is then called for every
 * target in lmv->tgts[0 .. ld_tgt_count); a target with a NULL export is
 * reported with CWARN, and a failing obd_init_ea_size() with CERROR.
 */
1547 int lmv_init_ea_size(struct obd_export *exp, int easize,
1550 struct obd_device *obd = exp->exp_obd;
1551 struct lmv_obd *lmv = &obd->u.lmv;
1552 int i, rc = 0, change = 0;
/* the cached maxima are only ever raised */
1555 if (lmv->max_easize < easize) {
1556 lmv->max_easize = easize;
1559 if (lmv->max_cookiesize < cookiesize) {
1560 lmv->max_cookiesize = cookiesize;
/* test whether the LMV has connected to its targets yet */
1566 if (lmv->connected == 0)
1569 for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
1570 if (lmv->tgts[i].ltd_exp == NULL) {
1571 CWARN("%s: NULL export for %d\n", obd->obd_name, i);
1575 rc = obd_init_ea_size(lmv->tgts[i].ltd_exp, easize, cookiesize);
1577 CERROR("obd_init_ea_size() failed on MDT target %d, "
1578 "error %d.\n", i, rc);
/*
 * Create one object on the target indexed by oa->o_mds.
 *
 * @ea must be NULL and oa->o_mds must be a valid target index (both are
 * asserted). obj_mdp points at a lov_stripe_md living on this function's
 * stack. The request is forwarded with obd_create() to that target's export.
 */
1585 int lmv_obd_create_single(struct obd_export *exp, struct obdo *oa,
1586 struct lov_stripe_md **ea, struct obd_trans_info *oti)
1588 struct obd_device *obd = exp->exp_obd;
1589 struct lmv_obd *lmv = &obd->u.lmv;
1590 struct lov_stripe_md obj_md;
1591 struct lov_stripe_md *obj_mdp = &obj_md;
1595 LASSERT(ea == NULL);
1596 LASSERT(oa->o_mds < lmv->desc.ld_tgt_count);
1598 rc = obd_create(lmv->tgts[oa->o_mds].ltd_exp,
/*
 * "getready" method of the obd operations table: runs lmv_check_connect()
 * on the device behind @exp.
 */
1604 int lmv_getready(struct obd_export *exp)
1606 struct obd_device *obd = exp->exp_obd;
1610 rc = lmv_check_connect(obd);
/*
 * Create the directory objects described by @oa.
 *
 * Two paths are visible:
 *  - a single-object path that delegates to lmv_obd_create_single() with a
 *    NULL ea;
 *  - a striped path in which *ea is obtained from obd_alloc_diskmd() and
 *    treated as a struct mea. mea_master starts at -1, mea_magic is set to
 *    MEA_MAGIC_ALL_CHARS, and mea_count falls back to ld_tgt_count when it is
 *    zero or larger than the number of targets.
 *
 * In the striped loop a target whose export is NULL is taken to be the
 * "master" MDS: its index goes to mea_master and the saved id (mid) to
 * mea_ids[c]. For other targets an object is created with obd_create() and
 * its id stored through obdo2id(). After the loop, c must equal mea_count.
 *
 * NOTE(review): obd_create() and its CERROR index tgts[] with c while the
 * NULL-export test and the CDEBUG use i; confirm the two are meant to be
 * interchangeable at that point.
 */
1615 * to be called from MDS only. @oa should have correct store cookie and o_fid
1616 * values for "master" object, as it will be used.
1618 int lmv_obd_create(struct obd_export *exp, struct obdo *oa,
1619 struct lov_stripe_md **ea, struct obd_trans_info *oti)
1621 struct obd_device *obd = exp->exp_obd;
1622 struct lmv_obd *lmv = &obd->u.lmv;
1623 struct lustre_id mid;
1628 rc = lmv_check_connect(obd);
1632 LASSERT(oa != NULL);
1635 rc = lmv_obd_create_single(exp, oa, NULL, oti);
1637 CERROR("Can't create object, rc = %d\n", rc);
1642 rc = obd_alloc_diskmd(exp, (struct lov_mds_md **)ea);
1644 CERROR("obd_alloc_diskmd() failed, error %d\n",
1655 * here we should take care about splitted dir, so store cookie and fid
1656 * for "master" object should already be allocated and passed in @oa.
1658 LASSERT(oa->o_id != 0);
1659 LASSERT(oa->o_fid != 0);
1661 /* save "master" object id */
1664 mea = (struct mea *)*ea;
1665 mea->mea_master = -1;
1666 mea->mea_magic = MEA_MAGIC_ALL_CHARS;
/* a zero or oversized stripe count means one stripe per target */
1668 if (!mea->mea_count || mea->mea_count > lmv->desc.ld_tgt_count)
1669 mea->mea_count = lmv->desc.ld_tgt_count;
/* i walks the targets; c counts the mea_ids slots filled so far */
1671 for (i = 0, c = 0; c < mea->mea_count && i < lmv->desc.ld_tgt_count; i++) {
1672 struct lov_stripe_md obj_md;
1673 struct lov_stripe_md *obj_mdp = &obj_md;
1675 if (lmv->tgts[i].ltd_exp == NULL) {
1676 /* this is "master" MDS */
1677 mea->mea_master = i;
1678 mea->mea_ids[c] = mid;
1684 * "master" MDS should always be part of stripped dir,
1687 if (mea->mea_master == -1 && c == mea->mea_count - 1)
1690 oa->o_valid = OBD_MD_FLGENER | OBD_MD_FLTYPE | OBD_MD_FLMODE |
1691 OBD_MD_FLUID | OBD_MD_FLGID | OBD_MD_FLID;
1693 rc = obd_create(lmv->tgts[c].ltd_exp, oa, &obj_mdp, oti);
1695 CERROR("obd_create() failed on MDT target %d, "
1696 "error %d\n", c, rc);
1700 CDEBUG(D_OTHER, "dirobj at mds %d: "LPU64"/%u\n",
1701 i, oa->o_id, oa->o_generation);
1705 * here, when object is created (or it is master and was passed
1706 * from caller) on desired MDS we save its fid to local mea_ids.
1711 * store cookie should be defined here for both cases (master
1712 * object and not master), because master is already created.
1716 /* fill mea by store cookie and fid */
1717 obdo2id(&mea->mea_ids[c], oa);
1720 LASSERT(c == mea->mea_count);
1722 CDEBUG(D_OTHER, "%d dirobjects created\n",
1723 (int)mea->mea_count);
/*
 * llog initialisation method: sets up the LLOG_CONFIG_REPL_CTXT context for
 * @obd through obd_llog_setup(), then points the context's import (loc_imp)
 * at the client import of @tgt. @count and @logid are not used in the lines
 * visible here.
 */
1728 static int lmv_llog_init(struct obd_device *obd, struct obd_llogs *llogs,
1729 struct obd_device *tgt, int count,
1730 struct llog_catid *logid)
1732 struct llog_ctxt *ctxt;
1736 rc = obd_llog_setup(obd, llogs, LLOG_CONFIG_REPL_CTXT, tgt, 0, NULL,
1739 ctxt = llog_get_context(llogs, LLOG_CONFIG_REPL_CTXT);
1740 ctxt->loc_imp = tgt->u.cli.cl_import;
/*
 * llog teardown method: looks up the LLOG_CONFIG_REPL_CTXT context in @llogs
 * and hands it to obd_llog_cleanup().
 */
1746 static int lmv_llog_finish(struct obd_device *obd,
1747 struct obd_llogs *llogs, int count)
1752 rc = obd_llog_cleanup(llog_get_context(llogs, LLOG_CONFIG_REPL_CTXT));
/*
 * Answer get_info queries addressed to the LMV device.
 *
 * Keys handled in the visible code:
 *  "mdsize"  - *val receives, as a __u32, the size of a struct mea plus one
 *              struct lustre_id per configured target; *vallen is set to
 *              sizeof(__u32).
 *  "mdsnum"  - the target table is searched for the entry whose uuid equals
 *              lmv->cluuid; *vallen is set to sizeof(__u32) on a match.
 *  "rootid"  - forwarded unchanged to obd_get_info() on target 0.
 *  "lmvdesc" - *val receives a copy of lmv->desc.
 * Any other key is logged as "invalid key".
 *
 * NOTE(review): the "lmvdesc" test uses strcmp(), which reads @key as a
 * NUL-terminated string, whereas the other keys are compared with memcmp()
 * over exactly @keylen bytes; confirm that callers always pass a terminated
 * key. That branch also does not visibly update *vallen.
 */
1756 static int lmv_get_info(struct obd_export *exp, __u32 keylen,
1757 void *key, __u32 *vallen, void *val)
1759 struct obd_device *obd;
1760 struct lmv_obd *lmv;
1764 obd = class_exp2obd(exp);
1766 CDEBUG(D_IOCTL, "invalid client cookie "LPX64"\n",
1767 exp->exp_handle.h_cookie);
1772 if (keylen == 6 && memcmp(key, "mdsize", 6) == 0) {
1773 __u32 *mdsize = val;
1774 *vallen = sizeof(__u32);
1775 *mdsize = sizeof(struct lustre_id) * lmv->desc.ld_tgt_count
1776 + sizeof(struct mea);
1778 } else if (keylen == 6 && memcmp(key, "mdsnum", 6) == 0) {
1779 struct obd_uuid *cluuid = &lmv->cluuid;
1780 struct lmv_tgt_desc *tgts;
1781 __u32 *mdsnum = val;
/* find the target whose uuid matches this LMV's own client uuid */
1784 for (i = 0, tgts = lmv->tgts; i < lmv->desc.ld_tgt_count; i++, tgts++) {
1785 if (obd_uuid_equals(&tgts->uuid, cluuid)) {
1786 *vallen = sizeof(__u32);
1792 } else if (keylen == 6 && memcmp(key, "rootid", 6) == 0) {
1793 /* getting rootid from first MDS. */
1794 rc = obd_get_info(lmv->tgts[0].ltd_exp, keylen, key,
1797 } else if (keylen >= strlen("lmvdesc") && strcmp(key, "lmvdesc") == 0) {
1798 struct lmv_desc *desc_ret = val;
1799 *desc_ret = lmv->desc;
1803 CDEBUG(D_IOCTL, "invalid key\n");
/*
 * Handle set_info requests addressed to the LMV device.
 *
 * The only key recognised in the visible code is "inter_mds": it sets
 * lmv->server_timeout to 1 and calls lmv_set_timeouts() on the device.
 * @vallen and @val are not read in the lines visible here.
 *
 * NOTE(review): the key is compared with strcmp(), which assumes @key is
 * NUL-terminated; confirm that callers guarantee this.
 */
1807 int lmv_set_info(struct obd_export *exp, obd_count keylen,
1808 void *key, obd_count vallen, void *val)
1810 struct obd_device *obd;
1811 struct lmv_obd *lmv;
1814 obd = class_exp2obd(exp);
1816 CDEBUG(D_IOCTL, "invalid client cookie "LPX64"\n",
1817 exp->exp_handle.h_cookie);
1822 if (keylen >= strlen("inter_mds") && strcmp(key, "inter_mds") == 0) {
1823 lmv->server_timeout = 1;
1824 lmv_set_timeouts(obd);
1831 int lmv_packmd(struct obd_export *exp, struct lov_mds_md **lmmp,
1832 struct lov_stripe_md *lsm)
1834 struct obd_device *obd = class_exp2obd(exp);
1835 struct lmv_obd *lmv = &obd->u.lmv;
1836 struct mea *meap, *lsmp;
1840 mea_size = (sizeof(struct lustre_id) *
1841 lmv->desc.ld_tgt_count) + sizeof(struct mea);
1845 if (*lmmp && !lsm) {
1846 OBD_FREE(*lmmp, mea_size);
1851 if (*lmmp == NULL) {
1852 OBD_ALLOC(*lmmp, mea_size);
1860 lsmp = (struct mea *)lsm;
1861 meap = (struct mea *)*lmmp;
1863 meap->mea_magic = cpu_to_le32(lsmp->mea_magic);
1864 meap->mea_count = cpu_to_le32(lsmp->mea_count);
1865 meap->mea_master = cpu_to_le32(lsmp->mea_master);
1867 for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
1868 meap->mea_ids[i] = meap->mea_ids[i];
1869 id_cpu_to_le(&meap->mea_ids[i]);
/*
 * Unpack a little-endian MEA (@disk_src) into a host-order struct mea that
 * is allocated here and stored through @mem_tgt.
 *
 * mea_size is sizeof(struct mea) plus one struct lustre_id per target in
 * lmv->desc.ld_tgt_count. When *mem_tgt is already set and @disk_src is
 * NULL, the existing buffer is freed instead. Before allocating, @mdsize is
 * asserted to equal mea_size. The magic, count and master fields are
 * converted with le32_to_cpu(), and every id slot is copied and converted
 * with id_le_to_cpu().
 */
1875 int lmv_unpackmd(struct obd_export *exp, struct lov_stripe_md **mem_tgt,
1876 struct lov_mds_md *disk_src, int mdsize)
1878 struct obd_device *obd = class_exp2obd(exp);
1879 struct mea **tmea = (struct mea **)mem_tgt;
1880 struct mea *mea = (struct mea *)disk_src;
1881 struct lmv_obd *lmv = &obd->u.lmv;
1885 mea_size = sizeof(struct lustre_id) *
1886 lmv->desc.ld_tgt_count + sizeof(struct mea);
1887 if (mem_tgt == NULL)
/* free request: an unpacked MEA exists and no source was given */
1890 if (*mem_tgt != NULL && disk_src == NULL) {
1891 OBD_FREE(*tmea, mea_size);
/* the caller's buffer must have exactly the size implied by the target count */
1895 LASSERT(mea_size == mdsize);
1897 OBD_ALLOC(*tmea, mea_size);
1904 (*tmea)->mea_magic = le32_to_cpu(mea->mea_magic);
1905 (*tmea)->mea_count = le32_to_cpu(mea->mea_count);
1906 (*tmea)->mea_master = le32_to_cpu(mea->mea_master);
1908 for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
1909 (*tmea)->mea_ids[i] = mea->mea_ids[i];
1910 id_le_to_cpu(&(*tmea)->mea_ids[i]);
/*
 * Bulk read/write against one object of a striped directory.
 *
 * @ea is interpreted as a struct mea. The id stored at
 * mea->mea_ids[oa->o_mds] supplies o_gr (via id_gen()) and o_id (via
 * id_ino()); o_valid is reset to OBD_MD_FLID | OBD_MD_FLGROUP, and the
 * request is forwarded with obd_brw() to the export of target oa->o_mds with
 * a NULL stripe md.
 *
 * @oa, @ea and @pgarr must be non-NULL and oa->o_mds must be a valid target
 * index (all asserted).
 */
1916 int lmv_brw(int rw, struct obd_export *exp, struct obdo *oa,
1917 struct lov_stripe_md *ea, obd_count oa_bufs,
1918 struct brw_page *pgarr, struct obd_trans_info *oti)
1920 struct obd_device *obd = exp->exp_obd;
1921 struct lmv_obd *lmv = &obd->u.lmv;
1922 struct mea *mea = (struct mea *) ea;
1925 LASSERT(oa != NULL);
1926 LASSERT(ea != NULL);
1927 LASSERT(pgarr != NULL);
1928 LASSERT(oa->o_mds < lmv->desc.ld_tgt_count);
/* address the object through the id recorded for this target */
1930 oa->o_gr = id_gen(&mea->mea_ids[oa->o_mds]);
1931 oa->o_id = id_ino(&mea->mea_ids[oa->o_mds]);
1932 oa->o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
1934 err = obd_brw(rw, lmv->tgts[oa->o_mds].ltd_exp,
1935 oa, NULL, oa_bufs, pgarr, oti);
/*
 * OBD method table of the LMV device type; passed to class_register_type()
 * from lmv_init() together with lmv_md_ops.
 */
1939 struct obd_ops lmv_obd_ops = {
1940 .o_owner = THIS_MODULE,
1941 .o_attach = lmv_attach,
1942 .o_detach = lmv_detach,
1943 .o_setup = lmv_setup,
1944 .o_cleanup = lmv_cleanup,
1945 .o_connect = lmv_connect,
1946 .o_disconnect = lmv_disconnect,
1947 .o_statfs = lmv_statfs,
1948 .o_llog_init = lmv_llog_init,
1949 .o_llog_finish = lmv_llog_finish,
1950 .o_get_info = lmv_get_info,
1951 .o_set_info = lmv_set_info,
1952 .o_create = lmv_obd_create,
1953 .o_packmd = lmv_packmd,
1954 .o_unpackmd = lmv_unpackmd,
1956 .o_init_ea_size = lmv_init_ea_size,
1957 .o_notify = lmv_notify,
1958 .o_iocontrol = lmv_iocontrol,
1959 .o_getready = lmv_getready,
/*
 * Metadata method table of the LMV device type; passed to
 * class_register_type() from lmv_init() together with lmv_obd_ops.
 */
1962 struct md_ops lmv_md_ops = {
1963 .m_getstatus = lmv_getstatus,
1964 .m_getattr = lmv_getattr,
1965 .m_change_cbdata = lmv_change_cbdata,
1966 .m_change_cbdata_name = lmv_change_cbdata_name,
1967 .m_close = lmv_close,
1968 .m_create = lmv_create,
1969 .m_done_writing = lmv_done_writing,
1970 .m_enqueue = lmv_enqueue,
1971 .m_getattr_lock = lmv_getattr_lock,
1972 .m_intent_lock = lmv_intent_lock,
1974 .m_rename = lmv_rename,
1975 .m_setattr = lmv_setattr,
1977 .m_readpage = lmv_readpage,
1978 .m_unlink = lmv_unlink,
1979 .m_get_real_obd = lmv_get_real_obd,
1980 .m_valid_attrs = lmv_valid_attrs,
1981 .m_delete_inode = lmv_delete_inode,
/*
 * Module initialisation.
 *
 * Creates the "lmv_objects" slab cache sized for struct lmv_obj (stored in
 * the global obj_cache), initialises the lprocfs variables, and registers the
 * LMV device type under OBD_LMV_DEVICENAME with its obd and md method tables.
 * The kmem_cache_destroy() after registration is presumably the failure path
 * of class_register_type() - confirm against the full source.
 */
1984 int __init lmv_init(void)
1986 struct lprocfs_static_vars lvars;
1989 obj_cache = kmem_cache_create("lmv_objects",
1990 sizeof(struct lmv_obj),
1993 CERROR("error allocating lmv objects cache\n");
1997 lprocfs_init_vars(lmv, &lvars);
1998 rc = class_register_type(&lmv_obd_ops, &lmv_md_ops,
2000 OBD_LMV_DEVICENAME);
2002 kmem_cache_destroy(obj_cache);
2008 static void lmv_exit(void)
2010 class_unregister_type(OBD_LMV_DEVICENAME);
2012 LASSERTF(kmem_cache_destroy(obj_cache) == 0,
2013 "can't free lmv objects cache, %d object(s)"
2014 "still in use\n", atomic_read(&obj_cache_count));
/* Module metadata, followed by registration of the load/unload entry points. */
2017 MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
2018 MODULE_DESCRIPTION("Lustre Logical Metadata Volume OBD driver");
2019 MODULE_LICENSE("GPL");
2021 module_init(lmv_init);
2022 module_exit(lmv_exit);