1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Copyright (C) 2002, 2003 Cluster File Systems, Inc.
6 * This file is part of Lustre, http://www.lustre.org.
8 * Lustre is free software; you can redistribute it and/or
9 * modify it under the terms of version 2 of the GNU General Public
10 * License as published by the Free Software Foundation.
12 * Lustre is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Lustre; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 # define EXPORT_SYMTAB
25 #define DEBUG_SUBSYSTEM S_LMV
27 #include <linux/slab.h>
28 #include <linux/module.h>
29 #include <linux/init.h>
30 #include <linux/slab.h>
31 #include <linux/pagemap.h>
32 #include <asm/div64.h>
33 #include <linux/seq_file.h>
34 #include <linux/namei.h>
36 #include <liblustre.h>
38 #include <linux/ext2_fs.h>
40 #include <linux/obd_support.h>
41 #include <linux/lustre_lib.h>
42 #include <linux/lustre_net.h>
43 #include <linux/lustre_idl.h>
44 #include <linux/lustre_dlm.h>
45 #include <linux/lustre_mds.h>
46 #include <linux/obd_class.h>
47 #include <linux/obd_ost.h>
48 #include <linux/lprocfs_status.h>
49 #include <linux/lustre_fsfilt.h>
50 #include <linux/obd_lmv.h>
51 #include <linux/lustre_lite.h>
52 #include "lmv_internal.h"
54 /* not defined for liblustre building */
/*
 * Supply ATOMIC_INIT when the build environment has not defined it, then
 * define the global obj_cache pointer and its counter obj_cache_count,
 * which is statically initialised to 0.
 */
55 #if !defined(ATOMIC_INIT)
56 #define ATOMIC_INIT(val) { (val) }
60 kmem_cache_t *obj_cache;
61 atomic_t obj_cache_count = ATOMIC_INIT(0);
/*
 * Switch one target between active and inactive.
 *
 * tgt->active is first compared with the requested state (the same-state
 * case is presumably a no-op -- confirm).  Otherwise tgt->active is
 * updated and lmv->desc.ld_active_tgt_count is adjusted by +1 when
 * activating and by -1 when deactivating.
 */
63 static void lmv_activate_target(struct lmv_obd *lmv,
64 struct lmv_tgt_desc *tgt,
67 if (tgt->active == activate)
70 tgt->active = activate;
71 lmv->desc.ld_active_tgt_count += (activate ? 1 : -1);
76 * -EINVAL : UUID can't be found in the LMV's target list
77 * -ENOTCONN: The UUID is found, but the target connection is bad (!)
78 * -EBADF : The UUID is found, but the OBD of the wrong type (!)
/*
 * Find the target whose UUID matches @uuid and set its active state.
 *
 * The target table is walked with lmv->lmv_lock held; each entry's
 * ltd_exp is tested for NULL before its UUID is compared with strncmp()
 * over sizeof(uuid->uuid) bytes.  Visible error codes are -EINVAL when
 * the walk reaches ld_tgt_count without a match and -ENOTCONN on the path
 * following class_exp2obd().  The OBD type is asserted to be
 * LUSTRE_MDC_NAME.  A target already in the requested state takes the
 * out_lmv_lock path with rc unchanged; otherwise lmv_activate_target()
 * applies the new state.  The lock is dropped with spin_unlock().
 */
80 static int lmv_set_mdc_active(struct lmv_obd *lmv, struct obd_uuid *uuid,
83 struct lmv_tgt_desc *tgt;
84 struct obd_device *obd;
88 CDEBUG(D_INFO, "Searching in lmv %p for uuid %s (activate=%d)\n",
89 lmv, uuid->uuid, activate);
91 spin_lock(&lmv->lmv_lock);
92 for (i = 0, tgt = lmv->tgts; i < lmv->desc.ld_tgt_count; i++, tgt++) {
93 if (tgt->ltd_exp == NULL)
96 CDEBUG(D_INFO, "lmv idx %d is %s conn "LPX64"\n",
97 i, tgt->uuid.uuid, tgt->ltd_exp->exp_handle.h_cookie);
99 if (!strncmp((char *)uuid->uuid, (char *)tgt->uuid.uuid, sizeof(uuid->uuid)))
103 if (i == lmv->desc.ld_tgt_count)
104 GOTO(out_lmv_lock, rc = -EINVAL);
106 obd = class_exp2obd(tgt->ltd_exp);
108 GOTO(out_lmv_lock, rc = -ENOTCONN);
110 CDEBUG(D_INFO, "Found OBD %s=%s device %d (%p) type %s at LMV idx %d\n",
111 obd->obd_name, obd->obd_uuid.uuid, obd->obd_minor, obd,
112 obd->obd_type->typ_name, i);
113 LASSERT(strcmp(obd->obd_type->typ_name, LUSTRE_MDC_NAME) == 0);
115 if (tgt->active == activate) {
116 CDEBUG(D_INFO, "OBD %p already %sactive!\n", obd,
117 activate ? "" : "in");
118 GOTO(out_lmv_lock, rc);
121 CDEBUG(D_INFO, "Marking OBD %p %sactive\n",
122 obd, activate ? "" : "in");
124 lmv_activate_target(lmv, tgt, activate);
129 spin_unlock(&lmv->lmv_lock);
/*
 * Notification handler invoked when a watched device changes state.
 *
 * Only devices whose type name equals LUSTRE_MDC_NAME are expected; any
 * other type is reported with CERROR.  The target UUID is read from
 * watched->u.cli.cl_import->imp_target_uuid and handed to
 * lmv_set_mdc_active() together with @active; a failure there is logged.
 * When obd->obd_observer is set, the notification is forwarded to it with
 * obd_notify() and that call's result is stored in rc.
 */
133 static int lmv_notify(struct obd_device *obd, struct obd_device *watched,
134 int active, void *data)
136 struct obd_uuid *uuid;
140 if (strcmp(watched->obd_type->typ_name, LUSTRE_MDC_NAME)) {
141 CERROR("unexpected notification of %s %s!\n",
142 watched->obd_type->typ_name,
146 uuid = &watched->u.cli.cl_import->imp_target_uuid;
148 /* Set MDC as active before notifying the observer, so the observer can
149 * use the MDC normally.
151 rc = lmv_set_mdc_active(&obd->u.lmv, uuid, active);
153 CERROR("%sactivation of %s failed: %d\n",
154 active ? "" : "de", uuid->uuid, rc);
158 if (obd->obd_observer)
159 /* Pass the notification up the chain. */
160 rc = obd_notify(obd->obd_observer, watched, active, data);
/*
 * Attach-time procfs setup for an LMV device.
 *
 * Initialises the lprocfs variable table with lprocfs_init_vars() and
 * attaches it through lprocfs_obd_attach().  A read-only (0444)
 * "target_obd_status" entry is then created under dev->obd_proc_entry and
 * its proc_fops is pointed at lmv_proc_target_fops.
 */
165 int lmv_attach(struct obd_device *dev, obd_count len, void *data)
167 struct lprocfs_static_vars lvars;
171 lprocfs_init_vars(lmv, &lvars);
172 rc = lprocfs_obd_attach(dev, lvars.obd_vars);
175 struct proc_dir_entry *entry;
177 entry = create_proc_entry("target_obd_status", 0444,
178 dev->obd_proc_entry);
181 entry->proc_fops = &lmv_proc_target_fops;
/* Detach counterpart of lmv_attach(): returns lprocfs_obd_detach(dev). */
188 int lmv_detach(struct obd_device *dev)
190 return lprocfs_obd_detach(dev);
193 /* This is a fake connect function. Its purpose is to initialize lmv and tell
194 * the caller that everything is okay. The real connection is performed later. */
/*
 * Connect entry point for the LMV device.
 *
 * class_connect() creates the export, which is then looked up with
 * class_conn2export().  lmv->refcount is tested so that the rest of the
 * setup is not repeated when it is greater than 1; in that case the
 * export reference is dropped with class_export_put().  Otherwise the
 * client UUID, the connect flags and a copy of *data are stored in the
 * lmv, init_sem is initialised to 1 and the "target_obds" procfs
 * directory is registered under obd->obd_proc_entry.  When @flags has
 * OBD_OPT_REAL_CLIENT set, lmv_check_connect() is called straight away.
 * lprocfs_remove() is called on the proc directory on one path
 * (presumably error cleanup -- confirm).
 */
195 static int lmv_connect(struct lustre_handle *conn, struct obd_device *obd,
196 struct obd_uuid *cluuid, struct obd_connect_data *data,
200 struct proc_dir_entry *lmv_proc_dir;
202 struct lmv_obd *lmv = &obd->u.lmv;
203 struct obd_export *exp;
207 rc = class_connect(conn, obd, cluuid);
209 CERROR("class_connection() returned %d\n", rc);
213 exp = class_conn2export(conn);
215 /* we don't want to actually do the underlying connections more than
216 * once, so keep track. */
218 if (lmv->refcount > 1) {
219 class_export_put(exp);
225 lmv->cluuid = *cluuid;
226 lmv->connect_flags = flags;
227 sema_init(&lmv->init_sem, 1);
229 memcpy(&lmv->conn_data, data, sizeof(*data));
232 lmv_proc_dir = lprocfs_register("target_obds", obd->obd_proc_entry,
234 if (IS_ERR(lmv_proc_dir)) {
235 CERROR("could not register /proc/fs/lustre/%s/%s/target_obds.",
236 obd->obd_type->typ_name, obd->obd_name);
242 * all real clients shouls perform actual connection rightaway, because
243 * it is possible, that LMV will not have opportunity to connect
244 * targets, as MDC stuff will bit called directly, for instance while
245 * reading ../mdc/../kbytesfree procfs file, etc.
247 if (flags & OBD_OPT_REAL_CLIENT)
248 rc = lmv_check_connect(obd);
253 lprocfs_remove(lmv_proc_dir);
/*
 * Send the "inter_mds" key to every connected target.
 *
 * lmv->server_timeout and lmv->connected are each tested against 0 first
 * (presumably early exits -- confirm).  The target table is then walked;
 * each entry's ltd_exp is tested for NULL, and obd_set_info() is called
 * on the export with the key "inter_mds", a value length of 0 and a NULL
 * value.
 */
260 void lmv_set_timeouts(struct obd_device *obd)
262 struct lmv_tgt_desc *tgts;
267 if (lmv->server_timeout == 0)
270 if (lmv->connected == 0)
273 for (i = 0, tgts = lmv->tgts; i < lmv->desc.ld_tgt_count; i++, tgts++) {
274 if (tgts->ltd_exp == NULL)
277 obd_set_info(tgts->ltd_exp, strlen("inter_mds"),
278 "inter_mds", 0, NULL);
282 #define MAX_STRING_SIZE 128
284 /* performs a check if passed obd is connected. If no - connect it. */
/*
 * Runs under lmv->init_sem; lmv->connected is tested before any work.
 *
 * For every entry in the target table:
 *  - the MDC device is looked up with class_find_client_obd(); a missing
 *    device gives -EINVAL via the out_disc path;
 *  - a target whose UUID equals the client UUID is not connected to, and
 *    its ltd_exp is set to NULL;
 *  - a device that is not set up gives -EINVAL via out_disc;
 *  - obd_connect() is issued with lmv->conn_data, the resulting export is
 *    stored in ltd_exp and obd_init_ea_size() is applied to it;
 *  - this LMV is registered as observer of the target; if that fails the
 *    new export is disconnected;
 *  - ld_active_tgt_count is incremented;
 *  - a symlink "../../../<type>/..." named after the MDC is created in
 *    the "target_obds" procfs directory; if that fails the directory is
 *    removed.
 * Afterwards lmv_set_timeouts() runs and the export reference is dropped.
 *
 * The trailing error path decrements ld_active_tgt_count, disconnects
 * target exports (logging rc2 on failure) and calls class_disconnect().
 * Which targets that walk covers is not visible here.
 *
 * MAX_STRING_SIZE bounds the symlink target: name[] is MAX_STRING_SIZE + 1
 * bytes and its last byte is set to NUL before snprintf() writes at most
 * MAX_STRING_SIZE bytes.
 */
285 int lmv_check_connect(struct obd_device *obd)
288 struct proc_dir_entry *lmv_proc_dir;
290 struct lmv_obd *lmv = &obd->u.lmv;
291 struct lmv_tgt_desc *tgts;
292 struct obd_uuid *cluuid;
293 struct obd_export *exp;
300 down(&lmv->init_sem);
301 if (lmv->connected) {
306 cluuid = &lmv->cluuid;
309 CDEBUG(D_OTHER, "time to connect %s to %s\n",
310 cluuid->uuid, obd->obd_name);
312 for (i = 0, tgts = lmv->tgts; i < lmv->desc.ld_tgt_count; i++, tgts++) {
313 struct obd_uuid lmv_mdc_uuid = { "LMV_MDC_UUID" };
314 struct lustre_handle conn = {0, };
315 struct obd_device *tgt_obd;
317 LASSERT(tgts != NULL);
319 tgt_obd = class_find_client_obd(&tgts->uuid, LUSTRE_MDC_NAME,
322 CERROR("target %s not attached\n", tgts->uuid.uuid);
323 GOTO(out_disc, rc = -EINVAL);
326 /* for MDS: don't connect to yourself */
327 if (obd_uuid_equals(&tgts->uuid, cluuid)) {
328 CDEBUG(D_OTHER, "don't connect back to %s\n",
330 tgts->ltd_exp = NULL;
334 CDEBUG(D_OTHER, "connect to %s(%s) - %s, %s FOR %s\n",
335 tgt_obd->obd_name, tgt_obd->obd_uuid.uuid,
336 tgts->uuid.uuid, obd->obd_uuid.uuid,
339 if (!tgt_obd->obd_set_up) {
340 CERROR("target %s not set up\n", tgts->uuid.uuid);
341 GOTO(out_disc, rc = -EINVAL);
344 rc = obd_connect(&conn, tgt_obd, &lmv_mdc_uuid, &lmv->conn_data,
347 CERROR("target %s connect error %d\n",
348 tgts->uuid.uuid, rc);
351 tgts->ltd_exp = class_conn2export(&conn);
353 obd_init_ea_size(tgts->ltd_exp, lmv->max_easize,
354 lmv->max_cookiesize);
356 rc = obd_register_observer(tgt_obd, obd);
358 CERROR("target %s register_observer error %d\n",
359 tgts->uuid.uuid, rc);
360 obd_disconnect(tgts->ltd_exp, 0);
364 lmv->desc.ld_active_tgt_count++;
367 CDEBUG(D_OTHER, "connected to %s(%s) successfully (%d)\n",
368 tgt_obd->obd_name, tgt_obd->obd_uuid.uuid,
369 atomic_read(&obd->obd_refcount));
372 lmv_proc_dir = lprocfs_srch(obd->obd_proc_entry, "target_obds");
374 struct obd_device *mdc_obd = class_conn2obd(&conn);
375 struct proc_dir_entry *mdc_symlink;
376 char name[MAX_STRING_SIZE + 1];
378 LASSERT(mdc_obd != NULL);
379 LASSERT(mdc_obd->obd_type != NULL);
380 LASSERT(mdc_obd->obd_type->typ_name != NULL);
381 name[MAX_STRING_SIZE] = '\0';
382 snprintf(name, MAX_STRING_SIZE, "../../../%s/%s",
383 mdc_obd->obd_type->typ_name,
385 mdc_symlink = proc_symlink(mdc_obd->obd_name,
387 if (mdc_symlink == NULL) {
388 CERROR("could not register LMV target "
389 "/proc/fs/lustre/%s/%s/target_obds/%s.",
390 obd->obd_type->typ_name, obd->obd_name,
392 lprocfs_remove(lmv_proc_dir);
399 lmv_set_timeouts(obd);
400 class_export_put(exp);
407 struct obd_uuid uuid;
409 --lmv->desc.ld_active_tgt_count;
411 /* save for CERROR below; (we know it's terminated) */
413 rc2 = obd_disconnect(tgts->ltd_exp, 0);
415 CERROR("error: LMV target %s disconnect on MDC idx %d: "
416 "error %d\n", uuid.uuid, i, rc2);
418 class_disconnect(exp, 0);
/*
 * Disconnect an export from the LMV device.
 *
 * The underlying targets are torn down only on the final disconnect,
 * which is decided by testing lmv->refcount against 0.  For each target
 * with a non-NULL export the code:
 *  - copies obd->obd_no_recov to the MDC device;
 *  - removes the MDC's symlink from the "target_obds" procfs directory,
 *    logging an error when the symlink is missing;
 *  - unregisters the observer and calls obd_disconnect() with @flags,
 *    logging a failure only when the target is still marked active;
 *  - marks the target inactive with lmv_activate_target() and clears
 *    ltd_exp.
 * The "target_obds" directory is then removed (or reported missing).  The
 * export reference is dropped and class_disconnect() supplies rc.
 */
423 static int lmv_disconnect(struct obd_export *exp, unsigned long flags)
425 struct obd_device *obd = class_exp2obd(exp);
426 struct lmv_obd *lmv = &obd->u.lmv;
429 struct proc_dir_entry *lmv_proc_dir;
437 /* Only disconnect the underlying layers on the final disconnect. */
439 if (lmv->refcount != 0)
443 lmv_proc_dir = lprocfs_srch(obd->obd_proc_entry, "target_obds");
446 for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
447 struct obd_device *mdc_obd;
449 if (lmv->tgts[i].ltd_exp == NULL)
452 mdc_obd = class_exp2obd(lmv->tgts[i].ltd_exp);
455 mdc_obd->obd_no_recov = obd->obd_no_recov;
459 struct proc_dir_entry *mdc_symlink;
461 mdc_symlink = lprocfs_srch(lmv_proc_dir, mdc_obd->obd_name);
463 lprocfs_remove(mdc_symlink);
465 CERROR("/proc/fs/lustre/%s/%s/target_obds/%s missing\n",
466 obd->obd_type->typ_name, obd->obd_name,
471 CDEBUG(D_OTHER, "disconnected from %s(%s) successfully\n",
472 lmv->tgts[i].ltd_exp->exp_obd->obd_name,
473 lmv->tgts[i].ltd_exp->exp_obd->obd_uuid.uuid);
475 obd_register_observer(lmv->tgts[i].ltd_exp->exp_obd, NULL);
476 rc = obd_disconnect(lmv->tgts[i].ltd_exp, flags);
478 if (lmv->tgts[i].active) {
479 CERROR("Target %s disconnect error %d\n",
480 lmv->tgts[i].uuid.uuid, rc);
485 lmv_activate_target(lmv, &lmv->tgts[i], 0);
486 lmv->tgts[i].ltd_exp = NULL;
491 lprocfs_remove(lmv_proc_dir);
493 CERROR("/proc/fs/lustre/%s/%s/target_obds missing\n",
494 obd->obd_type->typ_name, obd->obd_name);
499 /* this is the case when no real connection is established by
500 * lmv_check_connect(). */
502 class_export_put(exp);
503 rc = class_disconnect(exp, 0);
504 if (lmv->refcount == 0)
/*
 * Broadcast an ioctl to every connected target.
 *
 * lmv->desc.ld_tgt_count is tested against 0 before the walk.  For each
 * target with a non-NULL export, obd_iocontrol() is called with the
 * caller's arguments unchanged; an error is logged with CERROR only when
 * the target is marked active.
 *
 * NOTE(review): the adjacent literals "on MDT" and "idx %d" concatenate
 * without a separating space, so the message reads "MDTidx" -- confirm
 * whether a space was intended.
 */
509 static int lmv_iocontrol(unsigned int cmd, struct obd_export *exp,
510 int len, void *karg, void *uarg)
512 struct obd_device *obddev = class_exp2obd(exp);
513 struct lmv_obd *lmv = &obddev->u.lmv;
514 int i, rc = 0, set = 0;
517 if (lmv->desc.ld_tgt_count == 0)
520 for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
523 if (lmv->tgts[i].ltd_exp == NULL)
526 err = obd_iocontrol(cmd, lmv->tgts[i].ltd_exp, len, karg, uarg);
528 if (lmv->tgts[i].active) {
529 CERROR("error: iocontrol MDC %s on MDT"
530 "idx %d: err = %d\n",
531 lmv->tgts[i].uuid.uuid, i, err);
/*
 * Set up an LMV device from a lustre_cfg record.
 *
 * Config buffer 1 must hold a struct lmv_desc and buffer 2 the array of
 * target UUIDs; both are length-checked, and buffer 2 must be exactly
 * sizeof(struct obd_uuid) * desc->ld_tgt_count bytes.  The target table
 * is allocated with OBD_ALLOC (size kept in lmv->tgts_size), lmv_lock is
 * initialised and each UUID is copied into its table slot.
 * max_cookiesize is set to 0 and max_easize to
 * sizeof(struct lustre_id) * ld_tgt_count + sizeof(struct mea).
 * lmv_setup_mgr() then runs; on its failure path the table is freed.
 * Finally the first target's MDC is looked up with
 * class_find_client_obd(), and "Target %s not attached" is logged on the
 * failure path.
 *
 * NOTE(review): the "UUID array size wrong" message passes
 * sizeof(*uuids), a size_t, to a %u conversion -- confirm the format on
 * 64-bit builds.
 */
544 static int lmv_setup(struct obd_device *obd, obd_count len, void *buf)
547 struct lmv_desc *desc;
548 struct obd_uuid *uuids;
549 struct lmv_tgt_desc *tgts;
550 struct obd_device *tgt_obd;
551 struct lustre_cfg *lcfg = buf;
552 struct lmv_obd *lmv = &obd->u.lmv;
555 if (LUSTRE_CFG_BUFLEN(lcfg, 1) < 1) {
556 CERROR("LMV setup requires a descriptor\n");
560 if (LUSTRE_CFG_BUFLEN(lcfg, 2) < 1) {
561 CERROR("LMV setup requires an MDT UUID list\n");
565 desc = (struct lmv_desc *)lustre_cfg_buf(lcfg, 1);
566 if (sizeof(*desc) > LUSTRE_CFG_BUFLEN(lcfg, 1)) {
567 CERROR("descriptor size wrong: %d > %d\n",
568 (int)sizeof(*desc), LUSTRE_CFG_BUFLEN(lcfg, 1));
572 uuids = (struct obd_uuid *)lustre_cfg_buf(lcfg, 2);
573 if (sizeof(*uuids) * desc->ld_tgt_count != LUSTRE_CFG_BUFLEN(lcfg, 2)) {
574 CERROR("UUID array size wrong: %u * %u != %u\n",
575 sizeof(*uuids), desc->ld_tgt_count, LUSTRE_CFG_BUFLEN(lcfg, 2));
579 lmv->tgts_size = sizeof(struct lmv_tgt_desc) * desc->ld_tgt_count;
580 OBD_ALLOC(lmv->tgts, lmv->tgts_size);
581 if (lmv->tgts == NULL) {
582 CERROR("Out of memory\n");
587 spin_lock_init(&lmv->lmv_lock);
589 for (i = 0, tgts = lmv->tgts; i < desc->ld_tgt_count; i++, tgts++)
590 tgts->uuid = uuids[i];
592 lmv->max_cookiesize = 0;
594 lmv->max_easize = sizeof(struct lustre_id) *
595 desc->ld_tgt_count + sizeof(struct mea);
597 rc = lmv_setup_mgr(obd);
599 CERROR("Can't setup LMV object manager, "
601 OBD_FREE(lmv->tgts, lmv->tgts_size);
604 tgt_obd = class_find_client_obd(&lmv->tgts->uuid, LUSTRE_MDC_NAME,
607 CERROR("Target %s not attached\n", lmv->tgts->uuid.uuid);
/*
 * Cleanup counterpart of lmv_setup(): calls lmv_cleanup_mgr() and frees
 * the target table, using the size recorded in lmv->tgts_size.
 */
614 static int lmv_cleanup(struct obd_device *obd, int flags)
616 struct lmv_obd *lmv = &obd->u.lmv;
619 lmv_cleanup_mgr(obd);
620 OBD_FREE(lmv->tgts, lmv->tgts_size);
/*
 * Aggregate statfs results across all connected targets.
 *
 * After lmv_check_connect(), a scratch struct obd_statfs is allocated and
 * obd_statfs() is called on every target with a non-NULL export; a
 * failure is logged and leaves through out_free_temp.  One branch copies
 * the scratch result into @osfs wholesale and the other adds os_bavail,
 * os_blocks, os_ffree and os_files to the running totals (presumably the
 * copy is for the first target only -- confirm).  The scratch buffer is
 * freed before return.
 */
625 static int lmv_statfs(struct obd_device *obd, struct obd_statfs *osfs,
626 unsigned long max_age)
628 struct lmv_obd *lmv = &obd->u.lmv;
629 struct obd_statfs *temp;
633 rc = lmv_check_connect(obd);
637 OBD_ALLOC(temp, sizeof(*temp));
641 for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
642 if (lmv->tgts[i].ltd_exp == NULL)
645 rc = obd_statfs(lmv->tgts[i].ltd_exp->exp_obd, temp, max_age);
647 CERROR("can't stat MDS #%d (%s), error %d\n", i,
648 lmv->tgts[i].ltd_exp->exp_obd->obd_name,
650 GOTO(out_free_temp, rc);
653 memcpy(osfs, temp, sizeof(*temp));
655 osfs->os_bavail += temp->os_bavail;
656 osfs->os_blocks += temp->os_blocks;
657 osfs->os_ffree += temp->os_ffree;
658 osfs->os_files += temp->os_files;
664 OBD_FREE(temp, sizeof(*temp));
/*
 * Ensure the targets are connected, then call md_getstatus() on target 0
 * with the caller's @id.
 */
668 static int lmv_getstatus(struct obd_export *exp, struct lustre_id *id)
670 struct obd_device *obd = exp->exp_obd;
671 struct lmv_obd *lmv = &obd->u.lmv;
675 rc = lmv_check_connect(obd);
679 rc = md_getstatus(lmv->tgts[0].ltd_exp, id);
/*
 * Get attributes for @id from the target selected by id_group(id).
 *
 * The group index is asserted to be below ld_tgt_count before
 * md_getattr() is forwarded.  The LMV object for @id is then looked up
 * with lmv_grab_obj(); when one exists (a split object, per the debug
 * message) the reply body is taken from buffer 0 of *request and the size
 * of every slave in obj->objs[] is added to body->size.  The loop also
 * tests each slave for a NULL export (warning if so) and for being the
 * master object, which the existing comment says is skipped.
 */
685 static int lmv_getattr(struct obd_export *exp, struct lustre_id *id,
686 __u64 valid, const char *ea_name, int ea_namelen,
687 unsigned int ea_size, struct ptlrpc_request **request)
689 struct obd_device *obd = exp->exp_obd;
690 struct lmv_obd *lmv = &obd->u.lmv;
691 int rc, i = id_group(id);
695 rc = lmv_check_connect(obd);
699 LASSERT(i < lmv->desc.ld_tgt_count);
702 rc = md_getattr(lmv->tgts[i].ltd_exp, id, valid,
703 ea_name, ea_namelen, ea_size, request);
707 obj = lmv_grab_obj(obd, id);
709 CDEBUG(D_OTHER, "GETATTR for "DLID4" %s\n",
710 OLID4(id), obj ? "(splitted)" : "");
713 * if object is splitted, then we loop over all the slaves and gather
714 * size attribute. In ideal world we would have to gather also mds field
715 * from all slaves, as object is spread over the cluster and this is
716 * definitely interesting information and it is not good to loss it,
720 struct mds_body *body;
722 if (*request == NULL) {
727 body = lustre_msg_buf((*request)->rq_repmsg, 0,
729 LASSERT(body != NULL);
733 for (i = 0; i < obj->objcount; i++) {
735 if (lmv->tgts[i].ltd_exp == NULL) {
736 CWARN("%s: NULL export for %d\n",
741 /* skip master obj. */
742 if (id_equal_fid(&obj->id, &obj->objs[i].id))
745 body->size += obj->objs[i].size;
/*
 * Forward a change-cbdata request to the target that owns @id.
 *
 * After lmv_check_connect(), id_group(id) is asserted to be below
 * ld_tgt_count and md_change_cbdata() is called on that target's export.
 */
755 static int lmv_change_cbdata(struct obd_export *exp,
756 struct lustre_id *id,
760 struct obd_device *obd = exp->exp_obd;
761 struct lmv_obd *lmv = &obd->u.lmv;
765 rc = lmv_check_connect(obd);
769 CDEBUG(D_OTHER, "CBDATA for "DLID4"\n", OLID4(id));
770 LASSERT(id_group(id) < lmv->desc.ld_tgt_count);
772 rc = md_change_cbdata(lmv->tgts[id_group(id)].ltd_exp,
/*
 * Name-aware variant of lmv_change_cbdata().
 *
 * rcid starts as a copy of *cid, and both id groups are asserted to be
 * valid target indices.  The parent's LMV object is looked up; when the
 * directory is split, raw_name2idx() hashes @name to a slave index, rcid
 * is replaced by that slave's id and the target index is taken from
 * id_group(&rcid).  md_change_cbdata() is then issued on the chosen
 * target with rcid.
 */
778 static int lmv_change_cbdata_name(struct obd_export *exp,
779 struct lustre_id *pid,
781 struct lustre_id *cid,
785 struct obd_device *obd = exp->exp_obd;
786 struct lmv_obd *lmv = &obd->u.lmv;
787 struct lustre_id rcid = *cid;
792 rc = lmv_check_connect(obd);
796 LASSERT(id_group(pid) < lmv->desc.ld_tgt_count);
797 LASSERT(id_group(cid) < lmv->desc.ld_tgt_count);
799 CDEBUG(D_OTHER, "CBDATA for "DLID4":%*s -> "DLID4"\n",
800 OLID4(pid), len, name, OLID4(cid));
802 /* this is default mds for directory name belongs to. */
804 obj = lmv_grab_obj(obd, pid);
806 /* directory is splitted. look for right mds for this name. */
807 mds = raw_name2idx(obj->hashtype, obj->objcount, name, len);
808 rcid = obj->objs[mds].id;
809 mds = id_group(&rcid);
812 rc = md_change_cbdata(lmv->tgts[mds].ltd_exp, &rcid, it, data);
/*
 * Forward an attribute-validity query to the target that owns @id.
 *
 * After lmv_check_connect(), id_group(id) is asserted to be below
 * ld_tgt_count and md_valid_attrs() is called on that target's export.
 */
816 static int lmv_valid_attrs(struct obd_export *exp, struct lustre_id *id)
818 struct obd_device *obd = exp->exp_obd;
819 struct lmv_obd *lmv = &obd->u.lmv;
823 rc = lmv_check_connect(obd);
827 CDEBUG(D_OTHER, "validate "DLID4"\n", OLID4(id));
828 LASSERT(id_group(id) < lmv->desc.ld_tgt_count);
829 rc = md_valid_attrs(lmv->tgts[id_group(id)].ltd_exp, id);
/*
 * Forward a close to the target named by obdo->o_mds.
 *
 * After lmv_check_connect(), the index is asserted to be below
 * ld_tgt_count and md_close() is called on that target's export with the
 * caller's arguments unchanged.
 */
833 int lmv_close(struct obd_export *exp, struct obdo *obdo,
834 struct obd_client_handle *och,
835 struct ptlrpc_request **request)
837 struct obd_device *obd = exp->exp_obd;
838 struct lmv_obd *lmv = &obd->u.lmv;
839 int rc, i = obdo->o_mds;
842 rc = lmv_check_connect(obd);
846 LASSERT(i < lmv->desc.ld_tgt_count);
847 CDEBUG(D_OTHER, "CLOSE %lu/%lu/%lu\n", (unsigned long)obdo->o_mds,
848 (unsigned long)obdo->o_id, (unsigned long)obdo->o_generation);
849 rc = md_close(lmv->tgts[i].ltd_exp, obdo, och, request);
/*
 * Fetch the MEA for @id and (re)create the local LMV object from it.
 *
 * md_getattr() is issued on the target that owns @id with the flags
 * OBD_MD_FLEASIZE | OBD_MD_FLDIREA | OBD_MD_MEA and an EA size of
 * MEA_SIZE_LMV(lmv).  The reply is unpacked with mdc_req2lustre_md();
 * one path leaves through the cleanup label with -ENODATA (presumably
 * when no MEA was returned -- confirm).  lmv_create_obj() builds the
 * object from md.mea, after which the MEA is released with
 * obd_free_memmd() and the request with ptlrpc_req_finished().
 */
853 int lmv_get_mea_and_update_object(struct obd_export *exp,
854 struct lustre_id *id)
856 struct obd_device *obd = exp->exp_obd;
857 struct lmv_obd *lmv = &obd->u.lmv;
858 struct ptlrpc_request *req = NULL;
866 mealen = MEA_SIZE_LMV(lmv);
868 valid = OBD_MD_FLEASIZE | OBD_MD_FLDIREA | OBD_MD_MEA;
870 /* time to update mea of parent id */
871 rc = md_getattr(lmv->tgts[id_group(id)].ltd_exp,
872 id, valid, NULL, 0, mealen, &req);
874 CERROR("md_getattr() failed, error %d\n", rc);
878 rc = mdc_req2lustre_md(exp, req, 0, NULL, &md);
880 CERROR("mdc_req2lustre_md() failed, error %d\n", rc);
885 GOTO(cleanup, rc = -ENODATA);
887 obj = lmv_create_obj(exp, id, md.mea);
892 obd_free_memmd(exp, (struct lov_stripe_md **)&md.mea);
897 ptlrpc_req_finished(req);
/*
 * Create a new entry, routing the request to the correct target.
 *
 * After lmv_check_connect(), ld_active_tgt_count is tested for zero.
 * The parent's LMV object is looked up; when it exists, raw_name2idx()
 * hashes the new name and op_data->id1 is replaced by the chosen slave's
 * id.  md_create() is sent to the target given by id_group(id1).  On the
 * success branch the reply body is read from buffer 0 of *request.  On
 * -ERESTART (the directory was split, per the existing comment) the local
 * object is refreshed with lmv_get_mea_and_update_object() and the
 * request is released with ptlrpc_req_finished() so it can be repeated.
 *
 * NOTE(review): LASSERT(++loop <= 2) increments the retry counter inside
 * the assertion; if LASSERT can be compiled out the counter never
 * advances -- confirm.
 */
901 int lmv_create(struct obd_export *exp, struct mdc_op_data *op_data,
902 const void *data, int datalen, int mode, __u32 uid,
903 __u32 gid, __u64 rdev, struct ptlrpc_request **request)
905 struct obd_device *obd = exp->exp_obd;
906 struct lmv_obd *lmv = &obd->u.lmv;
907 struct mds_body *body;
909 int rc, mds, loop = 0;
912 rc = lmv_check_connect(obd);
916 if (!lmv->desc.ld_active_tgt_count)
919 LASSERT(++loop <= 2);
920 obj = lmv_grab_obj(obd, &op_data->id1);
922 mds = raw_name2idx(obj->hashtype, obj->objcount,
923 op_data->name, op_data->namelen);
924 op_data->id1 = obj->objs[mds].id;
928 CDEBUG(D_OTHER, "CREATE '%*s' on "DLID4"\n", op_data->namelen,
929 op_data->name, OLID4(&op_data->id1));
931 rc = md_create(lmv->tgts[id_group(&op_data->id1)].ltd_exp,
932 op_data, data, datalen, mode, uid, gid, rdev,
935 if (*request == NULL)
938 body = lustre_msg_buf((*request)->rq_repmsg, 0,
943 CDEBUG(D_OTHER, "created. "DLID4"\n", OLID4(&op_data->id1));
944 } else if (rc == -ERESTART) {
946 * directory got splitted. time to update local object and
947 * repeat the request with proper MDS.
949 rc = lmv_get_mea_and_update_object(exp, &op_data->id1);
951 ptlrpc_req_finished(*request);
/*
 * Placeholder done-writing handler: after lmv_check_connect() it warns
 * that the method is not implemented and forwards md_done_writing() to
 * target 0 regardless of which target owns the object (see the FIXME).
 */
958 int lmv_done_writing(struct obd_export *exp, struct obdo *obdo)
960 struct obd_device *obd = exp->exp_obd;
961 struct lmv_obd *lmv = &obd->u.lmv;
965 rc = lmv_check_connect(obd);
969 /* FIXME: choose right MDC here */
970 CWARN("this method isn't implemented yet\n");
971 rc = md_done_writing(lmv->tgts[0].ltd_exp, obdo);
/*
 * Take a lock on every slave object listed in data->mea1.
 *
 * A scratch mdc_op_data is allocated and, for each id in the MEA, zeroed
 * and given that id.  The owning target comes from id_group(); its export
 * is tested for NULL before md_enqueue() is issued, with the resulting
 * handle stored at lockh + i.  Any request left in the intent's it_data
 * is released, and a non-zero it_status leaves through the cleanup label
 * with that status.  The scratch buffer is freed on both the normal and
 * the cleanup path; the cleanup path also calls ldlm_lock_decref() on
 * every handle with a non-zero cookie and zeroes the cookie.
 */
975 int lmv_enqueue_slaves(struct obd_export *exp, int locktype,
976 struct lookup_intent *it, int lockmode,
977 struct mdc_op_data *data, struct lustre_handle *lockh,
978 void *lmm, int lmmsize, ldlm_completion_callback cb_compl,
979 ldlm_blocking_callback cb_blocking, void *cb_data)
981 struct obd_device *obd = exp->exp_obd;
982 struct lmv_obd *lmv = &obd->u.lmv;
983 struct mea *mea = data->mea1;
984 struct mdc_op_data *data2;
988 OBD_ALLOC(data2, sizeof(*data2));
992 LASSERT(mea != NULL);
993 for (i = 0; i < mea->mea_count; i++) {
994 memset(data2, 0, sizeof(*data2));
995 data2->id1 = mea->mea_ids[i];
996 mds = id_group(&data2->id1);
998 if (lmv->tgts[mds].ltd_exp == NULL)
1001 rc = md_enqueue(lmv->tgts[mds].ltd_exp, locktype, it,
1002 lockmode, data2, lockh + i, lmm, lmmsize,
1003 cb_compl, cb_blocking, cb_data);
1005 CDEBUG(D_OTHER, "take lock on slave "DLID4" -> %d/%d\n",
1006 OLID4(&mea->mea_ids[i]), rc, LUSTRE_IT(it)->it_status);
1009 if (LUSTRE_IT(it)->it_data) {
1010 struct ptlrpc_request *req;
1011 req = (struct ptlrpc_request *) LUSTRE_IT(it)->it_data;
1012 ptlrpc_req_finished(req);
1015 if (LUSTRE_IT(it)->it_status)
1016 GOTO(cleanup, rc = LUSTRE_IT(it)->it_status);
1019 OBD_FREE(data2, sizeof(*data2));
1022 OBD_FREE(data2, sizeof(*data2));
1024 /* drop all taken locks */
1026 if (lockh[i].cookie)
1027 ldlm_lock_decref(lockh + i, lockmode);
1028 lockh[i].cookie = 0;
/*
 * Second enqueue stage for an object that lives on another target.
 *
 * The reply body is read from buffer 1 of the request stored in the
 * intent, and its valid mask is tested for OBD_MD_MDS (without that bit
 * there is presumably nothing to do -- confirm).  Otherwise the lock mode
 * and handle obtained so far are saved in pmode/plock and cleared from
 * the intent, a copy of @data is made with id1 replaced by body->id1,
 * DISP_ENQ_COMPLETE is cleared and the first request is released.
 * md_enqueue() is then issued on the target given by
 * id_group(&rdata.id1), and the saved lock is dropped with
 * ldlm_lock_decref().
 */
1033 int lmv_enqueue_remote(struct obd_export *exp, int lock_type,
1034 struct lookup_intent *it, int lock_mode,
1035 struct mdc_op_data *data, struct lustre_handle *lockh,
1036 void *lmm, int lmmsize, ldlm_completion_callback cb_compl,
1037 ldlm_blocking_callback cb_blocking, void *cb_data)
1039 struct ptlrpc_request *req = LUSTRE_IT(it)->it_data;
1040 struct obd_device *obd = exp->exp_obd;
1041 struct lmv_obd *lmv = &obd->u.lmv;
1042 struct lustre_handle plock;
1043 struct mdc_op_data rdata;
1044 struct mds_body *body = NULL;
1048 body = lustre_msg_buf(req->rq_repmsg, 1, sizeof(*body));
1049 LASSERT(body != NULL);
1051 if (!(body->valid & OBD_MD_MDS))
1054 CDEBUG(D_OTHER, "ENQUEUE '%s' on "DLID4" -> "DLID4"\n",
1055 LL_IT2STR(it), OLID4(&data->id1), OLID4(&body->id1));
1057 /* we got LOOKUP lock, but we really need attrs */
1058 pmode = LUSTRE_IT(it)->it_lock_mode;
1059 LASSERT(pmode != 0);
1060 memcpy(&plock, lockh, sizeof(plock));
1061 LUSTRE_IT(it)->it_lock_mode = 0;
1062 LUSTRE_IT(it)->it_data = NULL;
1063 LASSERT((body->valid & OBD_MD_FID) != 0);
1065 memcpy(&rdata, data, sizeof(rdata));
1066 rdata.id1 = body->id1;
1070 LUSTRE_IT(it)->it_disposition &= ~DISP_ENQ_COMPLETE;
1071 ptlrpc_req_finished(req);
1073 rc = md_enqueue(lmv->tgts[id_group(&rdata.id1)].ltd_exp,
1074 lock_type, it, lock_mode, &rdata, lockh, lmm,
1075 lmmsize, cb_compl, cb_blocking, cb_data);
1076 ldlm_lock_decref(&plock, pmode);
/*
 * Intent-lock enqueue entry point.
 *
 * After lmv_check_connect():
 *  - an IT_UNLINK intent with data->mea1 set is handed to
 *    lmv_enqueue_slaves();
 *  - when a name is supplied, the parent's LMV object is looked up and,
 *    for a split directory, raw_name2idx() selects the slave whose id
 *    replaces data->id1;
 *  - md_enqueue() is issued on the target given by id_group(&data->id1);
 *  - after a successful IT_OPEN enqueue, lmv_enqueue_remote() is called.
 */
1080 int lmv_enqueue(struct obd_export *exp, int lock_type,
1081 struct lookup_intent *it, int lock_mode,
1082 struct mdc_op_data *data, struct lustre_handle *lockh,
1083 void *lmm, int lmmsize, ldlm_completion_callback cb_compl,
1084 ldlm_blocking_callback cb_blocking, void *cb_data)
1086 struct obd_device *obd = exp->exp_obd;
1087 struct lmv_obd *lmv = &obd->u.lmv;
1088 struct lmv_obj *obj;
1092 rc = lmv_check_connect(obd);
1096 if (data->mea1 && it->it_op == IT_UNLINK) {
1097 rc = lmv_enqueue_slaves(exp, lock_type, it, lock_mode,
1098 data, lockh, lmm, lmmsize,
1099 cb_compl, cb_blocking, cb_data);
1103 if (data->namelen) {
1104 obj = lmv_grab_obj(obd, &data->id1);
1106 /* directory is splitted. look for right mds for this
1108 mds = raw_name2idx(obj->hashtype, obj->objcount,
1109 (char *)data->name, data->namelen);
1110 data->id1 = obj->objs[mds].id;
1114 CDEBUG(D_OTHER, "ENQUEUE '%s' on "DLID4"\n", LL_IT2STR(it),
1117 rc = md_enqueue(lmv->tgts[id_group(&data->id1)].ltd_exp,
1118 lock_type, it, lock_mode, data, lockh, lmm,
1119 lmmsize, cb_compl, cb_blocking, cb_data);
1120 if (rc == 0 && it->it_op == IT_OPEN)
1121 rc = lmv_enqueue_remote(exp, lock_type, it, lock_mode,
1122 data, lockh, lmm, lmmsize,
1123 cb_compl, cb_blocking, cb_data);
/*
 * Look up @filename under @id and fetch its attributes.
 *
 * rid starts as a copy of *id.  When the parent has an LMV object (split
 * directory), raw_name2idx() is called with namelen - 1 and rid becomes
 * the chosen slave's id.  md_getattr_lock() is sent to the target given
 * by id_group(&rid) with OBD_MD_FID added to @valid.  If the reply body
 * has OBD_MD_MDS set (a cross-node reference, per the existing comment),
 * a second md_getattr_lock() with a NULL name and a length of 1 is issued
 * and the first request is released.  On -ERESTART the local object is
 * refreshed with lmv_get_mea_and_update_object() and the request is
 * released so the lookup can be repeated.
 *
 * NOTE(review): LASSERT(++loop <= 2) increments the retry counter inside
 * the assertion; if LASSERT can be compiled out the counter never
 * advances -- confirm.
 */
1127 int lmv_getattr_lock(struct obd_export *exp, struct lustre_id *id,
1128 char *filename, int namelen, __u64 valid,
1129 unsigned int ea_size, struct ptlrpc_request **request)
1131 int rc, mds = id_group(id), loop = 0;
1132 struct obd_device *obd = exp->exp_obd;
1133 struct lmv_obd *lmv = &obd->u.lmv;
1134 struct lustre_id rid = *id;
1135 struct mds_body *body;
1136 struct lmv_obj *obj;
1139 rc = lmv_check_connect(obd);
1143 LASSERT(++loop <= 2);
1144 obj = lmv_grab_obj(obd, id);
1146 /* directory is splitted. look for right mds for this name */
1147 mds = raw_name2idx(obj->hashtype, obj->objcount,
1148 filename, namelen - 1);
1149 rid = obj->objs[mds].id;
1153 CDEBUG(D_OTHER, "getattr_lock for %*s on "DLID4" -> "DLID4"\n",
1154 namelen, filename, OLID4(id), OLID4(&rid));
1156 rc = md_getattr_lock(lmv->tgts[id_group(&rid)].ltd_exp, &rid,
1157 filename, namelen, (valid | OBD_MD_FID),
1161 * this could be cross-node reference. in this case all we have
1162 * right now is lustre_id triple. we'd like to find other
1165 body = lustre_msg_buf((*request)->rq_repmsg, 0, sizeof(*body));
1166 LASSERT(body != NULL);
1167 LASSERT((body->valid & OBD_MD_FID) != 0);
1169 if (body->valid & OBD_MD_MDS) {
1170 struct ptlrpc_request *req = NULL;
1173 CDEBUG(D_OTHER, "request attrs for "DLID4"\n", OLID4(&rid));
1175 rc = md_getattr_lock(lmv->tgts[id_group(&rid)].ltd_exp,
1176 &rid, NULL, 1, valid, ea_size, &req);
1177 ptlrpc_req_finished(*request);
1180 } else if (rc == -ERESTART) {
1181 /* directory got splitted. time to update local object and
1182 * repeat the request with proper MDS */
1183 rc = lmv_get_mea_and_update_object(exp, &rid);
1185 ptlrpc_req_finished(*request);
1193 * llite passes id of a target inode in data->id1 and id of directory in
/*
 * Forward a link request to the appropriate target.
 *
 * With a non-empty name (the "usual link request"), the LMV object for
 * data->id1 is looked up and, when present, raw_name2idx() picks the
 * slave whose id replaces data->id1; rc temporarily holds that index.
 * With an empty name, the existing comment describes an MDS request to
 * acquire i_links for the inode named by id1.  In both cases md_link()
 * is sent to the target given by id_group(&data->id1).
 */
1196 int lmv_link(struct obd_export *exp, struct mdc_op_data *data,
1197 struct ptlrpc_request **request)
1199 struct obd_device *obd = exp->exp_obd;
1200 struct lmv_obd *lmv = &obd->u.lmv;
1201 struct lmv_obj *obj;
1205 rc = lmv_check_connect(obd);
1209 if (data->namelen != 0) {
1210 /* usual link request */
1211 obj = lmv_grab_obj(obd, &data->id1);
1213 rc = raw_name2idx(obj->hashtype, obj->objcount,
1214 data->name, data->namelen);
1215 data->id1 = obj->objs[rc].id;
1219 CDEBUG(D_OTHER,"link "DLID4":%*s to "DLID4"\n",
1220 OLID4(&data->id2), data->namelen, data->name,
1223 /* request from MDS to acquire i_links for inode by id1 */
1224 CDEBUG(D_OTHER, "inc i_nlinks for "DLID4"\n",
1228 rc = md_link(lmv->tgts[id_group(&data->id1)].ltd_exp,
/*
 * Forward a rename, resolving split directories on both sides.
 *
 * Two cases are visible:
 *  - an MDS-originated request asking another MDS to create the new name:
 *    the target comes from id_group(&data->id2) and, if the target
 *    directory has an LMV object, raw_name2idx() on @new replaces
 *    data->id2 with the slave id;
 *  - an ordinary rename: data->id1 is resolved through its LMV object
 *    using @old and data->id2 through its LMV object using @new, after
 *    which the target is taken from id_group(&data->id1).
 * A rename whose two ids end up in different groups is logged as
 * cross-node.  md_rename() is issued on lmv->tgts[mds].
 *
 * NOTE(review): the condition that selects between the two cases is not
 * visible in this excerpt -- confirm against the full source.
 */
1233 int lmv_rename(struct obd_export *exp, struct mdc_op_data *data,
1234 const char *old, int oldlen, const char *new, int newlen,
1235 struct ptlrpc_request **request)
1237 struct obd_device *obd = exp->exp_obd;
1238 struct lmv_obd *lmv = &obd->u.lmv;
1239 struct lmv_obj *obj;
1243 CDEBUG(D_OTHER, "rename %*s in "DLID4" to %*s in "DLID4"\n",
1244 oldlen, old, OLID4(&data->id1), newlen, new,
1247 rc = lmv_check_connect(obd);
1253 * MDS with old dir entry is asking another MDS to create name
1257 "create %*s(%d/%d) in "DLID4" pointing "
1258 "to "DLID4"\n", newlen, new, oldlen, newlen,
1259 OLID4(&data->id2), OLID4(&data->id1));
1261 mds = id_group(&data->id2);
1264 * target directory can be splitted, sowe should forward request
1267 obj = lmv_grab_obj(obd, &data->id2);
1269 mds = raw_name2idx(obj->hashtype, obj->objcount,
1270 (char *)new, newlen);
1271 data->id2 = obj->objs[mds].id;
1272 CDEBUG(D_OTHER, "forward to MDS #%u ("DLID4")\n", mds,
1279 obj = lmv_grab_obj(obd, &data->id1);
1282 * directory is already splitted, so we have to forward request
1285 mds = raw_name2idx(obj->hashtype, obj->objcount,
1286 (char *)old, oldlen);
1287 data->id1 = obj->objs[mds].id;
1288 CDEBUG(D_OTHER, "forward to MDS #%u ("DLID4")\n", mds,
1293 obj = lmv_grab_obj(obd, &data->id2);
1296 * directory is already splitted, so we have to forward request
1299 mds = raw_name2idx(obj->hashtype, obj->objcount,
1300 (char *)new, newlen);
1302 data->id2 = obj->objs[mds].id;
1303 CDEBUG(D_OTHER, "forward to MDS #%u ("DLID4")\n", mds,
1308 mds = id_group(&data->id1);
1311 if (id_group(&data->id1) != id_group(&data->id2)) {
1312 CDEBUG(D_OTHER,"cross-node rename "DLID4"/%*s to "DLID4"/%*s\n",
1313 OLID4(&data->id1), oldlen, old, OLID4(&data->id2),
1317 rc = md_rename(lmv->tgts[mds].ltd_exp, data, old, oldlen,
1318 new, newlen, request);
/*
 * Apply a setattr, fanning out to all slaves of a split object.
 *
 * The LMV object for data->id1 is looked up after lmv_check_connect().
 * When it exists, data->id1 is set to each slave id in turn and
 * md_setattr() is sent to that slave's target using a local request
 * pointer; the existing comment says the master object's request is the
 * one returned to llite, and ptlrpc_req_finished() is called on the
 * local request on another branch.  Without an object, the group index is
 * asserted to be valid, a single md_setattr() is issued with the caller's
 * @request, and the reply body is checked to carry OBD_MD_FID and the
 * same group as the request.
 */
1322 int lmv_setattr(struct obd_export *exp, struct mdc_op_data *data,
1323 struct iattr *iattr, void *ea, int ealen, void *ea2,
1324 int ea2len, struct ptlrpc_request **request)
1326 struct obd_device *obd = exp->exp_obd;
1327 struct lmv_obd *lmv = &obd->u.lmv;
1328 struct ptlrpc_request *req;
1329 struct mds_body *body;
1330 struct lmv_obj *obj;
1334 rc = lmv_check_connect(obd);
1338 obj = lmv_grab_obj(obd, &data->id1);
1340 CDEBUG(D_OTHER, "SETATTR for "DLID4", valid 0x%x%s\n",
1341 OLID4(&data->id1), iattr->ia_valid, obj ? ", splitted" : "");
1344 for (i = 0; i < obj->objcount; i++) {
1345 data->id1 = obj->objs[i].id;
1347 rc = md_setattr(lmv->tgts[id_group(&data->id1)].ltd_exp,
1348 data, iattr, ea, ealen, ea2, ea2len, &req);
1350 if (id_equal_fid(&obj->id, &obj->objs[i].id)) {
1352 * this is master object and this request should
1353 * be returned back to llite.
1357 ptlrpc_req_finished(req);
1365 LASSERT(id_group(&data->id1) < lmv->desc.ld_tgt_count);
1366 rc = md_setattr(lmv->tgts[id_group(&data->id1)].ltd_exp,
1367 data, iattr, ea, ealen, ea2, ea2len, request);
1369 body = lustre_msg_buf((*request)->rq_repmsg, 0,
1371 LASSERT(body != NULL);
1372 LASSERT((body->valid & OBD_MD_FID) != 0);
1373 LASSERT(id_group(&body->id1) == id_group(&data->id1));
/*
 * Ensure the targets are connected, then forward md_sync() to the target
 * given by id_group(id).
 */
1379 int lmv_sync(struct obd_export *exp, struct lustre_id *id,
1380 struct ptlrpc_request **request)
1382 struct obd_device *obd = exp->exp_obd;
1383 struct lmv_obd *lmv = &obd->u.lmv;
1387 rc = lmv_check_connect(obd);
1391 rc = md_sync(lmv->tgts[id_group(id)].ltd_exp,
/*
 * Blocking AST for locks taken on directory objects.
 *
 * LDLM_CB_BLOCKING: converts the lock to a handle with
 * ldlm_lock2handle() and cancels it with ldlm_cli_cancel(), logging the
 * result.
 * LDLM_CB_CANCELING: reads the LMV object from lock->l_ast_data and logs
 * the cancellation, naming the lock "LOOKUP" when lr_name.name[3] == 1
 * and "UPDATE" otherwise.  The existing comment says cached attributes
 * for the dirobj are dropped here; the statements that do so are not
 * visible in this excerpt.
 */
1396 int lmv_dirobj_blocking_ast(struct ldlm_lock *lock,
1397 struct ldlm_lock_desc *desc,
1398 void *data, int flag)
1400 struct lustre_handle lockh;
1401 struct lmv_obj *obj;
1406 case LDLM_CB_BLOCKING:
1407 ldlm_lock2handle(lock, &lockh);
1408 rc = ldlm_cli_cancel(&lockh);
1410 CDEBUG(D_INODE, "ldlm_cli_cancel: %d\n", rc);
1414 case LDLM_CB_CANCELING:
1415 /* time to drop cached attrs for dirobj */
1416 obj = lock->l_ast_data;
1418 CDEBUG(D_OTHER, "cancel %s on "LPU64"/"LPU64
1419 ", master "DLID4"\n",
1420 lock->l_resource->lr_name.name[3] == 1 ?
1421 "LOOKUP" : "UPDATE",
1422 lock->l_resource->lr_name.name[0],
1423 lock->l_resource->lr_name.name[1],
/*
 * Scan a directory page for the "." and ".." entries.
 *
 * The page is treated as a sequence of struct ext2_dir_entry_2 records
 * starting at offset 0.  The walk continues while the offset is at most
 * PAGE_CACHE_SIZE - EXT2_DIR_REC_LEN(1) and advances by each record's
 * little-endian rec_len.  Entries named "." or ".." are matched by
 * name_len and name bytes; the action taken on a match is not visible in
 * this excerpt (presumably the entry is invalidated -- confirm).
 *
 * NOTE(review): a record with rec_len == 0 would keep the offset from
 * advancing -- confirm that page contents are validated before this runs.
 */
1434 void lmv_remove_dots(struct page *page)
1436 unsigned limit = PAGE_CACHE_SIZE;
1437 char *kaddr = page_address(page);
1438 struct ext2_dir_entry_2 *p;
1439 unsigned offs, rec_len;
1441 for (offs = 0; offs <= limit - EXT2_DIR_REC_LEN(1); offs += rec_len) {
1442 p = (struct ext2_dir_entry_2 *)(kaddr + offs);
1443 rec_len = le16_to_cpu(p->rec_len);
1445 if ((p->name_len == 1 && p->name[0] == '.') ||
1446 (p->name_len == 2 && p->name[0] == '.' && p->name[1] == '.'))
/*
 * Read one directory page, mapping @offset onto a slave of a split
 * directory.
 *
 * rid starts as a copy of *id and id_group(id) is asserted to be a valid
 * target index.  When an LMV object exists for @id, its slaves are walked
 * in order: each slave's size is subtracted from @offset until the offset
 * falls inside a slave, whose id then becomes rid; the object is released
 * with lmv_unlock_obj().  md_readpage() is sent to the target given by
 * id_group(&rid).  When the read succeeds and rid is not the original id,
 * lmv_remove_dots() is applied to the page so that "." and ".." are not
 * duplicated.
 */
1451 int lmv_readpage(struct obd_export *exp, struct lustre_id *id,
1452 __u64 offset, struct page *page,
1453 struct ptlrpc_request **request)
1455 struct obd_device *obd = exp->exp_obd;
1456 struct lmv_obd *lmv = &obd->u.lmv;
1457 struct lustre_id rid = *id;
1458 struct lmv_obj *obj;
1462 #warning "we need well-desgined readdir() implementation"
1463 rc = lmv_check_connect(obd);
1467 LASSERT(id_group(id) < lmv->desc.ld_tgt_count);
1468 CDEBUG(D_OTHER, "READPAGE at %llu from "DLID4"\n",
1469 offset, OLID4(&rid));
1471 obj = lmv_grab_obj(obd, id);
1475 /* find dirobj containing page with requested offset. */
1476 for (i = 0; i < obj->objcount; i++) {
1477 if (offset < obj->objs[i].size)
1479 offset -= obj->objs[i].size;
1481 rid = obj->objs[i].id;
1483 lmv_unlock_obj(obj);
1486 CDEBUG(D_OTHER, "forward to "DLID4" with offset %lu\n",
1487 OLID4(&rid), (unsigned long)offset);
1489 rc = md_readpage(lmv->tgts[id_group(&rid)].ltd_exp, &rid,
1490 offset, page, request);
1492 if (rc == 0 && !id_equal_fid(&rid, id))
1493 /* this page isn't from master object. To avoid "." and ".."
1494 * duplication in directory, we have to remove them from all
1496 lmv_remove_dots(page);
/*
 * lmv_unlink_slaves() - issue an unlink for every id listed in data->mea1.
 *
 * A scratch mdc_op_data (data2) is allocated once and reused: for each of
 * the mea->mea_count ids it is zeroed, loaded with the id and with
 * create_mode MDS_MODE_DONT_LOCK | S_IFDIR, and passed to md_unlink() on
 * the target selected by id_group() of that id. Targets whose export is
 * NULL are tested for explicitly. The scratch buffer is freed at the end.
 *
 * NOTE(review): this excerpt omits some short lines (allocation-failure
 * handling, the remaining md_unlink() arguments, the loop exit conditions
 * and the return statement).
 */
1501 int lmv_unlink_slaves(struct obd_export *exp, struct mdc_op_data *data,
1502 struct ptlrpc_request **req)
1504 struct obd_device *obd = exp->exp_obd;
1505 struct lmv_obd *lmv = &obd->u.lmv;
1506 struct mea *mea = data->mea1;
1507 struct mdc_op_data *data2;
1511 OBD_ALLOC(data2, sizeof(*data2));
1515 LASSERT(mea != NULL);
1516 for (i = 0; i < mea->mea_count; i++) {
/* reset the scratch op data so nothing leaks from the previous pass */
1517 memset(data2, 0, sizeof(*data2));
1518 data2->id1 = mea->mea_ids[i];
1519 data2->create_mode = MDS_MODE_DONT_LOCK | S_IFDIR;
1521 if (lmv->tgts[id_group(&data2->id1)].ltd_exp == NULL)
1524 rc = md_unlink(lmv->tgts[id_group(&data2->id1)].ltd_exp,
1527 CDEBUG(D_OTHER, "unlink slave "DLID4" -> %d\n",
1528 OLID4(&mea->mea_ids[i]), rc);
/* drop the request handed back through @req */
1531 ptlrpc_req_finished(*req);
1537 OBD_FREE(data2, sizeof(*data2));
/*
 * lmv_delete_inode() - drop the cached lmv object for @id.
 *
 * Calls lmv_delete_obj(exp, id) and logs a debug message when that call
 * returns non-zero.
 *
 * NOTE(review): the connection check (if any), the remaining CDEBUG
 * arguments and the return statement are not visible in this excerpt.
 */
1541 int lmv_delete_inode(struct obd_export *exp, struct lustre_id *id)
1546 if (lmv_delete_obj(exp, id)) {
1547 CDEBUG(D_OTHER, "lmv object "DLID4" is destroyed.\n",
/*
 * lmv_unlink() - unlink entry point of the LMV layer.
 *
 * Two cases are visible:
 *  - data->namelen == 0 with data->mea1 set: the MDS asks to remove slave
 *    objects, handled by lmv_unlink_slaves();
 *  - data->namelen != 0: a cached object for data->id1 is looked up and the
 *    name is hashed with raw_name2idx() to pick the entry of obj->objs[]
 *    whose id replaces data->id1.
 * The request is finally sent with md_unlink() to the target selected by
 * id_group(&data->id1).
 *
 * NOTE(review): this excerpt omits some short lines (braces, local
 * declarations, else branches, the rest of the md_unlink() call and the
 * return statements).
 */
1553 int lmv_unlink(struct obd_export *exp, struct mdc_op_data *data,
1554 struct ptlrpc_request **request)
1556 struct obd_device *obd = exp->exp_obd;
1557 struct lmv_obd *lmv = &obd->u.lmv;
1561 rc = lmv_check_connect(obd);
1565 if (data->namelen == 0 && data->mea1 != NULL) {
1566 /* mds asks to remove slave objects */
1567 rc = lmv_unlink_slaves(exp, data, request);
1571 if (data->namelen != 0) {
1572 struct lmv_obj *obj;
1574 obj = lmv_grab_obj(obd, &data->id1);
/* map the name to the directory object that holds it */
1576 i = raw_name2idx(obj->hashtype, obj->objcount,
1577 data->name, data->namelen);
1578 data->id1 = obj->objs[i].id;
1581 CDEBUG(D_OTHER, "unlink '%*s' in "DLID4" -> %u\n",
1582 data->namelen, data->name, OLID4(&data->id1),
1585 CDEBUG(D_OTHER, "drop i_nlink on "DLID4"\n",
1588 rc = md_unlink(lmv->tgts[id_group(&data->id1)].ltd_exp,
/*
 * lmv_get_real_obd() - find the obd device of the target that serves @id.
 *
 * After lmv_check_connect(), the target is picked with id_group(id) and
 * the exp_obd of its export is taken. A connection-check failure is
 * returned as ERR_PTR(rc).
 *
 * NOTE(review): ltd_exp is dereferenced without a visible NULL test,
 * whereas lmv_unlink_slaves() and lmv_init_ea_size() test it explicitly.
 * Confirm it cannot be NULL here. The final return statement is not
 * visible in this excerpt.
 */
1593 struct obd_device *lmv_get_real_obd(struct obd_export *exp,
1594 struct lustre_id *id)
1596 struct obd_device *obd = exp->exp_obd;
1597 struct lmv_obd *lmv = &obd->u.lmv;
1601 rc = lmv_check_connect(obd);
1603 RETURN(ERR_PTR(rc));
1604 obd = lmv->tgts[id_group(id)].ltd_exp->exp_obd;
/*
 * lmv_init_ea_size() - record larger EA/cookie sizes and pass them on.
 *
 * lmv->max_easize and lmv->max_cookiesize are raised when the supplied
 * values are larger. The code tests lmv->connected == 0 before the loop
 * that calls obd_init_ea_size() on each of the ld_tgt_count targets; a
 * target with a NULL export produces a warning, and a failing call is
 * reported with CERROR.
 *
 * NOTE(review): this excerpt omits the last line of the parameter list
 * (cookiesize is used in the body), the use of the local "change", and the
 * statements that follow each test.
 */
1610 int lmv_init_ea_size(struct obd_export *exp, int easize,
1613 struct obd_device *obd = exp->exp_obd;
1614 struct lmv_obd *lmv = &obd->u.lmv;
1615 int i, rc = 0, change = 0;
1618 if (lmv->max_easize < easize) {
1619 lmv->max_easize = easize;
1622 if (lmv->max_cookiesize < cookiesize) {
1623 lmv->max_cookiesize = cookiesize;
1629 if (lmv->connected == 0)
1632 for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
1633 if (lmv->tgts[i].ltd_exp == NULL) {
1634 CWARN("%s: NULL export for %d\n", obd->obd_name, i);
1638 rc = obd_init_ea_size(lmv->tgts[i].ltd_exp, easize, cookiesize);
1640 CERROR("obd_init_ea_size() failed on MDT target %d, "
1641 "error %d.\n", i, rc);
/*
 * lmv_obd_create_single() - create one object on the target named by
 * oa->o_mds.
 *
 * @ea must be NULL and oa->o_mds must be a valid target index; both are
 * asserted. The create is forwarded with obd_create() to that target. A
 * local lov_stripe_md is passed as the stripe-md output and is not handed
 * back to the caller in the visible code.
 *
 * NOTE(review): the return statement is not visible in this excerpt.
 */
1648 int lmv_obd_create_single(struct obd_export *exp, struct obdo *oa,
1649 void *acl, int acl_size,
1650 struct lov_stripe_md **ea, struct obd_trans_info *oti)
1652 struct obd_device *obd = exp->exp_obd;
1653 struct lmv_obd *lmv = &obd->u.lmv;
1654 struct lov_stripe_md obj_md;
1655 struct lov_stripe_md *obj_mdp = &obj_md;
1659 LASSERT(ea == NULL);
1660 LASSERT(oa->o_mds < lmv->desc.ld_tgt_count);
1662 rc = obd_create(lmv->tgts[oa->o_mds].ltd_exp, oa,
1663 acl, acl_size, &obj_mdp, oti);
/*
 * lmv_getready() - o_getready method of the LMV obd.
 *
 * The only visible action is a call to lmv_check_connect() on the export's
 * obd device.
 *
 * NOTE(review): the return statement is not visible in this excerpt.
 */
1668 int lmv_getready(struct obd_export *exp)
1670 struct obd_device *obd = exp->exp_obd;
1674 rc = lmv_check_connect(obd);
1679 * to be called from MDS only. @oa should have correct store cookie and o_fid
1680 * values for "master" object, as it will be used.
/*
 * lmv_obd_create() - o_create method of the LMV obd.
 *
 * Two paths are visible. One forwards to lmv_obd_create_single(). The
 * other asserts that no acl was passed, obtains a struct mea through
 * obd_alloc_diskmd() and fills it: mea_magic is MEA_MAGIC_ALL_CHARS,
 * mea_count is clamped to the number of targets, and the loop stores one
 * id per slot. A target with a NULL export is treated as the master and
 * gets the saved master id (mid); the other slots receive the id of an
 * object created with obd_create(), converted by obdo2id().
 *
 * NOTE(review): this excerpt omits some short lines (the condition that
 * selects the single-object path, local declarations, the assignment of
 * mid, the loop counter updates and the return statements). Several of the
 * original block comments have also lost their opening or closing line.
 */
1682 int lmv_obd_create(struct obd_export *exp, struct obdo *oa,
1683 void *acl, int acl_size,
1684 struct lov_stripe_md **ea, struct obd_trans_info *oti)
1686 struct obd_device *obd = exp->exp_obd;
1687 struct lmv_obd *lmv = &obd->u.lmv;
1688 struct lustre_id mid;
1693 rc = lmv_check_connect(obd);
1697 LASSERT(oa != NULL);
1700 rc = lmv_obd_create_single(exp, oa, acl, acl_size, NULL, oti);
1702 CERROR("Can't create object, rc = %d\n", rc);
1706 /* acl is only supplied when mds creates a single remote obj */
1707 LASSERT(acl == NULL && acl_size == 0);
1710 rc = obd_alloc_diskmd(exp, (struct lov_mds_md **)ea);
1712 CERROR("obd_alloc_diskmd() failed, error %d\n",
1723 * here we should take care about splitted dir, so store cookie and fid
1724 * for "master" object should already be allocated and passed in @oa.
1726 LASSERT(oa->o_id != 0);
1727 LASSERT(oa->o_fid != 0);
1729 /* save "master" object id */
1732 mea = (struct mea *)*ea;
1733 mea->mea_master = -1;
1734 mea->mea_magic = MEA_MAGIC_ALL_CHARS;
/* a zero or oversized count means one slot per target */
1736 if (!mea->mea_count || mea->mea_count > lmv->desc.ld_tgt_count)
1737 mea->mea_count = lmv->desc.ld_tgt_count;
/* i walks the targets, c counts the mea slots filled so far */
1739 for (i = 0, c = 0; c < mea->mea_count && i < lmv->desc.ld_tgt_count; i++) {
1740 struct lov_stripe_md obj_md;
1741 struct lov_stripe_md *obj_mdp = &obj_md;
1743 if (lmv->tgts[i].ltd_exp == NULL) {
1744 /* this is "master" MDS */
1745 mea->mea_master = i;
1746 mea->mea_ids[c] = mid;
1752 * "master" MDS should always be part of stripped dir,
/* only one slot is left and the master has not been seen yet */
1755 if (mea->mea_master == -1 && c == mea->mea_count - 1)
1758 oa->o_valid = OBD_MD_FLGENER | OBD_MD_FLTYPE | OBD_MD_FLMODE |
1759 OBD_MD_FLUID | OBD_MD_FLGID | OBD_MD_FLID;
1761 rc = obd_create(lmv->tgts[c].ltd_exp, oa, NULL, 0,
1764 CERROR("obd_create() failed on MDT target %d, "
1765 "error %d\n", c, rc);
1769 CDEBUG(D_OTHER, "dirobj at mds %d: "LPU64"/%u\n",
1770 i, oa->o_id, oa->o_generation);
1774 * here, when object is created (or it is master and was passed
1775 * from caller) on desired MDS we save its fid to local mea_ids.
1780 * store cookie should be defined here for both cases (master
1781 * object and not master), because master is already created.
1785 /* fill mea by store cookie and fid */
1786 obdo2id(&mea->mea_ids[c], oa);
1789 LASSERT(c == mea->mea_count);
1791 CDEBUG(D_OTHER, "%d dirobjects created\n",
1792 (int)mea->mea_count);
/*
 * lmv_llog_init() - o_llog_init method of the LMV obd.
 *
 * Sets up the LLOG_CONFIG_REPL_CTXT context with obd_llog_setup(), then
 * fetches that context and points its loc_imp at the client import of
 * @tgt.
 *
 * NOTE(review): the remaining obd_llog_setup() arguments, the check of its
 * result and the return statement are not visible in this excerpt.
 */
1797 static int lmv_llog_init(struct obd_device *obd, struct obd_llogs *llogs,
1798 struct obd_device *tgt, int count,
1799 struct llog_catid *logid)
1801 struct llog_ctxt *ctxt;
1805 rc = obd_llog_setup(obd, llogs, LLOG_CONFIG_REPL_CTXT, tgt, 0, NULL,
1808 ctxt = llog_get_context(llogs, LLOG_CONFIG_REPL_CTXT);
1809 ctxt->loc_imp = tgt->u.cli.cl_import;
/*
 * lmv_llog_finish() - o_llog_finish method of the LMV obd.
 *
 * Cleans up the LLOG_CONFIG_REPL_CTXT context of @llogs with
 * obd_llog_cleanup().
 *
 * NOTE(review): the return statement is not visible in this excerpt.
 */
1815 static int lmv_llog_finish(struct obd_device *obd,
1816 struct obd_llogs *llogs, int count)
1821 rc = obd_llog_cleanup(llog_get_context(llogs, LLOG_CONFIG_REPL_CTXT));
/*
 * lmv_get_info() - o_get_info method of the LMV obd.
 *
 * Dispatches on @key (length @keylen) and writes the answer through @val
 * and @vallen. Keys handled in the visible code:
 *   "mdsize"      - size of a struct mea with one lustre_id per target;
 *   "mdsnum"      - search of the target list for the uuid equal to
 *                   lmv->cluuid;
 *   "rootid"      - forwarded to the first target;
 *   "lmvdesc"     - copy of lmv->desc;
 *   "remote_flag" - queried on the targets with obd_get_info().
 * Any other key logs "invalid key".
 *
 * NOTE(review): this excerpt omits some short lines (the NULL test on obd,
 * the assignment of lmv, and the stores and returns inside each branch).
 * The "lmvdesc" branch accepts keylen >= strlen() and then uses strcmp(),
 * unlike the exact-length tests of the other keys. Confirm this is
 * intended.
 */
1825 static int lmv_get_info(struct obd_export *exp, __u32 keylen,
1826 void *key, __u32 *vallen, void *val)
1828 struct obd_device *obd;
1829 struct lmv_obd *lmv;
1833 obd = class_exp2obd(exp);
1835 CDEBUG(D_IOCTL, "invalid client cookie "LPX64"\n",
1836 exp->exp_handle.h_cookie);
1841 if (keylen == 6 && memcmp(key, "mdsize", 6) == 0) {
1842 __u32 *mdsize = val;
1843 *vallen = sizeof(__u32);
1844 *mdsize = sizeof(struct lustre_id) * lmv->desc.ld_tgt_count
1845 + sizeof(struct mea);
1847 } else if (keylen == 6 && memcmp(key, "mdsnum", 6) == 0) {
1848 struct obd_uuid *cluuid = &lmv->cluuid;
1849 struct lmv_tgt_desc *tgts;
1850 __u32 *mdsnum = val;
/* look for the target whose uuid equals the LMV's own cluuid */
1853 for (i = 0, tgts = lmv->tgts; i < lmv->desc.ld_tgt_count; i++, tgts++) {
1854 if (obd_uuid_equals(&tgts->uuid, cluuid)) {
1855 *vallen = sizeof(__u32);
1861 } else if (keylen == 6 && memcmp(key, "rootid", 6) == 0) {
1862 /* getting rootid from first MDS. */
1863 rc = obd_get_info(lmv->tgts[0].ltd_exp, keylen, key,
1866 } else if (keylen >= strlen("lmvdesc") && strcmp(key, "lmvdesc") == 0) {
1867 struct lmv_desc *desc_ret = val;
1868 *desc_ret = lmv->desc;
1870 } else if (keylen == strlen("remote_flag") &&
1871 !strcmp(key, "remote_flag")) {
1872 struct lmv_tgt_desc *tgts;
/* the caller must supply a 32-bit value buffer */
1875 LASSERT(*vallen == sizeof(__u32));
1876 for (i = 0, tgts = lmv->tgts; i < lmv->desc.ld_tgt_count;
1879 /* all tgts should be connected when this get called. */
1880 if (!tgts || !tgts->ltd_exp) {
1881 CERROR("target not setup?\n");
1885 if (!obd_get_info(tgts->ltd_exp, keylen, key,
1892 CDEBUG(D_IOCTL, "invalid key\n");
/*
 * lmv_set_info() - o_set_info method of the LMV obd.
 *
 * Keys handled in the visible code:
 *   "inter_mds"    - sets lmv->server_timeout and calls lmv_set_timeouts();
 *   "sec" / "nllu" - passed to the targets with obd_set_info(). For a
 *                    target handled by the lookup branch, the device is
 *                    found with class_find_client_obd() and its
 *                    obd_self_export is used instead.
 *
 * NOTE(review): this excerpt omits some short lines (the NULL test on obd,
 * the assignment of lmv, the per-target export selection and the return
 * statements).
 * NOTE(review): the inner declaration "struct obd_export *exp" shadows the
 * @exp parameter.
 * NOTE(review): obd_set_info() appears between the visible spin_lock() and
 * spin_unlock() lines. Confirm it cannot sleep while lmv_lock is held.
 */
1896 int lmv_set_info(struct obd_export *exp, obd_count keylen,
1897 void *key, obd_count vallen, void *val)
1899 struct obd_device *obd;
1900 struct lmv_obd *lmv;
1903 obd = class_exp2obd(exp);
1905 CDEBUG(D_IOCTL, "invalid client cookie "LPX64"\n",
1906 exp->exp_handle.h_cookie);
1911 if (keylen >= strlen("inter_mds") && strcmp(key, "inter_mds") == 0) {
1912 lmv->server_timeout = 1;
1913 lmv_set_timeouts(obd);
1917 /* maybe this could be default */
1918 if ((keylen == strlen("sec") && strcmp(key, "sec") == 0) ||
1919 (keylen == strlen("nllu") && strcmp(key, "nllu") == 0)) {
1920 struct lmv_tgt_desc *tgt;
1921 struct obd_export *exp;
1924 spin_lock(&lmv->lmv_lock);
1925 for (i = 0, tgt = lmv->tgts; i < lmv->desc.ld_tgt_count;
1928 /* during setup time the connections to mdc might
1929 * haven't been established.
1932 struct obd_device *tgt_obd;
1934 tgt_obd = class_find_client_obd(&tgt->uuid,
1938 CERROR("can't set info %s, "
1939 "device %s not attached?\n",
1940 (char *) key, tgt->uuid.uuid);
1944 exp = tgt_obd->obd_self_export;
1947 err = obd_set_info(exp, keylen, key, vallen, val);
1951 spin_unlock(&lmv->lmv_lock);
1959 int lmv_packmd(struct obd_export *exp, struct lov_mds_md **lmmp,
1960 struct lov_stripe_md *lsm)
1962 struct obd_device *obd = class_exp2obd(exp);
1963 struct lmv_obd *lmv = &obd->u.lmv;
1964 struct mea *meap, *lsmp;
1968 mea_size = (sizeof(struct lustre_id) *
1969 lmv->desc.ld_tgt_count) + sizeof(struct mea);
1973 if (*lmmp && !lsm) {
1974 OBD_FREE(*lmmp, mea_size);
1979 if (*lmmp == NULL) {
1980 OBD_ALLOC(*lmmp, mea_size);
1988 lsmp = (struct mea *)lsm;
1989 meap = (struct mea *)*lmmp;
1991 if (lsmp->mea_magic != MEA_MAGIC_LAST_CHAR &&
1992 lsmp->mea_magic != MEA_MAGIC_ALL_CHARS)
1995 meap->mea_magic = cpu_to_le32(lsmp->mea_magic);
1996 meap->mea_count = cpu_to_le32(lsmp->mea_count);
1997 meap->mea_master = cpu_to_le32(lsmp->mea_master);
1999 for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
2000 meap->mea_ids[i] = meap->mea_ids[i];
2001 id_cpu_to_le(&meap->mea_ids[i]);
/*
 * lmv_unpackmd() - unpack a little-endian mea buffer into memory.
 *
 * @lmm (of @lmm_size bytes) is the packed form and *@lsmp receives a newly
 * allocated struct mea. With *@lsmp set and @lmm NULL the existing
 * structure is freed instead. A buffer whose magic is neither
 * MEA_MAGIC_LAST_CHAR nor MEA_MAGIC_ALL_CHARS is read as a struct mea_old,
 * sanity-checked (counts limited to 256, size and master index compared)
 * and given the magic MEA_MAGIC_LAST_CHAR. The count, the master index and
 * each id are converted from little-endian.
 *
 * NOTE(review): this excerpt omits some short lines (the declaration of
 * magic, NULL checks, braces and return statements).
 * NOTE(review): mea->mea_magic is compared with the host-order constants
 * before le32_to_cpu() is applied. Confirm behaviour on big-endian hosts.
 * NOTE(review): mea_size is reassigned in the old-format branch, while the
 * OBD_FREE() at the end uses mea_size for a buffer that OBD_ALLOC() sized
 * with the earlier value.
 * NOTE(review): the copy loop is bounded by the mea_count read from @lmm.
 * No visible line compares it with the number of ids the allocation can
 * hold.
 */
2007 int lmv_unpackmd(struct obd_export *exp, struct lov_stripe_md **lsmp,
2008 struct lov_mds_md *lmm, int lmm_size)
2010 struct obd_device *obd = class_exp2obd(exp);
2011 struct mea **tmea = (struct mea **)lsmp;
2012 struct mea *mea = (struct mea *)lmm;
2013 struct lmv_obd *lmv = &obd->u.lmv;
2014 int mea_size, i, rc = 0;
2018 mea_size = sizeof(struct lustre_id) *
2019 lmv->desc.ld_tgt_count + sizeof(struct mea);
2024 if (*lsmp != NULL && lmm == NULL) {
2025 OBD_FREE(*tmea, mea_size);
2029 LASSERT(mea_size == lmm_size);
2031 OBD_ALLOC(*tmea, mea_size);
2038 if (mea->mea_magic == MEA_MAGIC_LAST_CHAR ||
2039 mea->mea_magic == MEA_MAGIC_ALL_CHARS)
2041 magic = le32_to_cpu(mea->mea_magic);
2043 struct mea_old *old = (struct mea_old *)lmm;
2045 mea_size = sizeof(struct lustre_id) * old->mea_count +
2046 sizeof(struct mea_old);
2048 if (old->mea_count > 256 || old->mea_master > 256 ||
2049 lmm_size < mea_size || old->mea_master > old->mea_count) {
2050 CWARN("bad MEA: count %u, master %u, size %u\n",
2051 old->mea_count, old->mea_master, mea_size);
2052 GOTO(out_free_mea, rc = -EINVAL);
2054 magic = MEA_MAGIC_LAST_CHAR;
2057 (*tmea)->mea_magic = magic;
2058 (*tmea)->mea_count = le32_to_cpu(mea->mea_count);
2059 (*tmea)->mea_master = le32_to_cpu(mea->mea_master);
2061 for (i = 0; i < (*tmea)->mea_count; i++) {
2062 (*tmea)->mea_ids[i] = mea->mea_ids[i];
2063 id_le_to_cpu(&(*tmea)->mea_ids[i]);
2068 OBD_FREE(*tmea, mea_size);
/*
 * lmv_brw() - forward a bulk read/write to the target named by oa->o_mds.
 *
 * @ea is read as a struct mea. The id stored at index oa->o_mds supplies
 * the object id (id_ino) and group (id_gen) written into @oa before the
 * request is passed to obd_brw() on that target. @oa, @ea and @pgarr must
 * be non-NULL and oa->o_mds must be a valid target index; all four are
 * asserted.
 *
 * NOTE(review): the return statement is not visible in this excerpt.
 */
2072 int lmv_brw(int rw, struct obd_export *exp, struct obdo *oa,
2073 struct lov_stripe_md *ea, obd_count oa_bufs,
2074 struct brw_page *pgarr, struct obd_trans_info *oti)
2076 struct obd_device *obd = exp->exp_obd;
2077 struct lmv_obd *lmv = &obd->u.lmv;
2078 struct mea *mea = (struct mea *) ea;
2081 LASSERT(oa != NULL);
2082 LASSERT(ea != NULL);
2083 LASSERT(pgarr != NULL);
2084 LASSERT(oa->o_mds < lmv->desc.ld_tgt_count);
/* address the object recorded for this target in the mea */
2086 oa->o_gr = id_gen(&mea->mea_ids[oa->o_mds]);
2087 oa->o_id = id_ino(&mea->mea_ids[oa->o_mds]);
2088 oa->o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
2090 err = obd_brw(rw, lmv->tgts[oa->o_mds].ltd_exp,
2091 oa, NULL, oa_bufs, pgarr, oti);
/*
 * OBD method table of the LMV driver; it is passed to
 * class_register_type() in lmv_init().
 *
 * NOTE(review): at least one initializer line and the closing brace are
 * not visible in this excerpt.
 */
2095 struct obd_ops lmv_obd_ops = {
2096 .o_owner = THIS_MODULE,
2097 .o_attach = lmv_attach,
2098 .o_detach = lmv_detach,
2099 .o_setup = lmv_setup,
2100 .o_cleanup = lmv_cleanup,
2101 .o_connect = lmv_connect,
2102 .o_disconnect = lmv_disconnect,
2103 .o_statfs = lmv_statfs,
2104 .o_llog_init = lmv_llog_init,
2105 .o_llog_finish = lmv_llog_finish,
2106 .o_get_info = lmv_get_info,
2107 .o_set_info = lmv_set_info,
2108 .o_create = lmv_obd_create,
2109 .o_packmd = lmv_packmd,
2110 .o_unpackmd = lmv_unpackmd,
2112 .o_init_ea_size = lmv_init_ea_size,
2113 .o_notify = lmv_notify,
2114 .o_iocontrol = lmv_iocontrol,
2115 .o_getready = lmv_getready,
/*
 * Metadata method table of the LMV driver; it is passed to
 * class_register_type() in lmv_init() together with lmv_obd_ops.
 *
 * NOTE(review): some initializer lines and the closing brace are not
 * visible in this excerpt.
 */
2118 struct md_ops lmv_md_ops = {
2119 .m_getstatus = lmv_getstatus,
2120 .m_getattr = lmv_getattr,
2121 .m_change_cbdata = lmv_change_cbdata,
2122 .m_change_cbdata_name = lmv_change_cbdata_name,
2123 .m_close = lmv_close,
2124 .m_create = lmv_create,
2125 .m_done_writing = lmv_done_writing,
2126 .m_enqueue = lmv_enqueue,
2127 .m_getattr_lock = lmv_getattr_lock,
2128 .m_intent_lock = lmv_intent_lock,
2130 .m_rename = lmv_rename,
2131 .m_setattr = lmv_setattr,
2133 .m_readpage = lmv_readpage,
2134 .m_unlink = lmv_unlink,
2135 .m_get_real_obd = lmv_get_real_obd,
2136 .m_valid_attrs = lmv_valid_attrs,
2137 .m_delete_inode = lmv_delete_inode,
/*
 * lmv_init() - module initialisation.
 *
 * Creates the "lmv_objects" slab cache sized for struct lmv_obj, fills the
 * lprocfs variables and registers the LMV device type with its obd and md
 * method tables under OBD_LMV_DEVICENAME. An error is logged when the
 * cache cannot be created. The visible cleanup path destroys the cache
 * again.
 *
 * NOTE(review): this excerpt omits some short lines (the remaining
 * kmem_cache_create() arguments, the result checks and the return
 * statements).
 */
2140 int __init lmv_init(void)
2142 struct lprocfs_static_vars lvars;
2145 obj_cache = kmem_cache_create("lmv_objects",
2146 sizeof(struct lmv_obj),
2149 CERROR("error allocating lmv objects cache\n");
2153 lprocfs_init_vars(lmv, &lvars);
2154 rc = class_register_type(&lmv_obd_ops, &lmv_md_ops,
2156 OBD_LMV_DEVICENAME);
2158 kmem_cache_destroy(obj_cache);
2164 static void lmv_exit(void)
2166 class_unregister_type(OBD_LMV_DEVICENAME);
2168 LASSERTF(kmem_cache_destroy(obj_cache) == 0,
2169 "can't free lmv objects cache, %d object(s)"
2170 "still in use\n", atomic_read(&obj_cache_count));
/* Module metadata, followed by registration of lmv_init() and lmv_exit()
 * as the module's load and unload handlers. */
2173 MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
2174 MODULE_DESCRIPTION("Lustre Logical Metadata Volume OBD driver");
2175 MODULE_LICENSE("GPL");
2177 module_init(lmv_init);
2178 module_exit(lmv_exit);