1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Copyright (C) 2002, 2003 Cluster File Systems, Inc.
6 * This file is part of Lustre, http://www.lustre.org.
8 * Lustre is free software; you can redistribute it and/or
9 * modify it under the terms of version 2 of the GNU General Public
10 * License as published by the Free Software Foundation.
12 * Lustre is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Lustre; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 # define EXPORT_SYMTAB
25 #define DEBUG_SUBSYSTEM S_LMV
27 #include <linux/slab.h>
28 #include <linux/module.h>
29 #include <linux/init.h>
30 #include <linux/slab.h>
31 #include <linux/pagemap.h>
32 #include <asm/div64.h>
33 #include <linux/seq_file.h>
34 #include <linux/namei.h>
36 #include <liblustre.h>
38 #include <linux/ext2_fs.h>
40 #include <linux/obd_support.h>
41 #include <linux/lustre_lib.h>
42 #include <linux/lustre_net.h>
43 #include <linux/lustre_idl.h>
44 #include <linux/lustre_dlm.h>
45 #include <linux/lustre_mds.h>
46 #include <linux/obd_class.h>
47 #include <linux/obd_ost.h>
48 #include <linux/lprocfs_status.h>
49 #include <linux/lustre_fsfilt.h>
50 #include <linux/obd_lmv.h>
51 #include <linux/lustre_lite.h>
52 #include <linux/lustre_audit.h>
53 #include "lmv_internal.h"
55 /* not defined for liblustre building */
/*
 * Fallback ATOMIC_INIT() initialiser for builds that do not already define
 * it, followed by a kmem_cache_t pointer and an atomic counter that is
 * statically initialised to 0. The users of obj_cache and obj_cache_count
 * are not visible in this part of the file.
 */
56 #if !defined(ATOMIC_INIT)
57 #define ATOMIC_INIT(val) { (val) }
61 kmem_cache_t *obj_cache;
62 atomic_t obj_cache_count = ATOMIC_INIT(0);
/*
 * Set the active flag of one LMV target to the requested value and adjust
 * lmv->desc.ld_active_tgt_count by +1 (activating) or -1 (deactivating).
 *
 * The first visible statement tests whether the flag already holds the
 * requested value; the statement it guards is not visible here
 * (presumably an early return -- TODO confirm). No locking is done in this
 * helper itself.
 */
64 static void lmv_activate_target(struct lmv_obd *lmv,
65 struct lmv_tgt_desc *tgt,
68 if (tgt->active == activate)
71 tgt->active = activate;
72 lmv->desc.ld_active_tgt_count += (activate ? 1 : -1);
77 * -EINVAL : UUID can't be found in the LMV's target list
78 * -ENOTCONN: The UUID is found, but the target connection is bad (!)
79 * -EBADF : The UUID is found, but the OBD is of the wrong type (!)
/*
 * Find the target in lmv->tgts whose UUID equals uuid and switch its
 * active state through lmv_activate_target(). The search and the state
 * change both happen between spin_lock() and spin_unlock() on
 * lmv->lmv_lock.
 *
 * Visible result codes: rc = -EINVAL when the loop reaches ld_tgt_count
 * without a match, and rc = -ENOTCONN on a path right after
 * class_exp2obd() (presumably when no OBD is returned -- the guarding test
 * is not visible here). The OBD type is enforced with LASSERT against
 * OBD_MDC_DEVICENAME. When the target already has the requested state the
 * function only logs and jumps to out_lmv_lock with the current rc.
 */
81 static int lmv_set_mdc_active(struct lmv_obd *lmv, struct obd_uuid *uuid,
84 struct lmv_tgt_desc *tgt;
85 struct obd_device *obd;
89 CDEBUG(D_INFO, "Searching in lmv %p for uuid %s (activate=%d)\n",
90 lmv, uuid->uuid, activate);
92 spin_lock(&lmv->lmv_lock);
93 for (i = 0, tgt = lmv->tgts; i < lmv->desc.ld_tgt_count; i++, tgt++) {
94 if (tgt->ltd_exp == NULL)
97 CDEBUG(D_INFO, "lmv idx %d is %s conn "LPX64"\n",
98 i, tgt->uuid.uuid, tgt->ltd_exp->exp_handle.h_cookie);
100 if (obd_uuid_equals(uuid, &tgt->uuid))
/* The loop ran to the end: no configured target carries this UUID. */
104 if (i == lmv->desc.ld_tgt_count)
105 GOTO(out_lmv_lock, rc = -EINVAL);
107 obd = class_exp2obd(tgt->ltd_exp);
109 GOTO(out_lmv_lock, rc = -ENOTCONN);
111 CDEBUG(D_INFO, "Found OBD %s=%s device %d (%p) type %s at LMV idx %d\n",
112 obd->obd_name, obd->obd_uuid.uuid, obd->obd_minor, obd,
113 obd->obd_type->typ_name, i);
114 LASSERT(strcmp(obd->obd_type->typ_name, OBD_MDC_DEVICENAME) == 0);
116 if (tgt->active == activate) {
117 CDEBUG(D_INFO, "OBD %p already %sactive!\n", obd,
118 activate ? "" : "in");
119 GOTO(out_lmv_lock, rc);
122 CDEBUG(D_INFO, "Marking OBD %p %sactive\n",
123 obd, activate ? "" : "in");
125 lmv_activate_target(lmv, tgt, activate);
130 spin_unlock(&lmv->lmv_lock);
/*
 * Notification handler for a watched OBD.
 *
 * Reports an error through CERROR when the watched device is not of type
 * OBD_MDC_DEVICENAME. Takes the import target UUID of the watched client
 * device, updates the matching LMV target state with
 * lmv_set_mdc_active(), logs a failure of that step, and finally forwards
 * the same notification to obd->obd_observer when one is registered.
 * The returns that follow the error reports are not visible here.
 */
134 static int lmv_notify(struct obd_device *obd, struct obd_device *watched,
135 int active, void *data)
137 struct obd_uuid *uuid;
141 if (strcmp(watched->obd_type->typ_name, OBD_MDC_DEVICENAME)) {
142 CERROR("unexpected notification of %s %s!\n",
143 watched->obd_type->typ_name,
147 uuid = &watched->u.cli.cl_import->imp_target_uuid;
149 /* Set MDC as active before notifying the observer, so the observer can
150 * use the MDC normally.
152 rc = lmv_set_mdc_active(&obd->u.lmv, uuid, active);
154 CERROR("%sactivation of %s failed: %d\n",
155 active ? "" : "de", uuid->uuid, rc);
159 if (obd->obd_observer)
160 /* Pass the notification up the chain. */
161 rc = obd_notify(obd->obd_observer, watched, active, data);
/*
 * Attach-time procfs setup for an LMV device.
 *
 * Initialises the lprocfs variable tables for lmv, attaches them to dev
 * with lprocfs_obd_attach(), and creates a target_obd_status entry
 * (mode 0444) under dev->obd_proc_entry whose file operations are
 * lmv_proc_target_fops. The conditions guarding the entry creation and
 * the fops assignment are not visible here.
 */
166 static int lmv_attach(struct obd_device *dev, obd_count len, void *data)
168 struct lprocfs_static_vars lvars;
172 lprocfs_init_vars(lmv, &lvars);
173 rc = lprocfs_obd_attach(dev, lvars.obd_vars);
176 struct proc_dir_entry *entry;
178 entry = create_proc_entry("target_obd_status", 0444,
179 dev->obd_proc_entry);
182 entry->proc_fops = &lmv_proc_target_fops;
/* Detach counterpart of lmv_attach(): returns the result of lprocfs_obd_detach(). */
189 static int lmv_detach(struct obd_device *dev)
191 return lprocfs_obd_detach(dev);
194 /* This is a fake connect function. Its purpose is to initialize the lmv and
195 * tell the caller that everything is okay. Real connection is performed later. */
/*
 * Connect a client to the LMV device without necessarily connecting the
 * underlying targets.
 *
 * Creates the export with class_connect(), and when lmv->refcount is above
 * 1 drops the export reference taken by class_conn2export() (the return
 * that presumably follows is not visible). Otherwise records the client
 * UUID, the connect flags and a copy of the connect data in the lmv and
 * registers a target_obds procfs directory. Targets are connected
 * immediately, through lmv_check_connect(), only when flags contains
 * OBD_OPT_REAL_CLIENT. The trailing lprocfs_remove() belongs to a path
 * whose condition is not visible here.
 */
196 static int lmv_connect(struct lustre_handle *conn, struct obd_device *obd,
197 struct obd_uuid *cluuid, struct obd_connect_data *data,
201 struct proc_dir_entry *lmv_proc_dir;
203 struct lmv_obd *lmv = &obd->u.lmv;
204 struct obd_export *exp;
208 rc = class_connect(conn, obd, cluuid);
210 CERROR("class_connection() returned %d\n", rc);
214 exp = class_conn2export(conn);
216 /* we don't want to actually do the underlying connections more than
217 * once, so keep track. */
219 if (lmv->refcount > 1) {
220 class_export_put(exp);
226 lmv->cluuid = *cluuid;
227 lmv->connect_flags = flags;
229 memcpy(&lmv->conn_data, data, sizeof(*data));
232 lmv_proc_dir = lprocfs_register("target_obds", obd->obd_proc_entry,
234 if (IS_ERR(lmv_proc_dir)) {
235 CERROR("could not register /proc/fs/lustre/%s/%s/target_obds.",
236 obd->obd_type->typ_name, obd->obd_name);
241 /* all real clients should perform actual connection right away, because
242 * it is possible, that LMV will not have opportunity to connect
243 * targets, as MDC stuff will be called directly, for instance while
244 * reading ../mdc/../kbytesfree procfs file, etc.
246 if (flags & OBD_OPT_REAL_CLIENT)
247 rc = lmv_check_connect(obd);
252 lprocfs_remove(lmv_proc_dir);
/*
 * Send the inter_mds key (NULL value, length 0) with obd_set_info() to the
 * export of each target in lmv->tgts.
 *
 * Three guards are visible: lmv->server_timeout == 0, lmv->connected == 0
 * and a NULL ltd_exp inside the loop. The statements they control are not
 * visible here (presumably early return / continue -- TODO confirm).
 * The return value of obd_set_info() is not used.
 */
259 static void lmv_set_timeouts(struct obd_device *obd)
261 struct lmv_tgt_desc *tgts;
266 if (lmv->server_timeout == 0)
269 if (lmv->connected == 0)
272 for (i = 0, tgts = lmv->tgts; i < lmv->desc.ld_tgt_count; i++, tgts++) {
273 if (tgts->ltd_exp == NULL)
276 obd_set_info(tgts->ltd_exp, strlen("inter_mds"),
277 "inter_mds", 0, NULL);
/*
 * Record new maximum EA and cookie sizes and push them to the targets.
 *
 * lmv->max_easize and lmv->max_cookiesize are only ever raised: each is
 * overwritten when the passed value is larger. The loop then calls
 * obd_init_ea_size() on every target export, warning with CWARN about
 * slots whose export is NULL and logging failures with CERROR.
 * The use of the local flag named change and the action taken when
 * lmv->connected is 0 are not visible here.
 */
281 static int lmv_init_ea_size(struct obd_export *exp, int easize,
284 struct obd_device *obd = exp->exp_obd;
285 struct lmv_obd *lmv = &obd->u.lmv;
286 int i, rc = 0, change = 0;
289 if (lmv->max_easize < easize) {
290 lmv->max_easize = easize;
293 if (lmv->max_cookiesize < cookiesize) {
294 lmv->max_cookiesize = cookiesize;
300 if (lmv->connected == 0)
303 for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
304 if (lmv->tgts[i].ltd_exp == NULL) {
305 CWARN("%s: NULL export for %d\n", obd->obd_name, i);
309 rc = obd_init_ea_size(lmv->tgts[i].ltd_exp, easize, cookiesize);
311 CERROR("obd_init_ea_size() failed on MDT target %d, "
312 "error %d.\n", i, rc);
/*
 * Connect the LMV to a single MDC target described by tgt.
 *
 * Visible sequence:
 *  - a target whose UUID equals the LMV client UUID is logged as a
 *    connection back to self (the handling that follows is not visible);
 *  - the client OBD for tgt->uuid is looked up with
 *    class_find_client_obd() and must be set up;
 *  - obd_connect() is issued with the fixed UUID LMV_MDC_UUID and the
 *    connect data saved in the lmv;
 *  - this LMV is registered as observer of the MDC, and
 *    obd->obd_observer (if any) is notified with the target index
 *    (tgt - lmv->tgts) passed as the data pointer;
 *  - on success the export is stored in tgt->ltd_exp,
 *    ld_active_tgt_count is incremented and the current EA sizes are sent
 *    to the target;
 *  - a symlink named after the MDC is created under the target_obds
 *    procfs directory; when that fails the directory is removed.
 *
 * MAX_STRING_SIZE bounds the symlink path buffer, which is declared with
 * one extra byte and explicitly NUL-terminated before snprintf().
 * The conditions on several error reports are not visible here.
 */
319 #define MAX_STRING_SIZE 128
321 int lmv_connect_mdc(struct obd_device *obd, struct lmv_tgt_desc *tgt)
323 struct lmv_obd *lmv = &obd->u.lmv;
324 struct obd_uuid *cluuid = &lmv->cluuid;
325 struct obd_uuid lmv_mdc_uuid = { "LMV_MDC_UUID" };
326 struct lustre_handle conn = {0, };
327 struct obd_device *mdc_obd;
328 struct obd_export *mdc_exp;
331 struct proc_dir_entry *lmv_proc_dir;
335 /* for MDS: don't connect to yourself */
336 if (obd_uuid_equals(&tgt->uuid, cluuid)) {
337 CDEBUG(D_CONFIG, "don't connect back to %s\n", cluuid->uuid);
338 /* XXX - the old code didn't increment active tgt count.
343 mdc_obd = class_find_client_obd(&tgt->uuid, OBD_MDC_DEVICENAME,
346 CERROR("target %s not attached\n", tgt->uuid.uuid);
350 CDEBUG(D_CONFIG, "connect to %s(%s) - %s, %s FOR %s\n",
351 mdc_obd->obd_name, mdc_obd->obd_uuid.uuid,
352 tgt->uuid.uuid, obd->obd_uuid.uuid,
355 if (!mdc_obd->obd_set_up) {
356 CERROR("target %s not set up\n", tgt->uuid.uuid);
360 rc = obd_connect(&conn, mdc_obd, &lmv_mdc_uuid, &lmv->conn_data,
363 CERROR("target %s connect error %d\n", tgt->uuid.uuid, rc);
367 mdc_exp = class_conn2export(&conn);
369 rc = obd_register_observer(mdc_obd, obd);
371 obd_disconnect(mdc_exp, 0);
372 CERROR("target %s register_observer error %d\n",
377 if (obd->obd_observer) {
378 /* tell the mds_lmv about the new target */
379 rc = obd_notify(obd->obd_observer, mdc_exp->exp_obd, 1,
380 (void *)(tgt - lmv->tgts));
382 obd_disconnect(mdc_exp, 0);
387 tgt->ltd_exp = mdc_exp;
389 lmv->desc.ld_active_tgt_count++;
391 obd_init_ea_size(tgt->ltd_exp, lmv->max_easize,
392 lmv->max_cookiesize);
393 CDEBUG(D_CONFIG, "connected to %s(%s) successfully (%d)\n",
394 mdc_obd->obd_name, mdc_obd->obd_uuid.uuid,
395 atomic_read(&obd->obd_refcount));
398 lmv_proc_dir = lprocfs_srch(obd->obd_proc_entry, "target_obds");
400 struct proc_dir_entry *mdc_symlink;
401 char name[MAX_STRING_SIZE + 1];
403 LASSERT(mdc_obd->obd_type != NULL);
404 LASSERT(mdc_obd->obd_type->typ_name != NULL);
405 name[MAX_STRING_SIZE] = '\0';
406 snprintf(name, MAX_STRING_SIZE, "../../../%s/%s",
407 mdc_obd->obd_type->typ_name,
409 mdc_symlink = proc_symlink(mdc_obd->obd_name,
411 if (mdc_symlink == NULL) {
412 CERROR("could not register LMV target "
413 "/proc/fs/lustre/%s/%s/target_obds/%s.",
414 obd->obd_type->typ_name, obd->obd_name,
416 lprocfs_remove(lmv_proc_dir);
/*
 * Add one target UUID to the LMV target table.
 *
 * Visible sequence:
 *  - refuse the request (after lmv_init_unlock) when ld_active_tgt_count
 *    has reached LMV_MAX_TGT_COUNT;
 *  - for the very first target (ld_tgt_count == 0) look up the MDC client
 *    OBD and initialise llog with obd_llog_init();
 *  - claim the next slot in lmv->tgts under lmv_lock, post-incrementing
 *    ld_tgt_count, and copy the UUID into it;
 *  - when the LMV is already connected, connect the new target right away
 *    with lmv_connect_mdc(); one branch rolls back the slot (decrement
 *    ld_tgt_count, zero the slot), the other recomputes the EA size from
 *    the new target count and calls lmv_init_ea_size().
 *
 * Every visible exit path calls lmv_init_unlock(); the matching lock call
 * is not visible here.
 *
 * NOTE(review): the capacity check is made against ld_active_tgt_count
 * while the slot index comes from ld_tgt_count -- confirm that inactive
 * targets cannot push the index past the end of lmv->tgts.
 */
424 int lmv_add_mdc(struct obd_device *obd, struct obd_uuid *tgt_uuid)
426 struct lmv_obd *lmv = &obd->u.lmv;
427 struct lmv_tgt_desc *tgt;
431 CDEBUG(D_CONFIG, "tgt_uuid: %s.\n", tgt_uuid->uuid);
435 if (lmv->desc.ld_active_tgt_count >= LMV_MAX_TGT_COUNT) {
436 lmv_init_unlock(lmv);
437 CERROR("can't add %s, LMV module compiled for %d MDCs. "
438 "That many MDCs already configured.\n",
439 tgt_uuid->uuid, LMV_MAX_TGT_COUNT);
442 if (lmv->desc.ld_tgt_count == 0) {
443 struct obd_device *mdc_obd;
445 mdc_obd = class_find_client_obd(tgt_uuid, OBD_MDC_DEVICENAME,
448 lmv_init_unlock(lmv);
449 CERROR("Target %s not attached\n", tgt_uuid->uuid);
453 rc = obd_llog_init(obd, &obd->obd_llogs, mdc_obd, 0, NULL);
455 lmv_init_unlock(lmv);
456 CERROR("lmv failed to setup llogging subsystems\n");
459 spin_lock(&lmv->lmv_lock);
460 tgt = lmv->tgts + lmv->desc.ld_tgt_count++;
461 tgt->uuid = *tgt_uuid;
462 spin_unlock(&lmv->lmv_lock);
464 if (lmv->connected) {
465 rc = lmv_connect_mdc(obd, tgt);
467 spin_lock(&lmv->lmv_lock);
468 lmv->desc.ld_tgt_count--;
469 memset(tgt, 0, sizeof(*tgt));
470 spin_unlock(&lmv->lmv_lock);
472 int easize = sizeof(struct mea) +
473 lmv->desc.ld_tgt_count *
474 sizeof(struct lustre_id);
475 lmv_init_ea_size(obd->obd_self_export, easize, 0);
479 lmv_init_unlock(lmv);
483 /* Checks whether the passed obd is connected; if it is not, connects it. */
/*
 * Make sure the LMV has real connections to its targets.
 *
 * When lmv->connected is already set the init lock is released (the
 * return that presumably follows is not visible). With no targets
 * configured an error is logged. Otherwise every configured target is
 * connected through lmv_connect_mdc(), lmv_set_timeouts() is applied, a
 * reference on lmv->exp is dropped, and the EA size derived from the
 * target count is installed with lmv_init_ea_size() before the init lock
 * is released.
 *
 * The last lines are the visible part of an unwind path: the active
 * target count is decremented, targets are disconnected with
 * obd_disconnect(), failures are logged, and lmv->exp is disconnected
 * with class_disconnect(). The loop header and label of that path, and
 * the place where lmv->connected is set, are not visible here.
 */
484 int lmv_check_connect(struct obd_device *obd)
486 struct lmv_obd *lmv = &obd->u.lmv;
487 struct lmv_tgt_desc *tgt;
495 if (lmv->connected) {
496 lmv_init_unlock(lmv);
500 if (lmv->desc.ld_tgt_count == 0) {
501 CERROR("%s: no targets configured.\n", obd->obd_name);
505 CDEBUG(D_CONFIG, "time to connect %s to %s\n",
506 lmv->cluuid.uuid, obd->obd_name);
508 LASSERT(lmv->tgts != NULL);
510 for (i = 0, tgt = lmv->tgts; i < lmv->desc.ld_tgt_count; i++, tgt++) {
511 rc = lmv_connect_mdc(obd, tgt);
516 lmv_set_timeouts(obd);
517 class_export_put(lmv->exp);
519 easize = lmv->desc.ld_tgt_count * sizeof(struct lustre_id) +
521 lmv_init_ea_size(obd->obd_self_export, easize, 0);
522 lmv_init_unlock(lmv);
531 --lmv->desc.ld_active_tgt_count;
532 rc2 = obd_disconnect(tgt->ltd_exp, 0);
534 CERROR("error: LMV target %s disconnect on "
535 "MDC idx %d: error %d\n",
536 tgt->uuid.uuid, i, rc2);
540 class_disconnect(lmv->exp, 0);
541 lmv_init_unlock(lmv);
/*
 * Disconnect an export from the LMV; the targets are torn down only on
 * the final disconnect (see the lmv->refcount test).
 *
 * For every target with a non-NULL export the visible code copies
 * obd_no_recov from the LMV device to the MDC device, removes the MDC
 * symlink from the target_obds procfs directory (logging when it is
 * missing), unregisters the observer, disconnects the target with the
 * caller flags, marks the target inactive through lmv_activate_target()
 * and clears ltd_exp. A disconnect error is reported only for targets
 * still marked active. Afterwards the target_obds directory itself is
 * removed and the export is released with class_export_put() and
 * class_disconnect(). The statement guarded by the final refcount test is
 * not visible here.
 */
545 static int lmv_disconnect(struct obd_export *exp, unsigned long flags)
547 struct obd_device *obd = class_exp2obd(exp);
548 struct lmv_obd *lmv = &obd->u.lmv;
551 struct proc_dir_entry *lmv_proc_dir;
559 /* Only disconnect the underlying layers on the final disconnect. */
561 if (lmv->refcount != 0)
565 lmv_proc_dir = lprocfs_srch(obd->obd_proc_entry, "target_obds");
568 for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
569 struct obd_device *mdc_obd;
571 if (lmv->tgts[i].ltd_exp == NULL)
574 mdc_obd = class_exp2obd(lmv->tgts[i].ltd_exp);
577 mdc_obd->obd_no_recov = obd->obd_no_recov;
581 struct proc_dir_entry *mdc_symlink;
583 mdc_symlink = lprocfs_srch(lmv_proc_dir, mdc_obd->obd_name);
585 lprocfs_remove(mdc_symlink);
587 CERROR("/proc/fs/lustre/%s/%s/target_obds/%s missing\n",
588 obd->obd_type->typ_name, obd->obd_name,
593 CDEBUG(D_OTHER, "disconnected from %s(%s) successfully\n",
594 lmv->tgts[i].ltd_exp->exp_obd->obd_name,
595 lmv->tgts[i].ltd_exp->exp_obd->obd_uuid.uuid);
597 obd_register_observer(lmv->tgts[i].ltd_exp->exp_obd, NULL);
598 rc = obd_disconnect(lmv->tgts[i].ltd_exp, flags);
600 if (lmv->tgts[i].active) {
601 CERROR("Target %s disconnect error %d\n",
602 lmv->tgts[i].uuid.uuid, rc);
607 lmv_activate_target(lmv, &lmv->tgts[i], 0);
608 lmv->tgts[i].ltd_exp = NULL;
613 lprocfs_remove(lmv_proc_dir);
615 CERROR("/proc/fs/lustre/%s/%s/target_obds missing\n",
616 obd->obd_type->typ_name, obd->obd_name);
621 /* this is the case when no real connection is established by
622 * lmv_check_connect(). */
624 class_export_put(exp);
625 rc = class_disconnect(exp, 0);
626 if (lmv->refcount == 0)
/*
 * Forward an ioctl to the targets of the LMV.
 *
 * The command and its arguments are passed unchanged to obd_iocontrol()
 * for each target with a non-NULL export. An error is logged with CERROR
 * only for targets marked active. How the per-target results are combined
 * into the return value (locals rc and set) and what happens when no
 * targets are configured is not visible here.
 */
631 static int lmv_iocontrol(unsigned int cmd, struct obd_export *exp,
632 int len, void *karg, void *uarg)
634 struct obd_device *obddev = class_exp2obd(exp);
635 struct lmv_obd *lmv = &obddev->u.lmv;
636 int i, rc = 0, set = 0;
639 if (lmv->desc.ld_tgt_count == 0)
642 for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
645 if (lmv->tgts[i].ltd_exp == NULL)
648 err = obd_iocontrol(cmd, lmv->tgts[i].ltd_exp, len, karg, uarg);
650 if (lmv->tgts[i].active) {
651 CERROR("error: iocontrol MDC %s on MDT"
652 "idx %d: err = %d\n",
653 lmv->tgts[i].uuid.uuid, i, err);
/*
 * Set up an LMV device from its configuration record.
 *
 * Config buffer 1 must be present and at least sizeof(struct lmv_desc)
 * long; both violations are logged. The target table is allocated for
 * LMV_MAX_TGT_COUNT entries (size kept in lmv->tgts_size), the descriptor
 * UUID is copied from the config, the target counters and max_cookiesize
 * are reset to 0, and the spinlock and the init semaphore (initial count
 * 1) are initialised. lmv_setup_mgr() is called last; the visible failure
 * path logs an error and frees the target table again.
 */
666 static int lmv_setup(struct obd_device *obd, obd_count len, void *buf)
668 struct lmv_obd *lmv = &obd->u.lmv;
669 struct lustre_cfg *lcfg = buf;
670 struct lmv_desc *desc;
674 if (LUSTRE_CFG_BUFLEN(lcfg, 1) < 1) {
675 CERROR("LMV setup requires a descriptor\n");
679 desc = (struct lmv_desc *)lustre_cfg_buf(lcfg, 1);
680 if (sizeof(*desc) > LUSTRE_CFG_BUFLEN(lcfg, 1)) {
681 CERROR("descriptor size wrong: %d > %d\n",
682 (int)sizeof(*desc), LUSTRE_CFG_BUFLEN(lcfg, 1));
686 lmv->tgts_size = LMV_MAX_TGT_COUNT * sizeof(struct lmv_tgt_desc);
688 OBD_ALLOC(lmv->tgts, lmv->tgts_size);
689 if (lmv->tgts == NULL) {
690 CERROR("Out of memory\n");
694 obd_str2uuid(&lmv->desc.ld_uuid, desc->ld_uuid.uuid);
695 lmv->desc.ld_tgt_count = 0;
696 lmv->desc.ld_active_tgt_count = 0;
697 lmv->max_cookiesize = 0;
700 spin_lock_init(&lmv->lmv_lock);
701 sema_init(&lmv->init_sem, 1);
703 rc = lmv_setup_mgr(obd);
705 CERROR("Can't setup LMV object manager, "
707 OBD_FREE(lmv->tgts, lmv->tgts_size);
/*
 * Counterpart of lmv_setup(): shuts down the object manager with
 * lmv_cleanup_mgr() and frees the target table using the size recorded in
 * lmv->tgts_size.
 */
714 static int lmv_cleanup(struct obd_device *obd, int flags)
716 struct lmv_obd *lmv = &obd->u.lmv;
719 lmv_cleanup_mgr(obd);
720 OBD_FREE(lmv->tgts, lmv->tgts_size);
/*
 * Handle a configuration command addressed to the LMV.
 *
 * LCFG_LMV_ADD_MDC: config buffer 1 carries the target UUID string; a
 * buffer longer than the uuid field yields -EINVAL, otherwise the string
 * is converted with obd_str2uuid() and handed to lmv_add_mdc().
 * The other visible branch logs the command as unknown and also yields
 * -EINVAL.
 */
724 static int lmv_process_config(struct obd_device *obd, obd_count len, void *buf)
726 struct lustre_cfg *lcfg = buf;
727 struct obd_uuid tgt_uuid;
731 switch(lcfg->lcfg_command) {
732 case LCFG_LMV_ADD_MDC:
733 if (LUSTRE_CFG_BUFLEN(lcfg, 1) > sizeof(tgt_uuid.uuid))
734 GOTO(out, rc = -EINVAL);
736 obd_str2uuid(&tgt_uuid, lustre_cfg_string(lcfg, 1));
737 rc = lmv_add_mdc(obd, &tgt_uuid);
740 CERROR("Unknown command: %d\n", lcfg->lcfg_command);
741 GOTO(out, rc = -EINVAL);
/*
 * Collect statfs information from the targets into osfs.
 *
 * After lmv_check_connect(), a temporary obd_statfs is allocated and
 * obd_statfs() is issued for each target with a non-NULL export; a
 * failure is logged and jumps to out_free_temp. The visible body both
 * copies a target result wholesale into osfs and adds os_bavail,
 * os_blocks, os_ffree and os_files to it; the branch that selects
 * between the copy and the additions is not visible here (presumably
 * first target versus later ones -- TODO confirm). The temporary buffer
 * is released with OBD_FREE.
 */
748 static int lmv_statfs(struct obd_device *obd, struct obd_statfs *osfs,
749 unsigned long max_age)
751 struct lmv_obd *lmv = &obd->u.lmv;
752 struct obd_statfs *temp;
756 rc = lmv_check_connect(obd);
760 OBD_ALLOC(temp, sizeof(*temp));
764 for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
765 if (lmv->tgts[i].ltd_exp == NULL)
768 rc = obd_statfs(lmv->tgts[i].ltd_exp->exp_obd, temp, max_age);
770 CERROR("can't stat MDS #%d (%s), error %d\n", i,
771 lmv->tgts[i].ltd_exp->exp_obd->obd_name,
773 GOTO(out_free_temp, rc);
776 memcpy(osfs, temp, sizeof(*temp));
778 osfs->os_bavail += temp->os_bavail;
779 osfs->os_blocks += temp->os_blocks;
780 osfs->os_ffree += temp->os_ffree;
781 osfs->os_files += temp->os_files;
787 OBD_FREE(temp, sizeof(*temp));
/*
 * Ensure the LMV is connected, then forward the getstatus request to the
 * first target (index 0) with md_getstatus(), passing id through.
 */
791 static int lmv_getstatus(struct obd_export *exp, struct lustre_id *id)
793 struct obd_device *obd = exp->exp_obd;
794 struct lmv_obd *lmv = &obd->u.lmv;
798 rc = lmv_check_connect(obd);
802 rc = md_getstatus(lmv->tgts[0].ltd_exp, id);
/*
 * Get attributes of the object named by id.
 *
 * The request goes to the target selected by id_group(id), which is
 * asserted to be below ld_tgt_count, with all arguments passed through to
 * md_getattr(). Afterwards the local object cache is consulted with
 * lmv_grab_obj(); for an object that has one (logged as splitted) the
 * visible loop walks obj->objs and adds each cached size to body->size in
 * reply buffer 0. Entries with a NULL target export are warned about, and
 * the entry whose id equals the master id is tested for (see the skip
 * master obj comment). The statements those two tests guard, and the
 * release of the grabbed object, are not visible here.
 */
808 static int lmv_getattr(struct obd_export *exp, struct lustre_id *id,
809 __u64 valid, const char *xattr_name,
810 const void *xattr_data, unsigned int xattr_datalen,
811 unsigned int ea_size, struct obd_capa *ocapa,
812 struct ptlrpc_request **request)
814 struct obd_device *obd = exp->exp_obd;
815 struct lmv_obd *lmv = &obd->u.lmv;
816 int rc, i = id_group(id);
820 rc = lmv_check_connect(obd);
824 LASSERT(i < lmv->desc.ld_tgt_count);
827 rc = md_getattr(lmv->tgts[i].ltd_exp, id, valid,
828 xattr_name, xattr_data, xattr_datalen,
829 ea_size, ocapa, request);
833 obj = lmv_grab_obj(obd, id);
835 CDEBUG(D_OTHER, "GETATTR for "DLID4" %s\n",
836 OLID4(id), obj ? "(splitted)" : "");
839 * if object is splitted, then we loop over all the slaves and gather
840 * size attribute. In ideal world we would have to gather also mds field
841 * from all slaves, as object is spread over the cluster and this is
842 * definitely interesting information and it is not good to loss it,
846 struct mds_body *body;
848 if (*request == NULL) {
853 body = lustre_msg_buf((*request)->rq_repmsg, 0,
855 LASSERT(body != NULL);
859 for (i = 0; i < obj->objcount; i++) {
861 if (lmv->tgts[i].ltd_exp == NULL) {
862 CWARN("%s: NULL export for %d\n",
867 /* skip master obj. */
868 if (id_equal_fid(&obj->id, &obj->objs[i].id))
871 body->size += obj->objs[i].size;
/*
 * Ensure the LMV is connected, then forward the access check to the
 * target selected by id_group(id) with md_access_check(). The index is
 * asserted to be below ld_tgt_count.
 */
881 static int lmv_access_check(struct obd_export *exp,
882 struct lustre_id *id,
883 struct ptlrpc_request **request)
885 struct obd_device *obd = exp->exp_obd;
886 struct lmv_obd *lmv = &obd->u.lmv;
887 int rc, i = id_group(id);
890 rc = lmv_check_connect(obd);
894 LASSERT(i < lmv->desc.ld_tgt_count);
895 rc = md_access_check(lmv->tgts[i].ltd_exp, id, request);
/*
 * Apply a callback-data change for id on every target.
 *
 * After lmv_check_connect() and an assertion that id_group(id) is a valid
 * target index, md_change_cbdata() is called for each of the
 * ld_tgt_count targets, because (per the comment in the body) an object
 * may hold locks in the namespaces of two different MDSes. The individual
 * return values of md_change_cbdata() are not used.
 *
 * NOTE(review): unlike other target loops in this file, no NULL ltd_exp
 * test precedes the call -- confirm md_change_cbdata() tolerates a NULL
 * export.
 */
899 static int lmv_change_cbdata(struct obd_export *exp,
900 struct lustre_id *id,
904 struct obd_device *obd = exp->exp_obd;
905 struct lmv_obd *lmv = &obd->u.lmv;
909 rc = lmv_check_connect(obd);
913 CDEBUG(D_OTHER, "CBDATA for "DLID4"\n", OLID4(id));
914 LASSERT(id_group(id) < lmv->desc.ld_tgt_count);
916 /* with CMD every object can have two locks in different
917 * namespaces: lookup lock in space of mds storing direntry
918 * and update/open lock in space of mds storing inode */
919 for (i = 0; i < lmv->desc.ld_tgt_count; i++)
920 md_change_cbdata(lmv->tgts[i].ltd_exp, id, it, data);
/*
 * Apply a callback-data change for the child cid reached through a name
 * in the parent directory pid.
 *
 * Both id groups are asserted to be valid target indices. rcid starts as
 * a copy of *cid. When the parent has a local object (a split directory,
 * per the comment in the body) raw_name2idx() hashes the name to a slave
 * index, rcid is replaced by that slave id and mds by its group. The
 * request then goes to lmv->tgts[mds] with md_change_cbdata(). The
 * initial assignment of mds and the release of the grabbed object are not
 * visible here.
 */
925 static int lmv_change_cbdata_name(struct obd_export *exp,
926 struct lustre_id *pid,
928 struct lustre_id *cid,
932 struct obd_device *obd = exp->exp_obd;
933 struct lmv_obd *lmv = &obd->u.lmv;
934 struct lustre_id rcid = *cid;
939 rc = lmv_check_connect(obd);
943 LASSERT(id_group(pid) < lmv->desc.ld_tgt_count);
944 LASSERT(id_group(cid) < lmv->desc.ld_tgt_count);
946 CDEBUG(D_OTHER, "CBDATA for "DLID4":%*s -> "DLID4"\n",
947 OLID4(pid), len, name, OLID4(cid));
949 /* this is default mds for directory name belongs to. */
951 obj = lmv_grab_obj(obd, pid);
953 /* directory is splitted. look for right mds for this name. */
954 mds = raw_name2idx(obj->hashtype, obj->objcount, name, len);
955 rcid = obj->objs[mds].id;
956 mds = id_group(&rcid);
959 rc = md_change_cbdata(lmv->tgts[mds].ltd_exp, &rcid, it, data);
/*
 * Ensure the LMV is connected, then forward the attribute validation for
 * id to the target selected by id_group(id) with md_valid_attrs(). The
 * index is asserted to be below ld_tgt_count.
 */
963 static int lmv_valid_attrs(struct obd_export *exp, struct lustre_id *id)
965 struct obd_device *obd = exp->exp_obd;
966 struct lmv_obd *lmv = &obd->u.lmv;
970 rc = lmv_check_connect(obd);
974 CDEBUG(D_OTHER, "validate "DLID4"\n", OLID4(id));
975 LASSERT(id_group(id) < lmv->desc.ld_tgt_count);
976 rc = md_valid_attrs(lmv->tgts[id_group(id)].ltd_exp, id);
/*
 * Ensure the LMV is connected, then forward the close request to the
 * target selected by the group of op_data->id1 with md_close(). The index
 * is asserted to be below ld_tgt_count.
 */
980 static int lmv_close(struct obd_export *exp, struct mdc_op_data *op_data,
981 struct obd_client_handle *och,
982 struct ptlrpc_request **request)
984 struct obd_device *obd = exp->exp_obd;
985 struct lmv_obd *lmv = &obd->u.lmv;
986 int rc, i = id_group(&op_data->id1);
989 rc = lmv_check_connect(obd);
993 LASSERT(i < lmv->desc.ld_tgt_count);
994 CDEBUG(D_OTHER, "CLOSE "DLID4"\n", OLID4(&op_data->id1));
995 rc = md_close(lmv->tgts[i].ltd_exp, op_data, och, request);
/*
 * Fetch the MEA of the directory id from its MDS and build the local
 * object from it.
 *
 * md_getattr() is issued on the target selected by id_group(id) with
 * valid = OBD_MD_FLEASIZE | OBD_MD_FLDIREA | OBD_MD_MEA and an EA buffer
 * of MEA_SIZE_LMV(lmv) bytes. The reply is unpacked with
 * mdc_req2lustre_md(); one visible path exits with -ENODATA (presumably
 * when no MEA came back -- the test is not visible here). The object is
 * created with lmv_create_obj(exp, id, md.mea), after which the unpacked
 * MEA is freed with obd_free_memmd() and the request is finished.
 */
999 int lmv_get_mea_and_update_object(struct obd_export *exp,
1000 struct lustre_id *id)
1002 struct obd_device *obd = exp->exp_obd;
1003 struct lmv_obd *lmv = &obd->u.lmv;
1004 struct ptlrpc_request *req = NULL;
1005 struct lmv_obj *obj;
1006 struct lustre_md md;
1012 mealen = MEA_SIZE_LMV(lmv);
1014 valid = OBD_MD_FLEASIZE | OBD_MD_FLDIREA | OBD_MD_MEA;
1016 /* time to update mea of parent id */
1017 rc = md_getattr(lmv->tgts[id_group(id)].ltd_exp,
1018 id, valid, NULL, NULL, 0, mealen, NULL, &req);
1020 CERROR("md_getattr() failed, error %d\n", rc);
1024 rc = mdc_req2lustre_md(exp, req, 0, NULL, &md);
1026 CERROR("mdc_req2lustre_md() failed, error %d\n", rc);
1031 GOTO(cleanup, rc = -ENODATA);
1033 obj = lmv_create_obj(exp, id, md.mea);
1039 obd_free_memmd(exp, (struct lov_stripe_md **)&md.mea);
1044 ptlrpc_req_finished(req);
/*
 * Create a new name under the directory op_data->id1.
 *
 * After lmv_check_connect() and a test for zero active targets, the local
 * object for the parent is looked up; when one exists raw_name2idx()
 * hashes the name to a slave index and op_data->id1 is redirected to that
 * slave id. md_create() is then issued on the target owning
 * op_data->id1 and the reply body in buffer 0 is read. When the target
 * answers -ERESTART (the directory got split, per the comment in the
 * body) the local object is refreshed with
 * lmv_get_mea_and_update_object() and the request is finished; the jump
 * that repeats the operation is not visible here.
 *
 * LASSERT(++loop <= 2) bounds the number of passes. The increment lives
 * inside the assertion, so it only happens when LASSERT evaluates its
 * argument.
 */
1048 int lmv_create(struct obd_export *exp, struct mdc_op_data *op_data,
1049 const void *data, int datalen, int mode, __u32 uid,
1050 __u32 gid, __u64 rdev, struct ptlrpc_request **request)
1052 struct obd_device *obd = exp->exp_obd;
1053 struct lmv_obd *lmv = &obd->u.lmv;
1054 struct mds_body *body;
1055 struct lmv_obj *obj;
1056 int rc, mds, loop = 0;
1059 rc = lmv_check_connect(obd);
1063 if (!lmv->desc.ld_active_tgt_count)
1066 LASSERT(++loop <= 2);
1067 obj = lmv_grab_obj(obd, &op_data->id1);
1069 mds = raw_name2idx(obj->hashtype, obj->objcount,
1070 op_data->name, op_data->namelen);
1071 op_data->id1 = obj->objs[mds].id;
1075 CDEBUG(D_OTHER, "CREATE '%*s' on "DLID4"\n", op_data->namelen,
1076 op_data->name, OLID4(&op_data->id1));
1078 rc = md_create(lmv->tgts[id_group(&op_data->id1)].ltd_exp,
1079 op_data, data, datalen, mode, uid, gid, rdev,
1082 if (*request == NULL)
1085 body = lustre_msg_buf((*request)->rq_repmsg, 0,
1090 CDEBUG(D_OTHER, "created. "DLID4"\n", OLID4(&op_data->id1));
1091 } else if (rc == -ERESTART) {
1093 * directory got splitted. time to update local object and
1094 * repeat the request with proper MDS.
1096 rc = lmv_get_mea_and_update_object(exp, &op_data->id1);
1098 ptlrpc_req_finished(*request);
/*
 * Placeholder done_writing handler: after lmv_check_connect() it warns
 * that the method is not implemented and forwards the obdo to the first
 * target (index 0) with md_done_writing(). Choosing the right target is
 * still open, as the FIXME in the body says.
 */
1105 static int lmv_done_writing(struct obd_export *exp, struct obdo *obdo)
1107 struct obd_device *obd = exp->exp_obd;
1108 struct lmv_obd *lmv = &obd->u.lmv;
1112 rc = lmv_check_connect(obd);
1116 /* FIXME: choose right MDC here */
1117 CWARN("this method isn't implemented yet\n");
1118 rc = md_done_writing(lmv->tgts[0].ltd_exp, obdo);
/*
 * Enqueue one lock per slave object listed in data->mea1.
 *
 * A scratch mdc_op_data is allocated and, for each of the mea_count ids,
 * zeroed and given that id as id1. The enqueue goes to the target
 * selected by the id group, with the lock handle stored at lockh + i, so
 * the caller must supply at least mea_count handles. Targets with a NULL
 * export are tested for (the guarded statement is not visible). After
 * each enqueue any request left in the intent it_data is finished, and a
 * non-zero it_status jumps to cleanup with that status as rc.
 *
 * The scratch buffer is freed on both the normal and the cleanup path.
 * The cleanup path additionally drops the locks already taken: handles
 * with a non-zero cookie are released with ldlm_lock_decref() and their
 * cookie is reset to 0. The loop header of that release step is not
 * visible here.
 */
1123 lmv_enqueue_slaves(struct obd_export *exp, int locktype,
1124 struct lookup_intent *it, int lockmode,
1125 struct mdc_op_data *data, struct lustre_handle *lockh,
1126 void *lmm, int lmmsize, ldlm_completion_callback cb_compl,
1127 ldlm_blocking_callback cb_blocking, void *cb_data)
1129 struct obd_device *obd = exp->exp_obd;
1130 struct lmv_obd *lmv = &obd->u.lmv;
1131 struct mea *mea = data->mea1;
1132 struct mdc_op_data *data2;
1136 OBD_ALLOC(data2, sizeof(*data2));
1140 LASSERT(mea != NULL);
1141 for (i = 0; i < mea->mea_count; i++) {
1142 memset(data2, 0, sizeof(*data2));
1143 data2->id1 = mea->mea_ids[i];
1144 mds = id_group(&data2->id1);
1146 if (lmv->tgts[mds].ltd_exp == NULL)
1149 rc = md_enqueue(lmv->tgts[mds].ltd_exp, locktype, it,
1150 lockmode, data2, lockh + i, lmm, lmmsize,
1151 cb_compl, cb_blocking, cb_data);
1153 CDEBUG(D_OTHER, "take lock on slave "DLID4" -> %d/%d\n",
1154 OLID4(&mea->mea_ids[i]), rc, LUSTRE_IT(it)->it_status);
1157 if (LUSTRE_IT(it)->it_data) {
1158 struct ptlrpc_request *req;
1159 req = (struct ptlrpc_request *) LUSTRE_IT(it)->it_data;
1160 ptlrpc_req_finished(req);
1163 if (LUSTRE_IT(it)->it_status)
1164 GOTO(cleanup, rc = LUSTRE_IT(it)->it_status);
1167 OBD_FREE(data2, sizeof(*data2));
1170 OBD_FREE(data2, sizeof(*data2));
1172 /* drop all taken locks */
1174 if (lockh[i].cookie)
1175 ldlm_lock_decref(lockh + i, lockmode);
1176 lockh[i].cookie = 0;
/*
 * Second step of an enqueue whose first reply points at another MDS.
 *
 * The reply body is read from buffer 1 of the request stored in the
 * intent. The OBD_MD_MDS bit in body->valid is tested first; the
 * statement guarded by the negative case is not visible (presumably an
 * early return when the bit is clear -- TODO confirm). Otherwise the lock
 * just obtained is set aside (handle copied to plock, mode to pmode), the
 * intent lock mode and data are cleared, the op data is copied with id1
 * replaced by body->id1, DISP_ENQ_COMPLETE is cleared from the intent
 * disposition and the first request is finished. A new md_enqueue() is
 * then sent to the target owning body->id1, and the lock that was set
 * aside is released with ldlm_lock_decref().
 */
1182 lmv_enqueue_remote(struct obd_export *exp, int lock_type,
1183 struct lookup_intent *it, int lock_mode,
1184 struct mdc_op_data *data, struct lustre_handle *lockh,
1185 void *lmm, int lmmsize, ldlm_completion_callback cb_compl,
1186 ldlm_blocking_callback cb_blocking, void *cb_data)
1188 struct ptlrpc_request *req = LUSTRE_IT(it)->it_data;
1189 struct obd_device *obd = exp->exp_obd;
1190 struct lmv_obd *lmv = &obd->u.lmv;
1191 struct lustre_handle plock;
1192 struct mdc_op_data rdata;
1193 struct mds_body *body = NULL;
1197 body = lustre_msg_buf(req->rq_repmsg, 1, sizeof(*body));
1198 LASSERT(body != NULL);
1200 if (!(body->valid & OBD_MD_MDS))
1203 CDEBUG(D_OTHER, "ENQUEUE '%s' on "DLID4" -> "DLID4"\n",
1204 LL_IT2STR(it), OLID4(&data->id1), OLID4(&body->id1));
1206 /* we got LOOKUP lock, but we really need attrs */
1207 pmode = LUSTRE_IT(it)->it_lock_mode;
1208 LASSERT(pmode != 0);
1209 memcpy(&plock, lockh, sizeof(plock));
1210 LUSTRE_IT(it)->it_lock_mode = 0;
1211 LUSTRE_IT(it)->it_data = NULL;
1212 LASSERT((body->valid & OBD_MD_FID) != 0);
1214 memcpy(&rdata, data, sizeof(rdata));
1215 rdata.id1 = body->id1;
1219 LUSTRE_IT(it)->it_disposition &= ~DISP_ENQ_COMPLETE;
1220 ptlrpc_req_finished(req);
1222 rc = md_enqueue(lmv->tgts[id_group(&rdata.id1)].ltd_exp,
1223 lock_type, it, lock_mode, &rdata, lockh, lmm,
1224 lmmsize, cb_compl, cb_blocking, cb_data);
1225 ldlm_lock_decref(&plock, pmode);
/*
 * Entry point for intent lock enqueue through the LMV.
 *
 * Visible dispatch:
 *  - an IT_UNLINK intent on op data that carries mea1 is handed to
 *    lmv_enqueue_slaves();
 *  - when the op data carries a name and the parent has a local object,
 *    raw_name2idx() picks the slave for that name and data->id1 is
 *    redirected to it;
 *  - md_enqueue() is issued on the target owning data->id1;
 *  - a successful IT_OPEN enqueue is followed by lmv_enqueue_remote().
 * The returns after the first branch and the release of the grabbed
 * object are not visible here.
 */
1230 lmv_enqueue(struct obd_export *exp, int lock_type,
1231 struct lookup_intent *it, int lock_mode,
1232 struct mdc_op_data *data, struct lustre_handle *lockh,
1233 void *lmm, int lmmsize, ldlm_completion_callback cb_compl,
1234 ldlm_blocking_callback cb_blocking, void *cb_data)
1236 struct obd_device *obd = exp->exp_obd;
1237 struct lmv_obd *lmv = &obd->u.lmv;
1238 struct lmv_obj *obj;
1242 rc = lmv_check_connect(obd);
1246 if (data->mea1 && it->it_op == IT_UNLINK) {
1247 rc = lmv_enqueue_slaves(exp, lock_type, it, lock_mode,
1248 data, lockh, lmm, lmmsize,
1249 cb_compl, cb_blocking, cb_data);
1253 if (data->namelen) {
1254 obj = lmv_grab_obj(obd, &data->id1);
1256 /* directory is splitted. look for right mds for this
1258 mds = raw_name2idx(obj->hashtype, obj->objcount,
1259 (char *)data->name, data->namelen);
1260 data->id1 = obj->objs[mds].id;
1264 CDEBUG(D_OTHER, "ENQUEUE '%s' on "DLID4"\n", LL_IT2STR(it),
1267 rc = md_enqueue(lmv->tgts[id_group(&data->id1)].ltd_exp,
1268 lock_type, it, lock_mode, data, lockh, lmm,
1269 lmmsize, cb_compl, cb_blocking, cb_data);
1270 if (rc == 0 && it->it_op == IT_OPEN)
1271 rc = lmv_enqueue_remote(exp, lock_type, it, lock_mode,
1272 data, lockh, lmm, lmmsize,
1273 cb_compl, cb_blocking, cb_data);
/*
 * Look up filename in the directory id and fetch its attributes.
 *
 * rid starts as a copy of *id. When the directory has a local object,
 * raw_name2idx() hashes the name (using namelen - 1 characters) to a
 * slave index and rid is redirected to that slave. md_getattr_lock() is
 * sent to the target owning rid; OBD_MD_FID is added to valid unless
 * valid is exactly OBD_MD_FLID. The reply body in buffer 0 is then
 * checked: when OBD_MD_MDS is set (a cross-node reference, per the
 * comment in the body) a second md_getattr_lock() without a name is
 * issued and the first request is finished. An -ERESTART answer (the
 * directory got split) refreshes the local object with
 * lmv_get_mea_and_update_object() and finishes the request; the jump that
 * repeats the lookup is not visible here.
 *
 * LASSERT(++loop <= 2) bounds the number of passes; the increment only
 * happens when LASSERT evaluates its argument.
 */
1278 lmv_getattr_lock(struct obd_export *exp, struct lustre_id *id,
1279 char *filename, int namelen, __u64 valid,
1280 unsigned int ea_size, struct ptlrpc_request **request)
1282 int rc, mds = id_group(id), loop = 0;
1283 struct obd_device *obd = exp->exp_obd;
1284 struct lmv_obd *lmv = &obd->u.lmv;
1285 struct lustre_id rid = *id;
1286 struct mds_body *body;
1287 struct lmv_obj *obj;
1290 rc = lmv_check_connect(obd);
1294 LASSERT(++loop <= 2);
1295 obj = lmv_grab_obj(obd, id);
1297 /* directory is splitted. look for right mds for this name */
1298 mds = raw_name2idx(obj->hashtype, obj->objcount,
1299 filename, namelen - 1);
1300 rid = obj->objs[mds].id;
1304 CDEBUG(D_OTHER, "getattr_lock for %*s on "DLID4" -> "DLID4"\n",
1305 namelen, filename, OLID4(id), OLID4(&rid));
1307 rc = md_getattr_lock(lmv->tgts[id_group(&rid)].ltd_exp,
1308 &rid, filename, namelen,
1309 valid == OBD_MD_FLID ? valid : valid | OBD_MD_FID,
1313 * this could be cross-node reference. in this case all we have
1314 * right now is lustre_id triple. we'd like to find other
1317 body = lustre_msg_buf((*request)->rq_repmsg, 0, sizeof(*body));
1318 LASSERT(body != NULL);
1319 LASSERT((body->valid & OBD_MD_FID) != 0
1320 || body->valid == OBD_MD_FLID);
1322 if (body->valid & OBD_MD_MDS) {
1323 struct ptlrpc_request *req = NULL;
1326 CDEBUG(D_OTHER, "request attrs for "DLID4"\n", OLID4(&rid));
1328 rc = md_getattr_lock(lmv->tgts[id_group(&rid)].ltd_exp,
1329 &rid, NULL, 1, valid, ea_size, &req);
1330 ptlrpc_req_finished(*request);
1333 } else if (rc == -ERESTART) {
1334 /* directory got splitted. time to update local object and
1335 * repeat the request with proper MDS */
1336 rc = lmv_get_mea_and_update_object(exp, &rid);
1338 ptlrpc_req_finished(*request);
1346 * llite passes id of a target inode in data->id1 and id of directory in
/*
 * Forward a link request to the right MDS.
 *
 * With a name present (the usual link request): when the directory
 * data->id2 has a local object, raw_name2idx() picks the slave for the
 * name (rc is used as a scratch index here) and data->id2 is redirected
 * to it; the target is then the group of data->id2. Without a name the
 * request comes from an MDS that wants the link count of data->id1
 * raised, and the target is the group of data->id1. The request is
 * finally sent with md_link() to lmv->tgts[mds].
 */
1349 static int lmv_link(struct obd_export *exp, struct mdc_op_data *data,
1350 struct ptlrpc_request **request)
1352 struct obd_device *obd = exp->exp_obd;
1353 struct lmv_obd *lmv = &obd->u.lmv;
1354 struct lmv_obj *obj;
1358 rc = lmv_check_connect(obd);
1362 if (data->namelen != 0) {
1363 /* usual link request */
1364 obj = lmv_grab_obj(obd, &data->id2);
1366 rc = raw_name2idx(obj->hashtype, obj->objcount,
1367 data->name, data->namelen);
1368 data->id2 = obj->objs[rc].id;
1372 mds = id_group(&data->id2);
1374 CDEBUG(D_OTHER,"link "DLID4":%*s to "DLID4"\n",
1375 OLID4(&data->id2), data->namelen, data->name,
1378 mds = id_group(&data->id1);
1380 /* request from MDS to acquire i_links for inode by id1 */
1381 CDEBUG(D_OTHER, "inc i_nlinks for "DLID4"\n",
1385 CDEBUG(D_OTHER, "forward to MDS #%u ("DLID4")\n",
1386 mds, OLID4(&data->id1));
1387 rc = md_link(lmv->tgts[mds].ltd_exp, data, request);
/*
 * Forward a rename request to the right MDS.
 *
 * Two situations are visible in the body:
 *  - a request coming from the MDS that holds the old directory entry and
 *    asks another MDS to create the new name: the target is the group of
 *    data->id2, and when that directory has a local object the new name
 *    is hashed with raw_name2idx() and data->id2 is redirected to the
 *    chosen slave;
 *  - the ordinary case: the source directory data->id1 and the target
 *    directory data->id2 are each looked up, and for those with a local
 *    object the old and new names are hashed to pick the slave ids; the
 *    target is then the group of data->id1.
 * A rename whose two ids live in different groups is logged as
 * cross-node. The request is sent with md_rename() to lmv->tgts[mds].
 * The condition that separates the two situations is not visible here.
 */
1392 static int lmv_rename(struct obd_export *exp, struct mdc_op_data *data,
1393 const char *old, int oldlen, const char *new, int newlen,
1394 struct ptlrpc_request **request)
1396 struct obd_device *obd = exp->exp_obd;
1397 struct lmv_obd *lmv = &obd->u.lmv;
1398 struct lmv_obj *obj;
1402 CDEBUG(D_OTHER, "rename %*s in "DLID4" to %*s in "DLID4"\n",
1403 oldlen, old, OLID4(&data->id1), newlen, new,
1406 rc = lmv_check_connect(obd);
1412 * MDS with old dir entry is asking another MDS to create name
1416 "create %*s(%d/%d) in "DLID4" pointing "
1417 "to "DLID4"\n", newlen, new, oldlen, newlen,
1418 OLID4(&data->id2), OLID4(&data->id1));
1420 mds = id_group(&data->id2);
1423 * target directory can be splitted, sowe should forward request
1426 obj = lmv_grab_obj(obd, &data->id2);
1428 mds = raw_name2idx(obj->hashtype, obj->objcount,
1429 (char *)new, newlen);
1430 data->id2 = obj->objs[mds].id;
1431 CDEBUG(D_OTHER, "forward to MDS #%u ("DLID4")\n", mds,
1438 obj = lmv_grab_obj(obd, &data->id1);
1441 * directory is already splitted, so we have to forward request
1444 mds = raw_name2idx(obj->hashtype, obj->objcount,
1445 (char *)old, oldlen);
1446 data->id1 = obj->objs[mds].id;
1447 CDEBUG(D_OTHER, "forward to MDS #%u ("DLID4")\n", mds,
1452 obj = lmv_grab_obj(obd, &data->id2);
1455 * directory is already splitted, so we have to forward request
1458 mds = raw_name2idx(obj->hashtype, obj->objcount,
1459 (char *)new, newlen);
1461 data->id2 = obj->objs[mds].id;
1462 CDEBUG(D_OTHER, "forward to MDS #%u ("DLID4")\n", mds,
1467 mds = id_group(&data->id1);
1470 if (id_group(&data->id1) != id_group(&data->id2)) {
1471 CDEBUG(D_OTHER,"cross-node rename "DLID4"/%*s to "DLID4"/%*s\n",
1472 OLID4(&data->id1), oldlen, old, OLID4(&data->id2),
1476 rc = md_rename(lmv->tgts[mds].ltd_exp, data, old, oldlen,
1477 new, newlen, request);
/*
 * Set attributes on the object data->id1.
 *
 * When the object has a local lmv object (logged as splitted) the
 * setattr is repeated for every entry of obj->objs, each time with
 * data->id1 replaced by that entry id and sent to the target owning it.
 * The entry whose id equals the master id is special: per the comment in
 * the body its request is the one returned to the caller, while a
 * ptlrpc_req_finished() call is visible for the other case.
 * Without a local object a single md_setattr() goes to the target owning
 * data->id1 (index asserted valid), and the reply body in buffer 0 is
 * asserted to carry OBD_MD_FID and an id in the same group as the
 * request. The branch structure between these parts is not fully visible
 * here.
 */
1481 static int lmv_setattr(struct obd_export *exp, struct mdc_op_data *data,
1482 struct iattr *iattr, void *ea, int ealen, void *ea2,
1483 int ea2len, void *ea3, int ea3len,
1484 struct ptlrpc_request **request)
1486 struct obd_device *obd = exp->exp_obd;
1487 struct lmv_obd *lmv = &obd->u.lmv;
1488 struct ptlrpc_request *req;
1489 struct mds_body *body;
1490 struct lmv_obj *obj;
1494 rc = lmv_check_connect(obd);
1498 obj = lmv_grab_obj(obd, &data->id1);
1500 CDEBUG(D_OTHER, "SETATTR for "DLID4", valid 0x%x%s\n",
1501 OLID4(&data->id1), iattr->ia_valid, obj ? ", splitted" : "");
1504 for (i = 0; i < obj->objcount; i++) {
1505 data->id1 = obj->objs[i].id;
1507 rc = md_setattr(lmv->tgts[id_group(&data->id1)].ltd_exp,
1508 data, iattr, ea, ealen, ea2, ea2len,
1511 if (id_equal_fid(&obj->id, &obj->objs[i].id)) {
1513 * this is master object and this request should
1514 * be returned back to llite.
1518 ptlrpc_req_finished(req);
1526 LASSERT(id_group(&data->id1) < lmv->desc.ld_tgt_count);
1527 rc = md_setattr(lmv->tgts[id_group(&data->id1)].ltd_exp,
1528 data, iattr, ea, ealen, ea2, ea2len, ea3,
1531 body = lustre_msg_buf((*request)->rq_repmsg, 0,
1533 LASSERT(body != NULL);
1534 LASSERT((body->valid & OBD_MD_FID) != 0);
1535 LASSERT(id_group(&body->id1) == id_group(&data->id1));
/*
 * lmv_sync() - forward a sync request for @id to one metadata target.
 *
 * After lmv_check_connect(), md_sync() is called on the export of the
 * target indexed by id_group(id).  How the result of lmv_check_connect()
 * is handled, and what is returned, sit on lines omitted from this listing.
 */
1541 static int lmv_sync(struct obd_export *exp, struct lustre_id *id,
1542 struct ptlrpc_request **request)
1544 struct obd_device *obd = exp->exp_obd;
1545 struct lmv_obd *lmv = &obd->u.lmv;
1549 rc = lmv_check_connect(obd);
1553 rc = md_sync(lmv->tgts[id_group(id)].ltd_exp,
/*
 * lmv_dirobj_blocking_ast() - LDLM callback for locks taken on directory
 * objects.
 *
 * Two cases are visible:
 *   LDLM_CB_BLOCKING  - the lock is turned into a handle with
 *                       ldlm_lock2handle() and cancelled with
 *                       ldlm_cli_cancel(); the result is logged.
 *   LDLM_CB_CANCELING - the lmv_obj stored in lock->l_ast_data is fetched
 *                       and a debug line is printed from the resource name.
 *
 * NOTE(review): the switch expression (presumably @flag) and whatever is
 * done with obj after the debug message are on lines omitted from this
 * listing.
 */
1558 int lmv_dirobj_blocking_ast(struct ldlm_lock *lock,
1559 struct ldlm_lock_desc *desc,
1560 void *data, int flag)
1562 struct lustre_handle lockh;
1563 struct lmv_obj *obj;
1568 case LDLM_CB_BLOCKING:
1569 ldlm_lock2handle(lock, &lockh);
1570 rc = ldlm_cli_cancel(&lockh);
1572 CDEBUG(D_INODE, "ldlm_cli_cancel: %d\n", rc);
1576 case LDLM_CB_CANCELING:
1577 /* time to drop cached attrs for dirobj */
1578 obj = lock->l_ast_data;
/* lr_name.name[3] == 1 is printed as LOOKUP, any other value as UPDATE */
1580 CDEBUG(D_OTHER, "cancel %s on "LPU64"/"LPU64
1581 ", master "DLID4"\n",
1582 lock->l_resource->lr_name.name[3] == 1 ?
1583 "LOOKUP" : "UPDATE",
1584 lock->l_resource->lr_name.name[0],
1585 lock->l_resource->lr_name.name[1],
/*
 * lmv_remove_dots() - scan one directory page for the "." and ".." entries.
 *
 * The page is read through page_address() as a sequence of
 * struct ext2_dir_entry_2 records.  The walk starts at offset 0, continues
 * while a minimal record (EXT2_DIR_REC_LEN(1)) still fits below
 * PAGE_CACHE_SIZE, and advances by each record's little-endian rec_len.
 *
 * NOTE(review): what is done to a matching entry is on a line omitted from
 * this listing.  No guard against rec_len == 0 is visible; if none exists
 * the loop would stop advancing on such a record -- confirm against the
 * full source.
 */
1596 static void lmv_remove_dots(struct page *page)
1598 unsigned limit = PAGE_CACHE_SIZE;
1599 char *kaddr = page_address(page);
1600 struct ext2_dir_entry_2 *p;
1601 unsigned offs, rec_len;
1603 for (offs = 0; offs <= limit - EXT2_DIR_REC_LEN(1); offs += rec_len) {
1604 p = (struct ext2_dir_entry_2 *)(kaddr + offs);
1605 rec_len = le16_to_cpu(p->rec_len);
/* match exactly the one-character "." and the two-character ".." names */
1607 if ((p->name_len == 1 && p->name[0] == '.') ||
1608 (p->name_len == 2 && p->name[0] == '.' && p->name[1] == '.'))
/*
 * lmv_readpage() - read one directory page, redirecting the request to the
 * right sub-object when a cached lmv_obj exists for @id.
 *
 * rid starts as a copy of *id.  After lmv_grab_obj(), the visible loop walks
 * obj->objs[], comparing @offset with each entry's size and subtracting the
 * size of entries that lie before it; rid is then taken from obj->objs[i].id
 * and the object is released with lmv_unlock_obj().  md_readpage() is sent
 * to the target indexed by id_group(&rid) with the adjusted offset.  When
 * the read succeeds and rid differs from *id (per id_equal_fid()),
 * lmv_remove_dots() is run on the page.
 *
 * NOTE(review): the guard around the lmv_grab_obj() result and the loop
 * exit statement are on lines omitted from this listing.
 */
1613 static int lmv_readpage(struct obd_export *exp, struct lustre_id *id,
1614 __u64 offset, struct page *page,
1615 struct ptlrpc_request **request)
1617 struct obd_device *obd = exp->exp_obd;
1618 struct lmv_obd *lmv = &obd->u.lmv;
1619 struct lustre_id rid = *id;
1620 struct lmv_obj *obj;
1624 rc = lmv_check_connect(obd);
1628 LASSERT(id_group(id) < lmv->desc.ld_tgt_count);
1629 CDEBUG(D_OTHER, "READPAGE at %llu from "DLID4"\n",
1630 offset, OLID4(&rid));
1632 obj = lmv_grab_obj(obd, id);
1636 /* find dirobj containing page with requested offset. */
1637 for (i = 0; i < obj->objcount; i++) {
1638 if (offset < obj->objs[i].size)
/* offset becomes relative to the start of the next entry */
1640 offset -= obj->objs[i].size;
1642 rid = obj->objs[i].id;
1644 lmv_unlock_obj(obj);
/* the 64-bit offset is cast to unsigned long only for this debug message */
1647 CDEBUG(D_OTHER, "forward to "DLID4" with offset %lu\n",
1648 OLID4(&rid), (unsigned long)offset);
1650 rc = md_readpage(lmv->tgts[id_group(&rid)].ltd_exp, &rid,
1651 offset, page, request);
1653 if (rc == 0 && !id_equal_fid(&rid, id))
1654 /* this page isn't from master object. To avoid "." and ".."
1655 * duplication in directory, we have to remove them from all
1657 lmv_remove_dots(page);
/*
 * lmv_unlink_slaves() - issue md_unlink() for every id listed in data->mea1.
 *
 * A scratch mdc_op_data (data2) is allocated once and re-zeroed for each
 * mea->mea_ids[i]; its id1 is set to that id and its create_mode to
 * MDS_MODE_DONT_LOCK | S_IFDIR before md_unlink() is called on the target
 * indexed by id_group(&data2->id1).  Each result is logged, *req is released
 * with ptlrpc_req_finished(), and data2 is freed at the end.
 *
 * NOTE(review): the allocation-failure check, the action taken when a
 * target has no export, and the error handling inside the loop are on
 * lines omitted from this listing.
 */
1662 static int lmv_unlink_slaves(struct obd_export *exp, struct mdc_op_data *data,
1663 struct ptlrpc_request **req)
1665 struct obd_device *obd = exp->exp_obd;
1666 struct lmv_obd *lmv = &obd->u.lmv;
1667 struct mea *mea = data->mea1;
1668 struct mdc_op_data *data2;
1672 OBD_ALLOC(data2, sizeof(*data2));
1676 LASSERT(mea != NULL);
1677 for (i = 0; i < mea->mea_count; i++) {
/* start from a clean request so nothing leaks over from the previous id */
1678 memset(data2, 0, sizeof(*data2));
1679 data2->id1 = mea->mea_ids[i];
1680 data2->create_mode = MDS_MODE_DONT_LOCK | S_IFDIR;
1682 if (lmv->tgts[id_group(&data2->id1)].ltd_exp == NULL)
1685 rc = md_unlink(lmv->tgts[id_group(&data2->id1)].ltd_exp,
1688 CDEBUG(D_OTHER, "unlink slave "DLID4" -> %d\n",
1689 OLID4(&mea->mea_ids[i]), rc);
1692 ptlrpc_req_finished(*req);
1698 OBD_FREE(data2, sizeof(*data2));
/*
 * lmv_delete_inode() - drop the cached lmv object for @id, if any.
 *
 * Calls lmv_delete_obj(exp, id) and logs a debug message when it returns
 * non-zero.  The return value of this function is on a line omitted from
 * this listing.
 */
1702 static int lmv_delete_inode(struct obd_export *exp, struct lustre_id *id)
1707 if (lmv_delete_obj(exp, id)) {
1708 CDEBUG(D_OTHER, "lmv object "DLID4" is destroyed.\n",
/*
 * lmv_unlink() - route an unlink request to the proper metadata target.
 *
 * Visible cases:
 *   - data->namelen == 0 with data->mea1 set: handled by lmv_unlink_slaves().
 *   - data->namelen != 0: lmv_grab_obj() looks up data->id1 and
 *     raw_name2idx() hashes the name to an index into obj->objs[], whose id
 *     replaces data->id1.
 *   - a "drop i_nlink" debug message for what is presumably the remaining
 *     case (the branch keyword is on an omitted line).
 * The request is finally sent with md_unlink() to the target indexed by
 * id_group(&data->id1).
 *
 * NOTE(review): the guard on the lmv_grab_obj() result and the release of
 * obj are not visible in this listing.
 */
1714 static int lmv_unlink(struct obd_export *exp, struct mdc_op_data *data,
1715 struct ptlrpc_request **request)
1717 struct obd_device *obd = exp->exp_obd;
1718 struct lmv_obd *lmv = &obd->u.lmv;
1722 rc = lmv_check_connect(obd);
1726 if (data->namelen == 0 && data->mea1 != NULL) {
1727 /* mds asks to remove slave objects */
1728 rc = lmv_unlink_slaves(exp, data, request);
1732 if (data->namelen != 0) {
1733 struct lmv_obj *obj;
1735 obj = lmv_grab_obj(obd, &data->id1);
/* pick the sub-object by hashing the entry name */
1737 i = raw_name2idx(obj->hashtype, obj->objcount,
1738 data->name, data->namelen);
1739 data->id1 = obj->objs[i].id;
1742 CDEBUG(D_OTHER, "unlink '%*s' in "DLID4" -> %u\n",
1743 data->namelen, data->name, OLID4(&data->id1),
1746 CDEBUG(D_OTHER, "drop i_nlink on "DLID4"\n",
1749 rc = md_unlink(lmv->tgts[id_group(&data->id1)].ltd_exp,
/*
 * lmv_get_real_obd() - return the obd_device behind the target that owns
 * @id.
 *
 * A failure code from lmv_check_connect() is returned wrapped in ERR_PTR().
 * Otherwise obd is replaced by the exp_obd of the export stored for target
 * id_group(id).
 *
 * NOTE(review): no bounds check on id_group(id) and no NULL check on
 * ltd_exp are visible in this listing -- confirm against the full source.
 */
1754 static struct obd_device *lmv_get_real_obd(struct obd_export *exp,
1755 struct lustre_id *id)
1757 struct obd_device *obd = exp->exp_obd;
1758 struct lmv_obd *lmv = &obd->u.lmv;
1762 rc = lmv_check_connect(obd);
1764 RETURN(ERR_PTR(rc));
1765 obd = lmv->tgts[id_group(id)].ltd_exp->exp_obd;
/*
 * lmv_obd_create_single() - create one object on the target named by
 * oa->o_mds.
 *
 * @ea must be NULL and oa->o_mds must be a valid target index (both are
 * asserted).  obd_create() is called on that target's export with @acl and
 * @acl_size passed through; the stripe md it fills in is a local
 * (obj_md via obj_mdp) and is not handed back to the caller in the visible
 * lines.
 */
1771 static int lmv_obd_create_single(struct obd_export *exp, struct obdo *oa,
1772 void *acl, int acl_size,
1773 struct lov_stripe_md **ea,
1774 struct obd_trans_info *oti)
1776 struct obd_device *obd = exp->exp_obd;
1777 struct lmv_obd *lmv = &obd->u.lmv;
1778 struct lov_stripe_md obj_md;
1779 struct lov_stripe_md *obj_mdp = &obj_md;
1783 LASSERT(ea == NULL);
1784 LASSERT(oa->o_mds < lmv->desc.ld_tgt_count);
1786 rc = obd_create(lmv->tgts[oa->o_mds].ltd_exp, oa,
1787 acl, acl_size, &obj_mdp, oti);
1793 * to be called from MDS only. @oa should have correct store cookie and o_fid
1794 * values for "master" object, as it will be used.
/*
 * lmv_obd_create() - create either a single remote object or the set of
 * directory objects described by a MEA.
 *
 * NOTE(review): this listing drops some source lines (the embedded original
 * line numbers skip); the conditions selecting each path, several
 * declarations and the return statements are not shown.
 *
 * Visible steps:
 *   - lmv_check_connect(), then @oa is asserted non-NULL.
 *   - One path delegates to lmv_obd_create_single() with a NULL ea and logs
 *     on failure.
 *   - The other path asserts that no ACL was supplied, may allocate *ea
 *     with obd_alloc_diskmd(), and asserts oa->o_id and oa->o_fid are set.
 *   - *ea is treated as a struct mea: mea_master starts at -1, mea_magic is
 *     MEA_MAGIC_ALL_CHARS, and mea_count is clamped to the number of
 *     targets when it is zero or too large.
 *   - The loop walks targets with i while c counts filled mea_ids[] slots.
 *     A target with no export is recorded as the master (mea_master = i,
 *     mea_ids[c] = mid).  Other targets get obd_create() and the resulting
 *     id is stored with obdo2id().
 *   - On exit c is asserted equal to mea_count.
 *
 * NOTE(review): obd_create() indexes lmv->tgts[] with c while the export
 * check and the debug message use i; confirm that the two cannot diverge
 * at that point.
 */
1796 int lmv_obd_create(struct obd_export *exp, struct obdo *oa,
1797 void *acl, int acl_size,
1798 struct lov_stripe_md **ea, struct obd_trans_info *oti)
1800 struct obd_device *obd = exp->exp_obd;
1801 struct lmv_obd *lmv = &obd->u.lmv;
1802 struct lustre_id mid;
1807 rc = lmv_check_connect(obd);
1811 LASSERT(oa != NULL);
1814 rc = lmv_obd_create_single(exp, oa, acl, acl_size, NULL, oti);
1816 CERROR("Can't create object, rc = %d\n", rc);
1820 /* acl is only suppied when mds create single remote obj */
1821 LASSERT(acl == NULL && acl_size == 0);
1824 rc = obd_alloc_diskmd(exp, (struct lov_mds_md **)ea);
1826 CERROR("obd_alloc_diskmd() failed, error %d\n",
1837 * here we should take care about splitted dir, so store cookie and fid
1838 * for "master" object should already be allocated and passed in @oa.
1840 LASSERT(oa->o_id != 0);
1841 LASSERT(oa->o_fid != 0);
1843 /* save "master" object id */
1846 mea = (struct mea *)*ea;
1847 mea->mea_master = -1;
1848 mea->mea_magic = MEA_MAGIC_ALL_CHARS;
/* an unset or oversized count falls back to one object per target */
1850 if (!mea->mea_count || mea->mea_count > lmv->desc.ld_tgt_count)
1851 mea->mea_count = lmv->desc.ld_tgt_count;
/* i walks the targets, c counts the mea_ids[] slots filled so far */
1853 for (i = 0, c = 0; c < mea->mea_count && i < lmv->desc.ld_tgt_count; i++) {
1854 struct lov_stripe_md obj_md;
1855 struct lov_stripe_md *obj_mdp = &obj_md;
1857 if (lmv->tgts[i].ltd_exp == NULL) {
1858 /* this is "master" MDS */
1859 mea->mea_master = i;
1860 mea->mea_ids[c] = mid;
1866 * "master" MDS should always be part of stripped dir,
/* only one slot is left and the master has not been seen yet */
1869 if (mea->mea_master == -1 && c == mea->mea_count - 1)
1872 oa->o_valid = OBD_MD_FLGENER | OBD_MD_FLTYPE | OBD_MD_FLMODE |
1873 OBD_MD_FLUID | OBD_MD_FLGID | OBD_MD_FLID;
1875 rc = obd_create(lmv->tgts[c].ltd_exp, oa, NULL, 0,
1878 CERROR("obd_create() failed on MDT target %d, "
1879 "error %d\n", c, rc);
1883 CDEBUG(D_OTHER, "dirobj at mds %d: "LPU64"/%u\n",
1884 i, oa->o_id, oa->o_generation);
1888 * here, when object is created (or it is master and was passed
1889 * from caller) on desired MDS we save its fid to local mea_ids.
1894 * store cookie should be defined here for both cases (master
1895 * object and not master), because master is already created.
1899 /* fill mea by store cookie and fid */
1900 obdo2id(&mea->mea_ids[c], oa);
1903 LASSERT(c == mea->mea_count);
1905 CDEBUG(D_OTHER, "%d dirobjects created\n",
1906 (int)mea->mea_count);
/*
 * lmv_llog_init() - set up the LLOG_CONFIG_REPL_CTXT llog context for @obd.
 *
 * obd_llog_setup() is called for the context against @tgt, after which the
 * context is fetched with llog_get_context() and its loc_imp is pointed at
 * tgt->u.cli.cl_import.
 *
 * NOTE(review): the check on rc between the two steps is on a line omitted
 * from this listing.
 */
1911 static int lmv_llog_init(struct obd_device *obd, struct obd_llogs *llogs,
1912 struct obd_device *tgt, int count,
1913 struct llog_catid *logid)
1915 struct llog_ctxt *ctxt;
1919 rc = obd_llog_setup(obd, llogs, LLOG_CONFIG_REPL_CTXT, tgt, 0, NULL,
1922 ctxt = llog_get_context(llogs, LLOG_CONFIG_REPL_CTXT);
1923 ctxt->loc_imp = tgt->u.cli.cl_import;
/*
 * lmv_llog_finish() - tear down the LLOG_CONFIG_REPL_CTXT llog context.
 *
 * Looks the context up in @llogs and passes it to obd_llog_cleanup().
 * @obd and @count are not used in the visible lines.
 */
1929 static int lmv_llog_finish(struct obd_device *obd,
1930 struct obd_llogs *llogs, int count)
1935 rc = obd_llog_cleanup(llog_get_context(llogs, LLOG_CONFIG_REPL_CTXT));
/*
 * lmv_precleanup() - pre-cleanup hook: shut down llog handling for @obd.
 *
 * Calls obd_llog_finish() on obd->obd_llogs and logs an error message
 * (presumably when rc is non-zero; the test is on an omitted line).
 * @flags is not used in the visible lines.
 */
1939 static int lmv_precleanup(struct obd_device *obd, int flags)
1943 rc = obd_llog_finish(obd, &obd->obd_llogs, 0);
1945 CERROR("failed to cleanup llogging subsystems\n");
/*
 * lmv_get_info() - answer or forward a get_info query identified by @key.
 *
 * Keys handled in the visible lines:
 *   "mdsize"      - *val = sizeof(struct lustre_id) * target count +
 *                   sizeof(struct mea); *vallen = sizeof(__u32).
 *   "mdsnum"      - scans the targets for the one whose uuid equals
 *                   lmv->cluuid (the value stored is on an omitted line).
 *   "rootid"      - forwarded to target 0 after lmv_check_connect().
 *   "lmvdesc"     - copies lmv->desc into @val.
 *   "remote_flag" - *vallen must be sizeof(__u32); every target is queried.
 *   "lovdesc"     - forwarded to target 0 after lmv_check_connect().
 *   "getext"      - *vallen must be sizeof(struct fid_extent); every target
 *                   is queried.
 * Anything else is logged as "invalid key".
 *
 * NOTE(review): the first three keys require keylen == strlen(name) while
 * the later ones accept keylen >= strlen(name).  All of them use strcmp(),
 * so @key has to be NUL-terminated.  Return statements are on lines omitted
 * from this listing.
 */
1950 static int lmv_get_info(struct obd_export *exp, __u32 keylen,
1951 void *key, __u32 *vallen, void *val)
1953 struct obd_device *obd;
1954 struct lmv_obd *lmv;
1958 obd = class_exp2obd(exp);
1960 CDEBUG(D_IOCTL, "invalid client cookie "LPX64"\n",
1961 exp->exp_handle.h_cookie);
/* size of a MEA large enough to describe every configured target */
1966 if (keylen == strlen("mdsize") && !strcmp(key, "mdsize")) {
1967 __u32 *mdsize = val;
1968 *vallen = sizeof(__u32);
1969 *mdsize = sizeof(struct lustre_id) * lmv->desc.ld_tgt_count
1970 + sizeof(struct mea);
1972 } else if (keylen == strlen("mdsnum") && !strcmp(key, "mdsnum")) {
1973 struct obd_uuid *cluuid = &lmv->cluuid;
1974 struct lmv_tgt_desc *tgts;
1975 __u32 *mdsnum = val;
1979 for (i = 0; i < lmv->desc.ld_tgt_count; i++, tgts++) {
1980 if (obd_uuid_equals(&tgts->uuid, cluuid)) {
1981 *vallen = sizeof(__u32);
1987 } else if (keylen == strlen("rootid") && !strcmp(key, "rootid")) {
1988 rc = lmv_check_connect(obd);
1992 /* getting rootid from first MDS. */
1993 rc = obd_get_info(lmv->tgts[0].ltd_exp, keylen, key,
1996 } else if (keylen >= strlen("lmvdesc") && !strcmp(key, "lmvdesc")) {
1997 struct lmv_desc *desc_ret = val;
1998 *desc_ret = lmv->desc;
2000 } else if (keylen >= strlen("remote_flag") && !strcmp(key, "remote_flag")) {
2001 struct lmv_tgt_desc *tgts;
2004 rc = lmv_check_connect(obd);
2008 LASSERT(*vallen == sizeof(__u32));
2009 for (i = 0, tgts = lmv->tgts; i < lmv->desc.ld_tgt_count;
2012 /* all tgts should be connected when this get called. */
2013 if (!tgts || !tgts->ltd_exp) {
2014 CERROR("target not setup?\n");
2018 if (!obd_get_info(tgts->ltd_exp, keylen, key,
2023 } else if (keylen >= strlen("lovdesc") && !strcmp(key, "lovdesc")) {
2024 rc = lmv_check_connect(obd);
2028 /* forwarding this request to first MDS, it should know LOV
2030 rc = obd_get_info(lmv->tgts[0].ltd_exp, keylen, key,
2033 } else if (keylen >= strlen("getext") && !strcmp(key, "getext")) {
2034 struct lmv_tgt_desc *tgts;
2037 rc = lmv_check_connect(obd);
2041 LASSERT(*vallen == sizeof(struct fid_extent));
2042 for (i = 0, tgts = lmv->tgts; i < lmv->desc.ld_tgt_count;
2045 /* all tgts should be connected when this get called. */
2046 if (!tgts || !tgts->ltd_exp) {
2047 CERROR("target not setup?\n");
2051 rc = obd_get_info(tgts->ltd_exp, keylen, key,
2059 CDEBUG(D_IOCTL, "invalid key\n");
/*
 * lmv_set_info() - apply or forward a set_info request identified by @key.
 *
 * Keys handled in the visible lines:
 *   "inter_mds"                  - sets lmv->server_timeout and calls
 *                                  lmv_set_timeouts().
 *   "sec", "sec_flags", "nllu"   - under lmv->lmv_lock, walks the targets,
 *                                  resolves a client obd with
 *                                  class_find_client_obd() and forwards the
 *                                  key through its obd_self_export.
 *   "audit" (keylen == 5)        - @val is a struct audit_attr_msg.
 *                                  AUDIT_FS, AUDIT_NULL and AUDIT_SYNC go to
 *                                  every target; AUDIT_DIR goes to every
 *                                  entry of the lmv_obj found for msg->id;
 *                                  the remaining calls go to the single
 *                                  target id_group(&msg->id).
 *   "flush_cred", "crypto_type"  - forwarded to each target.
 *   "ids"                        - forwarded to the target id_group() of
 *                                  the lustre_id in @val.
 *   "chkconnect"                 - runs lmv_check_connect().
 *
 * NOTE(review), to be confirmed against the full source because lines are
 * omitted from this listing:
 *   - the block-local "struct obd_export *exp" shadows the @exp parameter;
 *   - obd_set_info() is called between spin_lock() and spin_unlock() on
 *     lmv->lmv_lock, which is only safe if that call cannot sleep;
 *   - the per-target obd_set_info() calls in the AUDIT_FS and AUDIT_DIR
 *     loops do not assign their result to anything.
 */
2063 int lmv_set_info(struct obd_export *exp, obd_count keylen,
2064 void *key, obd_count vallen, void *val)
2066 struct lmv_tgt_desc *tgt;
2067 struct obd_device *obd;
2068 struct lmv_obd *lmv;
2072 obd = class_exp2obd(exp);
2074 CDEBUG(D_IOCTL, "invalid client cookie "LPX64"\n",
2075 exp->exp_handle.h_cookie);
/* handled locally: nothing is forwarded to the targets for this key */
2080 if (keylen >= strlen("inter_mds") && strcmp(key, "inter_mds") == 0) {
2081 lmv->server_timeout = 1;
2082 lmv_set_timeouts(obd);
2086 /* maybe this could be default */
2087 if ((keylen == strlen("sec") && strcmp(key, "sec") == 0) ||
2088 (keylen == strlen("sec_flags") && strcmp(key, "sec_flags") == 0) ||
2089 (keylen == strlen("nllu") && strcmp(key, "nllu") == 0)) {
2090 struct obd_export *exp;
2093 spin_lock(&lmv->lmv_lock);
2094 for (i = 0, tgt = lmv->tgts; i < lmv->desc.ld_tgt_count;
2097 /* during setup time the connections to mdc might
2098 * haven't been established.
2101 struct obd_device *tgt_obd;
2103 tgt_obd = class_find_client_obd(&tgt->uuid,
2107 CERROR("can't set info %s, "
2108 "device %s not attached?\n",
2109 (char *) key, tgt->uuid.uuid);
2113 exp = tgt_obd->obd_self_export;
2116 err = obd_set_info(exp, keylen, key, vallen, val);
2120 spin_unlock(&lmv->lmv_lock);
2124 if (keylen == 5 && strcmp(key, "audit") == 0) {
2125 struct audit_attr_msg * msg = val;
2126 int mds = id_group(&msg->id);
2128 LASSERT(mds < lmv->desc.ld_tgt_count);
2130 if (IS_AUDIT_OP(msg->attr, AUDIT_FS) ||
2131 IS_AUDIT_OP(msg->attr, AUDIT_NULL) ||
2132 IS_AUDIT_OP(msg->attr, AUDIT_SYNC)) {
2133 //FS audit, send message to all mds
2134 for (i = 0; i < lmv->desc.ld_tgt_count;i++) {
2135 obd_set_info(lmv->tgts[i].ltd_exp,
2136 keylen, key, vallen, val);
2139 else if (IS_AUDIT_OP(msg->attr, AUDIT_DIR)) {
2141 //if dir is splitted, send RPC to all mds involved
2142 struct lmv_obj *obj;
2143 struct lustre_id rid;
2146 obj = lmv_grab_obj(obd, &msg->id);
2149 for (i = 0; i < obj->objcount; i++) {
2150 rid = obj->objs[i].id;
2151 mds = id_group(&rid);
2152 obd_set_info(lmv->tgts[mds].ltd_exp,
2156 lmv_unlock_obj(obj);
2160 rc = obd_set_info(lmv->tgts[mds].ltd_exp,
2161 keylen, key, vallen, val);
2165 //set audit for file
2166 rc = obd_set_info(lmv->tgts[mds].ltd_exp,
2167 keylen, key, vallen, val);
2171 if (((keylen == strlen("flush_cred") &&
2172 strcmp(key, "flush_cred") == 0)) ||
2173 ((keylen == strlen("crypto_type") &&
2174 strcmp(key, "crypto_type") == 0))) {
2177 for (i = 0, tgt = lmv->tgts; i < lmv->desc.ld_tgt_count;
2181 rc = obd_set_info(tgt->ltd_exp,
2182 keylen, key, vallen, val);
2190 if (keylen == strlen("ids") && memcmp(key, "ids", keylen) == 0) {
2191 struct lustre_id *id = (struct lustre_id *)val;
2193 rc = lmv_check_connect(obd);
2197 rc = obd_set_info(lmv->tgts[id_group(id)].ltd_exp,
2198 keylen, key, vallen, val);
2202 if (keylen == strlen("chkconnect") &&
2203 memcmp(key, "chkconnect", keylen) == 0) {
2204 rc = lmv_check_connect(obd);
/*
 * lmv_packmd() - pack the in-memory MEA @lsm into its little-endian form in
 * *@lmmp.
 *
 * mea_size is sizeof(struct lustre_id) * target count + sizeof(struct mea).
 * Visible cases:
 *   - *lmmp set and @lsm NULL: the buffer is freed.
 *   - *lmmp NULL: a buffer of mea_size bytes is allocated.
 *   - otherwise @lsm is read as a struct mea whose magic must be
 *     MEA_MAGIC_LAST_CHAR or MEA_MAGIC_ALL_CHARS; magic, count and master
 *     are stored with cpu_to_le32() and each id is copied and converted
 *     with id_cpu_to_le().
 *
 * Fix: the id loop used to assign meap->mea_ids[i] to itself, so the ids of
 * @lsm never reached the packed buffer; they are now copied from lsmp.
 *
 * NOTE(review): the return statements and the action taken on a bad magic
 * are on lines omitted from this listing.
 */
2211 int lmv_packmd(struct obd_export *exp, struct lov_mds_md **lmmp,
2212 struct lov_stripe_md *lsm)
2214 struct obd_device *obd = class_exp2obd(exp);
2215 struct lmv_obd *lmv = &obd->u.lmv;
2216 struct mea *meap, *lsmp;
2220 mea_size = (sizeof(struct lustre_id) *
2221 lmv->desc.ld_tgt_count) + sizeof(struct mea);
2225 if (*lmmp && !lsm) {
2226 OBD_FREE(*lmmp, mea_size);
2231 if (*lmmp == NULL) {
2232 OBD_ALLOC(*lmmp, mea_size);
2240 lsmp = (struct mea *)lsm;
2241 meap = (struct mea *)*lmmp;
2243 if (lsmp->mea_magic != MEA_MAGIC_LAST_CHAR &&
2244 lsmp->mea_magic != MEA_MAGIC_ALL_CHARS)
2247 meap->mea_magic = cpu_to_le32(lsmp->mea_magic);
2248 meap->mea_count = cpu_to_le32(lsmp->mea_count);
2249 meap->mea_master = cpu_to_le32(lsmp->mea_master);
2251 for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
/* take the id from the source MEA, then byte-swap the packed copy in place */
2252 meap->mea_ids[i] = lsmp->mea_ids[i];
2253 id_cpu_to_le(&meap->mea_ids[i]);
/*
 * lmv_unpackmd() - unpack a little-endian MEA (@lmm, @lmm_size bytes) into
 * a freshly allocated in-memory struct mea returned through *@lsmp.
 *
 * mea_size is sizeof(struct lustre_id) * target count + sizeof(struct mea).
 * Visible cases:
 *   - *lsmp set and @lmm NULL: the existing MEA is freed.
 *   - otherwise @lmm_size is asserted equal to mea_size and a new MEA is
 *     allocated.  When the magic is one of the two known values it is
 *     converted with le32_to_cpu(); otherwise @lmm is read as a
 *     struct mea_old, sanity-checked (count and master at most 256, size
 *     fits, master not above count) and given MEA_MAGIC_LAST_CHAR.  Count,
 *     master and every id are then converted to CPU byte order.
 *
 * NOTE(review), to be confirmed against the full source because lines are
 * omitted from this listing:
 *   - mea_size is reassigned in the old-format branch, so the OBD_FREE()
 *     reached through out_free_mea uses a different size than the
 *     OBD_ALLOC() above it;
 *   - mea->mea_magic is compared with the CPU-order constants before it is
 *     passed through le32_to_cpu().
 */
2259 int lmv_unpackmd(struct obd_export *exp, struct lov_stripe_md **lsmp,
2260 struct lov_mds_md *lmm, int lmm_size)
2262 struct obd_device *obd = class_exp2obd(exp);
2263 struct mea **tmea = (struct mea **)lsmp;
2264 struct mea *mea = (struct mea *)lmm;
2265 struct lmv_obd *lmv = &obd->u.lmv;
2266 int mea_size, i, rc = 0;
2270 mea_size = sizeof(struct lustre_id) *
2271 lmv->desc.ld_tgt_count + sizeof(struct mea);
2276 if (*lsmp != NULL && lmm == NULL) {
2277 OBD_FREE(*tmea, mea_size);
2281 LASSERT(mea_size == lmm_size);
2283 OBD_ALLOC(*tmea, mea_size);
2290 if (mea->mea_magic == MEA_MAGIC_LAST_CHAR ||
2291 mea->mea_magic == MEA_MAGIC_ALL_CHARS)
2293 magic = le32_to_cpu(mea->mea_magic);
2295 struct mea_old *old = (struct mea_old *)lmm;
/* from here on mea_size holds the size computed for the old layout */
2297 mea_size = sizeof(struct lustre_id) * old->mea_count +
2298 sizeof(struct mea_old);
2300 if (old->mea_count > 256 || old->mea_master > 256 ||
2301 lmm_size < mea_size || old->mea_master > old->mea_count) {
2302 CWARN("bad MEA: count %u, master %u, size %u\n",
2303 old->mea_count, old->mea_master, mea_size);
2304 GOTO(out_free_mea, rc = -EINVAL);
2306 magic = MEA_MAGIC_LAST_CHAR;
2309 (*tmea)->mea_magic = magic;
2310 (*tmea)->mea_count = le32_to_cpu(mea->mea_count);
2311 (*tmea)->mea_master = le32_to_cpu(mea->mea_master);
2313 for (i = 0; i < (*tmea)->mea_count; i++) {
2314 (*tmea)->mea_ids[i] = mea->mea_ids[i];
2315 id_le_to_cpu(&(*tmea)->mea_ids[i]);
2320 OBD_FREE(*tmea, mea_size);
/*
 * lmv_brw() - forward a bulk read/write to the target named by oa->o_mds.
 *
 * @oa, @ea and @pgarr must be non-NULL and oa->o_mds must be a valid target
 * index (all asserted).  @ea is read as a struct mea: the generation and
 * inode number of mea_ids[oa->o_mds] are written to oa->o_gr and oa->o_id,
 * and o_valid is set to OBD_MD_FLID | OBD_MD_FLGROUP.  obd_brw() is then
 * called on that target's export with a NULL stripe md.
 */
2324 int lmv_brw(int rw, struct obd_export *exp, struct obdo *oa,
2325 struct lov_stripe_md *ea, obd_count oa_bufs,
2326 struct brw_page *pgarr, struct obd_trans_info *oti)
2328 struct obd_device *obd = exp->exp_obd;
2329 struct lmv_obd *lmv = &obd->u.lmv;
2330 struct mea *mea = (struct mea *) ea;
2333 LASSERT(oa != NULL);
2334 LASSERT(ea != NULL);
2335 LASSERT(pgarr != NULL);
2336 LASSERT(oa->o_mds < lmv->desc.ld_tgt_count);
/* address the object by the id recorded for this target in the MEA */
2338 oa->o_gr = id_gen(&mea->mea_ids[oa->o_mds]);
2339 oa->o_id = id_ino(&mea->mea_ids[oa->o_mds]);
2340 oa->o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
2342 err = obd_brw(rw, lmv->tgts[oa->o_mds].ltd_exp,
2343 oa, NULL, oa_bufs, pgarr, oti);
/*
 * lmv_cancel_unused() - call obd_cancel_unused() on the configured targets.
 *
 * @lsm must be NULL (asserted).  Each target is tested for a missing export
 * or an inactive flag (the action taken is on an omitted line, presumably a
 * skip), and obd_cancel_unused() is called on the target's export with
 * @flags and @opaque passed through.
 *
 * NOTE(review): how the per-target err values are combined into the return
 * value is not visible in this listing.
 */
2347 static int lmv_cancel_unused(struct obd_export *exp,
2348 struct lov_stripe_md *lsm,
2349 int flags, void *opaque)
2351 struct obd_device *obd = exp->exp_obd;
2352 struct lmv_obd *lmv = &obd->u.lmv;
2356 LASSERT(lsm == NULL);
2358 for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
2359 if (!lmv->tgts[i].ltd_exp || !lmv->tgts[i].active)
2362 err = obd_cancel_unused(lmv->tgts[i].ltd_exp,
2363 NULL, flags, opaque);
/*
 * OBD method table for the LMV device type.  lmv_init() passes it to
 * class_register_type().  Each initializer binds one obd_ops slot to the
 * LMV implementation of that operation.
 */
2370 struct obd_ops lmv_obd_ops = {
2371 .o_owner = THIS_MODULE,
2372 .o_attach = lmv_attach,
2373 .o_detach = lmv_detach,
2374 .o_setup = lmv_setup,
2375 .o_cleanup = lmv_cleanup,
2376 .o_precleanup = lmv_precleanup,
2377 .o_process_config = lmv_process_config,
2378 .o_connect = lmv_connect,
2379 .o_disconnect = lmv_disconnect,
2380 .o_statfs = lmv_statfs,
2381 .o_llog_init = lmv_llog_init,
2382 .o_llog_finish = lmv_llog_finish,
2383 .o_get_info = lmv_get_info,
2384 .o_set_info = lmv_set_info,
2385 .o_create = lmv_obd_create,
2386 .o_packmd = lmv_packmd,
2387 .o_unpackmd = lmv_unpackmd,
2389 .o_init_ea_size = lmv_init_ea_size,
2390 .o_notify = lmv_notify,
2391 .o_iocontrol = lmv_iocontrol,
2392 .o_cancel_unused = lmv_cancel_unused,
/*
 * Metadata method table for the LMV device type.  lmv_init() passes it to
 * class_register_type() together with lmv_obd_ops.  Each initializer binds
 * one md_ops slot to the LMV implementation of that operation.
 */
2395 struct md_ops lmv_md_ops = {
2396 .m_getstatus = lmv_getstatus,
2397 .m_getattr = lmv_getattr,
2398 .m_change_cbdata = lmv_change_cbdata,
2399 .m_change_cbdata_name = lmv_change_cbdata_name,
2400 .m_close = lmv_close,
2401 .m_create = lmv_create,
2402 .m_done_writing = lmv_done_writing,
2403 .m_enqueue = lmv_enqueue,
2404 .m_getattr_lock = lmv_getattr_lock,
2405 .m_intent_lock = lmv_intent_lock,
2407 .m_rename = lmv_rename,
2408 .m_setattr = lmv_setattr,
2410 .m_readpage = lmv_readpage,
2411 .m_unlink = lmv_unlink,
2412 .m_get_real_obd = lmv_get_real_obd,
2413 .m_valid_attrs = lmv_valid_attrs,
2414 .m_delete_inode = lmv_delete_inode,
2415 .m_access_check = lmv_access_check,
/*
 * lmv_init() - module initialisation.
 *
 * Creates the "lmv_objects" slab cache sized for struct lmv_obj and stores
 * it in obj_cache, logging an error when that fails.  It then fills lvars
 * with lprocfs_init_vars() and registers the LMV device type under
 * OBD_LMV_DEVICENAME with lmv_obd_ops and lmv_md_ops.  The cache is
 * destroyed again on what is presumably the registration-failure path (the
 * test is on an omitted line).
 */
2418 int __init lmv_init(void)
2420 struct lprocfs_static_vars lvars;
2423 obj_cache = kmem_cache_create("lmv_objects",
2424 sizeof(struct lmv_obj),
2427 CERROR("error allocating lmv objects cache\n");
2431 lprocfs_init_vars(lmv, &lvars);
2432 rc = class_register_type(&lmv_obd_ops, &lmv_md_ops,
2434 OBD_LMV_DEVICENAME);
2436 kmem_cache_destroy(obj_cache);
/*
 * lmv_exit() - module unload.
 *
 * Unregisters the LMV device type and destroys the lmv object cache,
 * asserting that kmem_cache_destroy() returns 0.  The assertion message
 * reports obj_cache_count, the number of objects still allocated.
 *
 * Fix: the two adjacent string literals used to join as
 * "object(s)still in use"; a separating space has been added.
 */
2442 static void lmv_exit(void)
2444 class_unregister_type(OBD_LMV_DEVICENAME);
2446 LASSERTF(kmem_cache_destroy(obj_cache) == 0,
2447 "can't free lmv objects cache, %d object(s) "
2448 "still in use\n", atomic_read(&obj_cache_count));
/*
 * Module metadata and entry points: lmv_init() is registered as the load
 * hook and lmv_exit() as the unload hook.
 */
2451 MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
2452 MODULE_DESCRIPTION("Lustre Logical Metadata Volume OBD driver");
2453 MODULE_LICENSE("GPL");
2455 module_init(lmv_init);
2456 module_exit(lmv_exit);