/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
- * lustre/mdt/mdt_handler.c
- * Lustre Metadata Target (mdt) request handler
+ * GPL HEADER START
*
- * Copyright (c) 2006 Cluster File Systems, Inc.
- * Author: Peter Braam <braam@clusterfs.com>
- * Author: Andreas Dilger <adilger@clusterfs.com>
- * Author: Phil Schwan <phil@clusterfs.com>
- * Author: Mike Shaver <shaver@clusterfs.com>
- * Author: Nikita Danilov <nikita@clusterfs.com>
- * Author: Huang Hua <huanghua@clusterfs.com>
- * Author: Yury Umanets <umka@clusterfs.com>
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
- * This file is part of the Lustre file system, http://www.lustre.org
- * Lustre is a trademark of Cluster File Systems, Inc.
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
*
- * You may have signed or agreed to another license before downloading
- * this software. If so, you are bound by the terms and conditions
- * of that agreement, and the following does not apply to you. See the
- * LICENSE file included with this distribution for more information.
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
*
- * If you did not agree to a different license, then this copy of Lustre
- * is open source software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
*
- * In either case, Lustre is distributed in the hope that it will be
- * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
- * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * license text for more details.
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/mdt/mdt_handler.c
+ *
+ * Lustre Metadata Target (mdt) request handler
+ *
+ * Author: Peter Braam <braam@clusterfs.com>
+ * Author: Andreas Dilger <adilger@clusterfs.com>
+ * Author: Phil Schwan <phil@clusterfs.com>
+ * Author: Mike Shaver <shaver@clusterfs.com>
+ * Author: Nikita Danilov <nikita@clusterfs.com>
+ * Author: Huang Hua <huanghua@clusterfs.com>
+ * Author: Yury Umanets <umka@clusterfs.com>
*/
#ifndef EXPORT_SYMTAB
#include <lustre_mds.h>
#include <lustre_mdt.h>
#include "mdt_internal.h"
-#include <linux/lustre_acl.h>
+#include <lustre_acl.h>
#include <lustre_param.h>
mdl_mode_t mdt_mdl_lock_modes[] = {
struct md_object *next = mdt_object_child(parent);
struct lu_fid *child_fid = &info->mti_tmp_fid1;
struct lu_name *lname = NULL;
- const char *name;
+ const char *name = NULL;
int namelen = 0;
struct mdt_lock_handle *lhp;
struct ldlm_lock *lock;
namelen = req_capsule_get_size(info->mti_pill, &RMF_NAME,
RCL_CLIENT) - 1;
- LASSERT(namelen >= 0);
-
- /* XXX: "namelen == 0" is for getattr by fid (OBD_CONNECT_ATTRFID),
- * otherwise do not allow empty name, that is the name must contain
- * at least one character and the terminating '\0'*/
- if (namelen == 0) {
- reqbody =req_capsule_client_get(info->mti_pill, &RMF_MDT_BODY);
- LASSERT(fid_is_sane(&reqbody->fid2));
- name = NULL;
-
- CDEBUG(D_INODE, "getattr with lock for "DFID"/"DFID", "
- "ldlm_rep = %p\n",
- PFID(mdt_object_fid(parent)), PFID(&reqbody->fid2),
- ldlm_rep);
- } else {
- lname = mdt_name(info->mti_env, (char *)name, namelen);
- CDEBUG(D_INODE, "getattr with lock for "DFID"/%s, "
- "ldlm_rep = %p\n",
- PFID(mdt_object_fid(parent)), name, ldlm_rep);
- }
+ if (!info->mti_cross_ref) {
+ /*
+ * XXX: Check for "namelen == 0" is for getattr by fid
+ * (OBD_CONNECT_ATTRFID), otherwise do not allow empty name,
+ * that is the name must contain at least one character and
+ * the terminating '\0'
+ */
+ if (namelen == 0) {
+ reqbody = req_capsule_client_get(info->mti_pill,
+ &RMF_MDT_BODY);
+ LASSERT(fid_is_sane(&reqbody->fid2));
+ name = NULL;
+ CDEBUG(D_INODE, "getattr with lock for "DFID"/"DFID", "
+ "ldlm_rep = %p\n",
+ PFID(mdt_object_fid(parent)), PFID(&reqbody->fid2),
+ ldlm_rep);
+ } else {
+ lname = mdt_name(info->mti_env, (char *)name, namelen);
+ CDEBUG(D_INODE, "getattr with lock for "DFID"/%s, "
+ "ldlm_rep = %p\n", PFID(mdt_object_fid(parent)),
+ name, ldlm_rep);
+ }
+ }
mdt_set_disposition(info, ldlm_rep, DISP_LOOKUP_EXECD);
rc = mdt_object_exists(parent);
&parent->mot_obj.mo_lu,
"Parent doesn't exist!\n");
RETURN(-ESTALE);
- } else
+ } else if (!info->mti_cross_ref) {
LASSERTF(rc > 0, "Parent "DFID" is on remote server\n",
PFID(mdt_object_fid(parent)));
-
+ }
if (lname) {
rc = mdt_raw_lookup(info, parent, lname, ldlm_rep);
if (rc != 0) {
LDLM_LOCK_PUT(lock);
rc = 0;
} else {
- struct md_attr *ma = &info->mti_attr;
+ struct md_attr *ma;
relock:
+ ma = &info->mti_attr;
+
mdt_lock_handle_init(lhc);
mdt_lock_reg_init(lhc, LCK_PR);
LU_OBJECT_DEBUG(D_WARNING, info->mti_env,
&child->mot_obj.mo_lu,
"Object doesn't exist!\n");
+ GOTO(out_child, rc = -ESTALE);
}
ma->ma_valid = 0;
lock = ldlm_handle2lock(&lhc->mlh_reg_lh);
if (lock) {
struct mdt_body *repbody;
- struct lu_attr *ma;
/* Debugging code. */
res_id = &lock->l_resource->lr_name;
*/
repbody = req_capsule_server_get(info->mti_pill,
&RMF_MDT_BODY);
- ma = &info->mti_attr.ma_attr;
if (lock->l_policy_data.l_inodebits.bits &
MDS_INODELOCK_UPDATE)
mdt_pack_size2body(info, child);
RETURN(-EFAULT);
}
- if (keylen != (sizeof(KEY_READ_ONLY) - 1) ||
- memcmp(key, KEY_READ_ONLY, keylen) != 0)
+ if (!KEY_IS(KEY_READ_ONLY))
RETURN(-EINVAL);
req->rq_status = 0;
continue;
fid_le_to_cpu(lf, &ent->lde_fid);
- if (le32_to_cpu(ent->lde_hash) & MAX_HASH_HIGHEST_BIT)
+ if (le64_to_cpu(ent->lde_hash) & MAX_HASH_HIGHEST_BIT)
ma->ma_attr.la_mode = S_IFDIR;
else
ma->ma_attr.la_mode = 0;
memcpy(name, ent->lde_name, le16_to_cpu(ent->lde_namelen));
lname = mdt_name(info->mti_env, name,
- le16_to_cpu(ent->lde_namelen) + 1);
+ le16_to_cpu(ent->lde_namelen));
ma->ma_attr_flags |= MDS_PERM_BYPASS;
rc = mdo_name_insert(info->mti_env,
md_object_next(&object->mot_obj),
* reqbody->nlink contains number bytes to read.
*/
rdpg->rp_hash = reqbody->size;
- if ((__u64)rdpg->rp_hash != reqbody->size) {
- CERROR("Invalid hash: %#llx != %#llx\n",
- (__u64)rdpg->rp_hash, reqbody->size);
+ if (rdpg->rp_hash != reqbody->size) {
+ CERROR("Invalid hash: "LPX64" != "LPX64"\n",
+ rdpg->rp_hash, reqbody->size);
RETURN(-EFAULT);
}
rdpg->rp_count = reqbody->nlink;
RETURN(rc);
}
-/* TODO these two methods not available now. */
-
/* this should sync the whole device */
-static int mdt_device_sync(struct mdt_thread_info *info)
+static int mdt_device_sync(const struct lu_env *env, struct mdt_device *mdt)
{
- return 0;
+ struct dt_device *dt = mdt->mdt_bottom;
+ int rc;
+ ENTRY;
+
+ rc = dt->dd_ops->dt_sync(env, dt);
+ RETURN(rc);
}
/* this should sync this object */
static int mdt_object_sync(struct mdt_thread_info *info)
{
- return 0;
+ struct md_object *next;
+ int rc;
+ ENTRY;
+
+ if (!mdt_object_exists(info->mti_object)) {
+ CWARN("Non existing object "DFID"!\n",
+ PFID(mdt_object_fid(info->mti_object)));
+ RETURN(-ESTALE);
+ }
+ next = mdt_object_child(info->mti_object);
+ rc = mo_object_sync(info->mti_env, next);
+
+ RETURN(rc);
}
static int mdt_sync(struct mdt_thread_info *info)
/* sync the whole device */
rc = req_capsule_server_pack(pill);
if (rc == 0)
- rc = mdt_device_sync(info);
+ rc = mdt_device_sync(info->mti_env, info->mti_mdt);
else
rc = err_serious(rc);
} else {
ENTRY;
CDEBUG(D_INFO, "Find object for "DFID"\n", PFID(f));
- o = lu_object_find(env, d->mdt_md_dev.md_lu_dev.ld_site, f);
+ o = lu_object_find(env, &d->mdt_md_dev.md_lu_dev, f, NULL);
if (unlikely(IS_ERR(o)))
m = (struct mdt_object *)o;
else
LASSERT(lh->mlh_type != MDT_PDO_LOCK);
}
+ if (lh->mlh_type == MDT_PDO_LOCK) {
+ /* check for exists after object is locked */
+ if (mdt_object_exists(o) == 0) {
+ /* Non-existent object shouldn't have PDO lock */
+ RETURN(-ESTALE);
+ } else {
+ /* Non-dir object shouldn't have PDO lock */
+ LASSERT(S_ISDIR(lu_object_attr(&o->mot_obj.mo_lu)));
+ }
+ }
+
memset(policy, 0, sizeof(*policy));
fid_build_reg_res_name(mdt_object_fid(o), res_id);
}
/*
- * Finish res_id initializing by name hash marking patr of
+ * Finish res_id initializing by name hash marking part of
* directory which is taking modification.
*/
res_id->name[LUSTRE_RES_ID_HSH_OFF] = lh->mlh_pdo_hash;
/*
* Use LDLM_FL_LOCAL_ONLY for this lock. We do not know yet if it is
* going to be sent to client. If it is - mdt_intent_policy() path will
- * fix it up and turns FL_LOCAL flag off.
+ * fix it up and turn FL_LOCAL flag off.
*/
rc = mdt_fid_lock(ns, &lh->mlh_reg_lh, lh->mlh_reg_mode, policy,
res_id, LDLM_FL_LOCAL_ONLY | LDLM_FL_ATOMIC_CB);
if (rc)
GOTO(out, rc);
- if (lh->mlh_type == MDT_PDO_LOCK) {
- /* check for exists after object is locked */
- if (mdt_object_exists(o) == 0) {
- /* Non-existent object shouldn't have PDO lock */
- rc = -ESTALE;
- } else {
- /* Non-dir object shouldn't have PDO lock */
- LASSERT(S_ISDIR(lu_object_attr(&o->mot_obj.mo_lu)));
- }
- }
out:
if (rc)
mdt_object_unlock(info, o, lh, 1);
RETURN(rc);
}
+static inline
+void mdt_save_lock(struct ptlrpc_request *req, struct lustre_handle *h,
+ ldlm_mode_t mode, int decref)
+{ /* Dispose of one lock handle: unlock it if decref is set, else save it in req. */
+ ENTRY;
+
+ if (lustre_handle_is_used(h)) { /* an unused handle is left untouched */
+ if (decref)
+ mdt_fid_unlock(h, mode); /* caller asked for the unlock path */
+ else
+ ptlrpc_save_lock(req, h, mode); /* keep the lock with the request instead */
+ h->cookie = 0ull; /* the handle is cleared on both paths */
+ }
+
+ EXIT;
+}
+
/*
* Just call ldlm_lock_decref() if decref, else we only call ptlrpc_save_lock()
* to save this lock in req. when transaction committed, req will be released,
struct ptlrpc_request *req = mdt_info_req(info);
ENTRY;
- if (lustre_handle_is_used(&lh->mlh_pdo_lh)) {
- /* Do not save PDO locks to request, just decref. */
- mdt_fid_unlock(&lh->mlh_pdo_lh,
- lh->mlh_pdo_mode);
- lh->mlh_pdo_lh.cookie = 0ull;
- }
-
- if (lustre_handle_is_used(&lh->mlh_reg_lh)) {
- if (decref) {
- mdt_fid_unlock(&lh->mlh_reg_lh,
- lh->mlh_reg_mode);
- } else {
- ptlrpc_save_lock(req, &lh->mlh_reg_lh,
- lh->mlh_reg_mode);
- }
- lh->mlh_reg_lh.cookie = 0ull;
- }
+ mdt_save_lock(req, &lh->mlh_pdo_lh, lh->mlh_pdo_mode, decref);
+ mdt_save_lock(req, &lh->mlh_reg_lh, lh->mlh_reg_mode, decref);
EXIT;
}
LASSERT(current->journal_info == NULL);
/*
- * Checking for various OBD_FAIL_$PREF_$OPC_NET codes. _Do_ not try
- * to put same checks into handlers like mdt_close(), mdt_reint(),
+ * Checking for various OBD_FAIL_$PREF_$OPC_NET codes. _Do_ not try
+ * to put same checks into handlers like mdt_close(), mdt_reint(),
* etc., without talking to mdt authors first. Checking same thing
* there again is useless and returning 0 error wihtout packing reply
* is buggy! Handlers either pack reply or return error.
* lock.
*/
if (new_lock == NULL)
- new_lock = ldlm_handle2lock(&lh->mlh_reg_lh);
+ new_lock = ldlm_handle2lock_long(&lh->mlh_reg_lh, 0);
if (new_lock == NULL && (flags & LDLM_FL_INTENT_ONLY)) {
lh->mlh_reg_lh.cookie = 0;
RETURN(ELDLM_LOCK_REPLACED);
}
- /* This lock might already be given to the client by an resent req,
- * in this case we should return ELDLM_LOCK_ABORTED,
- * so we should check led_held_locks here, but it will affect
- * performance, FIXME
+ /*
+ * Fixup the lock to be given to the client.
*/
- /* Fixup the lock to be given to the client */
lock_res_and_lock(new_lock);
- new_lock->l_readers = 0;
- new_lock->l_writers = 0;
+ /* Zero new_lock->l_readers and new_lock->l_writers without triggering
+ * possible blocking AST. */
+ while (new_lock->l_readers > 0) {
+ lu_ref_del(&new_lock->l_reference, "reader", new_lock);
+ lu_ref_del(&new_lock->l_reference, "user", new_lock);
+ new_lock->l_readers--;
+ }
+ while (new_lock->l_writers > 0) {
+ lu_ref_del(&new_lock->l_reference, "writer", new_lock);
+ lu_ref_del(&new_lock->l_reference, "user", new_lock);
+ new_lock->l_writers--;
+ }
new_lock->l_export = class_export_get(req->rq_export);
- spin_lock(&req->rq_export->exp_ldlm_data.led_lock);
- list_add(&new_lock->l_export_chain,
- &new_lock->l_export->exp_ldlm_data.led_held_locks);
- spin_unlock(&req->rq_export->exp_ldlm_data.led_lock);
-
new_lock->l_blocking_ast = lock->l_blocking_ast;
new_lock->l_completion_ast = lock->l_completion_ast;
new_lock->l_remote_handle = lock->l_remote_handle;
new_lock->l_flags &= ~LDLM_FL_LOCAL;
+ lustre_hash_add(new_lock->l_export->exp_lock_hash,
+ &new_lock->l_remote_handle,
+ &new_lock->l_exp_hash);
+
unlock_res_and_lock(new_lock);
- LDLM_LOCK_PUT(new_lock);
+ LDLM_LOCK_RELEASE(new_lock);
lh->mlh_reg_lh.cookie = 0;
RETURN(ELDLM_LOCK_REPLACED);
struct obd_export *exp = req->rq_export;
struct lustre_handle remote_hdl;
struct ldlm_request *dlmreq;
- struct list_head *iter;
+ struct ldlm_lock *lock;
if (!(lustre_msg_get_flags(req->rq_reqmsg) & MSG_RESENT))
return;
dlmreq = req_capsule_client_get(info->mti_pill, &RMF_DLM_REQ);
remote_hdl = dlmreq->lock_handle[0];
- spin_lock(&exp->exp_ldlm_data.led_lock);
- list_for_each(iter, &exp->exp_ldlm_data.led_held_locks) {
- struct ldlm_lock *lock;
- lock = list_entry(iter, struct ldlm_lock, l_export_chain);
- if (lock == new_lock)
- continue;
- if (lock->l_remote_handle.cookie == remote_hdl.cookie) {
+ lock = lustre_hash_lookup(exp->exp_lock_hash, &remote_hdl);
+ if (lock) {
+ if (lock != new_lock) {
lh->mlh_reg_lh.cookie = lock->l_handle.h_cookie;
lh->mlh_reg_mode = lock->l_granted_mode;
- LDLM_DEBUG(lock, "restoring lock cookie");
+ LDLM_DEBUG(lock, "Restoring lock cookie");
DEBUG_REQ(D_DLMTRACE, req,
"restoring lock cookie "LPX64,
lh->mlh_reg_lh.cookie);
if (old_lock)
*old_lock = LDLM_LOCK_GET(lock);
- spin_unlock(&exp->exp_ldlm_data.led_lock);
+ lh_put(exp->exp_lock_hash, &lock->l_exp_hash);
return;
}
+
+ lh_put(exp->exp_lock_hash, &lock->l_exp_hash);
}
- spin_unlock(&exp->exp_ldlm_data.led_lock);
/*
* If the xid matches, then we know this is a resent request, and allow
rep->lock_policy_res2 = clear_serious(rc);
lhc->mlh_reg_lh.cookie = 0ull;
- rc = ELDLM_LOCK_ABORTED;
- RETURN(rc);
+ if (rc == -ENOTCONN || rc == -ENODEV) {
+ /*
+ * If it is a disconnect error (ENODEV or ENOTCONN), the error
+ * will be returned by rq_status, and the client at the ptlrpc
+ * layer will detect this, then disconnect and reconnect the
+ * import immediately, instead of impacting the following RPCs.
+ */
+ RETURN(rc);
+ } else {
+ /*
+ * For other cases, the error will be returned by the intent,
+ * and the client will retrieve the result from the intent.
+ */
+ /*
+ * FIXME: when open lock is finished, that should be
+ * checked here.
+ */
+ RETURN(ELDLM_LOCK_ABORTED);
+ }
}
static int mdt_intent_code(long itcode)
if (it != NULL) {
const struct ldlm_request *dlmreq;
__u64 req_bits;
-#if 0
- struct ldlm_lock *lock = *lockp;
-
- LDLM_DEBUG(lock, "intent policy opc: %s\n",
- ldlm_it2str(it->opc));
-#endif
rc = mdt_intent_opc(it->opc, info, lockp, flags);
if (rc == 0)
m->mdt_bottom = NULL;
}
-static struct lu_device *mdt_layer_setup(const struct lu_env *env,
+static struct lu_device *mdt_layer_setup(struct lu_env *env,
const char *typename,
struct lu_device *child,
struct lustre_cfg *cfg)
return ERR_PTR(rc);
}
-static int mdt_stack_init(const struct lu_env *env,
+static int mdt_stack_init(struct lu_env *env,
struct mdt_device *m, struct lustre_cfg *cfg)
{
struct lu_device *d = &m->mdt_md_dev.md_lu_dev;
}
/* init the stack */
- rc = mdt_stack_init(env, m, cfg);
+ rc = mdt_stack_init((struct lu_env *)env, m, cfg);
if (rc) {
CERROR("Can't init device stack, rc %d\n", rc);
GOTO(err_fini_proc, rc);
mdt_init_capa_ctxt(env, m);
- /* we use a shorter ldlm_timeout on MDS for keep bumping on
- * might-be slow processing OST */
+ /* Reduce the initial timeout on an MDS because it doesn't need such
+ * a long timeout as an OST does. Adaptive timeouts will adjust this
+ * value appropriately. */
if (ldlm_timeout == LDLM_TIMEOUT_DEFAULT)
ldlm_timeout = MDS_LDLM_TIMEOUT_DEFAULT;
mdt_stack_fini(env, m, md2lu_dev(m->mdt_child));
err_fini_proc:
mdt_procfs_fini(m);
+ ptlrpc_lprocfs_unregister_obd(obd);
lprocfs_obd_cleanup(obd);
err_fini_site:
lu_site_fini(s);
struct obd_device *obd = d->ld_obd;
lprocfs_mdt_init_vars(&lvars);
- rc = class_process_proc_param(PARAM_MDT, lvars.obd_vars, cfg, obd);
- if (rc)
- /* others are passed further */
+ rc = class_process_proc_param(PARAM_MDT, lvars.obd_vars,
+ cfg, obd);
+ if (rc == -ENOSYS)
+ /* we don't understand; pass it on */
rc = next->ld_ops->ldo_process_config(env, next, cfg);
break;
}
RETURN(NULL);
}
-static int mdt_object_init(const struct lu_env *env, struct lu_object *o)
+static int mdt_object_init(const struct lu_env *env, struct lu_object *o,
+ const struct lu_object_conf *_)
{
struct mdt_device *d = mdt_dev(o->lo_dev);
struct lu_device *under;
if (mdt->mdt_namespace != NULL || exp->exp_obd->obd_namespace != NULL)
ldlm_cancel_locks_for_export(exp);
+ /* release nid stat reference */
+ lprocfs_exp_cleanup(exp);
+
/* complete all outstanding replies */
spin_lock(&exp->exp_lock);
while (!list_empty(&exp->exp_outstanding_replies)) {
static int mdt_init_export(struct obd_export *exp)
{
struct mdt_export_data *med = &exp->exp_mdt_data;
+ int rc;
ENTRY;
CFS_INIT_LIST_HEAD(&med->med_open_head);
spin_lock(&exp->exp_lock);
exp->exp_connecting = 1;
spin_unlock(&exp->exp_lock);
- RETURN(0);
+ rc = ldlm_init_export(exp);
+ if (rc)
+ CERROR("Error %d while initializing export\n", rc);
+ RETURN(rc);
}
static int mdt_destroy_export(struct obd_export *export)
mdt_cleanup_idmap(med);
target_destroy_export(export);
+ ldlm_destroy_export(export);
if (obd_uuid_equals(&export->exp_client_uuid, &obd->obd_uuid))
RETURN(0);
switch (cmd) {
case OBD_IOC_SYNC:
- rc = dt->dd_ops->dt_sync(&env, dt);
+ rc = mdt_device_sync(&env, mdt);
break;
case OBD_IOC_SET_READONLY:
rc = dt->dd_ops->dt_sync(&env, dt);
}
};
-MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
+MODULE_AUTHOR("Sun Microsystems, Inc. <http://www.lustre.org/>");
MODULE_DESCRIPTION("Lustre Meta-data Target ("LUSTRE_MDT_NAME")");
MODULE_LICENSE("GPL");