* GPL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*/
/*
/*
* Initialized in mdt_mod_init().
*/
-unsigned long mdt_num_threads;
+static unsigned long mdt_num_threads;
+static unsigned long mdt_min_threads;
+static unsigned long mdt_max_threads;
/* ptlrpc request handler for MDT. All handlers are
* grouped into several slices - struct mdt_opc_slice,
void mdt_pack_attr2body(struct mdt_thread_info *info, struct mdt_body *b,
const struct lu_attr *attr, const struct lu_fid *fid)
{
- /*XXX should pack the reply body according to lu_valid*/
- b->valid |= OBD_MD_FLCTIME | OBD_MD_FLUID |
- OBD_MD_FLGID | OBD_MD_FLTYPE |
- OBD_MD_FLMODE | OBD_MD_FLNLINK | OBD_MD_FLFLAGS |
- OBD_MD_FLATIME | OBD_MD_FLMTIME ;
+ struct md_attr *ma = &info->mti_attr;
- if (!S_ISREG(attr->la_mode))
- b->valid |= OBD_MD_FLSIZE | OBD_MD_FLBLOCKS | OBD_MD_FLRDEV;
+ LASSERT(ma->ma_valid & MA_INODE);
b->atime = attr->la_atime;
b->mtime = attr->la_mtime;
b->nlink = attr->la_nlink;
b->rdev = attr->la_rdev;
+ /*XXX should pack the reply body according to lu_valid*/
+ b->valid |= OBD_MD_FLCTIME | OBD_MD_FLUID |
+ OBD_MD_FLGID | OBD_MD_FLTYPE |
+ OBD_MD_FLMODE | OBD_MD_FLNLINK | OBD_MD_FLFLAGS |
+ OBD_MD_FLATIME | OBD_MD_FLMTIME ;
+
+ if (!S_ISREG(attr->la_mode)) {
+ b->valid |= OBD_MD_FLSIZE | OBD_MD_FLBLOCKS | OBD_MD_FLRDEV;
+ } else if (ma->ma_need & MA_LOV && ma->ma_lmm_size == 0) {
+ /* means no objects are allocated on osts. */
+ LASSERT(!(ma->ma_valid & MA_LOV));
+ /* just ignore blocks occupied by extended attributes on MDS */
+ b->blocks = 0;
+ /* if no object is allocated on osts, the size on mds is valid. b=22272 */
+ b->valid |= OBD_MD_FLSIZE | OBD_MD_FLBLOCKS;
+ }
+
if (fid) {
b->fid1 = *fid;
b->valid |= OBD_MD_FLID;
if (info)
mdt_body_reverse_idmap(info, b);
+
+ if (b->valid & OBD_MD_FLSIZE)
+ CDEBUG(D_VFSTRACE, DFID": returning size %llu\n",
+ PFID(fid), (unsigned long long)b->size);
}
static inline int mdt_body_has_lov(const struct lu_attr *la,
repbody->eadatasize = ma->ma_lmv_size;
repbody->valid |= (OBD_MD_FLDIREA|OBD_MD_MEA);
}
- if (!(ma->ma_valid & MA_LOV) && !(ma->ma_valid & MA_LMV)) {
- repbody->valid |= OBD_MD_FLSIZE | OBD_MD_FLBLOCKS;
- }
} else if (S_ISLNK(la->la_mode) &&
reqbody->valid & OBD_MD_LINKNAME) {
buffer->lb_buf = ma->ma_lmm;
struct lu_name *lname = NULL;
const char *name = NULL;
int namelen = 0;
- struct mdt_lock_handle *lhp;
+ struct mdt_lock_handle *lhp = NULL;
struct ldlm_lock *lock;
struct ldlm_res_id *res_id;
int is_resent;
if (namelen == 0) {
reqbody = req_capsule_client_get(info->mti_pill,
&RMF_MDT_BODY);
- LASSERT(fid_is_sane(&reqbody->fid2));
- name = NULL;
+ if (unlikely(reqbody == NULL))
+ RETURN(err_serious(-EFAULT));
+ if (unlikely(!fid_is_sane(&reqbody->fid2)))
+ RETURN(err_serious(-EINVAL));
+
+ name = NULL;
CDEBUG(D_INODE, "getattr with lock for "DFID"/"DFID", "
"ldlm_rep = %p\n",
PFID(mdt_object_fid(parent)), PFID(&reqbody->fid2),
RETURN(rc);
}
- /* step 1: lock parent */
- lhp = &info->mti_lh[MDT_LH_PARENT];
- mdt_lock_pdo_init(lhp, LCK_PR, name, namelen);
- rc = mdt_object_lock(info, parent, lhp, MDS_INODELOCK_UPDATE,
- MDT_LOCAL_LOCK);
-
- if (unlikely(rc != 0))
- RETURN(rc);
-
if (lname) {
+ /* step 1: lock parent */
+ lhp = &info->mti_lh[MDT_LH_PARENT];
+ mdt_lock_pdo_init(lhp, LCK_PR, name, namelen);
+ rc = mdt_object_lock(info, parent, lhp, MDS_INODELOCK_UPDATE,
+ MDT_LOCAL_LOCK);
+ if (unlikely(rc != 0))
+ RETURN(rc);
+
/* step 2: lookup child's fid by name */
rc = mdo_lookup(info->mti_env, next, lname, child_fid,
&info->mti_spec);
LDLM_LOCK_PUT(lock);
rc = 0;
} else {
- struct md_attr *ma;
relock:
- ma = &info->mti_attr;
-
OBD_FAIL_TIMEOUT(OBD_FAIL_MDS_RESEND, obd_timeout*2);
mdt_lock_handle_init(lhc);
mdt_lock_reg_init(lhc, LCK_PR);
LU_OBJECT_DEBUG(D_WARNING, info->mti_env,
&child->mot_obj.mo_lu,
"Object doesn't exist!\n");
- GOTO(out_child, rc = -ESTALE);
+ GOTO(out_child, rc = -ENOENT);
}
- ma->ma_valid = 0;
- ma->ma_need = MA_INODE;
- rc = mo_attr_get(info->mti_env, next, ma);
- if (unlikely(rc != 0))
- GOTO(out_child, rc);
+ if (!(child_bits & MDS_INODELOCK_UPDATE)) {
+ struct md_attr *ma = &info->mti_attr;
- /* If the file has not been changed for some time, we return
- * not only a LOOKUP lock, but also an UPDATE lock and this
- * might save us RPC on later STAT. For directories, it also
- * let negative dentry starts working for this dir. */
- if (ma->ma_valid & MA_INODE &&
- ma->ma_attr.la_valid & LA_CTIME &&
- info->mti_mdt->mdt_namespace->ns_ctime_age_limit +
- ma->ma_attr.la_ctime < cfs_time_current_sec())
- child_bits |= MDS_INODELOCK_UPDATE;
+ ma->ma_valid = 0;
+ ma->ma_need = MA_INODE;
+ rc = mo_attr_get(info->mti_env,
+ mdt_object_child(child), ma);
+ if (unlikely(rc != 0))
+ GOTO(out_child, rc);
+
+ /* If the file has not been changed for some time, we
+ * return not only a LOOKUP lock, but also an UPDATE
+ * lock and this might save us an RPC on a later
+ * STAT. For directories, it also lets negative
+ * dentries start working for this dir. */
+ if (ma->ma_valid & MA_INODE &&
+ ma->ma_attr.la_valid & LA_CTIME &&
+ info->mti_mdt->mdt_namespace->ns_ctime_age_limit +
+ ma->ma_attr.la_ctime < cfs_time_current_sec())
+ child_bits |= MDS_INODELOCK_UPDATE;
+ }
rc = mdt_object_lock(info, child, lhc, child_bits,
MDT_CROSS_LOCK);
out_child:
mdt_object_put(info->mti_env, child);
out_parent:
- mdt_object_unlock(info, parent, lhp, 1);
+ if (lhp)
+ mdt_object_unlock(info, parent, lhp, 1);
return rc;
}
if (OBD_FAIL_CHECK(OBD_FAIL_MDS_SENDPAGE))
GOTO(abort_bulk, rc = 0);
- timeout = (int) req->rq_deadline - cfs_time_current_sec();
- if (timeout < 0)
- CERROR("Req deadline already passed %lu (now: %lu)\n",
- req->rq_deadline, cfs_time_current_sec());
- *lwi = LWI_TIMEOUT_INTERVAL(cfs_time_seconds(max(timeout, 1)),
- cfs_time_seconds(1), NULL, NULL);
- rc = l_wait_event(desc->bd_waitq, !ptlrpc_server_bulk_active(desc) ||
- exp->exp_failed || exp->exp_abort_active_req, lwi);
- LASSERT (rc == 0 || rc == -ETIMEDOUT);
+ do {
+ timeout = (int) req->rq_deadline - cfs_time_current_sec();
+ if (timeout < 0)
+ CERROR("Req deadline already passed %lu (now: %lu)\n",
+ req->rq_deadline, cfs_time_current_sec());
+ *lwi = LWI_TIMEOUT_INTERVAL(cfs_time_seconds(max(timeout, 1)),
+ cfs_time_seconds(1), NULL, NULL);
+ rc = l_wait_event(desc->bd_waitq,
+ !ptlrpc_server_bulk_active(desc) ||
+ exp->exp_failed ||
+ exp->exp_abort_active_req, lwi);
+ LASSERT (rc == 0 || rc == -ETIMEDOUT);
+ } while ((rc == -ETIMEDOUT) &&
+ (req->rq_deadline > cfs_time_current_sec()));
if (rc == 0) {
if (desc->bd_success &&
static int mdt_enqueue(struct mdt_thread_info *info)
{
struct ptlrpc_request *req;
- __u64 req_bits;
int rc;
/*
LASSERT(info->mti_dlm_req != NULL);
req = mdt_info_req(info);
-
- /*
- * Lock without inodebits makes no sense and will oops later in
- * ldlm. Let's check it now to see if we have wrong lock from client or
- * bits get corrupted somewhere in mdt_intent_policy().
- */
- req_bits = info->mti_dlm_req->lock_desc.l_policy_data.l_inodebits.bits;
- /* This is disabled because we need to support liblustre flock.
- * LASSERT(req_bits != 0);
- */
-
rc = ldlm_handle_enqueue0(info->mti_mdt->mdt_namespace,
req, info->mti_dlm_req, &cbs);
info->mti_fail_id = OBD_FAIL_LDLM_REPLY;
int mdt_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
void *data, int flag)
{
- struct obd_device *obd = lock->l_resource->lr_namespace->ns_obd;
+ struct obd_device *obd = ldlm_lock_to_ns(lock)->ns_obd;
struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev);
int rc;
ENTRY;
RETURN(-ESTALE);
} else {
/* Non-dir object shouldn't have PDO lock */
- LASSERT(S_ISDIR(lu_object_attr(&o->mot_obj.mo_lu)));
+ if (!S_ISDIR(lu_object_attr(&o->mot_obj.mo_lu)))
+ RETURN(-ENOTDIR);
}
}
res_id, LDLM_FL_LOCAL_ONLY | LDLM_FL_ATOMIC_CB,
&info->mti_exp->exp_handle.h_cookie);
if (rc)
- GOTO(out, rc);
-
-out:
- if (rc)
mdt_object_unlock(info, o, lh, 1);
-
+ else if (unlikely(OBD_FAIL_PRECHECK(OBD_FAIL_MDS_PDO_LOCK)) &&
+ lh->mlh_pdo_hash != 0 &&
+ (lh->mlh_reg_mode == LCK_PW || lh->mlh_reg_mode == LCK_EX)) {
+ OBD_FAIL_TIMEOUT(OBD_FAIL_MDS_PDO_LOCK, 10);
+ }
RETURN(rc);
}
dlm_req = req_capsule_client_get(info->mti_pill, &RMF_DLM_REQ);
if (dlm_req != NULL) {
- if (info->mti_mdt->mdt_opts.mo_compat_resname)
- rc = mdt_lock_resname_compat(info->mti_mdt,
- dlm_req);
- info->mti_dlm_req = dlm_req;
+ if (unlikely(dlm_req->lock_desc.l_resource.lr_type ==
+ LDLM_IBITS &&
+ dlm_req->lock_desc.l_policy_data.\
+ l_inodebits.bits == 0)) {
+ /*
+ * Lock without inodebits makes no sense and
+ * will oops later in ldlm. If the client
+ * failed to set such bits, do not trigger ASSERTION.
+ *
+ * For the liblustre flock case, it may be zero.
+ */
+ rc = -EPROTO;
+ } else {
+ if (info->mti_mdt->mdt_opts.mo_compat_resname)
+ rc = mdt_lock_resname_compat(
+ info->mti_mdt,
+ dlm_req);
+ info->mti_dlm_req = dlm_req;
+ }
} else {
rc = -EFAULT;
}
if (likely(rc == 0 && req->rq_export && h->mh_opc != MDS_DISCONNECT))
target_committed_to_req(req);
- if (unlikely((lustre_msg_get_flags(req->rq_reqmsg) & MSG_REPLAY) &&
+ if (unlikely(req_is_replay(req) &&
lustre_msg_get_transno(req->rq_reqmsg) == 0)) {
DEBUG_REQ(D_ERROR, req, "transno is 0 during REPLAY");
LBUG();
info->mti_fail_id = OBD_FAIL_MDS_ALL_REPLY_NET;
info->mti_transno = lustre_msg_get_transno(req->rq_reqmsg);
- info->mti_mos[0] = NULL;
- info->mti_mos[1] = NULL;
- info->mti_mos[2] = NULL;
- info->mti_mos[3] = NULL;
+ info->mti_mos = NULL;
memset(&info->mti_attr, 0, sizeof(info->mti_attr));
info->mti_body = NULL;
static int mdt_recovery(struct mdt_thread_info *info)
{
struct ptlrpc_request *req = mdt_info_req(info);
- int recovering;
struct obd_device *obd;
ENTRY;
obd = req->rq_export->exp_obd;
/* Check for aborted recovery... */
- cfs_spin_lock_bh(&obd->obd_processing_task_lock);
- recovering = obd->obd_recovering;
- cfs_spin_unlock_bh(&obd->obd_processing_task_lock);
- if (unlikely(recovering)) {
+ if (unlikely(obd->obd_recovering)) {
int rc;
int should_process;
DEBUG_REQ(D_INFO, req, "Got new replay");
switch (lustre_msg_get_opc(msg)) {
case MDS_CONNECT:
case MDS_DISCONNECT:
- case MDS_SET_INFO:
case OBD_PING:
case SEC_CTX_INIT:
case SEC_CTX_INIT_CONT:
case MDS_SYNC:
case MDS_GETXATTR:
case MDS_SETXATTR:
+ case MDS_SET_INFO:
case MDS_GET_INFO:
case MDS_QUOTACHECK:
case MDS_QUOTACTL:
* checked here.
*/
if (lustre_handle_is_used(&lhc->mlh_reg_lh)) {
+ LASSERTF(rc == 0, "Error occurred but lock handle "
+ "is still in use\n");
rep->lock_policy_res2 = 0;
rc = mdt_intent_lock_replace(info, lockp, NULL, lhc, flags);
RETURN(rc);
req_capsule_extend(pill, &RQF_LDLM_INTENT);
it = req_capsule_client_get(pill, &RMF_LDLM_INTENT);
if (it != NULL) {
- const struct ldlm_request *dlmreq;
- __u64 req_bits;
-
rc = mdt_intent_opc(it->opc, info, lockp, flags);
if (rc == 0)
rc = ELDLM_OK;
- /*
- * Lock without inodebits makes no sense and will oops
+ /* Lock without inodebits makes no sense and will oops
* later in ldlm. Let's check it now to see if we have
- * wrong lock from client or bits get corrupted
- * somewhere in mdt_intent_opc().
- */
- dlmreq = info->mti_dlm_req;
- req_bits = dlmreq->lock_desc.l_policy_data.l_inodebits.bits;
- LASSERT(req_bits != 0);
-
+ * ibits corrupted somewhere in mdt_intent_opc().
+ * The case where the client failed to set ibits
+ * has been handled elsewhere. */
+ LASSERT(ergo(info->mti_dlm_req->lock_desc.l_resource.\
+ lr_type == LDLM_IBITS,
+ info->mti_dlm_req->lock_desc.\
+ l_policy_data.l_inodebits.bits != 0));
} else
rc = err_serious(-EFAULT);
} else {
RETURN(rc);
}
-/*
- * Seq wrappers
- */
-static void mdt_seq_adjust(const struct lu_env *env,
- struct mdt_device *m, int lost)
-{
- struct md_site *ms = mdt_md_site(m);
- struct lu_seq_range out;
- ENTRY;
-
- LASSERT(ms && ms->ms_server_seq);
- LASSERT(lost >= 0);
- /* get extra seq from seq_server, moving it's range up */
- while (lost-- > 0) {
- seq_server_alloc_meta(ms->ms_server_seq, NULL, &out, env);
- }
- EXIT;
-}
-
static int mdt_seq_fini(const struct lu_env *env,
struct mdt_device *m)
{
* We'd like to have a mechanism to set this on a per-device
* basis, but alas...
*/
- .psc_min_threads = min(max(mdt_num_threads, MDT_MIN_THREADS),
- MDT_MAX_THREADS),
- .psc_max_threads = MDT_MAX_THREADS,
+ .psc_min_threads = mdt_min_threads,
+ .psc_max_threads = mdt_max_threads,
.psc_ctx_tags = LCT_MD_THREAD
};
if (m->mdt_regular_service == NULL)
RETURN(-ENOMEM);
- rc = ptlrpc_start_threads(NULL, m->mdt_regular_service);
+ rc = ptlrpc_start_threads(m->mdt_regular_service);
if (rc)
GOTO(err_mdt_svc, rc);
.psc_req_portal = MDS_READPAGE_PORTAL,
.psc_rep_portal = MDC_REPLY_PORTAL,
.psc_watchdog_factor = MDT_SERVICE_WATCHDOG_FACTOR,
- .psc_min_threads = min(max(mdt_num_threads, MDT_MIN_THREADS),
- MDT_MAX_THREADS),
- .psc_max_threads = MDT_MAX_THREADS,
+ .psc_min_threads = mdt_min_threads,
+ .psc_max_threads = mdt_max_threads,
.psc_ctx_tags = LCT_MD_THREAD
};
m->mdt_readpage_service =
GOTO(err_mdt_svc, rc = -ENOMEM);
}
- rc = ptlrpc_start_threads(NULL, m->mdt_readpage_service);
+ rc = ptlrpc_start_threads(m->mdt_readpage_service);
/*
* setattr service configuration.
.psc_req_portal = MDS_SETATTR_PORTAL,
.psc_rep_portal = MDC_REPLY_PORTAL,
.psc_watchdog_factor = MDT_SERVICE_WATCHDOG_FACTOR,
- .psc_min_threads = min(max(mdt_num_threads, MDT_MIN_THREADS),
- MDT_MAX_THREADS),
- .psc_max_threads = MDT_MAX_THREADS,
+ .psc_min_threads = mdt_min_threads,
+ .psc_max_threads = mdt_max_threads,
.psc_ctx_tags = LCT_MD_THREAD
};
GOTO(err_mdt_svc, rc = -ENOMEM);
}
- rc = ptlrpc_start_threads(NULL, m->mdt_setattr_service);
+ rc = ptlrpc_start_threads(m->mdt_setattr_service);
if (rc)
GOTO(err_mdt_svc, rc);
.psc_req_portal = SEQ_CONTROLLER_PORTAL,
.psc_rep_portal = MDC_REPLY_PORTAL,
.psc_watchdog_factor = MDT_SERVICE_WATCHDOG_FACTOR,
- .psc_min_threads = SEQ_NUM_THREADS,
- .psc_max_threads = SEQ_NUM_THREADS,
+ .psc_min_threads = mdt_min_threads,
+ .psc_max_threads = mdt_max_threads,
.psc_ctx_tags = LCT_MD_THREAD|LCT_DT_THREAD
};
GOTO(err_mdt_svc, rc = -ENOMEM);
}
- rc = ptlrpc_start_threads(NULL, m->mdt_mdsc_service);
+ rc = ptlrpc_start_threads(m->mdt_mdsc_service);
if (rc)
GOTO(err_mdt_svc, rc);
.psc_req_portal = SEQ_METADATA_PORTAL,
.psc_rep_portal = MDC_REPLY_PORTAL,
.psc_watchdog_factor = MDT_SERVICE_WATCHDOG_FACTOR,
- .psc_min_threads = SEQ_NUM_THREADS,
- .psc_max_threads = SEQ_NUM_THREADS,
+ .psc_min_threads = mdt_min_threads,
+ .psc_max_threads = mdt_max_threads,
.psc_ctx_tags = LCT_MD_THREAD|LCT_DT_THREAD
};
GOTO(err_mdt_svc, rc = -ENOMEM);
}
- rc = ptlrpc_start_threads(NULL, m->mdt_mdss_service);
+ rc = ptlrpc_start_threads(m->mdt_mdss_service);
if (rc)
GOTO(err_mdt_svc, rc);
.psc_req_portal = SEQ_DATA_PORTAL,
.psc_rep_portal = OSC_REPLY_PORTAL,
.psc_watchdog_factor = MDT_SERVICE_WATCHDOG_FACTOR,
- .psc_min_threads = SEQ_NUM_THREADS,
- .psc_max_threads = SEQ_NUM_THREADS,
+ .psc_min_threads = mdt_min_threads,
+ .psc_max_threads = mdt_max_threads,
.psc_ctx_tags = LCT_MD_THREAD|LCT_DT_THREAD
};
GOTO(err_mdt_svc, rc = -ENOMEM);
}
- rc = ptlrpc_start_threads(NULL, m->mdt_dtss_service);
+ rc = ptlrpc_start_threads(m->mdt_dtss_service);
if (rc)
GOTO(err_mdt_svc, rc);
.psc_req_portal = FLD_REQUEST_PORTAL,
.psc_rep_portal = MDC_REPLY_PORTAL,
.psc_watchdog_factor = MDT_SERVICE_WATCHDOG_FACTOR,
- .psc_min_threads = FLD_NUM_THREADS,
- .psc_max_threads = FLD_NUM_THREADS,
+ .psc_min_threads = mdt_min_threads,
+ .psc_max_threads = mdt_max_threads,
.psc_ctx_tags = LCT_DT_THREAD|LCT_MD_THREAD
};
GOTO(err_mdt_svc, rc = -ENOMEM);
}
- rc = ptlrpc_start_threads(NULL, m->mdt_fld_service);
+ rc = ptlrpc_start_threads(m->mdt_fld_service);
if (rc)
GOTO(err_mdt_svc, rc);
.psc_req_portal = MDS_MDS_PORTAL,
.psc_rep_portal = MDC_REPLY_PORTAL,
.psc_watchdog_factor = MDT_SERVICE_WATCHDOG_FACTOR,
- .psc_min_threads = min(max(mdt_num_threads, MDT_MIN_THREADS),
- MDT_MAX_THREADS),
- .psc_max_threads = MDT_MAX_THREADS,
+ .psc_min_threads = mdt_min_threads,
+ .psc_max_threads = mdt_max_threads,
.psc_ctx_tags = LCT_MD_THREAD
};
m->mdt_xmds_service =
GOTO(err_mdt_svc, rc = -ENOMEM);
}
- rc = ptlrpc_start_threads(NULL, m->mdt_xmds_service);
+ rc = ptlrpc_start_threads(m->mdt_xmds_service);
if (rc)
GOTO(err_mdt_svc, rc);
LCONSOLE_INFO("Disabling ACL\n");
}
+ if (!*p)
+ break;
+
options = ++p;
}
}
obd = class_name2obd(dev);
LASSERT(obd != NULL);
- cfs_spin_lock_init(&m->mdt_transno_lock);
-
m->mdt_max_mdsize = MAX_MD_SIZE;
m->mdt_max_cookiesize = sizeof(struct llog_cookie);
m->mdt_som_conf = 0;
CERROR("CMD Operation not allowed in IOP mode\n");
GOTO(err_lmi, rc = -EINVAL);
}
+ /* Read recovery timeouts */
+ if (lsi->lsi_lmd && lsi->lsi_lmd->lmd_recovery_time_soft)
+ obd->obd_recovery_timeout =
+ lsi->lsi_lmd->lmd_recovery_time_soft;
+
+ if (lsi->lsi_lmd && lsi->lsi_lmd->lmd_recovery_time_hard)
+ obd->obd_recovery_time_hard =
+ lsi->lsi_lmd->lmd_recovery_time_hard;
}
cfs_rwlock_init(&m->mdt_sptlrpc_lock);
m->mdt_nosquash_strlen = 0;
cfs_init_rwsem(&m->mdt_squash_sem);
- cfs_spin_lock_init(&m->mdt_client_bitmap_lock);
-
OBD_ALLOC_PTR(mite);
if (mite == NULL)
GOTO(err_lmi, rc = -ENOMEM);
LUSTRE_MDT_NAME"-%p", m);
m->mdt_namespace = ldlm_namespace_new(obd, info->mti_u.ns_name,
LDLM_NAMESPACE_SERVER,
- LDLM_NAMESPACE_GREEDY);
+ LDLM_NAMESPACE_GREEDY,
+ LDLM_NS_TYPE_MDT);
if (m->mdt_namespace == NULL)
GOTO(err_fini_seq, rc = -ENOMEM);
lu_printer_t p, const struct lu_object *o)
{
struct mdt_object *mdto = mdt_obj((struct lu_object *)o);
- return (*p)(env, cookie, LUSTRE_MDT_NAME"-object@%p(ioepoch=%llu "
- "flags=%llx, epochcount=%d, writecount=%d)",
+ return (*p)(env, cookie, LUSTRE_MDT_NAME"-object@%p(ioepoch="LPU64" "
+ "flags="LPX64", epochcount=%d, writecount=%d)",
mdto, mdto->mot_ioepoch, mdto->mot_flags,
mdto->mot_ioepoch_count, mdto->mot_writecount);
}
void *localdata)
{
struct mdt_thread_info *info;
- struct lsd_client_data *lcd;
struct obd_export *lexp;
struct lustre_handle conn = { 0 };
struct mdt_device *mdt;
rc = mdt_connect_internal(lexp, mdt, data);
if (rc == 0) {
- OBD_ALLOC_PTR(lcd);
- if (lcd != NULL) {
- struct mdt_thread_info *mti;
- mti = lu_context_key_get(&env->le_ctx,
- &mdt_thread_key);
- LASSERT(mti != NULL);
- mti->mti_exp = lexp;
- memcpy(lcd->lcd_uuid, cluuid, sizeof lcd->lcd_uuid);
- lexp->exp_mdt_data.med_lcd = lcd;
- rc = mdt_client_new(env, mdt);
- if (rc != 0) {
- OBD_FREE_PTR(lcd);
- lexp->exp_mdt_data.med_lcd = NULL;
- } else {
- mdt_export_stats_init(obd, lexp, localdata);
- }
- } else
- rc = -ENOMEM;
+ struct mdt_thread_info *mti;
+ struct lsd_client_data *lcd = lexp->exp_target_data.ted_lcd;
+ LASSERT(lcd);
+ mti = lu_context_key_get(&env->le_ctx, &mdt_thread_key);
+ LASSERT(mti != NULL);
+ mti->mti_exp = lexp;
+ memcpy(lcd->lcd_uuid, cluuid, sizeof lcd->lcd_uuid);
+ rc = mdt_client_new(env, mdt);
+ if (rc == 0)
+ mdt_export_stats_init(obd, lexp, localdata);
}
out:
- if (rc != 0)
+ if (rc != 0) {
class_disconnect(lexp);
- else
+ *exp = NULL;
+ } else {
*exp = lexp;
+ }
RETURN(rc);
}
int cookie_size;
lmm_size = mdt->mdt_max_mdsize;
- OBD_ALLOC(ma->ma_lmm, lmm_size);
+ OBD_ALLOC_LARGE(ma->ma_lmm, lmm_size);
if (ma->ma_lmm == NULL)
GOTO(out_lmm, rc = -ENOMEM);
cookie_size = mdt->mdt_max_cookiesize;
- OBD_ALLOC(ma->ma_cookie, cookie_size);
+ OBD_ALLOC_LARGE(ma->ma_cookie, cookie_size);
if (ma->ma_cookie == NULL)
GOTO(out_cookie, rc = -ENOMEM);
ma->ma_need = 0;
/* It is not for setattr, just tell MDD to send
* DESTROY RPC to OSS if needed */
- ma->ma_attr_flags = MDS_CLOSE_CLEANUP;
ma->ma_valid = MA_FLAGS;
+ ma->ma_attr_flags = MDS_CLOSE_CLEANUP;
+ /* Don't unlink orphan on failover umount, LU-184 */
+ if (exp->exp_flags & OBD_OPT_FAILOVER)
+ ma->ma_attr_flags |= MDS_KEEP_ORPHAN;
mdt_mfd_close(info, mfd);
}
- OBD_FREE(ma->ma_cookie, cookie_size);
+ OBD_FREE_LARGE(ma->ma_cookie, cookie_size);
ma->ma_cookie = NULL;
out_cookie:
- OBD_FREE(ma->ma_lmm, lmm_size);
+ OBD_FREE_LARGE(ma->ma_lmm, lmm_size);
ma->ma_lmm = NULL;
}
out_lmm:
info->mti_mdt = NULL;
/* cleanup client slot early */
- mdt_client_del(&env, mdt);
+ /* Do not erase record for recoverable client. */
+ if (!(exp->exp_flags & OBD_OPT_FAILOVER) || exp->exp_failed)
+ mdt_client_del(&env, mdt);
lu_env_fini(&env);
RETURN(rc);
cfs_spin_lock(&exp->exp_lock);
exp->exp_connecting = 1;
cfs_spin_unlock(&exp->exp_lock);
- rc = ldlm_init_export(exp);
+ rc = lut_client_alloc(exp);
+ if (rc == 0)
+ rc = ldlm_init_export(exp);
+
if (rc)
CERROR("Error %d while initializing export\n", rc);
RETURN(rc);
target_destroy_export(exp);
ldlm_destroy_export(exp);
+ lut_client_free(exp);
LASSERT(cfs_list_empty(&exp->exp_outstanding_replies));
LASSERT(cfs_list_empty(&exp->exp_mdt_data.med_open_head));
static void mdt_allow_cli(struct mdt_device *m, unsigned int flag)
{
if (flag & CONFIG_LOG)
- m->mdt_fl_cfglog = 1;
+ cfs_set_bit(MDT_FL_CFGLOG, &m->mdt_state);
/* also notify active event */
if (flag & CONFIG_SYNC)
- m->mdt_fl_synced = 1;
+ cfs_set_bit(MDT_FL_SYNCED, &m->mdt_state);
+
+ if (cfs_test_bit(MDT_FL_CFGLOG, &m->mdt_state) &&
+ cfs_test_bit(MDT_FL_SYNCED, &m->mdt_state)) {
+ struct obd_device *obd = m->mdt_md_dev.md_lu_dev.ld_obd;
- if (m->mdt_fl_cfglog && m->mdt_fl_synced)
/* Open for clients */
- m->mdt_md_dev.md_lu_dev.ld_obd->obd_no_conn = 0;
+ if (obd->obd_no_conn) {
+ cfs_spin_lock(&obd->obd_dev_lock);
+ obd->obd_no_conn = 0;
+ cfs_spin_unlock(&obd->obd_dev_lock);
+ }
+ }
}
static int mdt_upcall(const struct lu_env *env, struct md_device *md,
m->mdt_max_mdsize, m->mdt_max_cookiesize);
mdt_allow_cli(m, CONFIG_SYNC);
if (data)
- (*(__u64 *)data) = m->mdt_mount_count;
+ (*(__u64 *)data) =
+ m->mdt_lut.lut_obd->u.obt.obt_mount_count;
break;
case MD_NO_TRANS:
mti = lu_context_key_get(&env->le_ctx, &mdt_thread_key);
break;
case MD_LOV_CONFIG:
/* Check that MDT is not yet configured */
- LASSERT(!m->mdt_fl_cfglog);
+ LASSERT(!cfs_test_bit(MDT_FL_CFGLOG, &m->mdt_state));
break;
#ifdef HAVE_QUOTA_SUPPORT
case MD_LOV_QUOTA:
* fid, this is error to find remote object here
*/
CERROR("nonlocal object "DFID"\n", PFID(fid));
- } else if (rc == 0) {
- rc = -ENOENT;
- CDEBUG(D_IOCTL, "no such object: "DFID"\n", PFID(fid));
} else {
version = mo_version_get(mti->mti_env, mdt_object_child(obj));
- if (version < 0) {
- rc = (int)version;
- } else {
- *(__u64 *)data->ioc_inlbuf2 = version;
- rc = 0;
- }
+ *(__u64 *)data->ioc_inlbuf2 = version;
+ rc = 0;
}
mdt_object_unlock_put(mti, obj, lh, 1);
RETURN(rc);
int mdt_postrecov(const struct lu_env *env, struct mdt_device *mdt)
{
struct lu_device *ld = md2lu_dev(mdt->mdt_child);
- struct obd_device *obd = mdt2obd_dev(mdt);
#ifdef HAVE_QUOTA_SUPPORT
+ struct obd_device *obd = mdt2obd_dev(mdt);
struct md_device *next = mdt->mdt_child;
#endif
- int rc, lost;
+ int rc;
ENTRY;
- /* if some clients didn't participate in recovery then we can possibly
- * lost sequence. Now we should increase sequence for safe value */
- lost = obd->obd_max_recoverable_clients - obd->obd_connected_clients;
- mdt_seq_adjust(env, mdt, lost);
rc = ld->ld_ops->ldo_recovery_complete(env, ld);
#ifdef HAVE_QUOTA_SUPPORT
*/
int mdt_hsm_copytool_send(struct obd_export *exp)
{
- struct lnl_hdr *lh;
+ struct kuc_hdr *lh;
struct hsm_action_list *hal;
struct hsm_action_item *hai;
int rc, len;
if (lh == NULL)
RETURN(-ENOMEM);
- lh->lnl_magic = LNL_MAGIC;
- lh->lnl_transport = LNL_TRANSPORT_HSM;
- lh->lnl_msgtype = HMT_ACTION_LIST;
- lh->lnl_msglen = len;
+ lh->kuc_magic = KUC_MAGIC;
+ lh->kuc_transport = KUC_TRANSPORT_HSM;
+ lh->kuc_msgtype = HMT_ACTION_LIST;
+ lh->kuc_msglen = len;
hal = (struct hsm_action_list *)(lh + 1);
hal->hal_version = HAL_VERSION;
/* Uses the ldlm reverse import; this rpc will be seen by
the ldlm_callback_handler */
- rc = target_set_info_rpc(exp->exp_imp_reverse, LDLM_SET_INFO,
- sizeof(KEY_HSM_COPYTOOL_SEND),
- KEY_HSM_COPYTOOL_SEND,
- len, lh, NULL);
+ rc = do_set_info_async(exp->exp_imp_reverse,
+ LDLM_SET_INFO, LUSTRE_OBD_VERSION,
+ sizeof(KEY_HSM_COPYTOOL_SEND),
+ KEY_HSM_COPYTOOL_SEND,
+ len, lh, NULL);
OBD_FREE(lh, len);
llo_local_obj_register(&mdt_last_recv);
- mdt_num_threads = MDT_NUM_THREADS;
+ if (mdt_num_threads > 0) {
+ if (mdt_num_threads > MDT_MAX_THREADS)
+ mdt_num_threads = MDT_MAX_THREADS;
+ if (mdt_num_threads < MDT_MIN_THREADS)
+ mdt_num_threads = MDT_MIN_THREADS;
+ mdt_max_threads = mdt_min_threads = mdt_num_threads;
+ } else {
+ mdt_max_threads = MDT_MAX_THREADS;
+ mdt_min_threads = MDT_MIN_THREADS;
+ }
+
lprocfs_mdt_init_vars(&lvars);
rc = class_register_type(&mdt_obd_device_ops, NULL,
lvars.module_vars, LUSTRE_MDT_NAME,