- b=512,others: fully reactivate OST imports after reconnection.
- b=513(partial): make sure client sees our -ENOTCONN from mds_handle
- b=515: more graceful error handling for truncating on dead OST
- b=474: don't error out just because a file is striped across a dead OST; only
if we actually try to access it
- b=410: fix garbage sizes when stat(2)ing a file that includes a stripe on a
dead OST
- print console diagnostic for completion-timeout client recovery
- MSG_RESENT for requests that were retransmitted
TBA
* bug fixes
+ - Fully reactivate OST imports after reconnection (512, others)
+ - Make sure client sees our -ENOTCONN from mds_handle (513 - partial)
+ - More graceful error handling for truncating on dead OST (515)
+ - Don't error out unless we're actually accessing dead stripes (474)
+ - Fix garbage sizes when stripes are missing (410)
- LRU counters were broken, causing constant lock purge (433, 432)
- garbage on read from stripes with failed OSTs (441)
- mark OSCs as active before reconnecting during recovery (438)
/* Flags that apply to all requests are in the bottom 16 bits */
#define MSG_GEN_FLAG_MASK 0x0000ffff
#define MSG_LAST_REPLAY 1
+#define MSG_RESENT 2
static inline int lustre_msg_get_flags(struct lustre_msg *msg)
{
return (msg->flags & MSG_GEN_FLAG_MASK);
}
+static inline void lustre_msg_add_flags(struct lustre_msg *msg, int flags)
+{
+ msg->flags |= MSG_GEN_FLAG_MASK & flags;
+}
+
static inline void lustre_msg_set_flags(struct lustre_msg *msg, int flags)
{
msg->flags &= ~MSG_GEN_FLAG_MASK;
- msg->flags |= MSG_GEN_FLAG_MASK & flags;
+ lustre_msg_add_flags(msg, flags);
}
static inline int lustre_msg_get_op_flags(struct lustre_msg *msg)
CERROR("lock %p has NULL obd\n", lock);
else if (!(conn = obd->u.cli.cl_import.imp_connection))
CERROR("lock %p has NULL connection\n", lock);
- else
+ else {
+ LDLM_DEBUG(lock, "timed out waiting for completion");
+ CERROR("lock %p timed out from %s\n", lock,
+ conn->c_remote_uuid);
class_signal_connection_failure(conn);
+ }
RETURN(0);
}
mdc_close(&sbi->ll_mdc_conn, inode->i_ino,
S_IFREG, &fd->fd_mdshandle, &req);
out_req:
- ptlrpc_req_finished(req); /* once for reply */
ptlrpc_req_finished(req); /* once for an early "commit" */
//out_fd:
fd->fd_mdshandle.cookie = DEAD_HANDLE_MAGIC;
RETURN(rc);
}
+ memset(&oa, 0, sizeof oa);
oa.o_id = lsm->lsm_object_id;
oa.o_mode = S_IFREG;
oa.o_valid = OBD_MD_FLID|OBD_MD_FLTYPE|OBD_MD_FLSIZE|OBD_MD_FLBLOCKS;
return 0;
}
-
-
static int ll_inode_revalidate(struct dentry *dentry)
{
struct inode *inode = dentry->d_inode;
err = ll_size_lock(inode, lsm, inode->i_size, LCK_PW, &lockhs);
if (err) {
CERROR("ll_size_lock failed: %d\n", err);
- /* FIXME: What to do here? It's too late to back out... */
- LBUG();
+ return;
}
/* truncate == punch from new size to absolute end of file */
err = obd_punch(ll_i2obdconn(inode), &oa, lsm, inode->i_size,
OBD_OBJECT_EOF);
- if (err) {
- LBUG();
- CERROR("obd_truncate fails (%d) ino %lu\n", err,
- inode->i_ino);
- } else
+ if (err)
+ CERROR("obd_truncate fails (%d) ino %lu\n", err, inode->i_ino);
+ else
obdo_to_inode(inode, &oa, oa.o_valid);
err = ll_size_unlock(inode, lsm, LCK_PW, lockhs);
* Copyright (C) 2002 Cluster File Systems, Inc.
* Author: Phil Schwan <phil@off.net>
* Peter Braam <braam@clusterfs.com>
+ * Mike Shaver <shaver@off.net>
*
* This code is issued under the GNU General Public License.
* See the file COPYING in this distribution
tmp.o_valid &= ~OBD_MD_FLHANDLE;
err = obd_getattr(&lov->tgts[loi->loi_ost_idx].conn, &tmp,NULL);
- if (err && lov->tgts[loi->loi_ost_idx].active) {
- CERROR("Error getattr objid "LPX64" subobj "LPX64
- " on OST idx %d: rc = %d\n",
- oa->o_id, loi->loi_id, loi->loi_ost_idx, err);
- RETURN(err);
+ if (err) {
+ if (lov->tgts[loi->loi_ost_idx].active) {
+ CERROR("Error getattr objid "LPX64" subobj "
+ LPX64" on OST idx %d: rc = %d\n",
+ oa->o_id, loi->loi_id, loi->loi_ost_idx,
+ err);
+ RETURN(err);
+ }
+ } else {
+ lov_merge_attrs(oa, &tmp, tmp.o_valid, lsm, i, &new);
}
- lov_merge_attrs(oa, &tmp, tmp.o_valid, lsm, i, &new);
}
RETURN(0);
tmp->o_id = loi->loi_id;
rc = obd_open(&lov->tgts[loi->loi_ost_idx].conn, tmp, NULL);
- if (rc && lov->tgts[loi->loi_ost_idx].active) {
- CERROR("Error open objid "LPX64" subobj "LPX64
- " on OST idx %d: rc = %d\n",
- oa->o_id, lsm->lsm_oinfo[i].loi_id,
- loi->loi_ost_idx, rc);
- goto out_handles;
+ if (rc) {
+ if (lov->tgts[loi->loi_ost_idx].active) {
+ CERROR("Error open objid "LPX64" subobj "LPX64
+ " on OST idx %d: rc = %d\n",
+ oa->o_id, lsm->lsm_oinfo[i].loi_id,
+ loi->loi_ost_idx, rc);
+ goto out_handles;
+ }
+ continue;
}
lov_merge_attrs(oa, tmp, tmp->o_valid, lsm, i, &new);
for (i = 0, loi = lsm->lsm_oinfo, si_last = si = stripeinfo;
i < stripe_count; i++, loi++, si_last = si, si++) {
- if (lov->tgts[loi->loi_ost_idx].active == 0)
- GOTO(out_ioarr, rc = -EIO);
if (i > 0)
si->index = si_last->index + si_last->bufct;
si->lsm.lsm_object_id = loi->loi_id;
LASSERT(!strcmp(req->rq_obd->obd_type->typ_name, LUSTRE_MDT_NAME));
if (req->rq_reqmsg->opc != MDS_CONNECT) {
- if (req->rq_export == NULL)
+ if (req->rq_export == NULL) {
+ req->rq_status = -ENOTCONN;
GOTO(out, rc = -ENOTCONN);
+ }
mds = mds_req2mds(req);
if (mds->mds_recoverable_clients != 0) {
set_osc_active(imp, 0 /* inactive */);
RETURN(0);
}
+
case PTLRPC_RECOVD_PHASE_RECOVER:
imp->imp_flags &= ~IMP_INVALID;
rc = ptlrpc_reconnect_import(imp, OST_CONNECT);
imp->imp_flags |= IMP_INVALID;
RETURN(rc);
}
+
+ spin_lock(&imp->imp_lock);
+ imp->imp_level = LUSTRE_CONN_FULL;
+ spin_unlock(&imp->imp_lock);
+
set_osc_active(imp, 1 /* active */);
RETURN(0);
+
default:
RETURN(-EINVAL);
}
err = req->rq_repmsg->status;
if (req->rq_repmsg->type == NTOH__u32(PTL_RPC_MSG_ERR)) {
- DEBUG_REQ(D_ERROR, req, "type == PTL_RPC_MSG_ERR");
+ DEBUG_REQ(D_ERROR, req, "type == PTL_RPC_MSG_ERR (%d)\n", err);
RETURN(err ? err : -EINVAL);
}
if ((req->rq_flags & (PTL_RPC_FL_RESEND | PTL_RPC_FL_INTR)) ==
PTL_RPC_FL_RESEND) {
req->rq_flags &= ~PTL_RPC_FL_RESEND;
+ lustre_msg_add_flags(req->rq_reqmsg, MSG_RESENT);
DEBUG_REQ(D_HA, req, "resending: ");
goto resend;
}