we need this because the client's timeout should be longer than inter-mds recovery may take
- a few ugly hacks that allow a client to reconnect with its old UUID (the one it had before the failure took place)
- mds_preprw() and mds_preprw() should use l_dput() instead of f_dput()
- SOCKNAL_IO_TIMEOUT has been set to 20 seconds to make discovery of
stale connections faster
- lconf generates persistent UUIDs for LMV clients
if each new LMV uses a fresh UUID, then each recovered MDS looks like a
new client (the target MDS doesn't recognize it as an old one because of the new UUID).
If the target MDS gets restarted, it will then find more clients in LAST_RCVD
than it actually had
#define SOCKNAL_MAX_RECONNECT_INTERVAL (60*HZ) /* ...exponentially increasing to this */
/* default vals for runtime tunables */
-#define SOCKNAL_IO_TIMEOUT 50 /* default comms timeout (seconds) */
+#define SOCKNAL_IO_TIMEOUT 20 /* default comms timeout (seconds) */
#define SOCKNAL_EAGER_ACK 0 /* default eager ack (boolean) */
#define SOCKNAL_TYPED_CONNS 1 /* unidirectional large, bidirectional small? */
#define SOCKNAL_ZC_MIN_FRAG (2<<10) /* default smallest zerocopy fragment */
int connected;
int max_easize;
int max_cookiesize;
+ int server_timeout;
};
struct niobuf_local {
struct lustre_handle *hdl;
hdl = &exp->exp_imp_reverse->imp_remote_handle;
/* Might be a re-connect after a partition. */
- if (!memcmp(&conn->cookie, &hdl->cookie, sizeof conn->cookie)) {
+#warning "FIXME ASAP"
+ memcpy(&hdl->cookie, &conn->cookie, sizeof(conn->cookie));
+ if (1 || !memcmp(&conn->cookie, &hdl->cookie, sizeof conn->cookie)) {
CERROR("%s reconnecting\n", cluuid->uuid);
conn->cookie = exp->exp_handle.h_cookie;
- RETURN(EALREADY);
+ /*RETURN(EALREADY);*/
+ RETURN(0);
} else {
CERROR("%s reconnecting from %s, "
"handle mismatch (ours "LPX64", theirs "
if (!target || target->obd_stopping || !target->obd_set_up) {
CERROR("UUID '%s' is not available for connect\n", str);
+
GOTO(out, rc = -ENODEV);
}
} else if (req->rq_reqmsg->conn_cnt == 1) {
CERROR("%s reconnected with 1 conn_cnt; cookies not random?\n",
cluuid.uuid);
- GOTO(out, rc = -EALREADY);
+#warning "FIXME ASAP"
+ /*GOTO(out, rc = -EALREADY);*/
}
/* Tell the client if we're in recovery. */
LASSERT(export != NULL);
spin_lock_irqsave(&export->exp_lock, flags);
- if (export->exp_conn_cnt >= req->rq_reqmsg->conn_cnt) {
+#warning "FIXME ASAP"
+ if (0 && export->exp_conn_cnt >= req->rq_reqmsg->conn_cnt) {
CERROR("%s: already connected at a higher conn_cnt: %d > %d\n",
cluuid.uuid, export->exp_conn_cnt,
req->rq_reqmsg->conn_cnt);
RETURN(0);
}
+void lmv_set_timeouts(struct obd_device *obd) /*
+ * Send the "inter_mds" key to every target of this LMV that has a non-NULL
+ * export.
+ *
+ * Does nothing unless both lmv->server_timeout and lmv->connected are
+ * non-zero. The key is passed with an empty value (vallen 0, val NULL).
+ * The return value of obd_set_info() is ignored, so a failure on one
+ * target is neither reported nor stops the loop.
+ *
+ * obd: device whose u.lmv state (server_timeout, connected, tgts, count)
+ *      is read.
+ */
+{
+ struct lmv_tgt_desc *tgts;
+ struct lmv_obd *lmv;
+ int i;
+
+ lmv = &obd->u.lmv;
+ if (lmv->server_timeout == 0) /* flag not set: nothing to propagate */
+ return;
+
+ if (lmv->connected == 0) /* presumably targets are not connected yet -- TODO confirm */
+ return;
+
+ for (i = 0, tgts = lmv->tgts; i < lmv->count; i++, tgts++) {
+ if (tgts->exp == NULL) /* no export for this target: skip it */
+ continue;
+ obd_set_info(tgts->exp, strlen("inter_mds"),
+ "inter_mds", 0, NULL); /* result intentionally or accidentally unchecked */
+ }
+}
+
int lmv_connect(struct obd_device *obd)
{
struct lmv_obd *lmv = &obd->u.lmv;
atomic_read(&obd->obd_refcount));
}
+ lmv_set_timeouts(obd);
+
class_export_put(exp);
RETURN (0);
RETURN(-EINVAL);
}
lmv = &obd->u.lmv;
- lmv_connect(obd);
if (keylen >= strlen("client") && strcmp(key, "client") == 0) {
struct lmv_tgt_desc *tgts;
int i, rc;
+ lmv_connect(obd);
for (i = 0, tgts = lmv->tgts; i < lmv->count; i++, tgts++) {
rc = obd_set_info(tgts->exp, keylen, key, vallen, val);
if (rc)
RETURN(rc);
}
RETURN(0);
+ } else if (keylen >= strlen("inter_mds") && strcmp(key, "inter_mds") == 0) {
+ lmv->server_timeout = 1;
+ lmv_set_timeouts(obd);
+ RETURN(0);
}
RETURN(-EINVAL);
rc = ptlrpc_queue_wait(req);
ptlrpc_req_finished(req);
RETURN(rc);
+ } else if (keylen >= strlen("inter_mds") && strcmp(key, "inter_mds") == 0) {
+ struct obd_import *imp = class_exp2cliimp(exp);
+ imp->imp_server_timeout = 1;
+ CDEBUG(D_OTHER, "%s: timeout / 2\n", exp->exp_obd->obd_name);
+ RETURN(0);
}
RETURN(rc);
GOTO(err_reg, rc);
mds->mds_num = mdsize;
+ rc = obd_set_info(mds->mds_lmv_exp, strlen("inter_mds"),
+ "inter_mds", 0, NULL);
+ if (rc)
+ GOTO(err_reg, rc);
RETURN(0);
err_reg:
struct dentry *filter_fid2dentry(struct obd_device *obd,
struct dentry *dir_dentry,
obd_gr group, obd_id id);
-void f_dput(struct dentry *dentry);
int mds_preprw(int cmd, struct obd_export *exp, struct obdo *oa,
int objcount, struct obd_ioobj *obj,
if (dentry->d_inode == NULL) {
CERROR("trying to BRW to non-existent file "LPU64"\n",
obj->ioo_id);
- f_dput(dentry);
+ l_dput(dentry);
GOTO(cleanup, rc = -ENOENT);
}
i, obj->ioo_bufcnt, dentry, rc);
while (lnb-- > res)
__free_pages(lnb->page, 0);
- f_dput(dentry);
+ l_dput(dentry);
GOTO(cleanup, rc);
}
tot_bytes += lnb->len;
#define SOCKNAL_MAX_RECONNECT_INTERVAL (60*HZ) /* ...exponentially increasing to this */
/* default vals for runtime tunables */
-#define SOCKNAL_IO_TIMEOUT 50 /* default comms timeout (seconds) */
+#define SOCKNAL_IO_TIMEOUT 20 /* default comms timeout (seconds) */
#define SOCKNAL_EAGER_ACK 0 /* default eager ack (boolean) */
#define SOCKNAL_TYPED_CONNS 1 /* unidirectional large, bidirectional small? */
#define SOCKNAL_ZC_MIN_FRAG (2<<10) /* default smallest zerocopy fragment */
void ptlrpc_invalidate_import(struct obd_import *imp, int in_rpc)
{
struct l_wait_info lwi;
+ unsigned long timeout;
int inflight = 0;
int rc;
inflight = 1;
/* wait for all requests to error out and call completion
callbacks */
- lwi = LWI_TIMEOUT_INTR(MAX(obd_timeout * HZ, 1), NULL,
- NULL, NULL);
+ if (imp->imp_server_timeout)
+ timeout = obd_timeout / 2;
+ else
+ timeout = obd_timeout;
+ timeout = MAX(timeout * HZ, 1);
+ lwi = LWI_TIMEOUT_INTR(timeout, NULL, NULL, NULL);
rc = l_wait_event(imp->imp_recovery_waitq,
(atomic_read(&imp->imp_inflight) == inflight),
&lwi);
if (aa->pcaa_initial_connect && !imp->imp_initial_recov) {
ptlrpc_deactivate_import(imp);
}
+ /*if (rc == -ETIMEDOUT) {
+ CDEBUG(D_ERROR, "recovery of %s on %s failed (timeout)\n",
+ imp->imp_target_uuid.uuid,
+ (char *)imp->imp_connection->c_remote_uuid.uuid);
+ ptlrpc_connect_import(imp, NULL);
+ RETURN(0);
+ }*/
CDEBUG(D_ERROR, "recovery of %s on %s failed (%d)\n",
imp->imp_target_uuid.uuid,
(char *)imp->imp_connection->c_remote_uuid.uuid, rc);
if (ptlrpc_import_in_recovery(imp)) {
struct l_wait_info lwi;
- lwi = LWI_TIMEOUT_INTR(MAX(obd_timeout * HZ, 1), back_to_sleep,
- NULL, NULL);
+ unsigned long timeout;
+ if (imp->imp_server_timeout)
+ timeout = obd_timeout / 2;
+ else
+ timeout = obd_timeout;
+ timeout = MAX(timeout * HZ, 1);
+ lwi = LWI_TIMEOUT_INTR(obd_timeout, back_to_sleep, NULL, NULL);
rc = l_wait_event(imp->imp_recovery_waitq,
!ptlrpc_import_in_recovery(imp), &lwi);
if (level == LUSTRE_IMP_DISCON) {
/* wait at least a timeout before
trying recovery again. */
+ unsigned long timeout = obd_timeout;
+ if (imp->imp_server_timeout)
+ timeout = obd_timeout / 2;
imp->imp_next_ping = time(NULL) +
- (obd_timeout * HZ);
+ (timeout * HZ);
ptlrpc_initiate_recovery(imp);
}
else if (level != LUSTRE_IMP_FULL ||
if name_override != None:
self.name = "lmv_%s" % name_override
self.add_lustre_module('lmv', 'lmv')
- self.mds_uuid = self.db.get_first_ref('mds')
- mds = self.db.lookup(self.mds_uuid)
- self.lmv_name = mds.getName()
self.devlist = self.db.get_refs('mds')
self.mdclist = []
self.desc_uuid = self.uuid
except CommandError, e:
print "Error preparing LMV %s\n" % mdc.uuid
raise e
- self.info(self.mds_uuid)
lctl.lmv_setup(self.name, self.uuid, self.desc_uuid,
string.join(self.devlist))
# setup LMV
if self.master_mds:
client_uuid = generate_client_uuid(self.name)
+ client_uuid = self.name + "_lmv_" + "UUID"
self.master = LMV(self.db.lookup(self.lmv_uuid), client_uuid, self.name, self.name)
self.master_mds = self.master.name
# modules
self.add_lustre_module('osc', 'osc')
self.add_lustre_module('lov', 'lov')
self.add_lustre_module('lmv', 'lmv')
+ self.add_lustre_module('ost', 'ost')
self.add_lustre_module('mds', 'mds')
if self.fstype:
self.add_lustre_module('lvfs', 'fsfilt_%s' % (self.fstype))