Whamcloud - gitweb
- mds->lmv->mdc propagate lower timeout down to import
authoralex <alex>
Tue, 1 Jun 2004 15:19:34 +0000 (15:19 +0000)
committeralex <alex>
Tue, 1 Jun 2004 15:19:34 +0000 (15:19 +0000)
  we need this because the client's timeout should be longer than inter-mds recovery may take
- a few ugly hacks that allow a client to reconnect with its old (from before the failure took place) UUID
- mds_preprw() and mds_preprw() should use l_dput() instead of f_dput()
- SOCKNAL_IO_TIMEOUT has been set to 20 seconds to make discovery of
  stale connections faster
- lconf generates persistent UUIDs for LMV clients
  if each new LMV uses a fresh UUID, then each recovered MDS looks like
  a new client (the target MDS doesn't recognize it as an old one because of the new UUID).
  if the target MDS gets restarted, then it'll find more clients in LAST_RCVD
  than it actually had

lnet/klnds/socklnd/socklnd.h
lustre/include/linux/obd.h
lustre/ldlm/ldlm_lib.c
lustre/lmv/lmv_obd.c
lustre/mdc/mdc_request.c
lustre/mds/mds_lmv.c
lustre/portals/knals/socknal/socknal.h
lustre/ptlrpc/import.c
lustre/ptlrpc/pinger.c
lustre/utils/lconf

index bd3c1fb..50ff5ce 100644 (file)
@@ -77,7 +77,7 @@
 #define SOCKNAL_MAX_RECONNECT_INTERVAL (60*HZ) /* ...exponentially increasing to this */
 
 /* default vals for runtime tunables */
-#define SOCKNAL_IO_TIMEOUT       50             /* default comms timeout (seconds) */
+#define SOCKNAL_IO_TIMEOUT       20             /* default comms timeout (seconds) */
 #define SOCKNAL_EAGER_ACK        0              /* default eager ack (boolean) */
 #define SOCKNAL_TYPED_CONNS      1              /* unidirectional large, bidirectional small? */
 #define SOCKNAL_ZC_MIN_FRAG     (2<<10)         /* default smallest zerocopy fragment */
index 12cf9fb..ea6f615 100644 (file)
@@ -425,6 +425,7 @@ struct lmv_obd {
         int                     connected;
         int                     max_easize;
         int                     max_cookiesize;
+        int                     server_timeout;
 };
 
 struct niobuf_local {
index 57d1058..4f113f4 100644 (file)
@@ -339,10 +339,13 @@ int target_handle_reconnect(struct lustre_handle *conn, struct obd_export *exp,
                 struct lustre_handle *hdl;
                 hdl = &exp->exp_imp_reverse->imp_remote_handle;
                 /* Might be a re-connect after a partition. */
-                if (!memcmp(&conn->cookie, &hdl->cookie, sizeof conn->cookie)) {
+#warning "FIXME ASAP"
+                memcpy(&hdl->cookie, &conn->cookie, sizeof(conn->cookie));
+                if (1 || !memcmp(&conn->cookie, &hdl->cookie, sizeof conn->cookie)) {
                         CERROR("%s reconnecting\n", cluuid->uuid);
                         conn->cookie = exp->exp_handle.h_cookie;
-                        RETURN(EALREADY);
+                        /*RETURN(EALREADY);*/
+                        RETURN(0);
                 } else {
                         CERROR("%s reconnecting from %s, "
                                "handle mismatch (ours "LPX64", theirs "
@@ -393,6 +396,7 @@ int target_handle_connect(struct ptlrpc_request *req, svc_handler_t handler)
         
         if (!target || target->obd_stopping || !target->obd_set_up) {
                 CERROR("UUID '%s' is not available for connect\n", str);
+
                 GOTO(out, rc = -ENODEV);
         }
 
@@ -447,7 +451,8 @@ int target_handle_connect(struct ptlrpc_request *req, svc_handler_t handler)
         } else if (req->rq_reqmsg->conn_cnt == 1) {
                 CERROR("%s reconnected with 1 conn_cnt; cookies not random?\n",
                        cluuid.uuid);
-                GOTO(out, rc = -EALREADY);
+#warning "FIXME ASAP"
+                /*GOTO(out, rc = -EALREADY);*/
         }
 
         /* Tell the client if we're in recovery. */
@@ -503,7 +508,8 @@ int target_handle_connect(struct ptlrpc_request *req, svc_handler_t handler)
         LASSERT(export != NULL);
 
         spin_lock_irqsave(&export->exp_lock, flags);
-        if (export->exp_conn_cnt >= req->rq_reqmsg->conn_cnt) {
+#warning "FIXME ASAP"
+        if (0 && export->exp_conn_cnt >= req->rq_reqmsg->conn_cnt) {
                 CERROR("%s: already connected at a higher conn_cnt: %d > %d\n",
                        cluuid.uuid, export->exp_conn_cnt, 
                        req->rq_reqmsg->conn_cnt);
index 99e229e..824bc0e 100644 (file)
@@ -106,6 +106,27 @@ static int lmv_connect_fake(struct lustre_handle *conn,
         RETURN(0);
 }
 
+void lmv_set_timeouts(struct obd_device *obd)
+{
+        struct lmv_tgt_desc *tgts;
+        struct lmv_obd *lmv;
+        int i;
+
+        lmv = &obd->u.lmv;
+        if (lmv->server_timeout == 0)
+                return;
+
+        if (lmv->connected == 0)
+                return;
+
+        for (i = 0, tgts = lmv->tgts; i < lmv->count; i++, tgts++) {
+                if (tgts->exp == NULL)
+                        continue;
+                obd_set_info(tgts->exp, strlen("inter_mds"),
+                             "inter_mds", 0, NULL);
+        }
+}
+
 int lmv_connect(struct obd_device *obd)
 {
         struct lmv_obd *lmv = &obd->u.lmv;
@@ -180,6 +201,8 @@ int lmv_connect(struct obd_device *obd)
                         atomic_read(&obd->obd_refcount));
         }
 
+        lmv_set_timeouts(obd);
+
         class_export_put(exp);
         RETURN (0);
 
@@ -1126,18 +1149,22 @@ int lmv_set_info(struct obd_export *exp, obd_count keylen,
                 RETURN(-EINVAL);
         }
         lmv = &obd->u.lmv;
-        lmv_connect(obd);
 
         if (keylen >= strlen("client") && strcmp(key, "client") == 0) {
                 struct lmv_tgt_desc *tgts;
                 int i, rc;
 
+                lmv_connect(obd);
                 for (i = 0, tgts = lmv->tgts; i < lmv->count; i++, tgts++) {
                         rc = obd_set_info(tgts->exp, keylen, key, vallen, val);
                         if (rc)
                                 RETURN(rc);
                 }
                 RETURN(0);
+        } else if (keylen >= strlen("inter_mds") && strcmp(key, "inter_mds") == 0) {
+                lmv->server_timeout = 1;
+                lmv_set_timeouts(obd);
+                RETURN(0);
         }
         
         RETURN(-EINVAL);
index f6fdd32..90665c1 100644 (file)
@@ -707,6 +707,11 @@ int mdc_set_info(struct obd_export *exp, obd_count keylen,
                 rc = ptlrpc_queue_wait(req);
                 ptlrpc_req_finished(req);
                 RETURN(rc);
+        } else if (keylen >= strlen("inter_mds") && strcmp(key, "inter_mds") == 0) {
+                struct obd_import *imp = class_exp2cliimp(exp);
+                imp->imp_server_timeout = 1;
+                CDEBUG(D_OTHER, "%s: timeout / 2\n", exp->exp_obd->obd_name);
+                RETURN(0);
         }
         
         RETURN(rc);
index 9ea4849..863dba5 100644 (file)
@@ -99,6 +99,10 @@ int mds_lmv_connect(struct obd_device *obd, char * lmv_name)
                 GOTO(err_reg, rc);
         mds->mds_num = mdsize;
 
+        rc = obd_set_info(mds->mds_lmv_exp, strlen("inter_mds"),
+                                "inter_mds", 0, NULL);
+        if (rc)
+                GOTO(err_reg, rc);
        RETURN(0);
 
 err_reg:
@@ -518,7 +522,6 @@ static int filter_start_page_write(struct inode *inode,
 struct dentry *filter_fid2dentry(struct obd_device *obd,
                                  struct dentry *dir_dentry,
                                  obd_gr group, obd_id id);
-void f_dput(struct dentry *dentry);
 
 int mds_preprw(int cmd, struct obd_export *exp, struct obdo *oa,
                 int objcount, struct obd_ioobj *obj,
@@ -547,7 +550,7 @@ int mds_preprw(int cmd, struct obd_export *exp, struct obdo *oa,
         if (dentry->d_inode == NULL) {
                 CERROR("trying to BRW to non-existent file "LPU64"\n",
                        obj->ioo_id);
-                f_dput(dentry);
+                l_dput(dentry);
                 GOTO(cleanup, rc = -ENOENT);
         }
 
@@ -571,7 +574,7 @@ int mds_preprw(int cmd, struct obd_export *exp, struct obdo *oa,
                                i, obj->ioo_bufcnt, dentry, rc);
                         while (lnb-- > res)
                                 __free_pages(lnb->page, 0);
-                        f_dput(dentry);
+                        l_dput(dentry);
                         GOTO(cleanup, rc);
                 }
                 tot_bytes += lnb->len;
index bd3c1fb..50ff5ce 100644 (file)
@@ -77,7 +77,7 @@
 #define SOCKNAL_MAX_RECONNECT_INTERVAL (60*HZ) /* ...exponentially increasing to this */
 
 /* default vals for runtime tunables */
-#define SOCKNAL_IO_TIMEOUT       50             /* default comms timeout (seconds) */
+#define SOCKNAL_IO_TIMEOUT       20             /* default comms timeout (seconds) */
 #define SOCKNAL_EAGER_ACK        0              /* default eager ack (boolean) */
 #define SOCKNAL_TYPED_CONNS      1              /* unidirectional large, bidirectional small? */
 #define SOCKNAL_ZC_MIN_FRAG     (2<<10)         /* default smallest zerocopy fragment */
index 374e46e..4b5fad3 100644 (file)
@@ -148,6 +148,7 @@ void ptlrpc_deactivate_import(struct obd_import *imp)
 void ptlrpc_invalidate_import(struct obd_import *imp, int in_rpc)
 {
         struct l_wait_info lwi;
+        unsigned long timeout;
         int inflight = 0;
         int rc;
 
@@ -160,8 +161,12 @@ void ptlrpc_invalidate_import(struct obd_import *imp, int in_rpc)
                 inflight = 1;
         /* wait for all requests to error out and call completion 
            callbacks */
-        lwi = LWI_TIMEOUT_INTR(MAX(obd_timeout * HZ, 1), NULL, 
-                               NULL, NULL);
+        if (imp->imp_server_timeout)
+                timeout = obd_timeout / 2;
+        else
+                timeout = obd_timeout;
+        timeout = MAX(timeout * HZ, 1);
+        lwi = LWI_TIMEOUT_INTR(timeout, NULL, NULL, NULL);
         rc = l_wait_event(imp->imp_recovery_waitq, 
                           (atomic_read(&imp->imp_inflight) == inflight), 
                           &lwi);
@@ -441,6 +446,13 @@ finish:
                 if (aa->pcaa_initial_connect && !imp->imp_initial_recov) {
                         ptlrpc_deactivate_import(imp);
                 }
+                /*if (rc == -ETIMEDOUT) {
+                        CDEBUG(D_ERROR, "recovery of %s on %s failed (timeout)\n",
+                               imp->imp_target_uuid.uuid,
+                               (char *)imp->imp_connection->c_remote_uuid.uuid);
+                        ptlrpc_connect_import(imp, NULL);
+                        RETURN(0);
+                }*/
                 CDEBUG(D_ERROR, "recovery of %s on %s failed (%d)\n",
                        imp->imp_target_uuid.uuid,
                        (char *)imp->imp_connection->c_remote_uuid.uuid, rc);
@@ -572,8 +584,13 @@ int ptlrpc_disconnect_import(struct obd_import *imp)
 
         if (ptlrpc_import_in_recovery(imp)) {
                 struct l_wait_info lwi;
-                lwi = LWI_TIMEOUT_INTR(MAX(obd_timeout * HZ, 1), back_to_sleep, 
-                                       NULL, NULL);
+                unsigned long timeout;
+                if (imp->imp_server_timeout)
+                        timeout = obd_timeout / 2;
+                else
+                        timeout = obd_timeout;
+                timeout = MAX(timeout * HZ, 1);
+                lwi = LWI_TIMEOUT_INTR(obd_timeout, back_to_sleep, NULL, NULL);
                 rc = l_wait_event(imp->imp_recovery_waitq, 
                                   !ptlrpc_import_in_recovery(imp), &lwi);
 
index 01d7d23..f1e68e7 100644 (file)
@@ -337,8 +337,11 @@ static int pinger_check_rpcs(void *arg)
                         if (level == LUSTRE_IMP_DISCON) {
                                 /* wait at least a timeout before 
                                    trying recovery again. */
+                                unsigned long timeout = obd_timeout;
+                                if (imp->imp_server_timeout)
+                                        timeout = obd_timeout / 2;
                                 imp->imp_next_ping = time(NULL) + 
-                                        (obd_timeout * HZ);
+                                        (timeout * HZ);
                                 ptlrpc_initiate_recovery(imp);
                         } 
                         else if (level != LUSTRE_IMP_FULL ||
index 7c48e1b..a1d13a7 100755 (executable)
@@ -1380,9 +1380,6 @@ class LMV(Module):
         if name_override != None:
             self.name = "lmv_%s" % name_override
         self.add_lustre_module('lmv', 'lmv')
-        self.mds_uuid = self.db.get_first_ref('mds')
-        mds = self.db.lookup(self.mds_uuid)
-        self.lmv_name = mds.getName()
         self.devlist = self.db.get_refs('mds')
         self.mdclist = []
         self.desc_uuid = self.uuid
@@ -1409,7 +1406,6 @@ class LMV(Module):
             except CommandError, e:
                 print "Error preparing LMV %s\n" % mdc.uuid
                 raise e
-        self.info(self.mds_uuid)
         lctl.lmv_setup(self.name, self.uuid, self.desc_uuid,
                        string.join(self.devlist))
 
@@ -1517,6 +1513,7 @@ class MDSDEV(Module):
        # setup LMV
        if self.master_mds:
             client_uuid = generate_client_uuid(self.name)
+           client_uuid = self.name + "_lmv_" + "UUID"
            self.master = LMV(self.db.lookup(self.lmv_uuid), client_uuid, self.name, self.name)
            self.master_mds = self.master.name
         # modules
@@ -1524,6 +1521,7 @@ class MDSDEV(Module):
         self.add_lustre_module('osc', 'osc')
         self.add_lustre_module('lov', 'lov')
         self.add_lustre_module('lmv', 'lmv')
+        self.add_lustre_module('ost', 'ost')
         self.add_lustre_module('mds', 'mds')
         if self.fstype:
             self.add_lustre_module('lvfs', 'fsfilt_%s' % (self.fstype))