From: scjody
Date: Tue, 11 Jul 2006 22:12:42 +0000 (+0000)
Subject: Merge b1_5 from b1_4 (20060711_1245)
X-Git-Tag: v1_7_100~1^90~8^2~11
X-Git-Url: https://git.whamcloud.com/?a=commitdiff_plain;h=6417321b232ead5dba3e1e1826494cf5ba34926e;p=fs%2Flustre-release.git

Merge b1_5 from b1_4 (20060711_1245)

Bugzilla   : 4778
Description: last_id value checked outside lock on OST caused LASSERT failure
Details    : If there were multiple MDS->OST object precreate requests in
             flight, it was possible that the OST's last object id was checked
             outside a lock and incorrectly tripped an assertion. Move checks
             inside locks, and discard old precreate requests.

Bugzilla   : 9387
Description: import connection selection may be incorrect if timer wraps
Details    : Using a 32-bit jiffies timer with HZ=1000 may cause backup
             import connections to be ignored if the 32-bit jiffies counter
             wraps. Use a 64-bit jiffies counter.

Bugzilla   : 10083
Description: LNET request buffers exhausted under heavy short-term load
Details    : If a large number of client requests are generated on a service
             that has never previously seen so many requests, it is possible
             that request buffer growth cannot keep up with the spike in
             demand. Instead of dropping incoming requests, they are now held
             in the LND until the RPC service can accept more requests.

Bugzilla   : 9314
Description: Assertion failure in ll_local_open after replay.
Details    : If replay happened on an open request reply before we were able
             to set the replay handler, the reply would not be swabbed,
             tripping the assertion in ll_local_open. Now we set the handler
             right after recognising an open request.
---

diff --git a/lustre/ChangeLog b/lustre/ChangeLog
index 0f09188..63b53b1 100644
--- a/lustre/ChangeLog
+++ b/lustre/ChangeLog
@@ -315,8 +315,8 @@ Frequency  : Always
 Bugzilla   : 10248
 Description: Allow fractional MB tunings for lustre in /proc/ filesystem.
 Details    : Many of the /proc/ tunables can only be tuned at a megabyte
-             granularity. Now, Fractional MB granularity is be supported,
-             this is very useful for low memory system.
+             granularity. Now, Fractional MB granularity is be supported,
+             this is very useful for low memory system.
 
 Severity   : enhancement
 Bugzilla   : 9292
@@ -391,8 +391,44 @@ Frequency  : rare
 Bugzilla   : 10409
 Description: i_sem vs transaction deadlock in mds_obd_destroy during unlink.
 Details    : protect inode from truncation within vfs_unlink() context -
-             just take a reference before calling vfs_unlink() and release it
-             when parent's i_sem is free.
+             just take a reference before calling vfs_unlink() and release it
+             when parent's i_sem is free.
+
+Severity   : major
+Frequency  : rare
+Bugzilla   : 4778
+Description: last_id value checked outside lock on OST caused LASSERT failure
+Details    : If there were multiple MDS->OST object precreate requests in
+             flight, it was possible that the OST's last object id was checked
+             outside a lock and incorrectly tripped an assertion. Move checks
+             inside locks, and discard old precreate requests.
+
+Severity   : minor
+Frequency  : rare
+Bugzilla   : 9387
+Description: import connection selection may be incorrect if timer wraps
+Details    : Using a 32-bit jiffies timer with HZ=1000 may cause backup
+             import connections to be ignored if the 32-bit jiffies counter
+             wraps. Use a 64-bit jiffies counter.
+
+Severity   : minor
+Frequency  : very large clusters immediately after boot
+Bugzilla   : 10083
+Description: LNET request buffers exhausted under heavy short-term load
+Details    : If a large number of client requests are generated on a service
+             that has previously never seen so many requests it is possible
+             that the request buffer growth cannot keep up with the spike in
+             demand. Instead of dropping incoming requests, they are held in
+             the LND until the RPC service can accept more requests.
+
+Severity   : minor
+Frequency  : Sometimes during replay
+Bugzilla   : 9314
+Description: Assertion failure in ll_local_open after replay.
+Details    : If replay happened on an open request reply before we were able
+             to set replay handler, reply will become not swabbed tripping the
+             assertion in ll_local_open. Now we set the handler right after
+             recognising of open request
 
 Severity   : minor
 Frequency  : Sometimes during replay
diff --git a/lustre/mdc/mdc_locks.c b/lustre/mdc/mdc_locks.c
index 8a77a17..ada4a8b 100644
--- a/lustre/mdc/mdc_locks.c
+++ b/lustre/mdc/mdc_locks.c
@@ -455,9 +455,9 @@ int mdc_enqueue(struct obd_export *exp,
                 RETURN (-EPROTO);
         }
 
-        /* If this is an successful OPEN request, we need to set
+        /* If this is a successful OPEN request, we need to set
            replay handler and data early, so that if replay happens
-           immediatelly after swabbing below, new reply is swabbed
+           immediately after swabbing below, new reply is swabbed
           by that handler correctly */
         if (it_disposition(it, DISP_OPEN_OPEN) &&
             !it_open_error(DISP_OPEN_OPEN, it))
diff --git a/lustre/ptlrpc/events.c b/lustre/ptlrpc/events.c
index 7db4343..2cf3e1f 100644
--- a/lustre/ptlrpc/events.c
+++ b/lustre/ptlrpc/events.c
@@ -212,16 +212,17 @@ void request_in_callback(lnet_event_t *ev)
 
         if (ev->unlinked) {
                 service->srv_nrqbd_receiving--;
-                if (ev->type != LNET_EVENT_UNLINK &&
-                    service->srv_nrqbd_receiving == 0) {
-                        /* This service is off-air because all its request
-                         * buffers are busy. Portals will start dropping
-                         * incoming requests until more buffers get posted.
-                         * NB don't moan if it's because we're tearing down the
-                         * service. */
-                        CERROR("All %s request buffers busy\n",
+                CDEBUG(D_RPCTRACE,"Buffer complete: %d buffers still posted\n",
+                       service->srv_nrqbd_receiving);
+
+                /* Normally, don't complain about 0 buffers posted; LNET won't
+                 * drop incoming reqs since we set the portal lazy */
+                if (test_req_buffer_pressure &&
+                    ev->type != LNET_EVENT_UNLINK &&
+                    service->srv_nrqbd_receiving == 0)
+                        CWARN("All %s request buffers busy\n",
                               service->srv_name);
-                }
+
                 /* req takes over the network's ref on rqbd */
         } else {
                 /* req takes a ref on rqbd */
diff --git a/lustre/ptlrpc/import.c b/lustre/ptlrpc/import.c
index b896381..5a2d9cc 100644
--- a/lustre/ptlrpc/import.c
+++ b/lustre/ptlrpc/import.c
@@ -265,11 +265,8 @@ static int import_select_connection(struct obd_import *imp)
                        imp->imp_obd->obd_name,
                        libcfs_nid2str(conn->oic_conn->c_peer.nid),
                        conn->oic_last_attempt);
-
-                /* Throttle the reconnect rate to once per RECONNECT_INTERVAL */
                 if (get_jiffies_64() > conn->oic_last_attempt +
                     RECONNECT_INTERVAL * HZ) {
-                        /* If we have never tried this connection since the
-                           the last successful attempt, go with this one */
                         if (conn->oic_last_attempt <=
@@ -279,7 +276,7 @@ static int import_select_connection(struct obd_import *imp)
                 }
 
                 /* Both of these connections have already been tried
-                   since the last successful connection, just choose the
+                   since the last successful connection; just choose the
                    least recently used */
                 if (!imp_conn)
                         imp_conn = conn;
@@ -287,7 +284,7 @@ static int import_select_connection(struct obd_import *imp)
                 if (conn->oic_last_attempt <
                     imp_conn->oic_last_attempt)
                         imp_conn = conn;
-                }
+                }
         }
 
         /* if not found, simply choose the current one */
diff --git a/lustre/ptlrpc/ptlrpc_internal.h b/lustre/ptlrpc/ptlrpc_internal.h
index e45106d..e954571 100644
--- a/lustre/ptlrpc/ptlrpc_internal.h
+++ b/lustre/ptlrpc/ptlrpc_internal.h
@@ -34,6 +34,7 @@ struct ldlm_namespace;
 struct obd_import;
 struct ldlm_res_id;
 struct ptlrpc_request_set;
+extern int test_req_buffer_pressure;
 
 void ptlrpc_request_handle_notconn(struct ptlrpc_request *);
 void lustre_assert_wire_constants(void);
diff --git a/lustre/ptlrpc/service.c b/lustre/ptlrpc/service.c
index edfdcce..d0cf4cd 100644
--- a/lustre/ptlrpc/service.c
+++ b/lustre/ptlrpc/service.c
@@ -34,6 +34,10 @@ #include
 #include "ptlrpc_internal.h"
 
+int test_req_buffer_pressure = 0;
+CFS_MODULE_PARM(test_req_buffer_pressure, "i", int, 0444,
+                "set non-zero to put pressure on request buffer pools");
+
 /* forward ref */
 static int ptlrpc_server_post_idle_rqbds (struct ptlrpc_service *svc);
@@ -239,13 +243,8 @@ ptlrpc_server_post_idle_rqbds (struct ptlrpc_service *svc)
 
         list_del(&rqbd->rqbd_list);
         list_add_tail(&rqbd->rqbd_list, &svc->srv_idle_rqbds);
 
-        if (svc->srv_nrqbd_receiving == 0) {
-                /* This service is off-air on this interface because all
-                 * its request buffers are busy. Portals will have started
-                 * dropping incoming requests until more buffers get
-                 * posted */
-                CERROR("All %s request buffers busy\n", svc->srv_name);
-        }
+        /* Don't complain if no request buffers are posted right now; LNET
+         * won't drop requests because we set the portal lazy! */
 
         spin_unlock(&svc->srv_lock);
@@ -277,7 +276,7 @@ ptlrpc_init_svc(int nbufs, int bufsize, int max_req_size, int max_reply_size,
         CFS_INIT_LIST_HEAD(&service->srv_threads);
         cfs_waitq_init(&service->srv_waitq);
 
-        service->srv_nbuf_per_group = nbufs;
+        service->srv_nbuf_per_group = test_req_buffer_pressure ? 1 : nbufs;
         service->srv_max_req_size = max_req_size;
         service->srv_buf_size = bufsize;
         service->srv_rep_portal = rep_portal;
@@ -289,6 +288,9 @@ ptlrpc_init_svc(int nbufs, int bufsize, int max_req_size, int max_reply_size,
         service->srv_request_max_cull_seq = 0;
         service->srv_num_threads = num_threads;
 
+        rc = LNetSetLazyPortal(service->srv_req_portal);
+        LASSERT (rc == 0);
+
         CFS_INIT_LIST_HEAD(&service->srv_request_queue);
         CFS_INIT_LIST_HEAD(&service->srv_idle_rqbds);
         CFS_INIT_LIST_HEAD(&service->srv_active_rqbds);
@@ -836,7 +838,8 @@ static void ptlrpc_check_rqbd_pool(struct ptlrpc_service *svc)
 {
         int avail = svc->srv_nrqbd_receiving;
-        int low_water = svc->srv_nbuf_per_group/2;
+        int low_water = test_req_buffer_pressure ? 0 :
+                        svc->srv_nbuf_per_group/2;
 
         /* NB I'm not locking; just looking. */
@@ -979,6 +982,8 @@ static int ptlrpc_main(void *arg)
                          * for a timeout (unless something else happens)
                          * before I try again */
                         svc->srv_rqbd_timeout = cfs_time_seconds(1)/10;
+                        CDEBUG(D_RPCTRACE,"Posted buffers: %d\n",
+                               svc->srv_nrqbd_receiving);
                 }
         }
@@ -1131,6 +1136,9 @@ int ptlrpc_unregister_service(struct ptlrpc_service *service)
 
         CDEBUG(D_NET, "%s: tearing down\n", service->srv_name);
 
+        rc = LNetClearLazyPortal(service->srv_req_portal);
+        LASSERT (rc == 0);
+
         /* Unlink all the request buffers. This forces a 'final' event with
          * its 'unlink' flag set for each posted rqbd */
         list_for_each(tmp, &service->srv_active_rqbds) {
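
The bug 9387 change relies on wrap-safe time arithmetic. The standalone C sketch below is illustrative only (it is not Lustre code; HZ, the interval value, and the helper names are made up for the example). It shows how a "retry at most once per RECONNECT_INTERVAL" comparison can keep throttling forever once a 32-bit tick counter wraps, and how the same comparison on a 64-bit counter, in the spirit of the get_jiffies_64() usage in import.c, stays correct.

/* Illustrative sketch only (not Lustre code): why throttling reconnects with
 * a 32-bit tick counter misbehaves when the counter wraps (with HZ=1000 a
 * 32-bit jiffies counter wraps roughly every 49.7 days), and why the same
 * check on a 64-bit counter does not. HZ and the interval are example values. */
#include <stdio.h>
#include <stdint.h>

#define HZ                 1000u
#define RECONNECT_INTERVAL 5u    /* seconds; value chosen for the example */

/* "May we retry this connection yet?" -- at most once per interval. */
static int may_retry_32(uint32_t now, uint32_t last_attempt)
{
        return now > last_attempt + RECONNECT_INTERVAL * HZ;
}

static int may_retry_64(uint64_t now, uint64_t last_attempt)
{
        return now > last_attempt + RECONNECT_INTERVAL * HZ;
}

int main(void)
{
        /* Last attempt 10 s before the 32-bit counter wraps... */
        uint32_t last32 = UINT32_MAX - 10 * HZ;
        /* ...and "now" is a minute later, i.e. after the wrap. */
        uint32_t now32  = last32 + 60 * HZ;

        /* Same instants on a 64-bit counter, which does not wrap here. */
        uint64_t last64 = (uint64_t)UINT32_MAX - 10 * HZ;
        uint64_t now64  = last64 + 60 * HZ;

        printf("32-bit check allows retry: %d  (wrong: a minute has passed)\n",
               may_retry_32(now32, last32));
        printf("64-bit check allows retry: %d  (correct)\n",
               may_retry_64(now64, last64));
        return 0;
}

Run as-is, the 32-bit check reports 0 even though a full minute has elapsed since the last attempt, which mirrors the "backup import connections ignored" symptom in the commit message; the 64-bit check reports 1.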
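For bug 10083 the patch marks the request portal lazy (LNetSetLazyPortal()/LNetClearLazyPortal() in service.c), so LNET holds incoming requests instead of dropping them while no request buffers are posted, and adds the test_req_buffer_pressure module parameter to starve the buffer pool deliberately during testing. The sketch below is a rough userspace illustration of the low-water policy only; the should_post_more_rqbds() helper and the <= comparison are assumptions, since the hunk shows just the low_water computation in ptlrpc_check_rqbd_pool().

/* Illustrative sketch only (not Lustre code): the request-buffer low-water
 * policy touched by the patch. Normally a service refills once fewer than
 * half of a buffer group is still posted; with the test_req_buffer_pressure
 * knob the low-water mark drops to 0, keeping the pool deliberately starved.
 * The <= comparison and helper name are assumptions for the example. */
#include <stdio.h>

static int test_req_buffer_pressure = 0;   /* mirrors the new module parameter */

static int should_post_more_rqbds(int posted, int nbuf_per_group)
{
        int low_water = test_req_buffer_pressure ? 0 : nbuf_per_group / 2;

        return posted <= low_water;
}

int main(void)
{
        printf("normal, 3 of 8 posted   -> refill? %d\n", should_post_more_rqbds(3, 8));
        printf("normal, 5 of 8 posted   -> refill? %d\n", should_post_more_rqbds(5, 8));

        test_req_buffer_pressure = 1;
        printf("pressure, 1 of 8 posted -> refill? %d\n", should_post_more_rqbds(1, 8));
        printf("pressure, 0 of 8 posted -> refill? %d\n", should_post_more_rqbds(0, 8));
        return 0;
}

With the knob off, the pool is topped up as soon as fewer than half of the buffers in a group remain posted; with it on, a low-water mark of zero keeps the pool near exhaustion so the "request buffers busy" path can actually be exercised.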