Whamcloud - gitweb
Branch HEAD
author     johann <johann>
Mon, 28 Jan 2008 10:52:50 +0000 (10:52 +0000)
committer  johann <johann>
Mon, 28 Jan 2008 10:52:50 +0000 (10:52 +0000)
b=13843
i=adilger
i=shadow

A lot of unlink operations combined with concurrent I/O can lead to a
deadlock causing client evictions. To address the problem, the number of
outstanding OST_DESTROY requests is now throttled to
max_rpcs_in_flight per OSC.
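
For context, the throttle introduced here is a bounded in-flight counter paired
with a wait queue: a sender takes a slot before issuing an OST_DESTROY RPC, and
the reply-interpret callback releases the slot and wakes one queued waiter. The
sketch below only restates that pattern in user space with POSIX threads instead
of the kernel primitives used in the patch (atomic_t, cfs_waitq_t,
l_wait_event_exclusive); the throttle_t type and the throttle_get()/throttle_put()
helpers are hypothetical names, not part of the change.

/*
 * Illustrative user-space sketch of the throttling scheme, assuming POSIX
 * threads; the names below are hypothetical and do not appear in the patch.
 */
#include <pthread.h>

typedef struct {
        pthread_mutex_t lock;
        pthread_cond_t  waitq;          /* stands in for cl_destroy_waitq */
        int             in_flight;      /* stands in for cl_destroy_in_flight */
        int             max_in_flight;  /* stands in for cl_max_rpcs_in_flight */
} throttle_t;

/* Take a slot before sending a destroy RPC; block while the limit is reached. */
static void throttle_get(throttle_t *t)
{
        pthread_mutex_lock(&t->lock);
        while (t->in_flight >= t->max_in_flight)
                pthread_cond_wait(&t->waitq, &t->lock);
        t->in_flight++;
        pthread_mutex_unlock(&t->lock);
}

/* Release the slot from the reply callback and wake one waiting sender. */
static void throttle_put(throttle_t *t)
{
        pthread_mutex_lock(&t->lock);
        t->in_flight--;
        pthread_cond_signal(&t->waitq);
        pthread_mutex_unlock(&t->lock);
}

The patch itself avoids taking a lock on the common path: osc_can_send_destroy()
optimistically does atomic_inc_return(), backs off with atomic_dec_return() when
the limit is exceeded, and compensates with cfs_waitq_signal() if a completion
slipped in between the two atomics. Waiters queue with l_wait_event_exclusive(),
so each completed OST_DESTROY wakes at most one blocked sender.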

lustre/ChangeLog
lustre/include/obd.h
lustre/ldlm/ldlm_lib.c
lustre/osc/osc_request.c

diff --git a/lustre/ChangeLog b/lustre/ChangeLog
index 929fdfa..d933b24 100644
@@ -636,6 +636,15 @@ Details    : After ELC code landed, it is now improper to enqueue any mds
             locks under och_sem, because enqueue might want to decide to
             cancel open locks for same inode we are holding och_sem for.
 
+Severity   : normal
+Bugzilla   : 13843
+Description: Client eviction while running blogbench
+Details    : A lot of unlink operations with concurrent I/O can lead to a
+            deadlock causing evictions. To address the problem, the number of
+            outstanding OST_DESTROY requests is now throttled to
+            max_rpcs_in_flight per OSC and LDLM_FL_DISCARD_DATA blocking
+            callbacks are processed with higher priority.
+
 --------------------------------------------------------------------------------
 
 2007-08-10         Cluster File Systems, Inc. <info@clusterfs.com>
diff --git a/lustre/include/obd.h b/lustre/include/obd.h
index edad05b..91afc47 100644
@@ -447,6 +447,10 @@ struct client_obd {
         struct obd_histogram     cl_read_offset_hist;
         struct obd_histogram     cl_write_offset_hist;
 
+        /* number of in-flight destroy RPCs is limited to max_rpcs_in_flight */
+        atomic_t                 cl_destroy_in_flight;
+        cfs_waitq_t              cl_destroy_waitq;
+
         struct mdc_rpc_lock     *cl_rpc_lock;
         struct mdc_rpc_lock     *cl_setattr_lock;
         struct mdc_rpc_lock     *cl_close_lock;
diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c
index 7d4d43a..79c3fc9 100644
@@ -276,6 +276,8 @@ int client_obd_setup(struct obd_device *obddev, struct lustre_cfg *lcfg)
         spin_lock_init(&cli->cl_write_page_hist.oh_lock);
         spin_lock_init(&cli->cl_read_offset_hist.oh_lock);
         spin_lock_init(&cli->cl_write_offset_hist.oh_lock);
+        cfs_waitq_init(&cli->cl_destroy_waitq);
+        atomic_set(&cli->cl_destroy_in_flight, 0);
 #ifdef ENABLE_CHECKSUM
         cli->cl_checksum = 1;
 #endif
diff --git a/lustre/osc/osc_request.c b/lustre/osc/osc_request.c
index ba98d4e..5a760bd 100644
@@ -594,6 +594,34 @@ static int osc_resource_get_unused(struct obd_export *exp, struct obdo *oa,
         RETURN(count);
 }
 
+static int osc_destroy_interpret(struct ptlrpc_request *req, void *data,
+                                 int rc)
+{
+        struct client_obd *cli = &req->rq_import->imp_obd->u.cli;
+
+        atomic_dec(&cli->cl_destroy_in_flight);
+        cfs_waitq_signal(&cli->cl_destroy_waitq);
+        return 0;
+}
+
+static int osc_can_send_destroy(struct client_obd *cli)
+{
+        if (atomic_inc_return(&cli->cl_destroy_in_flight) <=
+            cli->cl_max_rpcs_in_flight) {
+                /* The destroy request can be sent */
+                return 1;
+        }
+        if (atomic_dec_return(&cli->cl_destroy_in_flight) <
+            cli->cl_max_rpcs_in_flight) {
+                /*
+                 * The counter has been modified between the two atomic
+                 * operations.
+                 */
+                cfs_waitq_signal(&cli->cl_destroy_waitq);
+        }
+        return 0;
+}
+
 /* Destroy requests can be async always on the client, and we don't even really
  * care about the return code since the client cannot do anything at all about
  * a destroy failure.
@@ -613,6 +641,7 @@ static int osc_destroy(struct obd_export *exp, struct obdo *oa,
         struct ost_body *body;
         int size[3] = { sizeof(struct ptlrpc_body), sizeof(*body), 0 };
         int count, bufcount = 2;
+        struct client_obd *cli = &exp->exp_obd->u.cli;
         ENTRY;
 
         if (!oa) {
@@ -630,6 +659,7 @@ static int osc_destroy(struct obd_export *exp, struct obdo *oa,
                 RETURN(-ENOMEM);
 
         req->rq_request_portal = OST_IO_PORTAL; /* bug 7198 */
+        req->rq_interpret_reply = osc_destroy_interpret;
 
         body = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF, sizeof(*body));
         if (oti != NULL && oa->o_valid & OBD_MD_FLCOOKIE)
@@ -639,6 +669,18 @@ static int osc_destroy(struct obd_export *exp, struct obdo *oa,
 
         ptlrpc_req_set_repsize(req, 2, size);
 
+        if (!osc_can_send_destroy(cli)) {
+                struct l_wait_info lwi = { 0 };
+
+                /*
+                 * Wait until the number of in-flight destroy RPCs drops
+                 * below max_rpcs_in_flight.
+                 */
+                l_wait_event_exclusive(cli->cl_destroy_waitq,
+                                       osc_can_send_destroy(cli), &lwi);
+        }
+
+        /* Do not wait for response */
         ptlrpcd_add_req(req);
         RETURN(0);
 }