Whamcloud - gitweb
b=24375 Fix a race between completion and enqueue
authorVitaly Fertman <vitaly.fertman@oracle.com>
Tue, 22 Feb 2011 10:29:53 +0000 (13:29 +0300)
committerTerry Rutledge <terry.rutledge@oracle.com>
Tue, 22 Feb 2011 16:19:56 +0000 (08:19 -0800)
o=green
i=vitaly
i=vs

ldlm_enqueue_tail does not obtain proper lockng when checking lock mode to see
if the lock is granted, so there is a window where ldlm_handle_completion_ast
can update lvb with correct data, but beforeit has a chance to update the lock
mode, the ldlm_enqueue_tail will check the lock mode and since the lock is not
granted yet, it will overwrite correct lvb with stale value from enqueue time.

lustre/ldlm/ldlm_request.c

index 4cc1cb8..7d692d0 100644 (file)
@@ -385,6 +385,7 @@ int ldlm_cli_enqueue_fini(struct obd_export *exp, struct ptlrpc_request *req,
         struct lustre_handle old_hash_key;
         struct ldlm_lock *lock;
         struct ldlm_reply *reply;
+        struct ost_lvb *tmplvb;
         int cleanup_phase = 1;
         ENTRY;
 
@@ -409,7 +410,6 @@ int ldlm_cli_enqueue_fini(struct obd_export *exp, struct ptlrpc_request *req,
                                 rc = -EPROTO;
                         }
                         if (lvb_len) {
-                                void *tmplvb;
                                 tmplvb = lustre_swab_repbuf(req,
                                                             DLM_REPLY_REC_OFF,
                                                             lvb_len,
@@ -506,13 +506,22 @@ int ldlm_cli_enqueue_fini(struct obd_export *exp, struct ptlrpc_request *req,
 
         /* If the lock has already been granted by a completion AST, don't
          * clobber the LVB with an older one. */
-        if (lvb_len && (lock->l_req_mode != lock->l_granted_mode)) {
-                void *tmplvb;
-                tmplvb = lustre_swab_repbuf(req, DLM_REPLY_REC_OFF, lvb_len,
-                                            lvb_swabber);
-                if (tmplvb == NULL)
-                        GOTO(cleanup, rc = -EPROTO);
-                memcpy(lock->l_lvb_data, tmplvb, lvb_len);
+        if (lvb_len) {
+                /* We must lock or a racing completion might update lvb
+                   without letting us know and we'll clobber the correct value.
+                   Cannot unlock after the check either, a that still leaves
+                   a tiny window for completion to get in */
+                lock_res_and_lock(lock);
+                if (lock->l_req_mode != lock->l_granted_mode) {
+                        tmplvb = lustre_swab_repbuf(req, DLM_REPLY_REC_OFF,
+                                                    lvb_len, lvb_swabber);
+                        if (tmplvb == NULL) {
+                                unlock_res_and_lock(lock);
+                                GOTO(cleanup, rc = -EPROTO);
+                        }
+                        memcpy(lock->l_lvb_data, tmplvb, lvb_len);
+                }
+                unlock_res_and_lock(lock);
         }
 
         if (!is_replay) {