Whamcloud - gitweb
LU-1735 ptlrpc: only set jobid if not already set
authorAndreas Dilger <adilger@whamcloud.com>
Fri, 10 Aug 2012 22:24:00 +0000 (16:24 -0600)
committerOleg Drokin <green@whamcloud.com>
Wed, 22 Aug 2012 16:14:49 +0000 (12:14 -0400)
The ptlrpc_set_add_req->lustre_get_jobid->cfs_access_process_vm()
callpath locks mm->mmap_sem to fetch environment variables, but if
this could deadlock in case of MMAP IO, which also holds mmap_sem.
If the mmap_sem is already held for write, just don't set the jobid.
This will make jobid stats inconsistent for mmap IO, but avoids a
lot of complexity or race conditions in the code otherwise.

If the caller already has fetched the jobid and saved it for this
inode in the OSC layer, so we don't need to fetch and reset the
pd_jobid field at all in this case.  This avoids doing extra work
to fetch the jobid if it is not needed, and avoids storing it
temporarily on the stack when it won't be used.

Signed-off-by: Andreas Dilger <adilger@whamcloud.com>
Change-Id: I22c3d5c1755c1d6aab666a769df38218b954650a
Reviewed-on: http://review.whamcloud.com/3604
Reviewed-by: Jinshan Xiong <jinshan.xiong@whamcloud.com>
Tested-by: Hudson
Tested-by: Maloo <whamcloud.maloo@gmail.com>
Reviewed-by: Niu Yawei <niu@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
libcfs/libcfs/linux/linux-curproc.c
lustre/obdclass/class_obd.c
lustre/ptlrpc/client.c
lustre/ptlrpc/pack_generic.c
lustre/ptlrpc/ptlrpcd.c

index f675eac..c1e814d 100644 (file)
@@ -288,6 +288,14 @@ int cfs_get_environ(const char *key, char *value, int *val_len)
                RETURN(-EINVAL);
        }
 
+       /* Avoid deadlocks on mmap_sem if called from sys_mmap_pgoff(),
+        * which is already holding mmap_sem for writes.  If some other
+        * thread gets the write lock in the meantime, this thread will
+        * block, but at least it won't deadlock on itself.  LU-1735 */
+       if (down_read_trylock(&mm->mmap_sem) == 0)
+               return -EDEADLK;
+       up_read(&mm->mmap_sem);
+
        addr = mm->env_start;
        ret = -ENOENT;
 
index e2a0521..6689678 100644 (file)
@@ -138,8 +138,9 @@ int lustre_get_jobid(char *jobid)
 #ifdef __KERNEL__
        ret = cfs_get_environ(obd_jobid_var, jobid, &jobid_len);
        if (ret) {
-               CDEBUG((ret != -ENOENT && ret != -EINVAL) ? D_ERROR : D_INFO,
-                      "Get jobid for (%s) failed(%d).\n", obd_jobid_var, ret);
+               CDEBUG((ret != -ENOENT && ret != -EINVAL && ret != -EDEADLK) ?
+                      D_ERROR : D_INFO, "Get jobid for (%s) failed: rc = %d\n",
+                      obd_jobid_var, ret);
        }
 #endif
        RETURN(ret);
index 5b50c0c..35b1443 100644 (file)
@@ -1027,7 +1027,6 @@ EXPORT_SYMBOL(ptlrpc_set_add_cb);
 void ptlrpc_set_add_req(struct ptlrpc_request_set *set,
                         struct ptlrpc_request *req)
 {
-       char jobid[JOBSTATS_JOBID_SIZE];
        LASSERT(cfs_list_empty(&req->rq_set_chain));
 
        /* The set takes over the caller's request reference */
@@ -1036,10 +1035,8 @@ void ptlrpc_set_add_req(struct ptlrpc_request_set *set,
        cfs_atomic_inc(&set->set_remaining);
        req->rq_queued_time = cfs_time_current();
 
-       if (req->rq_reqmsg) {
-               lustre_get_jobid(jobid);
-               lustre_msg_set_jobid(req->rq_reqmsg, jobid);
-       }
+       if (req->rq_reqmsg != NULL)
+               lustre_msg_set_jobid(req->rq_reqmsg, NULL);
 
        if (set->set_producer != NULL)
                /* If the request set has a producer callback, the RPC must be
index 0664de7..f1d8621 100644 (file)
@@ -1557,7 +1557,11 @@ void lustre_msg_set_jobid(struct lustre_msg *msg, char *jobid)
                pb = lustre_msg_buf_v2(msg, MSG_PTLRPC_BODY_OFF,
                                       sizeof(struct ptlrpc_body));
                LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg);
-               memcpy(pb->pb_jobid, jobid, JOBSTATS_JOBID_SIZE);
+
+               if (jobid != NULL)
+                       memcpy(pb->pb_jobid, jobid, JOBSTATS_JOBID_SIZE);
+               else if (pb->pb_jobid[0] == '\0')
+                       lustre_get_jobid(pb->pb_jobid);
                return;
        }
        default:
index 8c7d8a2..fbb03b2 100644 (file)
@@ -238,12 +238,9 @@ static int ptlrpcd_steal_rqset(struct ptlrpc_request_set *des,
 void ptlrpcd_add_req(struct ptlrpc_request *req, pdl_policy_t policy, int idx)
 {
         struct ptlrpcd_ctl *pc;
-       char jobid[JOBSTATS_JOBID_SIZE];
 
-       if (req->rq_reqmsg) {
-               lustre_get_jobid(jobid);
-               lustre_msg_set_jobid(req->rq_reqmsg, jobid);
-       }
+       if (req->rq_reqmsg)
+               lustre_msg_set_jobid(req->rq_reqmsg, NULL);
 
         cfs_spin_lock(&req->rq_lock);
         if (req->rq_invalid_rqset) {