From: Andreas Dilger Date: Fri, 10 Aug 2012 22:24:00 +0000 (-0600) Subject: LU-1735 ptlrpc: only set jobid if not already set X-Git-Tag: 2.3.51~172 X-Git-Url: https://git.whamcloud.com/gitweb?a=commitdiff_plain;h=f641913a348ec0179cdc599baa93f74aa64f9943;p=fs%2Flustre-release.git LU-1735 ptlrpc: only set jobid if not already set The ptlrpc_set_add_req->lustre_get_jobid->cfs_access_process_vm() callpath locks mm->mmap_sem to fetch environment variables, but if this could deadlock in case of MMAP IO, which also holds mmap_sem. If the mmap_sem is already held for write, just don't set the jobid. This will make jobid stats inconsistent for mmap IO, but avoids a lot of complexity or race conditions in the code otherwise. If the caller already has fetched the jobid and saved it for this inode in the OSC layer, so we don't need to fetch and reset the pd_jobid field at all in this case. This avoids doing extra work to fetch the jobid if it is not needed, and avoids storing it temporarily on the stack when it won't be used. Signed-off-by: Andreas Dilger Change-Id: I22c3d5c1755c1d6aab666a769df38218b954650a Reviewed-on: http://review.whamcloud.com/3604 Reviewed-by: Jinshan Xiong Tested-by: Hudson Tested-by: Maloo Reviewed-by: Niu Yawei Reviewed-by: Oleg Drokin --- diff --git a/libcfs/libcfs/linux/linux-curproc.c b/libcfs/libcfs/linux/linux-curproc.c index f675eac..c1e814d 100644 --- a/libcfs/libcfs/linux/linux-curproc.c +++ b/libcfs/libcfs/linux/linux-curproc.c @@ -288,6 +288,14 @@ int cfs_get_environ(const char *key, char *value, int *val_len) RETURN(-EINVAL); } + /* Avoid deadlocks on mmap_sem if called from sys_mmap_pgoff(), + * which is already holding mmap_sem for writes. If some other + * thread gets the write lock in the meantime, this thread will + * block, but at least it won't deadlock on itself. LU-1735 */ + if (down_read_trylock(&mm->mmap_sem) == 0) + return -EDEADLK; + up_read(&mm->mmap_sem); + addr = mm->env_start; ret = -ENOENT; diff --git a/lustre/obdclass/class_obd.c b/lustre/obdclass/class_obd.c index e2a0521..6689678 100644 --- a/lustre/obdclass/class_obd.c +++ b/lustre/obdclass/class_obd.c @@ -138,8 +138,9 @@ int lustre_get_jobid(char *jobid) #ifdef __KERNEL__ ret = cfs_get_environ(obd_jobid_var, jobid, &jobid_len); if (ret) { - CDEBUG((ret != -ENOENT && ret != -EINVAL) ? D_ERROR : D_INFO, - "Get jobid for (%s) failed(%d).\n", obd_jobid_var, ret); + CDEBUG((ret != -ENOENT && ret != -EINVAL && ret != -EDEADLK) ? + D_ERROR : D_INFO, "Get jobid for (%s) failed: rc = %d\n", + obd_jobid_var, ret); } #endif RETURN(ret); diff --git a/lustre/ptlrpc/client.c b/lustre/ptlrpc/client.c index 5b50c0c..35b1443 100644 --- a/lustre/ptlrpc/client.c +++ b/lustre/ptlrpc/client.c @@ -1027,7 +1027,6 @@ EXPORT_SYMBOL(ptlrpc_set_add_cb); void ptlrpc_set_add_req(struct ptlrpc_request_set *set, struct ptlrpc_request *req) { - char jobid[JOBSTATS_JOBID_SIZE]; LASSERT(cfs_list_empty(&req->rq_set_chain)); /* The set takes over the caller's request reference */ @@ -1036,10 +1035,8 @@ void ptlrpc_set_add_req(struct ptlrpc_request_set *set, cfs_atomic_inc(&set->set_remaining); req->rq_queued_time = cfs_time_current(); - if (req->rq_reqmsg) { - lustre_get_jobid(jobid); - lustre_msg_set_jobid(req->rq_reqmsg, jobid); - } + if (req->rq_reqmsg != NULL) + lustre_msg_set_jobid(req->rq_reqmsg, NULL); if (set->set_producer != NULL) /* If the request set has a producer callback, the RPC must be diff --git a/lustre/ptlrpc/pack_generic.c b/lustre/ptlrpc/pack_generic.c index 0664de7..f1d8621 100644 --- a/lustre/ptlrpc/pack_generic.c +++ b/lustre/ptlrpc/pack_generic.c @@ -1557,7 +1557,11 @@ void lustre_msg_set_jobid(struct lustre_msg *msg, char *jobid) pb = lustre_msg_buf_v2(msg, MSG_PTLRPC_BODY_OFF, sizeof(struct ptlrpc_body)); LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg); - memcpy(pb->pb_jobid, jobid, JOBSTATS_JOBID_SIZE); + + if (jobid != NULL) + memcpy(pb->pb_jobid, jobid, JOBSTATS_JOBID_SIZE); + else if (pb->pb_jobid[0] == '\0') + lustre_get_jobid(pb->pb_jobid); return; } default: diff --git a/lustre/ptlrpc/ptlrpcd.c b/lustre/ptlrpc/ptlrpcd.c index 8c7d8a2..fbb03b2 100644 --- a/lustre/ptlrpc/ptlrpcd.c +++ b/lustre/ptlrpc/ptlrpcd.c @@ -238,12 +238,9 @@ static int ptlrpcd_steal_rqset(struct ptlrpc_request_set *des, void ptlrpcd_add_req(struct ptlrpc_request *req, pdl_policy_t policy, int idx) { struct ptlrpcd_ctl *pc; - char jobid[JOBSTATS_JOBID_SIZE]; - if (req->rq_reqmsg) { - lustre_get_jobid(jobid); - lustre_msg_set_jobid(req->rq_reqmsg, jobid); - } + if (req->rq_reqmsg) + lustre_msg_set_jobid(req->rq_reqmsg, NULL); cfs_spin_lock(&req->rq_lock); if (req->rq_invalid_rqset) {