osc_extent_wait() can get stuck in a scenario like this:

1) thread-1 holds an active extent
2) thread-2 calls flush cache and marks this extent as "urgent"
   and "sync_wait"
3) thread-3 wants to write to the same extent; osc_extent_find()
   sees a "conflict" because this extent is "sync_wait", so it
   starts to wait...
4) cl_writeback_work has been scheduled by thread-4 to write some
   other extents; it has sent RPCs but not returned yet
5) thread-1 finishes its work and calls osc_extent_release()->
   osc_io_unplug_async()->ptlrpcd_queue_work(), but because
   cl_writeback_work is still running the request is dropped
   with -EBUSY
6) thread-3 is stuck because nothing will ever wake it up

This patch allows the ptlrpcd work item to be re-enqueued, so it no
longer misses requests (a sketch of the pattern follows the sign-off
block below).
Signed-off-by: Liang Zhen <liang.zhen@intel.com>
Change-Id: I4929d52b2d409c2ce081147bb5ee3dd380a86c43
Reviewed-on: http://review.whamcloud.com/8922
Tested-by: Jenkins
Reviewed-by: Jinshan Xiong <jinshan.xiong@intel.com>
Reviewed-by: Bobi Jam <bobijam@gmail.com>
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
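
To make the change easier to follow, here is a minimal userspace sketch of
the refcount-based re-enqueue pattern this patch introduces, assuming C11
atomics. The names work_item, queue_work, run_work, enqueue and hello are
hypothetical stand-ins for ptlrpc_request, ptlrpcd_queue_work(),
work_interpreter() and ptlrpcd_add_work_req(); this is an illustration,
not Lustre code.

/* refcount == 1 means idle; the caller that bumps it from 1 to 2 is the
 * one that enqueues; higher values mean "requests arrived while the work
 * was busy", so the worker must run one more time. */
#include <stdatomic.h>
#include <stdio.h>

struct work_item {
	atomic_int refcount;		/* 1 = idle, >= 2 = queued/running */
	int (*cb)(void *data);
	void *cbdata;
};

static int run_work(struct work_item *w);

/* Stand-in for ptlrpcd_add_work_req(): a real implementation hands the
 * item to a worker thread; run it inline to keep the sketch runnable. */
static void enqueue(struct work_item *w)
{
	run_work(w);
}

/* Queue side, mirroring the new ptlrpcd_queue_work(): only the caller
 * that moves the count from 1 to 2 enqueues; later callers just bump
 * the count, and nobody gets -EBUSY back anymore. */
static int queue_work(struct work_item *w)
{
	if (atomic_fetch_add(&w->refcount, 1) == 1)
		enqueue(w);
	return 0;
}

/* Worker side, mirroring the new work_interpreter(): after the callback,
 * re-enqueue once if anyone called queue_work() while we were running,
 * collapsing any number of missed requests into a single extra run. */
static int run_work(struct work_item *w)
{
	int rc = w->cb(w->cbdata);

	if (atomic_fetch_sub(&w->refcount, 1) - 1 > 1) {
		atomic_store(&w->refcount, 2);
		enqueue(w);		/* a request was missed: run again */
	}
	return rc;
}

static int hello(void *data)
{
	printf("work ran: %s\n", (const char *)data);
	return 0;
}

int main(void)
{
	struct work_item w = { .cb = hello, .cbdata = "flush" };

	atomic_init(&w.refcount, 1);
	queue_work(&w);			/* prints once */
	queue_work(&w);			/* idle again, prints again */
	return 0;
}

In the old code a second caller saw the elevated refcount and got -EBUSY
while cl_writeback_work was still in flight, which is exactly the window
in which thread-3 above was lost; with the re-enqueue, that window only
costs one redundant run of the callback.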
#include "ptlrpc_internal.h"
static int ptlrpc_send_new_req(struct ptlrpc_request *req);
#include "ptlrpc_internal.h"
static int ptlrpc_send_new_req(struct ptlrpc_request *req);
+static int ptlrpcd_check_work(struct ptlrpc_request *req);
/**
 * Initialize passed in client structure \a cl.
	ptlrpc_req_interpret(env, req, req->rq_status);
-	ptlrpc_rqphase_move(req, RQ_PHASE_COMPLETE);
+	if (ptlrpcd_check_work(req)) {
+		atomic_dec(&set->set_remaining);
+		continue;
+	}
+	ptlrpc_rqphase_move(req, RQ_PHASE_COMPLETE);
	CDEBUG(req->rq_reqmsg != NULL ? D_RPCTRACE : 0,
	       "Completed RPC pname:cluuid:pid:xid:nid:"
 * have delay before it really runs by ptlrpcd thread.
 */
struct ptlrpc_work_async_args {
-	__u64 magic;
-	int (*cb)(const struct lu_env *, void *);
-	void *cbdata;
+	int (*cb)(const struct lu_env *, void *);
+	void *cbdata;
-#define PTLRPC_WORK_MAGIC 0x6655436b676f4f44ULL /* magic code */
+static void ptlrpcd_add_work_req(struct ptlrpc_request *req)
+{
+	/* re-initialize the req */
+	req->rq_timeout = obd_timeout;
+	req->rq_sent = cfs_time_current_sec();
+	req->rq_deadline = req->rq_sent + req->rq_timeout;
+	req->rq_reply_deadline = req->rq_deadline;
+	req->rq_phase = RQ_PHASE_INTERPRET;
+	req->rq_next_phase = RQ_PHASE_COMPLETE;
+	req->rq_xid = ptlrpc_next_xid();
+	req->rq_import_generation = req->rq_import->imp_generation;
+
+	ptlrpcd_add_req(req, PDL_POLICY_ROUND, -1);
+}
static int work_interpreter(const struct lu_env *env,
-			    struct ptlrpc_request *req, void *data, int rc)
+			    struct ptlrpc_request *req, void *data, int rc)
{
-	struct ptlrpc_work_async_args *arg = data;
+	struct ptlrpc_work_async_args *arg = data;

-	LASSERT(arg->magic == PTLRPC_WORK_MAGIC);
-	LASSERT(arg->cb != NULL);
+	LASSERT(ptlrpcd_check_work(req));
+	LASSERT(arg->cb != NULL);

-	return arg->cb(env, arg->cbdata);
+	rc = arg->cb(env, arg->cbdata);
+
+	list_del_init(&req->rq_set_chain);
+	req->rq_set = NULL;
+
+	if (atomic_dec_return(&req->rq_refcount) > 1) {
+		atomic_set(&req->rq_refcount, 2);
+		ptlrpcd_add_work_req(req);
+	}
+	return rc;
+}
+
+static int worker_format;
+
+static int ptlrpcd_check_work(struct ptlrpc_request *req)
+{
+	return req->rq_pill.rc_fmt == (void *)&worker_format;
+}
	req->rq_receiving_reply = 0;
	req->rq_must_unlink = 0;
	req->rq_no_delay = req->rq_no_resend = 1;
+	req->rq_pill.rc_fmt = (void *)&worker_format;
	spin_lock_init(&req->rq_lock);
	CFS_INIT_LIST_HEAD(&req->rq_list);
	CFS_INIT_LIST_HEAD(&req->rq_exp_list);
	init_waitqueue_head(&req->rq_reply_waitq);
	init_waitqueue_head(&req->rq_set_waitq);
-	cfs_atomic_set(&req->rq_refcount, 1);
+	atomic_set(&req->rq_refcount, 1);
	CLASSERT (sizeof(*args) <= sizeof(req->rq_async_args));
	args = ptlrpc_req_async_args(req);
-	args->magic = PTLRPC_WORK_MAGIC;
	args->cb = cb;
	args->cbdata = cbdata;
int ptlrpcd_queue_work(void *handler)
{
-	struct ptlrpc_request *req = handler;
+	struct ptlrpc_request *req = handler;
	/*
	 * Check if the req is already being queued.
	 * for this purpose. This is okay because the caller should use this
	 * req as opaque data. - Jinshan
	 */
-	LASSERT(cfs_atomic_read(&req->rq_refcount) > 0);
-	if (cfs_atomic_read(&req->rq_refcount) > 1)
-		return -EBUSY;
-
-	if (cfs_atomic_inc_return(&req->rq_refcount) > 2) { /* race */
-		cfs_atomic_dec(&req->rq_refcount);
-		return -EBUSY;
-	}
-
-	/* re-initialize the req */
-	req->rq_timeout = obd_timeout;
-	req->rq_sent = cfs_time_current_sec();
-	req->rq_deadline = req->rq_sent + req->rq_timeout;
-	req->rq_reply_deadline = req->rq_deadline;
-	req->rq_phase = RQ_PHASE_INTERPRET;
-	req->rq_next_phase = RQ_PHASE_COMPLETE;
-	req->rq_xid = ptlrpc_next_xid();
-	req->rq_import_generation = req->rq_import->imp_generation;
-
-	ptlrpcd_add_req(req, PDL_POLICY_ROUND, -1);
-	return 0;
+	LASSERT(atomic_read(&req->rq_refcount) > 0);
+	if (atomic_inc_return(&req->rq_refcount) == 2)
+		ptlrpcd_add_work_req(req);
+	return 0;
}
EXPORT_SYMBOL(ptlrpcd_queue_work);