typedef int (*ptlrpc_interpterer_t)(const struct lu_env *env,
struct ptlrpc_request *req,
void *arg, int rc);
+/**
+ * Type of request resend call-back.
+ *
+ * Receives the request and an opaque argument.  The caller visible in the
+ * send path invokes it only when rq_resend is set and passes the address of
+ * the request's rq_async_args as \a arg.
+ */
+typedef void (*ptlrpc_resend_cb_t)(struct ptlrpc_request *req,
+ void *arg);
/**
* Definition of request pool structure.
struct ptlrpc_request_set *rq_set;
/** Async completion handler, called when reply is received */
ptlrpc_interpterer_t rq_interpret_reply;
+ /** Resend handler, called when the request is resent, to update RPC data */
+ ptlrpc_resend_cb_t rq_resend_cb;
/** Async completion context */
union ptlrpc_async_args rq_async_args;
}
} else {
- LDLM_ERROR(lock, "client (nid %s) returned %d "
- "from %s AST", libcfs_nid2str(peer.nid),
- (req->rq_repmsg != NULL) ?
- lustre_msg_get_status(req->rq_repmsg) : 0,
- ast_type);
+ LDLM_ERROR(lock, "client (nid %s) returned %d: rc=%d "
+ "from %s AST", libcfs_nid2str(peer.nid),
+ (req->rq_repmsg != NULL) ?
+ lustre_msg_get_status(req->rq_repmsg) : 0,
+ rc, ast_type);
}
ldlm_lock_cancel(lock);
/* Server-side AST functions are called from ldlm_reprocess_all,
RETURN(0);
}
+/**
+ * Resend call-back installed on lock callback requests (rq_resend_cb).
+ *
+ * \a data is treated as the request's struct ldlm_cb_async_args.  The lock
+ * stored in ca_lock is handed to ldlm_refresh_waiting_lock() together with
+ * the value returned by ldlm_get_enq_timeout() for that same lock.
+ * \a req itself is not used.
+ */
+static void ldlm_update_resend(struct ptlrpc_request *req, void *data)
+{
+	struct ldlm_cb_async_args *cb_args = data;
+
+	ldlm_refresh_waiting_lock(cb_args->ca_lock,
+				  ldlm_get_enq_timeout(cb_args->ca_lock));
+}
+
static inline int ldlm_ast_fini(struct ptlrpc_request *req,
struct ldlm_cb_set_arg *arg,
struct ldlm_lock *lock,
ca->ca_lock = lock;
req->rq_interpret_reply = ldlm_cb_interpret;
- req->rq_no_resend = 1;
lock_res_and_lock(lock);
if (lock->l_granted_mode != lock->l_req_mode) {
if (instant_cancel) {
unlock_res_and_lock(lock);
ldlm_lock_cancel(lock);
+
+ req->rq_no_resend = 1;
} else {
LASSERT(lock->l_granted_mode == lock->l_req_mode);
ldlm_add_waiting_lock(lock);
unlock_res_and_lock(lock);
+
+ /* Do not resend after lock callback timeout */
+ req->rq_delay_limit = ldlm_get_enq_timeout(lock);
+ req->rq_resend_cb = ldlm_update_resend;
}
req->rq_send_state = LUSTRE_IMP_FULL;
ca->ca_lock = lock;
req->rq_interpret_reply = ldlm_cb_interpret;
- req->rq_no_resend = 1;
body = req_capsule_client_get(&req->rq_pill, &RMF_DLM_REQ);
body->lock_handle[0] = lock->l_remote_handle;
* that would not only cancel the lock, but will also remove
* it from waiting list */
if (ldlm_is_cancel_on_block(lock)) {
- unlock_res_and_lock(lock);
- ldlm_lock_cancel(lock);
- instant_cancel = 1;
- lock_res_and_lock(lock);
- } else {
- /* start the lock-timeout clock */
- ldlm_add_waiting_lock(lock);
- }
+ unlock_res_and_lock(lock);
+ ldlm_lock_cancel(lock);
+
+ instant_cancel = 1;
+ req->rq_no_resend = 1;
+
+ lock_res_and_lock(lock);
+ } else {
+ /* start the lock-timeout clock */
+ ldlm_add_waiting_lock(lock);
+ /* Do not resend after lock callback timeout */
+ req->rq_delay_limit = ldlm_get_enq_timeout(lock);
+ req->rq_resend_cb = ldlm_update_resend;
+ }
}
unlock_res_and_lock(lock);
if (op_data->op_attr.ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID))
bits |= MDS_INODELOCK_LOOKUP;
if ((op_data->op_flags & MF_MDC_CANCEL_FID1) &&
- (fid_is_sane(&op_data->op_fid1)) &&
- !OBD_FAIL_CHECK(OBD_FAIL_LDLM_BL_CALLBACK_NET))
+ (fid_is_sane(&op_data->op_fid1)))
count = mdc_resource_get_unused(exp, &op_data->op_fid1,
&cancels, LCK_EX, bits);
req = ptlrpc_request_alloc(class_exp2cliimp(exp),
LASSERT(req == NULL);
if ((op_data->op_flags & MF_MDC_CANCEL_FID1) &&
- (fid_is_sane(&op_data->op_fid1)) &&
- !OBD_FAIL_CHECK(OBD_FAIL_LDLM_BL_CALLBACK_NET))
+ (fid_is_sane(&op_data->op_fid1)))
count = mdc_resource_get_unused(exp, &op_data->op_fid1,
&cancels, LCK_EX,
MDS_INODELOCK_UPDATE);
if ((op_data->op_flags & MF_MDC_CANCEL_FID3) &&
- (fid_is_sane(&op_data->op_fid3)) &&
- !OBD_FAIL_CHECK(OBD_FAIL_LDLM_BL_CALLBACK_NET))
+ (fid_is_sane(&op_data->op_fid3)))
count += mdc_resource_get_unused(exp, &op_data->op_fid3,
&cancels, LCK_EX,
MDS_INODELOCK_FULL);
D_HA : D_ERROR, req, "IMP_CLOSED ");
*status = -EIO;
} else if (ptlrpc_send_limit_expired(req)) {
- /* probably doesn't need to be a D_ERROR after initial testing */
- DEBUG_REQ(D_ERROR, req, "send limit expired ");
- *status = -EIO;
+ /* no longer a D_ERROR: logged at D_HA and reported as a timeout */
+ DEBUG_REQ(D_HA, req, "send limit expired ");
+ *status = -ETIMEDOUT;
} else if (req->rq_send_state == LUSTRE_IMP_CONNECTING &&
imp->imp_state == LUSTRE_IMP_CONNECTING) {
/* allow CONNECT even if import is invalid */ ;
lustre_msghdr_set_flags(request->rq_reqmsg,
request->rq_import->imp_msghdr_flags);
- if (request->rq_resend)
- lustre_msg_add_flags(request->rq_reqmsg, MSG_RESENT);
+ if (request->rq_resend) {
+ lustre_msg_add_flags(request->rq_reqmsg, MSG_RESENT);
+ if (request->rq_resend_cb != NULL)
+ request->rq_resend_cb(request, &request->rq_async_args);
+ }
if (request->rq_memalloc)
mpflag = cfs_memory_pressure_get_and_set();
run_test 9 "pause bulk on OST (bug 1420)"
#bug 1521
-test_10() {
- do_facet client mcreate $DIR/$tfile ||
- { error "mcreate failed: $?"; return 1; }
- drop_bl_callback "chmod 0777 $DIR/$tfile" || echo "evicted as expected"
- # wait for the mds to evict the client
- #echo "sleep $(($TIMEOUT*2))"
- #sleep $(($TIMEOUT*2))
- do_facet client touch $DIR/$tfile || echo "touch failed, evicted"
- do_facet client checkstat -v -p 0777 $DIR/$tfile ||
- { error "client checkstat failed: $?"; return 3; }
- do_facet client "munlink $DIR/$tfile"
- # allow recovery to complete
- client_up || client_up || sleep $TIMEOUT
+# Drop blocking callbacks to the client (fail_loc without the once flag,
+# via drop_bl_callback) while chmod runs, then check that the client state
+# shows an EVICTED entry newer than the start of the test and that the
+# chmod still took effect.
+test_10a() {
+	local before=$(date +%s)
+	local evict
+
+	do_facet client "stat $DIR > /dev/null" ||
+		error "failed to stat $DIR: $?"
+	drop_bl_callback "chmod 0777 $DIR" ||
+		error "failed to chmod $DIR: $?"
+
+	# let the client reconnect
+	client_reconnect
+	# largest 5th field among the state lines ending in "EVICTED ]"
+	evict=$(do_facet client $LCTL get_param mdc.$FSNAME-MDT*.state |
+	  awk -F"[ [,]" '/EVICTED ]$/ { if (mx<$5) {mx=$5;} } END { print mx }')
+	# Use a brace group, not a subshell: an exit raised by error inside
+	# "( ... )" would only leave the subshell and the test would go on.
+	[ -n "$evict" ] && [[ $evict -gt $before ]] || {
+		do_facet client $LCTL get_param mdc.$FSNAME-MDT*.state
+		error "no eviction: $evict before:$before"
+	}
+
+	do_facet client checkstat -v -p 0777 $DIR ||
+		error "client checkstat failed: $?"
+}
+run_test 10a "finish request on server after client eviction (bug 1521)"
+
+# Drop a blocking callback once (drop_bl_callback_once) while chmod runs,
+# then check that the client state shows no EVICTED entry newer than the
+# start of the test and that the chmod took effect.
+test_10b() {
+	local before=$(date +%s)
+	local evict
+
+	do_facet client "stat $DIR > /dev/null" ||
+		error "failed to stat $DIR: $?"
+	drop_bl_callback_once "chmod 0777 $DIR" ||
+		error "failed to chmod $DIR: $?"
+
+	# let the client reconnect
+	client_reconnect
+	# largest 5th field among the state lines ending in "EVICTED ]"
+	evict=$(do_facet client $LCTL get_param mdc.$FSNAME-MDT*.state |
+	  awk -F"[ [,]" '/EVICTED ]$/ { if (mx<$5) {mx=$5;} } END { print mx }')
+
+	# Use a brace group, not a subshell: an exit raised by error inside
+	# "( ... )" would only leave the subshell and the test would go on.
+	[ -z "$evict" ] || [[ $evict -le $before ]] || {
+		do_facet client $LCTL get_param mdc.$FSNAME-MDT*.state
+		error "eviction happened: $evict before:$before"
+	}
+
+	do_facet client checkstat -v -p 0777 $DIR ||
+		error "client checkstat failed: $?"
}
-run_test 10 "finish request on server after client eviction (bug 1521)"
+run_test 10b "re-send BL AST"
#bug 2460
# wake up a thread waiting for completion after eviction
do_facet client $MULTIOP $DIR/$tfile or ||
{ error "multiop read failed: $?"; return 3; }
- drop_bl_callback $MULTIOP $DIR/$tfile Ow || echo "evicted as expected"
+ drop_bl_callback_once $MULTIOP $DIR/$tfile Ow ||
+ echo "evicted as expected"
do_facet client munlink $DIR/$tfile ||
{ error "munlink failed: $?"; return 4; }
# let the client reconnect
sleep 5
EVICT=$(do_facet client $LCTL get_param mdc.$FSNAME-MDT*.state |
- awk -F"[ [,]" '/EVICTED]$/ { if (mx<$4) {mx=$4;} } END { print mx }')
+ awk -F"[ [,]" '/EVICTED ]$/ { if (mx<$5) {mx=$5;} } END { print mx }')
[ -z "$EVICT" ] || [[ $EVICT -le $BEFORE ]] || error "eviction happened"
}
test_28() { # bug 6086 - error adding new clients
do_facet client mcreate $DIR/$tfile || return 1
- drop_bl_callback "chmod 0777 $DIR/$tfile" ||echo "evicted as expected"
+ drop_bl_callback_once "chmod 0777 $DIR/$tfile" ||
+ echo "evicted as expected"
#define OBD_FAIL_MDS_CLIENT_ADD 0x12f
do_facet $SINGLEMDS "lctl set_param fail_loc=0x8000012f"
# fail once (evicted), reconnect fail (fail_loc), ok
pid=$!
sleep 1
lctl set_param fail_loc=0
- drop_bl_callback rm -f $DIR/$tfile
+ drop_bl_callback_once rm -f $DIR/$tfile
wait $pid
# the first 'df' could tigger the eviction caused by
- # 'drop_bl_callback', and it's normal case.
+ # 'drop_bl_callback_once', and it's normal case.
# but the next 'df' should return successfully.
do_facet client "df $DIR" || do_facet client "df $DIR"
}
# let the client reconnect
client_reconnect
EVICT=$($LCTL get_param mdc.$FSNAME-MDT*.state |
- awk -F"[ [,]" '/EVICTED]$/ { if (mx<$4) {mx=$4;} } END { print mx }')
+ awk -F"[ [,]" '/EVICTED ]$/ { if (mx<$5) {mx=$5;} } END { print mx }')
[ -z "$EVICT" ] || [[ $EVICT -le $BEFORE ]] || error "eviction happened"
}
do_facet $SINGLEMDS lctl set_param fail_loc=0x8000030b # hold enqueue
sleep 1
#define OBD_FAIL_LDLM_BL_CALLBACK_NET 0x305
+ do_facet client lctl set_param ldlm.namespaces.*.early_lock_cancel=0
do_facet client lctl set_param fail_loc=0x80000305 # drop cb, evict
cancel_lru_locks mdc
usleep 500 # wait to ensure first client is one that will be evicted
openfile -f O_RDONLY $MOUNT2/$tdir/$tfile
wait $OPENPID
+ do_facet client lctl set_param ldlm.namespaces.*.early_lock_cancel=1
do_facet $SINGLEMDS lctl debug_kernel |
grep "not entering recovery" && error "client not evicted"
do_facet client "lctl set_param fail_loc=0"
return $RC
}
-drop_bl_callback() {
+# Run "$@" on the client with fail_loc=0x80000305 (0x305 is
+# OBD_FAIL_LDLM_BL_CALLBACK_NET; the 0x80000000 bit is presumably the
+# fail-once flag, as the function name suggests -- confirm).
+# early_lock_cancel is switched off for the duration and set back to 1
+# afterwards.  Returns the exit status of the command.
+drop_bl_callback_once() {
+	# local: a bare assignment would overwrite a caller's "rc"
+	local rc=0
+	do_facet client lctl set_param ldlm.namespaces.*.early_lock_cancel=0
#define OBD_FAIL_LDLM_BL_CALLBACK_NET 0x305
- RC=0
do_facet client lctl set_param fail_loc=0x80000305
- do_facet client "$@" || RC=$?
+	do_facet client "$@" || rc=$?
do_facet client lctl set_param fail_loc=0
- return $RC
+	do_facet client lctl set_param ldlm.namespaces.*.early_lock_cancel=1
+	return $rc
+}
+
+# Run "$@" on the client with fail_loc=0x305
+# (OBD_FAIL_LDLM_BL_CALLBACK_NET, set without the 0x80000000 bit used by
+# drop_bl_callback_once).  early_lock_cancel is switched off for the
+# duration and set back to 1 afterwards.  Returns the exit status of the
+# command.
+drop_bl_callback() {
+	# local: a bare assignment would overwrite a caller's "rc"
+	local rc=0
+	do_facet client lctl set_param ldlm.namespaces.*.early_lock_cancel=0
+#define OBD_FAIL_LDLM_BL_CALLBACK_NET 0x305
+	do_facet client lctl set_param fail_loc=0x305
+	do_facet client "$@" || rc=$?
+	do_facet client lctl set_param fail_loc=0
+	do_facet client lctl set_param ldlm.namespaces.*.early_lock_cancel=1
+	return $rc
}
drop_ldlm_reply() {