* RHEL 4 and RHEL 5/SLES 10 clients behaves differently on 'cd' to a
removed cwd "./" (refer to Bugzilla 14399).
+Severity : major
+Bugzilla : 15027
+Frequency : on network error
+Description: panic with double free of the request on network error
+Details : mdc_finish_enqueue finishes the request if any network error
+ occurs, but that is correct only for a synchronous enqueue; for an
+ async enqueue (via ptlrpcd) it is wrong, because ptlrpcd wants to
+ finish the request itself.
+
Severity : normal
Bugzilla : 14533
Frequency : rare, on recovery
#define OBD_FAIL_MDC_REVALIDATE_PAUSE 0x800
#define OBD_FAIL_MDC_ENQUEUE_PAUSE 0x801
#define OBD_FAIL_MDC_OLD_EXT_FLAGS 0x802
+#define OBD_FAIL_MDC_GETATTR_ENQUEUE 0x803
#define OBD_FAIL_MGS 0x900
#define OBD_FAIL_MGS_ALL_REQUEST_NET 0x901
spin_lock(&cli->cl_loi_list_lock);
cli->cl_r_in_flight--;
-
+
list_for_each_safe(l, tmp, &cli->cl_cache_waiters) {
if (cli->cl_r_in_flight >= cli->cl_max_rpcs_in_flight) {
/* No free request slots anymore */
wake_up(&mcw->mcw_waitq);
}
/* Empty waiting list? Decrease reqs in-flight number */
-
+
spin_unlock(&cli->cl_loi_list_lock);
}
struct ldlm_reply *lockrep;
ENTRY;
+ LASSERT(rc >= 0);
/* Similarly, if we're going to replay this request, we don't want to
* actually get a lock, just perform the intent. */
if (req->rq_transno || req->rq_replay) {
einfo->ei_mode = 0;
memset(lockh, 0, sizeof(*lockh));
rc = 0;
- } else if (rc != 0) {
- CERROR("ldlm_cli_enqueue: %d\n", rc);
- LASSERTF(rc < 0, "rc %d\n", rc);
- mdc_clear_replay_flag(req, rc);
- ptlrpc_req_finished(req);
- RETURN(rc);
} else { /* rc = 0 */
struct ldlm_lock *lock = ldlm_handle2lock(lockh);
LASSERT(lock);
0, NULL, lockh, 0);
mdc_exit_request(&obddev->u.cli);
mdc_put_rpc_lock(obddev->u.cli.cl_rpc_lock, it);
-
+ if (rc < 0) {
+ CERROR("ldlm_cli_enqueue: %d\n", rc);
+ mdc_clear_replay_flag(req, rc);
+ ptlrpc_req_finished(req);
+ RETURN(rc);
+ }
rc = mdc_finish_enqueue(exp, req, einfo, it, lockh, rc);
RETURN(rc);
obddev = class_exp2obd(exp);
mdc_exit_request(&obddev->u.cli);
+ if (OBD_FAIL_CHECK(OBD_FAIL_MDC_GETATTR_ENQUEUE))
+ rc = -ETIMEDOUT;
rc = ldlm_cli_enqueue_fini(exp, req, einfo->ei_type, 1, einfo->ei_mode,
&flags, NULL, 0, NULL, lockh, rc);
+ if (rc < 0) {
+ CERROR("ldlm_cli_enqueue: %d\n", rc);
+ mdc_clear_replay_flag(req, rc);
+ GOTO(out, rc);
+ }
rc = mdc_finish_enqueue(exp, req, einfo, it, lockh, rc);
if (rc)
}
run_test 122 "fail client bulk callback (shouldn't LBUG) ======="
-test_123() # statahead(bug 11401)
-{
+test_123a() { # was test 123, statahead(bug 11401)
if [ -z "$(grep "processor.*: 1" /proc/cpuinfo)" ]; then
log "testing on UP system. Performance may be not as good as expected."
fi
etime=`date +%s`
delta_sa=$((etime - stime))
log "ls $i files with statahead: $delta_sa sec"
+ lctl get_param -n llite.*.statahead_stats
max=`lctl get_param -n llite.*.statahead_max | head -n 1`
lctl set_param -n llite.*.statahead_max 0
[ $error -ne 0 ] && error "statahead is slow!"
return 0
}
+# the function was renamed test_123a above, so register it under "123a"
+# ("run_testa 123" was a typo: no such helper exists, and the renamed
+# test would never have been registered or executed)
+run_test 123a "verify statahead work"
+
+# Regression test for bug 15027: the client must not panic (double
+# free of the enqueue request) when a network error is injected into
+# the async statahead getattr enqueue path (via ptlrpcd).
+test_123b () { # statahead(bug 15027)
+ mkdir -p $DIR/$tdir
+ createmany -o $DIR/$tdir/$tfile-%d 1000
+
+ cancel_lru_locks mdc
+ cancel_lru_locks osc
+
+#define OBD_FAIL_MDC_GETATTR_ENQUEUE 0x803
+# NOTE(review): 0x80000803 is OBD_FAIL_MDC_GETATTR_ENQUEUE with the
+# 0x80000000 bit set — presumably the "fail once" flag; confirm
+# against obd_support.h.
+ sysctl -w lustre.fail_loc=0x80000803
+# the recursive ls drives statahead; success here simply means no LBUG/panic
+ ls -lR $DIR/$tdir > /dev/null
+ log "ls done"
+ sysctl -w lustre.fail_loc=0x0
+ lctl get_param -n llite.*.statahead_stats
+ rm -r $DIR/$tdir
+ sync
+
+}
+run_test 123b "not panic with network error in statahead enqueue (bug 15027)"
test_124a() {
[ -z "`lctl get_param -n mdc.*.connect_flags | grep lru_resize`" ] && \