#define OBD_FAIL_PTLRPC_CLIENT_BULK_CB2 0x515
#define OBD_FAIL_PTLRPC_DELAY_IMP_FULL 0x516
#define OBD_FAIL_PTLRPC_CANCEL_RESEND 0x517
+#define OBD_FAIL_PTLRPC_CLIENT_BULK_CB3 0x520
#define OBD_FAIL_OBD_PING_NET 0x600
#define OBD_FAIL_OBD_LOG_CANCEL_NET 0x601
}
break;
}
+
+ /* LU-6441: last md is not sent and desc->bd_md_count == 1 */
+ if (OBD_FAIL_CHECK_ORSET(OBD_FAIL_PTLRPC_CLIENT_BULK_CB3,
+ CFS_FAIL_ONCE) &&
+ posted_md == desc->bd_md_max_brw - 1) {
+ posted_md++;
+ continue;
+ }
+
/* Network is about to get at the memory */
if (desc->bd_type == BULK_PUT_SOURCE)
rc = LNetPut(conn->c_self, desc->bd_mds[posted_md],
* one. If it fails, it must be because completion just happened,
* but we must still l_wait_event() in this case, to give liblustre
* a chance to run server_bulk_callback()*/
- mdunlink_iterate_helper(desc->bd_mds, desc->bd_md_count);
+ mdunlink_iterate_helper(desc->bd_mds, desc->bd_md_max_brw);
for (;;) {
/* Network access will complete in finite time but the HUGE
}
run_test 224b "Don't panic on bulk IO failure"
+test_224c() { # LU-6441
+ [ $PARALLEL == "yes" ] && skip "skip parallel run" && return
+ local pages_per_rpc=$($LCTL get_param \
+ osc.*.max_pages_per_rpc)
+ local at_max=$(do_facet mgs "$LCTL get_param -n at_max")
+ local timeout=$(do_facet mgs "$LCTL get_param -n timeout")
+
+ $LCTL set_param -n osc.*.max_pages_per_rpc=1024
+ do_facet mgs "$LCTL conf_param $FSNAME.sys.at_max=0"
+ do_facet mgs "$LCTL conf_param $FSNAME.sys.timeout=5"
+
+ #define OBD_FAIL_PTLRPC_CLIENT_BULK_CB3 0x520
+ $LCTL set_param fail_loc=0x520
+ dd if=/dev/zero of=$DIR/$tfile bs=8MB count=1
+ sync
+ $LCTL set_param fail_loc=0
+
+ do_facet mgs "$LCTL conf_param $FSNAME.sys.at_max=" \
+ "$at_max"
+ do_facet mgs "$LCTL conf_param $FSNAME.sys.timeout=" \
+ "$timeout"
+ $LCTL set_param -n $pages_per_rpc
+}
+run_test 224c "Don't hang if one of md lost during large bulk RPC"
+
MDSSURVEY=${MDSSURVEY:-$(which mds-survey 2>/dev/null || true)}
test_225a () {
[ $PARALLEL == "yes" ] && skip "skip parallel run" && return