From: John L. Hammond
Date: Mon, 5 Nov 2018 17:48:55 +0000 (-0600)
Subject: LU-11519 hsm: handle hsd_request_count == 0 properly
X-Git-Tag: 2.12.0-RC1~23
X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=af05afdfb5781806ab3bc059c86c289b01713ade

LU-11519 hsm: handle hsd_request_count == 0 properly

In mdt_cdt_waiting_cb() it may be that the coordinator has already
reached the limit of active requests and hsd contains no requests to
be started. Handle this properly when trying to prioritize a restore.

Signed-off-by: John L. Hammond
Change-Id: Ic843b7672ae6a4509ac127c2d2f90bf3681f84fc
Reviewed-on: https://review.whamcloud.com/33580
Reviewed-by: Andreas Dilger
Reviewed-by: Quentin Bouget
Reviewed-by: Ben Evans
Tested-by: Jenkins
Tested-by: Maloo
Reviewed-by: Oleg Drokin
---

diff --git a/lustre/mdt/mdt_coordinator.c b/lustre/mdt/mdt_coordinator.c
index 19fd0f5..6795c7d 100644
--- a/lustre/mdt/mdt_coordinator.c
+++ b/lustre/mdt/mdt_coordinator.c
@@ -163,6 +163,9 @@ static int mdt_cdt_waiting_cb(const struct lu_env *env,
 	int i;
 
 	/* Are agents full? */
+	if (atomic_read(&cdt->cdt_request_count) >= cdt->cdt_max_requests)
+		RETURN(hsd->hsd_housekeeping ? 0 : LLOG_PROC_BREAK);
+
 	if (hsd->hsd_action_count + atomic_read(&cdt->cdt_request_count) >=
 	    cdt->cdt_max_requests) {
 		/* We cannot send any more request
@@ -224,8 +227,10 @@ static int mdt_cdt_waiting_cb(const struct lu_env *env,
 
 			/* Discard the (whole) last hal */
 			hsd->hsd_request_count--;
+			LASSERT(hsd->hsd_request_count >= 0);
 			tmp = &hsd->hsd_request[hsd->hsd_request_count];
 			hsd->hsd_action_count -= tmp->hal->hal_count;
+			LASSERT(hsd->hsd_action_count >= 0);
 			OBD_FREE(tmp->hal, tmp->hal_sz);
 		} else {
 			/* Bailing out, this code path is too hot */