summary |
shortlog |
log |
commit | commitdiff |
tree
raw |
patch |
inline | side by side (from parent 1:
b1f6f3b)
The logic for rereading a llog buffer had an exception
for a full catalog, when lgh_last_idx = llh_cat_idx and a first
processing index is a llh_cat_idx+1. This check is based on
a value lh_last_idx, which stays unchanged between buffer read.
But llh_cat_idx could go forward, and this lead to a wrong check
where reread doesn't happen. As a result Lustre got ENOENT for
a record and stoped osp processing.
llog_cat_set_first_idx())
catlog [0x6:0x1:0x0] first idx 34730, last_idx 34731
osp_sync_process_queues()) 1 changes, 0 in progress, 0 in flight
llog_process_thread())
stop processing plain 0x76941:1:0 index 64767 count 1
llog_process_thread())
index: 34731, lh_last_idx: 34730 synced_idx: 34730 lgh_last_idx: 34731
llog_cat_process_common()) processing log [0x2780f:0x1:0x0]:0
at index 34731 of catalog [0x6:0x1:0x0]
llog_cat_id2handle()) snx11281-OST0001-osc-MDT0001:
error opening log id [0x2780f:0x1:0x0]:0:rc = -2
The patch fixes logic and also adds/changes debugging for
llog and osp.
Cray-bug-id: LUS-8193
Signed-off-by: Alexander Boyko <c17825@cray.com>
Change-Id: I9463223a1ea904b96643b19e1580f5894142c12b
Reviewed-on: https://review.whamcloud.com/37102
Tested-by: jenkins <devops@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Alexander Zarochentsev <alexander.zarochentsev@hpe.com>
Reviewed-by: Mike Pershin <mpershin@whamcloud.com>
* could be less than index. So we detect last index
* for processing as index == lh_last_idx+1. But when
* catalog is wrapped and full lgh_last_idx=llh_cat_idx,
* could be less than index. So we detect last index
* for processing as index == lh_last_idx+1. But when
* catalog is wrapped and full lgh_last_idx=llh_cat_idx,
- * the first processing index is llh_cat_idx+1.
+ * the first processing index is llh_cat_idx+1.The
+ * exception is !(lgh_last_idx == llh_cat_idx &&
+ * index == llh_cat_idx + 1), and after simplification
+ * it turns to
+ * lh_last_idx != LLOG_HDR_TAIL(llh)->lrt_index
+ * This exception is working for catalog only.
*/
if ((index == lh_last_idx && synced_idx != index) ||
(index == (lh_last_idx + 1) &&
*/
if ((index == lh_last_idx && synced_idx != index) ||
(index == (lh_last_idx + 1) &&
- !(index == (llh->llh_cat_idx + 1) &&
- (llh->llh_flags & LLOG_F_IS_CAT))) ||
+ lh_last_idx != LLOG_HDR_TAIL(llh)->lrt_index) ||
(rec->lrh_index == 0 && !repeated)) {
/* save offset inside buffer for the re-read */
(rec->lrh_index == 0 && !repeated)) {
/* save offset inside buffer for the re-read */
__u64 tmp_off;
int tmp_idx;
__u64 tmp_off;
int tmp_idx;
- CDEBUG(D_OTHER, "index: %d, lh_last_idx: %d "
+ CDEBUG((llh->llh_flags & LLOG_F_IS_CAT ?
+ D_HA : D_OTHER),
+ "index: %d, lh_last_idx: %d "
"synced_idx: %d lgh_last_idx: %d\n",
index, lh_last_idx, synced_idx,
loghandle->lgh_last_idx);
"synced_idx: %d lgh_last_idx: %d\n",
index, lh_last_idx, synced_idx,
loghandle->lgh_last_idx);
+ CDEBUG(D_HA, "stop processing %s "DOSTID":%x index %d count %d\n",
+ ((llh->llh_flags & LLOG_F_IS_CAT) ? "catalog" : "plain"),
+ POSTID(&loghandle->lgh_id.lgl_oi), loghandle->lgh_id.lgl_ogen,
+ index, llh->llh_count);
+
if (cd != NULL)
cd->lpcd_last_idx = last_called_index;
if (cd != NULL)
cd->lpcd_last_idx = last_called_index;
- CDEBUG(D_RPCTRACE, "catlog "DFID" first idx %u, last_idx %u\n",
+ CDEBUG(D_HA, "catlog "DFID" first idx %u, last_idx %u\n",
PFID(&cathandle->lgh_id.lgl_oi.oi_fid),
llh->llh_cat_idx, cathandle->lgh_last_idx);
}
PFID(&cathandle->lgh_id.lgl_oi.oi_fid),
llh->llh_cat_idx, cathandle->lgh_last_idx);
}
struct llog_handle *loghandle, int index)
{
int rc;
struct llog_handle *loghandle, int index)
{
int rc;
+ struct lu_fid fid = {.f_seq = 0, .f_oid = 0, .f_ver = 0};
LASSERT(index);
if (loghandle != NULL) {
/* remove destroyed llog from catalog list and
* chd_current_log variable */
LASSERT(index);
if (loghandle != NULL) {
/* remove destroyed llog from catalog list and
* chd_current_log variable */
+ fid = loghandle->lgh_id.lgl_oi.oi_fid;
down_write(&cathandle->lgh_lock);
if (cathandle->u.chd.chd_current_log == loghandle)
cathandle->u.chd.chd_current_log = NULL;
down_write(&cathandle->lgh_lock);
if (cathandle->u.chd.chd_current_log == loghandle)
cathandle->u.chd.chd_current_log = NULL;
llog_cat_set_first_idx(cathandle, index);
rc = llog_cancel_rec(env, cathandle, index);
if (rc == 0)
llog_cat_set_first_idx(cathandle, index);
rc = llog_cancel_rec(env, cathandle, index);
if (rc == 0)
- CDEBUG(D_HA, "cancel plain log at index %u of catalog "DFID"\n",
- index, PFID(&cathandle->lgh_id.lgl_oi.oi_fid));
+ CDEBUG(D_HA,
+ "cancel plain log "DFID" at index %u of catalog "DFID"\n",
+ PFID(&fid), index,
+ PFID(&cathandle->lgh_id.lgl_oi.oi_fid));
- CDEBUG(D_HA, "found record %x, %d, idx %u, id %u\n",
+ CDEBUG(D_OTHER, "found record %x, %d, idx %u, id %u\n",
rec->lrh_type, rec->lrh_len, rec->lrh_index, rec->lrh_id);
RETURN_EXIT;
rec->lrh_type, rec->lrh_len, rec->lrh_index, rec->lrh_id);
RETURN_EXIT;
list_for_each(le, &list)
count++;
if (count > 2)
list_for_each(le, &list)
count++;
if (count > 2)
- OBD_ALLOC_WAIT(arr, sizeof(int) * count);
+ OBD_ALLOC_LARGE(arr, sizeof(int) * count);
PFID(&lgid.lgl_oi.oi_fid), i);
}
if (arr)
PFID(&lgid.lgl_oi.oi_fid), i);
}
if (arr)
- OBD_FREE(arr, sizeof(int) * count);
+ OBD_FREE_LARGE(arr, sizeof(int) * count);
llog_ctxt_put(ctxt);
LASSERT(atomic_read(&d->opd_sync_rpcs_in_progress) >= done);
atomic_sub(done, &d->opd_sync_rpcs_in_progress);
llog_ctxt_put(ctxt);
LASSERT(atomic_read(&d->opd_sync_rpcs_in_progress) >= done);
atomic_sub(done, &d->opd_sync_rpcs_in_progress);
- CDEBUG(D_OTHER, "%s: %d in flight, %d in progress, done %d\n",
- d->opd_obd->obd_name, atomic_read(&d->opd_sync_rpcs_in_flight),
- atomic_read(&d->opd_sync_rpcs_in_progress), done);
+ CDEBUG((done > 2 ? D_HA : D_OTHER), "%s: %u changes, %u in progress,"
+ " %u in flight, %u done\n",
+ d->opd_obd->obd_name,
+ atomic_read(&d->opd_sync_changes),
+ atomic_read(&d->opd_sync_rpcs_in_progress),
+ atomic_read(&d->opd_sync_rpcs_in_flight),
+ done);
osp_sync_check_for_work(d);
osp_sync_check_for_work(d);
if (osp_sync_can_process_new(d, rec)) {
if (llh == NULL) {
/* ask llog for another record */
if (osp_sync_can_process_new(d, rec)) {
if (llh == NULL) {
/* ask llog for another record */
- CDEBUG(D_HA, "%u changes, %u in progress,"
- " %u in flight\n",
- atomic_read(&d->opd_sync_changes),
- atomic_read(&d->opd_sync_rpcs_in_progress),
- atomic_read(&d->opd_sync_rpcs_in_flight));
return 0;
}
osp_sync_process_record(env, d, llh, rec);
return 0;
}
osp_sync_process_record(env, d, llh, rec);