From 5ad117fc6f28389cb800953094b5acb0532f702b Mon Sep 17 00:00:00 2001 From: Etienne AUJAMES Date: Wed, 4 Sep 2024 14:22:07 +0200 Subject: [PATCH] LU-18170 obdclass: fix llog_print_cb() This patch get rid off static variables in llog_print_cb(). This enables to run several instances of "lctl llog_print" in parallel without memory corruption. It also fixes the following: - buffer overflow checks - llog EOF detection - ref leak in lod_iocontrol() Add regression test conf-sanity 123H. conf-sanity 123ad is modified to support records with varriable sizes. Test-Parameters: testlist=conf-sanity env=ONLY=123 Test-Parameters: testlist=conf-sanity env=ONLY=123ad,ONLY_REPEAT=300 Test-Parameters: testlist=conf-sanity env=ONLY=123H,ONLY_REPEAT=20 Test-Parameters: testlist=replay-single env=ONLY=100d,ONLY_REPEAT=10 Signed-off-by: Etienne AUJAMES Change-Id: Ibd971e38392d01b2069d29cb4799fbc922d31684 Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/56256 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Emoly Liu Reviewed-by: Lai Siyao Reviewed-by: Oleg Drokin --- lustre/include/lustre_log.h | 9 ++- lustre/lod/lod_dev.c | 132 +++++++++++++++++++++-------------------- lustre/obdclass/llog_ioctl.c | 137 +++++++++++++++++++++---------------------- lustre/obdclass/obd_config.c | 2 +- lustre/tests/conf-sanity.sh | 48 +++++++++++++-- 5 files changed, 184 insertions(+), 144 deletions(-) diff --git a/lustre/include/lustre_log.h b/lustre/include/lustre_log.h index 9356096..f1c5053 100644 --- a/lustre/include/lustre_log.h +++ b/lustre/include/lustre_log.h @@ -183,9 +183,12 @@ int llog_catalog_list(const struct lu_env *env, struct dt_device *d, const struct lu_fid *fid); struct llog_print_data { - struct obd_ioctl_data *lprd_data; - unsigned int lprd_cfg_flags; - bool lprd_raw; + unsigned int lprd_cfg_flags; + long lprd_from; + long lprd_to; + char *lprd_out; + long lprd_left; + bool lprd_raw; }; /* llog_net.c */ diff --git a/lustre/lod/lod_dev.c b/lustre/lod/lod_dev.c index ea9d35d..7930aae 100644 --- a/lustre/lod/lod_dev.c +++ b/lustre/lod/lod_dev.c @@ -2578,10 +2578,11 @@ static int lod_pool_del_q(struct obd_device *obd, char *poolname) return err; } -static inline int lod_sub_print_llog(const struct lu_env *env, - struct dt_device *dt, void *data) +static int lod_sub_print_llog(const struct lu_env *env, struct dt_device *dt, + struct llog_print_data *lprd) { struct llog_ctxt *ctxt; + size_t len = 0; int rc = 0; ENTRY; @@ -2590,52 +2591,28 @@ static inline int lod_sub_print_llog(const struct lu_env *env, if (!ctxt) RETURN(0); - if (ctxt->loc_handle) { - struct llog_print_data *lprd = data; - struct obd_ioctl_data *ioc_data = lprd->lprd_data; - int l, remains; - long from; - char *out; - - LASSERT(ioc_data); - if (ioc_data->ioc_inllen1 > 0) { - remains = ioc_data->ioc_inllen4 + - round_up(ioc_data->ioc_inllen1, 8) + - round_up(ioc_data->ioc_inllen2, 8) + - round_up(ioc_data->ioc_inllen3, 8); - - rc = kstrtol(ioc_data->ioc_inlbuf2, 0, &from); - if (rc) - GOTO(ctxt_put, rc); + if (!ctxt->loc_handle) + GOTO(ctxt_put, rc = -EINVAL); - /* second iteration from jt_llog_print_iter() */ - if (from > 1) - GOTO(ctxt_put, rc = 0); + len = snprintf(lprd->lprd_out, lprd->lprd_left, + "%s [catalog]: "DFID"\n", + ctxt->loc_obd->obd_name, + PLOGID(&ctxt->loc_handle->lgh_id)); - out = ioc_data->ioc_bulk; - ioc_data->ioc_inllen1 = 0; - } else { - out = ioc_data->ioc_bulk + ioc_data->ioc_offset; - remains = ioc_data->ioc_count; - } + if (len >= lprd->lprd_left) { + lprd->lprd_out[lprd->lprd_left - 1] = '\0'; + GOTO(ctxt_put, rc = -E2BIG); + } - l = snprintf(out, remains, "%s [catalog]: "DFID"\n", - ctxt->loc_obd->obd_name, - PLOGID(&ctxt->loc_handle->lgh_id)); - out += l; - remains -= l; - if (remains <= 0) { - CERROR("%s: not enough space for print log records: rc = %d\n", - ctxt->loc_obd->obd_name, -LLOG_EEMPTY); - GOTO(ctxt_put, rc = -LLOG_EEMPTY); - } + lprd->lprd_out += len; + lprd->lprd_left -= len; + rc = llog_process_or_fork(env, ctxt->loc_handle, llog_print_cb, + lprd, NULL, false); - ioc_data->ioc_offset += l; - ioc_data->ioc_count = remains; + /* multiple iterations are not supported -> stop llog_print */ + if (rc == -EOVERFLOW) + rc = -E2BIG; - rc = llog_process_or_fork(env, ctxt->loc_handle, llog_print_cb, - data, NULL, false); - } GOTO(ctxt_put, rc); ctxt_put: llog_ctxt_put(ctxt); @@ -2648,29 +2625,49 @@ static int lod_llog_print(const struct lu_env *env, struct lod_device *lod, void *data) { struct lod_tgt_desc *mdt; - bool empty = true; + struct obd_ioctl_data *ioc_data = data; + struct llog_print_data lprd = { + .lprd_raw = false, + }; + size_t bufs; int rc = 0; ENTRY; - rc = lod_sub_print_llog(env, lod->lod_child, data); - if (!rc) { - empty = false; - } else if (rc == -LLOG_EEMPTY) { - rc = 0; - } else { + LASSERT(ioc_data); + + if (ioc_data->ioc_inllen2) { + rc = kstrtol(ioc_data->ioc_inlbuf2, 0, &lprd.lprd_from); + if (rc) + RETURN(rc); + + /* multiple iterations are not supported -> stop llog_print */ + if (lprd.lprd_from > 1) + RETURN(-E2BIG); + } + + bufs = ioc_data->ioc_inllen4 + + round_up(ioc_data->ioc_inllen1, 8) + + round_up(ioc_data->ioc_inllen2, 8) + + round_up(ioc_data->ioc_inllen3, 8); + + ioc_data->ioc_inllen1 = 0; + ioc_data->ioc_inllen2 = 0; + ioc_data->ioc_inllen3 = 0; + ioc_data->ioc_inllen4 = 0; + + lprd.lprd_out = ioc_data->ioc_bulk; + lprd.lprd_left = bufs; + rc = lod_sub_print_llog(env, lod->lod_child, &lprd); + if (rc) { CERROR("%s: llog_print failed: rc = %d\n", lod2obd(lod)->obd_name, rc); - RETURN(rc); + GOTO(out, rc); } lod_getref(&lod->lod_mdt_descs); lod_foreach_mdt(lod, mdt) { - rc = lod_sub_print_llog(env, mdt->ltd_tgt, data); - if (!rc) { - empty = false; - } else if (rc == -LLOG_EEMPTY) { - rc = 0; - } else { + rc = lod_sub_print_llog(env, mdt->ltd_tgt, &lprd); + if (rc) { CERROR("%s: llog_print of MDT %u failed: rc = %d\n", lod2obd(lod)->obd_name, mdt->ltd_index, rc); break; @@ -2678,7 +2675,11 @@ static int lod_llog_print(const struct lu_env *env, struct lod_device *lod, } lod_putref(lod, &lod->lod_mdt_descs); - RETURN(rc ? rc : empty ? -LLOG_EEMPTY : 0); +out: + ioc_data->ioc_count = bufs - lprd.lprd_left; + ioc_data->ioc_u32_2 = 1; + + RETURN((rc == LLOG_PROC_BREAK) ? 0 : rc); } /* cancel update catalog from update catlist */ @@ -2733,22 +2734,22 @@ static int lod_iocontrol(unsigned int cmd, struct obd_export *exp, int len, switch (cmd) { case OBD_IOC_LLOG_PRINT: { - struct llog_print_data lprd = { - .lprd_data = data, - .lprd_raw = data->ioc_u32_1, - }; char *logname; + if (!data->ioc_inllen1) { + rc = -EINVAL; + break; + } + logname = data->ioc_inlbuf1; if (strcmp(logname, lod_update_log_name) != 0) { rc = -EINVAL; CERROR("%s: llog iocontrol support %s only: rc = %d\n", lod2obd(lod)->obd_name, lod_update_log_name, rc); - RETURN(rc); + break; } - LASSERT(data->ioc_inllen1 > 0); - rc = lod_llog_print(&env, lod, &lprd); + rc = lod_llog_print(&env, lod, data); break; } case OBD_IOC_LLOG_CANCEL: @@ -2758,6 +2759,7 @@ static int lod_iocontrol(unsigned int cmd, struct obd_export *exp, int len, rc = OBD_IOC_ERROR(obd->obd_name, cmd, "unrecognized", -ENOTTY); break; } + lu_env_fini(&env); RETURN(rc); diff --git a/lustre/obdclass/llog_ioctl.c b/lustre/obdclass/llog_ioctl.c index 2b70f7f..9d037f0 100644 --- a/lustre/obdclass/llog_ioctl.c +++ b/lustre/obdclass/llog_ioctl.c @@ -216,55 +216,27 @@ int llog_print_cb(const struct lu_env *env, struct llog_handle *handle, struct llog_rec_hdr *rec, void *data) { struct llog_print_data *lprd = data; - struct obd_ioctl_data *ioc_data = lprd->lprd_data; - static int l, remains; - static long from, to; - static char *out; - int cur_index; + size_t len; + long cur_index = rec->lrh_index; int rc; ENTRY; - if (unlikely(!ioc_data)) + if (unlikely(!lprd->lprd_out)) RETURN(-EINVAL); - if (ioc_data->ioc_inllen1 > 0) { - l = 0; - remains = ioc_data->ioc_inllen4 + - round_up(ioc_data->ioc_inllen1, 8) + - round_up(ioc_data->ioc_inllen2, 8) + - round_up(ioc_data->ioc_inllen3, 8); - - rc = kstrtol(ioc_data->ioc_inlbuf2, 0, &from); - if (rc) - RETURN(rc); - - rc = kstrtol(ioc_data->ioc_inlbuf3, 0, &to); - if (rc) - RETURN(rc); - - out = ioc_data->ioc_bulk; - ioc_data->ioc_inllen1 = 0; - ioc_data->ioc_offset = 0; - ioc_data->ioc_count = remains; - } else if (ioc_data) { - out = ioc_data->ioc_bulk + ioc_data->ioc_offset; - remains = ioc_data->ioc_count; - } - - cur_index = rec->lrh_index; - ioc_data->ioc_u32_2 = llog_idx_is_eof(handle, cur_index); - if (from > MARKER_DIFF && cur_index >= from - MARKER_DIFF && - cur_index < from) { - /* LU-15706: try to remember the marker cfg_flag that the "from" - * is using, in case that the "from" record doesn't know its - * "SKIP" or not flag. - */ + /* LU-15706: try to remember the marker cfg_flag that the "from" + * is using, in case that the "from" record doesn't know its + * "SKIP" or not flag. + */ + if (cur_index < lprd->lprd_from && + cur_index >= lprd->lprd_from - MARKER_DIFF) llog_get_marker_cfg_flags(rec, &lprd->lprd_cfg_flags); - } - if (cur_index < from) + + if (cur_index < lprd->lprd_from) RETURN(0); - if (to > 0 && cur_index > to) - RETURN(-LLOG_EEMPTY); + + if (lprd->lprd_to && cur_index > lprd->lprd_to) + RETURN(LLOG_PROC_BREAK); if (handle->lgh_hdr->llh_flags & LLOG_F_IS_CAT) { struct llog_logid_rec *lir = (struct llog_logid_rec *)rec; @@ -274,37 +246,31 @@ int llog_print_cb(const struct lu_env *env, struct llog_handle *handle, RETURN(-EINVAL); } - l = snprintf(out, remains, "[index]: %05d [logid]: "DFID"\n", - cur_index, PLOGID(&lir->lid_id)); + len = snprintf(lprd->lprd_out, lprd->lprd_left, + "[index]: %05ld [logid]: "DFID"\n", + cur_index, PLOGID(&lir->lid_id)); } else if (rec->lrh_type == OBD_CFG_REC) { - int rc; - - rc = class_config_yaml_output(rec, out, remains, + rc = class_config_yaml_output(rec, lprd->lprd_out, + lprd->lprd_left, &lprd->lprd_cfg_flags, lprd->lprd_raw); if (rc < 0) RETURN(rc); - l = rc; + len = rc; } else { - l = snprintf(out, remains, - "[index]: %05d [type]: %02x [len]: %04d\n", - cur_index, rec->lrh_type, rec->lrh_len); - } - out += l; - remains -= l; - if (remains <= 0) { - CERROR("not enough space for print log records\n"); - RETURN(-LLOG_EEMPTY); + len = snprintf(lprd->lprd_out, lprd->lprd_left, + "[index]: %05ld [type]: %02x [len]: %04d\n", + cur_index, rec->lrh_type, rec->lrh_len); } - if (ioc_data) { - /* save offset and remains, then we don't always rely on those - * static variables, which is more flexible. - */ - ioc_data->ioc_offset += l; - ioc_data->ioc_count = remains; + if (len >= lprd->lprd_left) { + lprd->lprd_out[lprd->lprd_left - 1] = '\0'; + RETURN(-EOVERFLOW); } + lprd->lprd_out += len; + lprd->lprd_left -= len; + RETURN(0); } EXPORT_SYMBOL(llog_print_cb); @@ -424,16 +390,47 @@ int llog_ioctl(const struct lu_env *env, struct llog_ctxt *ctxt, GOTO(out_close, rc); break; case OBD_IOC_LLOG_PRINT: { - struct llog_print_data lprd = { - .lprd_data = data, - .lprd_raw = data->ioc_u32_1, + size_t bufs; + struct llog_print_data lprd = { 0 }; + struct llog_process_cat_data cd = { + .lpcd_read_mode = LLOG_READ_MODE_NORMAL, + .lpcd_last_idx = 0, }; - LASSERT(data->ioc_inllen1 > 0); - rc = llog_process(env, handle, llog_print_cb, &lprd, NULL); - if (rc == -LLOG_EEMPTY) + if (!data->ioc_inllen2 || !data->ioc_inllen3) + GOTO(out_close, rc = -EINVAL); + + bufs = data->ioc_inllen4 + + round_up(data->ioc_inllen1, 8) + + round_up(data->ioc_inllen2, 8) + + round_up(data->ioc_inllen3, 8); + + rc = kstrtol(data->ioc_inlbuf2, 0, &lprd.lprd_from); + if (rc) + GOTO(out_close, rc); + + rc = kstrtol(data->ioc_inlbuf3, 0, &lprd.lprd_to); + if (rc) + GOTO(out_close, rc); + + data->ioc_inllen1 = 0; + data->ioc_inllen2 = 0; + data->ioc_inllen3 = 0; + data->ioc_inllen4 = 0; + + lprd.lprd_out = data->ioc_bulk; + lprd.lprd_left = bufs; + lprd.lprd_raw = data->ioc_u32_1; + cd.lpcd_first_idx = max(0L, lprd.lprd_from - MARKER_DIFF - 1); + + rc = llog_process(env, handle, llog_print_cb, &lprd, &cd); + + /* rc == 0 means EOF */ + data->ioc_u32_2 = !rc; + data->ioc_count = bufs - lprd.lprd_left; + if (rc == LLOG_PROC_BREAK) rc = 0; - else if (rc) + if (rc) GOTO(out_close, rc); break; } diff --git a/lustre/obdclass/obd_config.c b/lustre/obdclass/obd_config.c index c3379a7..780d3ca 100644 --- a/lustre/obdclass/obd_config.c +++ b/lustre/obdclass/obd_config.c @@ -2240,7 +2240,7 @@ out_done: out_overflow: /* Return consumed bytes. If the buffer overflowed, zero last byte */ rc = ptr - buf; - if (rc > size) { + if (rc >= size) { rc = -EOVERFLOW; *(end - 1) = '\0'; } diff --git a/lustre/tests/conf-sanity.sh b/lustre/tests/conf-sanity.sh index 87687b8..2cc72ae 100755 --- a/lustre/tests/conf-sanity.sh +++ b/lustre/tests/conf-sanity.sh @@ -10410,24 +10410,29 @@ run_test 123ac "llog_print with --start and --end" test_123ad() { # LU-11566 remote_mgs_nodsh && skip "remote MGS with nodsh" # older versions of lctl may not print all records properly - do_facet mgs "$LCTL help llog_print" 2>&1 | grep -q -- --start || - skip "Need 'lctl llog_print --start' on MGS" + (( MGS_VERSION >= $(version_code 2.15.90) )) || + skip "Need MGS version at least 2.15.90" [ -d $MOUNT/.lustre ] || setup # append a new record, to avoid issues if last record was cancelled local old=$($LCTL get_param -n osc.*-OST0000-*.max_dirty_mb | head -1) do_facet mgs $LCTL conf_param $FSNAME-OST0000.osc.max_dirty_mb=$old + stack_trap "do_facet mgs $LCTL conf_param -d $FSNAME-OST0000.osc.max_dirty_mb" # logid: [0x3:0xa:0x0]:0 # flags: 4 (plain) # records_count: 72 # last_index: 72 local num=$(do_facet mgs $LCTL --device MGS llog_info $FSNAME-client | - awk '/last_index:/ { print $2 - 1 }') + awk '/last_index:/ { print $2 }') + + do_facet mgs $LCTL --device MGS llog_print $FSNAME-client | + grep -q "$FSNAME-OST0000.*osc\.max_dirty_mb=$old" || + error "ocs.max_dirty_mb=$old not found in $FSNAME-client" - # - { index: 71, event: set_timeout, num: 0x14, param: sys.timeout=20 } - local last=$(do_facet mgs $LCTL --device MGS llog_print $FSNAME-client | + # - { index: 72, event: marker, flags: 0x06, ... } + local last=$(do_facet mgs $LCTL --device MGS llog_print -r $FSNAME-client | tail -1 | awk '{ print $4 }' | tr -d , ) (( last == num )) || error "llog_print only showed $last/$num records" } @@ -10759,6 +10764,39 @@ test_123G() { } run_test 123G "clear and reset all parameters using apply_yaml" +test_123H() { #LU-18170 + local old + local i + + (( MGS_VERSION >= $(version_code 2.15.90) )) || + skip "Need MGS version at least 2.15.90" + + [ -d $MOUNT/.lustre ] || setup + + old=$(do_facet mgs $LCTL get_param jobid_var) + stack_trap "do_facet mgs $LCTL set_param -d jobid_var" + stack_trap "do_facet mgs $LCTL set_param -P jobid_var=$old" + + # fill the "params" llog file + for i in {1..50}; do + do_facet mgs $LCTL set_param -P jobid_var=TEST_123H + do_facet mgs $LCTL set_param -P jobid_var=$old + done + + local llog_str + local num + llog_str=$(do_facet mgs $LCTL llog_print -r params) || + error "'lctl llog_print -r params' failed" + num=$(wc -l <<< "$llog_str") + + # llog_print parallel executions + do_facet mgs "seq 1 20 | "\ + "xargs -P20 -I{} bash -c '$LCTL llog_print -r params | wc -l' | "\ + "sort | uniq -c" | + awk '$2 == "'$num'" { print $0; if ($1 != 20) exit 1; }' || + error "'corrupted output for 'lctl llog_print -r params'" +} +run_test 123H "check concurent accesses with 'lctl llog_print" test_124() { -- 1.8.3.1