/****************** User-settable parameter keys *********************/
-/* e.g.
+/* e.g.
tunefs.lustre --param="failover.node=192.168.0.13@tcp0" /dev/sda
lctl conf_param testfs-OST0000 failover.node=3@elan,192.168.0.3@tcp0
... testfs-MDT0000.lov.stripesize=4M
... testfs-OST0000.ost.client_cache_seconds=15
- ... testfs.sys.timeout=<secs>
+ ... testfs.sys.timeout=<secs>
... testfs.llite.max_read_ahead_mb=16
*/
/* System global or special params not handled in obd's proc */
-#define PARAM_SYS_TIMEOUT "sys.timeout=" /* global */
-#define PARAM_SYS_LDLM_TIMEOUT "sys.ldlm_timeout=" /* global */
+#define PARAM_TIMEOUT "timeout=" /* global */
+#define PARAM_LDLM_TIMEOUT "ldlm_timeout=" /* global */
+#define PARAM_AT_MIN "at_min=" /* global */
+#define PARAM_AT_MAX "at_max=" /* global */
+#define PARAM_AT_EXTRA "at_extra=" /* global */
+#define PARAM_AT_EARLY_MARGIN "at_early_margin=" /* global */
+#define PARAM_AT_HISTORY "at_history=" /* global */
#define PARAM_MGSNODE "mgsnode=" /* during mount */
#define PARAM_FAILNODE "failover.node=" /* llog generation */
#define PARAM_FAILMODE "failover.mode=" /* llog generation */
#define PARAM_MDC "mdc."
#define PARAM_LLITE "llite."
#define PARAM_LOV "lov."
+#define PARAM_SYS "sys." /* global; prefix written in front of the keys
+                          * marked "global" above, e.g. sys.timeout=.
+                          * It is matched first and the remainder is then
+                          * compared against those keys. */
#endif /* _LUSTRE_PARAM_H */
networking / disk / timings affected by load (use Adaptive Timeouts) */
extern unsigned int obd_timeout; /* seconds */
extern unsigned int ldlm_timeout; /* seconds */
+extern unsigned int at_min; /* adaptive timeout minimum (sec) */
+extern unsigned int at_max; /* adaptive timeout maximum (sec) */
+extern unsigned int at_history; /* period (sec) over which the slowest event is remembered */
+extern int at_early_margin; /* how soon before an RPC deadline to send an early reply */
+extern int at_extra; /* extra time to give with each early reply */
extern unsigned int obd_sync_filter;
extern unsigned int obd_max_dirty_pages;
extern atomic_t obd_dirty_pages;
return rc;
}
-/* write global obd timeout or ldlm timeout param into log */
-static int mgs_write_log_timeout(struct obd_device *obd, struct fs_db *fsdb,
- struct mgs_target_info *mti, char *value,
- int cmd, char *comment)
+/*
+ * Write a global variable setting into the log.
+ *
+ * sys - the parameter as the caller matched it against PARAM_SYS; it is
+ *       stored as string buffer 1 of the config record
+ * ptr - the part of the parameter that follows PARAM_SYS; it is parsed
+ *       here and also passed on as the last argument of
+ *       mgs_write_log_direct_all()
+ *
+ * "timeout=" and "ldlm_timeout=" are recorded with their own
+ * LCFG_SET_TIMEOUT / LCFG_SET_LDLM_TIMEOUT commands, the at_* keys are
+ * recorded as LCFG_PARAM.
+ *
+ * Returns -EINVAL for any other key, -ENOMEM if no config record could
+ * be created, otherwise the result of mgs_write_log_direct_all().
+ */
+static int mgs_write_log_sys(struct obd_device *obd, struct fs_db *fsdb,
+ struct mgs_target_info *mti, char *sys, char *ptr)
{
struct lustre_cfg_bufs bufs;
struct lustre_cfg *lcfg;
- int timeout;
+ char *tmp;
+ int cmd, val;
int rc;
- timeout = simple_strtoul(value, NULL, 0);
- CDEBUG(D_MGS, "timeout: %d (%s)\n", timeout, comment);
+ if (class_match_param(ptr, PARAM_TIMEOUT, &tmp) == 0)
+ cmd = LCFG_SET_TIMEOUT;
+ else if (class_match_param(ptr, PARAM_LDLM_TIMEOUT, &tmp) == 0)
+ cmd = LCFG_SET_LDLM_TIMEOUT;
+ /* Check for known params here so we can return error to lctl */
+ else if ((class_match_param(ptr, PARAM_AT_MIN, &tmp) == 0)
+ || (class_match_param(ptr, PARAM_AT_MAX, &tmp) == 0)
+ || (class_match_param(ptr, PARAM_AT_EXTRA, &tmp) == 0)
+ || (class_match_param(ptr, PARAM_AT_EARLY_MARGIN, &tmp) == 0)
+ || (class_match_param(ptr, PARAM_AT_HISTORY, &tmp) == 0))
+ cmd = LCFG_PARAM;
+ else
+ return -EINVAL;
+
+ val = simple_strtoul(tmp, NULL, 0);
+ CDEBUG(D_MGS, "global %s = %d\n", ptr, val);
lustre_cfg_bufs_reset(&bufs, NULL);
+ lustre_cfg_bufs_set_string(&bufs, 1, sys);
lcfg = lustre_cfg_new(cmd, &bufs);
- lcfg->lcfg_num = timeout;
+        /* Do not dereference a record that was never created.
+         * NOTE(review): assumes lustre_cfg_new() reports failure by
+         * returning NULL - confirm against its definition. */
+        if (lcfg == NULL)
+                return -ENOMEM;
+ lcfg->lcfg_num = val;
/* modify all servers and clients */
rc = mgs_write_log_direct_all(obd, fsdb, mti, lcfg, mti->mti_fsname,
- comment);
+ ptr);
lustre_cfg_free(lcfg);
return rc;
}
goto end_while;
}
- if (class_match_param(ptr, PARAM_SYS_TIMEOUT, &tmp) == 0) {
- rc = mgs_write_log_timeout(obd, fsdb, mti, tmp,
- LCFG_SET_TIMEOUT,
- "obd_timeout");
- goto end_while;
- }
-
- if (class_match_param(ptr, PARAM_SYS_LDLM_TIMEOUT, &tmp) == 0) {
- rc = mgs_write_log_timeout(obd, fsdb, mti, tmp,
- LCFG_SET_LDLM_TIMEOUT,
- "ldlm_timeout");
+ if (class_match_param(ptr, PARAM_SYS, &tmp) == 0) {
+ rc = mgs_write_log_sys(obd, fsdb, mti, ptr, tmp);
goto end_while;
}
unsigned int obd_debug_peer_on_timeout;
unsigned int obd_dump_on_timeout;
unsigned int obd_dump_on_eviction;
+unsigned int obd_max_dirty_pages = 256;
unsigned int obd_timeout = OBD_TIMEOUT_DEFAULT; /* seconds */
unsigned int ldlm_timeout = LDLM_TIMEOUT_DEFAULT; /* seconds */
-unsigned int obd_max_dirty_pages = 256;
-atomic_t obd_dirty_pages;
+/* Adaptive timeout defs here instead of ptlrpc module for /proc/sys/ access */
+unsigned int at_min = 0;
+#ifdef HAVE_AT_SUPPORT
+unsigned int at_max = 600;
+#else
+unsigned int at_max = 0;
+#endif
+unsigned int at_history = 600;
+int at_early_margin = 5;
+int at_extra = 30;
+atomic_t obd_dirty_pages;
cfs_waitq_t obd_race_waitq;
int obd_race_state;
EXPORT_SYMBOL(ldlm_timeout);
EXPORT_SYMBOL(obd_max_dirty_pages);
EXPORT_SYMBOL(obd_dirty_pages);
+EXPORT_SYMBOL(at_min);
+EXPORT_SYMBOL(at_max);
+EXPORT_SYMBOL(at_extra);
+EXPORT_SYMBOL(at_early_margin);
+EXPORT_SYMBOL(at_history);
EXPORT_SYMBOL(ptlrpc_put_connection_superhack);
EXPORT_SYMBOL(proc_lustre_root);
OBD_DEBUG_PEER_ON_TIMEOUT, /* dump peer debug when RPC times out */
OBD_ALLOC_FAIL_RATE, /* memory allocation random failure rate */
OBD_MAX_DIRTY_PAGES, /* maximum dirty pages */
+ OBD_AT_MIN, /* Adaptive timeouts params */
+ OBD_AT_MAX,
+ OBD_AT_EXTRA,
+ OBD_AT_EARLY_MARGIN,
+ OBD_AT_HISTORY,
};
#else
#define CTL_LUSTRE CTL_UNNUMBERED
#define OBD_DEBUG_PEER_ON_TIMEOUT CTL_UNNUMBERED
#define OBD_ALLOC_FAIL_RATE CTL_UNNUMBERED
#define OBD_MAX_DIRTY_PAGES CTL_UNNUMBERED
+#define OBD_AT_MIN CTL_UNNUMBERED
+#define OBD_AT_MAX CTL_UNNUMBERED
+#define OBD_AT_EXTRA CTL_UNNUMBERED
+#define OBD_AT_EARLY_MARGIN CTL_UNNUMBERED
+#define OBD_AT_HISTORY CTL_UNNUMBERED
#endif
int LL_PROC_PROTO(proc_fail_loc)
return ll_proc_dostring(&dummy,write,filp,buffer,lenp, ppos);
}
+/*
+ * Handlers for the adaptive timeout tunables.  Each one hands its
+ * arguments unchanged to ll_proc_dointvec(); the at_* entries of
+ * obd_table name them as .proc_handler.
+ */
+int LL_PROC_PROTO(proc_at_min)
+{
+ return ll_proc_dointvec(table, write, filp, buffer, lenp, ppos);
+}
+int LL_PROC_PROTO(proc_at_max)
+{
+ return ll_proc_dointvec(table, write, filp, buffer, lenp, ppos);
+}
+int LL_PROC_PROTO(proc_at_extra)
+{
+ return ll_proc_dointvec(table, write, filp, buffer, lenp, ppos);
+}
+int LL_PROC_PROTO(proc_at_early_margin)
+{
+ return ll_proc_dointvec(table, write, filp, buffer, lenp, ppos);
+}
+int LL_PROC_PROTO(proc_at_history)
+{
+ return ll_proc_dointvec(table, write, filp, buffer, lenp, ppos);
+}
+
static cfs_sysctl_table_t obd_table[] = {
{
.ctl_name = OBD_FAIL_LOC,
.mode = 0644,
.proc_handler = &proc_max_dirty_pages_in_mb
},
+ {
+ .ctl_name = OBD_AT_MIN,
+ .procname = "at_min",
+ .data = &at_min,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_at_min
+ },
+ {
+ .ctl_name = OBD_AT_MAX,
+ .procname = "at_max",
+ .data = &at_max,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_at_max
+ },
+ {
+ .ctl_name = OBD_AT_EXTRA,
+ .procname = "at_extra",
+ .data = &at_extra,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_at_extra
+ },
+ {
+ .ctl_name = OBD_AT_EARLY_MARGIN,
+ .procname = "at_early_margin",
+ .data = &at_early_margin,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_at_early_margin
+ },
+ {
+ .ctl_name = OBD_AT_HISTORY,
+ .procname = "at_history",
+ .data = &at_history,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_at_history
+ },
{ 0 }
};
EXIT;
}
+/*
+ * Store \a val in the adaptive timeout variable selected by \a ptr.
+ *
+ * \a ptr is compared against the PARAM_AT_* keys in turn; the first key
+ * that matches decides which of at_min, at_max, at_extra, at_early_margin
+ * or at_history is overwritten.
+ *
+ * Returns 0 once a variable has been set, -EINVAL if no key matches.
+ */
+static int class_set_global(char *ptr, int val)
+{
+        int rc = 0;
+        ENTRY;
+
+        if (!class_match_param(ptr, PARAM_AT_MIN, NULL)) {
+                at_min = val;
+        } else if (!class_match_param(ptr, PARAM_AT_MAX, NULL)) {
+                at_max = val;
+        } else if (!class_match_param(ptr, PARAM_AT_EXTRA, NULL)) {
+                at_extra = val;
+        } else if (!class_match_param(ptr, PARAM_AT_EARLY_MARGIN, NULL)) {
+                at_early_margin = val;
+        } else if (!class_match_param(ptr, PARAM_AT_HISTORY, NULL)) {
+                at_history = val;
+        } else {
+                rc = -EINVAL;
+        }
+
+        /* only a successful assignment is logged */
+        if (rc == 0) {
+                CDEBUG(D_IOCTL, "global %s = %d\n", ptr, val);
+        }
+
+        RETURN(rc);
+}
+
+
/* We can't call ll_process_config directly because it lives in a module that
must be loaded after this one. */
static int (*client_process_config)(struct lustre_cfg *lcfg) = NULL;
GOTO(out, err = 0);
}
case LCFG_PARAM: {
+ char *tmp;
/* llite has no obd */
if ((class_match_param(lustre_cfg_string(lcfg, 1),
PARAM_LLITE, 0) == 0) &&
client_process_config) {
err = (*client_process_config)(lcfg);
GOTO(out, err);
+ } else if ((class_match_param(lustre_cfg_string(lcfg, 1),
+ PARAM_SYS, &tmp) == 0)) {
+ /* Global param settings */
+ err = class_set_global(tmp, lcfg->lcfg_num);
+ /* Note that since LCFG_PARAM is LCFG_REQUIRED, new
+ unknown globals would cause config to fail */
+ if (err)
+ CWARN("Ignoring unknown param %s\n", tmp);
+        /* Really ignore it: err must be cleared by assignment, as
+         * in the other GOTO(out, err = 0) of this function.  A
+         * literal 0 handed to GOTO() cannot change err, so the
+         * error from class_set_global() would stay in it. */
+ GOTO(out, err = 0);
}
+
/* Fall through */
break;
}
if (!matched) {
CERROR("%s: unknown param %s\n",
(char *)lustre_cfg_string(lcfg, 0), key);
- /* rc = -EINVAL; continue parsing other params */
+ /* rc = -EINVAL; continue parsing other params */
} else {
LCONSOLE_INFO("%s.%.*s: set parameter %.*s=%s\n",
lustre_cfg_string(lcfg, 0),
int test_req_buffer_pressure = 0;
CFS_MODULE_PARM(test_req_buffer_pressure, "i", int, 0444,
"set non-zero to put pressure on request buffer pools");
-unsigned int at_min = 0;
+
CFS_MODULE_PARM(at_min, "i", int, 0644,
"Adaptive timeout minimum (sec)");
-
-#ifdef HAVE_AT_SUPPORT
-unsigned int at_max = 600;
-#else
-unsigned int at_max = 0;
-#endif
-
-EXPORT_SYMBOL(at_max);
CFS_MODULE_PARM(at_max, "i", int, 0644,
"Adaptive timeout maximum (sec)");
-unsigned int at_history = 600;
CFS_MODULE_PARM(at_history, "i", int, 0644,
"Adaptive timeouts remember the slowest event that took place "
"within this period (sec)");
-static int at_early_margin = 5;
CFS_MODULE_PARM(at_early_margin, "i", int, 0644,
"How soon before an RPC deadline to send an early reply");
-static int at_extra = 30;
CFS_MODULE_PARM(at_extra, "i", int, 0644,
"How much extra time to give with each early reply");
-
/* forward ref */
static int ptlrpc_server_post_idle_rqbds (struct ptlrpc_service *svc);
array->paa_count = 0;
array->paa_deadline = -1;
- /* allocate memory for srv_at_array (ptlrpc_at_array) */
+ /* allocate memory for srv_at_array (ptlrpc_at_array) */
OBD_ALLOC(array->paa_reqs_array, sizeof(struct list_head) * size);
if (array->paa_reqs_array == NULL)
GOTO(failed, NULL);
for (index = 0; index < size; index++)
CFS_INIT_LIST_HEAD(&array->paa_reqs_array[index]);
-
+
OBD_ALLOC(array->paa_reqs_count, sizeof(__u32) * size);
if (array->paa_reqs_count == NULL)
GOTO(failed, NULL);
if (req->rq_at_linked) {
struct ptlrpc_at_array *array = &svc->srv_at_array;
__u32 index = req->rq_at_index;
-
- req->rq_at_linked = 0;
+
+ req->rq_at_linked = 0;
array->paa_reqs_count[index]--;
array->paa_count--;
}
if (array->paa_reqs_count[index] > 0) {
/* latest rpcs will have the latest deadlines in the list,
* so search backward. */
- list_for_each_entry_reverse(rq, &array->paa_reqs_array[index],
+ list_for_each_entry_reverse(rq, &array->paa_reqs_array[index],
rq_timed_list) {
if (req->rq_deadline >= rq->rq_deadline) {
- list_add(&req->rq_timed_list,
+ list_add(&req->rq_timed_list,
&rq->rq_timed_list);
break;
}
count = array->paa_count;
while (count > 0) {
count -= array->paa_reqs_count[index];
- list_for_each_entry_safe(rq, n, &array->paa_reqs_array[index],
+ list_for_each_entry_safe(rq, n, &array->paa_reqs_array[index],
rq_timed_list) {
if (rq->rq_deadline <= now + at_early_margin) {
list_move(&rq->rq_timed_list, &work_list);
rq->rq_at_linked = 0;
continue;
}
-
+
/* update the earliest deadline */
if (deadline == -1 || rq->rq_deadline < deadline)
deadline = rq->rq_deadline;
break;
}
-
+
if (++index >= array->paa_size)
index = 0;
}
cfs_timer_disarm(&service->srv_at_timer);
if (array->paa_reqs_array != NULL) {
- OBD_FREE(array->paa_reqs_array,
+ OBD_FREE(array->paa_reqs_array,
sizeof(struct list_head) * array->paa_size);
array->paa_reqs_array = NULL;
}
-
+
if (array->paa_reqs_count != NULL) {
- OBD_FREE(array->paa_reqs_count,
+ OBD_FREE(array->paa_reqs_count,
sizeof(__u32) * array->paa_size);
array->paa_reqs_count= NULL;
}
-
+
OBD_FREE(service, sizeof(*service));
return 0;
}
remote_ost_nodsh && skip "remote OST with nodsh" && return 0
# With adaptive timeouts, bulk_get won't expire until adaptive_timeout_max
- if at_is_valid && at_is_enabled; then
+ if at_is_enabled; then
at_max_saved=$(at_max_get ost1)
at_max_set $TIMEOUT ost1
fi
[ "$mdcdev" ] || exit 2
# adaptive timeouts slow this way down
- if at_is_valid && at_is_enabled; then
+ if at_is_enabled; then
at_max_saved=$(at_max_get mds)
at_max_set 40 mds
fi
echo "Cleaning up AT ..."
if [ -n "$ATOLDBASE" ]; then
- local at_history=$(do_facet mds "find /sys/ -name at_history")
- do_facet mds "echo $ATOLDBASE >> $at_history" || true
- do_facet ost1 "echo $ATOLDBASE >> $at_history" || true
+        # Put back the at_history value that was saved in ATOLDBASE before
+        # the timebase was lowered.  Re-reading the current at_history here
+        # would only write the lowered value back onto itself.
+        do_facet mds "lctl set_param at_history=$ATOLDBASE" || true
+        do_facet ost1 "lctl set_param at_history=$ATOLDBASE" || true
fi
if [ $AT_MAX_SET -ne 0 ]; then
at_start()
{
local at_max_new=600
- if ! at_is_valid; then
- skip "AT env is invalid"
- return 1
- fi
# Save at_max original values
local facet
done
if [ -z "$ATOLDBASE" ]; then
- local at_history=$(do_facet mds "find /sys/ -name at_history")
- [ -z "$at_history" ] && skip "missing /sys/.../at_history " && return 1
- ATOLDBASE=$(do_facet mds "cat $at_history")
+ ATOLDBASE=$(do_facet mds "lctl get_param -n at_history")
# speed up the timebase so we can check decreasing AT
- do_facet mds "echo 8 >> $at_history"
- do_facet ost1 "echo 8 >> $at_history"
+ do_facet mds "lctl set_param at_history=8" || true
+ do_facet ost1 "lctl set_param at_history=8" || true
# sleep for a while to cool down, should be > 8s and also allow
# at least one ping to be sent. simply use TIMEOUT to be safe.
[ "$1" != "-u" -a "$1" != "-g" ] && error "resetquota: wrong specifier $1 passed"
count=0
- if at_is_valid && at_is_enabled; then
+ if at_is_enabled; then
timeout=$(at_max_get mds)
else
timeout=$(lctl get_param -n timeout)
echo " step2: testing ......"
count=0
- if at_is_valid && at_is_enabled; then
+ if at_is_enabled; then
timeout=$(at_max_get mds)
else
timeout=$(lctl get_param -n timeout)
echo " step2: testing ......"
count=0
- if at_is_valid && at_is_enabled; then
+ if at_is_enabled; then
timeout=$(at_max_get mds)
else
timeout=$(lctl get_param -n timeout)
fi
count=0
- if at_is_valid && at_is_enabled; then
+ if at_is_enabled; then
timeout=$(at_max_get mds)
else
timeout=$(lctl get_param -n timeout)
export FSTYPE=${FSTYPE:-"ldiskfs"}
export NAME=${NAME:-local}
export DIR2
- export AT_MAX_PATH
export SAVE_PWD=${SAVE_PWD:-$LUSTRE/tests}
if [ "$ACCEPTOR_PORT" ]; then
##################################
# Adaptive Timeouts funcs
-at_is_valid() {
- if [ -z "$AT_MAX_PATH" ]; then
- AT_MAX_PATH=$(do_facet mds "find /sys/ -name at_max")
- [ -z "$AT_MAX_PATH" ] && echo "missing /sys/.../at_max " && return 1
- fi
- return 0
-}
-
at_is_enabled() {
- at_is_valid || error "invalid call"
-
# only check mds, we assume at_max is the same on all nodes
- local at_max=$(do_facet mds "cat $AT_MAX_PATH")
+ local at_max=$(do_facet mds "lctl get_param -n at_max")
if [ $at_max -eq 0 ]; then
return 1
else
at_max_get() {
local facet=$1
- at_is_valid || error "invalid call"
-
# suppose that all ost-s has the same at_max set
if [ $facet == "ost" ]; then
- do_facet ost1 "cat $AT_MAX_PATH"
+ do_facet ost1 "lctl get_param -n at_max"
else
- do_facet $facet "cat $AT_MAX_PATH"
+ do_facet $facet "lctl get_param -n at_max"
fi
}
local at_max=$1
shift
- at_is_valid || error "invalid call"
-
local facet
for facet in $@; do
if [ $facet == "ost" ]; then
for i in `seq $OSTCOUNT`; do
- do_facet ost$i "echo $at_max > $AT_MAX_PATH"
+ do_facet ost$i "lctl set_param at_max=$at_max"
done
else
- do_facet $facet "echo $at_max > $AT_MAX_PATH"
+ do_facet $facet "lctl set_param at_max=$at_max"
fi
done
}
recs_pr[i] = cur_rec;
if (ext2_test_bit(idx, (*llog)->llh_bitmap)) {
- if (le32_to_cpu(cur_rec->lrh_type) != OBD_CFG_REC)
+ if (le32_to_cpu(cur_rec->lrh_type) != OBD_CFG_REC)
printf("rec #%d type=%x len=%u\n", idx,
cur_rec->lrh_type, cur_rec->lrh_len);
} else {
/* The header counts only set records */
i--;
}
-
+
ptr += le32_to_cpu(cur_rec->lrh_len);
if ((ptr - file_buf) > file_size) {
printf("The log is corrupt (too big at %d)\n", i);
}
case(LCFG_SET_TIMEOUT):{
printf("set_timeout=%d ", lcfg->lcfg_num);
- print_1_cfg(lcfg);
+ break;
+ }
+ case(LCFG_SET_LDLM_TIMEOUT):{
+ printf("set_ldlm_timeout=%d ", lcfg->lcfg_num);
break;
}
case(LCFG_SET_UPCALL):{
}
if (marker->cm_flags & CM_EXCLUDE) {
- if (marker->cm_flags & CM_START)
+ if (marker->cm_flags & CM_START)
printf("EXCLUDE START ");
else
printf("EXCLUDE END ");
{
__u32 lopt;
int i, skip = 0;
-
+
for(i = 0; i < rec_number; i++) {
printf("#%.2d (%.3d)", le32_to_cpu(recs[i]->lrh_index),
le32_to_cpu(recs[i]->lrh_len));
lopt = le32_to_cpu(recs[i]->lrh_type);
- if (recs[i]->padding == CANCELLED)
+ if (recs[i]->padding == CANCELLED)
printf("NOT SET ");
-
+
if (lopt == OBD_CFG_REC) {
struct lustre_cfg *lcfg;
lcfg = (struct lustre_cfg *)((char*)(recs[i]) +