#define DEF_STATFS_REFRESH_INTV 5 /* OST statfs update interval, in seconds */
#define DEF_FAST_POOL_MAX_USED 30 /* open for migration if % space used is less than */
#define DEF_SLOW_POOL_MAX_USED 90 /* open for migration if % space used is less than */
+#define DEF_SSH_EXEC_TO_SECS (10 * 60) /* ssh exec timeout */
#define LAMIGO_USERFILE "/var/lib/lamigo-%s.chlg"
#define LAMIGO_DUMPFILE "/var/run/lamigo-%s.stats"
/* --tgt-free, deprecated */
"\t--slow-pool=POOL (default '%s')\n"
"\t--slow-pool-max-used=MAX stop mirroring to POOL when %% used reaches MAX (default %d)\n"
+ "\t--ssh-exec-timeout=SECS, ssh connection timeout for remote exec command (default: %d)\n"
"\t--ssh-log-verbosity=V, set SSH_OPTIONS_LOG_VERBOSITY to V (default: 0)\n"
"\t--timestamps, add timestamp to log messages (default: no)\n"
"\t--version, print version information and exit\n",
DEF_HOT_FRACTION,
DEF_HOT_AFTER_IDLE,
DEF_SLOW_POOL,
- DEF_SLOW_POOL_MAX_USED);
+ DEF_SLOW_POOL_MAX_USED,
+ DEF_SSH_EXEC_TO_SECS);
exit(EXIT_SUCCESS);
}
.o_fast_pool_max_used = DEF_FAST_POOL_MAX_USED,
.o_slow_pool_max_used = DEF_SLOW_POOL_MAX_USED,
.o_progress_interval = DEF_PROGRESS_INTV,
+ .o_ssh_exec_to = DEF_SSH_EXEC_TO_SECS,
.o_alr_extra_args = DEF_ALR_EXTRA_ARGS,
.o_alr_periods = DEF_ALR_PERIODS,
.o_alr_period_time = DEF_ALR_PERIOD_SECS,
" hot_after_idle: %d\n"
" fast_pool_max_used: %d\n"
" slow_pool_max_used: %d\n"
- " include_dom: %d\n",
+ " include_dom: %d\n"
+ " ssh_exec_to: %ld\n",
opt.o_slow_pool, opt.o_min_age, opt.o_cache_size,
opt.o_rescan, opt.o_num_threads, opt.o_pool_refresh,
opt.o_progress_interval, opt.o_alr_periods,
opt.o_alr_hot_fraction, opt.o_alr_hot_after_idle,
opt.o_fast_pool_max_used,
opt.o_slow_pool_max_used,
- opt.o_include_dom);
+ opt.o_include_dom,
+ opt.o_ssh_exec_to);
for (pl = fast_pools; pl != NULL; pl = pl->pl_next, i++)
fprintf(f, "pool %s:\n"
" osts: %d\n"
rc = a->rag_is_local ?
system(cmd) :
- lipe_ssh_exec(&rss->rss_ctx, cmd, pstatus);
+ lipe_ssh_exec_timeout(&rss->rss_ctx, cmd, pstatus,
+ opt.o_ssh_exec_to);
if (rc)
llapi_error(LLAPI_MSG_INFO, rc,
"error executing ssh command '%s' on '%s'",
LAMIGO_OPT_SLOW_POOL_MAX_USED,
LAMIGO_OPT_SRC_DOM, /* == LAMIGO_OPT_INCLUDE_DOM + warning */
LAMIGO_OPT_SRC_FREE, /* == LAMIGO_OPT_FAST_POOL_MAX_USED + math + warning */
+ LAMIGO_OPT_SSH_EXEC_TO,
LAMIGO_OPT_SSH_LOG_VERBOSITY,
LAMIGO_OPT_STATFS_REFRESH,
LAMIGO_OPT_TGT_FREE, /* == LAMIGO_OPT_SLOW_POOL_MAX_USED + math + warning */
{ "src", required_argument, NULL, 's'},
{ "src-dom", no_argument, NULL, LAMIGO_OPT_SRC_DOM},
{ "src-free", required_argument, NULL, LAMIGO_OPT_SRC_FREE},
+ { "ssh-exec-timeout", required_argument, NULL, LAMIGO_OPT_SSH_EXEC_TO },
{ "ssh-log-verbosity", required_argument, NULL, LAMIGO_OPT_SSH_LOG_VERBOSITY },
{ "statfs-refresh", required_argument, NULL, LAMIGO_OPT_STATFS_REFRESH },
{ "tgt", required_argument, NULL, 't'},
case LAMIGO_OPT_INCLUDE_DOM:
opt.o_include_dom = 1;
break;
+		case LAMIGO_OPT_SSH_EXEC_TO:
+		{
+			/*
+			 * Timeout, in seconds, for remote exec commands run
+			 * over ssh. 0 requests an infinite timeout. Negative
+			 * values, out-of-range values and anything that is
+			 * not a complete decimal number are rejected.
+			 */
+			char *end;
+			long result;
+
+			/* strtol() sets errno only on failure; clear it first */
+			errno = 0;
+			result = strtol(optarg, &end, 10);
+			if (end == optarg ||	/* no digits consumed */
+			    *end != '\0' ||	/* trailing garbage, e.g. "10s" */
+			    errno == ERANGE ||	/* does not fit in a long */
+			    result < 0) {
+				LX_FATAL("invalid ssh exec connection timeout '%s'\n",
+					 optarg);
+			}
+
+			opt.o_ssh_exec_to = result;
+		}
+			break;
case LAMIGO_OPT_SSH_LOG_VERBOSITY:
lipe_ssh_log_verbosity = atoi(optarg);
break;
int o_fast_pool_max_used;
int o_slow_pool_max_used;
int o_progress_interval; /* how often to show progress */
+ long o_ssh_exec_to; /* ssh connection timeout for exec command */
char *o_alr_extra_args;
int o_alr_periods;
int o_alr_period_time;
static int lamigo_alr_agent_run(struct alr_agent *ala)
{
ssh_channel channel = NULL;
- unsigned long now = time(NULL);
- unsigned long last_checked = now;
+ time_t last_checked = time(NULL);
int rc, offset = 0, received = 0;
char cmd[PATH_MAX];
char buffer[16 * 1024];
"ofd_access_log_reader -i %d -I %d %s 2> /dev/null",
opt.o_alr_ofd_interval, mdtidx, opt.o_alr_extra_args);
- rc = lipe_ssh_start_cmd(&ala->ala_ctx, cmd, &channel);
+ rc = lipe_ssh_start_cmd_timeout(&ala->ala_ctx, cmd, &channel, 5);
if (rc != SSH_OK) {
- LX_ERROR("cannot start ofd_access_log_reader on host '%s': rc = %d\n",
+ LX_ERROR("cannot start access log reader agent on host '%s': rc = %d\n",
ala->ala_host, rc);
goto out;
}
LX_DEBUG("started access log reader agent on '%s'\n", ala->ala_host);
while (ssh_channel_is_open(channel) && !ssh_channel_is_eof(channel)) {
- rc = ssh_channel_read(channel, buffer + offset,
- sizeof(buffer) - offset, 0);
- if (rc == 0) /* ssh_channel_read timeout */
- continue;
- if (rc < 0)
+ time_t now;
+
+ rc = ssh_channel_read_timeout(channel, buffer + offset,
+ sizeof(buffer) - offset, 0,
+ -1); /* SSH_TIMEOUT_INFINITE Internally */
+ if (rc <= 0)
break;
offset = lamigo_alr_parse(ala->ala_host, buffer, offset + rc, &received);
now = time(NULL);
goto out;
}
+ /* This is always executed with SSH_TIMEOUT_DEFAULT internally */
rc = ssh_channel_request_exec(channel, cmd);
if (rc != SSH_OK) {
LX_ERROR("cannot execute SSH command: %d: %s\n",
static int lipe_ssh_session_create(ssh_session *psession, const char *host)
{
ssh_session session = NULL;
- long timeout = 5;
+ const long timeout = 5; /* Default timeout used for ssh connect */
int rc;
assert(SSH_OK == 0);
goto out;
}
+ ssh_set_blocking(session, 1);
+
/* Connect to the ssh server */
rc = ssh_connect(session);
if (rc != SSH_OK) {
return SSH_OK;
}
-int lipe_ssh_start_cmd(struct lipe_ssh_context *ctx, const char *cmd, ssh_channel *pchannel)
+int lipe_ssh_start_cmd_timeout(struct lipe_ssh_context *ctx, const char *cmd,
+ ssh_channel *pchannel, long timeout_sec)
{
int rc;
if (rc != SSH_OK)
return rc;
+ if (timeout_sec == LIPE_SSH_TIMEOUT_INFINITE)
+ timeout_sec = 0; /* Blocking with timeout 0 is infinite */
+
+	/* SSH_OPTIONS_TIMEOUT is given as a pointer to a long, in seconds */
+	rc = ssh_options_set(ctx->lsc_session, SSH_OPTIONS_TIMEOUT, &timeout_sec);
+	if (rc != SSH_OK) {
+		/* Report the unit next to the timeout value, not next to rc */
+		LX_ERROR("cannot set SSH timeout to %ld sec: rc = %d: %s\n",
+			 timeout_sec, rc, ssh_get_error(ctx->lsc_session));
+		return SSH_ERROR;
+	}
+
rc = lipe_ssh_session_start_cmd(ctx->lsc_session, cmd, pchannel);
if (rc != SSH_OK)
lipe_ssh_context_fail(ctx);
return rc;
}
-int lipe_ssh_exec(struct lipe_ssh_context *ctx, const char *cmd, int *pstatus)
+/*
+ * Convenience wrapper: same as lipe_ssh_start_cmd_timeout() with the
+ * timeout fixed to LIPE_SSH_TIMEOUT_INFINITE. The return value is passed
+ * through unchanged.
+ */
+int lipe_ssh_start_cmd(struct lipe_ssh_context *ctx, const char *cmd, ssh_channel *pchannel)
+{
+	const long no_timeout = LIPE_SSH_TIMEOUT_INFINITE;
+
+	return lipe_ssh_start_cmd_timeout(ctx, cmd, pchannel, no_timeout);
+}
+
+int lipe_ssh_exec_timeout(struct lipe_ssh_context *ctx, const char *cmd,
+ int *pstatus, long timeout_sec)
{
int rc;
if (rc != SSH_OK)
return rc;
+ if (timeout_sec == LIPE_SSH_TIMEOUT_INFINITE)
+ timeout_sec = 0; /* Blocking call with timeout 0 is infinite */
+
+ /* Set SSH_TIMEOUT_DEFAULT */
+	rc = ssh_options_set(ctx->lsc_session, SSH_OPTIONS_TIMEOUT, &timeout_sec);
+	if (rc != SSH_OK) {
+		/* Report the unit next to the timeout value, not next to rc */
+		LX_ERROR("cannot set SSH timeout to %ld sec: rc = %d: %s\n",
+			 timeout_sec, rc, ssh_get_error(ctx->lsc_session));
+		return SSH_ERROR;
+	}
+
/* Execute a remote command */
rc = lipe_ssh_session_exec_cmd(ctx->lsc_session, cmd, pstatus);
if (rc < 0)
return rc;
}
+
+/*
+ * Convenience wrapper: same as lipe_ssh_exec_timeout() with the timeout
+ * fixed to LIPE_SSH_TIMEOUT_INFINITE. The return value is passed through
+ * unchanged.
+ */
+int lipe_ssh_exec(struct lipe_ssh_context *ctx, const char *cmd, int *pstatus)
+{
+	const long no_timeout = LIPE_SSH_TIMEOUT_INFINITE;
+
+	return lipe_ssh_exec_timeout(ctx, cmd, pstatus, no_timeout);
+}
ssh_session lsc_session;
};
+#define LIPE_SSH_TIMEOUT_INFINITE (0)
+
int lipe_ssh_context_init(struct lipe_ssh_context *ctx, const char *host);
void lipe_ssh_context_destroy(struct lipe_ssh_context *ctx);
+int lipe_ssh_exec_timeout(struct lipe_ssh_context *ctx, const char *cmd,
+ int *pstatus, long timeout_sec);
+int lipe_ssh_start_cmd_timeout(struct lipe_ssh_context *ctx, const char *cmd,
+ ssh_channel *pchannel, long timeout_sec);
int lipe_ssh_exec(struct lipe_ssh_context *ctx, const char *cmd, int *pstatus);
int lipe_ssh_start_cmd(struct lipe_ssh_context *ctx, const char *cmd, ssh_channel *pchannel);